using System.IO;
using Microsoft.SharePoint.Client;
using Serilog;
using SharepointToolbox.Core.Helpers;
using SharepointToolbox.Core.Models;

namespace SharepointToolbox.Services;

/// <summary>
/// Orchestrates file copy/move between two SharePoint libraries. Hybrid
/// strategy: server-side <see cref="MoveCopyUtil"/> first (zero local
/// bandwidth), then a fallback to a stream copy (<c>File.OpenBinaryStream</c>
/// for the download, <c>Folder.Files.Add</c> for the upload) when the
/// server-side call fails with a list-view-threshold error. Folder enumeration
/// uses a paged query; destination folder creation is cached per job so the
/// same path is not re-checked for every file.
/// </summary>
public class FileTransferService : IFileTransferService
{
    private const int ListViewThresholdItemCount = 5000;

    // NOTE(review): not referenced anywhere in this class — confirm whether it
    // was meant to be passed to the pagination helper.
    private const int LargeLibraryPageSize = 500;

    /// <summary>
    /// Runs the configured <paramref name="job"/>. Enumerates source files
    /// (unless the job is folder-only), pre-creates the destination folder
    /// when requested, then copies or moves each file according to the job's
    /// mode and conflict policy.
    /// </summary>
    /// <param name="sourceCtx">Context of the site that owns the source library.</param>
    /// <param name="destCtx">Context of the site that owns the destination library.</param>
    /// <param name="job">Transfer description (libraries, folders, mode, conflict policy).</param>
    /// <param name="progress">Progress sink; also forwarded to the retry helper.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The summary produced by <c>BulkOperationRunner.RunAsync</c>.</returns>
    public async Task<BulkOperationSummary<string>> TransferAsync(
        ClientContext sourceCtx,
        ClientContext destCtx,
        TransferJob job,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        // 1. Pre-flight: read the library item counts so we can warn early that
        //    the server-side copy path may trip the list-view threshold. The
        //    stream fallback in TransferSingleFileAsync handles that case.
        var srcItemCount = await TryGetListItemCountAsync(sourceCtx, job.SourceLibrary, progress, ct);
        var dstItemCount = await TryGetListItemCountAsync(destCtx, job.DestinationLibrary, progress, ct);

        Log.Information(
            "Transfer pre-flight: source={SrcLib} ({SrcCount} items), dest={DstLib} ({DstCount} items)",
            job.SourceLibrary, srcItemCount, job.DestinationLibrary, dstItemCount);

        if (srcItemCount > ListViewThresholdItemCount || dstItemCount > ListViewThresholdItemCount)
        {
            progress.Report(OperationProgress.Indeterminate(
                $"Large library detected (source: {srcItemCount}, dest: {dstItemCount}). " +
                "Using paged enumeration and stream-copy fallback when needed."));
        }

        // 2. Enumerate files from source (unless contents are suppressed).
        IReadOnlyList<string> files;
        if (job.CopyFolderContents)
        {
            progress.Report(new OperationProgress(0, 0, "Enumerating source files..."));
            files = await EnumerateFilesAsync(sourceCtx, job, srcItemCount, progress, ct);
        }
        else
        {
            files = Array.Empty<string>();
        }

        // A folder-only job (IncludeSourceFolder) must still reach the folder
        // creation below, so only bail out when there is truly nothing to do.
        if (files.Count == 0 && !job.IncludeSourceFolder)
        {
            progress.Report(new OperationProgress(0, 0, "No files found to transfer."));
            // NOTE(review): element type reconstructed from context — confirm
            // it matches the BulkOperationSummary constructor.
            return new BulkOperationSummary<string>(new List<BulkItemResult<string>>());
        }

        // 3. Build source and destination base paths from the library root
        //    folder URL rather than from the title: the URL segment can differ
        //    from the title (e.g. title "Documents" → URL "Shared Documents").
        var srcBasePath = await ResolveLibraryPathAsync(
            sourceCtx, job.SourceLibrary, job.SourceFolderPath, progress, ct);
        var dstBasePath = await ResolveLibraryPathAsync(
            destCtx, job.DestinationLibrary, job.DestinationFolderPath, progress, ct);

        // Per-job cache of destination folders already ensured, so the same
        // folder is not re-checked for every file it receives.
        var ensuredFolders = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

        // When IncludeSourceFolder is set, recreate the source folder name under
        // the destination (falling back to the source library name). The folder
        // is created up front because the per-file ensure below only runs for
        // nested paths.
        if (job.IncludeSourceFolder)
        {
            var srcFolderName = !string.IsNullOrEmpty(job.SourceFolderPath)
                ? Path.GetFileName(job.SourceFolderPath.TrimEnd('/'))
                : job.SourceLibrary;

            if (!string.IsNullOrEmpty(srcFolderName))
            {
                dstBasePath = $"{dstBasePath}/{srcFolderName}";
                await EnsureFolderCachedAsync(destCtx, dstBasePath, ensuredFolders, progress, ct);
            }
        }

        // 4. Transfer each file using BulkOperationRunner.
        return await BulkOperationRunner.RunAsync(
            files,
            async (fileRelUrl, idx, token) =>
            {
                // Destination path = source path with the source base swapped
                // for the destination base.
                var relativePart = fileRelUrl;
                if (fileRelUrl.StartsWith(srcBasePath, StringComparison.OrdinalIgnoreCase))
                {
                    relativePart = fileRelUrl.Substring(srcBasePath.Length).TrimStart('/');
                }

                // Ensure the destination sub-folder exists (cached).
                var destFolderRelative = dstBasePath;
                var fileFolder = Path.GetDirectoryName(relativePart)?.Replace('\\', '/');
                if (!string.IsNullOrEmpty(fileFolder))
                {
                    destFolderRelative = $"{dstBasePath}/{fileFolder}";
                    await EnsureFolderCachedAsync(destCtx, destFolderRelative, ensuredFolders, progress, token);
                }

                var fileName = Path.GetFileName(relativePart);
                var destFileUrl = $"{destFolderRelative}/{fileName}";

                await TransferSingleFileAsync(sourceCtx, destCtx, fileRelUrl, destFileUrl, job, progress, token);
                Log.Information("Transferred: {Source} -> {Dest}", fileRelUrl, destFileUrl);
            },
            progress,
            ct);
    }

    /// <summary>
    /// Transfers one file: tries the server-side path first and falls back to
    /// the stream copy only when the server reports a list-view-threshold error.
    /// </summary>
    private async Task TransferSingleFileAsync(
        ClientContext sourceCtx,
        ClientContext destCtx,
        string srcFileUrl,
        string dstFileUrl,
        TransferJob job,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        try
        {
            await ServerSideTransferAsync(sourceCtx, destCtx, srcFileUrl, dstFileUrl, job, progress, ct);
        }
        catch (ServerException ex) when (IsListViewThresholdException(ex))
        {
            Log.Warning(
                "Server-side transfer hit list-view threshold for {File} — falling back to stream copy.",
                srcFileUrl);
            await StreamTransferAsync(sourceCtx, destCtx, srcFileUrl, dstFileUrl, job, progress, ct);
        }
    }

    /// <summary>
    /// Copies or moves a file with <see cref="MoveCopyUtil"/>, executed on
    /// <paramref name="sourceCtx"/>. With the Skip conflict policy, a server
    /// error whose message contains "already exists" is logged and swallowed.
    /// </summary>
    private async Task ServerSideTransferAsync(
        ClientContext sourceCtx,
        ClientContext destCtx,
        string srcFileUrl,
        string dstFileUrl,
        TransferJob job,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        // MoveCopyUtil is given absolute URLs (scheme + authority) rather than
        // server-relative paths; each path is prefixed with its own site's origin.
        var srcPath = ResourcePath.FromDecodedUrl(ToAbsoluteUrl(sourceCtx, srcFileUrl));
        var dstPath = ResourcePath.FromDecodedUrl(ToAbsoluteUrl(destCtx, dstFileUrl));

        bool overwrite = job.ConflictPolicy == ConflictPolicy.Overwrite;
        var options = new MoveCopyOptions
        {
            KeepBoth = job.ConflictPolicy == ConflictPolicy.Rename,
            ResetAuthorAndCreatedOnCopy = false, // best-effort metadata preservation
        };

        try
        {
            if (job.Mode == TransferMode.Copy)
            {
                MoveCopyUtil.CopyFileByPath(sourceCtx, srcPath, dstPath, overwrite, options);
            }
            else // Move
            {
                MoveCopyUtil.MoveFileByPath(sourceCtx, srcPath, dstPath, overwrite, options);
            }

            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(sourceCtx, progress, ct);
        }
        catch (ServerException ex) when (
            job.ConflictPolicy == ConflictPolicy.Skip &&
            ex.Message.Contains("already exists", StringComparison.OrdinalIgnoreCase))
        {
            // NOTE(review): this match is English-only; a localized server
            // message would surface as a failure instead of a skip.
            Log.Warning("Skipped (already exists): {File}", srcFileUrl);
        }
    }

    /// <summary>
    /// Stream copy fallback. Downloads the source via
    /// <c>File.OpenBinaryStream</c> into memory and uploads it with
    /// <c>Folder.Files.Add(FileCreationInformation)</c>. The bytes round-trip
    /// through the local machine, so this is used only when the server-side
    /// copy hit the list-view threshold. For a Move, the source file is deleted
    /// after the upload succeeds.
    /// </summary>
    private async Task StreamTransferAsync(
        ClientContext sourceCtx,
        ClientContext destCtx,
        string srcFileUrl,
        string dstFileUrl,
        TransferJob job,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        // Null means policy=Skip and the destination already exists.
        var effectiveDestUrl = await ResolveDestinationOnConflictAsync(destCtx, dstFileUrl, job, progress, ct);
        if (effectiveDestUrl == null)
        {
            Log.Warning("Skipped (already exists, stream fallback): {File}", srcFileUrl);
            return;
        }

        // The Rename policy already resolved to a free path above, so overwrite
        // is only needed for the explicit Overwrite policy.
        bool overwrite = job.ConflictPolicy == ConflictPolicy.Overwrite;

        ct.ThrowIfCancellationRequested();

        // 1. Download the source bytes into memory. OpenBinaryStream returns a
        //    ClientResult whose Value is usable only after the query executes.
        var srcFile = sourceCtx.Web.GetFileByServerRelativeUrl(srcFileUrl);
        var streamResult = srcFile.OpenBinaryStream();
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(sourceCtx, progress, ct);

        using var buffer = new MemoryStream();
        using (var sourceStream = streamResult.Value)
        {
            // Dispose the response stream as soon as it has been drained.
            await sourceStream.CopyToAsync(buffer, 81920, ct);
        }

        buffer.Position = 0;

        // 2. Upload to the destination folder, addressed by path.
        var slash = effectiveDestUrl.LastIndexOf('/');
        var destFolderUrl = effectiveDestUrl.Substring(0, slash);
        var destFileName = effectiveDestUrl.Substring(slash + 1);

        var destFolder = destCtx.Web.GetFolderByServerRelativeUrl(destFolderUrl);
        var creation = new FileCreationInformation
        {
            Url = destFileName,
            Overwrite = overwrite,
            ContentStream = buffer,
        };
        destFolder.Files.Add(creation);
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(destCtx, progress, ct);

        if (job.Mode == TransferMode.Move)
        {
            // A stream copy cannot move atomically; delete the source only
            // after the upload succeeded to honour Move semantics.
            var srcDelete = sourceCtx.Web.GetFileByServerRelativeUrl(srcFileUrl);
            srcDelete.DeleteObject();
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(sourceCtx, progress, ct);
        }
    }

    /// <summary>
    /// Applies the job's conflict policy to a destination path. Returns the URL
    /// to write to, or <c>null</c> when the file should be skipped. For the
    /// Rename policy, probes <c>name (1).ext</c>, <c>name (2).ext</c>, ... up
    /// to 999 until an unused name is found.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// No unused name was found within 999 attempts.
    /// </exception>
    private static async Task<string?> ResolveDestinationOnConflictAsync(
        ClientContext destCtx,
        string dstFileUrl,
        TransferJob job,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        if (job.ConflictPolicy == ConflictPolicy.Overwrite)
        {
            return dstFileUrl;
        }

        bool exists = await FileExistsAsync(destCtx, dstFileUrl, progress, ct);
        if (!exists)
        {
            return dstFileUrl;
        }

        if (job.ConflictPolicy == ConflictPolicy.Skip)
        {
            return null;
        }

        // Rename: keep both. Append " (n)" before the extension.
        var lastSlash = dstFileUrl.LastIndexOf('/');
        var dir = dstFileUrl.Substring(0, lastSlash);
        var leaf = dstFileUrl.Substring(lastSlash + 1);
        var stem = Path.GetFileNameWithoutExtension(leaf);
        var ext = Path.GetExtension(leaf);

        for (int n = 1; n <= 999; n++)
        {
            var candidate = $"{dir}/{stem} ({n}){ext}";
            if (!await FileExistsAsync(destCtx, candidate, progress, ct))
            {
                return candidate;
            }
        }

        // Surface as a failure rather than silently overwriting.
        throw new InvalidOperationException(
            $"Could not find an unused destination filename for {dstFileUrl} after 999 attempts.");
    }

    /// <summary>
    /// Best-effort existence probe for a file. Any failure other than
    /// cancellation is treated as "does not exist".
    /// </summary>
    private static async Task<bool> FileExistsAsync(
        ClientContext ctx,
        string fileServerRelativeUrl,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        try
        {
            var file = ctx.Web.GetFileByServerRelativeUrl(fileServerRelativeUrl);
            ctx.Load(file, f => f.Exists);
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
            return file.Exists;
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Cancellation must propagate; everything else keeps the original
            // best-effort "not found" answer.
            return false;
        }
    }

    /// <summary>
    /// Detects SharePoint's list-view-threshold error by message text.
    /// Matches English ("list view threshold"), French ("seuil d'affichage",
    /// or "seuil d" together with "liste"), German
    /// ("Listenansichtsschwellenwert") and Spanish ("umbral de vista de lista").
    /// </summary>
    internal static bool IsListViewThresholdException(Exception ex)
    {
        var msg = ex.Message ?? string.Empty;

        return msg.Contains("list view threshold", StringComparison.OrdinalIgnoreCase)
            || msg.Contains("seuil d'affichage", StringComparison.OrdinalIgnoreCase)
            || (msg.Contains("seuil d", StringComparison.OrdinalIgnoreCase)
                && msg.Contains("liste", StringComparison.OrdinalIgnoreCase))
            || msg.Contains("Listenansichtsschwellenwert", StringComparison.OrdinalIgnoreCase)
            || msg.Contains("umbral de vista de lista", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Returns the server-relative URLs of the files to transfer. An explicit
    /// selection on the job takes precedence; otherwise the source folder is
    /// enumerated recursively, skipping system folders.
    /// </summary>
    private async Task<IReadOnlyList<string>> EnumerateFilesAsync(
        ClientContext ctx,
        TransferJob job,
        int sourceItemCount,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        var list = ctx.Web.Lists.GetByTitle(job.SourceLibrary);
        var rootFolder = list.RootFolder;
        ctx.Load(rootFolder, f => f.ServerRelativeUrl);
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
        var libraryRoot = rootFolder.ServerRelativeUrl.TrimEnd('/');

        // Explicit per-file selection overrides folder enumeration. Paths are
        // library-relative (e.g. "SubFolder/file.docx") and are resolved to
        // full server-relative URLs here.
        if (job.SelectedFilePaths.Count > 0)
        {
            return job.SelectedFilePaths
                .Where(p => !string.IsNullOrWhiteSpace(p))
                .Select(p => $"{libraryRoot}/{p.TrimStart('/')}")
                .ToList();
        }

        var baseFolderUrl = libraryRoot;
        if (!string.IsNullOrEmpty(job.SourceFolderPath))
        {
            baseFolderUrl = $"{baseFolderUrl}/{job.SourceFolderPath.TrimStart('/')}";
        }

        // Paged recursive query instead of Folder.Files / Folder.Folders, which
        // hit the list-view threshold on libraries above 5,000 items.
        var files = new List<string>();
        await foreach (var item in SharePointPaginationHelper.GetItemsInFolderAsync(
            ctx, list, baseFolderUrl, recursive: true,
            viewFields: new[] { "FSObjType", "FileRef", "FileDirRef" },
            ct: ct))
        {
            ct.ThrowIfCancellationRequested();

            if (item["FSObjType"]?.ToString() != "0")
            {
                continue; // files only
            }

            var fileRef = item["FileRef"]?.ToString();
            if (string.IsNullOrEmpty(fileRef))
            {
                continue;
            }

            // Skip files under SharePoint system folders (e.g. "Forms", "_*").
            var dir = item["FileDirRef"]?.ToString() ?? string.Empty;
            if (HasSystemFolderSegment(dir, baseFolderUrl))
            {
                continue;
            }

            files.Add(fileRef);
        }

        return files;
    }

    /// <summary>
    /// True when the part of <paramref name="fileDirRef"/> below
    /// <paramref name="baseFolderUrl"/> contains a segment that starts with
    /// "_" or equals "Forms" (case-insensitive).
    /// </summary>
    private static bool HasSystemFolderSegment(string fileDirRef, string baseFolderUrl)
    {
        if (string.IsNullOrEmpty(fileDirRef))
        {
            return false;
        }

        var baseTrim = baseFolderUrl.TrimEnd('/');
        if (!fileDirRef.StartsWith(baseTrim, StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        var tail = fileDirRef.Substring(baseTrim.Length).Trim('/');
        if (string.IsNullOrEmpty(tail))
        {
            return false;
        }

        foreach (var seg in tail.Split('/', StringSplitOptions.RemoveEmptyEntries))
        {
            if (seg.StartsWith("_", StringComparison.Ordinal) ||
                seg.Equals("Forms", StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Reads the library's <c>ItemCount</c>. Returns -1 when the count cannot
    /// be read; the count is informational, so the failure is logged and the
    /// pipeline continues. Cancellation is not swallowed.
    /// </summary>
    private async Task<int> TryGetListItemCountAsync(
        ClientContext ctx,
        string libraryTitle,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        try
        {
            var list = ctx.Web.Lists.GetByTitle(libraryTitle);
            ctx.Load(list, l => l.ItemCount);
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
            return list.ItemCount;
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            Log.Warning("Failed to read ItemCount for {Library}: {Error}", libraryTitle, ex.Message);
            return -1;
        }
    }

    /// <summary>
    /// <see cref="EnsureFolderAsync"/> wrapper that records ensured folders in
    /// a per-job set so the same destination folder is not re-validated for
    /// every file. A folder whose creation fails is removed from the set again
    /// so a later file retries instead of assuming the folder exists.
    /// </summary>
    private async Task EnsureFolderCachedAsync(
        ClientContext ctx,
        string folderServerRelativeUrl,
        HashSet<string> cache,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        var normalized = folderServerRelativeUrl.TrimEnd('/');
        if (!cache.Add(normalized))
        {
            return;
        }

        try
        {
            await EnsureFolderAsync(ctx, normalized, progress, ct);
        }
        catch
        {
            // Do not leave a failed folder marked as ensured.
            cache.Remove(normalized);
            throw;
        }
    }

    /// <summary>
    /// Makes sure a folder exists, creating missing segments from the top down
    /// by recursing on the parent path first.
    /// </summary>
    private async Task EnsureFolderAsync(
        ClientContext ctx,
        string folderServerRelativeUrl,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        folderServerRelativeUrl = folderServerRelativeUrl.TrimEnd('/');

        // Already there?
        try
        {
            var existing = ctx.Web.GetFolderByServerRelativeUrl(folderServerRelativeUrl);
            ctx.Load(existing, f => f.Exists);
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
            if (existing.Exists)
            {
                return;
            }
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Not present (or not readable) — fall through to creation.
        }

        // Resolve the parent explicitly and add only the leaf name, instead of
        // Web.Folders.Add(url), whose argument the original author found to be
        // interpreted inconsistently (web-relative vs. server-relative).
        int slash = folderServerRelativeUrl.LastIndexOf('/');
        if (slash <= 0)
        {
            return;
        }

        var parentUrl = folderServerRelativeUrl.Substring(0, slash);
        var leafName = folderServerRelativeUrl.Substring(slash + 1);
        if (string.IsNullOrEmpty(leafName))
        {
            return;
        }

        // Recurse to guarantee the parent exists first.
        await EnsureFolderAsync(ctx, parentUrl, progress, ct);

        var parent = ctx.Web.GetFolderByServerRelativeUrl(parentUrl);
        parent.Folders.Add(leafName);
        try
        {
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
        }
        catch (Exception ex)
        {
            Log.Warning("EnsureFolder failed at {Parent}/{Leaf}: {Error}", parentUrl, leafName, ex.Message);
            throw;
        }
    }

    /// <summary>
    /// Turns a server-relative path into an absolute URL using the scheme and
    /// authority (host plus any non-default port) of <paramref name="ctx"/>.
    /// Input that is already an http/https URL is returned unchanged.
    /// </summary>
    private static string ToAbsoluteUrl(ClientContext ctx, string pathOrUrl)
    {
        if (pathOrUrl.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
            pathOrUrl.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
        {
            return pathOrUrl;
        }

        var uri = new Uri(ctx.Url);
        var separator = pathOrUrl.StartsWith('/') ? "" : "/";

        // Authority (not Host) so a non-default port is preserved.
        return $"{uri.Scheme}://{uri.Authority}{separator}{pathOrUrl}";
    }

    /// <summary>
    /// Resolves the server-relative path of a library's root folder, with
    /// <paramref name="relativeFolderPath"/> appended when it is not empty.
    /// </summary>
    private static async Task<string> ResolveLibraryPathAsync(
        ClientContext ctx,
        string libraryTitle,
        string relativeFolderPath,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        var list = ctx.Web.Lists.GetByTitle(libraryTitle);
        ctx.Load(list, l => l.RootFolder.ServerRelativeUrl);
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        var basePath = list.RootFolder.ServerRelativeUrl.TrimEnd('/');
        if (!string.IsNullOrEmpty(relativeFolderPath))
        {
            basePath = $"{basePath}/{relativeFolderPath.TrimStart('/')}";
        }

        return basePath;
    }
}