diff --git a/SharepointToolbox/Services/FileTransferService.cs b/SharepointToolbox/Services/FileTransferService.cs index 41c9793..e177bbd 100644 --- a/SharepointToolbox/Services/FileTransferService.cs +++ b/SharepointToolbox/Services/FileTransferService.cs @@ -7,14 +7,19 @@ using SharepointToolbox.Core.Models; namespace SharepointToolbox.Services; /// -/// Orchestrates server-side file copy/move between two SharePoint libraries -/// (same or different tenants). Uses for the -/// transfer itself so bytes never round-trip through the local machine. -/// Folder creation and enumeration are done via CSOM; all ambient retries -/// flow through . +/// Orchestrates file copy/move between two SharePoint libraries (same or +/// different tenants). Hybrid strategy: server-side MoveCopyUtil +/// first (zero local bandwidth), then transparent fallback to stream copy +/// (File.OpenBinaryStream download, Folder.Files.Add upload) on a list-view-threshold +/// failure so transfers still succeed against libraries above the 5,000-item +/// cap. Folder enumeration uses paged CAML; folder creation is cached per job +/// to avoid re-checking the same path for every file. /// public class FileTransferService : IFileTransferService { + private const int ListViewThresholdItemCount = 5000; + private const int LargeLibraryPageSize = 500; + /// /// Runs the configured TransferJob. Enumerates source files /// (unless the job is folder-only), pre-creates destination folders, then @@ -30,12 +35,30 @@ public class FileTransferService : IFileTransferService IProgress progress, CancellationToken ct) { - // 1. Enumerate files from source (unless contents are suppressed). + // 1. Pre-flight: discover library item counts so we can pick a page size + // for source enumeration and warn early that the server-side copy path + // may trip the list-view threshold. The stream fallback in + // TransferSingleFileAsync handles the LVT case transparently, but the + // counts help size-tune enumeration up front. 
+ var srcItemCount = await TryGetListItemCountAsync(sourceCtx, job.SourceLibrary, progress, ct); + var dstItemCount = await TryGetListItemCountAsync(destCtx, job.DestinationLibrary, progress, ct); + Log.Information( + "Transfer pre-flight: source={SrcLib} ({SrcCount} items), dest={DstLib} ({DstCount} items)", + job.SourceLibrary, srcItemCount, job.DestinationLibrary, dstItemCount); + + if (srcItemCount > ListViewThresholdItemCount || dstItemCount > ListViewThresholdItemCount) + { + progress.Report(OperationProgress.Indeterminate( + $"Large library detected (source: {srcItemCount}, dest: {dstItemCount}). " + + "Using paged enumeration and stream-copy fallback when needed.")); + } + + // 2. Enumerate files from source (unless contents are suppressed). IReadOnlyList files; if (job.CopyFolderContents) { progress.Report(new OperationProgress(0, 0, "Enumerating source files...")); - files = await EnumerateFilesAsync(sourceCtx, job, progress, ct); + files = await EnumerateFilesAsync(sourceCtx, job, srcItemCount, progress, ct); } else { @@ -51,7 +74,7 @@ public class FileTransferService : IFileTransferService return new BulkOperationSummary(new List>()); } - // 2. Build source and destination base paths. Resolve library roots via + // 3. Build source and destination base paths. Resolve library roots via // CSOM — constructing from title breaks for localized libraries whose // URL segment differs (e.g. title "Documents" → URL "Shared Documents"), // causing "Access denied" when CSOM tries to touch a non-existent path. @@ -60,6 +83,11 @@ public class FileTransferService : IFileTransferService var dstBasePath = await ResolveLibraryPathAsync( destCtx, job.DestinationLibrary, job.DestinationFolderPath, progress, ct); + // Per-job cache of destination folders we've already ensured. Without + // this, EnsureFolderAsync re-checks .Exists for every file in the same + // folder — thousands of round-trips on a flat directory transfer. 
+ var ensuredFolders = new HashSet(StringComparer.OrdinalIgnoreCase); + // When IncludeSourceFolder is set, recreate the source folder name under // destination so dest/srcFolderName/... mirrors the source tree. When // no SourceFolderPath is set, fall back to the source library name. @@ -74,11 +102,11 @@ public class FileTransferService : IFileTransferService if (!string.IsNullOrEmpty(srcFolderName)) { dstBasePath = $"{dstBasePath}/{srcFolderName}"; - await EnsureFolderAsync(destCtx, dstBasePath, progress, ct); + await EnsureFolderCachedAsync(destCtx, dstBasePath, ensuredFolders, progress, ct); } } - // 3. Transfer each file using BulkOperationRunner + // 4. Transfer each file using BulkOperationRunner return await BulkOperationRunner.RunAsync( files, async (fileRelUrl, idx, token) => @@ -88,13 +116,13 @@ public class FileTransferService : IFileTransferService if (fileRelUrl.StartsWith(srcBasePath, StringComparison.OrdinalIgnoreCase)) relativePart = fileRelUrl.Substring(srcBasePath.Length).TrimStart('/'); - // Ensure destination folder exists + // Ensure destination folder exists (cached) var destFolderRelative = dstBasePath; var fileFolder = Path.GetDirectoryName(relativePart)?.Replace('\\', '/'); if (!string.IsNullOrEmpty(fileFolder)) { destFolderRelative = $"{dstBasePath}/{fileFolder}"; - await EnsureFolderAsync(destCtx, destFolderRelative, progress, token); + await EnsureFolderCachedAsync(destCtx, destFolderRelative, ensuredFolders, progress, token); } var fileName = Path.GetFileName(relativePart); @@ -116,6 +144,32 @@ public class FileTransferService : IFileTransferService TransferJob job, IProgress progress, CancellationToken ct) + { + // Hybrid path: try the server-side MoveCopyUtil first (bytes never + // leave SharePoint). If the destination (or source) library trips the + // list-view threshold, fall back to a stream copy via HTTP-direct APIs + // that bypass list internals. 
+        try
+        {
+            await ServerSideTransferAsync(sourceCtx, destCtx, srcFileUrl, dstFileUrl, job, progress, ct);
+        }
+        catch (ServerException ex) when (IsListViewThresholdException(ex))
+        {
+            // Only the list-view-threshold failure triggers the fallback; every
+            // other ServerException propagates to the caller unchanged.
+            Log.Warning(
+                "Server-side transfer hit list-view threshold for {File} — falling back to stream copy.",
+                srcFileUrl);
+            await StreamTransferAsync(sourceCtx, destCtx, srcFileUrl, dstFileUrl, job, progress, ct);
+        }
+    }
+
+    /// <summary>
+    /// Server-side transfer path built on <c>MoveCopyUtil</c>, which expects
+    /// absolute URLs rather than server-relative paths.
+    /// </summary>
+    private async Task ServerSideTransferAsync(
+        ClientContext sourceCtx,
+        ClientContext destCtx,
+        string srcFileUrl,
+        string dstFileUrl,
+        TransferJob job,
+        IProgress<OperationProgress> progress,
+        CancellationToken ct)
     {
         // MoveCopyUtil.CopyFileByPath expects absolute URLs (scheme + host),
         // not server-relative paths. Passing "/sites/..." silently fails or
@@ -153,9 +207,154 @@ public class FileTransferService : IFileTransferService
         }
     }
 
+    /// <summary>
+    /// Path-based stream copy fallback. Reads the source via
+    /// <c>File.OpenBinaryStream</c> and writes to the destination via
+    /// <c>Folder.Files.Add(FileCreationInformation)</c>.
+    /// Both target a specific folder by path rather than querying list items,
+    /// so they succeed against libraries that exceed the list-view threshold.
+    /// Bytes do round-trip through the local machine — this is strictly the
+    /// fallback when server-side copy is unavailable.
+    /// </summary>
+    /// <param name="sourceCtx">Context the source file is read (and, for Move, deleted) through.</param>
+    /// <param name="destCtx">Context the destination file is written through.</param>
+    /// <param name="srcFileUrl">Server-relative URL of the source file.</param>
+    /// <param name="dstFileUrl">Server-relative URL requested for the destination file.</param>
+    /// <param name="job">Supplies the conflict policy and the copy/move mode.</param>
+    /// <param name="progress">Progress sink forwarded to the retry helper.</param>
+    /// <param name="ct">Cancellation token.</param>
+    /// <exception cref="ArgumentException">The resolved destination URL contains no '/'.</exception>
+    private async Task StreamTransferAsync(
+        ClientContext sourceCtx,
+        ClientContext destCtx,
+        string srcFileUrl,
+        string dstFileUrl,
+        TransferJob job,
+        IProgress<OperationProgress> progress,
+        CancellationToken ct)
+    {
+        // Resolve the destination file name for conflict handling. Returns null
+        // when policy=Skip and the file already exists.
+        var effectiveDestUrl = await ResolveDestinationOnConflictAsync(destCtx, dstFileUrl, job, progress, ct);
+        if (effectiveDestUrl == null)
+        {
+            Log.Warning("Skipped (already exists, stream fallback): {File}", srcFileUrl);
+            return;
+        }
+
+        // Split folder and leaf before downloading anything, so a malformed
+        // destination fails fast instead of after the payload was fetched.
+        var slash = effectiveDestUrl.LastIndexOf('/');
+        if (slash < 0)
+        {
+            throw new ArgumentException(
+                $"Destination URL '{effectiveDestUrl}' is not a server-relative path.",
+                nameof(dstFileUrl));
+        }
+        var destFolderUrl = effectiveDestUrl.Substring(0, slash);
+        var destFileName = effectiveDestUrl.Substring(slash + 1);
+
+        // Rename policy guarantees a free path via ResolveDestinationOnConflictAsync,
+        // so overwrite is only needed for the explicit Overwrite policy.
+        bool overwrite = job.ConflictPolicy == ConflictPolicy.Overwrite;
+
+        ct.ThrowIfCancellationRequested();
+
+        // 1. Download the source bytes into memory. OpenBinaryStream is a
+        //    ClientResult — usable only after ExecuteQuery.
+        var srcFile = sourceCtx.Web.GetFileByServerRelativeUrl(srcFileUrl);
+        var streamResult = srcFile.OpenBinaryStream();
+        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(sourceCtx, progress, ct);
+
+        using var buffer = new MemoryStream();
+        // Dispose the source stream as soon as it has been drained instead of
+        // leaving it to the finalizer.
+        using (var sourceStream = streamResult.Value)
+        {
+            await sourceStream.CopyToAsync(buffer, 81920, ct);
+        }
+        buffer.Position = 0;
+
+        // 2. Upload to the destination folder. Files.Add with ContentStream
+        //    streams the payload in one request and does not touch list-view
+        //    metadata, so it bypasses LVT.
+        var destFolder = destCtx.Web.GetFolderByServerRelativeUrl(destFolderUrl);
+        var creation = new FileCreationInformation
+        {
+            Url = destFileName,
+            Overwrite = overwrite,
+            ContentStream = buffer,
+        };
+        destFolder.Files.Add(creation);
+        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(destCtx, progress, ct);
+
+        if (job.Mode == TransferMode.Move)
+        {
+            // Stream copy cannot atomically move; delete the source after a
+            // successful upload to honour Move semantics.
+            var srcDelete = sourceCtx.Web.GetFileByServerRelativeUrl(srcFileUrl);
+            srcDelete.DeleteObject();
+            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(sourceCtx, progress, ct);
+        }
+    }
+
+    ///
+    /// Honours job.ConflictPolicy when the destination
+    /// path already exists. Returns the URL to write to, or null when
+    /// the file should be skipped. For the rename policy,
+    /// probes name (1).ext, name (2).ext, ... until a free slot
+    /// is found.
+    ///
+    /// <returns>The server-relative URL to write to, or null when the file must be skipped.</returns>
+    /// <exception cref="InvalidOperationException">No free "name (n).ext" slot was found.</exception>
+    private static async Task<string?> ResolveDestinationOnConflictAsync(
+        ClientContext destCtx,
+        string dstFileUrl,
+        TransferJob job,
+        IProgress<OperationProgress> progress,
+        CancellationToken ct)
+    {
+        // Upper bound on " (n)" suffixes probed before giving up.
+        const int MaxRenameAttempts = 999;
+
+        // Overwrite needs no existence probe: the requested URL is used as-is.
+        if (job.ConflictPolicy == ConflictPolicy.Overwrite)
+            return dstFileUrl;
+
+        bool exists = await FileExistsAsync(destCtx, dstFileUrl, progress, ct);
+        if (!exists) return dstFileUrl;
+
+        if (job.ConflictPolicy == ConflictPolicy.Skip)
+            return null;
+
+        // Rename: keep both. Append " (n)" before the extension.
+        var slash = dstFileUrl.LastIndexOf('/');
+        var dir = dstFileUrl.Substring(0, slash);
+        var leaf = dstFileUrl.Substring(slash + 1);
+        var stem = Path.GetFileNameWithoutExtension(leaf);
+        var ext = Path.GetExtension(leaf);
+
+        for (int n = 1; n <= MaxRenameAttempts; n++)
+        {
+            var candidate = $"{dir}/{stem} ({n}){ext}";
+            if (!await FileExistsAsync(destCtx, candidate, progress, ct))
+                return candidate;
+        }
+        // Extremely unlikely; surface as failure rather than silent overwrite.
+        throw new InvalidOperationException(
+            $"Could not find an unused destination filename for {dstFileUrl} after {MaxRenameAttempts} attempts.");
+    }
+
+    /// <summary>
+    /// Best-effort existence probe for a file addressed by server-relative URL.
+    /// Any failure other than cancellation is reported as "does not exist".
+    /// </summary>
+    /// <returns>The loaded <c>Exists</c> flag, or false when the probe failed.</returns>
+    /// <exception cref="OperationCanceledException">The token was cancelled during the probe.</exception>
+    private static async Task<bool> FileExistsAsync(
+        ClientContext ctx,
+        string fileServerRelativeUrl,
+        IProgress<OperationProgress> progress,
+        CancellationToken ct)
+    {
+        try
+        {
+            var file = ctx.Web.GetFileByServerRelativeUrl(fileServerRelativeUrl);
+            ctx.Load(file, f => f.Exists);
+            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
+            return file.Exists;
+        }
+        catch (ServerException)
+        {
+            // NOTE(review): CSOM presumably reports a missing file as a
+            // ServerException — treated as the expected "absent" answer, so
+            // it is not logged.
+            return false;
+        }
+        catch (Exception ex) when (ex is not OperationCanceledException)
+        {
+            // Keep the probe best-effort for transport-level failures, but
+            // leave a trace. Cancellation is deliberately NOT caught: a
+            // cancelled job must stop, not be read as "file absent".
+            Log.Warning("Could not probe {File}: {Error}", fileServerRelativeUrl, ex.Message);
+            return false;
+        }
+    }
+
+    /// <summary>
+    /// Detects SharePoint's list-view-threshold ServerException across locales.
+    /// English: "exceeds the list view threshold". French: "depasse le seuil
+    /// d'affichage de liste". German: "Listenansichtsschwellenwert".
+    /// Spanish: "umbral de vista de lista".
+    /// </summary>
+    internal static bool IsListViewThresholdException(Exception ex)
+    {
+        var msg = ex.Message ??
string.Empty;
+
+        // Locale-independent signal first: the server-side exception type name
+        // does not change with the site language, unlike the message text.
+        if (ex is ServerException { ServerErrorTypeName: "Microsoft.SharePoint.SPQueryThrottledException" })
+            return true;
+
+        // Fallback: match the localized message fragments.
+        return msg.Contains("list view threshold", StringComparison.OrdinalIgnoreCase)
+            || msg.Contains("seuil d'affichage", StringComparison.OrdinalIgnoreCase)
+            || (msg.Contains("seuil d", StringComparison.OrdinalIgnoreCase)
+                && msg.Contains("liste", StringComparison.OrdinalIgnoreCase))
+            || msg.Contains("Listenansichtsschwellenwert", StringComparison.OrdinalIgnoreCase)
+            || msg.Contains("umbral de vista de lista", StringComparison.OrdinalIgnoreCase);
+    }
+
     private async Task<IReadOnlyList<string>> EnumerateFilesAsync(
         ClientContext ctx,
         TransferJob job,
+        int sourceItemCount,
         IProgress<OperationProgress> progress,
         CancellationToken ct)
     {
@@ -226,6 +425,44 @@ public class FileTransferService : IFileTransferService
         return false;
     }
 
+    /// <summary>
+    /// Reads <c>ItemCount</c> of the library with the given title.
+    /// </summary>
+    /// <returns>The item count, or -1 when it could not be read.</returns>
+    /// <exception cref="OperationCanceledException">The token was cancelled during the query.</exception>
+    private async Task<int> TryGetListItemCountAsync(
+        ClientContext ctx,
+        string libraryTitle,
+        IProgress<OperationProgress> progress,
+        CancellationToken ct)
+    {
+        try
+        {
+            var list = ctx.Web.Lists.GetByTitle(libraryTitle);
+            ctx.Load(list, l => l.ItemCount);
+            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
+            return list.ItemCount;
+        }
+        catch (Exception ex) when (ex is not OperationCanceledException)
+        {
+            // Non-fatal: pre-flight count is purely informational. Treat as
+            // unknown (-1) so the rest of the pipeline still runs. Cancellation
+            // is excluded so a cancelled job stops here instead of continuing.
+            Log.Warning("Failed to read ItemCount for {Library}: {Error}", libraryTitle, ex.Message);
+            return -1;
+        }
+    }
+
+    /// <summary>
+    /// EnsureFolderAsync wrapper that records successful checks in a per-job
+    /// set so the same destination folder isn't re-validated for every file.
+    /// </summary>
+    /// <param name="cache">Folders already ensured for this job; updated only on success.</param>
+    private async Task EnsureFolderCachedAsync(
+        ClientContext ctx,
+        string folderServerRelativeUrl,
+        HashSet<string> cache,
+        IProgress<OperationProgress> progress,
+        CancellationToken ct)
+    {
+        var normalized = folderServerRelativeUrl.TrimEnd('/');
+        if (cache.Contains(normalized)) return;
+
+        // Record the folder only after EnsureFolderAsync returned: if it
+        // throws, the next file in the same folder retries the creation
+        // instead of trusting a folder that was never ensured.
+        // NOTE(review): HashSet is not thread-safe — confirm the bulk runner
+        // invokes its per-file callback sequentially.
+        await EnsureFolderAsync(ctx, normalized, progress, ct);
+        cache.Add(normalized);
+    }
+
     private async Task EnsureFolderAsync(
         ClientContext ctx,
         string folderServerRelativeUrl,