using System.IO;
using Microsoft.SharePoint.Client;
using Serilog;
using SharepointToolbox.Core.Helpers;
using SharepointToolbox.Core.Models;
namespace SharepointToolbox.Services;
/// <summary>
/// Orchestrates file copy/move between two SharePoint libraries (same or
/// different tenants). Hybrid strategy: server-side <see cref="MoveCopyUtil"/>
/// first (zero local bandwidth), then transparent fallback to stream copy
/// (<c>OpenBinaryStream</c> download + <c>Folder.Files.Add</c> upload) on a
/// list-view-threshold failure so transfers still succeed against libraries
/// above the 5,000-item cap. Folder enumeration uses paged CAML; folder
/// creation is cached per job to avoid re-checking the same path for every file.
/// </summary>
public class FileTransferService : IFileTransferService
{
// Item count above which TransferAsync reports a "large library" notice
// (the 5,000-item list-view threshold referred to in the class summary).
private const int ListViewThresholdItemCount = 5000;
// NOTE(review): not referenced anywhere in this class as shown — presumably
// the intended page size for large-library enumeration; confirm or remove.
private const int LargeLibraryPageSize = 500;
/// <summary>
/// Runs the configured <see cref="TransferJob"/>. Enumerates source files
/// (unless the job is folder-only), pre-creates destination folders, then
/// copies or moves each file according to <see cref="TransferJob.Mode"/>
/// and <see cref="TransferJob.ConflictPolicy"/>. Returns a per-item
/// summary where failures are reported individually — the method does
/// not abort on first error so partial transfers are recoverable.
/// </summary>
/// <param name="sourceCtx">CSOM context of the site that holds the source library.</param>
/// <param name="destCtx">CSOM context of the site that holds the destination library.</param>
/// <param name="job">Libraries, folder paths, mode, conflict policy and folder options for this run.</param>
/// <param name="progress">Receives status messages; also forwarded to every query execution.</param>
/// <param name="ct">Cancels enumeration and the per-file transfers.</param>
/// <returns>
/// The summary produced by <c>BulkOperationRunner.RunAsync</c>, or an empty
/// summary when no files were found and no source folder has to be recreated.
/// </returns>
public async Task> TransferAsync(
ClientContext sourceCtx,
ClientContext destCtx,
TransferJob job,
IProgress progress,
CancellationToken ct)
{
// 1. Pre-flight: read both library item counts so the user is warned
// early that the server-side copy path may trip the list-view threshold.
// The stream fallback in TransferSingleFileAsync handles the LVT case
// transparently. A count of -1 means the lookup failed (see
// TryGetListItemCountAsync) and is treated as "not large".
var srcItemCount = await TryGetListItemCountAsync(sourceCtx, job.SourceLibrary, progress, ct);
var dstItemCount = await TryGetListItemCountAsync(destCtx, job.DestinationLibrary, progress, ct);
Log.Information(
"Transfer pre-flight: source={SrcLib} ({SrcCount} items), dest={DstLib} ({DstCount} items)",
job.SourceLibrary, srcItemCount, job.DestinationLibrary, dstItemCount);
if (srcItemCount > ListViewThresholdItemCount || dstItemCount > ListViewThresholdItemCount)
{
progress.Report(OperationProgress.Indeterminate(
$"Large library detected (source: {srcItemCount}, dest: {dstItemCount}). " +
"Using paged enumeration and stream-copy fallback when needed."));
}
// 2. Enumerate files from source (unless contents are suppressed).
IReadOnlyList files;
if (job.CopyFolderContents)
{
progress.Report(new OperationProgress(0, 0, "Enumerating source files..."));
// NOTE(review): srcItemCount is passed through, but EnumerateFilesAsync
// does not currently read it.
files = await EnumerateFilesAsync(sourceCtx, job, srcItemCount, progress, ct);
}
else
{
files = Array.Empty();
}
// When CopyFolderContents is off, the job is folder-only: ensure the
// destination folder is created below (IncludeSourceFolder branch) and
// return without iterating any files.
if (files.Count == 0 && !job.IncludeSourceFolder)
{
progress.Report(new OperationProgress(0, 0, "No files found to transfer."));
return new BulkOperationSummary(new List>());
}
// 3. Build source and destination base paths. Resolve library roots via
// CSOM — constructing from title breaks for localized libraries whose
// URL segment differs (e.g. title "Documents" → URL "Shared Documents"),
// causing "Access denied" when CSOM tries to touch a non-existent path.
var srcBasePath = await ResolveLibraryPathAsync(
sourceCtx, job.SourceLibrary, job.SourceFolderPath, progress, ct);
var dstBasePath = await ResolveLibraryPathAsync(
destCtx, job.DestinationLibrary, job.DestinationFolderPath, progress, ct);
// Per-job cache of destination folders we've already ensured. Without
// this, EnsureFolderAsync re-checks .Exists for every file in the same
// folder — thousands of round-trips on a flat directory transfer.
// NOTE(review): a plain HashSet is not thread-safe — confirm that
// BulkOperationRunner invokes the per-item callback sequentially.
var ensuredFolders = new HashSet(StringComparer.OrdinalIgnoreCase);
// When IncludeSourceFolder is set, recreate the source folder name under
// destination so dest/srcFolderName/... mirrors the source tree. When
// no SourceFolderPath is set, fall back to the source library name.
// Also pre-create the folder itself — per-file EnsureFolder only fires
// for nested paths, so flat files at the root of the source folder
// would otherwise copy into a missing parent and fail.
if (job.IncludeSourceFolder)
{
var srcFolderName = !string.IsNullOrEmpty(job.SourceFolderPath)
? Path.GetFileName(job.SourceFolderPath.TrimEnd('/'))
: job.SourceLibrary;
if (!string.IsNullOrEmpty(srcFolderName))
{
dstBasePath = $"{dstBasePath}/{srcFolderName}";
await EnsureFolderCachedAsync(destCtx, dstBasePath, ensuredFolders, progress, ct);
}
}
// 4. Transfer each file using BulkOperationRunner
return await BulkOperationRunner.RunAsync(
files,
async (fileRelUrl, idx, token) =>
{
// Compute destination path by replacing source base with dest base.
// NOTE(review): when fileRelUrl is not under srcBasePath (possible for
// explicitly selected files, which are resolved against the library
// root rather than SourceFolderPath), the full URL is kept as the
// relative part — verify that case against callers.
var relativePart = fileRelUrl;
if (fileRelUrl.StartsWith(srcBasePath, StringComparison.OrdinalIgnoreCase))
relativePart = fileRelUrl.Substring(srcBasePath.Length).TrimStart('/');
// Ensure destination folder exists (cached). Path.GetDirectoryName may
// hand back backslashes, hence the normalisation to '/'.
var destFolderRelative = dstBasePath;
var fileFolder = Path.GetDirectoryName(relativePart)?.Replace('\\', '/');
if (!string.IsNullOrEmpty(fileFolder))
{
destFolderRelative = $"{dstBasePath}/{fileFolder}";
await EnsureFolderCachedAsync(destCtx, destFolderRelative, ensuredFolders, progress, token);
}
var fileName = Path.GetFileName(relativePart);
var destFileUrl = $"{destFolderRelative}/{fileName}";
await TransferSingleFileAsync(sourceCtx, destCtx, fileRelUrl, destFileUrl, job, progress, token);
Log.Information("Transferred: {Source} -> {Dest}", fileRelUrl, destFileUrl);
},
progress,
ct);
}
/// <summary>
/// Transfers one file. Tries the server-side path first (bytes never leave
/// SharePoint); only when that fails with a list-view-threshold
/// <see cref="ServerException"/> does it retry through the stream-copy path.
/// Any other exception propagates to the caller.
/// </summary>
/// <param name="sourceCtx">Context used to read (and, on move, delete) the source file.</param>
/// <param name="destCtx">Context used to write the destination file.</param>
/// <param name="srcFileUrl">Server-relative URL of the source file.</param>
/// <param name="dstFileUrl">Server-relative URL the file should be written to.</param>
/// <param name="job">Supplies the transfer mode and conflict policy.</param>
/// <param name="progress">Forwarded to both transfer paths.</param>
/// <param name="ct">Cancellation token forwarded to both transfer paths.</param>
private async Task TransferSingleFileAsync(
    ClientContext sourceCtx,
    ClientContext destCtx,
    string srcFileUrl,
    string dstFileUrl,
    TransferJob job,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    try
    {
        await ServerSideTransferAsync(sourceCtx, destCtx, srcFileUrl, dstFileUrl, job, progress, ct);
    }
    catch (ServerException ex) when (IsListViewThresholdException(ex))
    {
        // Keep the server exception attached to the log event so the exact
        // threshold error stays diagnosable after the fallback succeeds.
        Log.Warning(
            ex,
            "Server-side transfer hit list-view threshold for {File} — falling back to stream copy.",
            srcFileUrl);

        await StreamTransferAsync(sourceCtx, destCtx, srcFileUrl, dstFileUrl, job, progress, ct);
    }
}
/// <summary>
/// Copies or moves a single file with <see cref="MoveCopyUtil"/>, executed on
/// <paramref name="sourceCtx"/>. When the conflict policy is
/// <see cref="ConflictPolicy.Skip"/> and the server reports that the target
/// "already exists", the file is logged as skipped instead of failing.
/// </summary>
/// <param name="sourceCtx">Context the copy/move request is queued on and executed with.</param>
/// <param name="destCtx">Only used to build the absolute destination URL.</param>
/// <param name="srcFileUrl">Server-relative (or absolute) URL of the source file.</param>
/// <param name="dstFileUrl">Server-relative (or absolute) URL of the destination file.</param>
/// <param name="job">Supplies <c>Mode</c> and <c>ConflictPolicy</c>.</param>
/// <param name="progress">Forwarded to the query execution helper.</param>
/// <param name="ct">Cancellation token forwarded to the query execution helper.</param>
private async Task ServerSideTransferAsync(
    ClientContext sourceCtx,
    ClientContext destCtx,
    string srcFileUrl,
    string dstFileUrl,
    TransferJob job,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    // MoveCopyUtil.CopyFileByPath expects absolute URLs (scheme + host),
    // not server-relative paths. Passing "/sites/..." silently fails or
    // returns no error yet copies nothing — especially across site
    // collections. Prefix with the owning site's scheme+host.
    var srcAbs = ToAbsoluteUrl(sourceCtx, srcFileUrl);
    var dstAbs = ToAbsoluteUrl(destCtx, dstFileUrl);
    var srcPath = ResourcePath.FromDecodedUrl(srcAbs);
    var dstPath = ResourcePath.FromDecodedUrl(dstAbs);

    var overwrite = job.ConflictPolicy == ConflictPolicy.Overwrite;
    var options = new MoveCopyOptions
    {
        KeepBoth = job.ConflictPolicy == ConflictPolicy.Rename,
        ResetAuthorAndCreatedOnCopy = false, // best-effort metadata preservation
    };

    try
    {
        // Queue the right operation, then execute once for either mode.
        if (job.Mode == TransferMode.Copy)
        {
            MoveCopyUtil.CopyFileByPath(sourceCtx, srcPath, dstPath, overwrite, options);
        }
        else
        {
            MoveCopyUtil.MoveFileByPath(sourceCtx, srcPath, dstPath, overwrite, options);
        }

        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(sourceCtx, progress, ct);
    }
    catch (ServerException ex) when (job.ConflictPolicy == ConflictPolicy.Skip &&
                                     ex.Message.Contains("already exists", StringComparison.OrdinalIgnoreCase))
    {
        // NOTE(review): the match is on the English server message only —
        // confirm behaviour on non-English tenants.
        Log.Warning("Skipped (already exists): {File}", srcFileUrl);
    }
}
/// <summary>
/// Path-based stream copy fallback. Reads the source via
/// <see cref="Microsoft.SharePoint.Client.File.OpenBinaryStream"/> and writes
/// to the destination via <c>Folder.Files.Add(FileCreationInformation)</c>.
/// Both target a specific folder by path rather than querying list items,
/// so they succeed against libraries that exceed the list-view threshold.
/// Bytes do round-trip through the local machine (the whole file is buffered
/// in memory) — this is strictly the fallback when server-side copy is
/// unavailable.
/// </summary>
/// <param name="sourceCtx">Context used to download and, for moves, delete the source file.</param>
/// <param name="destCtx">Context used to probe for conflicts and upload.</param>
/// <param name="srcFileUrl">Server-relative URL of the source file.</param>
/// <param name="dstFileUrl">Requested server-relative destination URL (may be renamed on conflict).</param>
/// <param name="job">Supplies <c>Mode</c> and <c>ConflictPolicy</c>.</param>
/// <param name="progress">Forwarded to the query execution helper.</param>
/// <param name="ct">Cancels the download, upload and delete steps.</param>
private async Task StreamTransferAsync(
    ClientContext sourceCtx,
    ClientContext destCtx,
    string srcFileUrl,
    string dstFileUrl,
    TransferJob job,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    // Resolve the destination file name for conflict handling. Null means
    // policy=Skip and the file already exists.
    var effectiveDestUrl = await ResolveDestinationOnConflictAsync(destCtx, dstFileUrl, job, progress, ct);
    if (effectiveDestUrl is null)
    {
        Log.Warning("Skipped (already exists, stream fallback): {File}", srcFileUrl);
        return;
    }

    // Rename policy guarantees a free path via ResolveDestinationOnConflictAsync,
    // so overwrite is only needed for the explicit Overwrite policy.
    var overwrite = job.ConflictPolicy == ConflictPolicy.Overwrite;
    ct.ThrowIfCancellationRequested();

    // 1. Download the source bytes into memory. OpenBinaryStream is a
    //    ClientResult — usable only after ExecuteQuery.
    var srcFile = sourceCtx.Web.GetFileByServerRelativeUrl(srcFileUrl);
    var streamResult = srcFile.OpenBinaryStream();
    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(sourceCtx, progress, ct);

    using var buffer = new MemoryStream();
    // Dispose the response stream as soon as it has been drained; it was
    // previously left for the finalizer.
    using (var sourceStream = streamResult.Value)
    {
        await sourceStream.CopyToAsync(buffer, 81920, ct);
    }
    buffer.Position = 0;

    // 2. Upload to the destination folder. Files.Add with ContentStream
    //    streams the payload in one request and does not touch list-view
    //    metadata, so it bypasses LVT.
    var slash = effectiveDestUrl.LastIndexOf('/');
    var destFolderUrl = effectiveDestUrl.Substring(0, slash);
    var destFileName = effectiveDestUrl.Substring(slash + 1);
    var destFolder = destCtx.Web.GetFolderByServerRelativeUrl(destFolderUrl);
    var creation = new FileCreationInformation
    {
        Url = destFileName,
        Overwrite = overwrite,
        ContentStream = buffer,
    };
    destFolder.Files.Add(creation);
    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(destCtx, progress, ct);

    if (job.Mode == TransferMode.Move)
    {
        // Stream copy cannot atomically move; delete the source after a
        // successful upload to honour Move semantics.
        var srcDelete = sourceCtx.Web.GetFileByServerRelativeUrl(srcFileUrl);
        srcDelete.DeleteObject();
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(sourceCtx, progress, ct);
    }
}
/// <summary>
/// Honours <see cref="TransferJob.ConflictPolicy"/> when the destination
/// path already exists. Returns the URL to write to, or <c>null</c> when
/// the file should be skipped. For <see cref="ConflictPolicy.Rename"/>,
/// probes <c>name (1).ext</c>, <c>name (2).ext</c>, ... until a free slot
/// is found.
/// </summary>
/// <param name="destCtx">Context used for the existence probes.</param>
/// <param name="dstFileUrl">Requested server-relative destination URL.</param>
/// <param name="job">Supplies the conflict policy.</param>
/// <param name="progress">Forwarded to the existence probes.</param>
/// <param name="ct">Cancellation token forwarded to the existence probes.</param>
/// <returns>
/// <paramref name="dstFileUrl"/> when the policy is Overwrite or the path is
/// free; a renamed sibling URL for Rename; <c>null</c> for Skip.
/// </returns>
/// <exception cref="InvalidOperationException">No free name was found within the probe limit.</exception>
private static async Task<string?> ResolveDestinationOnConflictAsync(
    ClientContext destCtx,
    string dstFileUrl,
    TransferJob job,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    // Upper bound on " (n)" suffixes tried before giving up.
    const int MaxRenameAttempts = 999;

    // Overwrite never needs a probe: the upload replaces whatever is there.
    if (job.ConflictPolicy == ConflictPolicy.Overwrite)
    {
        return dstFileUrl;
    }

    if (!await FileExistsAsync(destCtx, dstFileUrl, progress, ct))
    {
        return dstFileUrl;
    }

    if (job.ConflictPolicy == ConflictPolicy.Skip)
    {
        return null;
    }

    // Rename: keep both. Append " (n)" before the extension.
    var lastSlash = dstFileUrl.LastIndexOf('/');
    var dir = dstFileUrl.Substring(0, lastSlash);
    var leaf = dstFileUrl.Substring(lastSlash + 1);
    var stem = Path.GetFileNameWithoutExtension(leaf);
    var ext = Path.GetExtension(leaf);

    for (var n = 1; n <= MaxRenameAttempts; n++)
    {
        var candidate = $"{dir}/{stem} ({n}){ext}";
        if (!await FileExistsAsync(destCtx, candidate, progress, ct))
        {
            return candidate;
        }
    }

    // Extremely unlikely; surface as failure rather than silent overwrite.
    throw new InvalidOperationException(
        $"Could not find an unused destination filename for {dstFileUrl} after {MaxRenameAttempts} attempts.");
}
/// <summary>
/// Probes whether a file exists at the given server-relative URL by loading
/// its <c>Exists</c> property.
/// </summary>
/// <param name="ctx">Context the probe is executed on.</param>
/// <param name="fileServerRelativeUrl">Server-relative URL of the file to probe.</param>
/// <param name="progress">Forwarded to the query execution helper.</param>
/// <param name="ct">Cancellation token; cancellation propagates to the caller.</param>
/// <returns>
/// The loaded <c>Exists</c> value, or <c>false</c> when the probe itself
/// fails for any reason other than cancellation.
/// </returns>
private static async Task<bool> FileExistsAsync(
    ClientContext ctx,
    string fileServerRelativeUrl,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    try
    {
        var file = ctx.Web.GetFileByServerRelativeUrl(fileServerRelativeUrl);
        ctx.Load(file, f => f.Exists);
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
        return file.Exists;
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Best-effort probe: a failed lookup is treated as "not there".
        // Cancellation is deliberately excluded so a cancelled job is not
        // mistaken for a free destination path.
        Log.Debug(ex, "Existence probe failed for {File}; treating as absent.", fileServerRelativeUrl);
        return false;
    }
}
/// <summary>
/// Recognises SharePoint's list-view-threshold error from the exception
/// message, independent of the server locale. Matched fragments
/// (case-insensitive): English "list view threshold"; French
/// "seuil d'affichage", or "seuil d" together with "liste"; German
/// "Listenansichtsschwellenwert"; Spanish "umbral de vista de lista".
/// </summary>
/// <param name="ex">Exception whose <see cref="Exception.Message"/> is inspected.</param>
/// <returns><c>true</c> when any of the known fragments is present.</returns>
internal static bool IsListViewThresholdException(Exception ex)
{
    var text = ex.Message ?? string.Empty;

    // Case-insensitive ordinal substring test against the message.
    bool Mentions(string fragment) =>
        text.Contains(fragment, StringComparison.OrdinalIgnoreCase);

    if (Mentions("list view threshold"))
    {
        return true;
    }

    if (Mentions("seuil d'affichage"))
    {
        return true;
    }

    // Looser French match: both fragments must appear somewhere in the text.
    if (Mentions("seuil d") && Mentions("liste"))
    {
        return true;
    }

    if (Mentions("Listenansichtsschwellenwert"))
    {
        return true;
    }

    return Mentions("umbral de vista de lista");
}
/// <summary>
/// Builds the list of server-relative file URLs to transfer. An explicit
/// selection in <c>job.SelectedFilePaths</c> wins; otherwise every file under
/// the source folder is collected recursively through the paged helper,
/// skipping anything that sits below a system folder.
/// </summary>
/// <param name="ctx">Context of the source site.</param>
/// <param name="job">Supplies the source library title, folder path and optional selection.</param>
/// <param name="sourceItemCount">
/// Pre-flight item count of the source library. Currently not read by this
/// method; kept so the call site stays unchanged.
/// </param>
/// <param name="progress">Forwarded to the root-folder lookup.</param>
/// <param name="ct">Cancels the lookup and the paged enumeration.</param>
/// <returns>Server-relative URLs of the files to transfer (possibly empty).</returns>
private async Task<IReadOnlyList<string>> EnumerateFilesAsync(
    ClientContext ctx,
    TransferJob job,
    int sourceItemCount,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    var list = ctx.Web.Lists.GetByTitle(job.SourceLibrary);
    var rootFolder = list.RootFolder;
    ctx.Load(rootFolder, f => f.ServerRelativeUrl);
    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
    var libraryRoot = rootFolder.ServerRelativeUrl.TrimEnd('/');

    // Explicit per-file selection overrides folder enumeration. Paths are
    // library-relative (e.g. "SubFolder/file.docx") and get resolved to
    // full server-relative URLs here. A missing selection is treated the
    // same as an empty one.
    if (job.SelectedFilePaths is { Count: > 0 } selection)
    {
        return selection
            .Where(p => !string.IsNullOrWhiteSpace(p))
            .Select(p => $"{libraryRoot}/{p.TrimStart('/')}")
            .ToList();
    }

    var baseFolderUrl = libraryRoot;
    if (!string.IsNullOrEmpty(job.SourceFolderPath))
    {
        baseFolderUrl = $"{baseFolderUrl}/{job.SourceFolderPath.TrimStart('/')}";
    }

    // Paginated recursive CAML query — Folder.Files / Folder.Folders lazy
    // loading hits the list-view threshold on libraries > 5,000 items.
    var files = new List<string>();
    await foreach (var item in SharePointPaginationHelper.GetItemsInFolderAsync(
        ctx, list, baseFolderUrl, recursive: true,
        viewFields: new[] { "FSObjType", "FileRef", "FileDirRef" },
        ct: ct))
    {
        ct.ThrowIfCancellationRequested();

        // FSObjType "0" marks a file; everything else (folders) is skipped.
        if (item["FSObjType"]?.ToString() != "0") continue;

        var fileRef = item["FileRef"]?.ToString();
        if (string.IsNullOrEmpty(fileRef)) continue;

        // Skip files under SharePoint system folders (e.g. "Forms", "_*").
        var dir = item["FileDirRef"]?.ToString() ?? string.Empty;
        if (HasSystemFolderSegment(dir, baseFolderUrl)) continue;

        files.Add(fileRef);
    }

    return files;
}
/// <summary>
/// Tells whether <paramref name="fileDirRef"/> lies beneath
/// <paramref name="baseFolderUrl"/> and has, below that base, at least one
/// path segment that starts with "_" or equals "Forms" (case-insensitive).
/// </summary>
/// <param name="fileDirRef">Server-relative folder URL of an item.</param>
/// <param name="baseFolderUrl">Folder the enumeration started from; a trailing '/' is ignored.</param>
/// <returns>
/// <c>true</c> when a system-folder segment is found; <c>false</c> for an
/// empty path, a path outside the base folder, or the base folder itself.
/// </returns>
private static bool HasSystemFolderSegment(string fileDirRef, string baseFolderUrl)
{
    if (string.IsNullOrEmpty(fileDirRef)) return false;

    var baseTrim = baseFolderUrl.TrimEnd('/');
    if (!fileDirRef.StartsWith(baseTrim, StringComparison.OrdinalIgnoreCase))
    {
        return false;
    }

    // A bare prefix match is not enough: "/lib/Docs_old" starts with
    // "/lib/Docs" but is a sibling, not a child. Require the prefix to end
    // on a path-segment boundary so such siblings are treated like any other
    // path outside the base folder.
    if (baseTrim.Length > 0 &&
        fileDirRef.Length > baseTrim.Length &&
        fileDirRef[baseTrim.Length] != '/')
    {
        return false;
    }

    var tail = fileDirRef.Substring(baseTrim.Length).Trim('/');
    if (string.IsNullOrEmpty(tail)) return false;

    foreach (var seg in tail.Split('/', StringSplitOptions.RemoveEmptyEntries))
    {
        if (seg.StartsWith("_", StringComparison.Ordinal) ||
            seg.Equals("Forms", StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Reads the <c>ItemCount</c> of the library with the given title. The value
/// is informational only, so lookup failures are logged and reported as -1
/// instead of failing the transfer.
/// </summary>
/// <param name="ctx">Context of the site that holds the library.</param>
/// <param name="libraryTitle">Display title used to locate the library.</param>
/// <param name="progress">Forwarded to the query execution helper.</param>
/// <param name="ct">Cancellation token; cancellation propagates to the caller.</param>
/// <returns>The library's item count, or -1 when it could not be read.</returns>
private async Task<int> TryGetListItemCountAsync(
    ClientContext ctx,
    string libraryTitle,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    try
    {
        var list = ctx.Web.Lists.GetByTitle(libraryTitle);
        ctx.Load(list, l => l.ItemCount);
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
        return list.ItemCount;
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Non-fatal: pre-flight count is purely informational. Treat as
        // unknown (-1) so the rest of the pipeline still runs. Cancellation
        // is not a lookup failure and is left to bubble up.
        Log.Warning("Failed to read ItemCount for {Library}: {Error}", libraryTitle, ex.Message);
        return -1;
    }
}
/// <summary>
/// EnsureFolderAsync wrapper that remembers, in a per-job set, which
/// destination folders have already been ensured so the same folder isn't
/// re-validated for every file. A folder whose creation fails is removed
/// from the set again, so the next file targeting it retries instead of
/// uploading into a folder that was never created.
/// </summary>
/// <param name="ctx">Context of the destination site.</param>
/// <param name="folderServerRelativeUrl">Server-relative folder URL; a trailing '/' is ignored.</param>
/// <param name="cache">Per-job set of folder URLs that are ensured or currently being ensured.</param>
/// <param name="progress">Forwarded to EnsureFolderAsync.</param>
/// <param name="ct">Cancellation token forwarded to EnsureFolderAsync.</param>
private async Task EnsureFolderCachedAsync(
    ClientContext ctx,
    string folderServerRelativeUrl,
    HashSet<string> cache,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    var normalized = folderServerRelativeUrl.TrimEnd('/');

    // Reserve the entry up front so repeated requests for the same folder
    // short-circuit, exactly as before.
    if (!cache.Add(normalized))
    {
        return;
    }

    try
    {
        await EnsureFolderAsync(ctx, normalized, progress, ct);
    }
    catch
    {
        // Do not leave a failed folder marked as ensured.
        cache.Remove(normalized);
        throw;
    }
}
/// <summary>
/// Makes sure the folder at <paramref name="folderServerRelativeUrl"/>
/// exists, creating missing ancestors first. Returns immediately when the
/// existence probe reports the folder as present; otherwise recurses on the
/// parent path and then adds the leaf folder beneath it.
/// </summary>
/// <param name="ctx">Context of the destination site.</param>
/// <param name="folderServerRelativeUrl">Server-relative folder URL; a trailing '/' is ignored.</param>
/// <param name="progress">Forwarded to the query execution helper.</param>
/// <param name="ct">Cancellation token; cancellation propagates to the caller.</param>
private async Task EnsureFolderAsync(
    ClientContext ctx,
    string folderServerRelativeUrl,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    folderServerRelativeUrl = folderServerRelativeUrl.TrimEnd('/');

    // Already there?
    try
    {
        var existing = ctx.Web.GetFolderByServerRelativeUrl(folderServerRelativeUrl);
        ctx.Load(existing, f => f.Exists);
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
        if (existing.Exists) return;
    }
    catch (Exception probeError) when (probeError is not OperationCanceledException)
    {
        // Not present — fall through to creation. Cancellation is excluded
        // so a cancelled job does not go on to create folders.
    }

    // Walk the path, creating each missing segment. `Web.Folders.Add(url)` is
    // ambiguous across CSOM versions (some treat the arg as relative to Web,
    // others server-relative), which produces bogus paths + "Access denied".
    // Resolve the parent explicitly and add only the leaf name instead.
    var slash = folderServerRelativeUrl.LastIndexOf('/');
    if (slash <= 0) return; // no parent segment left to create under

    var parentUrl = folderServerRelativeUrl.Substring(0, slash);
    var leafName = folderServerRelativeUrl.Substring(slash + 1);
    if (string.IsNullOrEmpty(leafName)) return;

    // Recurse to guarantee the parent exists first.
    await EnsureFolderAsync(ctx, parentUrl, progress, ct);

    var parent = ctx.Web.GetFolderByServerRelativeUrl(parentUrl);
    parent.Folders.Add(leafName);
    try
    {
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
    }
    catch (Exception ex)
    {
        Log.Warning("EnsureFolder failed at {Parent}/{Leaf}: {Error}",
            parentUrl, leafName, ex.Message);
        throw;
    }
}
/// <summary>
/// Turns a server-relative path into an absolute URL using the scheme, host
/// and (non-default) port of <c>ctx.Url</c>. Values that already start with
/// <c>http://</c> or <c>https://</c> are returned unchanged.
/// </summary>
/// <param name="ctx">Context whose <c>Url</c> supplies scheme and authority.</param>
/// <param name="pathOrUrl">Absolute URL or server-relative path, with or without a leading '/'.</param>
/// <returns>An absolute URL for <paramref name="pathOrUrl"/>.</returns>
private static string ToAbsoluteUrl(ClientContext ctx, string pathOrUrl)
{
    if (pathOrUrl.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
        pathOrUrl.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
    {
        return pathOrUrl;
    }

    var uri = new Uri(ctx.Url);

    // Uri.Authority keeps a non-default port (e.g. "host:8443"), which
    // Uri.Host alone would drop and thereby address the wrong endpoint.
    var separator = pathOrUrl.StartsWith("/", StringComparison.Ordinal) ? string.Empty : "/";
    return $"{uri.Scheme}://{uri.Authority}{separator}{pathOrUrl}";
}
/// <summary>
/// Resolves the server-relative URL of a library's root folder by title and
/// appends an optional folder path beneath it. The result never ends with
/// '/', so callers can append further segments with a single separator.
/// </summary>
/// <param name="ctx">Context of the site that holds the library.</param>
/// <param name="libraryTitle">Display title used to locate the library.</param>
/// <param name="relativeFolderPath">Library-relative folder path; null, empty or "/" means the library root.</param>
/// <param name="progress">Forwarded to the query execution helper.</param>
/// <param name="ct">Cancellation token forwarded to the query execution helper.</param>
/// <returns>Server-relative URL of the library root or of the folder beneath it.</returns>
private static async Task<string> ResolveLibraryPathAsync(
    ClientContext ctx,
    string libraryTitle,
    string relativeFolderPath,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    var list = ctx.Web.Lists.GetByTitle(libraryTitle);
    ctx.Load(list, l => l.RootFolder.ServerRelativeUrl);
    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

    var basePath = list.RootFolder.ServerRelativeUrl.TrimEnd('/');

    // Trim both ends: a trailing '/' on the folder path would otherwise
    // survive into the result and yield "//" once a caller appends a segment.
    var folderPart = relativeFolderPath?.Trim('/');
    return string.IsNullOrEmpty(folderPart)
        ? basePath
        : $"{basePath}/{folderPart}";
}
}