diff --git a/SharepointToolbox/Services/DuplicatesService.cs b/SharepointToolbox/Services/DuplicatesService.cs
new file mode 100644
index 0000000..63d6744
--- /dev/null
+++ b/SharepointToolbox/Services/DuplicatesService.cs
@@ -0,0 +1,250 @@
+using Microsoft.SharePoint.Client;
+using Microsoft.SharePoint.Client.Search.Query;
+using SharepointToolbox.Core.Helpers;
+using SharepointToolbox.Core.Models;
+
+namespace SharepointToolbox.Services;
+
/// <summary>
/// Duplicate file and folder detection.
/// Files: Search API (same KQL engine as SearchService) + client-side composite key grouping.
/// Folders: CSOM CAML FSObjType=1 via SharePointPaginationHelper + composite key grouping.
/// Port of PS Find-DuplicateFiles / Find-DuplicateFolders (PS lines 4942-5036).
/// </summary>
public class DuplicatesService : IDuplicatesService
{
    /// <summary>Search API page size (rows fetched per server round-trip).</summary>
    private const int BatchSize = 500;

    /// <summary>
    /// Hard ceiling on the search StartRow. The SharePoint Search API caps deep
    /// paging around 50k rows, so we stop there rather than loop indefinitely.
    /// </summary>
    private const int MaxStartRow = 50_000;

    /// <summary>
    /// Collects candidate items (files via Search API, folders via CAML) and groups
    /// them by a composite duplicate key built from <paramref name="options"/>.
    /// </summary>
    /// <param name="ctx">CSOM client context bound to the target site.</param>
    /// <param name="options">Scan mode ("Folders" scans folders, anything else scans files), optional library scope, and key-match switches.</param>
    /// <param name="progress">Progress sink for UI updates.</param>
    /// <param name="ct">Cancellation token, honored between server round-trips.</param>
    /// <returns>Groups containing two or more matching items, ordered by group size descending, then by name.</returns>
    public async Task<List<DuplicateGroup>> ScanDuplicatesAsync(
        ClientContext ctx,
        DuplicateScanOptions options,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        ct.ThrowIfCancellationRequested();

        List<DuplicateItem> allItems;

        if (options.Mode == "Folders")
            allItems = await CollectFolderItemsAsync(ctx, options, progress, ct);
        else
            allItems = await CollectFileItemsAsync(ctx, options, progress, ct);

        progress.Report(OperationProgress.Indeterminate($"Grouping {allItems.Count:N0} items by duplicate key\u2026"));

        // Only keys shared by two or more items constitute duplicates;
        // the biggest clusters surface first.
        var groups = allItems
            .GroupBy(item => MakeKey(item, options))
            .Where(g => g.Count() >= 2)
            .Select(g => new DuplicateGroup
            {
                GroupKey = g.Key,
                Name = g.First().Name,
                Items = g.ToList()
            })
            .OrderByDescending(g => g.Items.Count)
            .ThenBy(g => g.Name)
            .ToList();

        return groups;
    }

    // ── File collection via Search API ────────────────────────────────────────

    /// <summary>
    /// Pages through the Search API (KQL "ContentType:Document", optionally scoped
    /// to one library via a Path filter) converting each result row into a
    /// <see cref="DuplicateItem"/>. Version-history files (_vti_history) are skipped.
    /// </summary>
    private static async Task<List<DuplicateItem>> CollectFileItemsAsync(
        ClientContext ctx,
        DuplicateScanOptions options,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        // KQL: all documents, optionally scoped to a library
        var kqlParts = new List<string> { "ContentType:Document" };
        if (!string.IsNullOrEmpty(options.Library))
            kqlParts.Add($"Path:\"{ctx.Url.TrimEnd('/')}/{options.Library.TrimStart('/')}*\"");
        string kql = string.Join(" AND ", kqlParts);

        var allItems = new List<DuplicateItem>();
        int startRow = 0;

        do
        {
            ct.ThrowIfCancellationRequested();

            var kq = new KeywordQuery(ctx)
            {
                QueryText = kql,
                StartRow = startRow,
                RowLimit = BatchSize,
                // Search would otherwise collapse near-identical results —
                // the very thing this scan is looking for.
                TrimDuplicates = false
            };
            foreach (var prop in new[] { "Title", "Path", "FileExtension", "Created",
                                         "LastModifiedTime", "Size", "ParentLink" })
                kq.SelectProperties.Add(prop);

            var executor = new SearchExecutor(ctx);
            ClientResult<ResultTableCollection> clientResult = executor.ExecuteQuery(kq);
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

            var table = clientResult.Value
                .FirstOrDefault(t => t.TableType == KnownTableTypes.RelevantResults);
            if (table == null || table.RowCount == 0) break;

            foreach (System.Collections.Hashtable row in table.ResultRows)
            {
                var dict = row.Cast<System.Collections.DictionaryEntry>()
                    .ToDictionary(e => e.Key.ToString()!, e => e.Value ?? (object)string.Empty);

                string path = GetStr(dict, "Path");
                // Old document versions are not user-visible duplicates.
                if (path.Contains("/_vti_history/", StringComparison.OrdinalIgnoreCase))
                    continue;

                string name = System.IO.Path.GetFileName(path);
                if (string.IsNullOrEmpty(name))
                    name = GetStr(dict, "Title");

                // "Size" may come back formatted (e.g. "12,345"); keep digits only
                // before parsing so grouping on size is stable.
                string raw = GetStr(dict, "Size");
                string digits = System.Text.RegularExpressions.Regex.Replace(raw, "[^0-9]", "");
                long size = long.TryParse(digits, out var sv) ? sv : 0L;

                DateTime? created = ParseDate(GetStr(dict, "Created"));
                DateTime? modified = ParseDate(GetStr(dict, "LastModifiedTime"));

                // Derive library from path segments
                string library = ExtractLibraryFromPath(path, ctx.Url);

                allItems.Add(new DuplicateItem
                {
                    Name = name,
                    Path = path,
                    Library = library,
                    SizeBytes = size,
                    Created = created,
                    Modified = modified
                });
            }

            progress.Report(new OperationProgress(allItems.Count, MaxStartRow,
                $"Collected {allItems.Count:N0} files\u2026"));

            startRow += BatchSize;
        }
        while (startRow <= MaxStartRow);

        return allItems;
    }

    // ── Folder collection via CAML ────────────────────────────────────────────

    /// <summary>
    /// Enumerates all visible document libraries (or the single requested one) and
    /// collects every folder (FSObjType = 1) through paged CAML queries.
    /// </summary>
    private static async Task<List<DuplicateItem>> CollectFolderItemsAsync(
        ClientContext ctx,
        DuplicateScanOptions options,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        // Load all document libraries on the site
        ctx.Load(ctx.Web,
            w => w.Lists.Include(
                l => l.Title, l => l.Hidden, l => l.BaseType));
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        var libs = ctx.Web.Lists
            .Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
            .ToList();

        // Filter to specific library if requested
        if (!string.IsNullOrEmpty(options.Library))
        {
            libs = libs
                .Where(l => l.Title.Equals(options.Library, StringComparison.OrdinalIgnoreCase))
                .ToList();
        }

        // Scope=RecursiveAll + FSObjType=1 returns folders at any depth;
        // the paged RowLimit lets SharePointPaginationHelper walk large libraries.
        var camlQuery = new CamlQuery
        {
            ViewXml = """
                <View Scope="RecursiveAll">
                  <Query>
                    <Where>
                      <Eq>
                        <FieldRef Name="FSObjType" />
                        <Value Type="Integer">1</Value>
                      </Eq>
                    </Where>
                  </Query>
                  <RowLimit Paged="TRUE">2000</RowLimit>
                </View>
                """
        };

        var allItems = new List<DuplicateItem>();

        foreach (var lib in libs)
        {
            ct.ThrowIfCancellationRequested();
            progress.Report(OperationProgress.Indeterminate($"Scanning folders in {lib.Title}\u2026"));

            await foreach (var item in SharePointPaginationHelper.GetAllItemsAsync(ctx, lib, camlQuery, ct))
            {
                ct.ThrowIfCancellationRequested();

                var fv = item.FieldValues;
                string name = fv["FileLeafRef"]?.ToString() ?? string.Empty;
                string fileRef = fv["FileRef"]?.ToString() ?? string.Empty;
                int subCount = Convert.ToInt32(fv["FolderChildCount"] ?? 0);
                int childCount = Convert.ToInt32(fv["ItemChildCount"] ?? 0);
                // NOTE(review): this assumes ItemChildCount includes subfolders, hence
                // the subtraction — confirm against tenant behavior; clamped so the
                // file count can never go negative either way.
                int fileCount = Math.Max(0, childCount - subCount);
                DateTime? created = fv["Created"] is DateTime cr ? cr : (DateTime?)null;
                DateTime? modified = fv["Modified"] is DateTime md ? md : (DateTime?)null;

                allItems.Add(new DuplicateItem
                {
                    Name = name,
                    Path = fileRef,
                    Library = lib.Title,
                    FolderCount = subCount,
                    FileCount = fileCount,
                    Created = created,
                    Modified = modified
                });
            }
        }

        return allItems;
    }

    // ── Composite key builder (matches test scaffold in DuplicatesServiceTests) ──

    /// <summary>
    /// Builds the composite grouping key: lower-cased name plus whichever optional
    /// criteria are enabled in <paramref name="opts"/> AND present on the item.
    /// Items missing an enabled criterion simply omit that part (they still group
    /// with other items missing it).
    /// </summary>
    internal static string MakeKey(DuplicateItem item, DuplicateScanOptions opts)
    {
        var parts = new List<string> { item.Name.ToLowerInvariant() };
        if (opts.MatchSize && item.SizeBytes.HasValue) parts.Add(item.SizeBytes.Value.ToString());
        if (opts.MatchCreated && item.Created.HasValue) parts.Add(item.Created.Value.Date.ToString("yyyy-MM-dd"));
        if (opts.MatchModified && item.Modified.HasValue) parts.Add(item.Modified.Value.Date.ToString("yyyy-MM-dd"));
        if (opts.MatchSubfolderCount && item.FolderCount.HasValue) parts.Add(item.FolderCount.Value.ToString());
        if (opts.MatchFileCount && item.FileCount.HasValue) parts.Add(item.FileCount.Value.ToString());
        return string.Join("|", parts);
    }

    // ── Private utilities ─────────────────────────────────────────────────────

    /// <summary>Safe dictionary lookup: missing key or null value yields an empty string.</summary>
    private static string GetStr(IDictionary<string, object> r, string key) =>
        r.TryGetValue(key, out var v) ? v?.ToString() ?? string.Empty : string.Empty;

    /// <summary>Lenient date parse; returns null when the string is not a recognizable date.</summary>
    private static DateTime? ParseDate(string s) =>
        DateTime.TryParse(s, out var dt) ? dt : (DateTime?)null;

    /// <summary>
    /// Extracts the first path segment after the site URL as the library name,
    /// e.g. https://tenant.sharepoint.com/sites/MySite/Shared Documents/file.docx -> "Shared Documents".
    /// Falls back to the raw path's first segment when the path is not under the site URL.
    /// </summary>
    private static string ExtractLibraryFromPath(string path, string siteUrl)
    {
        if (string.IsNullOrEmpty(path) || string.IsNullOrEmpty(siteUrl))
            return string.Empty;

        string relative = path.StartsWith(siteUrl.TrimEnd('/'), StringComparison.OrdinalIgnoreCase)
            ? path.Substring(siteUrl.TrimEnd('/').Length).TrimStart('/')
            : path;

        int slash = relative.IndexOf('/');
        return slash > 0 ? relative.Substring(0, slash) : relative;
    }
}