using System.IO;
using Microsoft.SharePoint.Client;
using SharepointToolbox.Core.Helpers;
using SharepointToolbox.Core.Models;

namespace SharepointToolbox.Services;

/// <summary>
/// CSOM-based storage metrics scanner.
/// Port of PowerShell Collect-FolderStorage / Get-PnPFolderStorageMetric pattern.
/// </summary>
public class StorageService : IStorageService
{
    // Paged CAML view over every item (files and folders) in a library, with no WHERE clause.
    // A WHERE on a non-indexed field (FSObjType) throws the list-view threshold on libraries
    // > 5,000 items, so callers filter on FSObjType client-side instead.
    // NOTE(review): markup reconstructed from the surrounding comments (paged, 5000 rows,
    // recursive scan, no WHERE) — confirm it matches the view the project originally shipped.
    private const string PagedAllItemsViewXml =
        "<View Scope='RecursiveAll'><Query></Query><RowLimit Paged='TRUE'>5000</RowLimit></View>";

    // NOTE(review): generic type arguments in the public signatures below
    // (IReadOnlyList<StorageNode>, IReadOnlyList<FileTypeMetric>, IProgress<OperationProgress>)
    // must match IStorageService — confirm against the interface.

    /// <summary>
    /// Collects per-library and per-folder storage metrics for a single
    /// SharePoint site. Depth is controlled via <see cref="StorageScanOptions"/>;
    /// libraries flagged Hidden are skipped.
    /// Each folder's direct children are enumerated through
    /// <see cref="SharePointPaginationHelper"/> (so libraries above the 5,000-item
    /// threshold remain scannable) and the tree is then descended depth-first.
    /// </summary>
    /// <param name="ctx">Client context bound to the site to scan.</param>
    /// <param name="options">Scan options; <c>FolderDepth</c> of 0 returns library roots only.</param>
    /// <param name="progress">Receives one progress report per library.</param>
    /// <param name="ct">Cancellation token, checked before every round-trip.</param>
    /// <returns>One root node per visible document library, with nested child folders.</returns>
    public async Task<IReadOnlyList<StorageNode>> CollectStorageAsync(
        ClientContext ctx,
        StorageScanOptions options,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        ct.ThrowIfCancellationRequested();

        // Load web-level metadata in one round-trip
        ctx.Load(ctx.Web,
            w => w.Title,
            w => w.Url,
            w => w.ServerRelativeUrl,
            w => w.Lists.Include(
                l => l.Title,
                l => l.Hidden,
                l => l.BaseType,
                l => l.RootFolder.ServerRelativeUrl));
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        string siteTitle = ctx.Web.Title;
        var result = new List<StorageNode>();

        var libs = ctx.Web.Lists
            .Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
            .ToList();

        int idx = 0;
        foreach (var lib in libs)
        {
            ct.ThrowIfCancellationRequested();
            idx++;
            progress.Report(new OperationProgress(idx, libs.Count,
                $"Loading storage metrics: {lib.Title} ({idx}/{libs.Count})"));

            var libNode = await LoadFolderNodeAsync(
                ctx, lib.RootFolder.ServerRelativeUrl, lib.Title,
                siteTitle, lib.Title, 0, progress, ct);

            if (options.FolderDepth > 0)
            {
                await CollectSubfoldersAsync(
                    ctx, lib, lib.RootFolder.ServerRelativeUrl,
                    libNode, 1, options.FolderDepth,
                    siteTitle, lib.Title, progress, ct);
            }

            result.Add(libNode);
        }

        return result;
    }

    /// <summary>
    /// Aggregates file counts and total sizes by extension across every
    /// non-hidden document library on the site. Extensions are normalised to
    /// lowercase; files without an extension roll up into a single bucket
    /// (the empty-string extension).
    /// </summary>
    /// <param name="ctx">Client context bound to the site to scan.</param>
    /// <param name="progress">Receives one progress report per library.</param>
    /// <param name="ct">Cancellation token, checked before every page request.</param>
    /// <returns>One metric per extension, sorted by total size descending.</returns>
    public async Task<IReadOnlyList<FileTypeMetric>> CollectFileTypeMetricsAsync(
        ClientContext ctx,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        ct.ThrowIfCancellationRequested();

        // Load all non-hidden document libraries
        ctx.Load(ctx.Web,
            w => w.Lists.Include(
                l => l.Title,
                l => l.Hidden,
                l => l.BaseType,
                l => l.ItemCount));
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        var libs = ctx.Web.Lists
            .Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
            .ToList();

        // Accumulate file sizes by extension across all libraries
        var extensionMap = new Dictionary<string, (long totalSize, int count)>(StringComparer.OrdinalIgnoreCase);

        int libIdx = 0;
        foreach (var lib in libs)
        {
            ct.ThrowIfCancellationRequested();
            libIdx++;
            progress.Report(new OperationProgress(libIdx, libs.Count,
                $"Scanning files by type: {lib.Title} ({libIdx}/{libs.Count})"));

            // Paged scan of the whole library; files are picked out client-side via FSObjType.
            var query = new CamlQuery { ViewXml = PagedAllItemsViewXml };

            ListItemCollection items;
            do
            {
                ct.ThrowIfCancellationRequested();
                items = lib.GetItems(query);
                ctx.Load(items,
                    ic => ic.ListItemCollectionPosition,
                    ic => ic.Include(
                        i => i["FSObjType"],
                        i => i["FileLeafRef"],
                        i => i["File_x0020_Size"]));
                await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

                foreach (var item in items)
                {
                    if (item["FSObjType"]?.ToString() != "0") continue; // skip folders

                    string fileName = item["FileLeafRef"]?.ToString() ?? string.Empty;
                    string sizeStr = item["File_x0020_Size"]?.ToString() ?? "0";
                    if (!long.TryParse(sizeStr, out long fileSize))
                        fileSize = 0;

                    string ext = Path.GetExtension(fileName).ToLowerInvariant();

                    if (extensionMap.TryGetValue(ext, out var existing))
                        extensionMap[ext] = (existing.totalSize + fileSize, existing.count + 1);
                    else
                        extensionMap[ext] = (fileSize, 1);
                }

                // Carry the paging cursor forward; null means the last page was read.
                query.ListItemCollectionPosition = items.ListItemCollectionPosition;
            }
            while (items.ListItemCollectionPosition != null);
        }

        // Convert to FileTypeMetric list, sorted by size descending
        return extensionMap
            .Select(kvp => new FileTypeMetric(kvp.Key, kvp.Value.totalSize, kvp.Value.count))
            .OrderByDescending(m => m.TotalSizeBytes)
            .ToList();
    }

    /// <summary>
    /// Recomputes size and file-count figures for library trees that contain
    /// zero-valued nodes by enumerating every file in the affected libraries
    /// and crediting it to the library root and to every scanned folder that
    /// contains it. Nodes are updated in place.
    /// </summary>
    /// <param name="ctx">Client context bound to the site the nodes were collected from.</param>
    /// <param name="nodes">Previously collected nodes; only entries with <c>IndentLevel == 0</c> are treated as library roots.</param>
    /// <param name="progress">Receives one progress report per library that is re-counted.</param>
    /// <param name="ct">Cancellation token, checked before every page request.</param>
    public async Task BackfillZeroNodesAsync(
        ClientContext ctx,
        IReadOnlyList<StorageNode> nodes,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        ct.ThrowIfCancellationRequested();

        // Find root-level library nodes that have any zero-valued nodes in their tree
        var libNodes = nodes.Where(n => n.IndentLevel == 0).ToList();
        var needsBackfill = libNodes
            .Where(lib => lib.TotalFileCount == 0 || HasZeroChild(lib))
            .ToList();
        if (needsBackfill.Count == 0) return;

        // Load libraries to get RootFolder.ServerRelativeUrl for path matching
        ctx.Load(ctx.Web,
            w => w.ServerRelativeUrl,
            w => w.Lists.Include(
                l => l.Title,
                l => l.Hidden,
                l => l.BaseType,
                l => l.RootFolder.ServerRelativeUrl));
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        var libs = ctx.Web.Lists
            .Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
            .ToDictionary(l => l.Title, StringComparer.OrdinalIgnoreCase);

        int idx = 0;
        foreach (var libNode in needsBackfill)
        {
            ct.ThrowIfCancellationRequested();
            idx++;

            // Library nodes are matched to lists by title; unmatched nodes are left untouched.
            if (!libs.TryGetValue(libNode.Name, out var lib)) continue;

            progress.Report(new OperationProgress(idx, needsBackfill.Count,
                $"Counting files: {libNode.Name} ({idx}/{needsBackfill.Count})"));

            string libRootSrl = lib.RootFolder.ServerRelativeUrl.TrimEnd('/');

            // Build a lookup of all folder nodes in this library's tree (by server-relative path)
            var folderLookup = new Dictionary<string, StorageNode>(StringComparer.OrdinalIgnoreCase);
            BuildFolderLookup(libNode, libRootSrl, folderLookup);

            // Capture original TotalSizeBytes before reset — StorageMetrics.TotalSize
            // includes version overhead, which cannot be rederived from a file scan
            // (File_x0020_Size is the current stream size only).
            var originalTotals = new Dictionary<StorageNode, long>();
            CaptureTotals(libNode, originalTotals);

            // Reset all nodes in this tree to zero before accumulating
            ResetNodeCounts(libNode);

            // Paginated CAML without WHERE (filter folders client-side via FSObjType).
            // SMTotalSize = per-file total including all versions (version-aware).
            // SMTotalFileStreamSize = current stream only. File_x0020_Size is a fallback
            // when SMTotalSize is unavailable (older tenants / custom fields stripped).
            var query = new CamlQuery { ViewXml = PagedAllItemsViewXml };

            ListItemCollection items;
            do
            {
                ct.ThrowIfCancellationRequested();
                items = lib.GetItems(query);
                ctx.Load(items,
                    ic => ic.ListItemCollectionPosition,
                    ic => ic.Include(
                        i => i["FSObjType"],
                        i => i["FileDirRef"],
                        i => i["File_x0020_Size"],
                        i => i["SMTotalSize"],
                        i => i["SMTotalFileStreamSize"]));
                await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

                foreach (var item in items)
                {
                    if (item["FSObjType"]?.ToString() != "0") continue; // skip folders

                    long streamSize = ParseLong(item["File_x0020_Size"]);
                    long smStream = ParseLong(SafeGet(item, "SMTotalFileStreamSize"));
                    long smTotal = ParseLong(SafeGet(item, "SMTotalSize"));

                    // Prefer SM fields when present; fall back to File_x0020_Size otherwise.
                    if (smStream > 0) streamSize = smStream;
                    long totalSize = smTotal > 0 ? smTotal : streamSize;

                    string fileDirRef = item["FileDirRef"]?.ToString() ?? "";

                    // Always count toward the library root
                    libNode.TotalSizeBytes += totalSize;
                    libNode.FileStreamSizeBytes += streamSize;
                    libNode.TotalFileCount++;

                    // Also count toward every scanned folder on the file's path, not just the
                    // deepest one: the figures being replaced are recursive totals, so a parent
                    // must include the files held by its subfolders.
                    foreach (var folder in FindContainingFolders(fileDirRef, folderLookup))
                    {
                        if (ReferenceEquals(folder, libNode)) continue; // root already counted above

                        folder.TotalSizeBytes += totalSize;
                        folder.FileStreamSizeBytes += streamSize;
                        folder.TotalFileCount++;
                    }
                }

                query.ListItemCollectionPosition = items.ListItemCollectionPosition;
            }
            while (items.ListItemCollectionPosition != null);

            // Restore original TotalSizeBytes where it exceeded the recomputed value.
            // Preserves StorageMetrics.TotalSize for nodes whose original metrics were
            // valid but SMTotalSize was missing on individual files.
            foreach (var kv in originalTotals)
            {
                if (kv.Value > kv.Key.TotalSizeBytes)
                    kv.Key.TotalSizeBytes = kv.Value;
            }
        }
    }

    /// <summary>
    /// Parses a field value as a 64-bit integer via its string form;
    /// returns 0 for null or unparsable values.
    /// </summary>
    // NOTE(review): if CSOM hands back SMTotalSize / SMTotalFileStreamSize as a non-scalar
    // value (e.g. FieldLookupValue), ToString() will not parse and this yields 0, silently
    // forcing the File_x0020_Size fallback — verify against a live tenant.
    private static long ParseLong(object? value)
    {
        if (value == null) return 0;
        return long.TryParse(value.ToString(), out long n) ? n : 0;
    }

    /// <summary>
    /// Reads a field from a list item, returning null instead of throwing when
    /// the indexer fails. Deliberately best-effort: the SM* fields are optional.
    /// </summary>
    private static object? SafeGet(ListItem item, string fieldName)
    {
        try { return item[fieldName]; }
        catch { return null; }
    }

    /// <summary>Records the current TotalSizeBytes of a node and all its descendants.</summary>
    private static void CaptureTotals(StorageNode node, Dictionary<StorageNode, long> map)
    {
        map[node] = node.TotalSizeBytes;
        foreach (var child in node.Children)
            CaptureTotals(child, map);
    }

    /// <summary>Returns true when any descendant of the node has a file count of zero.</summary>
    private static bool HasZeroChild(StorageNode node)
    {
        foreach (var child in node.Children)
        {
            if (child.TotalFileCount == 0) return true;
            if (HasZeroChild(child)) return true;
        }
        return false;
    }

    /// <summary>Zeroes size and count figures on a node and all its descendants.</summary>
    private static void ResetNodeCounts(StorageNode node)
    {
        node.TotalSizeBytes = 0;
        node.FileStreamSizeBytes = 0;
        node.TotalFileCount = 0;
        foreach (var child in node.Children)
            ResetNodeCounts(child);
    }

    /// <summary>
    /// Indexes a node tree by server-relative path. The root node (IndentLevel 0)
    /// maps to <paramref name="parentPath"/> itself; every other node maps to its
    /// parent's path plus its own name.
    /// </summary>
    private static void BuildFolderLookup(StorageNode node, string parentPath, Dictionary<string, StorageNode> lookup)
    {
        string nodePath = node.IndentLevel == 0
            ? parentPath
            : parentPath + "/" + node.Name;
        lookup[nodePath] = node;

        foreach (var child in node.Children)
            BuildFolderLookup(child, nodePath, lookup);
    }

    /// <summary>
    /// Yields every node in <paramref name="lookup"/> whose path equals
    /// <paramref name="fileDirRef"/> or is an ancestor of it, deepest first.
    /// </summary>
    private static IEnumerable<StorageNode> FindContainingFolders(string fileDirRef, Dictionary<string, StorageNode> lookup)
    {
        // fileDirRef is the server-relative folder path, e.g. "/sites/hr/Shared Documents/Reports".
        // Start at the exact folder and strip one trailing segment per step.
        string path = fileDirRef.TrimEnd('/');
        while (!string.IsNullOrEmpty(path))
        {
            if (lookup.TryGetValue(path, out var node))
                yield return node;

            int lastSlash = path.LastIndexOf('/');
            if (lastSlash <= 0) yield break;
            path = path[..lastSlash];
        }
    }

    // -- Private helpers -----------------------------------------------------

    /// <summary>
    /// Joins the scheme and authority of the site URL with a server-relative path.
    /// The site URL already contains the site's own path (e.g. "/sites/hr"), and a
    /// server-relative path starts from the server root, so simply concatenating the
    /// two would repeat the site path.
    /// </summary>
    private static string BuildAbsoluteUrl(string siteUrl, string serverRelativeUrl)
    {
        if (Uri.TryCreate(siteUrl, UriKind.Absolute, out var siteUri))
            return siteUri.GetLeftPart(UriPartial.Authority) + serverRelativeUrl;

        // Not an absolute URL: keep the previous concatenation rather than throwing.
        return siteUrl.TrimEnd('/') + serverRelativeUrl;
    }

    /// <summary>
    /// Loads StorageMetrics for one folder (one round-trip) and wraps them in a
    /// <see cref="StorageNode"/> with an empty child list.
    /// </summary>
    private static async Task<StorageNode> LoadFolderNodeAsync(
        ClientContext ctx,
        string serverRelativeUrl,
        string name,
        string siteTitle,
        string library,
        int indentLevel,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        ct.ThrowIfCancellationRequested();

        Folder folder = ctx.Web.GetFolderByServerRelativeUrl(serverRelativeUrl);
        ctx.Load(folder,
            f => f.StorageMetrics,
            f => f.TimeLastModified,
            f => f.ServerRelativeUrl,
            f => f.Name);
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        // Prefer the metrics timestamp, then the folder's own; leave null when neither is set.
        DateTime? lastMod = null;
        if (folder.StorageMetrics.LastModified > DateTime.MinValue)
            lastMod = folder.StorageMetrics.LastModified;
        else if (folder.TimeLastModified > DateTime.MinValue)
            lastMod = folder.TimeLastModified;

        return new StorageNode
        {
            Name = name,
            Url = BuildAbsoluteUrl(ctx.Url, serverRelativeUrl),
            SiteTitle = siteTitle,
            Library = library,
            TotalSizeBytes = folder.StorageMetrics.TotalSize,
            FileStreamSizeBytes = folder.StorageMetrics.TotalFileStreamSize,
            TotalFileCount = folder.StorageMetrics.TotalFileCount,
            LastModified = lastMod,
            IndentLevel = indentLevel,
            Children = new List<StorageNode>()
        };
    }

    /// <summary>
    /// Adds the direct child folders of <paramref name="parentServerRelativeUrl"/> to
    /// <paramref name="parentNode"/> and recurses into each of them until
    /// <paramref name="maxDepth"/> is reached. "Forms" and folders whose name starts
    /// with an underscore are skipped.
    /// </summary>
    private static async Task CollectSubfoldersAsync(
        ClientContext ctx,
        List list,
        string parentServerRelativeUrl,
        StorageNode parentNode,
        int currentDepth,
        int maxDepth,
        string siteTitle,
        string library,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        if (currentDepth > maxDepth) return;
        ct.ThrowIfCancellationRequested();

        // Enumerate direct child folders via paginated CAML scoped to the parent.
        // Folder.Folders lazy loading hits the list-view threshold on libraries
        // > 5,000 items; a paged CAML query with no WHERE bypasses it.
        var subfolders = new List<(string Name, string ServerRelativeUrl)>();

        await foreach (var item in SharePointPaginationHelper.GetItemsInFolderAsync(
            ctx, list, parentServerRelativeUrl, recursive: false,
            viewFields: new[] { "FSObjType", "FileLeafRef", "FileRef" },
            ct: ct))
        {
            if (item["FSObjType"]?.ToString() != "1") continue; // folders only

            string name = item["FileLeafRef"]?.ToString() ?? string.Empty;
            string url = item["FileRef"]?.ToString() ?? string.Empty;
            if (string.IsNullOrEmpty(name) || string.IsNullOrEmpty(url)) continue;

            // Skip SharePoint system folders
            if (name.Equals("Forms", StringComparison.OrdinalIgnoreCase) ||
                name.StartsWith("_", StringComparison.Ordinal))
                continue;

            subfolders.Add((name, url));
        }

        foreach (var sub in subfolders)
        {
            ct.ThrowIfCancellationRequested();

            var childNode = await LoadFolderNodeAsync(
                ctx, sub.ServerRelativeUrl, sub.Name,
                siteTitle, library, currentDepth, progress, ct);

            if (currentDepth < maxDepth)
            {
                await CollectSubfoldersAsync(
                    ctx, list, sub.ServerRelativeUrl, childNode,
                    currentDepth + 1, maxDepth,
                    siteTitle, library, progress, ct);
            }

            parentNode.Children.Add(childNode);
        }
    }
}