using System.Globalization;
using System.IO;
using Microsoft.SharePoint.Client;
using SharepointToolbox.Core.Helpers;
using SharepointToolbox.Core.Models;

namespace SharepointToolbox.Services;

/// <summary>
/// CSOM-based storage metrics scanner.
/// Port of PowerShell Collect-FolderStorage / Get-PnPFolderStorageMetric pattern.
/// </summary>
public class StorageService : IStorageService
{
    // CAML view used by both file scans: every file (FSObjType = 0) in the library,
    // recursively, paged 500 rows at a time.
    // NOTE(review): the XML markup was restored from a copy in which the tags had been
    // stripped (only " 0 500 " survived) - confirm it matches the intended query.
    private const string AllFilesViewXml = @"<View Scope='RecursiveAll'>
  <Query>
    <Where>
      <Eq>
        <FieldRef Name='FSObjType' />
        <Value Type='Integer'>0</Value>
      </Eq>
    </Where>
  </Query>
  <RowLimit Paged='TRUE'>500</RowLimit>
</View>";

    /// <summary>
    /// Builds one <see cref="StorageNode"/> per visible document library of the web,
    /// populated from the folder storage metrics, and optionally descends into subfolders.
    /// </summary>
    /// <param name="ctx">Client context of the web to scan.</param>
    /// <param name="options">Scan options; <c>FolderDepth</c> controls how many subfolder levels are collected (0 = libraries only).</param>
    /// <param name="progress">Receives one progress report per library.</param>
    /// <param name="ct">Cancellation token, checked before every library and folder.</param>
    /// <returns>The library root nodes, each carrying its collected subfolder tree in <c>Children</c>.</returns>
    public async Task<IReadOnlyList<StorageNode>> CollectStorageAsync(
        ClientContext ctx,
        StorageScanOptions options,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        ct.ThrowIfCancellationRequested();

        // Load web-level metadata in one round-trip
        ctx.Load(ctx.Web,
            w => w.Title,
            w => w.Url,
            w => w.ServerRelativeUrl,
            w => w.Lists.Include(
                l => l.Title,
                l => l.Hidden,
                l => l.BaseType,
                l => l.RootFolder.ServerRelativeUrl));
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        string siteTitle = ctx.Web.Title;
        var result = new List<StorageNode>();

        var libs = ctx.Web.Lists
            .Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
            .ToList();

        int idx = 0;
        foreach (var lib in libs)
        {
            ct.ThrowIfCancellationRequested();
            idx++;
            progress.Report(new OperationProgress(idx, libs.Count,
                $"Loading storage metrics: {lib.Title} ({idx}/{libs.Count})"));

            var libNode = await LoadFolderNodeAsync(
                ctx, lib.RootFolder.ServerRelativeUrl, lib.Title,
                siteTitle, lib.Title, 0, progress, ct);

            if (options.FolderDepth > 0)
            {
                await CollectSubfoldersAsync(
                    ctx, lib.RootFolder.ServerRelativeUrl,
                    libNode, 1, options.FolderDepth,
                    siteTitle, lib.Title, progress, ct);
            }

            result.Add(libNode);
        }

        return result;
    }

    /// <summary>
    /// Enumerates every file of every visible document library and aggregates
    /// total size and file count per file extension.
    /// </summary>
    /// <param name="ctx">Client context of the web to scan.</param>
    /// <param name="progress">Receives one progress report per library.</param>
    /// <param name="ct">Cancellation token, checked before every library and every page.</param>
    /// <returns>One metric per extension (lower-cased, "" for extensionless files), largest total size first.</returns>
    public async Task<IReadOnlyList<FileTypeMetric>> CollectFileTypeMetricsAsync(
        ClientContext ctx,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        ct.ThrowIfCancellationRequested();

        // Load all non-hidden document libraries
        ctx.Load(ctx.Web,
            w => w.Lists.Include(
                l => l.Title,
                l => l.Hidden,
                l => l.BaseType,
                l => l.ItemCount));
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        var libs = ctx.Web.Lists
            .Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
            .ToList();

        // Accumulate file sizes by extension across all libraries
        var extensionMap = new Dictionary<string, (long totalSize, int count)>(StringComparer.OrdinalIgnoreCase);

        int libIdx = 0;
        foreach (var lib in libs)
        {
            ct.ThrowIfCancellationRequested();
            libIdx++;
            progress.Report(new OperationProgress(libIdx, libs.Count,
                $"Scanning files by type: {lib.Title} ({libIdx}/{libs.Count})"));

            var query = CreateAllFilesQuery();
            ListItemCollection items;
            do
            {
                ct.ThrowIfCancellationRequested();
                items = lib.GetItems(query);
                ctx.Load(items,
                    ic => ic.ListItemCollectionPosition,
                    ic => ic.Include(
                        i => i["FileLeafRef"],
                        i => i["File_x0020_Size"]));
                await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

                foreach (var item in items)
                {
                    string fileName = item["FileLeafRef"]?.ToString() ?? string.Empty;
                    long fileSize = ParseFileSize(item["File_x0020_Size"]);

                    // ext is "" for extensionless files, ".docx" etc. for others
                    string ext = Path.GetExtension(fileName).ToLowerInvariant();

                    extensionMap[ext] = extensionMap.TryGetValue(ext, out var existing)
                        ? (existing.totalSize + fileSize, existing.count + 1)
                        : (fileSize, 1);
                }

                // Move to next page
                query.ListItemCollectionPosition = items.ListItemCollectionPosition;
            }
            while (items.ListItemCollectionPosition != null);
        }

        // Convert to FileTypeMetric list, sorted by size descending
        return extensionMap
            .Select(kvp => new FileTypeMetric(kvp.Key, kvp.Value.totalSize, kvp.Value.count))
            .OrderByDescending(m => m.TotalSizeBytes)
            .ToList();
    }

    /// <summary>
    /// Recomputes size and file counts by enumerating files for every library tree in
    /// <paramref name="nodes"/> whose root, or any descendant, reports zero files.
    /// The affected trees are reset to zero and refilled in place.
    /// </summary>
    /// <param name="ctx">Client context of the web the nodes were collected from.</param>
    /// <param name="nodes">Nodes to inspect; only those with <c>IndentLevel == 0</c> are treated as library roots.</param>
    /// <param name="progress">Receives one progress report per library that is recounted.</param>
    /// <param name="ct">Cancellation token, checked before every library and every page.</param>
    public async Task BackfillZeroNodesAsync(
        ClientContext ctx,
        IReadOnlyList<StorageNode> nodes,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        // Find root-level library nodes that have any zero-valued nodes in their tree
        var needsBackfill = nodes
            .Where(n => n.IndentLevel == 0)
            .Where(lib => lib.TotalFileCount == 0 || HasZeroChild(lib))
            .ToList();
        if (needsBackfill.Count == 0)
        {
            return;
        }

        // Load libraries to get RootFolder.ServerRelativeUrl for path matching
        ctx.Load(ctx.Web,
            w => w.ServerRelativeUrl,
            w => w.Lists.Include(
                l => l.Title,
                l => l.Hidden,
                l => l.BaseType,
                l => l.RootFolder.ServerRelativeUrl));
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        var libs = ctx.Web.Lists
            .Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
            .ToDictionary(l => l.Title, StringComparer.OrdinalIgnoreCase);

        int idx = 0;
        foreach (var libNode in needsBackfill)
        {
            ct.ThrowIfCancellationRequested();
            idx++;

            // Library nodes are matched to lists by title; nodes without a match are left untouched
            if (!libs.TryGetValue(libNode.Name, out var lib))
            {
                continue;
            }

            progress.Report(new OperationProgress(idx, needsBackfill.Count,
                $"Counting files: {libNode.Name} ({idx}/{needsBackfill.Count})"));

            string libRootSrl = lib.RootFolder.ServerRelativeUrl.TrimEnd('/');

            // Build a lookup of all folder nodes in this library's tree (by server-relative path)
            var folderLookup = new Dictionary<string, StorageNode>(StringComparer.OrdinalIgnoreCase);
            BuildFolderLookup(libNode, libRootSrl, folderLookup);

            // Reset all nodes in this tree to zero before accumulating
            ResetNodeCounts(libNode);

            // Enumerate all files with their folder path
            var query = CreateAllFilesQuery();
            ListItemCollection items;
            do
            {
                ct.ThrowIfCancellationRequested();
                items = lib.GetItems(query);
                ctx.Load(items,
                    ic => ic.ListItemCollectionPosition,
                    ic => ic.Include(
                        i => i["FileDirRef"],
                        i => i["File_x0020_Size"]));
                await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

                foreach (var item in items)
                {
                    long size = ParseFileSize(item["File_x0020_Size"]);
                    string fileDirRef = item["FileDirRef"]?.ToString() ?? string.Empty;

                    // Always count toward the library root
                    AddFile(libNode, size);

                    // Also count toward every tracked folder on the file's path, so that
                    // intermediate folders keep cumulative totals and not only the deepest match.
                    foreach (var folderNode in FindTrackedFolders(fileDirRef, folderLookup))
                    {
                        // The root was credited above; compare by reference to skip exactly that node
                        if (!ReferenceEquals(folderNode, libNode))
                        {
                            AddFile(folderNode, size);
                        }
                    }
                }

                query.ListItemCollectionPosition = items.ListItemCollectionPosition;
            }
            while (items.ListItemCollectionPosition != null);
        }
    }

    // -- Private helpers -----------------------------------------------------

    /// <summary>Creates a fresh paged query over all files of a library (see <see cref="AllFilesViewXml"/>).</summary>
    private static CamlQuery CreateAllFilesQuery() => new CamlQuery { ViewXml = AllFilesViewXml };

    /// <summary>
    /// Converts a raw <c>File_x0020_Size</c> field value to a byte count.
    /// Returns 0 when the value is null or not an integer.
    /// </summary>
    private static long ParseFileSize(object? rawValue)
    {
        string text = Convert.ToString(rawValue, CultureInfo.InvariantCulture) ?? string.Empty;
        return long.TryParse(text, NumberStyles.Integer, CultureInfo.InvariantCulture, out long size)
            ? size
            : 0;
    }

    /// <summary>Adds one file of <paramref name="size"/> bytes to the totals of <paramref name="node"/>.</summary>
    private static void AddFile(StorageNode node, long size)
    {
        node.TotalSizeBytes += size;
        node.FileStreamSizeBytes += size;
        node.TotalFileCount++;
    }

    /// <summary>Returns true when any descendant of <paramref name="node"/> reports a file count of zero.</summary>
    private static bool HasZeroChild(StorageNode node)
    {
        foreach (var child in node.Children)
        {
            if (child.TotalFileCount == 0 || HasZeroChild(child))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>Zeroes size and file count on <paramref name="node"/> and all of its descendants.</summary>
    private static void ResetNodeCounts(StorageNode node)
    {
        node.TotalSizeBytes = 0;
        node.FileStreamSizeBytes = 0;
        node.TotalFileCount = 0;
        foreach (var child in node.Children)
        {
            ResetNodeCounts(child);
        }
    }

    /// <summary>
    /// Registers <paramref name="node"/> and its descendants in <paramref name="lookup"/>, keyed by path.
    /// A node with <c>IndentLevel == 0</c> is keyed by <paramref name="parentPath"/> itself;
    /// every other node by <paramref name="parentPath"/> + "/" + its name.
    /// </summary>
    private static void BuildFolderLookup(StorageNode node, string parentPath, Dictionary<string, StorageNode> lookup)
    {
        string nodePath = node.IndentLevel == 0
            ? parentPath
            : parentPath + "/" + node.Name;
        lookup[nodePath] = node;

        foreach (var child in node.Children)
        {
            BuildFolderLookup(child, nodePath, lookup);
        }
    }

    /// <summary>
    /// Yields every node in <paramref name="lookup"/> whose path equals <paramref name="fileDirRef"/>
    /// or one of its parent paths, deepest first.
    /// </summary>
    private static IEnumerable<StorageNode> FindTrackedFolders(string fileDirRef, Dictionary<string, StorageNode> lookup)
    {
        // fileDirRef is the server-relative folder path, e.g. "/sites/hr/Shared Documents/Reports"
        // Try the exact path first, then walk up one segment at a time
        string path = fileDirRef.TrimEnd('/');
        while (!string.IsNullOrEmpty(path))
        {
            if (lookup.TryGetValue(path, out var node))
            {
                yield return node;
            }

            int lastSlash = path.LastIndexOf('/');
            if (lastSlash <= 0)
            {
                yield break;
            }

            path = path[..lastSlash];
        }
    }

    /// <summary>
    /// Combines the scheme and authority of <paramref name="contextUrl"/> with a server-relative path.
    /// The server-relative path already contains the site path, so appending it to the full
    /// context URL would repeat that segment.
    /// </summary>
    private static string BuildAbsoluteUrl(string contextUrl, string serverRelativeUrl)
    {
        string authority = new Uri(contextUrl).GetLeftPart(UriPartial.Authority);
        return authority + serverRelativeUrl;
    }

    /// <summary>
    /// Loads the storage metrics of one folder and wraps them in a childless <see cref="StorageNode"/>.
    /// </summary>
    /// <param name="ctx">Client context used for the request.</param>
    /// <param name="serverRelativeUrl">Server-relative URL of the folder to load.</param>
    /// <param name="name">Display name stored on the node.</param>
    /// <param name="siteTitle">Site title stored on the node.</param>
    /// <param name="library">Library title stored on the node.</param>
    /// <param name="indentLevel">Depth of the node in the tree (0 = library root).</param>
    /// <param name="progress">Passed through to the retry helper.</param>
    /// <param name="ct">Cancellation token.</param>
    private static async Task<StorageNode> LoadFolderNodeAsync(
        ClientContext ctx,
        string serverRelativeUrl,
        string name,
        string siteTitle,
        string library,
        int indentLevel,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        ct.ThrowIfCancellationRequested();

        Folder folder = ctx.Web.GetFolderByServerRelativeUrl(serverRelativeUrl);
        ctx.Load(folder,
            f => f.StorageMetrics,
            f => f.TimeLastModified,
            f => f.ServerRelativeUrl,
            f => f.Name);
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        // Prefer the metrics timestamp, fall back to the folder's own, otherwise leave it unset
        DateTime? lastMod = null;
        if (folder.StorageMetrics.LastModified > DateTime.MinValue)
        {
            lastMod = folder.StorageMetrics.LastModified;
        }
        else if (folder.TimeLastModified > DateTime.MinValue)
        {
            lastMod = folder.TimeLastModified;
        }

        return new StorageNode
        {
            Name = name,
            Url = BuildAbsoluteUrl(ctx.Url, serverRelativeUrl),
            SiteTitle = siteTitle,
            Library = library,
            TotalSizeBytes = folder.StorageMetrics.TotalSize,
            FileStreamSizeBytes = folder.StorageMetrics.TotalFileStreamSize,
            TotalFileCount = folder.StorageMetrics.TotalFileCount,
            LastModified = lastMod,
            IndentLevel = indentLevel,
            Children = new List<StorageNode>()
        };
    }

    /// <summary>
    /// Recursively loads the child folders of <paramref name="parentServerRelativeUrl"/> down to
    /// <paramref name="maxDepth"/> and appends them to <paramref name="parentNode"/>.
    /// Folders named "Forms" and folders whose name starts with "_" are skipped.
    /// </summary>
    private static async Task CollectSubfoldersAsync(
        ClientContext ctx,
        string parentServerRelativeUrl,
        StorageNode parentNode,
        int currentDepth,
        int maxDepth,
        string siteTitle,
        string library,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        if (currentDepth > maxDepth)
        {
            return;
        }

        ct.ThrowIfCancellationRequested();

        // Load direct child folders of this folder
        Folder parentFolder = ctx.Web.GetFolderByServerRelativeUrl(parentServerRelativeUrl);
        ctx.Load(parentFolder,
            f => f.Folders.Include(
                sf => sf.Name,
                sf => sf.ServerRelativeUrl));
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        foreach (Folder subFolder in parentFolder.Folders)
        {
            ct.ThrowIfCancellationRequested();

            // Skip SharePoint system folders
            if (subFolder.Name.Equals("Forms", StringComparison.OrdinalIgnoreCase) ||
                subFolder.Name.StartsWith("_", StringComparison.Ordinal))
            {
                continue;
            }

            var childNode = await LoadFolderNodeAsync(
                ctx, subFolder.ServerRelativeUrl, subFolder.Name,
                siteTitle, library, currentDepth, progress, ct);

            if (currentDepth < maxDepth)
            {
                await CollectSubfoldersAsync(
                    ctx, subFolder.ServerRelativeUrl, childNode,
                    currentDepth + 1, maxDepth,
                    siteTitle, library, progress, ct);
            }

            parentNode.Children.Add(childNode);
        }
    }
}