using System.IO;
using Microsoft.SharePoint.Client;
using SharepointToolbox.Core.Helpers;
using SharepointToolbox.Core.Models;

namespace SharepointToolbox.Services;

/// <summary>
/// CSOM-based storage metrics scanner.
/// Captures every storage source SharePoint reports for a site:
/// document libraries (visible + hidden), the Preservation Hold Library,
/// list attachments, the recycle bin (1st + 2nd stage), and optionally
/// subsites. Each <see cref="StorageNode"/> carries a <see cref="StorageNodeKind"/>
/// so the caller can filter what appears in the report.
/// </summary>
public class StorageService : IStorageService
{
    // PreservationHoldLibrary base template id.
    private const int PreservationHoldTemplate = 851;

    public async Task<List<StorageNode>> CollectStorageAsync(
        ClientContext ctx,
        StorageScanOptions options,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        var result = new List<StorageNode>();
        await CollectForWebAsync(ctx, ctx.Web, options, result, progress, ct);
        return result;
    }

    /// <summary>
    /// Collects storage nodes for a single web (and, when enabled, recurses
    /// into its subwebs). Results are appended to <paramref name="result"/>.
    /// </summary>
    private async Task CollectForWebAsync(
        ClientContext ctx,
        Web web,
        StorageScanOptions options,
        List<StorageNode> result,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        ct.ThrowIfCancellationRequested();

        ctx.Load(web,
            w => w.Title,
            w => w.Url,
            w => w.ServerRelativeUrl,
            w => w.Lists.Include(
                l => l.Title,
                l => l.Hidden,
                l => l.BaseType,
                l => l.BaseTemplate,
                l => l.ItemCount,
                l => l.RootFolder.ServerRelativeUrl));
        if (options.IncludeSubsites)
            ctx.Load(web.Webs, ws => ws.Include(w => w.ServerRelativeUrl, w => w.Title));
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        string siteTitle = web.Title;
        var lists = web.Lists.ToList();

        // ── Document libraries (incl. hidden + Preservation Hold) ───────────
        // Track each library's RootFolder server-relative URL so bin items can
        // be attributed back to their source library (matches storman.aspx,
        // which folds bin contents into the owning library's Total Size).
        var docLibs = lists.Where(l => l.BaseType == BaseType.DocumentLibrary).ToList();
        var libsByRoot = new Dictionary<string, StorageNode>(StringComparer.OrdinalIgnoreCase);
        int idx = 0;
        foreach (var lib in docLibs)
        {
            ct.ThrowIfCancellationRequested();
            idx++;
            StorageNodeKind kind = ClassifyLibrary(lib);
            if (kind == StorageNodeKind.HiddenLibrary && !options.IncludeHiddenLibraries)
                continue;
            if (kind == StorageNodeKind.PreservationHold && !options.IncludePreservationHold)
                continue;

            progress.Report(new OperationProgress(idx, docLibs.Count,
                $"Loading storage metrics: {lib.Title} ({idx}/{docLibs.Count})"));

            var libNode = await LoadFolderNodeAsync(
                ctx, lib.RootFolder.ServerRelativeUrl, lib.Title,
                siteTitle, lib.Title, 0, kind, progress, ct);

            if (options.FolderDepth > 0)
            {
                await CollectSubfoldersAsync(
                    ctx, lib, lib.RootFolder.ServerRelativeUrl, libNode,
                    1, options.FolderDepth, siteTitle, lib.Title, kind, progress, ct);
            }

            // CSOM Folder.StorageMetrics is unreliable across the board for
            // larger libraries — sometimes returns the storman value, sometimes
            // returns a fraction of it, sometimes zero. Subfolder StorageMetrics
            // are equally inconsistent. The only CSOM path that matches storman
            // is per-file File.Length + File.Versions[*].Size enumeration, so
            // run it unconditionally, replacing the CSOM totals.
            ResetNodeCounts(libNode);
            await BackfillLibFromFilesAsync(ctx, lib, libNode, progress, ct);

            result.Add(libNode);
            libsByRoot[NormalizeServerRelative(lib.RootFolder.ServerRelativeUrl)] = libNode;
        }

        // ── List attachments (non-document-library lists) ───────────────────
        if (options.IncludeListAttachments)
        {
            var nonDocLists = lists
                .Where(l => l.BaseType != BaseType.DocumentLibrary && !l.Hidden && l.ItemCount > 0)
                .ToList();
            int aIdx = 0;
            foreach (var list in nonDocLists)
            {
                ct.ThrowIfCancellationRequested();
                aIdx++;
                progress.Report(new OperationProgress(aIdx, nonDocLists.Count,
                    $"Scanning list attachments: {list.Title} ({aIdx}/{nonDocLists.Count})"));
                var attachNode = await TryLoadAttachmentsNodeAsync(ctx, list, siteTitle, progress, ct);
                if (attachNode != null && attachNode.TotalSizeBytes > 0)
                    result.Add(attachNode);
            }
        }

        // ── Recycle bin (stage 1 + stage 2) ─────────────────────────────────
        if (options.IncludeRecycleBin)
        {
            progress.Report(OperationProgress.Indeterminate(
                $"Scanning recycle bin: {siteTitle}..."));
            var (rbNodes, perDir) = await LoadRecycleBinNodesAsync(ctx, web, siteTitle, progress, ct);

            // Attribute bin items to owning library (longest-prefix match on DirName)
            // so library Total Size matches storman.aspx, which counts an item's
            // bytes against its source library even after deletion.
            if (perDir.Count > 0 && libsByRoot.Count > 0)
            {
                var libRootsByLength = libsByRoot
                    .OrderByDescending(kv => kv.Key.Length)
                    .ToList();
                foreach (var kv in perDir)
                {
                    string dirNorm = NormalizeServerRelative(kv.Key);
                    foreach (var lib in libRootsByLength)
                    {
                        if (dirNorm.Equals(lib.Key, StringComparison.OrdinalIgnoreCase) ||
                            dirNorm.StartsWith(lib.Key + "/", StringComparison.OrdinalIgnoreCase))
                        {
                            lib.Value.TotalSizeBytes += kv.Value.Size;
                            lib.Value.TotalFileCount += kv.Value.Count;
                            break;
                        }
                    }
                }
            }

            result.AddRange(rbNodes);
        }

        // ── Subsites (recursive) ────────────────────────────────────────────
        if (options.IncludeSubsites)
        {
            var subwebs = web.Webs.ToList();
            foreach (var sub in subwebs)
            {
                ct.ThrowIfCancellationRequested();

                // Build a node header so subsite results are visually grouped.
                var subResult = new List<StorageNode>();
                await CollectForWebAsync(ctx, sub, options, subResult, progress, ct);
                if (subResult.Count == 0)
                    continue;

                // Bin contents already rolled up into each library's TotalSizeBytes
                // (storman behavior); summing root RecycleBin children too would
                // double-count. Filter them out here.
                var subRoot = new StorageNode
                {
                    Name = sub.Title,
                    Url = ctx.Url.TrimEnd('/') + sub.ServerRelativeUrl,
                    SiteTitle = sub.Title,
                    Library = string.Empty,
                    Kind = StorageNodeKind.Subsite,
                    IndentLevel = 0,
                    Children = subResult,
                    TotalSizeBytes = subResult
                        .Where(n => n.Kind != StorageNodeKind.RecycleBin)
                        .Sum(n => n.TotalSizeBytes),
                    FileStreamSizeBytes = subResult
                        .Where(n => n.Kind != StorageNodeKind.RecycleBin)
                        .Sum(n => n.FileStreamSizeBytes),
                    TotalFileCount = subResult
                        .Where(n => n.Kind != StorageNodeKind.RecycleBin)
                        .Sum(n => n.TotalFileCount)
                };
                result.Add(subRoot);
            }
        }
    }

    /// <summary>
    /// Classifies a document library as PreservationHold, HiddenLibrary, or
    /// a regular Library based on its base template and hidden flag.
    /// </summary>
    private static StorageNodeKind ClassifyLibrary(List lib)
    {
        if (lib.BaseTemplate == PreservationHoldTemplate ||
            string.Equals(lib.Title, "Preservation Hold Library", StringComparison.OrdinalIgnoreCase))
            return StorageNodeKind.PreservationHold;
        return lib.Hidden ? StorageNodeKind.HiddenLibrary : StorageNodeKind.Library;
    }

    private static async Task<StorageNode?> TryLoadAttachmentsNodeAsync(
        ClientContext ctx,
        List list,
        string siteTitle,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        // Per-list attachments live in <list root>/Attachments/<item id>/.
        // The Attachments folder may or may not exist depending on whether any
        // item ever had an attachment — guard with try/catch.
        string attachmentsUrl = list.RootFolder.ServerRelativeUrl.TrimEnd('/') + "/Attachments";
        try
        {
            var folder = ctx.Web.GetFolderByServerRelativeUrl(attachmentsUrl);
            ctx.Load(folder,
                f => f.Exists,
                f => f.StorageMetrics,
                f => f.TimeLastModified,
                f => f.ServerRelativeUrl);
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

            if (!folder.Exists || folder.StorageMetrics.TotalFileCount == 0)
                return null;

            DateTime? lastMod =
                folder.StorageMetrics.LastModified > DateTime.MinValue
                    ? folder.StorageMetrics.LastModified
                    : folder.TimeLastModified > DateTime.MinValue
                        ? folder.TimeLastModified
                        : (DateTime?)null;

            return new StorageNode
            {
                Name = $"[Attachments] {list.Title}",
                Url = ctx.Url.TrimEnd('/') + attachmentsUrl,
                SiteTitle = siteTitle,
                Library = list.Title,
                Kind = StorageNodeKind.ListAttachments,
                TotalSizeBytes = folder.StorageMetrics.TotalSize,
                FileStreamSizeBytes = folder.StorageMetrics.TotalFileStreamSize,
                TotalFileCount = folder.StorageMetrics.TotalFileCount,
                LastModified = lastMod,
                IndentLevel = 0,
                Children = new List<StorageNode>()
            };
        }
        catch
        {
            // Attachments folder absent for this list — not an error.
            return null;
        }
    }

    private static async Task<(List<StorageNode> Nodes, Dictionary<string, (long Size, int Count)> PerDir)>
        LoadRecycleBinNodesAsync(
            ClientContext ctx,
            Web web,
            string siteTitle,
            IProgress<OperationProgress> progress,
            CancellationToken ct)
    {
        var nodes = new List<StorageNode>();
        var perDir = new Dictionary<string, (long Size, int Count)>(StringComparer.OrdinalIgnoreCase);
        try
        {
            // Web-scoped: ctx.Site.RecycleBin would return the entire site-collection
            // bin and inflate totals by (1 + N_subsites) when IncludeSubsites is on.
            var bin = web.RecycleBin;
            ctx.Load(bin, b => b.Include(
                i => i.Size,
                i => i.ItemState,
                i => i.DeletedDate,
                i => i.DirName));
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

            // RecycleBinItem.DirName is web-relative on SharePoint Online
            // (e.g. "Documents/SubFolder" without leading slash or web URL).
            // Prepend the web's ServerRelativeUrl so the result matches
            // List.RootFolder.ServerRelativeUrl form used by libsByRoot.
            string webSrl = NormalizeServerRelative(web.ServerRelativeUrl);

            long stage1Size = 0, stage2Size = 0;
            int stage1Count = 0, stage2Count = 0;
            DateTime? stage1Last = null, stage2Last = null;

            foreach (var item in bin)
            {
                if (item.ItemState == RecycleBinItemState.SecondStageRecycleBin)
                {
                    stage2Size += item.Size;
                    stage2Count++;
                    if (stage2Last is null || item.DeletedDate > stage2Last)
                        stage2Last = item.DeletedDate;
                }
                else
                {
                    stage1Size += item.Size;
                    stage1Count++;
                    if (stage1Last is null || item.DeletedDate > stage1Last)
                        stage1Last = item.DeletedDate;
                }

                string raw = item.DirName ?? string.Empty;
                string dirSrl;
                if (raw.StartsWith('/'))
                    dirSrl = NormalizeServerRelative(raw);
                else if (string.IsNullOrEmpty(raw))
                    dirSrl = webSrl;
                else
                    dirSrl = NormalizeServerRelative(webSrl + "/" + raw);

                if (perDir.TryGetValue(dirSrl, out var tally))
                    perDir[dirSrl] = (tally.Size + item.Size, tally.Count + 1);
                else
                    perDir[dirSrl] = (item.Size, 1);
            }

            if (stage1Count > 0)
                nodes.Add(new StorageNode
                {
                    Name = "[Recycle Bin] First-stage",
                    SiteTitle = siteTitle,
                    Library = "RecycleBin",
                    Kind = StorageNodeKind.RecycleBin,
                    TotalSizeBytes = stage1Size,
                    FileStreamSizeBytes = stage1Size,
                    TotalFileCount = stage1Count,
                    LastModified = stage1Last,
                    IndentLevel = 0,
                    Children = new List<StorageNode>()
                });
            if (stage2Count > 0)
                nodes.Add(new StorageNode
                {
                    Name = "[Recycle Bin] Second-stage",
                    SiteTitle = siteTitle,
                    Library = "RecycleBin",
                    Kind = StorageNodeKind.RecycleBin,
                    TotalSizeBytes = stage2Size,
                    FileStreamSizeBytes = stage2Size,
                    TotalFileCount = stage2Count,
                    LastModified = stage2Last,
                    IndentLevel = 0,
                    Children = new List<StorageNode>()
                });
        }
        catch
        {
            // Insufficient permission to read recycle bin or feature unavailable.
        }
        return (nodes, perDir);
    }

    /// <summary>
    /// Normalizes a server-relative path for consistent prefix matching:
    /// trims trailing slash, ensures single leading slash. SharePoint
    /// inconsistently returns DirName with or without leading slash across
    /// API surfaces, so the caller cannot rely on a canonical form.
    /// </summary>
    private static string NormalizeServerRelative(string? path)
    {
        if (string.IsNullOrEmpty(path))
            return string.Empty;
        string trimmed = path.Trim().TrimEnd('/');
        if (trimmed.Length == 0)
            return string.Empty;
        return trimmed.StartsWith('/') ? trimmed : "/" + trimmed;
    }

    public async Task<List<FileTypeMetric>> CollectFileTypeMetricsAsync(
        ClientContext ctx,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        ct.ThrowIfCancellationRequested();
        ctx.Load(ctx.Web, w => w.Lists.Include(
            l => l.Title,
            l => l.Hidden,
            l => l.BaseType,
            l => l.ItemCount));
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        var libs = ctx.Web.Lists
            .Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
            .ToList();

        var extensionMap = new Dictionary<string, (long totalSize, int count)>(StringComparer.OrdinalIgnoreCase);
        int libIdx = 0;
        foreach (var lib in libs)
        {
            ct.ThrowIfCancellationRequested();
            libIdx++;
            progress.Report(new OperationProgress(libIdx, libs.Count,
                $"Scanning files by type: {lib.Title} ({libIdx}/{libs.Count})"));

            // No <Where> clause: filtering on FSObjType (non-indexed) on a list
            // beyond 5000 items breaches the list view threshold. Page lightly,
            // then second-pass load File.Length + Versions[*].Size so per-type
            // totals include version bytes (matches per-library totals).
            var query = new CamlQuery
            {
                ViewXml = @"<View Scope=""RecursiveAll""><RowLimit Paged=""TRUE"">500</RowLimit></View>"
            };
            ListItemCollection items;
            do
            {
                ct.ThrowIfCancellationRequested();
                items = lib.GetItems(query);
                ctx.Load(items,
                    ic => ic.ListItemCollectionPosition,
                    ic => ic.Include(
                        i => i["FSObjType"],
                        i => i["FileLeafRef"]));
                await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

                var fileRows = new List<(ListItem Item, string Name)>();
                foreach (var item in items)
                {
                    if (item["FSObjType"]?.ToString() != "0")
                        continue;
                    string fileName = item["FileLeafRef"]?.ToString() ?? string.Empty;
                    fileRows.Add((item, fileName));
                    ctx.Load(item.File, f => f.Length);
                    ctx.Load(item.File.Versions, vc => vc.Include(v => v.Size));
                }
                if (fileRows.Count > 0)
                {
                    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
                }

                foreach (var row in fileRows)
                {
                    long current;
                    try { current = row.Item.File.Length; }
                    catch { continue; }

                    long versions = 0;
                    try
                    {
                        foreach (var v in row.Item.File.Versions)
                            versions += v.Size;
                    }
                    catch { /* no version history */ }

                    long fileSize = current + versions;
                    string ext = Path.GetExtension(row.Name).ToLowerInvariant();
                    if (extensionMap.TryGetValue(ext, out var existing))
                        extensionMap[ext] = (existing.totalSize + fileSize, existing.count + 1);
                    else
                        extensionMap[ext] = (fileSize, 1);
                }

                query.ListItemCollectionPosition = items.ListItemCollectionPosition;
            } while (items.ListItemCollectionPosition != null);
        }

        return extensionMap
            .Select(kvp => new FileTypeMetric(kvp.Key, kvp.Value.totalSize, kvp.Value.count))
            .OrderByDescending(m => m.TotalSizeBytes)
            .ToList();
    }

    /// <summary>
    /// Per-library backfill executed inline by <see cref="CollectForWebAsync"/>
    /// for every scanned library (CSOM totals are reset first, since
    /// Folder.StorageMetrics is unreliable). Enumerates every file via
    /// CamlQuery and explicitly loads File.Length + File.Versions.Size so
    /// version bytes are summed accurately — matches what storman.aspx reports.
    /// </summary>
    private static async Task BackfillLibFromFilesAsync(
        ClientContext ctx,
        List lib,
        StorageNode libNode,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        progress.Report(OperationProgress.Indeterminate(
            $"Counting files: {libNode.Name}..."));

        string libRootSrl = NormalizeServerRelative(lib.RootFolder.ServerRelativeUrl);
        var folderLookup = new Dictionary<string, StorageNode>(StringComparer.OrdinalIgnoreCase);
        BuildFolderLookup(libNode, libRootSrl, folderLookup);

        // No <Where> clause: filtering on FSObjType (non-indexed) on a list
        // beyond the 5000-item view threshold throws "The attempted operation
        // is prohibited because it exceeds the list view threshold". Paged
        // retrieval without Where is unaffected by the threshold; we filter
        // out folders client-side and skip File.Length access for them.
        // Smaller page size because each row carries the full Versions collection.
        var query = new CamlQuery
        {
            ViewXml = @"<View Scope=""RecursiveAll""><RowLimit Paged=""TRUE"">500</RowLimit></View>"
        };
        ListItemCollection items;
        do
        {
            ct.ThrowIfCancellationRequested();
            items = lib.GetItems(query);
            ctx.Load(items,
                ic => ic.ListItemCollectionPosition,
                ic => ic.Include(
                    i => i["FSObjType"],
                    i => i["FileDirRef"]));
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

            // Second pass: queue File.Length + File.Versions[*].Size only for
            // file rows. Including these in the page 1 query throws a
            // ServerObjectNullReferenceException on folder rows (item.File is
            // null for folders). Filtering FSObjType client-side here keeps
            // per-page round-trips at two regardless of file count.
            var fileRows = new List<(ListItem Item, string DirRef)>();
            foreach (var item in items)
            {
                if (item["FSObjType"]?.ToString() != "0")
                    continue;
                var dirRef = item["FileDirRef"]?.ToString() ?? string.Empty;
                fileRows.Add((item, dirRef));
                ctx.Load(item.File, f => f.Length);
                ctx.Load(item.File.Versions, vc => vc.Include(v => v.Size));
            }
            if (fileRows.Count > 0)
            {
                await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
            }

            foreach (var row in fileRows)
            {
                long current;
                try { current = row.Item.File.Length; }
                catch { continue; }

                long versions = 0;
                try
                {
                    foreach (var v in row.Item.File.Versions)
                        versions += v.Size;
                }
                catch
                {
                    // Versioning disabled / no version history — leave at 0.
                }

                long totalSize = current + versions;

                // Attribute each file to its deepest matching folder only.
                // Parent rollup happens once after all pages are processed,
                // adding direct + descendants — matches storman's per-folder
                // total. Fall back to libNode for files at lib root or in
                // folders excluded from the tree (Forms, _-prefixed system
                // folders, depth-limited subfolders).
                var target = FindDeepestFolder(row.DirRef, folderLookup) ?? libNode;
                target.TotalSizeBytes += totalSize;
                target.FileStreamSizeBytes += current;
                target.TotalFileCount++;
            }

            query.ListItemCollectionPosition = items.ListItemCollectionPosition;
        } while (items.ListItemCollectionPosition != null);

        // Post-pass rollup: each folder's totals become own-direct + sum of
        // descendants. libNode ends up as total of every file in the tree.
        RollupFolderTotals(libNode);
    }

    /// <summary>
    /// Recursively rolls up direct-file totals into ancestor folders so each
    /// node's reported size includes everything beneath it. Pre-condition: each
    /// node holds only its directly-attributed files (no descendant amounts).
    /// </summary>
    private static void RollupFolderTotals(StorageNode node)
    {
        foreach (var child in node.Children)
        {
            RollupFolderTotals(child);
            node.TotalSizeBytes += child.TotalSizeBytes;
            node.FileStreamSizeBytes += child.FileStreamSizeBytes;
            node.TotalFileCount += child.TotalFileCount;
        }
    }

    /// <summary>
    /// No-op retained for interface compatibility. Backfill now runs inline
    /// inside <see cref="CollectForWebAsync"/> via BackfillLibFromFilesAsync,
    /// which has access to the CSOM library reference and runs before bin
    /// distribution so the count==0 trigger is not polluted by bin items.
    /// </summary>
    public Task BackfillZeroNodesAsync(
        ClientContext ctx,
        IReadOnlyList<StorageNode> nodes,
        IProgress<OperationProgress> progress,
        CancellationToken ct) => Task.CompletedTask;

    public async Task<long> GetSiteUsageStorageBytesAsync(
        ClientContext ctx,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        try
        {
            ctx.Load(ctx.Site, s => s.Usage);
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
            return ctx.Site.Usage.Storage;
        }
        catch
        {
            return 0L;
        }
    }

    /// <summary>
    /// Zeroes the size/count fields of a node and all descendants, so the
    /// per-file backfill can re-accumulate totals from scratch.
    /// </summary>
    private static void ResetNodeCounts(StorageNode node)
    {
        node.TotalSizeBytes = 0;
        node.FileStreamSizeBytes = 0;
        node.TotalFileCount = 0;
        foreach (var child in node.Children)
            ResetNodeCounts(child);
    }

    /// <summary>
    /// Indexes a folder tree by server-relative path. The root node
    /// (IndentLevel == 0) maps to the library root path itself; deeper nodes
    /// append their Name segment to the parent's path.
    /// </summary>
    private static void BuildFolderLookup(StorageNode node, string parentPath,
        Dictionary<string, StorageNode> lookup)
    {
        string nodePath = node.IndentLevel == 0 ? parentPath : parentPath + "/" + node.Name;
        lookup[nodePath] = node;
        foreach (var child in node.Children)
            BuildFolderLookup(child, nodePath, lookup);
    }

    /// <summary>
    /// Walks a FileDirRef path upward segment by segment and returns the first
    /// (deepest) folder node present in the lookup, or null when no ancestor
    /// of the path is tracked.
    /// </summary>
    private static StorageNode? FindDeepestFolder(string fileDirRef,
        Dictionary<string, StorageNode> lookup)
    {
        string path = fileDirRef.TrimEnd('/');
        while (!string.IsNullOrEmpty(path))
        {
            if (lookup.TryGetValue(path, out var node))
                return node;
            int lastSlash = path.LastIndexOf('/');
            if (lastSlash <= 0)
                break;
            path = path[..lastSlash];
        }
        return null;
    }

    // ── Library/folder loading helpers ──────────────────────────────────────

    private static async Task<StorageNode> LoadFolderNodeAsync(
        ClientContext ctx,
        string serverRelativeUrl,
        string name,
        string siteTitle,
        string library,
        int indentLevel,
        StorageNodeKind kind,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        ct.ThrowIfCancellationRequested();
        Folder folder = ctx.Web.GetFolderByServerRelativeUrl(serverRelativeUrl);
        ctx.Load(folder,
            f => f.StorageMetrics,
            f => f.TimeLastModified,
            f => f.ServerRelativeUrl,
            f => f.Name);
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        DateTime? lastMod =
            folder.StorageMetrics.LastModified > DateTime.MinValue
                ? folder.StorageMetrics.LastModified
                : folder.TimeLastModified > DateTime.MinValue
                    ? folder.TimeLastModified
                    : (DateTime?)null;

        return new StorageNode
        {
            Name = name,
            Url = ctx.Url.TrimEnd('/') + serverRelativeUrl,
            SiteTitle = siteTitle,
            Library = library,
            Kind = kind,
            TotalSizeBytes = folder.StorageMetrics.TotalSize,
            FileStreamSizeBytes = folder.StorageMetrics.TotalFileStreamSize,
            TotalFileCount = folder.StorageMetrics.TotalFileCount,
            LastModified = lastMod,
            IndentLevel = indentLevel,
            Children = new List<StorageNode>()
        };
    }

    private static async Task CollectSubfoldersAsync(
        ClientContext ctx,
        List list,
        string parentServerRelativeUrl,
        StorageNode parentNode,
        int currentDepth,
        int maxDepth,
        string siteTitle,
        string library,
        StorageNodeKind kind,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        if (currentDepth > maxDepth)
            return;
        ct.ThrowIfCancellationRequested();

        var subfolders = new List<(string Name, string ServerRelativeUrl)>();
        await foreach (var item in SharePointPaginationHelper.GetItemsInFolderAsync(
            ctx, list, parentServerRelativeUrl,
            recursive: false,
            viewFields: new[] { "FSObjType", "FileLeafRef", "FileRef" },
            ct: ct))
        {
            if (item["FSObjType"]?.ToString() != "1")
                continue;
            string name = item["FileLeafRef"]?.ToString() ?? string.Empty;
            string url = item["FileRef"]?.ToString() ?? string.Empty;
            if (string.IsNullOrEmpty(name) || string.IsNullOrEmpty(url))
                continue;
            // Skip the Forms system folder and other _-prefixed system folders.
            if (name.Equals("Forms", StringComparison.OrdinalIgnoreCase) ||
                name.StartsWith("_", StringComparison.Ordinal))
                continue;
            subfolders.Add((name, url));
        }

        foreach (var sub in subfolders)
        {
            ct.ThrowIfCancellationRequested();
            var childNode = await LoadFolderNodeAsync(
                ctx, sub.ServerRelativeUrl, sub.Name, siteTitle, library,
                currentDepth, kind, progress, ct);
            if (currentDepth < maxDepth)
            {
                await CollectSubfoldersAsync(
                    ctx, list, sub.ServerRelativeUrl, childNode,
                    currentDepth + 1, maxDepth, siteTitle, library, kind, progress, ct);
            }
            parentNode.Children.Add(childNode);
        }
    }
}