diff --git a/SharepointToolbox/Services/Export/StorageHtmlExportService.cs b/SharepointToolbox/Services/Export/StorageHtmlExportService.cs index 5fe979c..4c114f4 100644 --- a/SharepointToolbox/Services/Export/StorageHtmlExportService.cs +++ b/SharepointToolbox/Services/Export/StorageHtmlExportService.cs @@ -90,7 +90,10 @@ public class StorageHtmlExportService """); - foreach (var node in nodes) + // Only iterate root-level nodes; RenderNode recurses into Children + // inline. Iterating the flat list would render every descendant a + // second time as a top-level row. + foreach (var node in nodes.Where(n => n.IndentLevel == 0)) { RenderNode(sb, node); } @@ -224,7 +227,10 @@ public class StorageHtmlExportService """); - foreach (var node in nodes) + // Only iterate root-level nodes; RenderNode recurses into Children + // inline. Iterating the flat list would render every descendant a + // second time as a top-level row. + foreach (var node in nodes.Where(n => n.IndentLevel == 0)) { RenderNode(sb, node); } diff --git a/SharepointToolbox/Services/StorageService.cs b/SharepointToolbox/Services/StorageService.cs index 8b510c3..22623d2 100644 --- a/SharepointToolbox/Services/StorageService.cs +++ b/SharepointToolbox/Services/StorageService.cs @@ -58,7 +58,11 @@ public class StorageService : IStorageService var lists = web.Lists.ToList(); // ── Document libraries (incl. hidden + Preservation Hold) ─────────── + // Track each library's RootFolder server-relative URL so bin items can + // be attributed back to their source library (matches storman.aspx, + // which folds bin contents into the owning library's Total Size). 
var docLibs = lists.Where(l => l.BaseType == BaseType.DocumentLibrary).ToList(); + var libsByRoot = new Dictionary(StringComparer.OrdinalIgnoreCase); int idx = 0; foreach (var lib in docLibs) { @@ -84,7 +88,17 @@ public class StorageService : IStorageService siteTitle, lib.Title, kind, progress, ct); } + // CSOM Folder.StorageMetrics is unreliable across the board for + // larger libraries — sometimes returns the storman value, sometimes + // returns a fraction of it, sometimes zero. Subfolder StorageMetrics + // are equally inconsistent. The only CSOM path that matches storman + // is per-file File.Length + File.Versions[*].Size enumeration, so + // run it unconditionally, replacing the CSOM totals. + ResetNodeCounts(libNode); + await BackfillLibFromFilesAsync(ctx, lib, libNode, progress, ct); + result.Add(libNode); + libsByRoot[NormalizeServerRelative(lib.RootFolder.ServerRelativeUrl)] = libNode; } // ── List attachments (non-document-library lists) ─────────────────── @@ -114,7 +128,33 @@ public class StorageService : IStorageService progress.Report(OperationProgress.Indeterminate( $"Scanning recycle bin: {siteTitle}...")); - var rbNodes = await LoadRecycleBinNodesAsync(ctx, siteTitle, progress, ct); + var (rbNodes, perDir) = await LoadRecycleBinNodesAsync(ctx, web, siteTitle, progress, ct); + + // Attribute bin items to owning library (longest-prefix match on DirName) + // so library Total Size matches storman.aspx, which counts an item's + // bytes against its source library even after deletion. 
+ if (perDir.Count > 0 && libsByRoot.Count > 0) + { + var libRootsByLength = libsByRoot + .OrderByDescending(kv => kv.Key.Length) + .ToList(); + + foreach (var kv in perDir) + { + string dirNorm = NormalizeServerRelative(kv.Key); + foreach (var lib in libRootsByLength) + { + if (dirNorm.Equals(lib.Key, StringComparison.OrdinalIgnoreCase) || + dirNorm.StartsWith(lib.Key + "/", StringComparison.OrdinalIgnoreCase)) + { + lib.Value.TotalSizeBytes += kv.Value.Size; + lib.Value.TotalFileCount += kv.Value.Count; + break; + } + } + } + } + result.AddRange(rbNodes); } @@ -131,6 +171,9 @@ public class StorageService : IStorageService await CollectForWebAsync(ctx, sub, options, subResult, progress, ct); if (subResult.Count == 0) continue; + // Bin contents already rolled up into each library's TotalSizeBytes + // (storman behavior); summing root RecycleBin children too would + // double-count. Filter them out here. var subRoot = new StorageNode { Name = sub.Title, @@ -140,9 +183,9 @@ public class StorageService : IStorageService Kind = StorageNodeKind.Subsite, IndentLevel = 0, Children = subResult, - TotalSizeBytes = subResult.Sum(n => n.TotalSizeBytes), - FileStreamSizeBytes = subResult.Sum(n => n.FileStreamSizeBytes), - TotalFileCount = subResult.Sum(n => n.TotalFileCount) + TotalSizeBytes = subResult.Where(n => n.Kind != StorageNodeKind.RecycleBin).Sum(n => n.TotalSizeBytes), + FileStreamSizeBytes = subResult.Where(n => n.Kind != StorageNodeKind.RecycleBin).Sum(n => n.FileStreamSizeBytes), + TotalFileCount = subResult.Where(n => n.Kind != StorageNodeKind.RecycleBin).Sum(n => n.TotalFileCount) }; result.Add(subRoot); } @@ -210,23 +253,34 @@ public class StorageService : IStorageService } } - private static async Task> LoadRecycleBinNodesAsync( + private static async Task<(List Nodes, Dictionary PerDir)> LoadRecycleBinNodesAsync( ClientContext ctx, + Web web, string siteTitle, IProgress progress, CancellationToken ct) { var nodes = new List(); + var perDir = new 
Dictionary(StringComparer.OrdinalIgnoreCase); try { - var bin = ctx.Site.RecycleBin; + // Web-scoped: ctx.Site.RecycleBin would return the entire site-collection + // bin and inflate totals by a factor of (1 + N_subsites) when IncludeSubsites is on. + var bin = web.RecycleBin; ctx.Load(bin, b => b.Include( i => i.Size, i => i.ItemState, - i => i.DeletedDate)); + i => i.DeletedDate, + i => i.DirName)); await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct); + // RecycleBinItem.DirName is web-relative on SharePoint Online + // (e.g. "Documents/SubFolder" without leading slash or web URL). + // Prepend the web's ServerRelativeUrl so the result matches + // List.RootFolder.ServerRelativeUrl form used by libsByRoot. + string webSrl = NormalizeServerRelative(web.ServerRelativeUrl); + long stage1Size = 0, stage2Size = 0; int stage1Count = 0, stage2Count = 0; DateTime? stage1Last = null, stage2Last = null; @@ -245,6 +299,20 @@ public class StorageService : IStorageService stage1Count++; if (stage1Last is null || item.DeletedDate > stage1Last) stage1Last = item.DeletedDate; } + + string raw = item.DirName ?? string.Empty; + string dirSrl; + if (raw.StartsWith('/')) + dirSrl = NormalizeServerRelative(raw); + else if (string.IsNullOrEmpty(raw)) + dirSrl = webSrl; + else + dirSrl = NormalizeServerRelative(webSrl + "/" + raw); + + if (perDir.TryGetValue(dirSrl, out var tally)) + perDir[dirSrl] = (tally.Size + item.Size, tally.Count + 1); + else + perDir[dirSrl] = (item.Size, 1); } if (stage1Count > 0) @@ -282,7 +350,21 @@ public class StorageService : IStorageService // Insufficient permission to read recycle bin or feature unavailable. } - return nodes; + return (nodes, perDir); + } + + /// + /// Normalizes a server-relative path for consistent prefix matching: trims + /// whitespace and trailing slashes and prepends a leading slash if missing + /// (null, empty, or slash-only input yields ""). SharePoint returns DirName + /// with or without a leading slash, so callers cannot rely on a canonical form.
+ /// + private static string NormalizeServerRelative(string? path) + { + if (string.IsNullOrEmpty(path)) return string.Empty; + string trimmed = path.Trim().TrimEnd('/'); + if (trimmed.Length == 0) return string.Empty; + return trimmed.StartsWith('/') ? trimmed : "/" + trimmed; } public async Task> CollectFileTypeMetricsAsync( @@ -314,6 +396,10 @@ public class StorageService : IStorageService progress.Report(new OperationProgress(libIdx, libs.Count, $"Scanning files by type: {lib.Title} ({libIdx}/{libs.Count})")); + // No <Where> clause: filtering on FSObjType (non-indexed) on a list + // beyond 5000 items breaches the list view threshold. Page lightly, + // then second-pass load File.Length + Versions[*].Size so per-type + // totals include version bytes (matches per-library totals). var query = new CamlQuery { ViewXml = @" @@ -321,9 +407,8 @@ public class StorageService : IStorageService - - 5000 + 500 " }; @@ -335,21 +420,40 @@ public class StorageService : IStorageService ctx.Load(items, ic => ic.ListItemCollectionPosition, ic => ic.Include( i => i["FSObjType"], - i => i["FileLeafRef"], - i => i["File_x0020_Size"])); + i => i["FileLeafRef"])); await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct); + var fileRows = new List<(ListItem Item, string Name)>(); foreach (var item in items) { if (item["FSObjType"]?.ToString() != "0") continue; - string fileName = item["FileLeafRef"]?.ToString() ?? string.Empty; - string sizeStr = item["File_x0020_Size"]?.ToString() ?? 
"0"; + fileRows.Add((item, fileName)); + ctx.Load(item.File, f => f.Length); + ctx.Load(item.File.Versions, vc => vc.Include(v => v.Size)); + } - if (!long.TryParse(sizeStr, out long fileSize)) - fileSize = 0; + if (fileRows.Count > 0) + { + await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct); + } - string ext = Path.GetExtension(fileName).ToLowerInvariant(); + foreach (var row in fileRows) + { + long current; + try { current = row.Item.File.Length; } + catch { continue; } + + long versions = 0; + try + { + foreach (var v in row.Item.File.Versions) + versions += v.Size; + } + catch { /* no version history */ } + + long fileSize = current + versions; + string ext = Path.GetExtension(row.Name).ToLowerInvariant(); if (extensionMap.TryGetValue(ext, out var existing)) extensionMap[ext] = (existing.totalSize + fileSize, existing.count + 1); @@ -368,120 +472,144 @@ public class StorageService : IStorageService .ToList(); } - public async Task BackfillZeroNodesAsync( + /// + /// Per-library backfill executed inline by CollectForWebAsync for every + /// document library, replacing the CSOM Folder.StorageMetrics totals. Enumerates + /// every file via CamlQuery and explicitly loads File.Length + File.Versions.Size + /// so version bytes are summed accurately — matches what storman.aspx reports. + /// + private static async Task BackfillLibFromFilesAsync( + ClientContext ctx, + List lib, + StorageNode libNode, + IProgress progress, + CancellationToken ct) + { + progress.Report(OperationProgress.Indeterminate( + $"Counting files: {libNode.Name}...")); + + string libRootSrl = NormalizeServerRelative(lib.RootFolder.ServerRelativeUrl); + + var folderLookup = new Dictionary(StringComparer.OrdinalIgnoreCase); + BuildFolderLookup(libNode, libRootSrl, folderLookup); + + // No <Where> clause: filtering on FSObjType (non-indexed) on a list + // beyond the 5000-item view threshold throws "The attempted operation + // is prohibited because it exceeds the list view threshold".
Paged + // retrieval without Where is unaffected by the threshold; we filter + // out folders client-side and skip File.Length access for them. + // Smaller page size because each row carries the full Versions collection. + var query = new CamlQuery + { + ViewXml = @" + + + + + + 500 + " + }; + + ListItemCollection items; + do + { + ct.ThrowIfCancellationRequested(); + items = lib.GetItems(query); + ctx.Load(items, ic => ic.ListItemCollectionPosition, + ic => ic.Include( + i => i["FSObjType"], + i => i["FileDirRef"])); + await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct); + + // Second pass: queue File.Length + File.Versions[*].Size only for + // file rows. Including these in the page 1 query throws a + // ServerObjectNullReferenceException on folder rows (item.File is + // null for folders). Filtering FSObjType client-side here keeps + // per-page round-trips at two regardless of file count. + var fileRows = new List<(ListItem Item, string DirRef)>(); + foreach (var item in items) + { + if (item["FSObjType"]?.ToString() != "0") continue; + var dirRef = item["FileDirRef"]?.ToString() ?? string.Empty; + fileRows.Add((item, dirRef)); + ctx.Load(item.File, f => f.Length); + ctx.Load(item.File.Versions, vc => vc.Include(v => v.Size)); + } + + if (fileRows.Count > 0) + { + await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct); + } + + foreach (var row in fileRows) + { + long current; + try { current = row.Item.File.Length; } + catch { continue; } + + long versions = 0; + try + { + foreach (var v in row.Item.File.Versions) + versions += v.Size; + } + catch + { + // Versioning disabled / no version history — leave at 0. + } + + long totalSize = current + versions; + + // Attribute each file to its deepest matching folder only. + // Parent rollup happens once after all pages are processed, + // adding direct + descendants — matches storman's per-folder + // total. 
Fall back to libNode for files at lib root or in + // folders excluded from the tree (Forms, _-prefixed system + // folders, depth-limited subfolders). + var target = FindDeepestFolder(row.DirRef, folderLookup) ?? libNode; + target.TotalSizeBytes += totalSize; + target.FileStreamSizeBytes += current; + target.TotalFileCount++; + } + + query.ListItemCollectionPosition = items.ListItemCollectionPosition; + } + while (items.ListItemCollectionPosition != null); + + // Post-pass rollup: each folder's totals become own-direct + sum of + // descendants. libNode ends up as total of every file in the tree. + RollupFolderTotals(libNode); + } + + /// + /// Recursively rolls up direct-file totals into ancestor folders so each + /// node's reported size includes everything beneath it. Pre-condition: each + /// node holds only its directly-attributed files (no descendant amounts). + /// + private static void RollupFolderTotals(StorageNode node) + { + foreach (var child in node.Children) + { + RollupFolderTotals(child); + node.TotalSizeBytes += child.TotalSizeBytes; + node.FileStreamSizeBytes += child.FileStreamSizeBytes; + node.TotalFileCount += child.TotalFileCount; + } + } + + /// + /// No-op retained for interface compatibility. Backfill now runs inline + /// inside CollectForWebAsync via BackfillLibFromFilesAsync, + /// which has access to the CSOM library reference and runs before bin + /// distribution, so per-file totals are rebuilt before bin bytes are added. + /// + public Task BackfillZeroNodesAsync( ClientContext ctx, IReadOnlyList nodes, IProgress progress, CancellationToken ct) - { - // Only backfill nodes scanned through CSOM document-library StorageMetrics — - // synthetic categories (recycle bin, list attachments, subsite headers) - // cannot be re-derived from File_x0020_Size.
- var libNodes = nodes.Where(n => n.IndentLevel == 0 && - (n.Kind == StorageNodeKind.Library || - n.Kind == StorageNodeKind.HiddenLibrary || - n.Kind == StorageNodeKind.PreservationHold)).ToList(); - var needsBackfill = libNodes.Where(lib => - lib.TotalFileCount == 0 || HasZeroChild(lib)).ToList(); - if (needsBackfill.Count == 0) return; - - ctx.Load(ctx.Web, w => w.ServerRelativeUrl, - w => w.Lists.Include( - l => l.Title, l => l.Hidden, l => l.BaseType, - l => l.RootFolder.ServerRelativeUrl)); - await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct); - - var libs = ctx.Web.Lists - .Where(l => l.BaseType == BaseType.DocumentLibrary) - .ToDictionary(l => l.Title, StringComparer.OrdinalIgnoreCase); - - int idx = 0; - foreach (var libNode in needsBackfill) - { - ct.ThrowIfCancellationRequested(); - idx++; - - if (!libs.TryGetValue(libNode.Library, out var lib)) continue; - - progress.Report(new OperationProgress(idx, needsBackfill.Count, - $"Counting files: {libNode.Name} ({idx}/{needsBackfill.Count})")); - - string libRootSrl = lib.RootFolder.ServerRelativeUrl.TrimEnd('/'); - - var folderLookup = new Dictionary(StringComparer.OrdinalIgnoreCase); - BuildFolderLookup(libNode, libRootSrl, folderLookup); - - var originalTotals = new Dictionary(); - CaptureTotals(libNode, originalTotals); - - ResetNodeCounts(libNode); - - var query = new CamlQuery - { - ViewXml = @" - - - - - - - - - 5000 - " - }; - - ListItemCollection items; - do - { - ct.ThrowIfCancellationRequested(); - items = lib.GetItems(query); - ctx.Load(items, ic => ic.ListItemCollectionPosition, - ic => ic.Include( - i => i["FSObjType"], - i => i["FileDirRef"], - i => i["File_x0020_Size"], - i => i["SMTotalSize"], - i => i["SMTotalFileStreamSize"])); - await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct); - - foreach (var item in items) - { - if (item["FSObjType"]?.ToString() != "0") continue; - - long streamSize = ParseLong(item["File_x0020_Size"]); - long smStream = 
ParseLong(SafeGet(item, "SMTotalFileStreamSize")); - long smTotal = ParseLong(SafeGet(item, "SMTotalSize")); - - if (smStream > 0) streamSize = smStream; - long totalSize = smTotal > 0 ? smTotal : streamSize; - - string fileDirRef = item["FileDirRef"]?.ToString() ?? ""; - - libNode.TotalSizeBytes += totalSize; - libNode.FileStreamSizeBytes += streamSize; - libNode.TotalFileCount++; - - var matchedFolder = FindDeepestFolder(fileDirRef, folderLookup); - if (matchedFolder != null && matchedFolder != libNode) - { - matchedFolder.TotalSizeBytes += totalSize; - matchedFolder.FileStreamSizeBytes += streamSize; - matchedFolder.TotalFileCount++; - } - } - - query.ListItemCollectionPosition = items.ListItemCollectionPosition; - } - while (items.ListItemCollectionPosition != null); - - foreach (var kv in originalTotals) - { - if (kv.Value > kv.Key.TotalSizeBytes) - kv.Key.TotalSizeBytes = kv.Value; - } - } - } + => Task.CompletedTask; public async Task GetSiteUsageStorageBytesAsync( ClientContext ctx, @@ -500,40 +628,11 @@ public class StorageService : IStorageService } } - private static long ParseLong(object? value) - { - if (value == null) return 0; - return long.TryParse(value.ToString(), out long n) ? n : 0; - } - - private static object? 
SafeGet(ListItem item, string fieldName) - { - try { return item[fieldName]; } - catch { return null; } - } - - private static void CaptureTotals(StorageNode node, Dictionary map) - { - map[node] = node.TotalSizeBytes; - foreach (var child in node.Children) - CaptureTotals(child, map); - } - - private static bool HasZeroChild(StorageNode node) - { - foreach (var child in node.Children) - { - if (child.TotalFileCount == 0) return true; - if (HasZeroChild(child)) return true; - } - return false; - } - private static void ResetNodeCounts(StorageNode node) { - node.TotalSizeBytes = 0; + node.TotalSizeBytes = 0; node.FileStreamSizeBytes = 0; - node.TotalFileCount = 0; + node.TotalFileCount = 0; foreach (var child in node.Children) ResetNodeCounts(child); } diff --git a/SharepointToolbox/ViewModels/Tabs/StorageViewModel.cs b/SharepointToolbox/ViewModels/Tabs/StorageViewModel.cs index df1edee..4ef45b9 100644 --- a/SharepointToolbox/ViewModels/Tabs/StorageViewModel.cs +++ b/SharepointToolbox/ViewModels/Tabs/StorageViewModel.cs @@ -163,15 +163,27 @@ public partial class StorageViewModel : FeatureViewModelBase } // ── Summary properties (computed from root-level library nodes) ───────── + // + // Recycle-bin contents are rolled into each library's TotalSizeBytes by the + // StorageService (matches storman.aspx). Including the synthetic root-level + // RecycleBin nodes here would double-count those bytes — filter them out. + // SummaryRecycleBinSize below still reads from _allNodes so the bin metric + // remains visible to the user. - /// Sum of TotalSizeBytes across root-level library nodes. - public long SummaryTotalSize => Results.Where(n => n.IndentLevel == 0).Sum(n => n.TotalSizeBytes); + /// Sum of TotalSizeBytes across root-level non-bin nodes. + public long SummaryTotalSize => Results + .Where(n => n.IndentLevel == 0 && n.Kind != StorageNodeKind.RecycleBin) + .Sum(n => n.TotalSizeBytes); - /// Sum of VersionSizeBytes across root-level library nodes. 
- public long SummaryVersionSize => Results.Where(n => n.IndentLevel == 0).Sum(n => n.VersionSizeBytes); + /// Sum of VersionSizeBytes across root-level non-bin nodes. + public long SummaryVersionSize => Results + .Where(n => n.IndentLevel == 0 && n.Kind != StorageNodeKind.RecycleBin) + .Sum(n => n.VersionSizeBytes); - /// Sum of TotalFileCount across root-level library nodes. - public long SummaryFileCount => Results.Where(n => n.IndentLevel == 0).Sum(n => n.TotalFileCount); + /// Sum of TotalFileCount across root-level non-bin nodes. + public long SummaryFileCount => Results + .Where(n => n.IndentLevel == 0 && n.Kind != StorageNodeKind.RecycleBin) + .Sum(n => n.TotalFileCount); /// /// Aggregate recycle-bin size (stage 1 + stage 2 across all sites). Reads