Compare commits

...

3 Commits

5 changed files with 534 additions and 259 deletions
@@ -24,4 +24,41 @@ internal static class ExportFileWriter
/// <summary>
/// Persists <paramref name="html"/> at <paramref name="filePath"/>, encoded as
/// UTF-8 with no byte-order mark.
/// </summary>
/// <param name="filePath">Destination file path.</param>
/// <param name="html">Complete HTML document text to write.</param>
/// <param name="ct">Token that cancels the write.</param>
public static Task WriteHtmlAsync(string filePath, string html, CancellationToken ct)
{
    return File.WriteAllTextAsync(filePath, html, Utf8NoBom, ct);
}
/// <summary>
/// Writes the contents of a <see cref="StringBuilder"/> to disk as UTF-8
/// with a BOM, one internal chunk at a time. No full-document
/// <c>ToString()</c> copy is made, unlike the string-based
/// <see cref="File.WriteAllTextAsync(string, string, Encoding, CancellationToken)"/>
/// path — this matters for large CSV exports.
/// </summary>
/// <param name="filePath">Destination file path.</param>
/// <param name="builder">Buffer holding the CSV text.</param>
/// <param name="ct">Token that cancels the write.</param>
public static Task WriteCsvChunksAsync(string filePath, StringBuilder builder, CancellationToken ct)
{
    return WriteChunksAsync(filePath, builder, Utf8WithBom, ct);
}
/// <summary>
/// Writes the contents of a <see cref="StringBuilder"/> to disk as UTF-8
/// without a BOM, one internal chunk at a time. Same motivation as
/// <see cref="WriteCsvChunksAsync"/>: large HTML reports are written without
/// first materializing the whole document as a single string.
/// </summary>
/// <param name="filePath">Destination file path.</param>
/// <param name="builder">Buffer holding the HTML text.</param>
/// <param name="ct">Token that cancels the write.</param>
public static Task WriteHtmlChunksAsync(string filePath, StringBuilder builder, CancellationToken ct)
{
    return WriteChunksAsync(filePath, builder, Utf8NoBom, ct);
}
/// <summary>
/// Creates (or overwrites) <paramref name="filePath"/> and writes every chunk
/// of <paramref name="builder"/> to it using <paramref name="encoding"/>.
/// </summary>
/// <param name="filePath">Destination file path; opened with exclusive access.</param>
/// <param name="builder">Source text; must not be modified while it is being written.</param>
/// <param name="encoding">Text encoding (determines whether a BOM is emitted).</param>
/// <param name="ct">Checked before each chunk and passed to every write/flush.</param>
private static async Task WriteChunksAsync(string filePath, StringBuilder builder, Encoding encoding, CancellationToken ct)
{
    // Same 64 KiB buffer for the file stream and the writer.
    const int BufferSize = 64 * 1024;

    // FileOptions.Asynchronous opens the handle for asynchronous I/O.
    const FileOptions OpenOptions = FileOptions.Asynchronous | FileOptions.SequentialScan;

    await using var stream = new FileStream(
        filePath,
        FileMode.Create,
        FileAccess.Write,
        FileShare.None,
        BufferSize,
        OpenOptions);
    await using var writer = new StreamWriter(stream, encoding, BufferSize);

    foreach (var segment in builder.GetChunks())
    {
        ct.ThrowIfCancellationRequested();
        await writer.WriteAsync(segment, ct);
    }

    await writer.FlushAsync(ct);
}
}
@@ -1,5 +1,6 @@
using SharepointToolbox.Core.Models;
using SharepointToolbox.Localization;
using System.Globalization;
using System.IO;
using System.Text;
@@ -18,34 +19,58 @@ public class StorageCsvExportService
/// </summary>
public string BuildCsv(IReadOnlyList<StorageNode> nodes)
{
var T = TranslationSource.Instance;
var sb = new StringBuilder();
// Pre-size: ~110 chars/row + header avoids most StringBuilder growth.
var sb = new StringBuilder(128 + nodes.Count * 110);
WriteCsv(sb, nodes);
return sb.ToString();
}
// Header
sb.AppendLine($"{T["report.col.library"]},{T["stor.col.kind"]},{T["report.col.site"]},{T["report.stat.files"]},{T["report.col.total_size_mb"]},{T["report.col.version_size_mb"]},{T["report.col.last_modified"]}");
private static void WriteCsv(StringBuilder sb, IReadOnlyList<StorageNode> nodes)
{
var T = TranslationSource.Instance;
// Hoist resource lookups out of the row loop: ResourceManager.GetString
// is a culture-aware dictionary probe — caching once per export saves
// O(rows × columns) lookups on large tenants.
string colLibrary = T["report.col.library"];
string colKind = T["stor.col.kind"];
string colSite = T["report.col.site"];
string colFiles = T["report.stat.files"];
string colTotalMb = T["report.col.total_size_mb"];
string colVerMb = T["report.col.version_size_mb"];
string colLastMod = T["report.col.last_modified"];
sb.Append(colLibrary).Append(',')
.Append(colKind).Append(',')
.Append(colSite).Append(',')
.Append(colFiles).Append(',')
.Append(colTotalMb).Append(',')
.Append(colVerMb).Append(',')
.AppendLine(colLastMod);
var kindLabels = BuildKindLabelCache();
foreach (var node in nodes)
{
sb.AppendLine(string.Join(",",
Csv(node.Name),
Csv(KindLabel(node.Kind)),
Csv(node.SiteTitle),
node.TotalFileCount.ToString(),
FormatMb(node.TotalSizeBytes),
FormatMb(node.VersionSizeBytes),
node.LastModified.HasValue
? Csv(node.LastModified.Value.ToString("yyyy-MM-dd"))
: string.Empty));
AppendCsvField(sb, node.Name).Append(',');
AppendCsvField(sb, kindLabels[(int)node.Kind]).Append(',');
AppendCsvField(sb, node.SiteTitle).Append(',');
sb.Append(node.TotalFileCount).Append(',');
AppendMb(sb, node.TotalSizeBytes).Append(',');
AppendMb(sb, node.VersionSizeBytes).Append(',');
if (node.LastModified.HasValue)
AppendCsvField(sb, node.LastModified.Value.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture));
sb.AppendLine();
}
return sb.ToString();
}
/// <summary>Writes the library-level CSV to <paramref name="filePath"/> with UTF-8 BOM.</summary>
public async Task WriteAsync(IReadOnlyList<StorageNode> nodes, string filePath, CancellationToken ct)
{
var csv = BuildCsv(nodes);
await ExportFileWriter.WriteCsvAsync(filePath, csv, ct);
// Stream straight to disk: skip the StringBuilder→string copy and the
// separate UTF-8 buffer that File.WriteAllTextAsync materializes.
var sb = new StringBuilder(128 + nodes.Count * 110);
WriteCsv(sb, nodes);
await ExportFileWriter.WriteCsvChunksAsync(filePath, sb, ct);
}
/// <summary>
@@ -53,44 +78,68 @@ public class StorageCsvExportService
/// </summary>
public string BuildCsv(IReadOnlyList<StorageNode> nodes, IReadOnlyList<FileTypeMetric> fileTypeMetrics)
{
var T = TranslationSource.Instance;
var sb = new StringBuilder();
var sb = new StringBuilder(192 + nodes.Count * 100 + fileTypeMetrics.Count * 40);
WriteCsv(sb, nodes, fileTypeMetrics);
return sb.ToString();
}
private static void WriteCsv(StringBuilder sb, IReadOnlyList<StorageNode> nodes, IReadOnlyList<FileTypeMetric> fileTypeMetrics)
{
var T = TranslationSource.Instance;
string colLibrary = T["report.col.library"];
string colSite = T["report.col.site"];
string colFiles = T["report.stat.files"];
string colTotalMb = T["report.col.total_size_mb"];
string colVerMb = T["report.col.version_size_mb"];
string colLastMod = T["report.col.last_modified"];
sb.Append(colLibrary).Append(',')
.Append(colSite).Append(',')
.Append(colFiles).Append(',')
.Append(colTotalMb).Append(',')
.Append(colVerMb).Append(',')
.AppendLine(colLastMod);
// Library details
sb.AppendLine($"{T["report.col.library"]},{T["report.col.site"]},{T["report.stat.files"]},{T["report.col.total_size_mb"]},{T["report.col.version_size_mb"]},{T["report.col.last_modified"]}");
foreach (var node in nodes)
{
sb.AppendLine(string.Join(",",
Csv(node.Name),
Csv(node.SiteTitle),
node.TotalFileCount.ToString(),
FormatMb(node.TotalSizeBytes),
FormatMb(node.VersionSizeBytes),
node.LastModified.HasValue
? Csv(node.LastModified.Value.ToString("yyyy-MM-dd"))
: string.Empty));
AppendCsvField(sb, node.Name).Append(',');
AppendCsvField(sb, node.SiteTitle).Append(',');
sb.Append(node.TotalFileCount).Append(',');
AppendMb(sb, node.TotalSizeBytes).Append(',');
AppendMb(sb, node.VersionSizeBytes).Append(',');
if (node.LastModified.HasValue)
AppendCsvField(sb, node.LastModified.Value.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture));
sb.AppendLine();
}
// File type breakdown
if (fileTypeMetrics.Count > 0)
{
string colFileType = T["report.col.file_type"];
string colSizeMb = T["report.col.size_mb"];
string colFileCnt = T["report.col.file_count"];
string noExtLabel = T["report.text.no_extension"];
sb.AppendLine();
sb.AppendLine($"{T["report.col.file_type"]},{T["report.col.size_mb"]},{T["report.col.file_count"]}");
sb.Append(colFileType).Append(',')
.Append(colSizeMb).Append(',')
.AppendLine(colFileCnt);
foreach (var m in fileTypeMetrics)
{
string label = string.IsNullOrEmpty(m.Extension) ? T["report.text.no_extension"] : m.Extension;
sb.AppendLine(string.Join(",", Csv(label), FormatMb(m.TotalSizeBytes), m.FileCount.ToString()));
string label = string.IsNullOrEmpty(m.Extension) ? noExtLabel : m.Extension;
AppendCsvField(sb, label).Append(',');
AppendMb(sb, m.TotalSizeBytes).Append(',');
sb.Append(m.FileCount).AppendLine();
}
}
return sb.ToString();
}
/// <summary>Writes the two-section CSV (libraries + file-type breakdown) with UTF-8 BOM.</summary>
public async Task WriteAsync(IReadOnlyList<StorageNode> nodes, IReadOnlyList<FileTypeMetric> fileTypeMetrics, string filePath, CancellationToken ct)
{
var csv = BuildCsv(nodes, fileTypeMetrics);
await ExportFileWriter.WriteCsvAsync(filePath, csv, ct);
var sb = new StringBuilder(192 + nodes.Count * 100 + fileTypeMetrics.Count * 40);
WriteCsv(sb, nodes, fileTypeMetrics);
await ExportFileWriter.WriteCsvChunksAsync(filePath, sb, ct);
}
/// <summary>
@@ -139,11 +188,27 @@ public class StorageCsvExportService
// ── Helpers ───────────────────────────────────────────────────────────────
/// <summary>
/// Converts a byte count to megabytes (1 MB = 1024 × 1024 bytes) formatted
/// with exactly two decimals.
/// </summary>
/// <param name="bytes">Size in bytes.</param>
/// <returns>The value in MB, e.g. <c>"1.50"</c>.</returns>
private static string FormatMb(long bytes)
    // Invariant culture: a comma decimal separator from the current culture
    // would split the unquoted value into two CSV columns.
    => (bytes / (1024.0 * 1024.0)).ToString("F2", CultureInfo.InvariantCulture);
/// <summary>
/// Appends <paramref name="bytes"/> expressed in megabytes (two decimals,
/// invariant culture) to <paramref name="sb"/>.
/// </summary>
/// <returns>The same <paramref name="sb"/> instance, for chaining.</returns>
private static StringBuilder AppendMb(StringBuilder sb, long bytes)
{
    double megabytes = bytes / (1024.0 * 1024.0);
    string formatted = megabytes.ToString("F2", CultureInfo.InvariantCulture);
    return sb.Append(formatted);
}
/// <summary>
/// Escapes one CSV field by delegating to <c>CsvSanitizer.EscapeMinimal</c>
/// (RFC 4180 quoting with formula-injection guard).
/// </summary>
private static string Csv(string value)
{
    return CsvSanitizer.EscapeMinimal(value);
}
/// <summary>
/// Escapes <paramref name="value"/> with <c>CsvSanitizer.EscapeMinimal</c> and
/// appends the result to <paramref name="sb"/>.
/// </summary>
/// <returns>The same <paramref name="sb"/> instance, for chaining.</returns>
private static StringBuilder AppendCsvField(StringBuilder sb, string value)
{
    string escaped = CsvSanitizer.EscapeMinimal(value);
    return sb.Append(escaped);
}
/// <summary>
/// Builds a lookup table of display labels for <see cref="StorageNodeKind"/>,
/// indexed by the enum's integer value, so row loops can use an array index
/// instead of calling <c>KindLabel</c> per row.
/// </summary>
/// <returns>
/// An array of length (largest enum value + 1). Defined members hold their
/// <c>KindLabel</c> text; gaps hold the plain <c>ToString()</c> of that value.
/// </returns>
private static string[] BuildKindLabelCache()
{
    StorageNodeKind[] kinds = Enum.GetValues<StorageNodeKind>();

    // Size the table by the largest defined value.
    int highest = 0;
    foreach (StorageNodeKind kind in kinds)
    {
        highest = Math.Max(highest, (int)kind);
    }

    var labels = new string[highest + 1];

    // Default every slot first so gaps between defined values are never null.
    for (int slot = 0; slot < labels.Length; slot++)
    {
        labels[slot] = ((StorageNodeKind)slot).ToString();
    }

    // Defined members then get their proper label.
    foreach (StorageNodeKind kind in kinds)
    {
        labels[(int)kind] = KindLabel(kind);
    }

    return labels;
}
private static string KindLabel(StorageNodeKind kind)
{
@@ -13,6 +13,8 @@ namespace SharepointToolbox.Services.Export;
public class StorageHtmlExportService
{
private int _togIdx;
private string[] _kindLabels = Array.Empty<string>();
private string[] _kindLabelsHtml = Array.Empty<string>();
/// <summary>
/// Builds a self-contained HTML report with one collapsible row per
@@ -21,10 +23,18 @@ public class StorageHtmlExportService
/// breakdown section is desired.
/// </summary>
public string BuildHtml(IReadOnlyList<StorageNode> nodes, ReportBranding? branding = null)
{
    // Pre-size the buffer: fixed page overhead plus a per-row estimate.
    int estimatedChars = 3072 + nodes.Count * 340;
    var document = new StringBuilder(estimatedChars);

    BuildHtmlCore(document, nodes, branding);

    return document.ToString();
}
private void BuildHtmlCore(StringBuilder sb, IReadOnlyList<StorageNode> nodes, ReportBranding? branding)
{
var T = TranslationSource.Instance;
_togIdx = 0;
var sb = new StringBuilder();
_kindLabels = BuildKindLabelCache();
_kindLabelsHtml = BuildHtmlEncodedCache(_kindLabels);
sb.AppendLine("<!DOCTYPE html>");
sb.AppendLine("<html lang=\"en\">");
@@ -60,11 +70,18 @@ public class StorageHtmlExportService
sb.Append(BrandingHtmlHelper.BuildBrandingHeader(branding));
sb.AppendLine($"<h1>{T["report.title.storage"]}</h1>");
// Summary cards
var rootNodes0 = nodes.Where(n => n.IndentLevel == 0).ToList();
long siteTotal0 = rootNodes0.Sum(n => n.TotalSizeBytes);
long versionTotal0 = rootNodes0.Sum(n => n.VersionSizeBytes);
long fileTotal0 = rootNodes0.Sum(n => n.TotalFileCount);
// Single-pass root aggregation: replaces 4 separate enumerations
// (.Where().ToList() + 3× .Sum() + a final .Where() during render).
var rootNodes0 = new List<StorageNode>(Math.Min(nodes.Count, 64));
long siteTotal0 = 0, versionTotal0 = 0, fileTotal0 = 0;
foreach (var n in nodes)
{
if (n.IndentLevel != 0) continue;
rootNodes0.Add(n);
siteTotal0 += n.TotalSizeBytes;
versionTotal0 += n.VersionSizeBytes;
fileTotal0 += n.TotalFileCount;
}
sb.AppendLine($"""
<div style="display:flex;gap:16px;margin:16px 0;flex-wrap:wrap">
@@ -90,7 +107,10 @@ public class StorageHtmlExportService
<tbody>
""");
foreach (var node in nodes)
// Render only the pre-materialized root list — recursing into
// Children handles descendants. Iterating the flat list would render
// every descendant a second time as a top-level row.
foreach (var node in rootNodes0)
{
RenderNode(sb, node);
}
@@ -102,18 +122,24 @@ public class StorageHtmlExportService
sb.AppendLine($"<p class=\"generated\">{T["report.text.generated_colon"]} {DateTime.Now:yyyy-MM-dd HH:mm}</p>");
sb.AppendLine("</body></html>");
return sb.ToString();
}
/// <summary>
/// Produces the HTML report as a string, including the file-type breakdown
/// chart section built from <paramref name="fileTypeMetrics"/>.
/// </summary>
public string BuildHtml(IReadOnlyList<StorageNode> nodes, IReadOnlyList<FileTypeMetric> fileTypeMetrics, ReportBranding? branding = null)
{
    // Pre-size the buffer: fixed page overhead plus per-row and per-metric estimates.
    int estimatedChars = 4096 + nodes.Count * 340 + fileTypeMetrics.Count * 220;
    var document = new StringBuilder(estimatedChars);

    BuildHtmlCore(document, nodes, fileTypeMetrics, branding);

    return document.ToString();
}
private void BuildHtmlCore(StringBuilder sb, IReadOnlyList<StorageNode> nodes, IReadOnlyList<FileTypeMetric> fileTypeMetrics, ReportBranding? branding)
{
var T = TranslationSource.Instance;
_togIdx = 0;
var sb = new StringBuilder();
_kindLabels = BuildKindLabelCache();
_kindLabelsHtml = BuildHtmlEncodedCache(_kindLabels);
sb.AppendLine("<!DOCTYPE html>");
sb.AppendLine("<html lang=\"en\">");
@@ -160,11 +186,17 @@ public class StorageHtmlExportService
sb.Append(BrandingHtmlHelper.BuildBrandingHeader(branding));
sb.AppendLine($"<h1>{T["report.title.storage"]}</h1>");
// ── Summary cards ──
var rootNodes = nodes.Where(n => n.IndentLevel == 0).ToList();
long siteTotal = rootNodes.Sum(n => n.TotalSizeBytes);
long versionTotal = rootNodes.Sum(n => n.VersionSizeBytes);
long fileTotal = rootNodes.Sum(n => n.TotalFileCount);
// ── Summary cards (single-pass aggregation) ──
var rootNodes = new List<StorageNode>(Math.Min(nodes.Count, 64));
long siteTotal = 0, versionTotal = 0, fileTotal = 0;
foreach (var n in nodes)
{
if (n.IndentLevel != 0) continue;
rootNodes.Add(n);
siteTotal += n.TotalSizeBytes;
versionTotal += n.VersionSizeBytes;
fileTotal += n.TotalFileCount;
}
sb.AppendLine("<div class=\"stats\">");
sb.AppendLine($" <div class=\"stat-card\"><div class=\"value\">{FormatSize(siteTotal)}</div><div class=\"label\">{T["report.stat.total_size"]}</div></div>");
@@ -224,7 +256,10 @@ public class StorageHtmlExportService
<tbody>
""");
foreach (var node in nodes)
// Render only the pre-materialized root list — recursing into
// Children handles descendants. Iterating the flat list would render
// every descendant a second time as a top-level row.
foreach (var node in rootNodes)
{
RenderNode(sb, node);
}
@@ -236,22 +271,24 @@ public class StorageHtmlExportService
sb.AppendLine($"<p class=\"generated\">{T["report.text.generated_colon"]} {DateTime.Now:yyyy-MM-dd HH:mm}</p>");
sb.AppendLine("</body></html>");
return sb.ToString();
}
/// <summary>Writes the library-only HTML report to <paramref name="filePath"/>.</summary>
public async Task WriteAsync(IReadOnlyList<StorageNode> nodes, string filePath, CancellationToken ct, ReportBranding? branding = null)
{
var html = BuildHtml(nodes, branding);
await File.WriteAllTextAsync(filePath, html, Encoding.UTF8, ct);
// Build into StringBuilder, stream chunks straight to disk —
// skips a full-document char-array copy from sb.ToString().
var sb = new StringBuilder(3072 + nodes.Count * 340);
BuildHtmlCore(sb, nodes, branding);
await ExportFileWriter.WriteHtmlChunksAsync(filePath, sb, ct);
}
/// <summary>Writes the HTML report including the file-type breakdown chart.</summary>
public async Task WriteAsync(IReadOnlyList<StorageNode> nodes, IReadOnlyList<FileTypeMetric> fileTypeMetrics, string filePath, CancellationToken ct, ReportBranding? branding = null)
{
var html = BuildHtml(nodes, fileTypeMetrics, branding);
await File.WriteAllTextAsync(filePath, html, Encoding.UTF8, ct);
var sb = new StringBuilder(4096 + nodes.Count * 340 + fileTypeMetrics.Count * 220);
BuildHtmlCore(sb, nodes, fileTypeMetrics, branding);
await ExportFileWriter.WriteHtmlChunksAsync(filePath, sb, ct);
}
/// <summary>
@@ -307,21 +344,7 @@ public class StorageHtmlExportService
? $"<button class=\"toggle-btn\" onclick=\"toggle({myIdx})\">&#9654;</button>{HtmlEncode(node.Name)}"
: $"<span style=\"margin-left:{node.IndentLevel * 16}px\">{HtmlEncode(node.Name)}</span>";
string lastMod = node.LastModified.HasValue
? node.LastModified.Value.ToString("yyyy-MM-dd")
: string.Empty;
sb.AppendLine($"""
<tr>
<td>{nameCell}</td>
<td>{HtmlEncode(KindLabel(node.Kind))}</td>
<td>{HtmlEncode(node.SiteTitle)}</td>
<td class="num">{node.TotalFileCount:N0}</td>
<td class="num">{FormatSize(node.TotalSizeBytes)}</td>
<td class="num">{FormatSize(node.VersionSizeBytes)}</td>
<td>{lastMod}</td>
</tr>
""");
AppendRow(sb, node, nameCell);
if (hasChildren)
{
@@ -346,21 +369,7 @@ public class StorageHtmlExportService
? $"<span style=\"{indent}\"><button class=\"toggle-btn\" onclick=\"toggle({myIdx})\">&#9654;</button>{HtmlEncode(node.Name)}</span>"
: $"<span style=\"{indent}\">{HtmlEncode(node.Name)}</span>";
string lastMod = node.LastModified.HasValue
? node.LastModified.Value.ToString("yyyy-MM-dd")
: string.Empty;
sb.AppendLine($"""
<tr>
<td>{nameCell}</td>
<td>{HtmlEncode(KindLabel(node.Kind))}</td>
<td>{HtmlEncode(node.SiteTitle)}</td>
<td class="num">{node.TotalFileCount:N0}</td>
<td class="num">{FormatSize(node.TotalSizeBytes)}</td>
<td class="num">{FormatSize(node.VersionSizeBytes)}</td>
<td>{lastMod}</td>
</tr>
""");
AppendRow(sb, node, nameCell);
if (hasChildren)
{
@@ -375,6 +384,35 @@ public class StorageHtmlExportService
}
}
/// <summary>
/// Appends one <c>&lt;tr&gt;</c> data row for <paramref name="node"/>. The
/// kind label comes from <see cref="_kindLabelsHtml"/>, which holds labels
/// that were already HTML-encoded once per export, so no label lookup or
/// encoding is repeated per row.
/// </summary>
/// <param name="sb">Destination buffer.</param>
/// <param name="node">Node whose kind, site, counts, sizes and date are rendered.</param>
/// <param name="nameCell">
/// Pre-rendered markup for the name column; inserted verbatim, so the caller
/// is responsible for encoding it.
/// </param>
private void AppendRow(StringBuilder sb, StorageNode node, string nameCell)
{
    int kindIdx = (int)node.Kind;

    // The unsigned compare rejects negative and too-large indexes in one test;
    // values outside the cache fall back to the encoded enum name.
    string kindLabel = (uint)kindIdx < (uint)_kindLabelsHtml.Length
        ? _kindLabelsHtml[kindIdx]
        : HtmlEncode(node.Kind.ToString());

    // Invariant culture pins the Gregorian calendar, so the ISO-style date
    // does not change with the machine's default calendar. Matches the CSV
    // exporter's formatting of the same field.
    string lastMod = node.LastModified.HasValue
        ? node.LastModified.Value.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture)
        : string.Empty;

    sb.AppendLine($"""
        <tr>
        <td>{nameCell}</td>
        <td>{kindLabel}</td>
        <td>{HtmlEncode(node.SiteTitle)}</td>
        <td class="num">{node.TotalFileCount:N0}</td>
        <td class="num">{FormatSize(node.TotalSizeBytes)}</td>
        <td class="num">{FormatSize(node.VersionSizeBytes)}</td>
        <td>{lastMod}</td>
        </tr>
        """);
}
private static string FormatSize(long bytes)
{
if (bytes >= 1_073_741_824L) return $"{bytes / 1_073_741_824.0:F2} GB";
@@ -400,4 +438,28 @@ public class StorageHtmlExportService
_ => kind.ToString()
};
}
/// <summary>
/// Builds a lookup table of display labels for <see cref="StorageNodeKind"/>,
/// indexed by the enum's integer value, so rendering loops can use an array
/// index instead of calling <c>KindLabel</c> per row.
/// </summary>
/// <returns>
/// An array of length (largest enum value + 1). Defined members hold their
/// <c>KindLabel</c> text; gaps hold the plain <c>ToString()</c> of that value.
/// </returns>
private static string[] BuildKindLabelCache()
{
    StorageNodeKind[] kinds = Enum.GetValues<StorageNodeKind>();

    // Size the table by the largest defined value.
    int highest = 0;
    foreach (StorageNodeKind kind in kinds)
    {
        highest = Math.Max(highest, (int)kind);
    }

    var labels = new string[highest + 1];

    // Default every slot first so gaps between defined values are never null.
    for (int slot = 0; slot < labels.Length; slot++)
    {
        labels[slot] = ((StorageNodeKind)slot).ToString();
    }

    // Defined members then get their proper label.
    foreach (StorageNodeKind kind in kinds)
    {
        labels[(int)kind] = KindLabel(kind);
    }

    return labels;
}
/// <summary>
/// Returns a new array of the same length as <paramref name="raw"/> in which
/// each element is the result of <c>HtmlEncode</c> on the corresponding input,
/// so each entry is encoded exactly once.
/// </summary>
private static string[] BuildHtmlEncodedCache(string[] raw)
{
    var result = new string[raw.Length];
    int slot = 0;
    foreach (string label in raw)
    {
        result[slot++] = HtmlEncode(label);
    }
    return result;
}
}
+216 -117
View File
@@ -58,7 +58,11 @@ public class StorageService : IStorageService
var lists = web.Lists.ToList();
// ── Document libraries (incl. hidden + Preservation Hold) ───────────
// Track each library's RootFolder server-relative URL so bin items can
// be attributed back to their source library (matches storman.aspx,
// which folds bin contents into the owning library's Total Size).
var docLibs = lists.Where(l => l.BaseType == BaseType.DocumentLibrary).ToList();
var libsByRoot = new Dictionary<string, StorageNode>(StringComparer.OrdinalIgnoreCase);
int idx = 0;
foreach (var lib in docLibs)
{
@@ -84,7 +88,17 @@ public class StorageService : IStorageService
siteTitle, lib.Title, kind, progress, ct);
}
// CSOM Folder.StorageMetrics is unreliable across the board for
// larger libraries — sometimes returns the storman value, sometimes
// returns a fraction of it, sometimes zero. Subfolder StorageMetrics
// are equally inconsistent. The only CSOM path that matches storman
// is per-file File.Length + File.Versions[*].Size enumeration, so
// run it unconditionally, replacing the CSOM totals.
ResetNodeCounts(libNode);
await BackfillLibFromFilesAsync(ctx, lib, libNode, progress, ct);
result.Add(libNode);
libsByRoot[NormalizeServerRelative(lib.RootFolder.ServerRelativeUrl)] = libNode;
}
// ── List attachments (non-document-library lists) ───────────────────
@@ -114,7 +128,33 @@ public class StorageService : IStorageService
progress.Report(OperationProgress.Indeterminate(
$"Scanning recycle bin: {siteTitle}..."));
var rbNodes = await LoadRecycleBinNodesAsync(ctx, siteTitle, progress, ct);
var (rbNodes, perDir) = await LoadRecycleBinNodesAsync(ctx, web, siteTitle, progress, ct);
// Attribute bin items to owning library (longest-prefix match on DirName)
// so library Total Size matches storman.aspx, which counts an item's
// bytes against its source library even after deletion.
if (perDir.Count > 0 && libsByRoot.Count > 0)
{
var libRootsByLength = libsByRoot
.OrderByDescending(kv => kv.Key.Length)
.ToList();
foreach (var kv in perDir)
{
string dirNorm = NormalizeServerRelative(kv.Key);
foreach (var lib in libRootsByLength)
{
if (dirNorm.Equals(lib.Key, StringComparison.OrdinalIgnoreCase) ||
dirNorm.StartsWith(lib.Key + "/", StringComparison.OrdinalIgnoreCase))
{
lib.Value.TotalSizeBytes += kv.Value.Size;
lib.Value.TotalFileCount += kv.Value.Count;
break;
}
}
}
}
result.AddRange(rbNodes);
}
@@ -131,6 +171,9 @@ public class StorageService : IStorageService
await CollectForWebAsync(ctx, sub, options, subResult, progress, ct);
if (subResult.Count == 0) continue;
// Bin contents already rolled up into each library's TotalSizeBytes
// (storman behavior); summing root RecycleBin children too would
// double-count. Filter them out here.
var subRoot = new StorageNode
{
Name = sub.Title,
@@ -140,9 +183,9 @@ public class StorageService : IStorageService
Kind = StorageNodeKind.Subsite,
IndentLevel = 0,
Children = subResult,
TotalSizeBytes = subResult.Sum(n => n.TotalSizeBytes),
FileStreamSizeBytes = subResult.Sum(n => n.FileStreamSizeBytes),
TotalFileCount = subResult.Sum(n => n.TotalFileCount)
TotalSizeBytes = subResult.Where(n => n.Kind != StorageNodeKind.RecycleBin).Sum(n => n.TotalSizeBytes),
FileStreamSizeBytes = subResult.Where(n => n.Kind != StorageNodeKind.RecycleBin).Sum(n => n.FileStreamSizeBytes),
TotalFileCount = subResult.Where(n => n.Kind != StorageNodeKind.RecycleBin).Sum(n => n.TotalFileCount)
};
result.Add(subRoot);
}
@@ -210,23 +253,34 @@ public class StorageService : IStorageService
}
}
private static async Task<List<StorageNode>> LoadRecycleBinNodesAsync(
private static async Task<(List<StorageNode> Nodes, Dictionary<string, (long Size, int Count)> PerDir)> LoadRecycleBinNodesAsync(
ClientContext ctx,
Web web,
string siteTitle,
IProgress<OperationProgress> progress,
CancellationToken ct)
{
var nodes = new List<StorageNode>();
var perDir = new Dictionary<string, (long Size, int Count)>(StringComparer.OrdinalIgnoreCase);
try
{
var bin = ctx.Site.RecycleBin;
// Web-scoped: ctx.Site.RecycleBin would return the entire site-collection
// bin and inflate totals by (1 + N_subsites) when IncludeSubsites is on.
var bin = web.RecycleBin;
ctx.Load(bin, b => b.Include(
i => i.Size,
i => i.ItemState,
i => i.DeletedDate));
i => i.DeletedDate,
i => i.DirName));
await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
// RecycleBinItem.DirName is web-relative on SharePoint Online
// (e.g. "Documents/SubFolder" without leading slash or web URL).
// Prepend the web's ServerRelativeUrl so the result matches
// List.RootFolder.ServerRelativeUrl form used by libsByRoot.
string webSrl = NormalizeServerRelative(web.ServerRelativeUrl);
long stage1Size = 0, stage2Size = 0;
int stage1Count = 0, stage2Count = 0;
DateTime? stage1Last = null, stage2Last = null;
@@ -245,6 +299,20 @@ public class StorageService : IStorageService
stage1Count++;
if (stage1Last is null || item.DeletedDate > stage1Last) stage1Last = item.DeletedDate;
}
string raw = item.DirName ?? string.Empty;
string dirSrl;
if (raw.StartsWith('/'))
dirSrl = NormalizeServerRelative(raw);
else if (string.IsNullOrEmpty(raw))
dirSrl = webSrl;
else
dirSrl = NormalizeServerRelative(webSrl + "/" + raw);
if (perDir.TryGetValue(dirSrl, out var tally))
perDir[dirSrl] = (tally.Size + item.Size, tally.Count + 1);
else
perDir[dirSrl] = (item.Size, 1);
}
if (stage1Count > 0)
@@ -282,7 +350,21 @@ public class StorageService : IStorageService
// Insufficient permission to read recycle bin or feature unavailable.
}
return nodes;
return (nodes, perDir);
}
/// <summary>
/// Normalizes a server-relative path for consistent prefix matching:
/// surrounding whitespace is removed, trailing slashes are dropped, and the
/// result starts with exactly one leading slash. SharePoint inconsistently
/// returns DirName with or without leading slash across API surfaces, so the
/// caller cannot rely on a canonical form.
/// </summary>
/// <param name="path">Raw path; may be null, empty, or just slashes.</param>
/// <returns>
/// The canonical form (e.g. <c>"/sites/a/Docs"</c>), or an empty string when
/// <paramref name="path"/> is null, blank, or consists only of slashes.
/// </returns>
private static string NormalizeServerRelative(string? path)
{
    if (string.IsNullOrEmpty(path)) return string.Empty;

    // Strip slashes from BOTH ends, then add one back: this also collapses
    // a doubled leading slash ("//sites/a"), which would otherwise never
    // match a library root during prefix comparison.
    string core = path.Trim().Trim('/');
    return core.Length == 0 ? string.Empty : "/" + core;
}
public async Task<IReadOnlyList<FileTypeMetric>> CollectFileTypeMetricsAsync(
@@ -314,6 +396,10 @@ public class StorageService : IStorageService
progress.Report(new OperationProgress(libIdx, libs.Count,
$"Scanning files by type: {lib.Title} ({libIdx}/{libs.Count})"));
// No <Where> clause: filtering on FSObjType (non-indexed) on a list
// beyond 5000 items breaches the list view threshold. Page lightly,
// then second-pass load File.Length + Versions[*].Size so per-type
// totals include version bytes (matches per-library totals).
var query = new CamlQuery
{
ViewXml = @"<View Scope='RecursiveAll'>
@@ -321,9 +407,8 @@ public class StorageService : IStorageService
<ViewFields>
<FieldRef Name='FSObjType' />
<FieldRef Name='FileLeafRef' />
<FieldRef Name='File_x0020_Size' />
</ViewFields>
<RowLimit Paged='TRUE'>5000</RowLimit>
<RowLimit Paged='TRUE'>500</RowLimit>
</View>"
};
@@ -335,21 +420,40 @@ public class StorageService : IStorageService
ctx.Load(items, ic => ic.ListItemCollectionPosition,
ic => ic.Include(
i => i["FSObjType"],
i => i["FileLeafRef"],
i => i["File_x0020_Size"]));
i => i["FileLeafRef"]));
await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
var fileRows = new List<(ListItem Item, string Name)>();
foreach (var item in items)
{
if (item["FSObjType"]?.ToString() != "0") continue;
string fileName = item["FileLeafRef"]?.ToString() ?? string.Empty;
string sizeStr = item["File_x0020_Size"]?.ToString() ?? "0";
fileRows.Add((item, fileName));
ctx.Load(item.File, f => f.Length);
ctx.Load(item.File.Versions, vc => vc.Include(v => v.Size));
}
if (!long.TryParse(sizeStr, out long fileSize))
fileSize = 0;
if (fileRows.Count > 0)
{
await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
}
string ext = Path.GetExtension(fileName).ToLowerInvariant();
foreach (var row in fileRows)
{
long current;
try { current = row.Item.File.Length; }
catch { continue; }
long versions = 0;
try
{
foreach (var v in row.Item.File.Versions)
versions += v.Size;
}
catch { /* no version history */ }
long fileSize = current + versions;
string ext = Path.GetExtension(row.Name).ToLowerInvariant();
if (extensionMap.TryGetValue(ext, out var existing))
extensionMap[ext] = (existing.totalSize + fileSize, existing.count + 1);
@@ -368,54 +472,33 @@ public class StorageService : IStorageService
.ToList();
}
public async Task BackfillZeroNodesAsync(
/// <summary>
/// Per-library recount executed inline by CollectForWebAsync for every
/// document library, replacing the totals reported by CSOM's
/// Folder.StorageMetrics. Enumerates every file via CamlQuery and explicitly
/// loads File.Length + File.Versions.Size so version bytes are summed
/// accurately — matches what storman.aspx reports.
/// </summary>
private static async Task BackfillLibFromFilesAsync(
ClientContext ctx,
IReadOnlyList<StorageNode> nodes,
List lib,
StorageNode libNode,
IProgress<OperationProgress> progress,
CancellationToken ct)
{
// Only backfill nodes scanned through CSOM document-library StorageMetrics —
// synthetic categories (recycle bin, list attachments, subsite headers)
// cannot be re-derived from File_x0020_Size.
var libNodes = nodes.Where(n => n.IndentLevel == 0 &&
(n.Kind == StorageNodeKind.Library ||
n.Kind == StorageNodeKind.HiddenLibrary ||
n.Kind == StorageNodeKind.PreservationHold)).ToList();
var needsBackfill = libNodes.Where(lib =>
lib.TotalFileCount == 0 || HasZeroChild(lib)).ToList();
if (needsBackfill.Count == 0) return;
progress.Report(OperationProgress.Indeterminate(
$"Counting files: {libNode.Name}..."));
ctx.Load(ctx.Web, w => w.ServerRelativeUrl,
w => w.Lists.Include(
l => l.Title, l => l.Hidden, l => l.BaseType,
l => l.RootFolder.ServerRelativeUrl));
await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
var libs = ctx.Web.Lists
.Where(l => l.BaseType == BaseType.DocumentLibrary)
.ToDictionary(l => l.Title, StringComparer.OrdinalIgnoreCase);
int idx = 0;
foreach (var libNode in needsBackfill)
{
ct.ThrowIfCancellationRequested();
idx++;
if (!libs.TryGetValue(libNode.Library, out var lib)) continue;
progress.Report(new OperationProgress(idx, needsBackfill.Count,
$"Counting files: {libNode.Name} ({idx}/{needsBackfill.Count})"));
string libRootSrl = lib.RootFolder.ServerRelativeUrl.TrimEnd('/');
string libRootSrl = NormalizeServerRelative(lib.RootFolder.ServerRelativeUrl);
var folderLookup = new Dictionary<string, StorageNode>(StringComparer.OrdinalIgnoreCase);
BuildFolderLookup(libNode, libRootSrl, folderLookup);
var originalTotals = new Dictionary<StorageNode, long>();
CaptureTotals(libNode, originalTotals);
ResetNodeCounts(libNode);
// No <Where> clause: filtering on FSObjType (non-indexed) on a list
// beyond the 5000-item view threshold throws "The attempted operation
// is prohibited because it exceeds the list view threshold". Paged
// retrieval without Where is unaffected by the threshold; we filter
// out folders client-side and skip File.Length access for them.
// Smaller page size because each row carries the full Versions collection.
var query = new CamlQuery
{
ViewXml = @"<View Scope='RecursiveAll'>
@@ -423,11 +506,8 @@ public class StorageService : IStorageService
<ViewFields>
<FieldRef Name='FSObjType' />
<FieldRef Name='FileDirRef' />
<FieldRef Name='File_x0020_Size' />
<FieldRef Name='SMTotalSize' />
<FieldRef Name='SMTotalFileStreamSize' />
</ViewFields>
<RowLimit Paged='TRUE'>5000</RowLimit>
<RowLimit Paged='TRUE'>500</RowLimit>
</View>"
};
@@ -439,50 +519,98 @@ public class StorageService : IStorageService
ctx.Load(items, ic => ic.ListItemCollectionPosition,
ic => ic.Include(
i => i["FSObjType"],
i => i["FileDirRef"],
i => i["File_x0020_Size"],
i => i["SMTotalSize"],
i => i["SMTotalFileStreamSize"]));
i => i["FileDirRef"]));
await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
// Second pass: queue File.Length + File.Versions[*].Size only for
// file rows. Including these in the page 1 query throws a
// ServerObjectNullReferenceException on folder rows (item.File is
// null for folders). Filtering FSObjType client-side here keeps
// per-page round-trips at two regardless of file count.
var fileRows = new List<(ListItem Item, string DirRef)>();
foreach (var item in items)
{
if (item["FSObjType"]?.ToString() != "0") continue;
long streamSize = ParseLong(item["File_x0020_Size"]);
long smStream = ParseLong(SafeGet(item, "SMTotalFileStreamSize"));
long smTotal = ParseLong(SafeGet(item, "SMTotalSize"));
if (smStream > 0) streamSize = smStream;
long totalSize = smTotal > 0 ? smTotal : streamSize;
string fileDirRef = item["FileDirRef"]?.ToString() ?? "";
libNode.TotalSizeBytes += totalSize;
libNode.FileStreamSizeBytes += streamSize;
libNode.TotalFileCount++;
var matchedFolder = FindDeepestFolder(fileDirRef, folderLookup);
if (matchedFolder != null && matchedFolder != libNode)
{
matchedFolder.TotalSizeBytes += totalSize;
matchedFolder.FileStreamSizeBytes += streamSize;
matchedFolder.TotalFileCount++;
var dirRef = item["FileDirRef"]?.ToString() ?? string.Empty;
fileRows.Add((item, dirRef));
ctx.Load(item.File, f => f.Length);
ctx.Load(item.File.Versions, vc => vc.Include(v => v.Size));
}
if (fileRows.Count > 0)
{
await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
}
foreach (var row in fileRows)
{
long current;
try { current = row.Item.File.Length; }
catch { continue; }
long versions = 0;
try
{
foreach (var v in row.Item.File.Versions)
versions += v.Size;
}
catch
{
// Versioning disabled / no version history — leave at 0.
}
long totalSize = current + versions;
// Attribute each file to its deepest matching folder only.
// Parent rollup happens once after all pages are processed,
// adding direct + descendants — matches storman's per-folder
// total. Fall back to libNode for files at lib root or in
// folders excluded from the tree (Forms, _-prefixed system
// folders, depth-limited subfolders).
var target = FindDeepestFolder(row.DirRef, folderLookup) ?? libNode;
target.TotalSizeBytes += totalSize;
target.FileStreamSizeBytes += current;
target.TotalFileCount++;
}
query.ListItemCollectionPosition = items.ListItemCollectionPosition;
}
while (items.ListItemCollectionPosition != null);
foreach (var kv in originalTotals)
// Post-pass rollup: each folder's totals become own-direct + sum of
// descendants. libNode ends up as total of every file in the tree.
RollupFolderTotals(libNode);
}
/// <summary>
/// Recursively rolls up direct-file totals into ancestor folders so each
/// node's reported size includes everything beneath it.
/// </summary>
/// <remarks>
/// Pre-condition: each node holds only its directly-attributed files (no
/// descendant amounts). The method adds child totals with <c>+=</c>, so
/// running it a second time over the same tree would count descendants
/// more than once.
/// </remarks>
/// <param name="node">Root of the subtree to aggregate; its totals are updated in place.</param>
private static void RollupFolderTotals(StorageNode node)
{
    foreach (var child in node.Children)
    {
        // Depth-first: finish the child's own rollup before adding it here,
        // so the amounts added already include the child's descendants.
        RollupFolderTotals(child);
        node.TotalSizeBytes += child.TotalSizeBytes;
        node.FileStreamSizeBytes += child.FileStreamSizeBytes;
        node.TotalFileCount += child.TotalFileCount;
    }
}
/// <summary>
/// Intentionally does nothing; the member is kept only so the interface
/// contract is still satisfied. Backfill now runs inline inside
/// <see cref="CollectStorageAsync"/> via BackfillLibFromFilesAsync, which
/// has access to the CSOM library reference and runs before bin
/// distribution so the count==0 trigger is not polluted by bin items.
/// </summary>
/// <param name="ctx">Unused.</param>
/// <param name="nodes">Unused.</param>
/// <param name="progress">Unused.</param>
/// <param name="ct">Unused.</param>
/// <returns>An already-completed task.</returns>
public Task BackfillZeroNodesAsync(
    ClientContext ctx,
    IReadOnlyList<StorageNode> nodes,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    // No work is performed and none of the arguments are read.
    return Task.CompletedTask;
}
public async Task<long> GetSiteUsageStorageBytesAsync(
ClientContext ctx,
IProgress<OperationProgress> progress,
@@ -500,35 +628,6 @@ public class StorageService : IStorageService
}
}
/// <summary>
/// Converts a loosely-typed value to a <see cref="long"/>, returning 0 when
/// the value is null or is not a plain integer.
/// </summary>
/// <remarks>
/// Text is formatted and parsed with the invariant culture so the result
/// does not depend on the current thread culture.
/// </remarks>
/// <param name="value">Value to convert; may be null.</param>
/// <returns>The parsed integer, or 0 when the value cannot be parsed.</returns>
private static long ParseLong(object? value)
{
    switch (value)
    {
        case null:
            return 0;
        case long boxedLong:
            // Already the target type: skip the string round-trip.
            return boxedLong;
        case int boxedInt:
            return boxedInt;
    }

    // Namespaces are fully qualified so the method does not rely on any
    // particular set of using directives at the top of the file.
    string? text = System.Convert.ToString(value, System.Globalization.CultureInfo.InvariantCulture);

    return long.TryParse(
        text,
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture,
        out long parsed)
        ? parsed
        : 0;
}
/// <summary>
/// Reads a field from a list item without letting a failed lookup escape.
/// </summary>
/// <param name="item">List item to read from.</param>
/// <param name="fieldName">Name passed to the item's indexer.</param>
/// <returns>The indexer's value, or null when the indexer throws.</returns>
private static object? SafeGet(ListItem item, string fieldName)
{
    object? fieldValue = null;
    try
    {
        fieldValue = item[fieldName];
    }
    catch
    {
        // Best-effort read: any failure leaves the result at null.
    }

    return fieldValue;
}
/// <summary>
/// Records the current <c>TotalSizeBytes</c> of a node and of every node
/// beneath it into <paramref name="map"/>, keyed by node.
/// </summary>
/// <param name="node">Root of the subtree to snapshot.</param>
/// <param name="map">Dictionary that receives one entry per visited node; existing entries for the same node are overwritten.</param>
private static void CaptureTotals(StorageNode node, Dictionary<StorageNode, long> map)
{
    Snapshot(node);

    // Pre-order walk: the parent is recorded before its children.
    void Snapshot(StorageNode current)
    {
        map[current] = current.TotalSizeBytes;
        foreach (var descendant in current.Children)
        {
            Snapshot(descendant);
        }
    }
}
/// <summary>
/// Reports whether any descendant of <paramref name="node"/> has a
/// <c>TotalFileCount</c> of zero. The node itself is not checked.
/// </summary>
/// <param name="node">Node whose descendants are searched.</param>
/// <returns>True as soon as a zero-count descendant is found; otherwise false.</returns>
private static bool HasZeroChild(StorageNode node)
{
    foreach (var descendant in node.Children)
    {
        // The direct check comes first, so the recursion is skipped when
        // the child itself already has a zero count.
        bool childIsEmpty = descendant.TotalFileCount == 0;
        if (childIsEmpty || HasZeroChild(descendant))
        {
            return true;
        }
    }

    return false;
}
private static void ResetNodeCounts(StorageNode node)
{
node.TotalSizeBytes = 0;
@@ -163,15 +163,27 @@ public partial class StorageViewModel : FeatureViewModelBase
}
// ── Summary properties (computed from root-level library nodes) ─────────
//
// Recycle-bin contents are rolled into each library's TotalSizeBytes by the
// StorageService (matches storman.aspx). Including the synthetic root-level
// RecycleBin nodes here would double-count those bytes — filter them out.
// SummaryRecycleBinSize below still reads from _allNodes so the bin metric
// remains visible to the user.
/// <summary>
/// Sum of TotalSizeBytes across root-level non-bin nodes. Root-level
/// RecycleBin nodes are skipped so their bytes are not counted a second
/// time on top of the library totals.
/// </summary>
public long SummaryTotalSize => Results
    .Where(n => n.IndentLevel == 0 && n.Kind != StorageNodeKind.RecycleBin)
    .Sum(n => n.TotalSizeBytes);
/// <summary>
/// Sum of VersionSizeBytes across root-level non-bin nodes (nodes with
/// IndentLevel 0 whose Kind is not RecycleBin).
/// </summary>
public long SummaryVersionSize => Results
    .Where(n => n.IndentLevel == 0 && n.Kind != StorageNodeKind.RecycleBin)
    .Sum(n => n.VersionSizeBytes);
/// <summary>
/// Sum of TotalFileCount across root-level non-bin nodes (nodes with
/// IndentLevel 0 whose Kind is not RecycleBin).
/// </summary>
public long SummaryFileCount => Results
    .Where(n => n.IndentLevel == 0 && n.Kind != StorageNodeKind.RecycleBin)
    .Sum(n => n.TotalFileCount);
/// <summary>
/// Aggregate recycle-bin size (stage 1 + stage 2 across all sites). Reads