Files
Sharepoint-Toolbox/SharepointToolbox/Services/StorageService.cs
T
Dev 12dd1de9f2 chore: release v2.4
- Add theme system (Dark/Light palettes, ModernTheme, ThemeManager)
- Add InputDialog, Spinner common view
- Add DuplicatesCsvExportService
- Refresh views, dialogs, and view models across tabs
- Update localization strings (en/fr)
- Tweak services (transfer, permissions, search, user access, ownership elevation, bulk operations)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 10:50:03 +02:00

464 lines
18 KiB
C#

using System.IO;
using Microsoft.SharePoint.Client;
using SharepointToolbox.Core.Helpers;
using SharepointToolbox.Core.Models;
namespace SharepointToolbox.Services;
/// <summary>
/// CSOM-based storage metrics scanner.
/// Port of PowerShell Collect-FolderStorage / Get-PnPFolderStorageMetric pattern.
/// </summary>
public class StorageService : IStorageService
{
/// <summary>
/// Collects per-library and per-folder storage metrics for a single
/// SharePoint site. Depth and indentation are controlled via
/// <paramref name="options"/>; libraries flagged <c>Hidden</c> are skipped.
/// Traversal is breadth-first and leans on <see cref="SharePointPaginationHelper"/>
/// so libraries above the 5,000-item threshold remain scannable.
/// </summary>
public async Task<IReadOnlyList<StorageNode>> CollectStorageAsync(
ClientContext ctx,
StorageScanOptions options,
IProgress<OperationProgress> progress,
CancellationToken ct)
{
ct.ThrowIfCancellationRequested();
// Load web-level metadata in one round-trip
ctx.Load(ctx.Web,
w => w.Title,
w => w.Url,
w => w.ServerRelativeUrl,
w => w.Lists.Include(
l => l.Title,
l => l.Hidden,
l => l.BaseType,
l => l.RootFolder.ServerRelativeUrl));
await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
string webSrl = ctx.Web.ServerRelativeUrl.TrimEnd('/');
string siteTitle = ctx.Web.Title;
var result = new List<StorageNode>();
var libs = ctx.Web.Lists
.Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
.ToList();
int idx = 0;
foreach (var lib in libs)
{
ct.ThrowIfCancellationRequested();
idx++;
progress.Report(new OperationProgress(idx, libs.Count,
$"Loading storage metrics: {lib.Title} ({idx}/{libs.Count})"));
var libNode = await LoadFolderNodeAsync(
ctx, lib.RootFolder.ServerRelativeUrl, lib.Title,
siteTitle, lib.Title, 0, progress, ct);
if (options.FolderDepth > 0)
{
await CollectSubfoldersAsync(
ctx, lib, lib.RootFolder.ServerRelativeUrl,
libNode, 1, options.FolderDepth,
siteTitle, lib.Title, progress, ct);
}
result.Add(libNode);
}
return result;
}
/// <summary>
/// Aggregates file counts and total sizes by extension across every
/// non-hidden document library on the site. Extensions are normalised to
/// lowercase; files without an extension roll up into a single bucket.
/// </summary>
public async Task<IReadOnlyList<FileTypeMetric>> CollectFileTypeMetricsAsync(
ClientContext ctx,
IProgress<OperationProgress> progress,
CancellationToken ct)
{
ct.ThrowIfCancellationRequested();
// Load all non-hidden document libraries
ctx.Load(ctx.Web,
w => w.Lists.Include(
l => l.Title,
l => l.Hidden,
l => l.BaseType,
l => l.ItemCount));
await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
var libs = ctx.Web.Lists
.Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
.ToList();
// Accumulate file sizes by extension across all libraries
var extensionMap = new Dictionary<string, (long totalSize, int count)>(StringComparer.OrdinalIgnoreCase);
int libIdx = 0;
foreach (var lib in libs)
{
ct.ThrowIfCancellationRequested();
libIdx++;
progress.Report(new OperationProgress(libIdx, libs.Count,
$"Scanning files by type: {lib.Title} ({libIdx}/{libs.Count})"));
// Paginated CAML without a WHERE clause — WHERE on non-indexed fields
// (FSObjType) throws list-view threshold on libraries > 5,000 items.
// Filter files client-side via FSObjType.
var query = new CamlQuery
{
ViewXml = @"<View Scope='RecursiveAll'>
<Query></Query>
<ViewFields>
<FieldRef Name='FSObjType' />
<FieldRef Name='FileLeafRef' />
<FieldRef Name='File_x0020_Size' />
</ViewFields>
<RowLimit Paged='TRUE'>5000</RowLimit>
</View>"
};
ListItemCollection items;
do
{
ct.ThrowIfCancellationRequested();
items = lib.GetItems(query);
ctx.Load(items, ic => ic.ListItemCollectionPosition,
ic => ic.Include(
i => i["FSObjType"],
i => i["FileLeafRef"],
i => i["File_x0020_Size"]));
await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
foreach (var item in items)
{
if (item["FSObjType"]?.ToString() != "0") continue; // skip folders
string fileName = item["FileLeafRef"]?.ToString() ?? string.Empty;
string sizeStr = item["File_x0020_Size"]?.ToString() ?? "0";
if (!long.TryParse(sizeStr, out long fileSize))
fileSize = 0;
string ext = Path.GetExtension(fileName).ToLowerInvariant();
if (extensionMap.TryGetValue(ext, out var existing))
extensionMap[ext] = (existing.totalSize + fileSize, existing.count + 1);
else
extensionMap[ext] = (fileSize, 1);
}
query.ListItemCollectionPosition = items.ListItemCollectionPosition;
}
while (items.ListItemCollectionPosition != null);
}
// Convert to FileTypeMetric list, sorted by size descending
return extensionMap
.Select(kvp => new FileTypeMetric(kvp.Key, kvp.Value.totalSize, kvp.Value.count))
.OrderByDescending(m => m.TotalSizeBytes)
.ToList();
}
public async Task BackfillZeroNodesAsync(
ClientContext ctx,
IReadOnlyList<StorageNode> nodes,
IProgress<OperationProgress> progress,
CancellationToken ct)
{
// Find root-level library nodes that have any zero-valued nodes in their tree
var libNodes = nodes.Where(n => n.IndentLevel == 0).ToList();
var needsBackfill = libNodes.Where(lib =>
lib.TotalFileCount == 0 || HasZeroChild(lib)).ToList();
if (needsBackfill.Count == 0) return;
// Load libraries to get RootFolder.ServerRelativeUrl for path matching
ctx.Load(ctx.Web, w => w.ServerRelativeUrl,
w => w.Lists.Include(
l => l.Title, l => l.Hidden, l => l.BaseType,
l => l.RootFolder.ServerRelativeUrl));
await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
var libs = ctx.Web.Lists
.Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
.ToDictionary(l => l.Title, StringComparer.OrdinalIgnoreCase);
int idx = 0;
foreach (var libNode in needsBackfill)
{
ct.ThrowIfCancellationRequested();
idx++;
if (!libs.TryGetValue(libNode.Name, out var lib)) continue;
progress.Report(new OperationProgress(idx, needsBackfill.Count,
$"Counting files: {libNode.Name} ({idx}/{needsBackfill.Count})"));
string libRootSrl = lib.RootFolder.ServerRelativeUrl.TrimEnd('/');
// Build a lookup of all folder nodes in this library's tree (by server-relative path)
var folderLookup = new Dictionary<string, StorageNode>(StringComparer.OrdinalIgnoreCase);
BuildFolderLookup(libNode, libRootSrl, folderLookup);
// Capture original TotalSizeBytes before reset — StorageMetrics.TotalSize
// includes version overhead, which cannot be rederived from a file scan
// (File_x0020_Size is the current stream size only).
var originalTotals = new Dictionary<StorageNode, long>();
CaptureTotals(libNode, originalTotals);
// Reset all nodes in this tree to zero before accumulating
ResetNodeCounts(libNode);
// Paginated CAML without WHERE (filter folders client-side via FSObjType).
// SMTotalSize = per-file total including all versions (version-aware).
// SMTotalFileStreamSize = current stream only. File_x0020_Size is a fallback
// when SMTotalSize is unavailable (older tenants / custom fields stripped).
var query = new CamlQuery
{
ViewXml = @"<View Scope='RecursiveAll'>
<Query></Query>
<ViewFields>
<FieldRef Name='FSObjType' />
<FieldRef Name='FileDirRef' />
<FieldRef Name='File_x0020_Size' />
<FieldRef Name='SMTotalSize' />
<FieldRef Name='SMTotalFileStreamSize' />
</ViewFields>
<RowLimit Paged='TRUE'>5000</RowLimit>
</View>"
};
ListItemCollection items;
do
{
ct.ThrowIfCancellationRequested();
items = lib.GetItems(query);
ctx.Load(items, ic => ic.ListItemCollectionPosition,
ic => ic.Include(
i => i["FSObjType"],
i => i["FileDirRef"],
i => i["File_x0020_Size"],
i => i["SMTotalSize"],
i => i["SMTotalFileStreamSize"]));
await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
foreach (var item in items)
{
if (item["FSObjType"]?.ToString() != "0") continue; // skip folders
long streamSize = ParseLong(item["File_x0020_Size"]);
long smStream = ParseLong(SafeGet(item, "SMTotalFileStreamSize"));
long smTotal = ParseLong(SafeGet(item, "SMTotalSize"));
// Prefer SM fields when present; fall back to File_x0020_Size otherwise.
if (smStream > 0) streamSize = smStream;
long totalSize = smTotal > 0 ? smTotal : streamSize;
string fileDirRef = item["FileDirRef"]?.ToString() ?? "";
// Always count toward the library root
libNode.TotalSizeBytes += totalSize;
libNode.FileStreamSizeBytes += streamSize;
libNode.TotalFileCount++;
// Also count toward the most specific matching subfolder
var matchedFolder = FindDeepestFolder(fileDirRef, folderLookup);
if (matchedFolder != null && matchedFolder != libNode)
{
matchedFolder.TotalSizeBytes += totalSize;
matchedFolder.FileStreamSizeBytes += streamSize;
matchedFolder.TotalFileCount++;
}
}
query.ListItemCollectionPosition = items.ListItemCollectionPosition;
}
while (items.ListItemCollectionPosition != null);
// Restore original TotalSizeBytes where it exceeded the recomputed value.
// Preserves StorageMetrics.TotalSize for nodes whose original metrics were
// valid but SMTotalSize was missing on individual files.
foreach (var kv in originalTotals)
{
if (kv.Value > kv.Key.TotalSizeBytes)
kv.Key.TotalSizeBytes = kv.Value;
}
}
}
private static long ParseLong(object? value)
{
if (value == null) return 0;
return long.TryParse(value.ToString(), out long n) ? n : 0;
}
private static object? SafeGet(ListItem item, string fieldName)
{
try { return item[fieldName]; }
catch { return null; }
}
private static void CaptureTotals(StorageNode node, Dictionary<StorageNode, long> map)
{
map[node] = node.TotalSizeBytes;
foreach (var child in node.Children)
CaptureTotals(child, map);
}
private static bool HasZeroChild(StorageNode node)
{
foreach (var child in node.Children)
{
if (child.TotalFileCount == 0) return true;
if (HasZeroChild(child)) return true;
}
return false;
}
private static void ResetNodeCounts(StorageNode node)
{
node.TotalSizeBytes = 0;
node.FileStreamSizeBytes = 0;
node.TotalFileCount = 0;
foreach (var child in node.Children)
ResetNodeCounts(child);
}
private static void BuildFolderLookup(StorageNode node, string parentPath,
Dictionary<string, StorageNode> lookup)
{
string nodePath = node.IndentLevel == 0
? parentPath
: parentPath + "/" + node.Name;
lookup[nodePath] = node;
foreach (var child in node.Children)
BuildFolderLookup(child, nodePath, lookup);
}
private static StorageNode? FindDeepestFolder(string fileDirRef,
Dictionary<string, StorageNode> lookup)
{
// fileDirRef is the server-relative folder path, e.g. "/sites/hr/Shared Documents/Reports"
// Try exact match, then walk up until we find a match
string path = fileDirRef.TrimEnd('/');
while (!string.IsNullOrEmpty(path))
{
if (lookup.TryGetValue(path, out var node))
return node;
int lastSlash = path.LastIndexOf('/');
if (lastSlash <= 0) break;
path = path[..lastSlash];
}
return null;
}
// -- Private helpers -----------------------------------------------------
private static async Task<StorageNode> LoadFolderNodeAsync(
ClientContext ctx,
string serverRelativeUrl,
string name,
string siteTitle,
string library,
int indentLevel,
IProgress<OperationProgress> progress,
CancellationToken ct)
{
ct.ThrowIfCancellationRequested();
Folder folder = ctx.Web.GetFolderByServerRelativeUrl(serverRelativeUrl);
ctx.Load(folder,
f => f.StorageMetrics,
f => f.TimeLastModified,
f => f.ServerRelativeUrl,
f => f.Name);
await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
DateTime? lastMod = folder.StorageMetrics.LastModified > DateTime.MinValue
? folder.StorageMetrics.LastModified
: folder.TimeLastModified > DateTime.MinValue
? folder.TimeLastModified
: (DateTime?)null;
return new StorageNode
{
Name = name,
Url = ctx.Url.TrimEnd('/') + serverRelativeUrl,
SiteTitle = siteTitle,
Library = library,
TotalSizeBytes = folder.StorageMetrics.TotalSize,
FileStreamSizeBytes = folder.StorageMetrics.TotalFileStreamSize,
TotalFileCount = folder.StorageMetrics.TotalFileCount,
LastModified = lastMod,
IndentLevel = indentLevel,
Children = new List<StorageNode>()
};
}
private static async Task CollectSubfoldersAsync(
ClientContext ctx,
List list,
string parentServerRelativeUrl,
StorageNode parentNode,
int currentDepth,
int maxDepth,
string siteTitle,
string library,
IProgress<OperationProgress> progress,
CancellationToken ct)
{
if (currentDepth > maxDepth) return;
ct.ThrowIfCancellationRequested();
// Enumerate direct child folders via paginated CAML scoped to the parent.
// Folder.Folders lazy loading hits the list-view threshold on libraries
// > 5,000 items; a paged CAML query with no WHERE bypasses it.
var subfolders = new List<(string Name, string ServerRelativeUrl)>();
await foreach (var item in SharePointPaginationHelper.GetItemsInFolderAsync(
ctx, list, parentServerRelativeUrl, recursive: false,
viewFields: new[] { "FSObjType", "FileLeafRef", "FileRef" },
ct: ct))
{
if (item["FSObjType"]?.ToString() != "1") continue; // folders only
string name = item["FileLeafRef"]?.ToString() ?? string.Empty;
string url = item["FileRef"]?.ToString() ?? string.Empty;
if (string.IsNullOrEmpty(name) || string.IsNullOrEmpty(url)) continue;
// Skip SharePoint system folders
if (name.Equals("Forms", StringComparison.OrdinalIgnoreCase) ||
name.StartsWith("_", StringComparison.Ordinal))
continue;
subfolders.Add((name, url));
}
foreach (var sub in subfolders)
{
ct.ThrowIfCancellationRequested();
var childNode = await LoadFolderNodeAsync(
ctx, sub.ServerRelativeUrl, sub.Name,
siteTitle, library, currentDepth, progress, ct);
if (currentDepth < maxDepth)
{
await CollectSubfoldersAsync(
ctx, list, sub.ServerRelativeUrl, childNode,
currentDepth + 1, maxDepth,
siteTitle, library, progress, ct);
}
parentNode.Children.Add(childNode);
}
}
}