Files
Sharepoint-Toolbox/SharepointToolbox/Services/StorageService.cs
Dev 81e3dcac6d feat(09-02): implement CollectFileTypeMetricsAsync in StorageService
- CamlQuery with RecursiveAll scope enumerates files across all non-hidden document libraries
- Paginated 500-item batches avoid list view threshold issues
- Files grouped by extension (case-insensitive) with summed size and count
- Results returned as IReadOnlyList<FileTypeMetric> sorted by TotalSizeBytes descending
- Existing CollectStorageAsync, LoadFolderNodeAsync, CollectSubfoldersAsync unchanged

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 15:24:09 +02:00

251 lines
9.0 KiB
C#

using System.IO;
using Microsoft.SharePoint.Client;
using SharepointToolbox.Core.Helpers;
using SharepointToolbox.Core.Models;
namespace SharepointToolbox.Services;
/// <summary>
/// CSOM-based storage metrics scanner.
/// Port of PowerShell Collect-FolderStorage / Get-PnPFolderStorageMetric pattern.
/// </summary>
public class StorageService : IStorageService
{
/// <summary>
/// Scans every non-hidden document library of the web behind <paramref name="ctx"/>
/// and returns one storage node per library.
/// </summary>
/// <param name="ctx">Client context of the web to scan.</param>
/// <param name="options">
/// Scan options. When <c>FolderDepth</c> is greater than zero, subfolders are
/// collected below each library root down to that depth.
/// </param>
/// <param name="progress">Receives one report per library; also passed to the query retry helper.</param>
/// <param name="ct">Checked on entry and before each library is processed.</param>
/// <returns>The library root nodes, in the order the lists were enumerated.</returns>
/// <exception cref="ArgumentNullException"><paramref name="ctx"/> is <c>null</c>.</exception>
public async Task<IReadOnlyList<StorageNode>> CollectStorageAsync(
    ClientContext ctx,
    StorageScanOptions options,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    ArgumentNullException.ThrowIfNull(ctx);
    ct.ThrowIfCancellationRequested();

    // Load web-level metadata and the list catalogue in one round-trip.
    // Url and ServerRelativeUrl are not read in this method; they stay in the
    // request so the set of properties initialized on ctx.Web is unchanged.
    ctx.Load(ctx.Web,
        w => w.Title,
        w => w.Url,
        w => w.ServerRelativeUrl,
        w => w.Lists.Include(
            l => l.Title,
            l => l.Hidden,
            l => l.BaseType,
            l => l.RootFolder.ServerRelativeUrl));
    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

    string siteTitle = ctx.Web.Title;

    var libs = ctx.Web.Lists
        .Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
        .ToList();

    var result = new List<StorageNode>(libs.Count);

    int idx = 0;
    foreach (var lib in libs)
    {
        ct.ThrowIfCancellationRequested();
        idx++;
        progress.Report(new OperationProgress(idx, libs.Count,
            $"Loading storage metrics: {lib.Title} ({idx}/{libs.Count})"));

        // The library root is reported at indent level 0 and named after the library.
        var libNode = await LoadFolderNodeAsync(
            ctx, lib.RootFolder.ServerRelativeUrl, lib.Title,
            siteTitle, lib.Title, 0, progress, ct);

        if (options.FolderDepth > 0)
        {
            await CollectSubfoldersAsync(
                ctx, lib.RootFolder.ServerRelativeUrl,
                libNode, 1, options.FolderDepth,
                siteTitle, lib.Title, progress, ct);
        }

        result.Add(libNode);
    }

    return result;
}
/// <summary>
/// Aggregates file count and total size per file extension across every
/// non-hidden document library of the web behind <paramref name="ctx"/>.
/// </summary>
/// <param name="ctx">Client context of the web to scan.</param>
/// <param name="progress">Receives one report per library; also passed to the query retry helper.</param>
/// <param name="ct">Checked on entry, before each library and before each page request.</param>
/// <returns>
/// One <see cref="FileTypeMetric"/> per distinct extension (lower-cased, including the
/// leading dot; empty string for files without an extension), sorted by total size descending.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="ctx"/> is <c>null</c>.</exception>
public async Task<IReadOnlyList<FileTypeMetric>> CollectFileTypeMetricsAsync(
    ClientContext ctx,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    ArgumentNullException.ThrowIfNull(ctx);
    ct.ThrowIfCancellationRequested();

    // Load the list catalogue with just the properties needed to pick libraries
    ctx.Load(ctx.Web,
        w => w.Lists.Include(
            l => l.Title,
            l => l.Hidden,
            l => l.BaseType,
            l => l.ItemCount));
    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

    var libs = ctx.Web.Lists
        .Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
        .ToList();

    // Accumulate file sizes by extension across all libraries
    var extensionMap = new Dictionary<string, (long totalSize, int count)>(StringComparer.OrdinalIgnoreCase);

    int libIdx = 0;
    foreach (var lib in libs)
    {
        ct.ThrowIfCancellationRequested();
        libIdx++;
        progress.Report(new OperationProgress(libIdx, libs.Count,
            $"Scanning files by type: {lib.Title} ({libIdx}/{libs.Count})"));

        // An empty library cannot contribute anything; skip the round-trip.
        if (lib.ItemCount == 0)
        {
            continue;
        }

        // Page through the whole library WITHOUT a <Where> clause. Filtering on
        // FSObjType server-side uses a non-indexed column, which can make the
        // request fail on libraries above the list view threshold even when
        // paged. Ordering by the indexed ID column keeps paging threshold-safe;
        // folders are filtered out client-side below.
        var query = new CamlQuery
        {
            ViewXml = @"<View Scope='RecursiveAll'>
<Query>
<OrderBy Override='TRUE'>
<FieldRef Name='ID' />
</OrderBy>
</Query>
<ViewFields>
<FieldRef Name='FileLeafRef' />
<FieldRef Name='File_x0020_Size' />
<FieldRef Name='FSObjType' />
</ViewFields>
<RowLimit Paged='TRUE'>500</RowLimit>
</View>"
        };

        ListItemCollection items;
        do
        {
            ct.ThrowIfCancellationRequested();

            items = lib.GetItems(query);
            ctx.Load(items,
                ic => ic.ListItemCollectionPosition,
                ic => ic.Include(
                    i => i.FileSystemObjectType,
                    i => i["FileLeafRef"],
                    i => i["File_x0020_Size"]));
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

            foreach (var item in items)
            {
                // Only files count towards the metrics; skip folders and other containers.
                if (item.FileSystemObjectType != FileSystemObjectType.File)
                {
                    continue;
                }

                string fileName = item["FileLeafRef"]?.ToString() ?? string.Empty;
                string sizeText = item["File_x0020_Size"]?.ToString() ?? "0";

                // TryParse leaves fileSize at 0 when the value is not a valid integer.
                _ = long.TryParse(
                    sizeText,
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out long fileSize);

                // ext is "" for extensionless files, ".docx" etc. for others
                string ext = Path.GetExtension(fileName).ToLowerInvariant();

                // A missing key yields the default (0, 0) tuple, so one update path suffices.
                extensionMap.TryGetValue(ext, out var running);
                extensionMap[ext] = (running.totalSize + fileSize, running.count + 1);
            }

            // A null position means the last page has been read
            query.ListItemCollectionPosition = items.ListItemCollectionPosition;
        }
        while (items.ListItemCollectionPosition != null);
    }

    // Convert to FileTypeMetric list, sorted by size descending
    return extensionMap
        .Select(kvp => new FileTypeMetric(kvp.Key, kvp.Value.totalSize, kvp.Value.count))
        .OrderByDescending(m => m.TotalSizeBytes)
        .ToList();
}
// -- Private helpers -----------------------------------------------------
/// <summary>
/// Loads the storage metrics of one folder and wraps them in a
/// <see cref="StorageNode"/> with an empty child list.
/// </summary>
/// <param name="ctx">Client context used to resolve and load the folder.</param>
/// <param name="serverRelativeUrl">Server-relative URL of the folder to load.</param>
/// <param name="name">Display name stored on the node.</param>
/// <param name="siteTitle">Site title stored on the node.</param>
/// <param name="library">Library title stored on the node.</param>
/// <param name="indentLevel">Indent level stored on the node.</param>
/// <param name="progress">Passed to the query retry helper.</param>
/// <param name="ct">Checked on entry and passed to the query retry helper.</param>
/// <returns>The populated node; its <c>Children</c> list is empty.</returns>
private static async Task<StorageNode> LoadFolderNodeAsync(
    ClientContext ctx,
    string serverRelativeUrl,
    string name,
    string siteTitle,
    string library,
    int indentLevel,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    ct.ThrowIfCancellationRequested();

    Folder folder = ctx.Web.GetFolderByServerRelativeUrl(serverRelativeUrl);
    ctx.Load(folder,
        f => f.StorageMetrics,
        f => f.TimeLastModified,
        f => f.ServerRelativeUrl,
        f => f.Name);
    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

    var metrics = folder.StorageMetrics;

    // Prefer the metrics timestamp; fall back to the folder's own timestamp,
    // and report "unknown" (null) when neither carries a real value.
    DateTime? lastMod = null;
    if (metrics.LastModified > DateTime.MinValue)
    {
        lastMod = metrics.LastModified;
    }
    else if (folder.TimeLastModified > DateTime.MinValue)
    {
        lastMod = folder.TimeLastModified;
    }

    // A server-relative URL is rooted at the host, so it must be appended to
    // scheme + host only. Appending it to the full context URL would repeat the
    // site path (e.g. https://host/sites/a + /sites/a/Lib). If the context URL
    // is not an absolute URI, keep the plain concatenation as a fallback.
    string urlRoot = Uri.TryCreate(ctx.Url, UriKind.Absolute, out var siteUri)
        ? siteUri.GetLeftPart(UriPartial.Authority)
        : ctx.Url.TrimEnd('/');

    return new StorageNode
    {
        Name = name,
        Url = urlRoot + serverRelativeUrl,
        SiteTitle = siteTitle,
        Library = library,
        TotalSizeBytes = metrics.TotalSize,
        FileStreamSizeBytes = metrics.TotalFileStreamSize,
        TotalFileCount = metrics.TotalFileCount,
        LastModified = lastMod,
        IndentLevel = indentLevel,
        Children = new List<StorageNode>()
    };
}
/// <summary>
/// Loads the direct subfolders of a folder, adds a node for each one to
/// <paramref name="parentNode"/>, and recurses until <paramref name="maxDepth"/> is reached.
/// </summary>
/// <param name="ctx">Client context used for all folder requests.</param>
/// <param name="parentServerRelativeUrl">Server-relative URL of the folder whose children are listed.</param>
/// <param name="parentNode">Node that receives the child nodes.</param>
/// <param name="currentDepth">Depth of the children being collected; also used as their indent level.</param>
/// <param name="maxDepth">Deepest level to collect, inclusive.</param>
/// <param name="siteTitle">Site title stored on every created node.</param>
/// <param name="library">Library title stored on every created node.</param>
/// <param name="progress">Passed to the query retry helper.</param>
/// <param name="ct">Checked on entry and before each subfolder.</param>
private static async Task CollectSubfoldersAsync(
    ClientContext ctx,
    string parentServerRelativeUrl,
    StorageNode parentNode,
    int currentDepth,
    int maxDepth,
    string siteTitle,
    string library,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    // Past the requested depth: nothing to collect at this level
    if (currentDepth > maxDepth)
    {
        return;
    }

    ct.ThrowIfCancellationRequested();

    // Fetch name and URL of each direct child folder in a single request
    Folder container = ctx.Web.GetFolderByServerRelativeUrl(parentServerRelativeUrl);
    ctx.Load(container,
        p => p.Folders.Include(
            child => child.Name,
            child => child.ServerRelativeUrl));
    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

    // Whether children found at this level may themselves be expanded
    bool mayDescend = currentDepth < maxDepth;

    foreach (Folder entry in container.Folders)
    {
        ct.ThrowIfCancellationRequested();

        // The "Forms" folder and anything prefixed with "_" are treated as system folders
        bool isSystemFolder =
            entry.Name.Equals("Forms", StringComparison.OrdinalIgnoreCase) ||
            entry.Name.StartsWith("_", StringComparison.Ordinal);

        if (!isSystemFolder)
        {
            StorageNode node = await LoadFolderNodeAsync(
                ctx, entry.ServerRelativeUrl, entry.Name,
                siteTitle, library, currentDepth, progress, ct);

            if (mayDescend)
            {
                await CollectSubfoldersAsync(
                    ctx, entry.ServerRelativeUrl, node,
                    currentDepth + 1, maxDepth,
                    siteTitle, library, progress, ct);
            }

            // Attach only after the subtree below this node has been filled in
            parentNode.Children.Add(node);
        }
    }
}
}