feat(09-02): implement CollectFileTypeMetricsAsync in StorageService

- CamlQuery with RecursiveAll scope enumerates files across all non-hidden document libraries
- Paginated 500-item batches avoid list view threshold issues
- Files grouped by extension (case-insensitive) with summed size and count
- Results returned as IReadOnlyList<FileTypeMetric> sorted by TotalSizeBytes descending
- Existing CollectStorageAsync, LoadFolderNodeAsync, CollectSubfoldersAsync unchanged

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dev
2026-04-07 15:24:09 +02:00
parent 18fe97f975
commit 81e3dcac6d

View File

@@ -1,3 +1,4 @@
using System.IO;
using Microsoft.SharePoint.Client; using Microsoft.SharePoint.Client;
using SharepointToolbox.Core.Helpers; using SharepointToolbox.Core.Helpers;
using SharepointToolbox.Core.Models; using SharepointToolbox.Core.Models;
@@ -64,6 +65,99 @@ public class StorageService : IStorageService
return result; return result;
} }
/// <summary>
/// Aggregates file count and total size per file extension across every
/// non-hidden document library of the web behind <paramref name="ctx"/>.
/// </summary>
/// <param name="ctx">Client context whose <c>Web</c> lists are enumerated.</param>
/// <param name="progress">
/// Receives one report per library scanned; also forwarded to the query helper.
/// </param>
/// <param name="ct">
/// Checked on entry, before each library and before each page; also forwarded
/// to the query helper.
/// </param>
/// <returns>
/// One metric per extension (lower-cased, including the leading dot; empty
/// string for files without an extension), ordered by total size descending.
/// </returns>
/// <exception cref="OperationCanceledException">
/// Thrown when <paramref name="ct"/> is cancelled.
/// </exception>
public async Task<IReadOnlyList<FileTypeMetric>> CollectFileTypeMetricsAsync(
    ClientContext ctx,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    ct.ThrowIfCancellationRequested();

    // Load the list properties needed to pick out non-hidden document libraries.
    ctx.Load(ctx.Web,
        w => w.Lists.Include(
            l => l.Title,
            l => l.Hidden,
            l => l.BaseType,
            l => l.ItemCount));
    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

    var libs = ctx.Web.Lists
        .Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
        .ToList();

    // Accumulate file sizes by extension across all libraries.
    var extensionMap = new Dictionary<string, (long totalSize, int count)>(StringComparer.OrdinalIgnoreCase);

    int libIdx = 0;
    foreach (var lib in libs)
    {
        ct.ThrowIfCancellationRequested();
        libIdx++;
        progress.Report(new OperationProgress(libIdx, libs.Count,
            $"Scanning files by type: {lib.Title} ({libIdx}/{libs.Count})"));

        // Nothing to enumerate: skip the server round trip entirely.
        if (lib.ItemCount == 0)
        {
            continue;
        }

        // Page through every item of the library, 500 rows at a time.
        // The view deliberately has NO <Where> clause: filtering on a
        // non-indexed field such as FSObjType is rejected by SharePoint once
        // the library exceeds the list view threshold, regardless of RowLimit.
        // Folders are filtered out client-side instead.
        var query = new CamlQuery
        {
            ViewXml = @"<View Scope='RecursiveAll'>
<ViewFields>
<FieldRef Name='FSObjType' />
<FieldRef Name='FileLeafRef' />
<FieldRef Name='File_x0020_Size' />
</ViewFields>
<RowLimit Paged='TRUE'>500</RowLimit>
</View>"
        };

        ListItemCollection items;
        do
        {
            ct.ThrowIfCancellationRequested();

            items = lib.GetItems(query);
            ctx.Load(items, ic => ic.ListItemCollectionPosition,
                ic => ic.Include(
                    i => i.FileSystemObjectType,
                    i => i["FileLeafRef"],
                    i => i["File_x0020_Size"]));
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

            foreach (var item in items)
            {
                // Only files contribute to the metrics (folders, etc. are skipped).
                if (item.FileSystemObjectType != FileSystemObjectType.File)
                {
                    continue;
                }

                string fileName = item["FileLeafRef"]?.ToString() ?? string.Empty;

                // Size is machine data: convert and parse culture-invariantly so the
                // result does not depend on the current thread culture.
                string sizeStr = Convert.ToString(
                    item["File_x0020_Size"],
                    System.Globalization.CultureInfo.InvariantCulture) ?? "0";
                if (!long.TryParse(
                        sizeStr,
                        System.Globalization.NumberStyles.Integer,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out long fileSize))
                {
                    fileSize = 0;
                }

                // ext is "" for extensionless files, ".docx" etc. for others.
                // Lower-casing keeps the reported key stable no matter which
                // casing happens to be encountered first.
                string ext = Path.GetExtension(fileName).ToLowerInvariant();

                if (extensionMap.TryGetValue(ext, out var existing))
                {
                    extensionMap[ext] = (existing.totalSize + fileSize, existing.count + 1);
                }
                else
                {
                    extensionMap[ext] = (fileSize, 1);
                }
            }

            // Continue from where this page ended; null means the last page was read.
            query.ListItemCollectionPosition = items.ListItemCollectionPosition;
        }
        while (items.ListItemCollectionPosition != null);
    }

    // Convert to FileTypeMetric list, sorted by size descending.
    return extensionMap
        .Select(kvp => new FileTypeMetric(kvp.Key, kvp.Value.totalSize, kvp.Value.count))
        .OrderByDescending(m => m.TotalSizeBytes)
        .ToList();
}
// -- Private helpers ----------------------------------------------------- // -- Private helpers -----------------------------------------------------
private static async Task<StorageNode> LoadFolderNodeAsync( private static async Task<StorageNode> LoadFolderNodeAsync(