- CamlQuery with RecursiveAll scope enumerates files across all non-hidden document libraries - Paginated 500-item batches avoid list view threshold issues - Files grouped by extension (case-insensitive) with summed size and count - Results returned as IReadOnlyList<FileTypeMetric> sorted by TotalSizeBytes descending - Existing CollectStorageAsync, LoadFolderNodeAsync, CollectSubfoldersAsync unchanged Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
251 lines
9.0 KiB
C#
251 lines
9.0 KiB
C#
using System.IO;
|
|
using Microsoft.SharePoint.Client;
|
|
using SharepointToolbox.Core.Helpers;
|
|
using SharepointToolbox.Core.Models;
|
|
|
|
namespace SharepointToolbox.Services;
|
|
|
|
/// <summary>
|
|
/// CSOM-based storage metrics scanner.
|
|
/// Port of PowerShell Collect-FolderStorage / Get-PnPFolderStorageMetric pattern.
|
|
/// </summary>
|
|
public class StorageService : IStorageService
|
|
{
|
|
/// <summary>
/// Builds one <see cref="StorageNode"/> per non-hidden document library of the
/// context's web, optionally descending into subfolders.
/// </summary>
/// <param name="ctx">Client context bound to the web to scan.</param>
/// <param name="options">
/// Scan options. Subfolders are collected only when <c>FolderDepth</c> is greater than zero,
/// and then down to that depth.
/// </param>
/// <param name="progress">Receives one report per library; also passed to the retry helper.</param>
/// <param name="ct">Cancellation token, checked on entry and before each library.</param>
/// <returns>The library-level nodes, in list enumeration order.</returns>
public async Task<IReadOnlyList<StorageNode>> CollectStorageAsync(
    ClientContext ctx,
    StorageScanOptions options,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    ct.ThrowIfCancellationRequested();

    // Load web-level metadata and the list catalogue in one round-trip.
    // Url / ServerRelativeUrl are not read in this method; they are still requested so that
    // ctx.Web is populated exactly as before for any caller that reads them afterwards.
    ctx.Load(ctx.Web,
        w => w.Title,
        w => w.Url,
        w => w.ServerRelativeUrl,
        w => w.Lists.Include(
            l => l.Title,
            l => l.Hidden,
            l => l.BaseType,
            l => l.RootFolder.ServerRelativeUrl));
    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

    string siteTitle = ctx.Web.Title;

    var libs = ctx.Web.Lists
        .Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
        .ToList();

    var result = new List<StorageNode>(libs.Count);

    int idx = 0;
    foreach (var lib in libs)
    {
        ct.ThrowIfCancellationRequested();
        idx++;
        progress.Report(new OperationProgress(idx, libs.Count,
            $"Loading storage metrics: {lib.Title} ({idx}/{libs.Count})"));

        string rootUrl = lib.RootFolder.ServerRelativeUrl;

        // The library root is the level-0 node; the library title doubles as its name.
        var libNode = await LoadFolderNodeAsync(
            ctx, rootUrl, lib.Title,
            siteTitle, lib.Title, 0, progress, ct);

        if (options.FolderDepth > 0)
        {
            await CollectSubfoldersAsync(
                ctx, rootUrl,
                libNode, 1, options.FolderDepth,
                siteTitle, lib.Title, progress, ct);
        }

        result.Add(libNode);
    }

    return result;
}
|
|
|
|
/// <summary>
/// Enumerates every file in all non-hidden document libraries of the context's web and
/// aggregates total size and file count per file extension.
/// </summary>
/// <param name="ctx">Client context bound to the web to scan.</param>
/// <param name="progress">Receives one report per library; also passed to the retry helper.</param>
/// <param name="ct">Cancellation token, checked on entry, before each library and before each page.</param>
/// <returns>
/// One <see cref="FileTypeMetric"/> per distinct extension (lower-cased, including the leading dot;
/// the empty string for files without an extension), sorted by total size descending.
/// </returns>
public async Task<IReadOnlyList<FileTypeMetric>> CollectFileTypeMetricsAsync(
    ClientContext ctx,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    ct.ThrowIfCancellationRequested();

    // Load all lists so the non-hidden document libraries can be picked out client-side.
    ctx.Load(ctx.Web,
        w => w.Lists.Include(
            l => l.Title,
            l => l.Hidden,
            l => l.BaseType,
            l => l.ItemCount));
    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

    var libs = ctx.Web.Lists
        .Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
        .ToList();

    // Accumulate file sizes by extension across all libraries.
    var extensionMap = new Dictionary<string, (long totalSize, int count)>(StringComparer.OrdinalIgnoreCase);

    int libIdx = 0;
    foreach (var lib in libs)
    {
        ct.ThrowIfCancellationRequested();
        libIdx++;
        progress.Report(new OperationProgress(libIdx, libs.Count,
            $"Scanning files by type: {lib.Title} ({libIdx}/{libs.Count})"));

        // Page through the whole library 500 items at a time.
        // The view deliberately has NO <Where> clause: filtering on a non-indexed column
        // such as FSObjType makes the query subject to the list view threshold on large
        // libraries even when RowLimit paging is used. Folders are skipped client-side.
        var query = new CamlQuery
        {
            ViewXml = @"<View Scope='RecursiveAll'>
  <ViewFields>
    <FieldRef Name='FileLeafRef' />
    <FieldRef Name='File_x0020_Size' />
    <FieldRef Name='FSObjType' />
  </ViewFields>
  <RowLimit Paged='TRUE'>500</RowLimit>
</View>"
        };

        ListItemCollection items;
        do
        {
            ct.ThrowIfCancellationRequested();
            items = lib.GetItems(query);
            ctx.Load(items, ic => ic.ListItemCollectionPosition,
                ic => ic.Include(
                    i => i.FileSystemObjectType,
                    i => i["FileLeafRef"],
                    i => i["File_x0020_Size"]));
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

            foreach (var item in items)
            {
                // Only files contribute to the per-extension totals.
                if (item.FileSystemObjectType != FileSystemObjectType.File)
                {
                    continue;
                }

                string fileName = item["FileLeafRef"]?.ToString() ?? string.Empty;
                string sizeStr = item["File_x0020_Size"]?.ToString() ?? "0";

                // Machine-readable value: parse culture-independently; unparsable counts as 0 bytes.
                if (!long.TryParse(
                        sizeStr,
                        System.Globalization.NumberStyles.Integer,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out long fileSize))
                {
                    fileSize = 0;
                }

                // ext is "" for extensionless files, ".docx" etc. for others.
                string ext = Path.GetExtension(fileName).ToLowerInvariant();

                // On a miss, 'existing' is the default tuple (0, 0), so one assignment covers both cases.
                extensionMap.TryGetValue(ext, out var existing);
                extensionMap[ext] = (existing.totalSize + fileSize, existing.count + 1);
            }

            // Move to next page; a null position means the last page was just read.
            query.ListItemCollectionPosition = items.ListItemCollectionPosition;
        }
        while (items.ListItemCollectionPosition != null);
    }

    // Convert to FileTypeMetric list, sorted by size descending.
    return extensionMap
        .Select(kvp => new FileTypeMetric(kvp.Key, kvp.Value.totalSize, kvp.Value.count))
        .OrderByDescending(m => m.TotalSizeBytes)
        .ToList();
}
|
|
|
|
// -- Private helpers -----------------------------------------------------
|
|
|
|
/// <summary>
/// Loads storage metrics for a single folder and wraps them in a <see cref="StorageNode"/>
/// with an empty child list.
/// </summary>
/// <param name="ctx">Client context used to resolve and load the folder.</param>
/// <param name="serverRelativeUrl">Server-relative URL of the folder.</param>
/// <param name="name">Display name stored on the node.</param>
/// <param name="siteTitle">Site title stored on the node.</param>
/// <param name="library">Library title stored on the node.</param>
/// <param name="indentLevel">Nesting level stored on the node.</param>
/// <param name="progress">Passed to the retry helper.</param>
/// <param name="ct">Cancellation token, checked before the request is issued.</param>
/// <returns>The populated node.</returns>
private static async Task<StorageNode> LoadFolderNodeAsync(
    ClientContext ctx,
    string serverRelativeUrl,
    string name,
    string siteTitle,
    string library,
    int indentLevel,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    ct.ThrowIfCancellationRequested();

    Folder folder = ctx.Web.GetFolderByServerRelativeUrl(serverRelativeUrl);
    ctx.Load(folder,
        f => f.StorageMetrics,
        f => f.TimeLastModified,
        f => f.ServerRelativeUrl,
        f => f.Name);
    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

    var metrics = folder.StorageMetrics;

    // Prefer the metrics timestamp, fall back to the folder's own, otherwise leave unset.
    DateTime? lastMod = null;
    if (metrics.LastModified > DateTime.MinValue)
    {
        lastMod = metrics.LastModified;
    }
    else if (folder.TimeLastModified > DateTime.MinValue)
    {
        lastMod = folder.TimeLastModified;
    }

    // serverRelativeUrl already contains the site path, so only the scheme and host of the
    // context URL may be prepended; appending it to the full ctx.Url would duplicate the
    // site path for any context that is not at the host root.
    string origin = new Uri(ctx.Url).GetLeftPart(UriPartial.Authority);

    return new StorageNode
    {
        Name = name,
        Url = origin + serverRelativeUrl,
        SiteTitle = siteTitle,
        Library = library,
        TotalSizeBytes = metrics.TotalSize,
        FileStreamSizeBytes = metrics.TotalFileStreamSize,
        TotalFileCount = metrics.TotalFileCount,
        LastModified = lastMod,
        IndentLevel = indentLevel,
        Children = new List<StorageNode>()
    };
}
|
|
|
|
/// <summary>
/// Recursively attaches child folder nodes to <paramref name="parentNode"/>, stopping once
/// <paramref name="maxDepth"/> has been reached.
/// </summary>
/// <param name="ctx">Client context used for all folder requests.</param>
/// <param name="parentServerRelativeUrl">Server-relative URL of the folder whose children are listed.</param>
/// <param name="parentNode">Node that receives the discovered children.</param>
/// <param name="currentDepth">Depth of the children being collected; also used as their indent level.</param>
/// <param name="maxDepth">Deepest level to collect (inclusive).</param>
/// <param name="siteTitle">Site title copied onto each child node.</param>
/// <param name="library">Library title copied onto each child node.</param>
/// <param name="progress">Passed to the retry helper and to nested calls.</param>
/// <param name="ct">Cancellation token, checked before the listing request and before each child.</param>
private static async Task CollectSubfoldersAsync(
    ClientContext ctx,
    string parentServerRelativeUrl,
    StorageNode parentNode,
    int currentDepth,
    int maxDepth,
    string siteTitle,
    string library,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    if (currentDepth > maxDepth)
    {
        return;
    }

    ct.ThrowIfCancellationRequested();

    // Fetch name and URL of the immediate child folders only.
    Folder container = ctx.Web.GetFolderByServerRelativeUrl(parentServerRelativeUrl);
    ctx.Load(container,
        p => p.Folders.Include(
            sub => sub.Name,
            sub => sub.ServerRelativeUrl));
    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

    // Constant for this call: whether grandchildren are still within the depth limit.
    bool descendFurther = currentDepth < maxDepth;

    foreach (Folder child in container.Folders)
    {
        ct.ThrowIfCancellationRequested();

        string childName = child.Name;

        // "Forms" and underscore-prefixed folders are treated as SharePoint system folders.
        bool isSystemFolder =
            childName.Equals("Forms", StringComparison.OrdinalIgnoreCase) ||
            childName.StartsWith("_", StringComparison.Ordinal);

        if (!isSystemFolder)
        {
            StorageNode node = await LoadFolderNodeAsync(
                ctx, child.ServerRelativeUrl, childName,
                siteTitle, library, currentDepth, progress, ct);

            if (descendFurther)
            {
                await CollectSubfoldersAsync(
                    ctx, child.ServerRelativeUrl, node,
                    currentDepth + 1, maxDepth,
                    siteTitle, library, progress, ct);
            }

            // Attach only after the subtree is complete, as in the original ordering.
            parentNode.Children.Add(node);
        }
    }
}
|
|
}
|