using System.IO;
using Microsoft.SharePoint.Client;
using SharepointToolbox.Core.Helpers;
using SharepointToolbox.Core.Models;
namespace SharepointToolbox.Services;
///
/// CSOM-based storage metrics scanner.
/// Port of PowerShell Collect-FolderStorage / Get-PnPFolderStorageMetric pattern.
///
public class StorageService : IStorageService
{
public async Task> CollectStorageAsync(
ClientContext ctx,
StorageScanOptions options,
IProgress progress,
CancellationToken ct)
{
ct.ThrowIfCancellationRequested();
// Load web-level metadata in one round-trip
ctx.Load(ctx.Web,
w => w.Title,
w => w.Url,
w => w.ServerRelativeUrl,
w => w.Lists.Include(
l => l.Title,
l => l.Hidden,
l => l.BaseType,
l => l.RootFolder.ServerRelativeUrl));
await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
string webSrl = ctx.Web.ServerRelativeUrl.TrimEnd('/');
string siteTitle = ctx.Web.Title;
var result = new List();
var libs = ctx.Web.Lists
.Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
.ToList();
int idx = 0;
foreach (var lib in libs)
{
ct.ThrowIfCancellationRequested();
idx++;
progress.Report(new OperationProgress(idx, libs.Count,
$"Loading storage metrics: {lib.Title} ({idx}/{libs.Count})"));
var libNode = await LoadFolderNodeAsync(
ctx, lib.RootFolder.ServerRelativeUrl, lib.Title,
siteTitle, lib.Title, 0, progress, ct);
if (options.FolderDepth > 0)
{
await CollectSubfoldersAsync(
ctx, lib.RootFolder.ServerRelativeUrl,
libNode, 1, options.FolderDepth,
siteTitle, lib.Title, progress, ct);
}
result.Add(libNode);
}
return result;
}
public async Task> CollectFileTypeMetricsAsync(
ClientContext ctx,
IProgress progress,
CancellationToken ct)
{
ct.ThrowIfCancellationRequested();
// Load all non-hidden document libraries
ctx.Load(ctx.Web,
w => w.Lists.Include(
l => l.Title,
l => l.Hidden,
l => l.BaseType,
l => l.ItemCount));
await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
var libs = ctx.Web.Lists
.Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
.ToList();
// Accumulate file sizes by extension across all libraries
var extensionMap = new Dictionary(StringComparer.OrdinalIgnoreCase);
int libIdx = 0;
foreach (var lib in libs)
{
ct.ThrowIfCancellationRequested();
libIdx++;
progress.Report(new OperationProgress(libIdx, libs.Count,
$"Scanning files by type: {lib.Title} ({libIdx}/{libs.Count})"));
// Use CamlQuery to enumerate all files in the library
// Paginate with 500 items per batch to avoid list view threshold issues
var query = new CamlQuery
{
ViewXml = @"
0
500
"
};
ListItemCollection items;
do
{
ct.ThrowIfCancellationRequested();
items = lib.GetItems(query);
ctx.Load(items, ic => ic.ListItemCollectionPosition,
ic => ic.Include(
i => i["FileLeafRef"],
i => i["File_x0020_Size"]));
await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
foreach (var item in items)
{
string fileName = item["FileLeafRef"]?.ToString() ?? string.Empty;
string sizeStr = item["File_x0020_Size"]?.ToString() ?? "0";
if (!long.TryParse(sizeStr, out long fileSize))
fileSize = 0;
string ext = Path.GetExtension(fileName).ToLowerInvariant();
// ext is "" for extensionless files, ".docx" etc. for others
if (extensionMap.TryGetValue(ext, out var existing))
extensionMap[ext] = (existing.totalSize + fileSize, existing.count + 1);
else
extensionMap[ext] = (fileSize, 1);
}
// Move to next page
query.ListItemCollectionPosition = items.ListItemCollectionPosition;
}
while (items.ListItemCollectionPosition != null);
}
// Convert to FileTypeMetric list, sorted by size descending
return extensionMap
.Select(kvp => new FileTypeMetric(kvp.Key, kvp.Value.totalSize, kvp.Value.count))
.OrderByDescending(m => m.TotalSizeBytes)
.ToList();
}
public async Task BackfillZeroNodesAsync(
ClientContext ctx,
IReadOnlyList nodes,
IProgress progress,
CancellationToken ct)
{
// Find root-level library nodes that have any zero-valued nodes in their tree
var libNodes = nodes.Where(n => n.IndentLevel == 0).ToList();
var needsBackfill = libNodes.Where(lib =>
lib.TotalFileCount == 0 || HasZeroChild(lib)).ToList();
if (needsBackfill.Count == 0) return;
// Load libraries to get RootFolder.ServerRelativeUrl for path matching
ctx.Load(ctx.Web, w => w.ServerRelativeUrl,
w => w.Lists.Include(
l => l.Title, l => l.Hidden, l => l.BaseType,
l => l.RootFolder.ServerRelativeUrl));
await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
var libs = ctx.Web.Lists
.Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
.ToDictionary(l => l.Title, StringComparer.OrdinalIgnoreCase);
int idx = 0;
foreach (var libNode in needsBackfill)
{
ct.ThrowIfCancellationRequested();
idx++;
if (!libs.TryGetValue(libNode.Name, out var lib)) continue;
progress.Report(new OperationProgress(idx, needsBackfill.Count,
$"Counting files: {libNode.Name} ({idx}/{needsBackfill.Count})"));
string libRootSrl = lib.RootFolder.ServerRelativeUrl.TrimEnd('/');
// Build a lookup of all folder nodes in this library's tree (by server-relative path)
var folderLookup = new Dictionary(StringComparer.OrdinalIgnoreCase);
BuildFolderLookup(libNode, libRootSrl, folderLookup);
// Reset all nodes in this tree to zero before accumulating
ResetNodeCounts(libNode);
// Enumerate all files with their folder path
var query = new CamlQuery
{
ViewXml = @"
0
500
"
};
ListItemCollection items;
do
{
ct.ThrowIfCancellationRequested();
items = lib.GetItems(query);
ctx.Load(items, ic => ic.ListItemCollectionPosition,
ic => ic.Include(
i => i["FileDirRef"],
i => i["File_x0020_Size"]));
await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
foreach (var item in items)
{
long size = 0;
if (long.TryParse(item["File_x0020_Size"]?.ToString() ?? "0", out long s))
size = s;
string fileDirRef = item["FileDirRef"]?.ToString() ?? "";
// Always count toward the library root
libNode.TotalSizeBytes += size;
libNode.FileStreamSizeBytes += size;
libNode.TotalFileCount++;
// Also count toward the most specific matching subfolder
var matchedFolder = FindDeepestFolder(fileDirRef, folderLookup);
if (matchedFolder != null && matchedFolder != libNode)
{
matchedFolder.TotalSizeBytes += size;
matchedFolder.FileStreamSizeBytes += size;
matchedFolder.TotalFileCount++;
}
}
query.ListItemCollectionPosition = items.ListItemCollectionPosition;
}
while (items.ListItemCollectionPosition != null);
}
}
private static bool HasZeroChild(StorageNode node)
{
foreach (var child in node.Children)
{
if (child.TotalFileCount == 0) return true;
if (HasZeroChild(child)) return true;
}
return false;
}
private static void ResetNodeCounts(StorageNode node)
{
node.TotalSizeBytes = 0;
node.FileStreamSizeBytes = 0;
node.TotalFileCount = 0;
foreach (var child in node.Children)
ResetNodeCounts(child);
}
private static void BuildFolderLookup(StorageNode node, string parentPath,
Dictionary lookup)
{
string nodePath = node.IndentLevel == 0
? parentPath
: parentPath + "/" + node.Name;
lookup[nodePath] = node;
foreach (var child in node.Children)
BuildFolderLookup(child, nodePath, lookup);
}
private static StorageNode? FindDeepestFolder(string fileDirRef,
Dictionary lookup)
{
// fileDirRef is the server-relative folder path, e.g. "/sites/hr/Shared Documents/Reports"
// Try exact match, then walk up until we find a match
string path = fileDirRef.TrimEnd('/');
while (!string.IsNullOrEmpty(path))
{
if (lookup.TryGetValue(path, out var node))
return node;
int lastSlash = path.LastIndexOf('/');
if (lastSlash <= 0) break;
path = path[..lastSlash];
}
return null;
}
// -- Private helpers -----------------------------------------------------
private static async Task LoadFolderNodeAsync(
ClientContext ctx,
string serverRelativeUrl,
string name,
string siteTitle,
string library,
int indentLevel,
IProgress progress,
CancellationToken ct)
{
ct.ThrowIfCancellationRequested();
Folder folder = ctx.Web.GetFolderByServerRelativeUrl(serverRelativeUrl);
ctx.Load(folder,
f => f.StorageMetrics,
f => f.TimeLastModified,
f => f.ServerRelativeUrl,
f => f.Name);
await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
DateTime? lastMod = folder.StorageMetrics.LastModified > DateTime.MinValue
? folder.StorageMetrics.LastModified
: folder.TimeLastModified > DateTime.MinValue
? folder.TimeLastModified
: (DateTime?)null;
return new StorageNode
{
Name = name,
Url = ctx.Url.TrimEnd('/') + serverRelativeUrl,
SiteTitle = siteTitle,
Library = library,
TotalSizeBytes = folder.StorageMetrics.TotalSize,
FileStreamSizeBytes = folder.StorageMetrics.TotalFileStreamSize,
TotalFileCount = folder.StorageMetrics.TotalFileCount,
LastModified = lastMod,
IndentLevel = indentLevel,
Children = new List()
};
}
private static async Task CollectSubfoldersAsync(
ClientContext ctx,
string parentServerRelativeUrl,
StorageNode parentNode,
int currentDepth,
int maxDepth,
string siteTitle,
string library,
IProgress progress,
CancellationToken ct)
{
if (currentDepth > maxDepth) return;
ct.ThrowIfCancellationRequested();
// Load direct child folders of this folder
Folder parentFolder = ctx.Web.GetFolderByServerRelativeUrl(parentServerRelativeUrl);
ctx.Load(parentFolder,
f => f.Folders.Include(
sf => sf.Name,
sf => sf.ServerRelativeUrl));
await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
foreach (Folder subFolder in parentFolder.Folders)
{
ct.ThrowIfCancellationRequested();
// Skip SharePoint system folders
if (subFolder.Name.Equals("Forms", StringComparison.OrdinalIgnoreCase) ||
subFolder.Name.StartsWith("_", StringComparison.Ordinal))
continue;
var childNode = await LoadFolderNodeAsync(
ctx, subFolder.ServerRelativeUrl, subFolder.Name,
siteTitle, library, currentDepth, progress, ct);
if (currentDepth < maxDepth)
{
await CollectSubfoldersAsync(
ctx, subFolder.ServerRelativeUrl, childNode,
currentDepth + 1, maxDepth,
siteTitle, library, progress, ct);
}
parentNode.Children.Add(childNode);
}
}
}