using System.IO;
using Microsoft.SharePoint.Client;
using SharepointToolbox.Core.Helpers;
using SharepointToolbox.Core.Models;
namespace SharepointToolbox.Services;
/// <summary>
/// CSOM-based storage metrics scanner.
/// Captures every storage source SharePoint reports for a site:
/// document libraries (visible + hidden), the Preservation Hold Library,
/// list attachments, the recycle bin (1st + 2nd stage), and optionally
/// subsites. Each <see cref="StorageNode"/> carries a <see cref="StorageNodeKind"/>
/// so the caller can filter what appears in the report.
/// </summary>
public class StorageService : IStorageService
{
// Base template id that ClassifyLibrary compares against List.BaseTemplate to
// recognise the Preservation Hold Library.
// NOTE(review): Microsoft's ListTemplateType reference lists 851 as "Asset Library";
// the Preservation Hold Library may use a different id (1310 has been reported).
// Confirm against a real tenant — if 851 is wrong, asset libraries are misclassified.
private const int PreservationHoldTemplate = 851;
/// <summary>
/// Scans the web bound to <paramref name="ctx"/> and returns the top-level storage
/// nodes gathered for it (and, when enabled in <paramref name="options"/>, its subsites).
/// </summary>
/// <param name="ctx">Client context whose <c>Web</c> is the scan root.</param>
/// <param name="options">Switches selecting which storage sources are included.</param>
/// <param name="progress">Receives progress updates while scanning.</param>
/// <param name="ct">Token used to cancel the scan.</param>
/// <returns>The collected top-level nodes, in the order they were scanned.</returns>
public async Task<IReadOnlyList<StorageNode>> CollectStorageAsync(
    ClientContext ctx,
    StorageScanOptions options,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    var result = new List<StorageNode>();
    await CollectForWebAsync(ctx, ctx.Web, options, result, progress, ct);
    return result;
}
/// <summary>
/// Collects storage nodes for a single web and appends them to <paramref name="result"/>:
/// document libraries first, then (when enabled in <paramref name="options"/>) list
/// attachments, the recycle bin and, recursively, subsites.
/// </summary>
/// <param name="ctx">Client context used to load and execute every query.</param>
/// <param name="web">The web to scan; recursion passes each subweb in turn.</param>
/// <param name="options">Switches controlling which sources are scanned and the folder depth.</param>
/// <param name="result">Output list that receives this web's top-level nodes.</param>
/// <param name="progress">Receives progress updates.</param>
/// <param name="ct">Checked before each library, list and subsite.</param>
/// <param name="scanRecycleBin">
/// When <c>false</c> the recycle bin is skipped even if the options enable it.
/// The recycle bin is read from <c>ctx.Site</c>, i.e. it is the same collection for
/// every web reached through this context, so recursive subsite calls pass
/// <c>false</c> to avoid adding (and summing) the same items once per subsite.
/// </param>
private async Task CollectForWebAsync(
    ClientContext ctx,
    Web web,
    StorageScanOptions options,
    List<StorageNode> result,
    IProgress<OperationProgress> progress,
    CancellationToken ct,
    bool scanRecycleBin = true)
{
    ct.ThrowIfCancellationRequested();

    ctx.Load(web,
        w => w.Title,
        w => w.Url,
        w => w.ServerRelativeUrl,
        w => w.Lists.Include(
            l => l.Title,
            l => l.Hidden,
            l => l.BaseType,
            l => l.BaseTemplate,
            l => l.ItemCount,
            l => l.RootFolder.ServerRelativeUrl));
    if (options.IncludeSubsites)
    {
        ctx.Load(web.Webs, ws => ws.Include(w => w.ServerRelativeUrl, w => w.Title));
    }
    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

    string siteTitle = web.Title;
    var lists = web.Lists.ToList();

    // ── Document libraries (incl. hidden + Preservation Hold) ───────────
    var docLibs = lists.Where(l => l.BaseType == BaseType.DocumentLibrary).ToList();
    int idx = 0;
    foreach (var lib in docLibs)
    {
        ct.ThrowIfCancellationRequested();
        // The index advances even for skipped libraries so the reported
        // position always refers to the full library list.
        idx++;

        StorageNodeKind kind = ClassifyLibrary(lib);
        if (kind == StorageNodeKind.HiddenLibrary && !options.IncludeHiddenLibraries) continue;
        if (kind == StorageNodeKind.PreservationHold && !options.IncludePreservationHold) continue;

        progress.Report(new OperationProgress(idx, docLibs.Count,
            $"Loading storage metrics: {lib.Title} ({idx}/{docLibs.Count})"));

        var libNode = await LoadFolderNodeAsync(
            ctx, lib.RootFolder.ServerRelativeUrl, lib.Title,
            siteTitle, lib.Title, 0, kind, progress, ct);

        if (options.FolderDepth > 0)
        {
            await CollectSubfoldersAsync(
                ctx, lib, lib.RootFolder.ServerRelativeUrl,
                libNode, 1, options.FolderDepth,
                siteTitle, lib.Title, kind, progress, ct);
        }

        result.Add(libNode);
    }

    // ── List attachments (non-document-library lists) ───────────────────
    if (options.IncludeListAttachments)
    {
        var nonDocLists = lists
            .Where(l => l.BaseType != BaseType.DocumentLibrary && !l.Hidden && l.ItemCount > 0)
            .ToList();
        int aIdx = 0;
        foreach (var list in nonDocLists)
        {
            ct.ThrowIfCancellationRequested();
            aIdx++;
            progress.Report(new OperationProgress(aIdx, nonDocLists.Count,
                $"Scanning list attachments: {list.Title} ({aIdx}/{nonDocLists.Count})"));

            var attachNode = await TryLoadAttachmentsNodeAsync(ctx, list, siteTitle, progress, ct);
            if (attachNode != null && attachNode.TotalSizeBytes > 0)
            {
                result.Add(attachNode);
            }
        }
    }

    // ── Recycle bin (stage 1 + stage 2) ─────────────────────────────────
    if (options.IncludeRecycleBin && scanRecycleBin)
    {
        progress.Report(OperationProgress.Indeterminate(
            $"Scanning recycle bin: {siteTitle}..."));
        var rbNodes = await LoadRecycleBinNodesAsync(ctx, siteTitle, progress, ct);
        result.AddRange(rbNodes);
    }

    // ── Subsites (recursive) ────────────────────────────────────────────
    if (options.IncludeSubsites)
    {
        var subwebs = web.Webs.ToList();
        foreach (var sub in subwebs)
        {
            ct.ThrowIfCancellationRequested();

            // Collect into a separate list so the subsite's nodes can be grouped
            // under one header node instead of being mixed into the parent's.
            var subResult = new List<StorageNode>();
            await CollectForWebAsync(ctx, sub, options, subResult, progress, ct,
                scanRecycleBin: false);
            if (subResult.Count == 0) continue;

            var subRoot = new StorageNode
            {
                Name = sub.Title,
                // sub.Url was loaded by the recursive call above. Concatenating
                // ctx.Url with the server-relative URL would repeat the site path.
                Url = sub.Url,
                SiteTitle = sub.Title,
                Library = string.Empty,
                Kind = StorageNodeKind.Subsite,
                IndentLevel = 0,
                Children = subResult,
                TotalSizeBytes = subResult.Sum(n => n.TotalSizeBytes),
                FileStreamSizeBytes = subResult.Sum(n => n.FileStreamSizeBytes),
                TotalFileCount = subResult.Sum(n => n.TotalFileCount)
            };
            result.Add(subRoot);
        }
    }
}
/// <summary>
/// Maps a document library to its <see cref="StorageNodeKind"/>.
/// A library counts as Preservation Hold when its base template matches
/// <c>PreservationHoldTemplate</c> or its title is "Preservation Hold Library"
/// (case-insensitive); otherwise the Hidden flag decides between hidden and regular.
/// </summary>
/// <param name="lib">Library with Title, Hidden and BaseTemplate already loaded.</param>
/// <returns>The kind to record on the library's storage node.</returns>
private static StorageNodeKind ClassifyLibrary(List lib)
{
    bool matchesTemplate = lib.BaseTemplate == PreservationHoldTemplate;
    bool isPreservationHold = matchesTemplate ||
        string.Equals(lib.Title, "Preservation Hold Library", StringComparison.OrdinalIgnoreCase);

    if (isPreservationHold)
    {
        return StorageNodeKind.PreservationHold;
    }

    if (lib.Hidden)
    {
        return StorageNodeKind.HiddenLibrary;
    }

    return StorageNodeKind.Library;
}
/// <summary>
/// Loads storage metrics for a list's <c>Attachments</c> folder and wraps them in a
/// <see cref="StorageNodeKind.ListAttachments"/> node.
/// </summary>
/// <param name="ctx">Client context used to load the folder.</param>
/// <param name="list">List whose RootFolder.ServerRelativeUrl and Title are already loaded.</param>
/// <param name="siteTitle">Title of the owning web, copied onto the node.</param>
/// <param name="progress">Passed through to the query helper.</param>
/// <param name="ct">Token used to cancel the query.</param>
/// <returns>
/// The attachments node, or <c>null</c> when the folder does not exist, holds no
/// files, or could not be loaded.
/// </returns>
/// <exception cref="OperationCanceledException">Cancellation is propagated, not swallowed.</exception>
private static async Task<StorageNode?> TryLoadAttachmentsNodeAsync(
    ClientContext ctx,
    List list,
    string siteTitle,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    // The Attachments folder may or may not exist depending on whether any item
    // ever had an attachment, so a failed load is treated as "no attachments".
    string attachmentsUrl = list.RootFolder.ServerRelativeUrl.TrimEnd('/') + "/Attachments";
    try
    {
        var folder = ctx.Web.GetFolderByServerRelativeUrl(attachmentsUrl);
        ctx.Load(folder,
            f => f.Exists,
            f => f.StorageMetrics,
            f => f.TimeLastModified,
            f => f.ServerRelativeUrl);
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        if (!folder.Exists || folder.StorageMetrics.TotalFileCount == 0)
        {
            return null;
        }

        // Prefer the metrics timestamp; fall back to the folder's own timestamp.
        DateTime? lastMod = null;
        if (folder.StorageMetrics.LastModified > DateTime.MinValue)
        {
            lastMod = folder.StorageMetrics.LastModified;
        }
        else if (folder.TimeLastModified > DateTime.MinValue)
        {
            lastMod = folder.TimeLastModified;
        }

        // A server-relative URL already starts with the site path, so it must be
        // appended to scheme + host only; appending it to ctx.Url repeats that path.
        string origin = new Uri(ctx.Url).GetLeftPart(UriPartial.Authority);

        return new StorageNode
        {
            Name = $"[Attachments] {list.Title}",
            Url = origin + attachmentsUrl,
            SiteTitle = siteTitle,
            Library = list.Title,
            Kind = StorageNodeKind.ListAttachments,
            TotalSizeBytes = folder.StorageMetrics.TotalSize,
            FileStreamSizeBytes = folder.StorageMetrics.TotalFileStreamSize,
            TotalFileCount = folder.StorageMetrics.TotalFileCount,
            LastModified = lastMod,
            IndentLevel = 0,
            Children = new List<StorageNode>()
        };
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Attachments folder absent or unreadable for this list — not an error.
        // Cancellation is deliberately excluded so a cancelled scan stops here
        // instead of being reported as "no attachments".
        return null;
    }
}
/// <summary>
/// Reads <c>ctx.Site.RecycleBin</c> and summarises it as up to two nodes: one for
/// items in the second-stage bin and one for every other item (reported as first-stage).
/// </summary>
/// <param name="ctx">Client context whose site recycle bin is read.</param>
/// <param name="siteTitle">Title copied onto the produced nodes.</param>
/// <param name="progress">Passed through to the query helper.</param>
/// <param name="ct">Token used to cancel the query.</param>
/// <returns>
/// Zero, one or two nodes; a stage with no items produces no node. An empty list
/// is also returned when the recycle bin cannot be read.
/// </returns>
/// <exception cref="OperationCanceledException">Cancellation is propagated, not swallowed.</exception>
private static async Task<List<StorageNode>> LoadRecycleBinNodesAsync(
    ClientContext ctx,
    string siteTitle,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    var nodes = new List<StorageNode>();
    try
    {
        var bin = ctx.Site.RecycleBin;
        ctx.Load(bin, b => b.Include(
            i => i.Size,
            i => i.ItemState,
            i => i.DeletedDate));
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        long stage1Size = 0, stage2Size = 0;
        int stage1Count = 0, stage2Count = 0;
        DateTime? stage1Last = null, stage2Last = null;

        foreach (var item in bin)
        {
            if (item.ItemState == RecycleBinItemState.SecondStageRecycleBin)
            {
                stage2Size += item.Size;
                stage2Count++;
                if (stage2Last is null || item.DeletedDate > stage2Last) stage2Last = item.DeletedDate;
            }
            else
            {
                stage1Size += item.Size;
                stage1Count++;
                if (stage1Last is null || item.DeletedDate > stage1Last) stage1Last = item.DeletedDate;
            }
        }

        if (stage1Count > 0)
        {
            nodes.Add(BuildNode("[Recycle Bin] First-stage", stage1Size, stage1Count, stage1Last));
        }
        if (stage2Count > 0)
        {
            nodes.Add(BuildNode("[Recycle Bin] Second-stage", stage2Size, stage2Count, stage2Last));
        }
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Insufficient permission to read the recycle bin or feature unavailable:
        // report nothing for it. Cancellation is excluded so it still stops the scan.
    }
    return nodes;

    // Builds one summary node; the item size is used for both size fields and the
    // most recent deletion date stands in for "last modified".
    StorageNode BuildNode(string name, long size, int count, DateTime? lastDeleted) => new StorageNode
    {
        Name = name,
        SiteTitle = siteTitle,
        Library = "RecycleBin",
        Kind = StorageNodeKind.RecycleBin,
        TotalSizeBytes = size,
        FileStreamSizeBytes = size,
        TotalFileCount = count,
        LastModified = lastDeleted,
        IndentLevel = 0,
        Children = new List<StorageNode>()
    };
}
/// <summary>
/// Aggregates file size and file count per file extension across every visible
/// document library of <c>ctx.Web</c>.
/// </summary>
/// <param name="ctx">Client context whose web is scanned.</param>
/// <param name="progress">Receives one update per library.</param>
/// <param name="ct">Checked before each library and each page of items.</param>
/// <returns>
/// One <see cref="FileTypeMetric"/> per extension (lower-cased, empty for files
/// without one), ordered by total size descending.
/// </returns>
public async Task<IReadOnlyList<FileTypeMetric>> CollectFileTypeMetricsAsync(
    ClientContext ctx,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    ct.ThrowIfCancellationRequested();

    ctx.Load(ctx.Web,
        w => w.Lists.Include(
            l => l.Title,
            l => l.Hidden,
            l => l.BaseType,
            l => l.ItemCount));
    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

    var libs = ctx.Web.Lists
        .Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
        .ToList();

    var extensionMap = new Dictionary<string, (long totalSize, int count)>(
        StringComparer.OrdinalIgnoreCase);

    int libIdx = 0;
    foreach (var lib in libs)
    {
        ct.ThrowIfCancellationRequested();
        libIdx++;
        progress.Report(new OperationProgress(libIdx, libs.Count,
            $"Scanning files by type: {lib.Title} ({libIdx}/{libs.Count})"));

        // Recursive view so items in every folder are returned, fetched in pages
        // of 5000; folders are filtered out client-side below.
        var query = new CamlQuery
        {
            ViewXml = "<View Scope='RecursiveAll'><RowLimit Paged='TRUE'>5000</RowLimit></View>"
        };

        ListItemCollection items;
        do
        {
            ct.ThrowIfCancellationRequested();
            items = lib.GetItems(query);
            ctx.Load(items, ic => ic.ListItemCollectionPosition,
                ic => ic.Include(
                    i => i["FSObjType"],
                    i => i["FileLeafRef"],
                    i => i["File_x0020_Size"]));
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

            foreach (var item in items)
            {
                // FSObjType "0" marks a file; anything else (folders) is skipped.
                if (item["FSObjType"]?.ToString() != "0") continue;

                string fileName = item["FileLeafRef"]?.ToString() ?? string.Empty;
                string sizeStr = item["File_x0020_Size"]?.ToString() ?? "0";
                if (!long.TryParse(sizeStr, out long fileSize))
                {
                    fileSize = 0;
                }

                string ext = Path.GetExtension(fileName).ToLowerInvariant();

                // A missing key leaves the running tuple at (0, 0).
                extensionMap.TryGetValue(ext, out var running);
                extensionMap[ext] = (running.totalSize + fileSize, running.count + 1);
            }

            // A null position after this page ends the paging loop.
            query.ListItemCollectionPosition = items.ListItemCollectionPosition;
        }
        while (items.ListItemCollectionPosition != null);
    }

    return extensionMap
        .Select(kvp => new FileTypeMetric(kvp.Key, kvp.Value.totalSize, kvp.Value.count))
        .OrderByDescending(m => m.TotalSizeBytes)
        .ToList();
}
/// <summary>
/// Re-counts files for top-level library nodes whose metrics look empty.
/// A library is re-scanned when its own <c>TotalFileCount</c> is 0 or any descendant
/// node's is; its counters (and its descendants') are reset and rebuilt by
/// enumerating the library's items. Nodes are updated in place.
/// </summary>
/// <param name="ctx">Client context whose web owns the libraries.</param>
/// <param name="nodes">Nodes from a previous scan; only top-level library kinds are considered.</param>
/// <param name="progress">Receives one update per re-scanned library.</param>
/// <param name="ct">Checked before each library and each page of items.</param>
public async Task BackfillZeroNodesAsync(
    ClientContext ctx,
    IReadOnlyList<StorageNode> nodes,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    // Only backfill nodes scanned through CSOM document-library StorageMetrics —
    // synthetic categories (recycle bin, list attachments, subsite headers)
    // cannot be re-derived from File_x0020_Size.
    var libNodes = nodes.Where(n => n.IndentLevel == 0 &&
        (n.Kind == StorageNodeKind.Library ||
         n.Kind == StorageNodeKind.HiddenLibrary ||
         n.Kind == StorageNodeKind.PreservationHold)).ToList();

    var needsBackfill = libNodes.Where(lib =>
        lib.TotalFileCount == 0 || HasZeroChild(lib)).ToList();
    if (needsBackfill.Count == 0) return;

    ctx.Load(ctx.Web, w => w.ServerRelativeUrl,
        w => w.Lists.Include(
            l => l.Title, l => l.Hidden, l => l.BaseType,
            l => l.RootFolder.ServerRelativeUrl));
    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

    var libs = ctx.Web.Lists
        .Where(l => l.BaseType == BaseType.DocumentLibrary)
        .ToDictionary(l => l.Title, StringComparer.OrdinalIgnoreCase);

    int idx = 0;
    foreach (var libNode in needsBackfill)
    {
        ct.ThrowIfCancellationRequested();
        idx++;

        // Nodes whose library is not found on this web are left untouched.
        if (!libs.TryGetValue(libNode.Library, out var lib)) continue;

        progress.Report(new OperationProgress(idx, needsBackfill.Count,
            $"Counting files: {libNode.Name} ({idx}/{needsBackfill.Count})"));

        string libRootSrl = lib.RootFolder.ServerRelativeUrl.TrimEnd('/');

        var folderLookup = new Dictionary<string, StorageNode>(StringComparer.OrdinalIgnoreCase);
        BuildFolderLookup(libNode, libRootSrl, folderLookup);

        // Remember the pre-reset sizes so a smaller recount never replaces them.
        var originalTotals = new Dictionary<StorageNode, long>();
        CaptureTotals(libNode, originalTotals);
        ResetNodeCounts(libNode);

        // Recursive view so items in every folder are returned, fetched in pages
        // of 5000; folders are filtered out client-side below.
        var query = new CamlQuery
        {
            ViewXml = "<View Scope='RecursiveAll'><RowLimit Paged='TRUE'>5000</RowLimit></View>"
        };

        ListItemCollection items;
        do
        {
            ct.ThrowIfCancellationRequested();
            items = lib.GetItems(query);
            ctx.Load(items, ic => ic.ListItemCollectionPosition,
                ic => ic.Include(
                    i => i["FSObjType"],
                    i => i["FileDirRef"],
                    i => i["File_x0020_Size"],
                    i => i["SMTotalSize"],
                    i => i["SMTotalFileStreamSize"]));
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

            foreach (var item in items)
            {
                // FSObjType "0" marks a file; anything else (folders) is skipped.
                if (item["FSObjType"]?.ToString() != "0") continue;

                // Prefer the SM* values when they are positive, otherwise fall
                // back to the plain file size.
                long streamSize = ParseLong(item["File_x0020_Size"]);
                long smStream = ParseLong(SafeGet(item, "SMTotalFileStreamSize"));
                long smTotal = ParseLong(SafeGet(item, "SMTotalSize"));

                if (smStream > 0) streamSize = smStream;
                long totalSize = smTotal > 0 ? smTotal : streamSize;

                string fileDirRef = item["FileDirRef"]?.ToString() ?? "";

                // The library root is credited with every file.
                libNode.TotalSizeBytes += totalSize;
                libNode.FileStreamSizeBytes += streamSize;
                libNode.TotalFileCount++;

                // Walk from the file's folder up towards the root and credit every
                // scanned folder on the way. Crediting only the deepest match would
                // leave intermediate folders without the files stored in their
                // scanned subfolders, although the root does include them.
                string path = fileDirRef.TrimEnd('/');
                while (!string.IsNullOrEmpty(path))
                {
                    if (folderLookup.TryGetValue(path, out var folderNode) && folderNode != libNode)
                    {
                        folderNode.TotalSizeBytes += totalSize;
                        folderNode.FileStreamSizeBytes += streamSize;
                        folderNode.TotalFileCount++;
                    }

                    int lastSlash = path.LastIndexOf('/');
                    if (lastSlash <= 0) break;
                    path = path[..lastSlash];
                }
            }

            // A null position after this page ends the paging loop.
            query.ListItemCollectionPosition = items.ListItemCollectionPosition;
        }
        while (items.ListItemCollectionPosition != null);

        // Keep the original total wherever it was larger than the recount.
        foreach (var kv in originalTotals)
        {
            if (kv.Value > kv.Key.TotalSizeBytes)
            {
                kv.Key.TotalSizeBytes = kv.Value;
            }
        }
    }
}
/// <summary>
/// Returns the storage figure reported by <c>ctx.Site.Usage</c>.
/// </summary>
/// <param name="ctx">Client context whose site usage is loaded.</param>
/// <param name="progress">Passed through to the query helper.</param>
/// <param name="ct">Token used to cancel the query.</param>
/// <returns><c>Usage.Storage</c>, or 0 when the usage information cannot be loaded.</returns>
/// <exception cref="OperationCanceledException">Cancellation is propagated, not reported as 0.</exception>
public async Task<long> GetSiteUsageStorageBytesAsync(
    ClientContext ctx,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    try
    {
        ctx.Load(ctx.Site, s => s.Usage);
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
        return ctx.Site.Usage.Storage;
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Best effort: an unreadable usage record is reported as 0. Cancellation
        // is excluded so a cancelled call is not mistaken for an empty site.
        return 0L;
    }
}
/// <summary>
/// Converts a loosely-typed field value to a <see cref="long"/> by parsing its
/// string form.
/// </summary>
/// <param name="value">Value to convert; may be <c>null</c>.</param>
/// <returns>The parsed number, or 0 when the value is null or not an integer.</returns>
private static long ParseLong(object? value)
{
    if (value is null)
    {
        return 0;
    }

    bool parsed = long.TryParse(value.ToString(), out long number);
    return parsed ? number : 0;
}
/// <summary>
/// Reads a field from a list item, treating any failure of the indexer as "no value".
/// </summary>
/// <param name="item">Item to read from.</param>
/// <param name="fieldName">Internal name of the field.</param>
/// <returns>The field value, or <c>null</c> when reading it throws.</returns>
private static object? SafeGet(ListItem item, string fieldName)
{
    try
    {
        object? fieldValue = item[fieldName];
        return fieldValue;
    }
    catch
    {
        return null;
    }
}
/// <summary>
/// Records the current <c>TotalSizeBytes</c> of <paramref name="node"/> and of every
/// descendant into <paramref name="map"/>, keyed by node.
/// </summary>
/// <param name="node">Root of the subtree to snapshot.</param>
/// <param name="map">Receives one entry per visited node; existing entries are overwritten.</param>
private static void CaptureTotals(StorageNode node, Dictionary<StorageNode, long> map)
{
    map[node] = node.TotalSizeBytes;
    foreach (var child in node.Children)
    {
        CaptureTotals(child, map);
    }
}
/// <summary>
/// Tells whether any descendant of <paramref name="node"/> has a
/// <c>TotalFileCount</c> of 0. The node itself is not inspected.
/// </summary>
/// <param name="node">Root of the subtree to search.</param>
/// <returns><c>true</c> as soon as one descendant with a zero file count is found.</returns>
private static bool HasZeroChild(StorageNode node)
{
    // Any() stops at the first match, checking each child before its own subtree.
    return node.Children.Any(child => child.TotalFileCount == 0 || HasZeroChild(child));
}
/// <summary>
/// Zeroes the size and file-count fields of <paramref name="node"/> and of every
/// descendant.
/// </summary>
/// <param name="node">Root of the subtree to reset.</param>
private static void ResetNodeCounts(StorageNode node)
{
    // Explicit work stack instead of recursion; the visiting order does not
    // matter because every node receives the same values.
    var pending = new Stack<StorageNode>();
    pending.Push(node);

    while (pending.Count > 0)
    {
        StorageNode current = pending.Pop();
        current.TotalSizeBytes = 0;
        current.FileStreamSizeBytes = 0;
        current.TotalFileCount = 0;

        foreach (var child in current.Children)
        {
            pending.Push(child);
        }
    }
}
/// <summary>
/// Fills <paramref name="lookup"/> with one entry per node in the subtree, keyed by
/// the node's folder path. A node at indent level 0 maps to
/// <paramref name="parentPath"/> itself; any other node maps to
/// <c>parentPath + "/" + Name</c>.
/// </summary>
/// <param name="node">Root of the subtree to index.</param>
/// <param name="parentPath">Path of the containing folder, without trailing slash.</param>
/// <param name="lookup">Receives the path-to-node entries; existing keys are overwritten.</param>
private static void BuildFolderLookup(StorageNode node, string parentPath,
    Dictionary<string, StorageNode> lookup)
{
    string nodePath = node.IndentLevel == 0
        ? parentPath
        : parentPath + "/" + node.Name;
    lookup[nodePath] = node;

    foreach (var child in node.Children)
    {
        BuildFolderLookup(child, nodePath, lookup);
    }
}
/// <summary>
/// Finds the node registered for <paramref name="fileDirRef"/> or, failing that, for
/// its closest ancestor path, by repeatedly dropping the last path segment.
/// </summary>
/// <param name="fileDirRef">Folder path of a file; a trailing slash is ignored.</param>
/// <param name="lookup">Path-to-node map to search.</param>
/// <returns>The first node found while walking upwards, or <c>null</c> when none matches.</returns>
private static StorageNode? FindDeepestFolder(string fileDirRef,
    Dictionary<string, StorageNode> lookup)
{
    string path = fileDirRef.TrimEnd('/');
    while (!string.IsNullOrEmpty(path))
    {
        if (lookup.TryGetValue(path, out var node))
        {
            return node;
        }

        // Stop once no parent segment is left (no slash, or only the leading one).
        int lastSlash = path.LastIndexOf('/');
        if (lastSlash <= 0)
        {
            break;
        }
        path = path[..lastSlash];
    }
    return null;
}
// ── Library/folder loading helpers ──────────────────────────────────────
/// <summary>
/// Loads the storage metrics of one folder and wraps them in a <see cref="StorageNode"/>
/// with an empty child list.
/// </summary>
/// <param name="ctx">Client context used to load the folder.</param>
/// <param name="serverRelativeUrl">Server-relative URL of the folder.</param>
/// <param name="name">Display name stored on the node.</param>
/// <param name="siteTitle">Title of the owning web, copied onto the node.</param>
/// <param name="library">Title of the owning library, copied onto the node.</param>
/// <param name="indentLevel">Nesting level stored on the node (0 for a library root).</param>
/// <param name="kind">Kind stored on the node.</param>
/// <param name="progress">Passed through to the query helper.</param>
/// <param name="ct">Token used to cancel the query.</param>
/// <returns>The populated node.</returns>
private static async Task<StorageNode> LoadFolderNodeAsync(
    ClientContext ctx,
    string serverRelativeUrl,
    string name,
    string siteTitle,
    string library,
    int indentLevel,
    StorageNodeKind kind,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    ct.ThrowIfCancellationRequested();

    Folder folder = ctx.Web.GetFolderByServerRelativeUrl(serverRelativeUrl);
    ctx.Load(folder,
        f => f.StorageMetrics,
        f => f.TimeLastModified,
        f => f.ServerRelativeUrl,
        f => f.Name);
    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

    // Prefer the metrics timestamp; fall back to the folder's own timestamp.
    DateTime? lastMod = null;
    if (folder.StorageMetrics.LastModified > DateTime.MinValue)
    {
        lastMod = folder.StorageMetrics.LastModified;
    }
    else if (folder.TimeLastModified > DateTime.MinValue)
    {
        lastMod = folder.TimeLastModified;
    }

    // A server-relative URL already starts with the site path, so it must be
    // appended to scheme + host only; appending it to ctx.Url repeats that path.
    string origin = new Uri(ctx.Url).GetLeftPart(UriPartial.Authority);

    return new StorageNode
    {
        Name = name,
        Url = origin + serverRelativeUrl,
        SiteTitle = siteTitle,
        Library = library,
        Kind = kind,
        TotalSizeBytes = folder.StorageMetrics.TotalSize,
        FileStreamSizeBytes = folder.StorageMetrics.TotalFileStreamSize,
        TotalFileCount = folder.StorageMetrics.TotalFileCount,
        LastModified = lastMod,
        IndentLevel = indentLevel,
        Children = new List<StorageNode>()
    };
}
/// <summary>
/// Adds a child node to <paramref name="parentNode"/> for each direct subfolder of
/// <paramref name="parentServerRelativeUrl"/>, recursing until
/// <paramref name="maxDepth"/> is reached. Folders named "Forms" or starting with
/// an underscore are skipped.
/// </summary>
/// <param name="ctx">Client context used for all queries.</param>
/// <param name="list">Library that owns the folders.</param>
/// <param name="parentServerRelativeUrl">Server-relative URL of the folder to expand.</param>
/// <param name="parentNode">Node that receives the child nodes.</param>
/// <param name="currentDepth">Depth of the children being created; also their indent level.</param>
/// <param name="maxDepth">Deepest level to expand.</param>
/// <param name="siteTitle">Title of the owning web, copied onto each node.</param>
/// <param name="library">Title of the owning library, copied onto each node.</param>
/// <param name="kind">Kind copied onto each node.</param>
/// <param name="progress">Passed through when loading each folder's metrics.</param>
/// <param name="ct">Checked before enumerating and before each subfolder.</param>
private static async Task CollectSubfoldersAsync(
    ClientContext ctx,
    List list,
    string parentServerRelativeUrl,
    StorageNode parentNode,
    int currentDepth,
    int maxDepth,
    string siteTitle,
    string library,
    StorageNodeKind kind,
    IProgress<OperationProgress> progress,
    CancellationToken ct)
{
    if (currentDepth > maxDepth) return;
    ct.ThrowIfCancellationRequested();

    // Enumerate first, load metrics afterwards, so the folder listing is complete
    // before further queries are issued on the same context.
    var subfolders = new List<(string Name, string ServerRelativeUrl)>();

    await foreach (var item in SharePointPaginationHelper.GetItemsInFolderAsync(
        ctx, list, parentServerRelativeUrl, recursive: false,
        viewFields: new[] { "FSObjType", "FileLeafRef", "FileRef" },
        ct: ct))
    {
        // FSObjType "1" marks a folder; files are ignored here.
        if (item["FSObjType"]?.ToString() != "1") continue;

        string name = item["FileLeafRef"]?.ToString() ?? string.Empty;
        string url = item["FileRef"]?.ToString() ?? string.Empty;
        if (string.IsNullOrEmpty(name) || string.IsNullOrEmpty(url)) continue;

        if (name.Equals("Forms", StringComparison.OrdinalIgnoreCase) ||
            name.StartsWith("_", StringComparison.Ordinal))
        {
            continue;
        }

        subfolders.Add((name, url));
    }

    foreach (var sub in subfolders)
    {
        ct.ThrowIfCancellationRequested();

        var childNode = await LoadFolderNodeAsync(
            ctx, sub.ServerRelativeUrl, sub.Name,
            siteTitle, library, currentDepth, kind, progress, ct);

        if (currentDepth < maxDepth)
        {
            await CollectSubfoldersAsync(
                ctx, list, sub.ServerRelativeUrl, childNode,
                currentDepth + 1, maxDepth,
                siteTitle, library, kind, progress, ct);
        }

        parentNode.Children.Add(childNode);
    }
}
}