using System.IO;
using Microsoft.SharePoint.Client;
using SharepointToolbox.Core.Helpers;
using SharepointToolbox.Core.Models;

namespace SharepointToolbox.Services;

/// <summary>
/// CSOM-based storage metrics scanner.
/// Captures every storage source SharePoint reports for a site:
/// document libraries (visible + hidden), the Preservation Hold Library,
/// list attachments, the recycle bin (1st + 2nd stage), and optionally
/// subsites. Each <see cref="StorageNode"/> carries a <see cref="StorageNodeKind"/>
/// so the caller can filter what appears in the report.
/// </summary>
/// <remarks>
/// NOTE(review): generic type arguments in this file were reconstructed from usage;
/// confirm the public return types against <see cref="IStorageService"/>.
/// </remarks>
public class StorageService : IStorageService
{
    // PreservationHoldLibrary base template id.
    private const int PreservationHoldTemplate = 851;

    // Paged CAML view used to enumerate every item of a library. The callers filter on
    // FSObjType themselves, so folders must be part of the result (RecursiveAll scope).
    // NOTE(review): the exact markup was reconstructed — confirm against version control.
    private const string AllItemsRecursiveViewXml =
        "<View Scope='RecursiveAll'><Query></Query><RowLimit Paged='TRUE'>5000</RowLimit></View>";

    /// <summary>
    /// Collects storage nodes for the context's web and, when
    /// <c>options.IncludeSubsites</c> is set, for every subsite below it.
    /// </summary>
    /// <param name="ctx">Client context whose <c>Web</c> is the scan root.</param>
    /// <param name="options">Switches selecting which storage sources are scanned.</param>
    /// <param name="progress">Receives progress messages while scanning.</param>
    /// <param name="ct">Token checked between remote calls.</param>
    /// <returns>The top-level nodes found; subsites appear as grouping nodes with children.</returns>
    public async Task<IReadOnlyList<StorageNode>> CollectStorageAsync(
        ClientContext ctx,
        StorageScanOptions options,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        var result = new List<StorageNode>();
        await CollectForWebAsync(ctx, ctx.Web, options, result, progress, ct, isRootWeb: true);
        return result;
    }

    /// <summary>
    /// Scans a single web (libraries, list attachments, recycle bin) and recurses into its
    /// subsites, appending the nodes it finds to <paramref name="result"/>.
    /// </summary>
    /// <param name="ctx">Client context used to execute the queries.</param>
    /// <param name="web">The web being scanned.</param>
    /// <param name="options">Switches selecting which storage sources are scanned.</param>
    /// <param name="result">Destination list for the nodes of this web.</param>
    /// <param name="progress">Receives progress messages while scanning.</param>
    /// <param name="ct">Token checked between remote calls.</param>
    /// <param name="isRootWeb">
    /// True only for the web the scan started from. The recycle bin is read from
    /// <c>ctx.Site.RecycleBin</c>, which belongs to the site collection rather than to an
    /// individual web, so it is scanned once for the root web and not again per subsite.
    /// </param>
    private async Task CollectForWebAsync(
        ClientContext ctx,
        Web web,
        StorageScanOptions options,
        List<StorageNode> result,
        IProgress<OperationProgress> progress,
        CancellationToken ct,
        bool isRootWeb = false)
    {
        ct.ThrowIfCancellationRequested();

        ctx.Load(web,
            w => w.Title,
            w => w.Url,
            w => w.ServerRelativeUrl,
            w => w.Lists.Include(
                l => l.Title,
                l => l.Hidden,
                l => l.BaseType,
                l => l.BaseTemplate,
                l => l.ItemCount,
                l => l.RootFolder.ServerRelativeUrl));

        if (options.IncludeSubsites)
        {
            ctx.Load(web.Webs, ws => ws.Include(w => w.ServerRelativeUrl, w => w.Title));
        }

        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        string siteTitle = web.Title;
        var lists = web.Lists.ToList();

        // ── Document libraries (incl. hidden + Preservation Hold) ───────────
        var docLibs = lists.Where(l => l.BaseType == BaseType.DocumentLibrary).ToList();
        int idx = 0;
        foreach (var lib in docLibs)
        {
            ct.ThrowIfCancellationRequested();
            idx++;

            StorageNodeKind kind = ClassifyLibrary(lib);
            if (kind == StorageNodeKind.HiddenLibrary && !options.IncludeHiddenLibraries)
            {
                continue;
            }

            if (kind == StorageNodeKind.PreservationHold && !options.IncludePreservationHold)
            {
                continue;
            }

            progress.Report(new OperationProgress(idx, docLibs.Count,
                $"Loading storage metrics: {lib.Title} ({idx}/{docLibs.Count})"));

            var libNode = await LoadFolderNodeAsync(
                ctx, web, lib.RootFolder.ServerRelativeUrl, lib.Title, siteTitle, lib.Title,
                0, kind, progress, ct);

            if (options.FolderDepth > 0)
            {
                await CollectSubfoldersAsync(
                    ctx, web, lib, lib.RootFolder.ServerRelativeUrl, libNode,
                    1, options.FolderDepth, siteTitle, lib.Title, kind, progress, ct);
            }

            result.Add(libNode);
        }

        // ── List attachments (non-document-library lists) ───────────────────
        if (options.IncludeListAttachments)
        {
            var nonDocLists = lists
                .Where(l => l.BaseType != BaseType.DocumentLibrary && !l.Hidden && l.ItemCount > 0)
                .ToList();

            int aIdx = 0;
            foreach (var list in nonDocLists)
            {
                ct.ThrowIfCancellationRequested();
                aIdx++;

                progress.Report(new OperationProgress(aIdx, nonDocLists.Count,
                    $"Scanning list attachments: {list.Title} ({aIdx}/{nonDocLists.Count})"));

                var attachNode = await TryLoadAttachmentsNodeAsync(ctx, web, list, siteTitle, progress, ct);
                if (attachNode != null && attachNode.TotalSizeBytes > 0)
                {
                    result.Add(attachNode);
                }
            }
        }

        // ── Recycle bin (stage 1 + stage 2) ─────────────────────────────────
        // Only for the root web: rescanning the site-collection bin for each subsite
        // would add the same items again under every subsite node.
        if (options.IncludeRecycleBin && isRootWeb)
        {
            progress.Report(OperationProgress.Indeterminate(
                $"Scanning recycle bin: {siteTitle}..."));

            var rbNodes = await LoadRecycleBinNodesAsync(ctx, siteTitle, progress, ct);
            result.AddRange(rbNodes);
        }

        // ── Subsites (recursive) ────────────────────────────────────────────
        if (options.IncludeSubsites)
        {
            var subwebs = web.Webs.ToList();
            foreach (var sub in subwebs)
            {
                ct.ThrowIfCancellationRequested();

                // Build a node header so subsite results are visually grouped.
                var subResult = new List<StorageNode>();
                await CollectForWebAsync(ctx, sub, options, subResult, progress, ct);
                if (subResult.Count == 0)
                {
                    continue;
                }

                var subRoot = new StorageNode
                {
                    Name = sub.Title,
                    Url = BuildAbsoluteUrl(ctx, sub.ServerRelativeUrl),
                    SiteTitle = sub.Title,
                    Library = string.Empty,
                    Kind = StorageNodeKind.Subsite,
                    IndentLevel = 0,
                    Children = subResult,
                    TotalSizeBytes = subResult.Sum(n => n.TotalSizeBytes),
                    FileStreamSizeBytes = subResult.Sum(n => n.FileStreamSizeBytes),
                    TotalFileCount = subResult.Sum(n => n.TotalFileCount)
                };
                result.Add(subRoot);
            }
        }
    }

    /// <summary>
    /// Maps a document library to its node kind: Preservation Hold (matched by base
    /// template id or by title), hidden library, or regular library.
    /// </summary>
    private static StorageNodeKind ClassifyLibrary(List lib)
    {
        if (lib.BaseTemplate == PreservationHoldTemplate ||
            string.Equals(lib.Title, "Preservation Hold Library", StringComparison.OrdinalIgnoreCase))
        {
            return StorageNodeKind.PreservationHold;
        }

        return lib.Hidden ? StorageNodeKind.HiddenLibrary : StorageNodeKind.Library;
    }

    /// <summary>
    /// Combines the scheme and host of the context URL with a server-relative URL.
    /// </summary>
    /// <remarks>
    /// A server-relative URL already contains the site path, so appending it to the full
    /// context URL would repeat that path whenever the context is not the host root.
    /// </remarks>
    private static string BuildAbsoluteUrl(ClientContext ctx, string serverRelativeUrl)
    {
        if (Uri.TryCreate(ctx.Url, UriKind.Absolute, out Uri? siteUri))
        {
            return siteUri.GetLeftPart(UriPartial.Authority) + serverRelativeUrl;
        }

        // Context URL is not an absolute URI: fall back to plain concatenation.
        return ctx.Url.TrimEnd('/') + serverRelativeUrl;
    }

    /// <summary>
    /// Picks the folder's last-modified timestamp: the storage-metrics value when set,
    /// otherwise the folder's own timestamp, otherwise <c>null</c>.
    /// </summary>
    private static DateTime? ResolveLastModified(Folder folder)
    {
        if (folder.StorageMetrics.LastModified > DateTime.MinValue)
        {
            return folder.StorageMetrics.LastModified;
        }

        if (folder.TimeLastModified > DateTime.MinValue)
        {
            return folder.TimeLastModified;
        }

        return null;
    }

    /// <summary>
    /// Loads the storage metrics of a list's <c>Attachments</c> folder.
    /// </summary>
    /// <returns>
    /// The attachments node, or <c>null</c> when the folder does not exist, holds no
    /// files, or cannot be read.
    /// </returns>
    private static async Task<StorageNode?> TryLoadAttachmentsNodeAsync(
        ClientContext ctx,
        Web web,
        List list,
        string siteTitle,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        // Per-list attachments live in {list root}/Attachments/{item id}/.
        // The Attachments folder may or may not exist depending on whether any
        // item ever had an attachment — guard with try/catch.
        string attachmentsUrl = list.RootFolder.ServerRelativeUrl.TrimEnd('/') + "/Attachments";
        try
        {
            var folder = web.GetFolderByServerRelativeUrl(attachmentsUrl);
            ctx.Load(folder,
                f => f.Exists,
                f => f.StorageMetrics,
                f => f.TimeLastModified,
                f => f.ServerRelativeUrl);
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

            if (!folder.Exists || folder.StorageMetrics.TotalFileCount == 0)
            {
                return null;
            }

            return new StorageNode
            {
                Name = $"[Attachments] {list.Title}",
                Url = BuildAbsoluteUrl(ctx, attachmentsUrl),
                SiteTitle = siteTitle,
                Library = list.Title,
                Kind = StorageNodeKind.ListAttachments,
                TotalSizeBytes = folder.StorageMetrics.TotalSize,
                FileStreamSizeBytes = folder.StorageMetrics.TotalFileStreamSize,
                TotalFileCount = folder.StorageMetrics.TotalFileCount,
                LastModified = ResolveLastModified(folder),
                IndentLevel = 0,
                Children = new List<StorageNode>()
            };
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Attachments folder absent for this list — not an error.
            // Cancellation is deliberately excluded so a cancelled scan still stops.
            return null;
        }
    }

    /// <summary>
    /// Aggregates the items of <c>ctx.Site.RecycleBin</c> into at most two nodes, one per
    /// recycle-bin stage. Returns an empty list when the bin is empty or cannot be read.
    /// </summary>
    private static async Task<List<StorageNode>> LoadRecycleBinNodesAsync(
        ClientContext ctx,
        string siteTitle,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        var nodes = new List<StorageNode>();
        try
        {
            var bin = ctx.Site.RecycleBin;
            ctx.Load(bin, b => b.Include(
                i => i.Size,
                i => i.ItemState,
                i => i.DeletedDate));
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

            long stage1Size = 0, stage2Size = 0;
            int stage1Count = 0, stage2Count = 0;
            DateTime? stage1Last = null, stage2Last = null;

            foreach (var item in bin)
            {
                if (item.ItemState == RecycleBinItemState.SecondStageRecycleBin)
                {
                    stage2Size += item.Size;
                    stage2Count++;
                    if (stage2Last is null || item.DeletedDate > stage2Last)
                    {
                        stage2Last = item.DeletedDate;
                    }
                }
                else
                {
                    stage1Size += item.Size;
                    stage1Count++;
                    if (stage1Last is null || item.DeletedDate > stage1Last)
                    {
                        stage1Last = item.DeletedDate;
                    }
                }
            }

            if (stage1Count > 0)
            {
                nodes.Add(CreateRecycleBinNode(
                    "[Recycle Bin] First-stage", siteTitle, stage1Size, stage1Count, stage1Last));
            }

            if (stage2Count > 0)
            {
                nodes.Add(CreateRecycleBinNode(
                    "[Recycle Bin] Second-stage", siteTitle, stage2Size, stage2Count, stage2Last));
            }
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Insufficient permission to read recycle bin or feature unavailable.
            // Cancellation is deliberately excluded so a cancelled scan still stops.
        }

        return nodes;
    }

    /// <summary>
    /// Builds a recycle-bin node; the summed item size is used for both size fields and
    /// the most recent deletion date as the last-modified value.
    /// </summary>
    private static StorageNode CreateRecycleBinNode(
        string name, string siteTitle, long size, int count, DateTime? lastDeleted)
    {
        return new StorageNode
        {
            Name = name,
            SiteTitle = siteTitle,
            Library = "RecycleBin",
            Kind = StorageNodeKind.RecycleBin,
            TotalSizeBytes = size,
            FileStreamSizeBytes = size,
            TotalFileCount = count,
            LastModified = lastDeleted,
            IndentLevel = 0,
            Children = new List<StorageNode>()
        };
    }

    /// <summary>
    /// Totals file size and file count per file extension across all visible document
    /// libraries of the context's web.
    /// </summary>
    /// <param name="ctx">Client context whose <c>Web</c> is scanned.</param>
    /// <param name="progress">Receives one progress message per library.</param>
    /// <param name="ct">Token checked per library and per page.</param>
    /// <returns>One metric per extension (case-insensitive), largest total size first.</returns>
    public async Task<IReadOnlyList<FileTypeMetric>> CollectFileTypeMetricsAsync(
        ClientContext ctx,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        ct.ThrowIfCancellationRequested();

        ctx.Load(ctx.Web,
            w => w.Lists.Include(
                l => l.Title,
                l => l.Hidden,
                l => l.BaseType,
                l => l.ItemCount));
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        var libs = ctx.Web.Lists
            .Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
            .ToList();

        var extensionMap = new Dictionary<string, (long totalSize, int count)>(StringComparer.OrdinalIgnoreCase);

        int libIdx = 0;
        foreach (var lib in libs)
        {
            ct.ThrowIfCancellationRequested();
            libIdx++;

            progress.Report(new OperationProgress(libIdx, libs.Count,
                $"Scanning files by type: {lib.Title} ({libIdx}/{libs.Count})"));

            var query = new CamlQuery { ViewXml = AllItemsRecursiveViewXml };

            ListItemCollection items;
            do
            {
                ct.ThrowIfCancellationRequested();

                items = lib.GetItems(query);
                ctx.Load(items,
                    ic => ic.ListItemCollectionPosition,
                    ic => ic.Include(
                        i => i["FSObjType"],
                        i => i["FileLeafRef"],
                        i => i["File_x0020_Size"]));
                await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

                foreach (var item in items)
                {
                    // Only rows whose FSObjType is "0" are counted as files.
                    if (item["FSObjType"]?.ToString() != "0")
                    {
                        continue;
                    }

                    string fileName = item["FileLeafRef"]?.ToString() ?? string.Empty;
                    string sizeStr = item["File_x0020_Size"]?.ToString() ?? "0";
                    if (!long.TryParse(sizeStr, out long fileSize))
                    {
                        fileSize = 0;
                    }

                    string ext = Path.GetExtension(fileName).ToLowerInvariant();

                    if (extensionMap.TryGetValue(ext, out var existing))
                    {
                        extensionMap[ext] = (existing.totalSize + fileSize, existing.count + 1);
                    }
                    else
                    {
                        extensionMap[ext] = (fileSize, 1);
                    }
                }

                // Continue from where this page ended; a null position ends the loop.
                query.ListItemCollectionPosition = items.ListItemCollectionPosition;
            }
            while (items.ListItemCollectionPosition != null);
        }

        return extensionMap
            .Select(kvp => new FileTypeMetric(kvp.Key, kvp.Value.totalSize, kvp.Value.count))
            .OrderByDescending(m => m.TotalSizeBytes)
            .ToList();
    }

    /// <summary>
    /// Recomputes sizes and file counts, by enumerating list items, for top-level library
    /// nodes that report zero files or contain a descendant folder reporting zero files.
    /// </summary>
    /// <remarks>
    /// Nodes are updated in place. Libraries are matched by title against the document
    /// libraries of the context's web; nodes without a match are left untouched.
    /// </remarks>
    /// <param name="ctx">Client context whose <c>Web</c> owns the libraries.</param>
    /// <param name="nodes">Nodes produced by a previous scan.</param>
    /// <param name="progress">Receives one progress message per processed library.</param>
    /// <param name="ct">Token checked per library and per page.</param>
    public async Task BackfillZeroNodesAsync(
        ClientContext ctx,
        IReadOnlyList<StorageNode> nodes,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        // Only backfill nodes scanned through CSOM document-library StorageMetrics —
        // synthetic categories (recycle bin, list attachments, subsite headers)
        // cannot be re-derived from File_x0020_Size.
        var libNodes = nodes
            .Where(n => n.IndentLevel == 0 &&
                        (n.Kind == StorageNodeKind.Library ||
                         n.Kind == StorageNodeKind.HiddenLibrary ||
                         n.Kind == StorageNodeKind.PreservationHold))
            .ToList();

        var needsBackfill = libNodes
            .Where(lib => lib.TotalFileCount == 0 || HasZeroChild(lib))
            .ToList();
        if (needsBackfill.Count == 0)
        {
            return;
        }

        ctx.Load(ctx.Web,
            w => w.ServerRelativeUrl,
            w => w.Lists.Include(
                l => l.Title,
                l => l.Hidden,
                l => l.BaseType,
                l => l.RootFolder.ServerRelativeUrl));
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        var libs = ctx.Web.Lists
            .Where(l => l.BaseType == BaseType.DocumentLibrary)
            .ToDictionary(l => l.Title, StringComparer.OrdinalIgnoreCase);

        int idx = 0;
        foreach (var libNode in needsBackfill)
        {
            ct.ThrowIfCancellationRequested();
            idx++;

            if (!libs.TryGetValue(libNode.Library, out var lib))
            {
                continue;
            }

            progress.Report(new OperationProgress(idx, needsBackfill.Count,
                $"Counting files: {libNode.Name} ({idx}/{needsBackfill.Count})"));

            string libRootSrl = lib.RootFolder.ServerRelativeUrl.TrimEnd('/');

            var folderLookup = new Dictionary<string, StorageNode>(StringComparer.OrdinalIgnoreCase);
            BuildFolderLookup(libNode, libRootSrl, folderLookup);

            // Remember the reported totals so they can be restored below wherever the
            // recount comes out smaller.
            var originalTotals = new Dictionary<StorageNode, long>();
            CaptureTotals(libNode, originalTotals);

            ResetNodeCounts(libNode);

            var query = new CamlQuery { ViewXml = AllItemsRecursiveViewXml };

            ListItemCollection items;
            do
            {
                ct.ThrowIfCancellationRequested();

                items = lib.GetItems(query);
                ctx.Load(items,
                    ic => ic.ListItemCollectionPosition,
                    ic => ic.Include(
                        i => i["FSObjType"],
                        i => i["FileDirRef"],
                        i => i["File_x0020_Size"],
                        i => i["SMTotalSize"],
                        i => i["SMTotalFileStreamSize"]));
                await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

                foreach (var item in items)
                {
                    if (item["FSObjType"]?.ToString() != "0")
                    {
                        continue;
                    }

                    long streamSize = ParseLong(item["File_x0020_Size"]);
                    long smStream = ParseLong(SafeGet(item, "SMTotalFileStreamSize"));
                    long smTotal = ParseLong(SafeGet(item, "SMTotalSize"));

                    // Prefer the SM* values when they are present and positive.
                    if (smStream > 0)
                    {
                        streamSize = smStream;
                    }

                    long totalSize = smTotal > 0 ? smTotal : streamSize;

                    string fileDirRef = item["FileDirRef"]?.ToString() ?? "";

                    libNode.TotalSizeBytes += totalSize;
                    libNode.FileStreamSizeBytes += streamSize;
                    libNode.TotalFileCount++;

                    // The library node was already credited above; credit the deepest
                    // known folder only when it is a different node.
                    var matchedFolder = FindDeepestFolder(fileDirRef, folderLookup);
                    if (matchedFolder != null && matchedFolder != libNode)
                    {
                        matchedFolder.TotalSizeBytes += totalSize;
                        matchedFolder.FileStreamSizeBytes += streamSize;
                        matchedFolder.TotalFileCount++;
                    }
                }

                query.ListItemCollectionPosition = items.ListItemCollectionPosition;
            }
            while (items.ListItemCollectionPosition != null);

            // Keep the larger of the originally reported and the recounted total size.
            foreach (var kv in originalTotals)
            {
                if (kv.Value > kv.Key.TotalSizeBytes)
                {
                    kv.Key.TotalSizeBytes = kv.Value;
                }
            }
        }
    }

    /// <summary>
    /// Reads <c>ctx.Site.Usage.Storage</c>.
    /// </summary>
    /// <returns>The reported storage value, or 0 when it cannot be read.</returns>
    public async Task<long> GetSiteUsageStorageBytesAsync(
        ClientContext ctx,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        try
        {
            ctx.Load(ctx.Site, s => s.Usage);
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
            return ctx.Site.Usage.Storage;
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Best effort: an unreadable usage value is reported as 0, but a cancelled
            // call must not be mistaken for "0 bytes".
            return 0L;
        }
    }

    /// <summary>Parses a field value as a long; returns 0 for null or unparsable values.</summary>
    private static long ParseLong(object? value)
    {
        if (value == null)
        {
            return 0;
        }

        return long.TryParse(value.ToString(), out long n) ? n : 0;
    }

    /// <summary>Reads a list-item field, returning <c>null</c> when the indexer throws.</summary>
    private static object? SafeGet(ListItem item, string fieldName)
    {
        try
        {
            return item[fieldName];
        }
        catch
        {
            return null;
        }
    }

    /// <summary>Records the current total size of a node and all of its descendants.</summary>
    private static void CaptureTotals(StorageNode node, Dictionary<StorageNode, long> map)
    {
        map[node] = node.TotalSizeBytes;
        foreach (var child in node.Children)
        {
            CaptureTotals(child, map);
        }
    }

    /// <summary>Returns true when any descendant of the node reports zero files.</summary>
    private static bool HasZeroChild(StorageNode node)
    {
        foreach (var child in node.Children)
        {
            if (child.TotalFileCount == 0)
            {
                return true;
            }

            if (HasZeroChild(child))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>Zeroes sizes and file count on a node and all of its descendants.</summary>
    private static void ResetNodeCounts(StorageNode node)
    {
        node.TotalSizeBytes = 0;
        node.FileStreamSizeBytes = 0;
        node.TotalFileCount = 0;
        foreach (var child in node.Children)
        {
            ResetNodeCounts(child);
        }
    }

    /// <summary>
    /// Fills <paramref name="lookup"/> with path-to-node entries for a node and its
    /// descendants. A node at indent level 0 maps to <paramref name="parentPath"/> itself;
    /// deeper nodes map to the parent path plus their name.
    /// </summary>
    private static void BuildFolderLookup(
        StorageNode node, string parentPath, Dictionary<string, StorageNode> lookup)
    {
        string nodePath = node.IndentLevel == 0
            ? parentPath
            : parentPath + "/" + node.Name;
        lookup[nodePath] = node;

        foreach (var child in node.Children)
        {
            BuildFolderLookup(child, nodePath, lookup);
        }
    }

    /// <summary>
    /// Finds the node registered for the given directory path or, failing that, for its
    /// nearest registered ancestor path. Returns <c>null</c> when none is registered.
    /// </summary>
    private static StorageNode? FindDeepestFolder(
        string fileDirRef, Dictionary<string, StorageNode> lookup)
    {
        string path = fileDirRef.TrimEnd('/');
        while (!string.IsNullOrEmpty(path))
        {
            if (lookup.TryGetValue(path, out var node))
            {
                return node;
            }

            // Drop the last path segment and retry with the parent path.
            int lastSlash = path.LastIndexOf('/');
            if (lastSlash <= 0)
            {
                break;
            }

            path = path[..lastSlash];
        }

        return null;
    }

    // ── Library/folder loading helpers ──────────────────────────────────────

    /// <summary>
    /// Loads the storage metrics of one folder and wraps them in a <see cref="StorageNode"/>.
    /// </summary>
    /// <param name="ctx">Client context used to execute the query.</param>
    /// <param name="web">Web that owns the folder; the folder is resolved against it.</param>
    /// <param name="serverRelativeUrl">Server-relative URL of the folder.</param>
    /// <param name="name">Display name for the node.</param>
    /// <param name="siteTitle">Title of the owning site, copied onto the node.</param>
    /// <param name="library">Title of the owning library, copied onto the node.</param>
    /// <param name="indentLevel">Depth of the node below the library root (0 = root).</param>
    /// <param name="kind">Node kind inherited from the owning library.</param>
    /// <param name="progress">Passed to the query helper.</param>
    /// <param name="ct">Token checked before the query is issued.</param>
    private static async Task<StorageNode> LoadFolderNodeAsync(
        ClientContext ctx,
        Web web,
        string serverRelativeUrl,
        string name,
        string siteTitle,
        string library,
        int indentLevel,
        StorageNodeKind kind,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        ct.ThrowIfCancellationRequested();

        Folder folder = web.GetFolderByServerRelativeUrl(serverRelativeUrl);
        ctx.Load(folder,
            f => f.StorageMetrics,
            f => f.TimeLastModified,
            f => f.ServerRelativeUrl,
            f => f.Name);
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        return new StorageNode
        {
            Name = name,
            Url = BuildAbsoluteUrl(ctx, serverRelativeUrl),
            SiteTitle = siteTitle,
            Library = library,
            Kind = kind,
            TotalSizeBytes = folder.StorageMetrics.TotalSize,
            FileStreamSizeBytes = folder.StorageMetrics.TotalFileStreamSize,
            TotalFileCount = folder.StorageMetrics.TotalFileCount,
            LastModified = ResolveLastModified(folder),
            IndentLevel = indentLevel,
            Children = new List<StorageNode>()
        };
    }

    /// <summary>
    /// Adds a child node for each direct subfolder of the given folder, recursing until
    /// <paramref name="maxDepth"/> is reached. Folders named "Forms" or starting with an
    /// underscore are skipped.
    /// </summary>
    private static async Task CollectSubfoldersAsync(
        ClientContext ctx,
        Web web,
        List list,
        string parentServerRelativeUrl,
        StorageNode parentNode,
        int currentDepth,
        int maxDepth,
        string siteTitle,
        string library,
        StorageNodeKind kind,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        if (currentDepth > maxDepth)
        {
            return;
        }

        ct.ThrowIfCancellationRequested();

        // Enumerate first, load metrics afterwards, so no metrics query is issued while
        // the paginated enumeration is still in flight.
        var subfolders = new List<(string Name, string ServerRelativeUrl)>();

        await foreach (var item in SharePointPaginationHelper.GetItemsInFolderAsync(
            ctx, list, parentServerRelativeUrl, recursive: false,
            viewFields: new[] { "FSObjType", "FileLeafRef", "FileRef" },
            ct: ct))
        {
            // Only rows whose FSObjType is "1" are treated as folders.
            if (item["FSObjType"]?.ToString() != "1")
            {
                continue;
            }

            string name = item["FileLeafRef"]?.ToString() ?? string.Empty;
            string url = item["FileRef"]?.ToString() ?? string.Empty;

            if (string.IsNullOrEmpty(name) || string.IsNullOrEmpty(url))
            {
                continue;
            }

            if (name.Equals("Forms", StringComparison.OrdinalIgnoreCase) ||
                name.StartsWith("_", StringComparison.Ordinal))
            {
                continue;
            }

            subfolders.Add((name, url));
        }

        foreach (var sub in subfolders)
        {
            ct.ThrowIfCancellationRequested();

            var childNode = await LoadFolderNodeAsync(
                ctx, web, sub.ServerRelativeUrl, sub.Name, siteTitle, library,
                currentDepth, kind, progress, ct);

            if (currentDepth < maxDepth)
            {
                await CollectSubfoldersAsync(
                    ctx, web, list, sub.ServerRelativeUrl, childNode,
                    currentDepth + 1, maxDepth, siteTitle, library, kind, progress, ct);
            }

            parentNode.Children.Add(childNode);
        }
    }
}