using System.IO;
using Microsoft.SharePoint.Client;
using SharepointToolbox.Web.Core.Helpers;
using SharepointToolbox.Web.Core.Models;
using SpWeb = Microsoft.SharePoint.Client.Web;

namespace SharepointToolbox.Web.Services;

/// <summary>
/// Collects storage figures (per library, per folder, list attachments, recycle bin and
/// subsites) and per-file-extension metrics from a SharePoint site through CSOM.
/// </summary>
/// <remarks>
/// NOTE(review): the generic type arguments in this file were reconstructed from how the
/// values are used; confirm the public signatures against <see cref="IStorageService"/>.
/// </remarks>
public class StorageService : IStorageService
{
    /// <summary>List template id that is classified as a Preservation Hold library.</summary>
    private const int PreservationHoldTemplate = 851;

    /// <summary>
    /// CAML view used for the paged file enumerations: every item in every folder,
    /// 500 rows per page. Folders are filtered out client-side via <c>FSObjType</c>.
    /// NOTE(review): reconstructed view definition - confirm against the intended query.
    /// </summary>
    private const string PagedRecursiveViewXml =
        "<View Scope='RecursiveAll'><RowLimit Paged='TRUE'>500</RowLimit></View>";

    /// <summary>
    /// Scans the web of <paramref name="ctx"/> (and optionally its subsites) and returns the
    /// top-level storage nodes that were found.
    /// </summary>
    /// <param name="ctx">Client context of the site to scan.</param>
    /// <param name="options">Switches controlling which libraries, lists, bins and subsites are included.</param>
    /// <param name="progress">Receives progress notifications.</param>
    /// <param name="ct">Token used to cancel the scan.</param>
    /// <returns>The collected nodes, in discovery order.</returns>
    public async Task<IReadOnlyList<StorageNode>> CollectStorageAsync(
        ClientContext ctx,
        StorageScanOptions options,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        var result = new List<StorageNode>();
        await CollectForWebAsync(ctx, ctx.Web, options, result, progress, ct);
        return result;
    }

    /// <summary>
    /// Collects the nodes of a single web into <paramref name="result"/> and recurses into
    /// its subsites when <c>options.IncludeSubsites</c> is set.
    /// </summary>
    private async Task CollectForWebAsync(
        ClientContext ctx,
        SpWeb web,
        StorageScanOptions options,
        List<StorageNode> result,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        ct.ThrowIfCancellationRequested();

        ctx.Load(
            web,
            w => w.Title,
            w => w.Url,
            w => w.ServerRelativeUrl,
            w => w.Lists.Include(
                l => l.Title,
                l => l.Hidden,
                l => l.BaseType,
                l => l.BaseTemplate,
                l => l.ItemCount,
                l => l.RootFolder.ServerRelativeUrl));
        if (options.IncludeSubsites)
        {
            ctx.Load(web.Webs, ws => ws.Include(w => w.ServerRelativeUrl, w => w.Title));
        }

        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        string siteTitle = web.Title;
        var lists = web.Lists.ToList();
        var docLibs = lists.Where(l => l.BaseType == BaseType.DocumentLibrary).ToList();

        // Library nodes keyed by normalized root folder path; used to attribute recycle-bin items.
        var libsByRoot = new Dictionary<string, StorageNode>(StringComparer.OrdinalIgnoreCase);

        int idx = 0;
        foreach (var lib in docLibs)
        {
            ct.ThrowIfCancellationRequested();
            idx++;

            var kind = ClassifyLibrary(lib);
            if (kind == StorageNodeKind.HiddenLibrary && !options.IncludeHiddenLibraries)
            {
                continue;
            }

            if (kind == StorageNodeKind.PreservationHold && !options.IncludePreservationHold)
            {
                continue;
            }

            progress.Report(new OperationProgress(idx, docLibs.Count, $"Loading storage: {lib.Title} ({idx}/{docLibs.Count})"));

            var libNode = await LoadFolderNodeAsync(
                ctx, lib.RootFolder.ServerRelativeUrl, lib.Title, siteTitle, lib.Title, 0, kind, progress, ct);

            if (options.FolderDepth > 0)
            {
                await CollectSubfoldersAsync(
                    ctx, lib, lib.RootFolder.ServerRelativeUrl, libNode, 1, options.FolderDepth,
                    siteTitle, lib.Title, kind, progress, ct);
            }

            // The folder-metric totals are discarded and recomputed from the individual files.
            ResetNodeCounts(libNode);
            await BackfillLibFromFilesAsync(ctx, lib, libNode, progress, ct);

            result.Add(libNode);
            libsByRoot[NormalizeServerRelative(lib.RootFolder.ServerRelativeUrl)] = libNode;
        }

        if (options.IncludeListAttachments)
        {
            var nonDocLists = lists
                .Where(l => l.BaseType != BaseType.DocumentLibrary && !l.Hidden && l.ItemCount > 0)
                .ToList();

            int aIdx = 0;
            foreach (var list in nonDocLists)
            {
                ct.ThrowIfCancellationRequested();
                aIdx++;
                progress.Report(new OperationProgress(aIdx, nonDocLists.Count, $"Scanning attachments: {list.Title}"));

                var attachNode = await TryLoadAttachmentsNodeAsync(ctx, list, siteTitle, progress, ct);
                if (attachNode != null && attachNode.TotalSizeBytes > 0)
                {
                    result.Add(attachNode);
                }
            }
        }

        if (options.IncludeRecycleBin)
        {
            progress.Report(OperationProgress.Indeterminate($"Scanning recycle bin: {siteTitle}..."));
            var (rbNodes, perDir) = await LoadRecycleBinNodesAsync(ctx, web, siteTitle, progress, ct);
            AttributeRecycleBinToLibraries(perDir, libsByRoot);
            result.AddRange(rbNodes);
        }

        if (options.IncludeSubsites)
        {
            foreach (var sub in web.Webs.ToList())
            {
                ct.ThrowIfCancellationRequested();

                var subResult = new List<StorageNode>();
                await CollectForWebAsync(ctx, sub, options, subResult, progress, ct);
                if (subResult.Count == 0)
                {
                    continue;
                }

                // Recycle-bin nodes are left out of the subsite totals: their sizes have
                // already been added to the matching library nodes above.
                var counted = subResult.Where(n => n.Kind != StorageNodeKind.RecycleBin).ToList();

                result.Add(new StorageNode
                {
                    Name = sub.Title,
                    Url = BuildAbsoluteUrl(ctx, sub.ServerRelativeUrl),
                    SiteTitle = sub.Title,
                    Kind = StorageNodeKind.Subsite,
                    IndentLevel = 0,
                    Children = subResult,
                    TotalSizeBytes = counted.Sum(n => n.TotalSizeBytes),
                    FileStreamSizeBytes = counted.Sum(n => n.FileStreamSizeBytes),
                    TotalFileCount = counted.Sum(n => n.TotalFileCount)
                });
            }
        }
    }

    /// <summary>
    /// Adds each recycle-bin directory tally to the library whose root path contains that
    /// directory. Longer roots are tried first so the most specific library wins.
    /// </summary>
    private static void AttributeRecycleBinToLibraries(
        Dictionary<string, (long Size, int Count)> perDir,
        Dictionary<string, StorageNode> libsByRoot)
    {
        if (perDir.Count == 0 || libsByRoot.Count == 0)
        {
            return;
        }

        var libRootsByLength = libsByRoot.OrderByDescending(kv => kv.Key.Length).ToList();
        foreach (var kv in perDir)
        {
            string dirNorm = NormalizeServerRelative(kv.Key);
            foreach (var lib in libRootsByLength)
            {
                bool belongsToLib =
                    dirNorm.Equals(lib.Key, StringComparison.OrdinalIgnoreCase) ||
                    dirNorm.StartsWith(lib.Key + "/", StringComparison.OrdinalIgnoreCase);
                if (!belongsToLib)
                {
                    continue;
                }

                lib.Value.TotalSizeBytes += kv.Value.Size;
                lib.Value.TotalFileCount += kv.Value.Count;
                break;
            }
        }
    }

    /// <summary>
    /// Classifies a document library as preservation hold (by template id or title),
    /// hidden, or regular - checked in that order.
    /// </summary>
    private static StorageNodeKind ClassifyLibrary(List lib)
    {
        bool isPreservationHold =
            lib.BaseTemplate == PreservationHoldTemplate ||
            lib.Title.Equals("Preservation Hold Library", StringComparison.OrdinalIgnoreCase);
        if (isPreservationHold)
        {
            return StorageNodeKind.PreservationHold;
        }

        return lib.Hidden ? StorageNodeKind.HiddenLibrary : StorageNodeKind.Library;
    }

    /// <summary>
    /// Builds an absolute URL from a server-relative one using the scheme and host of the
    /// context URL, so the site path is not repeated when the context points at a
    /// non-root site. Falls back to plain concatenation when the context URL is not absolute.
    /// </summary>
    private static string BuildAbsoluteUrl(ClientContext ctx, string serverRelativeUrl)
    {
        if (Uri.TryCreate(ctx.Url, UriKind.Absolute, out var siteUri))
        {
            return siteUri.GetLeftPart(UriPartial.Authority) + serverRelativeUrl;
        }

        return ctx.Url.TrimEnd('/') + serverRelativeUrl;
    }

    /// <summary>
    /// Loads the storage metrics of a list's <c>Attachments</c> folder.
    /// </summary>
    /// <returns>
    /// The attachments node, or <c>null</c> when the folder does not exist, holds no files,
    /// or could not be loaded. Cancellation is not swallowed.
    /// </returns>
    private static async Task<StorageNode?> TryLoadAttachmentsNodeAsync(
        ClientContext ctx,
        List list,
        string siteTitle,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        string url = list.RootFolder.ServerRelativeUrl.TrimEnd('/') + "/Attachments";
        try
        {
            var folder = ctx.Web.GetFolderByServerRelativeUrl(url);
            ctx.Load(folder, f => f.Exists, f => f.StorageMetrics, f => f.TimeLastModified, f => f.ServerRelativeUrl);
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

            if (!folder.Exists || folder.StorageMetrics.TotalFileCount == 0)
            {
                return null;
            }

            return new StorageNode
            {
                Name = $"[Attachments] {list.Title}",
                Url = BuildAbsoluteUrl(ctx, url),
                SiteTitle = siteTitle,
                Library = list.Title,
                Kind = StorageNodeKind.ListAttachments,
                TotalSizeBytes = folder.StorageMetrics.TotalSize,
                FileStreamSizeBytes = folder.StorageMetrics.TotalFileStreamSize,
                TotalFileCount = folder.StorageMetrics.TotalFileCount,
                LastModified = folder.StorageMetrics.LastModified > DateTime.MinValue
                    ? folder.StorageMetrics.LastModified
                    : (DateTime?)null
            };
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Best effort: a list whose attachments folder cannot be read is skipped.
            return null;
        }
    }

    /// <summary>
    /// Loads the recycle bin of <paramref name="web"/> and summarizes it.
    /// </summary>
    /// <returns>
    /// <c>Nodes</c>: up to two summary nodes (first stage / second stage).
    /// <c>PerDir</c>: size and item count per normalized original directory.
    /// When the bin cannot be read, whatever was gathered so far is returned.
    /// Cancellation is not swallowed.
    /// </returns>
    private static async Task<(List<StorageNode> Nodes, Dictionary<string, (long Size, int Count)> PerDir)> LoadRecycleBinNodesAsync(
        ClientContext ctx,
        SpWeb web,
        string siteTitle,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        var nodes = new List<StorageNode>();
        var perDir = new Dictionary<string, (long Size, int Count)>(StringComparer.OrdinalIgnoreCase);

        try
        {
            var bin = web.RecycleBin;
            ctx.Load(bin, b => b.Include(i => i.Size, i => i.ItemState, i => i.DeletedDate, i => i.DirName));
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

            string webSrl = NormalizeServerRelative(web.ServerRelativeUrl);
            long stage1Size = 0, stage2Size = 0;
            int stage1Count = 0, stage2Count = 0;
            DateTime? stage1Last = null, stage2Last = null;

            foreach (var item in bin)
            {
                if (item.ItemState == RecycleBinItemState.SecondStageRecycleBin)
                {
                    stage2Size += item.Size;
                    stage2Count++;
                    if (stage2Last is null || item.DeletedDate > stage2Last)
                    {
                        stage2Last = item.DeletedDate;
                    }
                }
                else
                {
                    stage1Size += item.Size;
                    stage1Count++;
                    if (stage1Last is null || item.DeletedDate > stage1Last)
                    {
                        stage1Last = item.DeletedDate;
                    }
                }

                // DirName is handled in three shapes: rooted, empty (the web itself),
                // or relative to the web.
                string raw = item.DirName ?? string.Empty;
                string dirSrl;
                if (raw.StartsWith('/'))
                {
                    dirSrl = NormalizeServerRelative(raw);
                }
                else if (string.IsNullOrEmpty(raw))
                {
                    dirSrl = webSrl;
                }
                else
                {
                    dirSrl = NormalizeServerRelative(webSrl + "/" + raw);
                }

                perDir[dirSrl] = perDir.TryGetValue(dirSrl, out var tally)
                    ? (tally.Size + item.Size, tally.Count + 1)
                    : (item.Size, 1);
            }

            if (stage1Count > 0)
            {
                nodes.Add(new StorageNode
                {
                    Name = "[Recycle Bin] First-stage",
                    SiteTitle = siteTitle,
                    Library = "RecycleBin",
                    Kind = StorageNodeKind.RecycleBin,
                    TotalSizeBytes = stage1Size,
                    FileStreamSizeBytes = stage1Size,
                    TotalFileCount = stage1Count,
                    LastModified = stage1Last
                });
            }

            if (stage2Count > 0)
            {
                nodes.Add(new StorageNode
                {
                    Name = "[Recycle Bin] Second-stage",
                    SiteTitle = siteTitle,
                    Library = "RecycleBin",
                    Kind = StorageNodeKind.RecycleBin,
                    TotalSizeBytes = stage2Size,
                    FileStreamSizeBytes = stage2Size,
                    TotalFileCount = stage2Count,
                    LastModified = stage2Last
                });
            }
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Best effort: an unreadable recycle bin must not fail the whole scan.
        }

        return (nodes, perDir);
    }

    /// <summary>
    /// Normalizes a path to a leading-slash, no-trailing-slash form.
    /// Returns an empty string for null, empty, whitespace-only or slash-only input.
    /// </summary>
    private static string NormalizeServerRelative(string? path)
    {
        if (string.IsNullOrEmpty(path))
        {
            return string.Empty;
        }

        string t = path.Trim().TrimEnd('/');
        if (t.Length == 0)
        {
            return string.Empty;
        }

        return t.StartsWith('/') ? t : "/" + t;
    }

    /// <summary>
    /// Aggregates size (current file plus its versions) and file count per file extension
    /// over all visible document libraries of the context web.
    /// </summary>
    /// <param name="ctx">Client context of the site to scan.</param>
    /// <param name="progress">Receives progress notifications.</param>
    /// <param name="ct">Token used to cancel the scan.</param>
    /// <returns>One metric per lower-cased extension, largest total size first.</returns>
    public async Task<IReadOnlyList<FileTypeMetric>> CollectFileTypeMetricsAsync(
        ClientContext ctx,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        ct.ThrowIfCancellationRequested();

        ctx.Load(ctx.Web, w => w.Lists.Include(l => l.Title, l => l.Hidden, l => l.BaseType, l => l.ItemCount));
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        var libs = ctx.Web.Lists.Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary).ToList();
        var extensionMap = new Dictionary<string, (long totalSize, int count)>(StringComparer.OrdinalIgnoreCase);

        int libIdx = 0;
        foreach (var lib in libs)
        {
            ct.ThrowIfCancellationRequested();
            libIdx++;
            progress.Report(new OperationProgress(libIdx, libs.Count, $"Scanning files by type: {lib.Title}"));

            var query = new CamlQuery { ViewXml = PagedRecursiveViewXml };
            ListItemCollection items;
            do
            {
                ct.ThrowIfCancellationRequested();

                items = lib.GetItems(query);
                ctx.Load(
                    items,
                    ic => ic.ListItemCollectionPosition,
                    ic => ic.Include(i => i["FSObjType"], i => i["FileLeafRef"]));
                await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

                var fileRows = new List<(ListItem Item, string Name)>();
                foreach (var item in items)
                {
                    // FSObjType "0" marks a file; everything else is skipped.
                    if (item["FSObjType"]?.ToString() != "0")
                    {
                        continue;
                    }

                    string fileName = item["FileLeafRef"]?.ToString() ?? string.Empty;
                    fileRows.Add((item, fileName));
                    ctx.Load(item.File, f => f.Length);
                    ctx.Load(item.File.Versions, vc => vc.Include(v => v.Size));
                }

                // One round trip fetches the sizes queued above for the whole page.
                if (fileRows.Count > 0)
                {
                    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
                }

                foreach (var row in fileRows)
                {
                    if (!TryReadFileSizes(row.Item, out long current, out long versions))
                    {
                        continue;
                    }

                    string ext = Path.GetExtension(row.Name).ToLowerInvariant();
                    extensionMap[ext] = extensionMap.TryGetValue(ext, out var existing)
                        ? (existing.totalSize + current + versions, existing.count + 1)
                        : (current + versions, 1);
                }

                query.ListItemCollectionPosition = items.ListItemCollectionPosition;
            }
            while (items.ListItemCollectionPosition != null);
        }

        return extensionMap
            .Select(kvp => new FileTypeMetric(kvp.Key, kvp.Value.totalSize, kvp.Value.count))
            .OrderByDescending(m => m.TotalSizeBytes)
            .ToList();
    }

    /// <summary>
    /// Reads the already-loaded length of an item's file and the summed size of its versions.
    /// </summary>
    /// <returns>
    /// <c>false</c> when the file length cannot be read (the file is then skipped by callers).
    /// A failure while reading versions keeps whatever was summed up to that point.
    /// </returns>
    private static bool TryReadFileSizes(ListItem item, out long current, out long versions)
    {
        current = 0;
        versions = 0;

        try
        {
            current = item.File.Length;
        }
        catch
        {
            // Deliberate best effort: the length was not available for this item.
            return false;
        }

        try
        {
            foreach (var v in item.File.Versions)
            {
                versions += v.Size;
            }
        }
        catch
        {
            // Deliberate best effort: version sizes are optional.
        }

        return true;
    }

    /// <summary>
    /// Enumerates every file of <paramref name="lib"/>, adds its size to the deepest known
    /// folder node containing it (or to the library node), then rolls the totals up the tree.
    /// Expects the node counts to have been reset beforehand.
    /// </summary>
    private static async Task BackfillLibFromFilesAsync(
        ClientContext ctx,
        List lib,
        StorageNode libNode,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        progress.Report(OperationProgress.Indeterminate($"Counting files: {libNode.Name}..."));

        string libRootSrl = NormalizeServerRelative(lib.RootFolder.ServerRelativeUrl);
        var folderLookup = new Dictionary<string, StorageNode>(StringComparer.OrdinalIgnoreCase);
        BuildFolderLookup(libNode, libRootSrl, folderLookup);

        var query = new CamlQuery { ViewXml = PagedRecursiveViewXml };
        ListItemCollection items;
        do
        {
            ct.ThrowIfCancellationRequested();

            items = lib.GetItems(query);
            ctx.Load(
                items,
                ic => ic.ListItemCollectionPosition,
                ic => ic.Include(i => i["FSObjType"], i => i["FileDirRef"]));
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

            var fileRows = new List<(ListItem Item, string DirRef)>();
            foreach (var item in items)
            {
                // FSObjType "0" marks a file; everything else is skipped.
                if (item["FSObjType"]?.ToString() != "0")
                {
                    continue;
                }

                fileRows.Add((item, item["FileDirRef"]?.ToString() ?? string.Empty));
                ctx.Load(item.File, f => f.Length);
                ctx.Load(item.File.Versions, vc => vc.Include(v => v.Size));
            }

            // One round trip fetches the sizes queued above for the whole page.
            if (fileRows.Count > 0)
            {
                await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
            }

            foreach (var row in fileRows)
            {
                if (!TryReadFileSizes(row.Item, out long current, out long versions))
                {
                    continue;
                }

                var target = FindDeepestFolder(row.DirRef, folderLookup) ?? libNode;
                target.TotalSizeBytes += current + versions;
                target.FileStreamSizeBytes += current;
                target.TotalFileCount++;
            }

            query.ListItemCollectionPosition = items.ListItemCollectionPosition;
        }
        while (items.ListItemCollectionPosition != null);

        RollupFolderTotals(libNode);
    }

    /// <summary>Adds every child's totals to its parent, depth first.</summary>
    private static void RollupFolderTotals(StorageNode node)
    {
        foreach (var child in node.Children)
        {
            RollupFolderTotals(child);
            node.TotalSizeBytes += child.TotalSizeBytes;
            node.FileStreamSizeBytes += child.FileStreamSizeBytes;
            node.TotalFileCount += child.TotalFileCount;
        }
    }

    /// <summary>No-op: returns a completed task without touching <paramref name="nodes"/>.</summary>
    public Task BackfillZeroNodesAsync(
        ClientContext ctx,
        IReadOnlyList<StorageNode> nodes,
        IProgress<OperationProgress> progress,
        CancellationToken ct) => Task.CompletedTask;

    /// <summary>
    /// Reads <c>Site.Usage.Storage</c> for the context's site collection.
    /// </summary>
    /// <returns>The reported storage value, or 0 when it cannot be read. Cancellation is not swallowed.</returns>
    public async Task<long> GetSiteUsageStorageBytesAsync(
        ClientContext ctx,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        try
        {
            ctx.Load(ctx.Site, s => s.Usage);
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
            return ctx.Site.Usage.Storage;
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Best effort: usage may be unavailable to the caller.
            return 0L;
        }
    }

    /// <summary>Zeroes the size and count totals of a node and all of its descendants.</summary>
    private static void ResetNodeCounts(StorageNode node)
    {
        node.TotalSizeBytes = 0;
        node.FileStreamSizeBytes = 0;
        node.TotalFileCount = 0;
        foreach (var c in node.Children)
        {
            ResetNodeCounts(c);
        }
    }

    /// <summary>
    /// Registers a node and its descendants in <paramref name="lookup"/>, keyed by path.
    /// A node at indent level 0 takes <paramref name="parentPath"/> as its own path;
    /// deeper nodes append their name to it.
    /// </summary>
    private static void BuildFolderLookup(StorageNode node, string parentPath, Dictionary<string, StorageNode> lookup)
    {
        string nodePath = node.IndentLevel == 0 ? parentPath : parentPath + "/" + node.Name;
        lookup[nodePath] = node;
        foreach (var child in node.Children)
        {
            BuildFolderLookup(child, nodePath, lookup);
        }
    }

    /// <summary>
    /// Walks <paramref name="fileDirRef"/> upwards one segment at a time and returns the
    /// first path found in <paramref name="lookup"/>, or <c>null</c> when none matches.
    /// </summary>
    private static StorageNode? FindDeepestFolder(string fileDirRef, Dictionary<string, StorageNode> lookup)
    {
        string path = fileDirRef.TrimEnd('/');
        while (!string.IsNullOrEmpty(path))
        {
            if (lookup.TryGetValue(path, out var node))
            {
                return node;
            }

            int last = path.LastIndexOf('/');
            if (last <= 0)
            {
                break;
            }

            path = path[..last];
        }

        return null;
    }

    /// <summary>
    /// Loads the storage metrics of one folder and wraps them in a <see cref="StorageNode"/>
    /// with an empty child list. The last-modified value prefers the storage metrics and
    /// falls back to the folder's own timestamp.
    /// </summary>
    private static async Task<StorageNode> LoadFolderNodeAsync(
        ClientContext ctx,
        string serverRelativeUrl,
        string name,
        string siteTitle,
        string library,
        int indentLevel,
        StorageNodeKind kind,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        ct.ThrowIfCancellationRequested();

        var folder = ctx.Web.GetFolderByServerRelativeUrl(serverRelativeUrl);
        ctx.Load(folder, f => f.StorageMetrics, f => f.TimeLastModified, f => f.ServerRelativeUrl, f => f.Name);
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        DateTime? lastMod;
        if (folder.StorageMetrics.LastModified > DateTime.MinValue)
        {
            lastMod = folder.StorageMetrics.LastModified;
        }
        else if (folder.TimeLastModified > DateTime.MinValue)
        {
            lastMod = folder.TimeLastModified;
        }
        else
        {
            lastMod = null;
        }

        return new StorageNode
        {
            Name = name,
            Url = BuildAbsoluteUrl(ctx, serverRelativeUrl),
            SiteTitle = siteTitle,
            Library = library,
            Kind = kind,
            TotalSizeBytes = folder.StorageMetrics.TotalSize,
            FileStreamSizeBytes = folder.StorageMetrics.TotalFileStreamSize,
            TotalFileCount = folder.StorageMetrics.TotalFileCount,
            LastModified = lastMod,
            IndentLevel = indentLevel,
            Children = new List<StorageNode>()
        };
    }

    /// <summary>
    /// Adds a child node to <paramref name="parentNode"/> for every direct subfolder of
    /// <paramref name="parentServerRelativeUrl"/>, recursing until
    /// <paramref name="maxDepth"/> is reached. Folders named "Forms" or starting with an
    /// underscore are skipped.
    /// </summary>
    private static async Task CollectSubfoldersAsync(
        ClientContext ctx,
        List list,
        string parentServerRelativeUrl,
        StorageNode parentNode,
        int currentDepth,
        int maxDepth,
        string siteTitle,
        string library,
        StorageNodeKind kind,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        if (currentDepth > maxDepth)
        {
            return;
        }

        ct.ThrowIfCancellationRequested();

        // Gather the folder list first so node loading does not interleave with the paged enumeration.
        var subfolders = new List<(string Name, string ServerRelativeUrl)>();
        await foreach (var item in SharePointPaginationHelper.GetItemsInFolderAsync(
            ctx, list, parentServerRelativeUrl, recursive: false,
            viewFields: new[] { "FSObjType", "FileLeafRef", "FileRef" }, ct: ct))
        {
            // FSObjType "1" marks a folder.
            if (item["FSObjType"]?.ToString() != "1")
            {
                continue;
            }

            string name = item["FileLeafRef"]?.ToString() ?? string.Empty;
            string url = item["FileRef"]?.ToString() ?? string.Empty;
            if (string.IsNullOrEmpty(name) || string.IsNullOrEmpty(url))
            {
                continue;
            }

            if (name.Equals("Forms", StringComparison.OrdinalIgnoreCase) || name.StartsWith('_'))
            {
                continue;
            }

            subfolders.Add((name, url));
        }

        foreach (var sub in subfolders)
        {
            ct.ThrowIfCancellationRequested();

            var childNode = await LoadFolderNodeAsync(
                ctx, sub.ServerRelativeUrl, sub.Name, siteTitle, library, currentDepth, kind, progress, ct);

            if (currentDepth < maxDepth)
            {
                await CollectSubfoldersAsync(
                    ctx, list, sub.ServerRelativeUrl, childNode, currentDepth + 1, maxDepth,
                    siteTitle, library, kind, progress, ct);
            }

            parentNode.Children.Add(childNode);
        }
    }
}