// File: SharepointToolbox-Web/Services/StorageService.cs
// Snapshot: 2026-06-02 10:56:03 +02:00 - 321 lines, 19 KiB, C#

using System.IO;
using Microsoft.SharePoint.Client;
using SharepointToolbox.Web.Core.Helpers;
using SharepointToolbox.Web.Core.Models;
using SpWeb = Microsoft.SharePoint.Client.Web;
namespace SharepointToolbox.Web.Services;
public class StorageService : IStorageService
{
private const int PreservationHoldTemplate = 851;
/// <summary>
/// Scans the web attached to <paramref name="ctx"/> and returns the storage nodes collected for it.
/// </summary>
/// <param name="ctx">Client context; its <c>Web</c> is the starting point of the scan.</param>
/// <param name="options">Scan options, handed unchanged to the per-web collector.</param>
/// <param name="progress">Receiver for progress notifications.</param>
/// <param name="ct">Token used to cancel the scan.</param>
/// <returns>The nodes appended by the per-web collector, in the order they were added.</returns>
public async Task<IReadOnlyList<StorageNode>> CollectStorageAsync(
    ClientContext ctx, StorageScanOptions options,
    IProgress<OperationProgress> progress, CancellationToken ct)
{
    List<StorageNode> collected = new();
    await CollectForWebAsync(ctx, ctx.Web, options, collected, progress, ct);
    return collected;
}
/// <summary>
/// Collects storage nodes for a single web and appends them to <paramref name="result"/>:
/// document libraries first, then (depending on <paramref name="options"/>) list attachments,
/// recycle-bin summaries and, recursively, subsites.
/// </summary>
/// <param name="ctx">Client context used for every query.</param>
/// <param name="web">The web to scan.</param>
/// <param name="options">Flags and folder depth controlling what is included.</param>
/// <param name="result">Output list the collected nodes are appended to.</param>
/// <param name="progress">Receiver for progress notifications.</param>
/// <param name="ct">Token checked before each library, list and subsite.</param>
private async Task CollectForWebAsync(ClientContext ctx, SpWeb web, StorageScanOptions options,
    List<StorageNode> result, IProgress<OperationProgress> progress, CancellationToken ct)
{
    ct.ThrowIfCancellationRequested();

    // Single round trip for the web, its lists and (optionally) its direct subsites.
    ctx.Load(web, w => w.Title, w => w.Url, w => w.ServerRelativeUrl,
        w => w.Lists.Include(l => l.Title, l => l.Hidden, l => l.BaseType, l => l.BaseTemplate,
            l => l.ItemCount, l => l.RootFolder.ServerRelativeUrl));
    if (options.IncludeSubsites)
    {
        ctx.Load(web.Webs, ws => ws.Include(w => w.ServerRelativeUrl, w => w.Title));
    }
    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

    string siteTitle = web.Title;
    var lists = web.Lists.ToList();
    var docLibs = lists.Where(l => l.BaseType == BaseType.DocumentLibrary).ToList();

    // Library nodes keyed by normalized root path, used to attribute recycle-bin items below.
    var libsByRoot = new Dictionary<string, StorageNode>(StringComparer.OrdinalIgnoreCase);

    int idx = 0;
    foreach (var lib in docLibs)
    {
        ct.ThrowIfCancellationRequested();
        idx++; // counts skipped libraries too, so the reported position matches docLibs.Count

        var kind = ClassifyLibrary(lib);
        if (kind == StorageNodeKind.HiddenLibrary && !options.IncludeHiddenLibraries) continue;
        if (kind == StorageNodeKind.PreservationHold && !options.IncludePreservationHold) continue;

        progress.Report(new OperationProgress(idx, docLibs.Count, $"Loading storage: {lib.Title} ({idx}/{docLibs.Count})"));

        var libNode = await LoadFolderNodeAsync(ctx, lib.RootFolder.ServerRelativeUrl, lib.Title, siteTitle, lib.Title, 0, kind, progress, ct);
        if (options.FolderDepth > 0)
        {
            await CollectSubfoldersAsync(ctx, lib, lib.RootFolder.ServerRelativeUrl, libNode, 1, options.FolderDepth, siteTitle, lib.Title, kind, progress, ct);
        }

        // The folder-metric totals are discarded and recomputed from the individual files.
        ResetNodeCounts(libNode);
        await BackfillLibFromFilesAsync(ctx, lib, libNode, progress, ct);

        result.Add(libNode);
        libsByRoot[NormalizeServerRelative(lib.RootFolder.ServerRelativeUrl)] = libNode;
    }

    if (options.IncludeListAttachments)
    {
        var nonDocLists = lists.Where(l => l.BaseType != BaseType.DocumentLibrary && !l.Hidden && l.ItemCount > 0).ToList();
        int aIdx = 0;
        foreach (var list in nonDocLists)
        {
            ct.ThrowIfCancellationRequested();
            aIdx++;
            progress.Report(new OperationProgress(aIdx, nonDocLists.Count, $"Scanning attachments: {list.Title}"));

            var attachNode = await TryLoadAttachmentsNodeAsync(ctx, list, siteTitle, progress, ct);
            if (attachNode != null && attachNode.TotalSizeBytes > 0)
            {
                result.Add(attachNode);
            }
        }
    }

    if (options.IncludeRecycleBin)
    {
        progress.Report(OperationProgress.Indeterminate($"Scanning recycle bin: {siteTitle}..."));
        var (rbNodes, perDir) = await LoadRecycleBinNodesAsync(ctx, web, siteTitle, progress, ct);

        if (perDir.Count > 0 && libsByRoot.Count > 0)
        {
            // Longest root first so the first match is the most specific library.
            var libRootsByLength = libsByRoot.OrderByDescending(entry => entry.Key.Length).ToList();
            foreach (var tally in perDir)
            {
                string dirNorm = NormalizeServerRelative(tally.Key);
                foreach (var root in libRootsByLength)
                {
                    if (dirNorm.Equals(root.Key, StringComparison.OrdinalIgnoreCase) ||
                        dirNorm.StartsWith(root.Key + "/", StringComparison.OrdinalIgnoreCase))
                    {
                        root.Value.TotalSizeBytes += tally.Value.Size;
                        root.Value.TotalFileCount += tally.Value.Count;
                        break;
                    }
                }
            }
        }

        result.AddRange(rbNodes);
    }

    if (options.IncludeSubsites)
    {
        // ServerRelativeUrl already contains the full path from the host root, so it must be
        // appended to the scheme+host only. Appending it to ctx.Url duplicates the site path
        // whenever the context URL itself has a path. Falls back to the previous concatenation
        // if ctx.Url is not an absolute URI.
        string origin = Uri.TryCreate(ctx.Url, UriKind.Absolute, out var contextUri)
            ? contextUri.GetLeftPart(UriPartial.Authority)
            : ctx.Url.TrimEnd('/');

        foreach (var sub in web.Webs.ToList())
        {
            ct.ThrowIfCancellationRequested();

            var subResult = new List<StorageNode>();
            await CollectForWebAsync(ctx, sub, options, subResult, progress, ct);
            if (subResult.Count == 0) continue;

            // Recycle-bin summary nodes are excluded from the subsite totals.
            var counted = subResult.Where(n => n.Kind != StorageNodeKind.RecycleBin).ToList();
            result.Add(new StorageNode
            {
                Name = sub.Title,
                Url = origin + sub.ServerRelativeUrl,
                SiteTitle = sub.Title,
                Kind = StorageNodeKind.Subsite,
                IndentLevel = 0,
                Children = subResult,
                TotalSizeBytes = counted.Sum(n => n.TotalSizeBytes),
                FileStreamSizeBytes = counted.Sum(n => n.FileStreamSizeBytes),
                TotalFileCount = counted.Sum(n => n.TotalFileCount)
            });
        }
    }
}
/// <summary>
/// Maps a document library to a <see cref="StorageNodeKind"/>.
/// The preservation-hold check (template id or exact title, case-insensitive) takes precedence
/// over the hidden flag; anything else is a regular library.
/// </summary>
/// <param name="lib">Library whose BaseTemplate, Title and Hidden properties are already loaded.</param>
/// <returns>PreservationHold, HiddenLibrary or Library.</returns>
private static StorageNodeKind ClassifyLibrary(List lib)
{
    // NOTE(review): template id 851 is commonly documented as the Asset Library template -
    // confirm that PreservationHoldTemplate really identifies Preservation Hold libraries.
    bool isPreservationHold =
        lib.BaseTemplate == PreservationHoldTemplate ||
        lib.Title.Equals("Preservation Hold Library", StringComparison.OrdinalIgnoreCase);
    if (isPreservationHold)
    {
        return StorageNodeKind.PreservationHold;
    }

    if (lib.Hidden)
    {
        return StorageNodeKind.HiddenLibrary;
    }

    return StorageNodeKind.Library;
}
/// <summary>
/// Best-effort load of the storage metrics of a list's "Attachments" folder.
/// </summary>
/// <param name="ctx">Client context used for the query.</param>
/// <param name="list">List whose RootFolder.ServerRelativeUrl and Title are already loaded.</param>
/// <param name="siteTitle">Title of the owning site, copied onto the node.</param>
/// <param name="progress">Receiver for progress notifications (passed to the retry helper).</param>
/// <param name="ct">Cancellation token (passed to the retry helper).</param>
/// <returns>
/// A ListAttachments node, or <c>null</c> when the folder does not exist, holds no files,
/// or the query fails for any reason other than cancellation.
/// </returns>
/// <exception cref="OperationCanceledException">Propagated instead of being reported as "no attachments".</exception>
private static async Task<StorageNode?> TryLoadAttachmentsNodeAsync(ClientContext ctx, List list, string siteTitle, IProgress<OperationProgress> progress, CancellationToken ct)
{
    string url = list.RootFolder.ServerRelativeUrl.TrimEnd('/') + "/Attachments";
    try
    {
        var folder = ctx.Web.GetFolderByServerRelativeUrl(url);
        ctx.Load(folder, f => f.Exists, f => f.StorageMetrics, f => f.TimeLastModified, f => f.ServerRelativeUrl);
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        if (!folder.Exists || folder.StorageMetrics.TotalFileCount == 0)
        {
            return null;
        }

        // The folder URL is server-relative (rooted at the host), so prefix it with the
        // scheme+host only; prefixing ctx.Url would repeat the site path for non-root sites.
        // Falls back to the previous concatenation if ctx.Url is not an absolute URI.
        string origin = Uri.TryCreate(ctx.Url, UriKind.Absolute, out var contextUri)
            ? contextUri.GetLeftPart(UriPartial.Authority)
            : ctx.Url.TrimEnd('/');

        var metrics = folder.StorageMetrics;
        return new StorageNode
        {
            Name = $"[Attachments] {list.Title}",
            Url = origin + url,
            SiteTitle = siteTitle,
            Library = list.Title,
            Kind = StorageNodeKind.ListAttachments,
            TotalSizeBytes = metrics.TotalSize,
            FileStreamSizeBytes = metrics.TotalFileStreamSize,
            TotalFileCount = metrics.TotalFileCount,
            LastModified = metrics.LastModified > DateTime.MinValue ? metrics.LastModified : (DateTime?)null
        };
    }
    catch (OperationCanceledException)
    {
        // A cancelled scan must stop, not be mistaken for a list without attachments.
        throw;
    }
    catch
    {
        // Deliberate best effort: any other failure means "no attachment data for this list".
        return null;
    }
}
/// <summary>
/// Best-effort scan of a web's recycle bin.
/// </summary>
/// <param name="ctx">Client context used for the query.</param>
/// <param name="web">Web whose recycle bin is read; its ServerRelativeUrl must already be loaded.</param>
/// <param name="siteTitle">Title of the owning site, copied onto the summary nodes.</param>
/// <param name="progress">Receiver for progress notifications (passed to the retry helper).</param>
/// <param name="ct">Cancellation token (passed to the retry helper).</param>
/// <returns>
/// <c>Nodes</c>: up to two summary nodes (first-stage and second-stage), each present only when
/// it has at least one item. <c>PerDir</c>: size and item count per original directory, keyed by
/// normalized server-relative path. Both are returned as filled so far if the scan fails for a
/// reason other than cancellation.
/// </returns>
/// <exception cref="OperationCanceledException">Propagated instead of being swallowed.</exception>
private static async Task<(List<StorageNode> Nodes, Dictionary<string, (long Size, int Count)> PerDir)> LoadRecycleBinNodesAsync(ClientContext ctx, SpWeb web, string siteTitle, IProgress<OperationProgress> progress, CancellationToken ct)
{
    var nodes = new List<StorageNode>();
    var perDir = new Dictionary<string, (long Size, int Count)>(StringComparer.OrdinalIgnoreCase);
    try
    {
        var bin = web.RecycleBin;
        ctx.Load(bin, b => b.Include(i => i.Size, i => i.ItemState, i => i.DeletedDate, i => i.DirName));
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        string webSrl = NormalizeServerRelative(web.ServerRelativeUrl);
        long stage1Size = 0, stage2Size = 0;
        int stage1Count = 0, stage2Count = 0;
        DateTime? stage1Last = null, stage2Last = null;

        foreach (var item in bin)
        {
            // Everything that is not explicitly second-stage is counted as first-stage.
            if (item.ItemState == RecycleBinItemState.SecondStageRecycleBin)
            {
                stage2Size += item.Size;
                stage2Count++;
                if (stage2Last is null || item.DeletedDate > stage2Last) stage2Last = item.DeletedDate;
            }
            else
            {
                stage1Size += item.Size;
                stage1Count++;
                if (stage1Last is null || item.DeletedDate > stage1Last) stage1Last = item.DeletedDate;
            }

            string raw = item.DirName ?? string.Empty;
            string dirSrl;
            if (string.IsNullOrEmpty(raw))
            {
                dirSrl = webSrl;
            }
            else if (raw.StartsWith('/'))
            {
                dirSrl = NormalizeServerRelative(raw);
            }
            else
            {
                // NOTE(review): DirName appears to be reported as a server-relative path without
                // its leading slash (e.g. "sites/x/Shared Documents") - confirm against real data.
                // If adding the slash already places it under this web, use it as-is; prefixing
                // the web path again would double it. Otherwise keep the web-relative reading.
                string asServerRelative = NormalizeServerRelative(raw);
                bool alreadyUnderWeb = webSrl.Length > 0 &&
                    (asServerRelative.Equals(webSrl, StringComparison.OrdinalIgnoreCase) ||
                     asServerRelative.StartsWith(webSrl + "/", StringComparison.OrdinalIgnoreCase));
                dirSrl = alreadyUnderWeb ? asServerRelative : NormalizeServerRelative(webSrl + "/" + raw);
            }

            if (perDir.TryGetValue(dirSrl, out var tally))
            {
                perDir[dirSrl] = (tally.Size + item.Size, tally.Count + 1);
            }
            else
            {
                perDir[dirSrl] = (item.Size, 1);
            }
        }

        if (stage1Count > 0)
        {
            nodes.Add(new StorageNode { Name = "[Recycle Bin] First-stage", SiteTitle = siteTitle, Library = "RecycleBin", Kind = StorageNodeKind.RecycleBin, TotalSizeBytes = stage1Size, FileStreamSizeBytes = stage1Size, TotalFileCount = stage1Count, LastModified = stage1Last });
        }
        if (stage2Count > 0)
        {
            nodes.Add(new StorageNode { Name = "[Recycle Bin] Second-stage", SiteTitle = siteTitle, Library = "RecycleBin", Kind = StorageNodeKind.RecycleBin, TotalSizeBytes = stage2Size, FileStreamSizeBytes = stage2Size, TotalFileCount = stage2Count, LastModified = stage2Last });
        }
    }
    catch (OperationCanceledException)
    {
        // A cancelled scan must stop rather than continue with an empty recycle bin.
        throw;
    }
    catch
    {
        // Deliberate best effort: an unreadable recycle bin simply contributes nothing.
    }
    return (nodes, perDir);
}
/// <summary>
/// Canonicalizes a server-relative path: surrounding whitespace and trailing slashes are
/// removed and a single leading slash is ensured.
/// </summary>
/// <param name="path">Path to normalize; may be <c>null</c>.</param>
/// <returns>
/// The normalized path, or the empty string when the input is <c>null</c>, empty,
/// whitespace, or consists only of slashes.
/// </returns>
private static string NormalizeServerRelative(string? path)
{
    string trimmed = (path ?? string.Empty).Trim().TrimEnd('/');
    if (trimmed.Length == 0)
    {
        return string.Empty;
    }

    return trimmed[0] == '/' ? trimmed : "/" + trimmed;
}
/// <summary>
/// Aggregates file sizes (current stream plus all loaded versions) and file counts per file
/// extension across the visible document libraries of the context's web.
/// </summary>
/// <param name="ctx">Client context; only <c>ctx.Web</c> is scanned.</param>
/// <param name="progress">Receiver for one progress notification per library.</param>
/// <param name="ct">Token checked per library and per result page.</param>
/// <returns>One metric per lower-cased extension, ordered by total size descending.</returns>
public async Task<IReadOnlyList<FileTypeMetric>> CollectFileTypeMetricsAsync(ClientContext ctx, IProgress<OperationProgress> progress, CancellationToken ct)
{
    ct.ThrowIfCancellationRequested();
    ctx.Load(ctx.Web, w => w.Lists.Include(l => l.Title, l => l.Hidden, l => l.BaseType, l => l.ItemCount));
    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

    var visibleLibraries = ctx.Web.Lists
        .Where(l => !l.Hidden && l.BaseType == BaseType.DocumentLibrary)
        .ToList();
    var totalsByExtension = new Dictionary<string, (long totalSize, int count)>(StringComparer.OrdinalIgnoreCase);

    for (int position = 0; position < visibleLibraries.Count; position++)
    {
        var library = visibleLibraries[position];
        ct.ThrowIfCancellationRequested();
        progress.Report(new OperationProgress(position + 1, visibleLibraries.Count, $"Scanning files by type: {library.Title}"));

        var pageQuery = new CamlQuery { ViewXml = "<View Scope='RecursiveAll'><Query></Query><ViewFields><FieldRef Name='FSObjType' /><FieldRef Name='FileLeafRef' /></ViewFields><RowLimit Paged='TRUE'>500</RowLimit></View>" };
        while (true)
        {
            ct.ThrowIfCancellationRequested();

            // First round trip: fetch one page of items (files and folders alike).
            ListItemCollection page = library.GetItems(pageQuery);
            ctx.Load(page, ic => ic.ListItemCollectionPosition, ic => ic.Include(i => i["FSObjType"], i => i["FileLeafRef"]));
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

            // Queue the size and version loads for every file on the page ("0" marks a file).
            var files = new List<(ListItem Item, string Name)>();
            foreach (var entry in page)
            {
                bool isFile = entry["FSObjType"]?.ToString() == "0";
                if (!isFile)
                {
                    continue;
                }

                string leafName = entry["FileLeafRef"]?.ToString() ?? string.Empty;
                files.Add((entry, leafName));
                ctx.Load(entry.File, f => f.Length);
                ctx.Load(entry.File.Versions, vc => vc.Include(v => v.Size));
            }

            // Second round trip: resolve all queued loads in one batch.
            if (files.Count > 0)
            {
                await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
            }

            foreach (var (fileItem, leafName) in files)
            {
                // A file whose length cannot be read is left out entirely.
                long current;
                try { current = fileItem.File.Length; } catch { continue; }

                // Unreadable versions are tolerated; whatever was summed so far is kept.
                long versions = 0;
                try { foreach (var v in fileItem.File.Versions) versions += v.Size; } catch { }

                string ext = Path.GetExtension(leafName).ToLowerInvariant();

                // On a miss TryGetValue leaves the tuple at its default (0, 0), i.e. a fresh tally.
                totalsByExtension.TryGetValue(ext, out var seen);
                totalsByExtension[ext] = (seen.totalSize + current + versions, seen.count + 1);
            }

            pageQuery.ListItemCollectionPosition = page.ListItemCollectionPosition;
            if (page.ListItemCollectionPosition == null)
            {
                break;
            }
        }
    }

    return totalsByExtension
        .Select(kvp => new FileTypeMetric(kvp.Key, kvp.Value.totalSize, kvp.Value.count))
        .OrderByDescending(m => m.TotalSizeBytes)
        .ToList();
}
/// <summary>
/// Recomputes the size and count figures of a library node tree from the library's files:
/// every file is added to the deepest known folder node on its path (or to the library node
/// itself), then the totals are rolled up from children to parents.
/// </summary>
/// <param name="ctx">Client context used for the paged queries.</param>
/// <param name="lib">Library to enumerate; its RootFolder.ServerRelativeUrl must be loaded.</param>
/// <param name="libNode">Root node of the library; it and its descendants are updated in place.</param>
/// <param name="progress">Receiver for progress notifications.</param>
/// <param name="ct">Token checked before each result page.</param>
private static async Task BackfillLibFromFilesAsync(ClientContext ctx, List lib, StorageNode libNode, IProgress<OperationProgress> progress, CancellationToken ct)
{
    progress.Report(OperationProgress.Indeterminate($"Counting files: {libNode.Name}..."));

    // Path -> node index for every folder node currently in the tree.
    var nodesByPath = new Dictionary<string, StorageNode>(StringComparer.OrdinalIgnoreCase);
    BuildFolderLookup(libNode, NormalizeServerRelative(lib.RootFolder.ServerRelativeUrl), nodesByPath);

    var pageQuery = new CamlQuery { ViewXml = "<View Scope='RecursiveAll'><Query></Query><ViewFields><FieldRef Name='FSObjType' /><FieldRef Name='FileDirRef' /></ViewFields><RowLimit Paged='TRUE'>500</RowLimit></View>" };
    while (true)
    {
        ct.ThrowIfCancellationRequested();

        // First round trip: one page of items (files and folders alike).
        ListItemCollection page = lib.GetItems(pageQuery);
        ctx.Load(page, ic => ic.ListItemCollectionPosition, ic => ic.Include(i => i["FSObjType"], i => i["FileDirRef"]));
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

        // Queue the size and version loads for every file on the page ("0" marks a file).
        var files = new List<(ListItem Item, string DirRef)>();
        foreach (var entry in page)
        {
            bool isFile = entry["FSObjType"]?.ToString() == "0";
            if (!isFile)
            {
                continue;
            }

            files.Add((entry, entry["FileDirRef"]?.ToString() ?? string.Empty));
            ctx.Load(entry.File, f => f.Length);
            ctx.Load(entry.File.Versions, vc => vc.Include(v => v.Size));
        }

        // Second round trip: resolve all queued loads in one batch.
        if (files.Count > 0)
        {
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
        }

        foreach (var (fileItem, dirRef) in files)
        {
            // A file whose length cannot be read is left out entirely.
            long current;
            try { current = fileItem.File.Length; } catch { continue; }

            // Unreadable versions are tolerated; whatever was summed so far is kept.
            long versions = 0;
            try { foreach (var v in fileItem.File.Versions) versions += v.Size; } catch { }

            // Files in folders that have no node of their own land on the nearest ancestor.
            var owner = FindDeepestFolder(dirRef, nodesByPath) ?? libNode;
            owner.TotalSizeBytes += current + versions;
            owner.FileStreamSizeBytes += current;
            owner.TotalFileCount++;
        }

        pageQuery.ListItemCollectionPosition = page.ListItemCollectionPosition;
        if (page.ListItemCollectionPosition == null)
        {
            break;
        }
    }

    RollupFolderTotals(libNode);
}
/// <summary>
/// Adds the totals of every descendant to its ancestors, bottom-up: each child is rolled up
/// first, then its (now complete) figures are added to <paramref name="node"/>.
/// </summary>
/// <param name="node">Root of the subtree to roll up; updated in place.</param>
private static void RollupFolderTotals(StorageNode node)
{
    foreach (var descendant in node.Children)
    {
        // Finish the child's subtree before reading its totals.
        RollupFolderTotals(descendant);

        node.TotalSizeBytes = node.TotalSizeBytes + descendant.TotalSizeBytes;
        node.FileStreamSizeBytes = node.FileStreamSizeBytes + descendant.FileStreamSizeBytes;
        node.TotalFileCount = node.TotalFileCount + descendant.TotalFileCount;
    }
}
/// <summary>
/// No-op: returns an already-completed task without using any of its arguments.
/// </summary>
/// <param name="ctx">Unused.</param>
/// <param name="nodes">Unused.</param>
/// <param name="progress">Unused.</param>
/// <param name="ct">Unused.</param>
/// <returns>A completed task.</returns>
public Task BackfillZeroNodesAsync(ClientContext ctx, IReadOnlyList<StorageNode> nodes, IProgress<OperationProgress> progress, CancellationToken ct)
{
    return Task.CompletedTask;
}
/// <summary>
/// Best-effort read of the site collection's reported storage usage.
/// </summary>
/// <param name="ctx">Client context; <c>ctx.Site.Usage</c> is loaded.</param>
/// <param name="progress">Receiver for progress notifications (passed to the retry helper).</param>
/// <param name="ct">Cancellation token (passed to the retry helper).</param>
/// <returns>The value of <c>Usage.Storage</c>, or 0 when it cannot be read.</returns>
/// <exception cref="OperationCanceledException">Propagated instead of being reported as 0.</exception>
public async Task<long> GetSiteUsageStorageBytesAsync(ClientContext ctx, IProgress<OperationProgress> progress, CancellationToken ct)
{
    try
    {
        ctx.Load(ctx.Site, s => s.Usage);
        await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);
        return ctx.Site.Usage.Storage;
    }
    catch (OperationCanceledException)
    {
        // A cancelled request is not the same as "0 bytes used"; let the caller see it.
        throw;
    }
    catch
    {
        // Deliberate best effort: usage is optional information.
        return 0L;
    }
}
/// <summary>
/// Zeroes the size and file-count figures of <paramref name="node"/> and of every node below it.
/// </summary>
/// <param name="node">Root of the subtree to reset; updated in place.</param>
private static void ResetNodeCounts(StorageNode node)
{
    node.TotalSizeBytes = 0;
    node.FileStreamSizeBytes = 0;
    node.TotalFileCount = 0;

    foreach (var descendant in node.Children)
    {
        ResetNodeCounts(descendant);
    }
}
/// <summary>
/// Registers <paramref name="node"/> and all of its descendants in <paramref name="lookup"/>,
/// keyed by path. A node at indent level 0 is stored under <paramref name="parentPath"/> itself;
/// any other node is stored under <c>parentPath + "/" + Name</c>.
/// </summary>
/// <param name="node">Node to register.</param>
/// <param name="parentPath">Path of the parent (or of the node itself at indent level 0).</param>
/// <param name="lookup">Index to fill; an existing entry for the same path is overwritten.</param>
private static void BuildFolderLookup(StorageNode node, string parentPath, Dictionary<string, StorageNode> lookup)
{
    string ownPath = parentPath;
    if (node.IndentLevel != 0)
    {
        ownPath = $"{parentPath}/{node.Name}";
    }

    lookup[ownPath] = node;

    foreach (var descendant in node.Children)
    {
        BuildFolderLookup(descendant, ownPath, lookup);
    }
}
/// <summary>
/// Finds the node registered for <paramref name="fileDirRef"/> or, failing that, for its
/// nearest registered ancestor, by dropping one trailing path segment at a time.
/// </summary>
/// <param name="fileDirRef">Directory path of a file; trailing slashes are ignored.</param>
/// <param name="lookup">Path-to-node index to search.</param>
/// <returns>The matching node, or <c>null</c> when no prefix of the path is registered.</returns>
private static StorageNode? FindDeepestFolder(string fileDirRef, Dictionary<string, StorageNode> lookup)
{
    string candidate = fileDirRef.TrimEnd('/');
    while (candidate.Length > 0)
    {
        if (lookup.TryGetValue(candidate, out var match))
        {
            return match;
        }

        // No slash left, or only the leading one: nothing shorter remains to try.
        int cut = candidate.LastIndexOf('/');
        if (cut <= 0)
        {
            return null;
        }

        candidate = candidate.Substring(0, cut);
    }

    return null;
}
/// <summary>
/// Loads the storage metrics of one folder and wraps them in a new <see cref="StorageNode"/>
/// with an empty child list.
/// </summary>
/// <param name="ctx">Client context; the folder is resolved through <c>ctx.Web</c>.</param>
/// <param name="serverRelativeUrl">Server-relative URL of the folder.</param>
/// <param name="name">Display name stored on the node.</param>
/// <param name="siteTitle">Title of the owning site, copied onto the node.</param>
/// <param name="library">Title of the owning library, copied onto the node.</param>
/// <param name="indentLevel">Depth of the node below the library root (0 for the root).</param>
/// <param name="kind">Node kind to assign.</param>
/// <param name="progress">Receiver for progress notifications (passed to the retry helper).</param>
/// <param name="ct">Token checked on entry and passed to the retry helper.</param>
/// <returns>The populated node.</returns>
private static async Task<StorageNode> LoadFolderNodeAsync(ClientContext ctx, string serverRelativeUrl, string name, string siteTitle, string library, int indentLevel, StorageNodeKind kind, IProgress<OperationProgress> progress, CancellationToken ct)
{
    ct.ThrowIfCancellationRequested();

    var folder = ctx.Web.GetFolderByServerRelativeUrl(serverRelativeUrl);
    ctx.Load(folder, f => f.StorageMetrics, f => f.TimeLastModified, f => f.ServerRelativeUrl, f => f.Name);
    await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

    var metrics = folder.StorageMetrics;

    // Prefer the metrics timestamp, fall back to the folder's own, otherwise leave it unset.
    DateTime? lastMod = metrics.LastModified > DateTime.MinValue
        ? metrics.LastModified
        : folder.TimeLastModified > DateTime.MinValue ? folder.TimeLastModified : (DateTime?)null;

    // serverRelativeUrl is rooted at the host, so prefix it with the scheme+host only;
    // prefixing ctx.Url would repeat the site path whenever the context URL has one.
    // Falls back to the previous concatenation if ctx.Url is not an absolute URI.
    string origin = Uri.TryCreate(ctx.Url, UriKind.Absolute, out var contextUri)
        ? contextUri.GetLeftPart(UriPartial.Authority)
        : ctx.Url.TrimEnd('/');

    return new StorageNode
    {
        Name = name,
        Url = origin + serverRelativeUrl,
        SiteTitle = siteTitle,
        Library = library,
        Kind = kind,
        TotalSizeBytes = metrics.TotalSize,
        FileStreamSizeBytes = metrics.TotalFileStreamSize,
        TotalFileCount = metrics.TotalFileCount,
        LastModified = lastMod,
        IndentLevel = indentLevel,
        Children = new List<StorageNode>()
    };
}
/// <summary>
/// Adds one child node per direct subfolder of <paramref name="parentServerRelativeUrl"/> to
/// <paramref name="parentNode"/>, recursing until <paramref name="maxDepth"/> is reached.
/// Folders named "Forms" and folders whose name starts with an underscore are skipped.
/// </summary>
/// <param name="ctx">Client context used for the queries.</param>
/// <param name="list">Library that contains the folders.</param>
/// <param name="parentServerRelativeUrl">Server-relative URL of the folder to enumerate.</param>
/// <param name="parentNode">Node that receives the child nodes.</param>
/// <param name="currentDepth">Depth of the children being created (1 = directly below the root).</param>
/// <param name="maxDepth">Deepest level for which nodes are created.</param>
/// <param name="siteTitle">Title of the owning site, copied onto each node.</param>
/// <param name="library">Title of the owning library, copied onto each node.</param>
/// <param name="kind">Node kind assigned to each child.</param>
/// <param name="progress">Receiver for progress notifications.</param>
/// <param name="ct">Token checked on entry and before each subfolder.</param>
private static async Task CollectSubfoldersAsync(ClientContext ctx, List list, string parentServerRelativeUrl, StorageNode parentNode, int currentDepth, int maxDepth, string siteTitle, string library, StorageNodeKind kind, IProgress<OperationProgress> progress, CancellationToken ct)
{
    if (currentDepth > maxDepth) return;
    ct.ThrowIfCancellationRequested();

    // Enumerate first, load afterwards, so the paged enumeration finishes before the
    // per-folder queries are issued on the same context.
    var subfolders = new List<(string Name, string ServerRelativeUrl)>();
    await foreach (var item in SharePointPaginationHelper.GetItemsInFolderAsync(ctx, list, parentServerRelativeUrl, recursive: false, viewFields: new[] { "FSObjType", "FileLeafRef", "FileRef" }, ct: ct))
    {
        if (item["FSObjType"]?.ToString() != "1") continue; // "1" marks a folder

        string name = item["FileLeafRef"]?.ToString() ?? string.Empty;
        string url = item["FileRef"]?.ToString() ?? string.Empty;
        if (string.IsNullOrEmpty(name) || string.IsNullOrEmpty(url)) continue;

        // Ordinal char comparison: the string overload of StartsWith is culture-sensitive,
        // which is not wanted for a fixed, non-linguistic prefix.
        if (name.Equals("Forms", StringComparison.OrdinalIgnoreCase) || name.StartsWith('_')) continue;

        subfolders.Add((name, url));
    }

    foreach (var sub in subfolders)
    {
        ct.ThrowIfCancellationRequested();

        var childNode = await LoadFolderNodeAsync(ctx, sub.ServerRelativeUrl, sub.Name, siteTitle, library, currentDepth, kind, progress, ct);
        if (currentDepth < maxDepth)
        {
            await CollectSubfoldersAsync(ctx, list, sub.ServerRelativeUrl, childNode, currentDepth + 1, maxDepth, siteTitle, library, kind, progress, ct);
        }

        parentNode.Children.Add(childNode);
    }
}
}