Files
T
Dev f4cc81bb71 chore: release v2.4
- Add theme system (Dark/Light palettes, ModernTheme, ThemeManager)
- Add InputDialog, Spinner common view
- Add DuplicatesCsvExportService
- Refresh views, dialogs, and view models across tabs
- Update localization strings (en/fr)
- Tweak services (transfer, permissions, search, user access, ownership elevation, bulk operations)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 11:23:11 +02:00

213 lines
8.7 KiB
C#
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
using Microsoft.SharePoint.Client;
using Microsoft.SharePoint.Client.Search.Query;
using SharepointToolbox.Core.Helpers;
using SharepointToolbox.Core.Models;
using System.Text.RegularExpressions;
namespace SharepointToolbox.Services;
/// <summary>
/// File search using SharePoint KQL Search API.
/// Port of PS Search-SPOFiles pattern (PS lines 4747-4987).
/// Pagination: 500 rows per batch, hard cap StartRow=50,000 (SharePoint Search boundary).
/// </summary>
public class SearchService : ISearchService
{
    /// <summary>Number of rows requested per search round trip.</summary>
    private const int BatchSize = 500;

    /// <summary>
    /// Upper bound for the paging offset; also used to clamp
    /// <c>SearchOptions.MaxResults</c>.
    /// </summary>
    private const int MaxStartRow = 50_000;

    /// <summary>Maximum accepted length of the generated KQL text.</summary>
    private const int MaxKqlLength = 4096;

    /// <summary>Managed properties requested for every result row.</summary>
    private static readonly string[] SelectedProperties =
    {
        "Title", "Path", "Author", "LastModifiedTime",
        "FileExtension", "Created", "ModifiedBy", "Size"
    };

    /// <summary>
    /// Runs a paged keyword search and returns the matching files.
    /// </summary>
    /// <param name="ctx">Client context the keyword queries are issued against.</param>
    /// <param name="options">
    /// Search filters. <c>MaxResults</c> is clamped to 50,000; <c>Regex</c>, when not
    /// blank, is applied client-side (case-insensitive) to the file name taken from the
    /// result path and to the title — a row is kept if either matches.
    /// </param>
    /// <param name="progress">Receives the running result count after each completed batch.</param>
    /// <param name="ct">Checked before the search starts and before every batch.</param>
    /// <returns>
    /// The collected results, at most <c>min(options.MaxResults, 50,000)</c> entries once
    /// at least one row has been accepted. Rows whose path contains
    /// <c>/_vti_history/</c> are skipped.
    /// </returns>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    /// <exception cref="InvalidOperationException">The generated KQL exceeds 4096 characters.</exception>
    /// <exception cref="ArgumentException"><c>options.Regex</c> is not a valid pattern.</exception>
    /// <exception cref="RegexMatchTimeoutException">A single regex match took longer than 2 seconds.</exception>
    public async Task<IReadOnlyList<SearchResult>> SearchFilesAsync(
        ClientContext ctx,
        SearchOptions options,
        IProgress<OperationProgress> progress,
        CancellationToken ct)
    {
        ct.ThrowIfCancellationRequested();

        string kql = BuildKql(options);
        ValidateKqlLength(kql);

        Regex? regexFilter = CreateNameFilter(options.Regex);

        var allResults = new List<SearchResult>();
        int startRow = 0;
        int maxResults = Math.Min(options.MaxResults, MaxStartRow);

        do
        {
            ct.ThrowIfCancellationRequested();

            var kq = new KeywordQuery(ctx)
            {
                QueryText = kql,
                StartRow = startRow,
                RowLimit = BatchSize,
                TrimDuplicates = false
            };
            foreach (string prop in SelectedProperties)
            {
                kq.SelectProperties.Add(prop);
            }

            // ExecuteQuery only queues the request; the result is populated once the
            // context round trip below has completed.
            var executor = new SearchExecutor(ctx);
            ClientResult<ResultTableCollection> clientResult = executor.ExecuteQuery(kq);
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

            var table = clientResult.Value
                .FirstOrDefault(t => t.TableType == KnownTableTypes.RelevantResults);
            if (table == null || table.RowCount == 0)
            {
                break;
            }

            foreach (var rawRow in table.ResultRows)
            {
                IDictionary<string, object>? dict = NormalizeRow(rawRow);
                if (dict is null)
                {
                    continue;
                }

                // Skip SharePoint version history paths
                string path = Str(dict, "Path");
                if (path.Contains("/_vti_history/", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                var result = ParseRow(dict);

                // Client-side Regex filter on file name (falls back to the title)
                if (regexFilter != null)
                {
                    string fileName = System.IO.Path.GetFileName(result.Path);
                    if (!regexFilter.IsMatch(fileName) && !regexFilter.IsMatch(result.Title))
                    {
                        continue;
                    }
                }

                allResults.Add(result);

                // Limit reached mid-batch: stop immediately, without a further progress report.
                if (allResults.Count >= maxResults)
                {
                    return allResults;
                }
            }

            progress.Report(new OperationProgress(allResults.Count, maxResults,
                $"Retrieved {allResults.Count:N0} results\u2026"));
            startRow += BatchSize;
        }
        // NOTE(review): "<=" allows one more request at StartRow == MaxStartRow when rows
        // were filtered out client-side — confirm this matches the intended page budget.
        while (startRow <= MaxStartRow && allResults.Count < maxResults);

        return allResults;
    }

    // ── Extension point: bypassing the 50,000-item cap ───────────────────────
    //
    // The StartRow approach has a hard ceiling at 50,000 (SharePoint Search boundary).
    // To go beyond it, replace the StartRow loop with a DocId cursor:
    //
    // 1. Add "DocId" to SelectProperties.
    // 2. Add query.SortList.Add("DocId", SortDirection.Ascending).
    // 3. First page KQL: unchanged.
    //    Subsequent pages: append "AND DocId>{lastDocId}" to the KQL (StartRow stays 0).
    // 4. Track lastDocId = Convert.ToInt64(lastRow["DocId"]) after each batch.
    // 5. Stop when batch.RowCount < BatchSize.
    //
    // Caveats:
    // - DocId is per-site-collection; for multi-site searches, maintain a separate
    //   cursor per ClientContext (site URL).
    // - The search index can shift between batches (new items indexed mid-scan);
    //   the DocId cursor is safer than StartRow but cannot guarantee zero drift.
    // - DocId is not returned by default — it must be in SelectProperties.
    //
    // This is deliberately not implemented here because SRCH-02 caps results at 50,000,
    // which the StartRow approach already covers exactly (100 pages × 500 rows).
    // Implement the DocId cursor if the cap needs to be lifted in a future version.

    // ── KQL builder ───────────────────────────────────────────────────────────

    /// <summary>
    /// Builds the KQL text for the given options. Every active filter becomes one
    /// clause; clauses are joined with <c>AND</c>, starting with
    /// <c>ContentType:Document</c>.
    /// </summary>
    /// <param name="opts">Filters to translate; unset filters contribute no clause.</param>
    /// <returns>The KQL query text.</returns>
    internal static string BuildKql(SearchOptions opts)
    {
        var parts = new List<string> { "ContentType:Document" };

        if (opts.Extensions.Length > 0)
        {
            // Extensions are normalised: leading dots removed, lower-cased.
            var extParts = opts.Extensions
                .Select(e => $"FileExtension:{e.TrimStart('.').ToLowerInvariant()}");
            parts.Add($"({string.Join(" OR ", extParts)})");
        }

        // Dates are formatted with the invariant culture: with the current culture a
        // non-Gregorian default calendar (e.g. th-TH, ar-SA) would change the year that
        // "yyyy" produces and silently shift the queried range.
        if (opts.CreatedAfter.HasValue)
        {
            parts.Add(FormattableString.Invariant($"Created>={opts.CreatedAfter.Value:yyyy-MM-dd}"));
        }
        if (opts.CreatedBefore.HasValue)
        {
            parts.Add(FormattableString.Invariant($"Created<={opts.CreatedBefore.Value:yyyy-MM-dd}"));
        }
        if (opts.ModifiedAfter.HasValue)
        {
            parts.Add(FormattableString.Invariant($"Write>={opts.ModifiedAfter.Value:yyyy-MM-dd}"));
        }
        if (opts.ModifiedBefore.HasValue)
        {
            parts.Add(FormattableString.Invariant($"Write<={opts.ModifiedBefore.Value:yyyy-MM-dd}"));
        }

        if (!string.IsNullOrEmpty(opts.CreatedBy))
        {
            parts.Add($"Author:\"{opts.CreatedBy}\"");
        }
        if (!string.IsNullOrEmpty(opts.ModifiedBy))
        {
            parts.Add($"ModifiedBy:\"{opts.ModifiedBy}\"");
        }

        // The library filter is only applied when the site URL is known as well,
        // because it is expressed as a path-prefix match.
        if (!string.IsNullOrEmpty(opts.Library) && !string.IsNullOrEmpty(opts.SiteUrl))
        {
            parts.Add($"Path:\"{opts.SiteUrl.TrimEnd('/')}/{opts.Library.TrimStart('/')}*\"");
        }

        return string.Join(" AND ", parts);
    }

    /// <summary>Rejects KQL text longer than the 4096-character limit.</summary>
    /// <exception cref="InvalidOperationException">The query is too long.</exception>
    private static void ValidateKqlLength(string kql)
    {
        // SharePoint Search KQL text hard cap is 4096 characters
        if (kql.Length > MaxKqlLength)
        {
            throw new InvalidOperationException(
                $"KQL query exceeds {MaxKqlLength}-character SharePoint Search limit ({kql.Length} chars). " +
                "Reduce the number of extension filters.");
        }
    }

    /// <summary>
    /// Creates the optional client-side name filter: case-insensitive, compiled, with a
    /// 2-second match timeout. Returns <c>null</c> when the pattern is null or blank.
    /// </summary>
    private static Regex? CreateNameFilter(string? pattern)
    {
        if (string.IsNullOrWhiteSpace(pattern))
        {
            return null;
        }

        return new Regex(pattern,
            RegexOptions.IgnoreCase | RegexOptions.Compiled,
            TimeSpan.FromSeconds(2));
    }

    /// <summary>
    /// Converts a raw result row to a string-keyed dictionary. CSOM has returned
    /// ResultRows as either Hashtable or Dictionary&lt;string,object&gt; across
    /// versions — both are accepted. Returns <c>null</c> for any other row type.
    /// </summary>
    private static IDictionary<string, object>? NormalizeRow(object? rawRow)
    {
        switch (rawRow)
        {
            case IDictionary<string, object> generic:
                return generic;

            case System.Collections.IDictionary legacy:
            {
                // Copy the non-generic entries; null values become empty strings.
                var copy = new Dictionary<string, object>();
                foreach (System.Collections.DictionaryEntry e in legacy)
                {
                    copy[e.Key.ToString()!] = e.Value ?? string.Empty;
                }
                return copy;
            }

            default:
                return null;
        }
    }

    // ── Row parser ────────────────────────────────────────────────────────────

    /// <summary>
    /// Maps one result row to a <see cref="SearchResult"/>. Missing or null text values
    /// become empty strings, unparseable dates become <c>null</c>, and an unparseable
    /// size becomes 0.
    /// </summary>
    private static SearchResult ParseRow(IDictionary<string, object> row)
    {
        // Dates go through the value's string form and DateTime.TryParse.
        static DateTime? Date(IDictionary<string, object> r, string key)
        {
            var s = Str(r, key);
            return DateTime.TryParse(s, out var dt) ? dt : (DateTime?)null;
        }

        // Everything except the digits 0-9 is stripped before parsing the size.
        static long ParseSize(IDictionary<string, object> r, string key)
        {
            var raw = Str(r, key);
            var digits = Regex.Replace(raw, "[^0-9]", "");
            return long.TryParse(digits, out var v) ? v : 0L;
        }

        return new SearchResult
        {
            Title = Str(row, "Title"),
            Path = Str(row, "Path"),
            FileExtension = Str(row, "FileExtension"),
            Created = Date(row, "Created"),
            LastModified = Date(row, "LastModifiedTime"),
            Author = Str(row, "Author"),
            ModifiedBy = Str(row, "ModifiedBy"),
            SizeBytes = ParseSize(row, "Size")
        };
    }

    /// <summary>
    /// Returns the string form of <paramref name="key"/>'s value, or an empty string
    /// when the key is absent or its value is null.
    /// </summary>
    private static string Str(IDictionary<string, object> r, string key) =>
        r.TryGetValue(key, out var v) ? v?.ToString() ?? string.Empty : string.Empty;
}