94 lines
5.3 KiB
C#
94 lines
5.3 KiB
C#
using Microsoft.SharePoint.Client;
|
|
using Microsoft.SharePoint.Client.Search.Query;
|
|
using System.Text.RegularExpressions;
|
|
using SharepointToolbox.Web.Core.Helpers;
|
|
using SharepointToolbox.Web.Core.Models;
|
|
|
|
namespace SharepointToolbox.Web.Services;
|
|
|
|
/// <summary>
/// Finds documents through SharePoint search: builds a KQL query from a
/// <see cref="SearchOptions"/> instance, pages through the relevant-results table,
/// and optionally filters rows client-side with a regular expression.
/// </summary>
public class SearchService : ISearchService
{
    /// <summary>Number of rows requested per search query (one page).</summary>
    private const int BatchSize = 500;

    /// <summary>Highest <c>StartRow</c> requested; also the upper bound applied to <c>MaxResults</c>.</summary>
    private const int MaxStartRow = 50_000;

    /// <summary>Longest KQL text this service is willing to send.</summary>
    private const int MaxKqlLength = 4096;

    /// <summary>Managed properties requested for every result row.</summary>
    private static readonly string[] SelectedProperties =
    {
        "Title", "Path", "Author", "LastModifiedTime", "FileExtension", "Created", "ModifiedBy", "Size",
    };

    /// <summary>Matches every character that is not an ASCII digit; used when parsing the size column.</summary>
    private static readonly Regex NonDigitRegex = new("[^0-9]", RegexOptions.Compiled);

    /// <summary>
    /// Runs the search described by <paramref name="options"/> and returns the matching files.
    /// </summary>
    /// <param name="ctx">Client context the keyword queries are created on.</param>
    /// <param name="options">Search criteria; also supplies the optional regex filter and the result cap.</param>
    /// <param name="progress">Receives one report per processed page of results.</param>
    /// <param name="ct">Checked before the search starts and before every page request.</param>
    /// <returns>
    /// At most <c>min(options.MaxResults, 50 000)</c> results; an empty list when that cap is zero or negative.
    /// </returns>
    /// <exception cref="InvalidOperationException">The generated KQL text is longer than 4096 characters.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<IReadOnlyList<SearchResult>> SearchFilesAsync(
        ClientContext ctx, SearchOptions options,
        IProgress<OperationProgress> progress, CancellationToken ct)
    {
        ct.ThrowIfCancellationRequested();

        string kql = BuildKql(options);
        if (kql.Length > MaxKqlLength)
        {
            throw new InvalidOperationException($"KQL query exceeds {MaxKqlLength}-char limit ({kql.Length} chars).");
        }

        // The match timeout bounds the cost of a pathological user-supplied pattern.
        Regex? regexFilter = string.IsNullOrWhiteSpace(options.Regex)
            ? null
            : new Regex(options.Regex, RegexOptions.IgnoreCase | RegexOptions.Compiled, TimeSpan.FromSeconds(2));

        var allResults = new List<SearchResult>();
        int maxResults = Math.Min(options.MaxResults, MaxStartRow);
        if (maxResults <= 0)
        {
            // Nothing was asked for: skip the round-trip instead of fetching a page and returning one row.
            return allResults;
        }

        for (int startRow = 0; startRow <= MaxStartRow && allResults.Count < maxResults; startRow += BatchSize)
        {
            ct.ThrowIfCancellationRequested();

            var query = new KeywordQuery(ctx)
            {
                QueryText = kql,
                StartRow = startRow,
                RowLimit = BatchSize,
                TrimDuplicates = false,
            };
            foreach (string property in SelectedProperties)
            {
                query.SelectProperties.Add(property);
            }

            var executor = new SearchExecutor(ctx);
            var clientResult = executor.ExecuteQuery(query);

            // NOTE(review): clientResult.Value is read only after this project helper completes;
            // presumably it sends the pending request with retry handling — confirm against the helper.
            await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, progress, ct);

            var table = clientResult.Value.FirstOrDefault(t => t.TableType == KnownTableTypes.RelevantResults);
            if (table == null || table.RowCount == 0)
            {
                break;
            }

            foreach (var rawRow in table.ResultRows)
            {
                var row = AsRowDictionary(rawRow);
                if (row is null)
                {
                    continue;
                }

                // Skip rows whose path contains the version-history folder segment.
                string path = Str(row, "Path");
                if (path.Contains("/_vti_history/", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                var result = ParseRow(row);
                if (regexFilter != null)
                {
                    // A row is kept when either its file name or its title matches.
                    string fileName = System.IO.Path.GetFileName(result.Path);
                    if (!regexFilter.IsMatch(fileName) && !regexFilter.IsMatch(result.Title))
                    {
                        continue;
                    }
                }

                allResults.Add(result);
                if (allResults.Count >= maxResults)
                {
                    // Cap reached; the loop condition ends paging after the progress report below.
                    break;
                }
            }

            progress.Report(new OperationProgress(allResults.Count, maxResults, $"Retrieved {allResults.Count:N0} results…"));
        }

        return allResults;
    }

    /// <summary>
    /// Builds the KQL text for <paramref name="opts"/>: a <c>ContentType:Document</c> restriction
    /// AND-ed with one clause per populated filter.
    /// </summary>
    /// <param name="opts">Filters to translate (extensions, created/modified bounds, people, library path).</param>
    /// <returns>The clauses joined with <c> AND </c>.</returns>
    internal static string BuildKql(SearchOptions opts)
    {
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        var parts = new List<string> { "ContentType:Document" };

        // Normalise extensions and drop blank entries so no empty "FileExtension:" or "()" is emitted.
        var extensionClauses = opts.Extensions
            .Where(e => !string.IsNullOrWhiteSpace(e))
            .Select(e => e.Trim().TrimStart('.').ToLowerInvariant())
            .Where(e => e.Length > 0)
            .Select(e => $"FileExtension:{e}")
            .ToList();
        if (extensionClauses.Count > 0)
        {
            parts.Add($"({string.Join(" OR ", extensionClauses)})");
        }

        // Dates are formatted with the invariant culture so the year is always Gregorian,
        // whatever calendar the current thread culture uses.
        if (opts.CreatedAfter.HasValue)
        {
            parts.Add("Created>=" + opts.CreatedAfter.Value.ToString("yyyy-MM-dd", invariant));
        }

        if (opts.CreatedBefore.HasValue)
        {
            parts.Add("Created<=" + opts.CreatedBefore.Value.ToString("yyyy-MM-dd", invariant));
        }

        if (opts.ModifiedAfter.HasValue)
        {
            parts.Add("Write>=" + opts.ModifiedAfter.Value.ToString("yyyy-MM-dd", invariant));
        }

        if (opts.ModifiedBefore.HasValue)
        {
            parts.Add("Write<=" + opts.ModifiedBefore.Value.ToString("yyyy-MM-dd", invariant));
        }

        AddPhraseClause(parts, "Author", opts.CreatedBy);
        AddPhraseClause(parts, "ModifiedBy", opts.ModifiedBy);

        if (!string.IsNullOrEmpty(opts.Library) && !string.IsNullOrEmpty(opts.SiteUrl))
        {
            string site = RemoveQuotes(opts.SiteUrl).TrimEnd('/');
            string library = RemoveQuotes(opts.Library).TrimStart('/');
            parts.Add($"Path:\"{site}/{library}*\"");
        }

        return string.Join(" AND ", parts);
    }

    /// <summary>
    /// Appends <c>property:"value"</c> to <paramref name="parts"/> when <paramref name="value"/> is
    /// non-empty once embedded double quotes have been removed.
    /// </summary>
    private static void AddPhraseClause(List<string> parts, string property, string? value)
    {
        if (string.IsNullOrEmpty(value))
        {
            return;
        }

        string phrase = RemoveQuotes(value);
        if (phrase.Length > 0)
        {
            parts.Add($"{property}:\"{phrase}\"");
        }
    }

    /// <summary>
    /// Removes double quotes so a value cannot terminate the quoted KQL phrase it is placed in.
    /// </summary>
    private static string RemoveQuotes(string value) => value.Replace("\"", string.Empty);

    /// <summary>
    /// Returns <paramref name="rawRow"/> as a string-keyed dictionary: unchanged when it already is one,
    /// copied entry by entry when it is a non-generic dictionary, otherwise <c>null</c>.
    /// </summary>
    private static IDictionary<string, object>? AsRowDictionary(object? rawRow)
    {
        switch (rawRow)
        {
            case IDictionary<string, object> generic:
                return generic;

            case System.Collections.IDictionary legacy:
            {
                var copy = new Dictionary<string, object>();
                foreach (System.Collections.DictionaryEntry entry in legacy)
                {
                    // Null values become empty strings so later lookups never see null.
                    copy[entry.Key.ToString()!] = entry.Value ?? string.Empty;
                }

                return copy;
            }

            default:
                return null;
        }
    }

    /// <summary>Maps one result row to a <see cref="SearchResult"/>.</summary>
    /// <param name="row">Row values keyed by managed-property name.</param>
    private static SearchResult ParseRow(IDictionary<string, object> row)
    {
        return new SearchResult
        {
            Title = Str(row, "Title"),
            Path = Str(row, "Path"),
            FileExtension = Str(row, "FileExtension"),
            Created = ParseDate(Str(row, "Created")),
            LastModified = ParseDate(Str(row, "LastModifiedTime")),
            Author = Str(row, "Author"),
            ModifiedBy = Str(row, "ModifiedBy"),
            SizeBytes = ParseSize(Str(row, "Size")),
        };
    }

    /// <summary>
    /// Parses <paramref name="text"/> as a date using the current culture; <c>null</c> when it cannot be parsed.
    /// </summary>
    private static DateTime? ParseDate(string text) =>
        DateTime.TryParse(text, out var parsed) ? parsed : (DateTime?)null;

    /// <summary>
    /// Parses the ASCII digits found in <paramref name="text"/> as a number, ignoring every other character;
    /// returns 0 when no parsable number remains.
    /// </summary>
    private static long ParseSize(string text)
    {
        string digits = NonDigitRegex.Replace(text, "");
        return long.TryParse(digits, out var size) ? size : 0L;
    }

    /// <summary>
    /// Returns the string form of the value stored under <paramref name="key"/>,
    /// or an empty string when the key is missing or its value is null.
    /// </summary>
    private static string Str(IDictionary<string, object> r, string key) =>
        r.TryGetValue(key, out var v) ? v?.ToString() ?? string.Empty : string.Empty;
}
|