Fixed site discovery process that did not return all the sites
This commit is contained in:
@@ -1,23 +1,31 @@
|
|||||||
using Microsoft.Graph;
|
using Microsoft.Online.SharePoint.TenantAdministration;
|
||||||
using Microsoft.Graph.Models;
|
using Microsoft.SharePoint.Client;
|
||||||
using Microsoft.Kiota.Abstractions;
|
using SharepointToolbox.Web.Core.Helpers;
|
||||||
using SharepointToolbox.Web.Core.Models;
|
using SharepointToolbox.Web.Core.Models;
|
||||||
using AppGraphClientFactory = SharepointToolbox.Web.Infrastructure.Auth.GraphClientFactory;
|
using SharepointToolbox.Web.Services.Session;
|
||||||
|
|
||||||
namespace SharepointToolbox.Web.Services;
|
namespace SharepointToolbox.Web.Services;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Delegated Graph implementation of <see cref="ISiteDiscoveryService"/>.
|
/// Delegated CSOM implementation of <see cref="ISiteDiscoveryService"/>.
|
||||||
/// Uses the <c>/sites?search=*</c> endpoint, paging through every result.
|
///
|
||||||
/// Requires the delegated <c>Sites.Read.All</c> scope.
|
/// Enumerates every site collection via the SharePoint tenant admin endpoint
|
||||||
|
/// (<c>Tenant.GetSitePropertiesFromSharePointByFilters</c>), paging through all
|
||||||
|
/// results. Requires the signed-in user to be a SharePoint administrator.
|
||||||
|
///
|
||||||
|
/// The Graph <c>/sites?search=*</c> endpoint was deliberately abandoned: it ranks
|
||||||
|
/// by relevance and is capped server-side, so it silently dropped sites and
|
||||||
|
/// returned varying counts run-to-run. <c>/sites/getAllSites</c> is app-only and
|
||||||
|
/// 403s on a delegated user token. The tenant admin enumeration is the only path
|
||||||
|
/// that returns the complete, stable set under the app's delegated auth model.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public class SiteDiscoveryService : ISiteDiscoveryService
|
public class SiteDiscoveryService : ISiteDiscoveryService
|
||||||
{
|
{
|
||||||
private readonly AppGraphClientFactory _graphClientFactory;
|
private readonly ISessionManager _sessionManager;
|
||||||
|
|
||||||
public SiteDiscoveryService(AppGraphClientFactory graphClientFactory)
|
public SiteDiscoveryService(ISessionManager sessionManager)
|
||||||
{
|
{
|
||||||
_graphClientFactory = graphClientFactory;
|
_sessionManager = sessionManager;
|
||||||
}
|
}
|
||||||
|
|
||||||
public async Task<IReadOnlyList<SiteInfo>> SearchSitesAsync(
|
public async Task<IReadOnlyList<SiteInfo>> SearchSitesAsync(
|
||||||
@@ -25,47 +33,52 @@ public class SiteDiscoveryService : ISiteDiscoveryService
|
|||||||
string? query = null,
|
string? query = null,
|
||||||
CancellationToken ct = default)
|
CancellationToken ct = default)
|
||||||
{
|
{
|
||||||
var graphClient = await _graphClientFactory.CreateClientAsync(profile);
|
ArgumentException.ThrowIfNullOrEmpty(profile.TenantUrl);
|
||||||
// "*" is the Graph convention for "return all sites".
|
|
||||||
var search = string.IsNullOrWhiteSpace(query) ? "*" : query!;
|
|
||||||
|
|
||||||
// The typed Sites.GetAsync maps its Search property to OData "$search",
|
// Site enumeration only exists on the tenant admin endpoint.
|
||||||
// which routes "*" through KQL and fails ("'*' is not valid at position 0").
|
var adminProfile = new TenantProfile
|
||||||
// The all-sites wildcard only works via the bare, non-OData "search"
|
|
||||||
// query parameter, so build the request manually.
|
|
||||||
var requestInfo = new RequestInformation
|
|
||||||
{
|
{
|
||||||
HttpMethod = Method.GET,
|
Id = profile.Id,
|
||||||
UrlTemplate = "{+baseurl}/sites{?search,%24top}",
|
Name = profile.Name,
|
||||||
PathParameters = new Dictionary<string, object>
|
TenantUrl = BuildAdminUrl(profile.TenantUrl),
|
||||||
{
|
TenantId = profile.TenantId,
|
||||||
{ "baseurl", graphClient.RequestAdapter.BaseUrl ?? "https://graph.microsoft.com/v1.0" }
|
ClientId = profile.ClientId,
|
||||||
},
|
ClientLogo = profile.ClientLogo,
|
||||||
};
|
};
|
||||||
requestInfo.QueryParameters.Add("search", search);
|
|
||||||
requestInfo.QueryParameters.Add("%24top", 999);
|
|
||||||
requestInfo.Headers.Add("Accept", "application/json");
|
|
||||||
|
|
||||||
var response = await graphClient.RequestAdapter.SendAsync<SiteCollectionResponse>(
|
var ctx = await _sessionManager.GetOrCreateContextAsync(adminProfile, ct);
|
||||||
requestInfo, SiteCollectionResponse.CreateFromDiscriminatorValue, cancellationToken: ct);
|
var tenant = new Tenant(ctx);
|
||||||
|
|
||||||
if (response is null) return Array.Empty<SiteInfo>();
|
var filter = new SPOSitePropertiesEnumerableFilter
|
||||||
|
{
|
||||||
|
IncludeDetail = false,
|
||||||
|
IncludePersonalSite = PersonalSiteFilter.Exclude,
|
||||||
|
StartIndex = null,
|
||||||
|
Template = null,
|
||||||
|
};
|
||||||
|
|
||||||
var results = new List<SiteInfo>();
|
var results = new List<SiteInfo>();
|
||||||
var iter = PageIterator<Site, SiteCollectionResponse>.CreatePageIterator(
|
SPOSitePropertiesEnumerable page;
|
||||||
graphClient, response,
|
do
|
||||||
site =>
|
|
||||||
{
|
{
|
||||||
if (ct.IsCancellationRequested) return false;
|
ct.ThrowIfCancellationRequested();
|
||||||
var url = site.WebUrl ?? string.Empty;
|
|
||||||
if (string.IsNullOrEmpty(url)) return true;
|
page = await FetchPageWithColdTokenRetryAsync(ctx, tenant, filter, ct);
|
||||||
// Skip OneDrive personal sites — not useful for these scans.
|
|
||||||
if (url.Contains("/personal/", StringComparison.OrdinalIgnoreCase)) return true;
|
foreach (var sp in page)
|
||||||
var title = site.DisplayName ?? site.Name ?? url;
|
{
|
||||||
|
var url = sp.Url ?? string.Empty;
|
||||||
|
if (string.IsNullOrEmpty(url)) continue;
|
||||||
|
// Belt-and-braces: PersonalSiteFilter.Exclude already drops OneDrive.
|
||||||
|
if (url.Contains("/personal/", StringComparison.OrdinalIgnoreCase)) continue;
|
||||||
|
var title = string.IsNullOrEmpty(sp.Title) ? url : sp.Title;
|
||||||
results.Add(new SiteInfo(url, title));
|
results.Add(new SiteInfo(url, title));
|
||||||
return true;
|
}
|
||||||
});
|
|
||||||
await iter.IterateAsync(ct);
|
// NextStartIndexFromSharePoint is empty/null once the last page is returned.
|
||||||
|
filter.StartIndex = page.NextStartIndexFromSharePoint;
|
||||||
|
}
|
||||||
|
while (!string.IsNullOrEmpty(filter.StartIndex));
|
||||||
|
|
||||||
return results
|
return results
|
||||||
.GroupBy(s => s.Url, StringComparer.OrdinalIgnoreCase)
|
.GroupBy(s => s.Url, StringComparer.OrdinalIgnoreCase)
|
||||||
@@ -73,4 +86,52 @@ public class SiteDiscoveryService : ISiteDiscoveryService
|
|||||||
.OrderBy(s => s.Title, StringComparer.OrdinalIgnoreCase)
|
.OrderBy(s => s.Title, StringComparer.OrdinalIgnoreCase)
|
||||||
.ToList();
|
.ToList();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private const int MaxColdTokenAttempts = 4;
|
||||||
|
private const int BackoffBaseSeconds = 3;
|
||||||
|
|
||||||
|
// The tenant admin endpoint transiently 403s on a cold delegated token (the same
|
||||||
|
// behaviour the elevation coordinator handles): the first call against the admin
|
||||||
|
// host can be denied while the token warms, then clears within seconds. Retry the
|
||||||
|
// admin query on access-denied with backoff. A genuine lack of SharePoint tenant
|
||||||
|
// administrator rights keeps failing and surfaces the enriched 403 after retries —
|
||||||
|
// elevation cannot self-grant tenant-admin, so there is nothing to auto-correct.
|
||||||
|
//
|
||||||
|
// The request (GetSiteProperties + Load) MUST be re-issued inside the loop: a failed
|
||||||
|
// CSOM ExecuteQuery clears the context's pending-request queue, so retrying the
|
||||||
|
// execute alone would run an empty batch, leave the page uninitialized, and throw
|
||||||
|
// "The collection has not been initialized" on iteration.
|
||||||
|
private static async Task<SPOSitePropertiesEnumerable> FetchPageWithColdTokenRetryAsync(
|
||||||
|
ClientContext ctx, Tenant tenant, SPOSitePropertiesEnumerableFilter filter, CancellationToken ct)
|
||||||
|
{
|
||||||
|
for (int attempt = 1; ; attempt++)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
var page = tenant.GetSitePropertiesFromSharePointByFilters(filter);
|
||||||
|
ctx.Load(page);
|
||||||
|
await ExecuteQueryRetryHelper.ExecuteQueryRetryAsync(ctx, null, ct);
|
||||||
|
return page;
|
||||||
|
}
|
||||||
|
catch (SharePointAccessDeniedException ex) when (attempt < MaxColdTokenAttempts)
|
||||||
|
{
|
||||||
|
var delay = TimeSpan.FromSeconds(BackoffBaseSeconds * attempt);
|
||||||
|
Serilog.Log.Warning(
|
||||||
|
"Tenant admin endpoint denied during site discovery (attempt {N}/{Max}); " +
|
||||||
|
"retrying in {Delay}s. {Err}",
|
||||||
|
attempt, MaxColdTokenAttempts, delay.TotalSeconds, ex.Message);
|
||||||
|
await Task.Delay(delay, ct);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// https://contoso.sharepoint.com[/sites/Foo] → https://contoso-admin.sharepoint.com
|
||||||
|
private static string BuildAdminUrl(string tenantUrl)
|
||||||
|
{
|
||||||
|
if (!Uri.TryCreate(tenantUrl, UriKind.Absolute, out var uri))
|
||||||
|
return tenantUrl;
|
||||||
|
var adminHost = uri.Host.Replace(".sharepoint.com", "-admin.sharepoint.com",
|
||||||
|
StringComparison.OrdinalIgnoreCase);
|
||||||
|
return $"{uri.Scheme}://{adminHost}";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user