Files
kawa 881f3a8bac Add backoff-retry to elevation for transient admin 403 and grant lag
Logs showed the failure was a transient 403 on the tenant admin endpoint
(loading CurrentUser on -admin.sharepoint.com returned E_ACCESSDENIED on a
cold token), and that re-running the operation a few seconds later succeeded.
The site-collection admin grant is also eventually consistent on Group/Teams
sites, taking a few seconds to reach the content endpoint.

Retry both stages with backoff (3s, 6s, 9s; 4 attempts) instead of failing
on the first denial:
- ElevateAsync retries the admin-endpoint grant on transient access-denied; a
  genuine lack of tenant-admin rights still surfaces after retries exhaust.
- After a successful grant, the post-elevation operation retries on continued
  access-denied to absorb grant-propagation lag.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 14:39:29 +02:00

165 lines
7.3 KiB
C#

using Serilog;
using SharepointToolbox.Web.Core.Helpers;
using SharepointToolbox.Web.Services.Session;
namespace SharepointToolbox.Web.Services;
/// <summary>
/// Scoped per Blazor circuit. Catches <see cref="SharePointAccessDeniedException"/> from any
/// wrapped operation and, when AutoTakeOwnership is enabled, grants the current user
/// site-collection admin on the failing site (via the tenant admin endpoint) before retrying.
///
/// Retry is safe because the wrapped operation closure re-issues its own CSOM loads on each
/// attempt; the granted permission is server-side and takes effect for the existing delegated
/// token without re-authentication. A site is recorded as elevated only after the grant call
/// succeeds; from then on a further denial on that site is rethrown unchanged, which prevents
/// elevation loops.
///
/// Both the admin-endpoint grant and the post-grant operation are retried with backoff: the
/// tenant admin endpoint can transiently 403 on a cold token, and the site-collection admin grant
/// is eventually consistent (notably on Group/Teams-connected sites), taking a few seconds to apply.
/// </summary>
public class ElevationCoordinator : IElevationCoordinator
{
    /// <summary>Maximum number of tries (the first one included) for each retried stage.</summary>
    private const int MaxBackoffAttempts = 4;

    /// <summary>Linear backoff step: the wait after attempt N is N times this many seconds (3s, 6s, 9s).</summary>
    private const int BackoffBaseSeconds = 3;

    // Host-name building blocks used to derive the tenant admin endpoint from a site URL.
    private const string SharePointHostSuffix = ".sharepoint.com";
    private const string AdminHostMarker = "-admin";
    private const string OneDriveHostMarker = "-my";

    private readonly ISessionManager _sessionManager;
    private readonly IOwnershipElevationService _ownership;
    private readonly IUserSessionService _session;

    // Sites already elevated in this circuit. The comparer is case-insensitive, so URLs are
    // stored as-is (no manual lower-casing needed).
    private readonly HashSet<string> _elevatedSites = new(StringComparer.OrdinalIgnoreCase);

    public ElevationCoordinator(
        ISessionManager sessionManager,
        IOwnershipElevationService ownership,
        IUserSessionService session)
    {
        _sessionManager = sessionManager;
        _ownership = ownership;
        _session = session;
    }

    /// <summary>
    /// Runs a result-less operation with the same auto-elevation behaviour as
    /// <see cref="RunAsync{T}(Func{CancellationToken, Task{T}}, CancellationToken)"/>.
    /// </summary>
    public async Task RunAsync(Func<CancellationToken, Task> operation, CancellationToken ct) =>
        await RunAsync<object?>(async c => { await operation(c); return null; }, ct);

    /// <summary>
    /// Runs <paramref name="operation"/>; if it fails with
    /// <see cref="SharePointAccessDeniedException"/> and AutoTakeOwnership is enabled, grants the
    /// current user site-collection admin on the failing site and re-runs the operation, retrying
    /// with backoff while the denial persists.
    /// </summary>
    /// <param name="operation">The work to run. It is invoked again on every retry.</param>
    /// <param name="ct">Cancellation token passed to the operation and to the backoff delays.</param>
    /// <returns>The value produced by the first successful invocation of the operation.</returns>
    /// <exception cref="SharePointAccessDeniedException">
    /// Rethrown when auto-elevation is disabled, the exception carries no usable site URL, the
    /// site was already elevated, the denial came from the tenant admin endpoint, or the
    /// operation is still denied after all post-elevation attempts.
    /// </exception>
    /// <exception cref="InvalidOperationException">
    /// Thrown when there is no active profile or the grant itself fails.
    /// </exception>
    public async Task<T> RunAsync<T>(Func<CancellationToken, Task<T>> operation, CancellationToken ct)
    {
        try
        {
            return await operation(ct);
        }
        catch (SharePointAccessDeniedException ex)
        {
            if (!_session.Settings.AutoTakeOwnership)
                throw;

            // Without a usable site URL there is nothing to elevate on; surface the original
            // denial instead of failing later with an unrelated error on an empty URL.
            var siteUrl = ex.SiteUrl?.TrimEnd('/');
            if (string.IsNullOrWhiteSpace(siteUrl))
                throw;

            // Already elevated this site and still denied → elevation can't fix it. Surface original.
            if (_elevatedSites.Contains(siteUrl))
                throw;

            // Elevation targets the tenant admin endpoint; denials there aren't site-ownership issues.
            if (siteUrl.Contains("-admin.sharepoint.com", StringComparison.OrdinalIgnoreCase))
                throw;

            await ElevateAsync(siteUrl, ct);
            _elevatedSites.Add(siteUrl);

            // Verify the grant actually took effect for this delegated token before retrying,
            // so the logs distinguish "grant failed/no-op" from "scan still fails for another reason".
            await VerifyAdminAsync(siteUrl, ct);

            // The site-collection admin grant is eventually consistent — on Group/Teams sites it
            // can take a few seconds to propagate to the content endpoint. Retry with backoff.
            // The exception filter stops matching on the last attempt, so that denial propagates.
            for (int attempt = 1; ; attempt++)
            {
                try
                {
                    return await operation(ct);
                }
                catch (SharePointAccessDeniedException) when (attempt < MaxBackoffAttempts)
                {
                    var delay = BackoffDelay(attempt);
                    Log.Warning("Post-elevation scan still denied for {Site} (attempt {N}/{Max}); retrying in {Delay}s.",
                        siteUrl, attempt, MaxBackoffAttempts, delay.TotalSeconds);
                    await Task.Delay(delay, ct);
                }
            }
        }
    }

    // Linear backoff: the wait grows by BackoffBaseSeconds with every failed attempt.
    private static TimeSpan BackoffDelay(int attempt) =>
        TimeSpan.FromSeconds(BackoffBaseSeconds * attempt);

    // Grants the current user site-collection admin on siteUrl through the tenant admin endpoint,
    // retrying with backoff while that endpoint answers with access-denied.
    // Throws InvalidOperationException when no profile is active or the grant ultimately fails.
    private async Task ElevateAsync(string siteUrl, CancellationToken ct)
    {
        var profile = _session.CurrentProfile
            ?? throw new InvalidOperationException("Cannot elevate ownership: no active profile.");

        // Same identity as the active profile, but pointed at the tenant admin endpoint.
        var adminProfile = new Core.Models.TenantProfile
        {
            Id = profile.Id,
            Name = profile.Name,
            TenantUrl = BuildAdminUrl(siteUrl),
            TenantId = profile.TenantId,
            ClientId = profile.ClientId,
            ClientLogo = profile.ClientLogo,
        };

        var adminCtx = await _sessionManager.GetOrCreateContextAsync(adminProfile, ct);
        Log.Information("Auto-elevating site-collection admin ownership for {Site} via {Admin}",
            siteUrl, adminProfile.TenantUrl);

        for (int attempt = 1; ; attempt++)
        {
            try
            {
                // loginName empty → ElevateAsync resolves the current (delegated) user from the admin context.
                await _ownership.ElevateAsync(adminCtx, siteUrl, loginName: string.Empty, ct);
                return;
            }
            // The admin endpoint can transiently 403 on a cold token / first call; it clears within
            // seconds. A genuine lack of tenant-admin rights keeps failing and surfaces after retries
            // (the final denial falls through to the generic handler below).
            catch (SharePointAccessDeniedException ex) when (attempt < MaxBackoffAttempts)
            {
                var delay = BackoffDelay(attempt);
                Log.Warning("Admin endpoint denied for {Site} (attempt {N}/{Max}); retrying in {Delay}s. {Err}",
                    siteUrl, attempt, MaxBackoffAttempts, delay.TotalSeconds, ex.Message);
                await Task.Delay(delay, ct);
            }
            // Cancellation is left to propagate untouched; everything else is wrapped with guidance.
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                Log.Error(ex, "Auto-elevate ownership failed for {Site}", siteUrl);
                throw new InvalidOperationException(
                    $"Auto-elevate ownership failed for {siteUrl}. Granting site-collection admin requires " +
                    $"SharePoint tenant administrator rights on the signed-in account. ({ex.Message})", ex);
            }
        }
    }

    // Reads the current user's site-admin flag on the target site right after elevation.
    // Diagnostic only — never throws into the operation flow.
    private async Task VerifyAdminAsync(string siteUrl, CancellationToken ct)
    {
        try
        {
            var ctx = await _sessionManager.GetOrCreateContextAsync(siteUrl, _session.CurrentProfile!, ct);
            ctx.Load(ctx.Web.CurrentUser, u => u.LoginName, u => u.IsSiteAdmin);
            await ctx.ExecuteQueryAsync();
            Log.Information("Post-elevation check {Site}: user={Login} IsSiteAdmin={IsAdmin}",
                siteUrl, ctx.Web.CurrentUser.LoginName, ctx.Web.CurrentUser.IsSiteAdmin);
        }
        catch (Exception ex)
        {
            // Deliberately best-effort: a failed check is only logged.
            Log.Warning("Post-elevation check failed for {Site}: {Error}", siteUrl, ex.Message);
        }
    }

    // Derives the tenant admin endpoint from a site URL:
    //   https://abcube.sharepoint.com/sites/Foo        → https://abcube-admin.sharepoint.com
    //   https://abcube-my.sharepoint.com/personal/jdoe → https://abcube-admin.sharepoint.com
    // OneDrive hosts carry a "-my" marker that must be dropped, otherwise the result would be the
    // non-existent "abcube-my-admin.sharepoint.com". Input that is not an absolute URL is returned
    // unchanged; hosts outside *.sharepoint.com are reduced to scheme + host without rewriting.
    private static string BuildAdminUrl(string siteUrl)
    {
        if (!Uri.TryCreate(siteUrl, UriKind.Absolute, out var uri))
            return siteUrl;

        var host = uri.Host;
        if (host.EndsWith(SharePointHostSuffix, StringComparison.OrdinalIgnoreCase))
        {
            var tenant = host[..^SharePointHostSuffix.Length];

            // Strip the OneDrive marker so personal sites resolve to the tenant-wide admin host.
            if (tenant.Length > OneDriveHostMarker.Length
                && tenant.EndsWith(OneDriveHostMarker, StringComparison.OrdinalIgnoreCase))
            {
                tenant = tenant[..^OneDriveHostMarker.Length];
            }

            // Idempotent: a host that is already the admin endpoint is not suffixed a second time.
            if (!tenant.EndsWith(AdminHostMarker, StringComparison.OrdinalIgnoreCase))
                tenant += AdminHostMarker;

            host = tenant + SharePointHostSuffix;
        }

        return $"{uri.Scheme}://{host}";
    }
}