mirror of
https://github.com/fosrl/pangolin.git
synced 2026-06-26 09:09:05 +00:00
Add locks to rebuilds
This commit is contained in:
@@ -29,8 +29,11 @@ import { updateResourcePolicies } from "./resourcePolicies";
|
||||
import { BlueprintSource } from "@server/routers/blueprints/types";
|
||||
import { stringify as stringifyYaml } from "yaml";
|
||||
import { generateName } from "@server/db/names";
|
||||
import { handleMessagingForUpdatedSiteResource } from "@server/routers/siteResource";
|
||||
import { rebuildClientAssociationsFromSiteResource } from "../rebuildClientAssociations";
|
||||
import {
|
||||
handleMessagingForUpdatedSiteResource,
|
||||
rebuildClientAssociationsFromSiteResource,
|
||||
waitForSiteResourceRebuildIdle
|
||||
} from "../rebuildClientAssociations";
|
||||
|
||||
type ApplyBlueprintArgs = {
|
||||
orgId: string;
|
||||
@@ -138,26 +141,25 @@ export async function applyBlueprint({
|
||||
for (const result of privateResourcesResults) {
|
||||
rebuildClientAssociationsFromSiteResource(
|
||||
result.newSiteResource
|
||||
).catch((e) => {
|
||||
logger.error(
|
||||
`Failed to rebuild client associations for site resource ${result.newSiteResource.siteResourceId}. Error: ${e}`
|
||||
);
|
||||
});
|
||||
|
||||
handleMessagingForUpdatedSiteResource(
|
||||
result.oldSiteResource,
|
||||
result.newSiteResource,
|
||||
result.oldSites.map((site) => ({
|
||||
// only need to run this on the old sites because the new sites are added above
|
||||
siteId: site.siteId,
|
||||
orgId: result.newSiteResource.orgId
|
||||
}))
|
||||
).catch((err) => {
|
||||
logger.error(
|
||||
`Error handling messaging for updated site resource ${result.newSiteResource.siteResourceId}:`,
|
||||
err
|
||||
);
|
||||
});
|
||||
)
|
||||
.then(() =>
|
||||
waitForSiteResourceRebuildIdle(
|
||||
result.newSiteResource.siteResourceId
|
||||
)
|
||||
)
|
||||
.then(() =>
|
||||
handleMessagingForUpdatedSiteResource(
|
||||
result.oldSiteResource,
|
||||
result.newSiteResource,
|
||||
result.oldSites.map((s) => s.siteId),
|
||||
result.newSites.map((s) => s.siteId)
|
||||
)
|
||||
)
|
||||
.catch((e) => {
|
||||
logger.error(
|
||||
`Failed to rebuild and handle messaging for site resource ${result.newSiteResource.siteResourceId}. Error: ${e}`
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
logger.debug(
|
||||
|
||||
@@ -35,7 +35,7 @@ export class LockManager {
|
||||
ttl: number;
|
||||
owner?: string;
|
||||
}> {
|
||||
return { exists: true, ownedByMe: true, ttl: 0 };
|
||||
return { exists: false, ownedByMe: false, ttl: 0 };
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -49,6 +49,112 @@ import { rebuildQueue } from "#dynamic/lib/rebuildQueue";
|
||||
// peer/proxy updates, so give them a generous window.
|
||||
const REBUILD_ASSOCIATIONS_LOCK_TTL_MS = 120000;

// Delay between successive idle checks in the waitFor*RebuildIdle pollers.
const REBUILD_IDLE_POLL_INTERVAL_MS = 300;

// Default wait for waitForSiteResourceRebuildIdle. Derived from the lock TTL
// (TTL + 10s = 130s) so it always stays slightly longer than the TTL, even if
// the TTL is tuned later.
const REBUILD_IDLE_DEFAULT_TIMEOUT_MS = REBUILD_ASSOCIATIONS_LOCK_TTL_MS + 10_000;

// Default wait for waitForSiteRebuildIdle and waitForClientRebuildIdle.
const REBUILD_IDLE_HANDLER_TIMEOUT_MS = 5_000;
|
||||
|
||||
/**
 * Reports whether a client-association rebuild for a site resource is in
 * flight: either its distributed lock currently exists, or a matching
 * "site-resource" job is still pending in the rebuild queue.
 *
 * @param siteResourceId - ID of the site resource to check.
 * @returns `true` when the lock exists or a job is queued, otherwise `false`.
 */
export async function hasActiveSiteResourceRebuild(
    siteResourceId: number
): Promise<boolean> {
    const { exists: lockHeld } = await lockManager.getLockInfo(
        `rebuild-client-associations:site-resource:${siteResourceId}`
    );

    if (!lockHeld) {
        // No lock present; a rebuild may still be waiting in the queue.
        return rebuildQueue.isQueued({
            type: "site-resource",
            id: siteResourceId
        });
    }

    return true;
}
|
||||
|
||||
/**
 * Resolves once there is no active or queued rebuild for the given site resource.
 *
 * The idle check always runs at least once, then repeats every
 * REBUILD_IDLE_POLL_INTERVAL_MS until `timeoutMs` has elapsed. On timeout a
 * warning is logged and the promise still resolves (it does not reject).
 * Errors thrown by the idle check itself are not caught here and propagate
 * to the caller.
 *
 * @param siteResourceId - ID of the site resource to wait on.
 * @param timeoutMs - Maximum time to wait, in milliseconds. A non-positive
 *   value performs a single check without sleeping.
 */
export async function waitForSiteResourceRebuildIdle(
    siteResourceId: number,
    timeoutMs = REBUILD_IDLE_DEFAULT_TIMEOUT_MS
): Promise<void> {
    const deadline = Date.now() + timeoutMs;

    for (;;) {
        if (!(await hasActiveSiteResourceRebuild(siteResourceId))) return;

        // `!(x > 0)` rather than `x <= 0` so a NaN timeout also stops the loop.
        const remainingMs = deadline - Date.now();
        if (!(remainingMs > 0)) break;

        // Never sleep past the deadline; the next iteration re-checks once
        // more right at the deadline before giving up.
        const delayMs = Math.min(REBUILD_IDLE_POLL_INTERVAL_MS, remainingMs);
        await new Promise<void>((resolve) => setTimeout(resolve, delayMs));
    }

    logger.warn(
        `waitForSiteResourceRebuildIdle: timed out after ${timeoutMs}ms waiting for siteResourceId=${siteResourceId}`
    );
}
|
||||
|
||||
/**
 * Resolves once there are no active or queued rebuilds for any site resource
 * associated with the given site.
 *
 * Site resources are found by joining `siteResources` to `siteNetworks` on
 * `networkId` and filtering by `siteId`. The lookup is repeated on every poll.
 * The idle check always runs at least once, then repeats every
 * REBUILD_IDLE_POLL_INTERVAL_MS until `timeoutMs` has elapsed. On timeout a
 * warning is logged and the promise still resolves (it does not reject).
 * Errors from the query or the idle check are not caught here.
 *
 * @param siteId - ID of the site whose resources should be idle.
 * @param timeoutMs - Maximum time to wait, in milliseconds. A non-positive
 *   value performs a single check without sleeping.
 */
export async function waitForSiteRebuildIdle(
    siteId: number,
    timeoutMs = REBUILD_IDLE_HANDLER_TIMEOUT_MS
): Promise<void> {
    const deadline = Date.now() + timeoutMs;

    for (;;) {
        const resourceRows = await db
            .select({ siteResourceId: siteResources.siteResourceId })
            .from(siteResources)
            .innerJoin(
                siteNetworks,
                eq(siteNetworks.networkId, siteResources.networkId)
            )
            .where(eq(siteNetworks.siteId, siteId));

        // Check sequentially and stop at the first busy resource.
        let allIdle = true;
        for (const { siteResourceId } of resourceRows) {
            if (await hasActiveSiteResourceRebuild(siteResourceId)) {
                allIdle = false;
                break;
            }
        }
        if (allIdle) return;

        // `!(x > 0)` rather than `x <= 0` so a NaN timeout also stops the loop.
        const remainingMs = deadline - Date.now();
        if (!(remainingMs > 0)) break;

        // Never sleep past the deadline; the next iteration re-checks once
        // more right at the deadline before giving up.
        const delayMs = Math.min(REBUILD_IDLE_POLL_INTERVAL_MS, remainingMs);
        await new Promise<void>((resolve) => setTimeout(resolve, delayMs));
    }

    logger.warn(
        `waitForSiteRebuildIdle: timed out after ${timeoutMs}ms waiting for siteId=${siteId}`
    );
}
|
||||
|
||||
/**
 * Resolves once there are no active or queued rebuilds for any site resource
 * associated with the given client.
 *
 * Site resources are read from `clientSiteResourcesAssociationsCache` rows
 * matching `clientId`. The lookup is repeated on every poll. The idle check
 * always runs at least once, then repeats every REBUILD_IDLE_POLL_INTERVAL_MS
 * until `timeoutMs` has elapsed. On timeout a warning is logged and the
 * promise still resolves (it does not reject). Errors from the query or the
 * idle check are not caught here.
 *
 * @param clientId - ID of the client whose site resources should be idle.
 * @param timeoutMs - Maximum time to wait, in milliseconds. A non-positive
 *   value performs a single check without sleeping.
 */
export async function waitForClientRebuildIdle(
    clientId: number,
    timeoutMs = REBUILD_IDLE_HANDLER_TIMEOUT_MS
): Promise<void> {
    const deadline = Date.now() + timeoutMs;

    for (;;) {
        const resourceRows = await db
            .select({
                siteResourceId:
                    clientSiteResourcesAssociationsCache.siteResourceId
            })
            .from(clientSiteResourcesAssociationsCache)
            .where(eq(clientSiteResourcesAssociationsCache.clientId, clientId));

        // Check sequentially and stop at the first busy resource.
        let allIdle = true;
        for (const { siteResourceId } of resourceRows) {
            if (await hasActiveSiteResourceRebuild(siteResourceId)) {
                allIdle = false;
                break;
            }
        }
        if (allIdle) return;

        // `!(x > 0)` rather than `x <= 0` so a NaN timeout also stops the loop.
        const remainingMs = deadline - Date.now();
        if (!(remainingMs > 0)) break;

        // Never sleep past the deadline; the next iteration re-checks once
        // more right at the deadline before giving up.
        const delayMs = Math.min(REBUILD_IDLE_POLL_INTERVAL_MS, remainingMs);
        await new Promise<void>((resolve) => setTimeout(resolve, delayMs));
    }

    logger.warn(
        `waitForClientRebuildIdle: timed out after ${timeoutMs}ms waiting for clientId=${clientId}`
    );
}
|
||||
|
||||
export async function getClientSiteResourceAccess(
|
||||
siteResource: SiteResource,
|
||||
trx: Transaction | typeof db = db
|
||||
@@ -1060,6 +1166,8 @@ export async function handleMessagingForUpdatedSiteResource(
|
||||
);
|
||||
|
||||
// get all of the clients from the cache
|
||||
const { mergedAllClients, mergedAllClientIds } =
|
||||
await getClientSiteResourceAccess(updatedSiteResource, trx);
|
||||
|
||||
const targets = await generateSubnetProxyTargetV2(
|
||||
updatedSiteResource,
|
||||
|
||||
@@ -13,11 +13,15 @@ export interface RebuildJobHandlers {
|
||||
/**
 * Contract for a rebuild-job queue. Implementations decide how jobs are
 * stored and executed; this interface only fixes the three operations below.
 */
export interface RebuildQueueManager {
    /** Submits a rebuild job to the queue; resolves when the submission completes. */
    enqueue(job: RebuildJob): Promise<void>;

    /**
     * Registers the handlers to run queued jobs with.
     * NOTE(review): presumably called once at startup; confirm against callers.
     */
    startProcessing(handlers: RebuildJobHandlers): void;

    /** Resolves to whether the given job is currently queued. */
    isQueued(job: RebuildJob): Promise<boolean>;
}
|
||||
|
||||
/**
 * RebuildQueueManager implementation that does nothing: jobs are dropped,
 * no processing is started, and nothing is ever reported as queued.
 */
class NoopRebuildQueue implements RebuildQueueManager {
    /** Accepts the job and discards it. */
    async enqueue(_job: RebuildJob): Promise<void> {
        // Intentionally empty.
    }

    /** Ignores the handlers; there is no queue to process. */
    startProcessing(_handlers: RebuildJobHandlers): void {
        // Intentionally empty.
    }

    /** Always resolves to `false`, since nothing is ever enqueued. */
    async isQueued(_job: RebuildJob): Promise<boolean> {
        return false;
    }
}
|
||||
|
||||
// Exported rebuild queue instance, typed as RebuildQueueManager and backed by
// the no-op implementation: enqueue() discards jobs and isQueued() always
// resolves to false.
export const rebuildQueue: RebuildQueueManager = new NoopRebuildQueue();
|
||||
|
||||
Reference in New Issue
Block a user