Merge branch 'main' into dev

This commit is contained in:
Owen
2026-05-20 15:59:01 -07:00
8 changed files with 621 additions and 152 deletions

View File

@@ -18,7 +18,7 @@ import {
userOrgRoles,
userSiteResources
} from "@server/db";
import { and, eq, inArray, ne } from "drizzle-orm";
import { and, count, eq, inArray, ne } from "drizzle-orm";
import { deletePeer as newtDeletePeer } from "@server/routers/newt/peers";
import {
@@ -39,6 +39,11 @@ import {
removePeerData,
removeTargets as removeSubnetProxyTargets
} from "@server/routers/client/targets";
import { lockManager } from "#dynamic/lib/lock";
// TTL for rebuild-association locks. These functions can fan out into many
// peer/proxy updates, so give them a generous window.
const REBUILD_ASSOCIATIONS_LOCK_TTL_MS = 120000;
export async function getClientSiteResourceAccess(
siteResource: SiteResource,
@@ -161,6 +166,23 @@ export async function rebuildClientAssociationsFromSiteResource(
pubKey: string | null;
subnet: string | null;
}[];
}> {
return await lockManager.withLock(
`rebuild-client-associations:site-resource:${siteResource.siteResourceId}`,
() => rebuildClientAssociationsFromSiteResourceImpl(siteResource, trx),
REBUILD_ASSOCIATIONS_LOCK_TTL_MS
);
}
async function rebuildClientAssociationsFromSiteResourceImpl(
siteResource: SiteResource,
trx: Transaction | typeof db = db
): Promise<{
mergedAllClients: {
clientId: number;
pubKey: string | null;
subnet: string | null;
}[];
}> {
logger.debug(
`rebuildClientAssociations: [rebuildClientAssociationsFromSiteResource] START siteResourceId=${siteResource.siteResourceId} networkId=${siteResource.networkId} orgId=${siteResource.orgId}`
@@ -539,6 +561,29 @@ async function handleMessagesForSiteClients(
}
}
// get the number of sites on each of these clients so we can log it and make decisions about whether to send messages based on it
const clientSiteCounts: Record<number, number> = {};
if (clientsToProcess.size > 0) {
const clientIdsToProcess = Array.from(clientsToProcess.keys());
const siteCounts = await trx
.select({
clientId: clientSitesAssociationsCache.clientId,
siteCount: count(clientSitesAssociationsCache.siteId)
})
.from(clientSitesAssociationsCache)
.where(
inArray(
clientSitesAssociationsCache.clientId,
clientIdsToProcess
)
)
.groupBy(clientSitesAssociationsCache.clientId);
for (const row of siteCounts) {
clientSiteCounts[row.clientId] = Number(row.siteCount);
}
}
for (const client of clientsToProcess.values()) {
// UPDATE THE NEWT
if (!client.subnet || !client.pubKey) {
@@ -582,7 +627,14 @@ async function handleMessagesForSiteClients(
}
if (isAdd) {
// TODO: if we are in jit mode here should we really be sending this?
if (clientSiteCounts[client.clientId] > 250) {
// skip adding the peer if we have more than 250 sites because we are in jit mode anyway
logger.info(
`rebuildClientAssociations: Client ${client.clientId} has ${clientSiteCounts[client.clientId]} sites so skipping adding peer to newt and olm because it is likely in jit mode`
);
continue;
}
await initPeerAddHandshake(
// this will kick off the add peer process for the client
client.clientId,
@@ -600,9 +652,24 @@ async function handleMessagesForSiteClients(
exitNodeJobs.push(updateClientSiteDestinations(client, trx));
}
await Promise.all(exitNodeJobs);
await Promise.all(newtJobs); // do the servers first to make sure they are ready?
await Promise.all(olmJobs);
Promise.all(exitNodeJobs).catch((error) => {
logger.error(
`rebuildClientAssociations: Error updating client site destinations for site ${site.siteId}:`,
error
);
});
Promise.all(newtJobs).catch((error) => {
logger.error(
`rebuildClientAssociations: Error updating Newt peers for site ${site.siteId}:`,
error
);
});
Promise.all(olmJobs).catch((error) => {
logger.error(
`rebuildClientAssociations: Error updating Olm peers for site ${site.siteId}:`,
error
);
});
}
interface PeerDestination {
@@ -885,6 +952,17 @@ async function handleSubnetProxyTargetUpdates(
export async function rebuildClientAssociationsFromClient(
client: Client,
trx: Transaction | typeof db = db
): Promise<void> {
return await lockManager.withLock(
`rebuild-client-associations:client:${client.clientId}`,
() => rebuildClientAssociationsFromClientImpl(client, trx),
REBUILD_ASSOCIATIONS_LOCK_TTL_MS
);
}
async function rebuildClientAssociationsFromClientImpl(
client: Client,
trx: Transaction | typeof db = db
): Promise<void> {
let newSiteResourceIds: number[] = [];
@@ -1157,6 +1235,12 @@ async function handleMessagesForClientSites(
const olmJobs: Promise<any>[] = [];
const exitNodeJobs: Promise<any>[] = [];
const totalSitesOnClient = await trx
.select({ count: count(clientSitesAssociationsCache.siteId) })
.from(clientSitesAssociationsCache)
.where(eq(clientSitesAssociationsCache.clientId, client.clientId))
.then((rows) => Number(rows[0].count));
for (const siteData of sitesData) {
const site = siteData.sites;
const exitNode = siteData.exitNodes;
@@ -1217,7 +1301,14 @@ async function handleMessagesForClientSites(
continue;
}
// TODO: if we are in jit mode here should we really be sending this?
if (totalSitesOnClient > 250) {
// skip adding the site if we have more than 250 because we are in jit mode anyway
logger.info(
`rebuildClientAssociations: Client ${client.clientId} has ${totalSitesOnClient} sites so skipping adding peer to newt and olm because it is likely in jit mode`
);
continue;
}
await initPeerAddHandshake(
// this will kick off the add peer process for the client
client.clientId,
@@ -1245,9 +1336,24 @@ async function handleMessagesForClientSites(
);
}
await Promise.all(exitNodeJobs);
await Promise.all(newtJobs);
await Promise.all(olmJobs);
Promise.all(exitNodeJobs).catch((error) => {
logger.error(
`rebuildClientAssociations: Error updating client site destinations for client ${client.clientId}:`,
error
);
});
Promise.all(newtJobs).catch((error) => {
logger.error(
`rebuildClientAssociations: Error updating Newt peers for client ${client.clientId}:`,
error
);
});
Promise.all(olmJobs).catch((error) => {
logger.error(
`rebuildClientAssociations: Error updating Olm peers for client ${client.clientId}:`,
error
);
});
}
async function handleMessagesForClientResources(
@@ -1528,3 +1634,195 @@ async function handleMessagesForClientResources(
await Promise.all([...proxyJobs, ...olmJobs]);
}
export type ClientAssociationsCacheVerification = {
clientId: number;
consistent: boolean;
// What permissions say the cache should contain
expectedSiteResourceIds: number[];
expectedSiteIds: number[];
// What the cache currently contains
actualSiteResourceIds: number[];
actualSiteIds: number[];
// Diff
missingSiteResourceIds: number[]; // present in expected, missing from cache
extraSiteResourceIds: number[]; // present in cache, not in expected
missingSiteIds: number[];
extraSiteIds: number[];
};
// verifyClientAssociationsCache walks the same permission-derivation logic as
// rebuildClientAssociationsFromClient but does NOT modify the database. It
// returns the expected vs actual cache contents and a boolean indicating
// whether the cache is in sync with what permissions imply.
export async function verifyClientAssociationsCache(
client: Client,
trx: Transaction | typeof db = db
): Promise<ClientAssociationsCacheVerification> {
let newSiteResourceIds: number[] = [];
// 1. Direct client associations
const directSiteResources = await trx
.select({ siteResourceId: clientSiteResources.siteResourceId })
.from(clientSiteResources)
.innerJoin(
siteResources,
eq(siteResources.siteResourceId, clientSiteResources.siteResourceId)
)
.where(
and(
eq(clientSiteResources.clientId, client.clientId),
eq(siteResources.orgId, client.orgId)
)
);
newSiteResourceIds.push(
...directSiteResources.map((r) => r.siteResourceId)
);
// 2. User-based and role-based access (if client has a userId)
if (client.userId) {
const userSiteResourceIds = await trx
.select({ siteResourceId: userSiteResources.siteResourceId })
.from(userSiteResources)
.innerJoin(
siteResources,
eq(
siteResources.siteResourceId,
userSiteResources.siteResourceId
)
)
.where(
and(
eq(userSiteResources.userId, client.userId),
eq(siteResources.orgId, client.orgId)
)
);
newSiteResourceIds.push(
...userSiteResourceIds.map((r) => r.siteResourceId)
);
const roleIds = await trx
.select({ roleId: userOrgRoles.roleId })
.from(userOrgRoles)
.where(
and(
eq(userOrgRoles.userId, client.userId),
eq(userOrgRoles.orgId, client.orgId)
)
)
.then((rows) => rows.map((row) => row.roleId));
if (roleIds.length > 0) {
const roleSiteResourceIds = await trx
.select({ siteResourceId: roleSiteResources.siteResourceId })
.from(roleSiteResources)
.innerJoin(
siteResources,
eq(
siteResources.siteResourceId,
roleSiteResources.siteResourceId
)
)
.where(
and(
inArray(roleSiteResources.roleId, roleIds),
eq(siteResources.orgId, client.orgId)
)
);
newSiteResourceIds.push(
...roleSiteResourceIds.map((r) => r.siteResourceId)
);
}
}
newSiteResourceIds = Array.from(new Set(newSiteResourceIds));
const newSiteResources =
newSiteResourceIds.length > 0
? await trx
.select()
.from(siteResources)
.where(
inArray(siteResources.siteResourceId, newSiteResourceIds)
)
: [];
const networkIds = Array.from(
new Set(
newSiteResources
.map((sr) => sr.networkId)
.filter((id): id is number => id !== null)
)
);
const newSiteIds =
networkIds.length > 0
? await trx
.select({ siteId: siteNetworks.siteId })
.from(siteNetworks)
.where(inArray(siteNetworks.networkId, networkIds))
.then((rows) =>
Array.from(new Set(rows.map((r) => r.siteId)))
)
: [];
// Read the existing cache state
const existingResourceAssociations = await trx
.select({
siteResourceId: clientSiteResourcesAssociationsCache.siteResourceId
})
.from(clientSiteResourcesAssociationsCache)
.where(
eq(clientSiteResourcesAssociationsCache.clientId, client.clientId)
);
const existingSiteResourceIds = existingResourceAssociations.map(
(r) => r.siteResourceId
);
const existingSiteAssociations = await trx
.select({ siteId: clientSitesAssociationsCache.siteId })
.from(clientSitesAssociationsCache)
.where(eq(clientSitesAssociationsCache.clientId, client.clientId));
const existingSiteIds = existingSiteAssociations.map((s) => s.siteId);
const expectedSiteResourceSet = new Set(newSiteResourceIds);
const actualSiteResourceSet = new Set(existingSiteResourceIds);
const expectedSiteSet = new Set(newSiteIds);
const actualSiteSet = new Set(existingSiteIds);
const missingSiteResourceIds = newSiteResourceIds.filter(
(id) => !actualSiteResourceSet.has(id)
);
const extraSiteResourceIds = existingSiteResourceIds.filter(
(id) => !expectedSiteResourceSet.has(id)
);
const missingSiteIds = newSiteIds.filter((id) => !actualSiteSet.has(id));
const extraSiteIds = existingSiteIds.filter(
(id) => !expectedSiteSet.has(id)
);
const consistent =
missingSiteResourceIds.length === 0 &&
extraSiteResourceIds.length === 0 &&
missingSiteIds.length === 0 &&
extraSiteIds.length === 0;
return {
clientId: client.clientId,
consistent,
expectedSiteResourceIds: Array.from(expectedSiteResourceSet).sort(
(a, b) => a - b
),
expectedSiteIds: Array.from(expectedSiteSet).sort((a, b) => a - b),
actualSiteResourceIds: Array.from(actualSiteResourceSet).sort(
(a, b) => a - b
),
actualSiteIds: Array.from(actualSiteSet).sort((a, b) => a - b),
missingSiteResourceIds: missingSiteResourceIds.sort((a, b) => a - b),
extraSiteResourceIds: extraSiteResourceIds.sort((a, b) => a - b),
missingSiteIds: missingSiteIds.sort((a, b) => a - b),
extraSiteIds: extraSiteIds.sort((a, b) => a - b)
};
}