diff --git a/server/lib/exitNodes/subnet.ts b/server/lib/exitNodes/subnet.ts index 49e28bd57..8c4f3e99e 100644 --- a/server/lib/exitNodes/subnet.ts +++ b/server/lib/exitNodes/subnet.ts @@ -1,30 +1,55 @@ -import { db, exitNodes } from "@server/db"; +import { db, exitNodes, Transaction } from "@server/db"; import config from "@server/lib/config"; import { findNextAvailableCidr } from "@server/lib/ip"; +import { lockManager } from "#dynamic/lib/lock"; -export async function getNextAvailableSubnet(): Promise { - // Get all existing subnets from routes table - const existingAddresses = await db - .select({ - address: exitNodes.address - }) - .from(exitNodes); - - const addresses = existingAddresses.map((a) => a.address); - let subnet = findNextAvailableCidr( - addresses, - config.getRawConfig().gerbil.block_size, - config.getRawConfig().gerbil.subnet_group - ); - if (!subnet) { - throw new Error("No available subnets remaining in space"); +/** + * Reserves the next available exit node subnet. + * + * Exit node subnets must never overlap with one another - regardless of + * which org(s) they belong to - since HA exit nodes can end up routing for + * the same org. This acquires a lock that the caller MUST release (via the + * returned `release`) only after the chosen address has been durably + * persisted (e.g. after the enclosing transaction commits), otherwise + * concurrent callers can race and pick the same subnet. + */ +export async function getNextAvailableSubnet( + trx: Transaction | typeof db = db +): Promise<{ value: string; release: () => Promise }> { + const lockKey = "exit-node-subnet-allocation"; + const acquired = await lockManager.acquireLockWithRetry(lockKey, 6000); + if (!acquired) { + throw new Error(`Failed to acquire lock: ${lockKey}`); } + const release = () => lockManager.releaseLock(lockKey, acquired); - // replace the last octet with 1 - subnet = - subnet.split(".").slice(0, 3).join(".") + - ".1" + - "/" + - subnet.split("/")[1]; - return subnet; + try { + // Get all existing subnets from routes table + const existingAddresses = await trx + .select({ + address: exitNodes.address + }) + .from(exitNodes); + + const addresses = existingAddresses.map((a) => a.address); + let subnet = findNextAvailableCidr( + addresses, + config.getRawConfig().gerbil.block_size, + config.getRawConfig().gerbil.subnet_group + ); + if (!subnet) { + throw new Error("No available subnets remaining in space"); + } + + // replace the last octet with 1 + subnet = + subnet.split(".").slice(0, 3).join(".") + + ".1" + + "/" + + subnet.split("/")[1]; + return { value: subnet, release }; + } catch (e) { + await release(); + throw e; + } } diff --git a/server/private/routers/gerbil/createExitNode.ts b/server/private/routers/gerbil/createExitNode.ts index 818c5f0e1..cfa7c42eb 100644 --- a/server/private/routers/gerbil/createExitNode.ts +++ b/server/private/routers/gerbil/createExitNode.ts @@ -29,37 +29,41 @@ export async function createExitNode( .where(eq(exitNodes.publicKey, publicKey)); let exitNode: ExitNode; if (!exitNodeQuery) { - const address = await getNextAvailableSubnet(); - // TODO: eventually we will want to get the next available port so that we can multiple exit nodes - // const listenPort = await getNextAvailablePort(); - const listenPort = config.getRawConfig().gerbil.start_port; - let subEndpoint = ""; - if (config.getRawConfig().gerbil.use_subdomain) { - subEndpoint = await getUniqueExitNodeEndpointName(); + const { value: address, release } = await getNextAvailableSubnet(); + try { + // TODO: eventually we will want to get the next available port so that we can multiple exit nodes + // const listenPort = await getNextAvailablePort(); + const listenPort = config.getRawConfig().gerbil.start_port; + let subEndpoint = ""; + if (config.getRawConfig().gerbil.use_subdomain) { + subEndpoint = await getUniqueExitNodeEndpointName(); + } + + const exitNodeName = + config.getRawConfig().gerbil.exit_node_name || + `Exit Node ${publicKey.slice(0, 8)}`; + + // create a new exit node + [exitNode] = await db + .insert(exitNodes) + .values({ + publicKey, + endpoint: `${subEndpoint}${subEndpoint != "" ? "." : ""}${config.getRawConfig().gerbil.base_endpoint}`, + address, + listenPort, + online: true, + reachableAt, + name: exitNodeName + }) + .returning() + .execute(); + + logger.info( + `Created new exit node ${exitNode.name} with address ${exitNode.address} and port ${exitNode.listenPort}` + ); + } finally { + await release(); } - - const exitNodeName = - config.getRawConfig().gerbil.exit_node_name || - `Exit Node ${publicKey.slice(0, 8)}`; - - // create a new exit node - [exitNode] = await db - .insert(exitNodes) - .values({ - publicKey, - endpoint: `${subEndpoint}${subEndpoint != "" ? "." : ""}${config.getRawConfig().gerbil.base_endpoint}`, - address, - listenPort, - online: true, - reachableAt, - name: exitNodeName - }) - .returning() - .execute(); - - logger.info( - `Created new exit node ${exitNode.name} with address ${exitNode.address} and port ${exitNode.listenPort}` - ); } else { // update the reachable at [exitNode] = await db diff --git a/server/private/routers/remoteExitNode/createRemoteExitNode.ts b/server/private/routers/remoteExitNode/createRemoteExitNode.ts index bb16f228d..bf86ed107 100644 --- a/server/private/routers/remoteExitNode/createRemoteExitNode.ts +++ b/server/private/routers/remoteExitNode/createRemoteExitNode.ts @@ -114,8 +114,6 @@ export async function createRemoteExitNode( } const secretHash = await hashPassword(secret); - // const address = await getNextAvailableSubnet(); - const address = "100.89.140.1/24"; // FOR NOW LETS HARDCODE THESE ADDRESSES const [existingRemoteExitNode] = await db .select() @@ -191,89 +189,106 @@ export async function createRemoteExitNode( ); } - await db.transaction(async (trx) => { - if (!existingExitNode) { - const [res] = await trx - .insert(exitNodes) - .values({ - name: remoteExitNodeId, - address, - endpoint: "", - publicKey: "", - listenPort: 0, - online: false, - type: "remoteExitNode" - }) - .returning(); - existingExitNode = res; - } + // If this remote exit node isn't already backing an exit node in + // another org, we're about to create a brand new one. Reserve a + // subnet for it up front so the allocation lock is held across the + // whole insert - this guarantees exit node subnets never overlap, + // even under concurrent creation, which matters for HA setups. + let releaseSubnetLock: (() => Promise) | null = null; + let newExitNodeAddress: string | null = null; + if (!existingExitNode) { + const { value, release } = await getNextAvailableSubnet(); + newExitNodeAddress = value; + releaseSubnetLock = release; + } - if (!existingRemoteExitNode) { - await trx.insert(remoteExitNodes).values({ - remoteExitNodeId: remoteExitNodeId, - secretHash, - dateCreated: moment().toISOString(), - exitNodeId: existingExitNode.exitNodeId - }); - } else { - // update the existing remote exit node - await trx - .update(remoteExitNodes) - .set({ - exitNodeId: existingExitNode.exitNodeId - }) - .where( - eq( - remoteExitNodes.remoteExitNodeId, - existingRemoteExitNode.remoteExitNodeId - ) - ); - } - - if (!existingExitNodeOrg) { - await trx.insert(exitNodeOrgs).values({ - exitNodeId: existingExitNode.exitNodeId, - orgId: orgId - }); - } - - // calculate if the node is in any other of the orgs before we count it as an add to the billing org - if (org.billingOrgId) { - const otherBillingOrgs = await trx - .select() - .from(orgs) - .where( - and( - eq(orgs.billingOrgId, org.billingOrgId), - ne(orgs.orgId, orgId) - ) - ); - - const billingOrgIds = otherBillingOrgs.map((o) => o.orgId); - - const orgsInBillingDomainThatTheNodeIsStillIn = await trx - .select() - .from(exitNodeOrgs) - .where( - and( - eq( - exitNodeOrgs.exitNodeId, - existingExitNode.exitNodeId - ), - inArray(exitNodeOrgs.orgId, billingOrgIds) - ) - ); - - if (orgsInBillingDomainThatTheNodeIsStillIn.length === 0) { - await usageService.add( - orgId, - LimitId.REMOTE_EXIT_NODES, - 1, - trx - ); + try { + await db.transaction(async (trx) => { + if (!existingExitNode) { + const [res] = await trx + .insert(exitNodes) + .values({ + name: remoteExitNodeId, + address: newExitNodeAddress!, + endpoint: "", + publicKey: "", + listenPort: 0, + online: false, + type: "remoteExitNode" + }) + .returning(); + existingExitNode = res; } - } - }); + + if (!existingRemoteExitNode) { + await trx.insert(remoteExitNodes).values({ + remoteExitNodeId: remoteExitNodeId, + secretHash, + dateCreated: moment().toISOString(), + exitNodeId: existingExitNode.exitNodeId + }); + } else { + // update the existing remote exit node + await trx + .update(remoteExitNodes) + .set({ + exitNodeId: existingExitNode.exitNodeId + }) + .where( + eq( + remoteExitNodes.remoteExitNodeId, + existingRemoteExitNode.remoteExitNodeId + ) + ); + } + + if (!existingExitNodeOrg) { + await trx.insert(exitNodeOrgs).values({ + exitNodeId: existingExitNode.exitNodeId, + orgId: orgId + }); + } + + // calculate if the node is in any other of the orgs before we count it as an add to the billing org + if (org.billingOrgId) { + const otherBillingOrgs = await trx + .select() + .from(orgs) + .where( + and( + eq(orgs.billingOrgId, org.billingOrgId), + ne(orgs.orgId, orgId) + ) + ); + + const billingOrgIds = otherBillingOrgs.map((o) => o.orgId); + + const orgsInBillingDomainThatTheNodeIsStillIn = await trx + .select() + .from(exitNodeOrgs) + .where( + and( + eq( + exitNodeOrgs.exitNodeId, + existingExitNode.exitNodeId + ), + inArray(exitNodeOrgs.orgId, billingOrgIds) + ) + ); + + if (orgsInBillingDomainThatTheNodeIsStillIn.length === 0) { + await usageService.add( + orgId, + LimitId.REMOTE_EXIT_NODES, + 1, + trx + ); + } + } + }); + } finally { + await releaseSubnetLock?.(); + } const token = generateSessionToken(); await createRemoteExitNodeSession(token, remoteExitNodeId); diff --git a/server/routers/gerbil/createExitNode.ts b/server/routers/gerbil/createExitNode.ts index bc9650367..9e93cf575 100644 --- a/server/routers/gerbil/createExitNode.ts +++ b/server/routers/gerbil/createExitNode.ts @@ -13,37 +13,41 @@ export async function createExitNode( const [exitNodeQuery] = await db.select().from(exitNodes).limit(1); let exitNode: ExitNode; if (!exitNodeQuery) { - const address = await getNextAvailableSubnet(); - // TODO: eventually we will want to get the next available port so that we can multiple exit nodes - // const listenPort = await getNextAvailablePort(); - const listenPort = config.getRawConfig().gerbil.start_port; - let subEndpoint = ""; - if (config.getRawConfig().gerbil.use_subdomain) { - subEndpoint = await getUniqueExitNodeEndpointName(); + const { value: address, release } = await getNextAvailableSubnet(); + try { + // TODO: eventually we will want to get the next available port so that we can multiple exit nodes + // const listenPort = await getNextAvailablePort(); + const listenPort = config.getRawConfig().gerbil.start_port; + let subEndpoint = ""; + if (config.getRawConfig().gerbil.use_subdomain) { + subEndpoint = await getUniqueExitNodeEndpointName(); + } + + const exitNodeName = + config.getRawConfig().gerbil.exit_node_name || + `Exit Node ${publicKey.slice(0, 8)}`; + + // create a new exit node + [exitNode] = await db + .insert(exitNodes) + .values({ + publicKey, + endpoint: `${subEndpoint}${subEndpoint != "" ? "." : ""}${config.getRawConfig().gerbil.base_endpoint}`, + address, + online: true, + listenPort, + reachableAt, + name: exitNodeName + }) + .returning() + .execute(); + + logger.info( + `Created new exit node ${exitNode.name} with address ${exitNode.address} and port ${exitNode.listenPort}` + ); + } finally { + await release(); } - - const exitNodeName = - config.getRawConfig().gerbil.exit_node_name || - `Exit Node ${publicKey.slice(0, 8)}`; - - // create a new exit node - [exitNode] = await db - .insert(exitNodes) - .values({ - publicKey, - endpoint: `${subEndpoint}${subEndpoint != "" ? "." : ""}${config.getRawConfig().gerbil.base_endpoint}`, - address, - online: true, - listenPort, - reachableAt, - name: exitNodeName - }) - .returning() - .execute(); - - logger.info( - `Created new exit node ${exitNode.name} with address ${exitNode.address} and port ${exitNode.listenPort}` - ); } else { // update the existing exit node [exitNode] = await db