Handle all of the alerting from the functions

This commit is contained in:
Owen
2026-04-22 18:03:26 -07:00
parent 8481b0a073
commit dcbd22b4ad
12 changed files with 200 additions and 189 deletions

View File

@@ -157,7 +157,8 @@ export const resources = pgTable("resources", {
maintenanceTitle: text("maintenanceTitle"),
maintenanceMessage: text("maintenanceMessage"),
maintenanceEstimatedTime: text("maintenanceEstimatedTime"),
postAuthPath: text("postAuthPath")
postAuthPath: text("postAuthPath"),
health: varchar("health") // "healthy", "unhealthy"
});
export const targets = pgTable("targets", {

View File

@@ -178,7 +178,8 @@ export const resources = sqliteTable("resources", {
maintenanceTitle: text("maintenanceTitle"),
maintenanceMessage: text("maintenanceMessage"),
maintenanceEstimatedTime: text("maintenanceEstimatedTime"),
postAuthPath: text("postAuthPath")
postAuthPath: text("postAuthPath"),
health: text("health") // "healthy", "unhealthy"
});
export const targets = sqliteTable("targets", {

View File

@@ -13,6 +13,18 @@
import logger from "@server/logger";
import { processAlerts } from "../processAlerts";
import {
db,
statusHistory,
targetHealthCheck,
targets,
resources
} from "@server/db";
import { eq } from "drizzle-orm";
import {
fireResourceHealthyAlert,
fireResourceUnhealthyAlert
} from "./resourceEvents";
// ---------------------------------------------------------------------------
// Public API
@@ -33,9 +45,20 @@ export async function fireHealthCheckHealthyAlert(
orgId: string,
healthCheckId: number,
healthCheckName?: string | null,
healthCheckTargetId?: number | null,
extra?: Record<string, unknown>
): Promise<void> {
try {
await db.insert(statusHistory).values({
entityType: "health_check",
entityId: healthCheckId,
orgId: orgId,
status: "healthy",
timestamp: Math.floor(Date.now() / 1000)
});
await handleResource(orgId, healthCheckTargetId);
await processAlerts({
eventType: "health_check_healthy",
orgId,
@@ -78,9 +101,20 @@ export async function fireHealthCheckUnhealthyAlert(
orgId: string,
healthCheckId: number,
healthCheckName?: string | null,
healthCheckTargetId?: number | null,
extra?: Record<string, unknown>
): Promise<void> {
try {
await db.insert(statusHistory).values({
entityType: "health_check",
entityId: healthCheckId,
orgId: orgId,
status: "unhealthy",
timestamp: Math.floor(Date.now() / 1000)
});
await handleResource(orgId, healthCheckTargetId);
await processAlerts({
eventType: "health_check_unhealthy",
orgId,
@@ -107,3 +141,63 @@ export async function fireHealthCheckUnhealthyAlert(
);
}
}
async function handleResource(orgId: string, healthCheckTargetId?: number | null) {
if (!healthCheckTargetId) {
return;
}
// we have resources lets get them
const [target] = await db
.select()
.from(targets)
.where(eq(targets.targetId, healthCheckTargetId))
.limit(1);
if (!target) {
return;
}
const [resource] = await db
.select()
.from(resources)
.where(eq(resources.resourceId, target.resourceId))
.limit(1);
if (!resource) {
return;
}
const otherTargets = await db
.select({ hcHealth: targetHealthCheck.hcHealth })
.from(targets)
.where(eq(targets.resourceId, resource.resourceId));
let health = "healthy";
const allHealthy = otherTargets.every((t) => t.hcHealth === "healthy");
if (!allHealthy) {
logger.debug(
`Not marking resource ${resource.resourceId} as healthy because not all targets are healthy`
);
health = "unhealthy";
}
if (health != resource.health) {
// it changed
await db
.update(resources)
.set({ health })
.where(eq(resources.resourceId, resource.resourceId));
if (health === "unhealthy") {
await fireResourceUnhealthyAlert(
orgId,
resource.resourceId,
resource.name
);
} else if (health === "healthy") {
await fireResourceHealthyAlert(
orgId,
resource.resourceId,
resource.name
);
}
}
}

View File

@@ -13,6 +13,7 @@
import logger from "@server/logger";
import { processAlerts } from "../processAlerts";
import { db, statusHistory } from "@server/db";
// ---------------------------------------------------------------------------
// Public API
@@ -36,6 +37,14 @@ export async function fireResourceHealthyAlert(
extra?: Record<string, unknown>
): Promise<void> {
try {
await db.insert(statusHistory).values({
entityType: "resource",
entityId: resourceId,
orgId: orgId,
status: "healthy",
timestamp: Math.floor(Date.now() / 1000)
});
await processAlerts({
eventType: "resource_healthy",
orgId,
@@ -81,6 +90,14 @@ export async function fireResourceUnhealthyAlert(
extra?: Record<string, unknown>
): Promise<void> {
try {
await db.insert(statusHistory).values({
entityType: "resource",
entityId: resourceId,
orgId: orgId,
status: "unhealthy",
timestamp: Math.floor(Date.now() / 1000)
});
await processAlerts({
eventType: "resource_unhealthy",
orgId,

View File

@@ -13,6 +13,9 @@
import logger from "@server/logger";
import { processAlerts } from "../processAlerts";
import { db, sites, statusHistory, targetHealthCheck } from "@server/db";
import { and, eq, inArray } from "drizzle-orm";
import { fireHealthCheckUnhealthyAlert } from "./healthCheckEvents";
// ---------------------------------------------------------------------------
// Public API
@@ -36,6 +39,14 @@ export async function fireSiteOnlineAlert(
extra?: Record<string, unknown>
): Promise<void> {
try {
await db.insert(statusHistory).values({
entityType: "site",
entityId: siteId,
orgId: orgId,
status: "online",
timestamp: Math.floor(Date.now() / 1000)
});
await processAlerts({
eventType: "site_online",
orgId,
@@ -81,6 +92,37 @@ export async function fireSiteOfflineAlert(
extra?: Record<string, unknown>
): Promise<void> {
try {
await db.insert(statusHistory).values({
entityType: "site",
entityId: siteId,
orgId: orgId,
status: "offline",
timestamp: Math.floor(Date.now() / 1000)
});
const unhealthyHealthChecks = await db
.update(targetHealthCheck)
.set({ hcHealth: "unhealthy" })
.where(
and(
eq(targetHealthCheck.orgId, orgId),
eq(targetHealthCheck.siteId, siteId)
)
)
.returning();
for (const healthCheck of unhealthyHealthChecks) {
logger.info(
`Marking health check ${healthCheck.targetHealthCheckId} unhealthy due to site ${siteId} being marked offline`
);
await fireHealthCheckUnhealthyAlert(
healthCheck.orgId,
healthCheck.targetHealthCheckId,
healthCheck.name
);
}
await processAlerts({
eventType: "site_offline",
orgId,

View File

@@ -91,14 +91,6 @@ export async function triggerHealthCheckAlert(
);
}
await db.insert(statusHistory).values({
entityType: "healthCheck",
entityId: healthCheckId,
orgId,
status: eventType === "health_check_healthy" ? "healthy" : "unhealthy",
timestamp: Math.floor(Date.now() / 1000)
});
if (eventType === "health_check_healthy") {
await fireHealthCheckHealthyAlert(
orgId,

View File

@@ -89,16 +89,6 @@ export async function triggerResourceAlert(
);
}
if (eventType === "resource_healthy" || eventType === "resource_unhealthy") {
await db.insert(statusHistory).values({
entityType: "resource",
entityId: resourceId,
orgId,
status: eventType === "resource_healthy" ? "healthy" : "unhealthy",
timestamp: Math.floor(Date.now() / 1000)
});
}
if (eventType === "resource_healthy") {
await fireResourceHealthyAlert(
orgId,
@@ -132,4 +122,4 @@ export async function triggerResourceAlert(
createHttpError(HttpCode.INTERNAL_SERVER_ERROR, "An error occurred")
);
}
}
}

View File

@@ -83,14 +83,6 @@ export async function triggerSiteAlert(
);
}
await db.insert(statusHistory).values({
entityType: "site",
entityId: siteId,
orgId,
status: eventType === "site_online" ? "online" : "offline",
timestamp: Math.floor(Date.now() / 1000)
});
if (eventType === "site_online") {
await fireSiteOnlineAlert(orgId, siteId, site.name ?? undefined);
} else {
@@ -110,4 +102,4 @@ export async function triggerSiteAlert(
createHttpError(HttpCode.INTERNAL_SERVER_ERROR, "An error occurred")
);
}
}
}

View File

@@ -1,5 +1,12 @@
import { MessageHandler } from "@server/routers/ws";
import { db, Newt, sites } from "@server/db";
import {
db,
Newt,
sites,
statusHistory,
targetHealthCheck,
targets
} from "@server/db";
import { eq } from "drizzle-orm";
import logger from "@server/logger";
import { fireSiteOfflineAlert } from "@server/lib/alerts";

View File

@@ -1,8 +1,13 @@
import { db, newts, sites, targetHealthCheck, targets, statusHistory } from "@server/db";
import {
hasActiveConnections,
} from "#dynamic/routers/ws";
import { eq, lt, isNull, and, or, ne, not } from "drizzle-orm";
db,
newts,
sites,
targetHealthCheck,
targets,
statusHistory
} from "@server/db";
import { hasActiveConnections } from "#dynamic/routers/ws";
import { eq, lt, isNull, and, or, ne, not, inArray } from "drizzle-orm";
import logger from "@server/logger";
import { fireSiteOfflineAlert, fireSiteOnlineAlert } from "#dynamic/lib/alerts";
@@ -77,43 +82,11 @@ export const startNewtOfflineChecker = (): void => {
.set({ online: false })
.where(eq(sites.siteId, staleSite.siteId));
await db.insert(statusHistory).values({
entityType: "site",
entityId: staleSite.siteId,
orgId: staleSite.orgId,
status: "offline",
timestamp: Math.floor(Date.now() / 1000),
}).execute();
const healthChecksOnSite = await db
.select()
.from(targetHealthCheck)
.innerJoin(
targets,
eq(targets.targetId, targetHealthCheck.targetId)
)
.innerJoin(sites, eq(sites.siteId, targets.siteId))
.where(eq(sites.siteId, staleSite.siteId));
for (const healthCheck of healthChecksOnSite) {
logger.info(
`Marking health check ${healthCheck.targetHealthCheck.targetHealthCheckId} offline due to site ${staleSite.siteId} being marked offline`
);
await db
.update(targetHealthCheck)
.set({ hcHealth: "unknown" })
.where(
eq(
targetHealthCheck.targetHealthCheckId,
healthCheck.targetHealthCheck
.targetHealthCheckId
)
);
// TODO: should we be firing an alert here when the health check goes to unknown?
}
await fireSiteOfflineAlert(staleSite.orgId, staleSite.siteId, staleSite.name);
await fireSiteOfflineAlert(
staleSite.orgId,
staleSite.siteId,
staleSite.name
);
}
// this part only effects self hosted. Its not efficient but we dont expect people to have very many wireguard sites
@@ -155,15 +128,11 @@ export const startNewtOfflineChecker = (): void => {
.set({ online: false })
.where(eq(sites.siteId, site.siteId));
await db.insert(statusHistory).values({
entityType: "site",
entityId: site.siteId,
orgId: site.orgId,
status: "offline",
timestamp: Math.floor(Date.now() / 1000),
}).execute();
await fireSiteOfflineAlert(site.orgId, site.siteId, site.name);
await fireSiteOfflineAlert(
site.orgId,
site.siteId,
site.name
);
} else if (
lastBandwidthUpdate >= wireguardOfflineThreshold &&
!site.online
@@ -177,15 +146,11 @@ export const startNewtOfflineChecker = (): void => {
.set({ online: true })
.where(eq(sites.siteId, site.siteId));
await db.insert(statusHistory).values({
entityType: "site",
entityId: site.siteId,
orgId: site.orgId,
status: "online",
timestamp: Math.floor(Date.now() / 1000),
}).execute();
await fireSiteOnlineAlert(site.orgId, site.siteId, site.name);
await fireSiteOnlineAlert(
site.orgId,
site.siteId,
site.name
);
}
}
} catch (error) {

View File

@@ -1,5 +1,5 @@
import { db } from "@server/db";
import { sites, clients, olms, statusHistory } from "@server/db";
import { sites, clients, olms } from "@server/db";
import { and, eq, inArray } from "drizzle-orm";
import logger from "@server/logger";
import { fireSiteOnlineAlert } from "#dynamic/lib/alerts";
@@ -147,13 +147,6 @@ async function flushSitePingsToDb(): Promise<void> {
}, "flushSitePingsToDb");
for (const site of newlyOnlineSites) {
await db.insert(statusHistory).values({
entityType: "site",
entityId: site.siteId,
orgId: site.orgId,
status: "online",
timestamp: Math.floor(Date.now() / 1000),
}).execute();
await fireSiteOnlineAlert(site.orgId, site.siteId, site.name);
}
} catch (error) {

View File

@@ -94,26 +94,13 @@ export const handleHealthcheckStatusMessage: MessageHandler = async (
const [targetCheck] = await db
.select({
targetId: targets.targetId,
siteId: targets.siteId,
targetId: targetHealthCheck.targetId,
orgId: targetHealthCheck.orgId,
targetHealthCheckId: targetHealthCheck.targetHealthCheckId,
resourceOrgId: resources.orgId,
resourceId: resources.resourceId,
resourceName: resources.name,
name: targetHealthCheck.name,
hcHealth: targetHealthCheck.hcHealth
})
.from(targetHealthCheck)
.innerJoin(sites, eq(targetHealthCheck.siteId, sites.siteId))
.innerJoin(
targets,
eq(targetHealthCheck.targetId, targets.targetId)
)
.innerJoin(
resources,
eq(targets.resourceId, resources.resourceId)
)
.where(
and(
eq(targetHealthCheck.targetHealthCheckId, targetIdNum),
@@ -147,92 +134,22 @@ export const handleHealthcheckStatusMessage: MessageHandler = async (
| "healthy"
| "unhealthy"
})
.where(eq(targetHealthCheck.targetId, targetCheck.targetId));
const orgId = targetCheck.orgId || targetCheck.resourceOrgId; // for backwards compatibility, check both orgId fields because the target health checks dont have the orgId
if (!orgId) {
logger.warn(
`No org ID found for target ${targetId}, skipping status history logging`
);
continue;
}
// Log the state change to status history
await db.insert(statusHistory).values({
entityType: "healthCheck",
entityId: targetCheck.targetHealthCheckId,
orgId: orgId,
status: healthStatus.status,
timestamp: Math.floor(Date.now() / 1000)
});
if (targetCheck.resourceId) {
// Log the state change to status history for the resource as well
// so we can show the resource status along with the site
// if the status is healthy we should check if ALL of the targets on the resource are currently healthy and if not then dont mark the resource as healthy yet, we want to wait until all targets are healthy to mark the resource as healthy
let status = healthStatus.status;
if (healthStatus.status === "healthy") {
const otherTargets = await db
.select({ hcHealth: targetHealthCheck.hcHealth })
.from(targets)
.innerJoin(
targetHealthCheck,
eq(targets.targetId, targetHealthCheck.targetId)
)
.where(
and(
eq(targets.resourceId, targetCheck.resourceId),
ne(targets.targetId, targetCheck.targetId) // only check the other targets, not the one we just updated
)
);
const allHealthy = otherTargets.every(
(t) => t.hcHealth === "healthy"
);
if (!allHealthy) {
logger.debug(
`Not marking resource ${targetCheck.resourceId} as healthy because not all targets are healthy`
);
status = "unhealthy";
}
}
await db.insert(statusHistory).values({
entityType: "resource",
entityId: targetCheck.resourceId,
orgId: orgId,
status: status,
timestamp: Math.floor(Date.now() / 1000)
});
if (status === "unhealthy") {
await fireResourceUnhealthyAlert(
orgId,
targetCheck.resourceId,
targetCheck.resourceName
);
} else if (status === "healthy") {
await fireResourceHealthyAlert(
orgId,
targetCheck.resourceId,
targetCheck.resourceName
);
}
}
.where(eq(targetHealthCheck.targetHealthCheckId, targetCheck.targetHealthCheckId));
// because we are checking above if there was a change we can fire the alert here because it changed
if (healthStatus.status === "unhealthy") {
await fireHealthCheckUnhealthyAlert(
orgId,
targetCheck.orgId,
targetCheck.targetHealthCheckId,
targetCheck.name ?? undefined
targetCheck.name ?? undefined,
targetCheck.targetId
);
} else if (healthStatus.status === "healthy") {
await fireHealthCheckHealthyAlert(
orgId,
targetCheck.orgId,
targetCheck.targetHealthCheckId,
targetCheck.name ?? undefined
targetCheck.name ?? undefined,
targetCheck.targetId
);
}