mirror of
https://github.com/fosrl/pangolin.git
synced 2026-05-31 04:56:43 +00:00
Handle all of the alerting from the functions
This commit is contained in:
@@ -157,7 +157,8 @@ export const resources = pgTable("resources", {
|
||||
maintenanceTitle: text("maintenanceTitle"),
|
||||
maintenanceMessage: text("maintenanceMessage"),
|
||||
maintenanceEstimatedTime: text("maintenanceEstimatedTime"),
|
||||
postAuthPath: text("postAuthPath")
|
||||
postAuthPath: text("postAuthPath"),
|
||||
health: varchar("health") // "healthy", "unhealthy"
|
||||
});
|
||||
|
||||
export const targets = pgTable("targets", {
|
||||
|
||||
@@ -178,7 +178,8 @@ export const resources = sqliteTable("resources", {
|
||||
maintenanceTitle: text("maintenanceTitle"),
|
||||
maintenanceMessage: text("maintenanceMessage"),
|
||||
maintenanceEstimatedTime: text("maintenanceEstimatedTime"),
|
||||
postAuthPath: text("postAuthPath")
|
||||
postAuthPath: text("postAuthPath"),
|
||||
health: text("health") // "healthy", "unhealthy"
|
||||
});
|
||||
|
||||
export const targets = sqliteTable("targets", {
|
||||
|
||||
@@ -13,6 +13,18 @@
|
||||
|
||||
import logger from "@server/logger";
|
||||
import { processAlerts } from "../processAlerts";
|
||||
import {
|
||||
db,
|
||||
statusHistory,
|
||||
targetHealthCheck,
|
||||
targets,
|
||||
resources
|
||||
} from "@server/db";
|
||||
import { eq } from "drizzle-orm";
|
||||
import {
|
||||
fireResourceHealthyAlert,
|
||||
fireResourceUnhealthyAlert
|
||||
} from "./resourceEvents";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Public API
|
||||
@@ -33,9 +45,20 @@ export async function fireHealthCheckHealthyAlert(
|
||||
orgId: string,
|
||||
healthCheckId: number,
|
||||
healthCheckName?: string | null,
|
||||
healthCheckTargetId?: number | null,
|
||||
extra?: Record<string, unknown>
|
||||
): Promise<void> {
|
||||
try {
|
||||
await db.insert(statusHistory).values({
|
||||
entityType: "health_check",
|
||||
entityId: healthCheckId,
|
||||
orgId: orgId,
|
||||
status: "healthy",
|
||||
timestamp: Math.floor(Date.now() / 1000)
|
||||
});
|
||||
|
||||
await handleResource(orgId, healthCheckTargetId);
|
||||
|
||||
await processAlerts({
|
||||
eventType: "health_check_healthy",
|
||||
orgId,
|
||||
@@ -78,9 +101,20 @@ export async function fireHealthCheckUnhealthyAlert(
|
||||
orgId: string,
|
||||
healthCheckId: number,
|
||||
healthCheckName?: string | null,
|
||||
healthCheckTargetId?: number | null,
|
||||
extra?: Record<string, unknown>
|
||||
): Promise<void> {
|
||||
try {
|
||||
await db.insert(statusHistory).values({
|
||||
entityType: "health_check",
|
||||
entityId: healthCheckId,
|
||||
orgId: orgId,
|
||||
status: "unhealthy",
|
||||
timestamp: Math.floor(Date.now() / 1000)
|
||||
});
|
||||
|
||||
await handleResource(orgId, healthCheckTargetId);
|
||||
|
||||
await processAlerts({
|
||||
eventType: "health_check_unhealthy",
|
||||
orgId,
|
||||
@@ -107,3 +141,63 @@ export async function fireHealthCheckUnhealthyAlert(
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
async function handleResource(orgId: string, healthCheckTargetId?: number | null) {
|
||||
if (!healthCheckTargetId) {
|
||||
return;
|
||||
}
|
||||
// we have resources lets get them
|
||||
const [target] = await db
|
||||
.select()
|
||||
.from(targets)
|
||||
.where(eq(targets.targetId, healthCheckTargetId))
|
||||
.limit(1);
|
||||
|
||||
if (!target) {
|
||||
return;
|
||||
}
|
||||
const [resource] = await db
|
||||
.select()
|
||||
.from(resources)
|
||||
.where(eq(resources.resourceId, target.resourceId))
|
||||
.limit(1);
|
||||
|
||||
if (!resource) {
|
||||
return;
|
||||
}
|
||||
const otherTargets = await db
|
||||
.select({ hcHealth: targetHealthCheck.hcHealth })
|
||||
.from(targets)
|
||||
.where(eq(targets.resourceId, resource.resourceId));
|
||||
|
||||
let health = "healthy";
|
||||
const allHealthy = otherTargets.every((t) => t.hcHealth === "healthy");
|
||||
if (!allHealthy) {
|
||||
logger.debug(
|
||||
`Not marking resource ${resource.resourceId} as healthy because not all targets are healthy`
|
||||
);
|
||||
health = "unhealthy";
|
||||
}
|
||||
|
||||
if (health != resource.health) {
|
||||
// it changed
|
||||
await db
|
||||
.update(resources)
|
||||
.set({ health })
|
||||
.where(eq(resources.resourceId, resource.resourceId));
|
||||
|
||||
if (health === "unhealthy") {
|
||||
await fireResourceUnhealthyAlert(
|
||||
orgId,
|
||||
resource.resourceId,
|
||||
resource.name
|
||||
);
|
||||
} else if (health === "healthy") {
|
||||
await fireResourceHealthyAlert(
|
||||
orgId,
|
||||
resource.resourceId,
|
||||
resource.name
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
|
||||
import logger from "@server/logger";
|
||||
import { processAlerts } from "../processAlerts";
|
||||
import { db, statusHistory } from "@server/db";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Public API
|
||||
@@ -36,6 +37,14 @@ export async function fireResourceHealthyAlert(
|
||||
extra?: Record<string, unknown>
|
||||
): Promise<void> {
|
||||
try {
|
||||
await db.insert(statusHistory).values({
|
||||
entityType: "resource",
|
||||
entityId: resourceId,
|
||||
orgId: orgId,
|
||||
status: "healthy",
|
||||
timestamp: Math.floor(Date.now() / 1000)
|
||||
});
|
||||
|
||||
await processAlerts({
|
||||
eventType: "resource_healthy",
|
||||
orgId,
|
||||
@@ -81,6 +90,14 @@ export async function fireResourceUnhealthyAlert(
|
||||
extra?: Record<string, unknown>
|
||||
): Promise<void> {
|
||||
try {
|
||||
await db.insert(statusHistory).values({
|
||||
entityType: "resource",
|
||||
entityId: resourceId,
|
||||
orgId: orgId,
|
||||
status: "unhealthy",
|
||||
timestamp: Math.floor(Date.now() / 1000)
|
||||
});
|
||||
|
||||
await processAlerts({
|
||||
eventType: "resource_unhealthy",
|
||||
orgId,
|
||||
|
||||
@@ -13,6 +13,9 @@
|
||||
|
||||
import logger from "@server/logger";
|
||||
import { processAlerts } from "../processAlerts";
|
||||
import { db, sites, statusHistory, targetHealthCheck } from "@server/db";
|
||||
import { and, eq, inArray } from "drizzle-orm";
|
||||
import { fireHealthCheckUnhealthyAlert } from "./healthCheckEvents";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Public API
|
||||
@@ -36,6 +39,14 @@ export async function fireSiteOnlineAlert(
|
||||
extra?: Record<string, unknown>
|
||||
): Promise<void> {
|
||||
try {
|
||||
await db.insert(statusHistory).values({
|
||||
entityType: "site",
|
||||
entityId: siteId,
|
||||
orgId: orgId,
|
||||
status: "online",
|
||||
timestamp: Math.floor(Date.now() / 1000)
|
||||
});
|
||||
|
||||
await processAlerts({
|
||||
eventType: "site_online",
|
||||
orgId,
|
||||
@@ -81,6 +92,37 @@ export async function fireSiteOfflineAlert(
|
||||
extra?: Record<string, unknown>
|
||||
): Promise<void> {
|
||||
try {
|
||||
await db.insert(statusHistory).values({
|
||||
entityType: "site",
|
||||
entityId: siteId,
|
||||
orgId: orgId,
|
||||
status: "offline",
|
||||
timestamp: Math.floor(Date.now() / 1000)
|
||||
});
|
||||
|
||||
const unhealthyHealthChecks = await db
|
||||
.update(targetHealthCheck)
|
||||
.set({ hcHealth: "unhealthy" })
|
||||
.where(
|
||||
and(
|
||||
eq(targetHealthCheck.orgId, orgId),
|
||||
eq(targetHealthCheck.siteId, siteId)
|
||||
)
|
||||
)
|
||||
.returning();
|
||||
|
||||
for (const healthCheck of unhealthyHealthChecks) {
|
||||
logger.info(
|
||||
`Marking health check ${healthCheck.targetHealthCheckId} unhealthy due to site ${siteId} being marked offline`
|
||||
);
|
||||
|
||||
await fireHealthCheckUnhealthyAlert(
|
||||
healthCheck.orgId,
|
||||
healthCheck.targetHealthCheckId,
|
||||
healthCheck.name
|
||||
);
|
||||
}
|
||||
|
||||
await processAlerts({
|
||||
eventType: "site_offline",
|
||||
orgId,
|
||||
|
||||
@@ -91,14 +91,6 @@ export async function triggerHealthCheckAlert(
|
||||
);
|
||||
}
|
||||
|
||||
await db.insert(statusHistory).values({
|
||||
entityType: "healthCheck",
|
||||
entityId: healthCheckId,
|
||||
orgId,
|
||||
status: eventType === "health_check_healthy" ? "healthy" : "unhealthy",
|
||||
timestamp: Math.floor(Date.now() / 1000)
|
||||
});
|
||||
|
||||
if (eventType === "health_check_healthy") {
|
||||
await fireHealthCheckHealthyAlert(
|
||||
orgId,
|
||||
|
||||
@@ -89,16 +89,6 @@ export async function triggerResourceAlert(
|
||||
);
|
||||
}
|
||||
|
||||
if (eventType === "resource_healthy" || eventType === "resource_unhealthy") {
|
||||
await db.insert(statusHistory).values({
|
||||
entityType: "resource",
|
||||
entityId: resourceId,
|
||||
orgId,
|
||||
status: eventType === "resource_healthy" ? "healthy" : "unhealthy",
|
||||
timestamp: Math.floor(Date.now() / 1000)
|
||||
});
|
||||
}
|
||||
|
||||
if (eventType === "resource_healthy") {
|
||||
await fireResourceHealthyAlert(
|
||||
orgId,
|
||||
@@ -132,4 +122,4 @@ export async function triggerResourceAlert(
|
||||
createHttpError(HttpCode.INTERNAL_SERVER_ERROR, "An error occurred")
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,14 +83,6 @@ export async function triggerSiteAlert(
|
||||
);
|
||||
}
|
||||
|
||||
await db.insert(statusHistory).values({
|
||||
entityType: "site",
|
||||
entityId: siteId,
|
||||
orgId,
|
||||
status: eventType === "site_online" ? "online" : "offline",
|
||||
timestamp: Math.floor(Date.now() / 1000)
|
||||
});
|
||||
|
||||
if (eventType === "site_online") {
|
||||
await fireSiteOnlineAlert(orgId, siteId, site.name ?? undefined);
|
||||
} else {
|
||||
@@ -110,4 +102,4 @@ export async function triggerSiteAlert(
|
||||
createHttpError(HttpCode.INTERNAL_SERVER_ERROR, "An error occurred")
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
import { MessageHandler } from "@server/routers/ws";
|
||||
import { db, Newt, sites } from "@server/db";
|
||||
import {
|
||||
db,
|
||||
Newt,
|
||||
sites,
|
||||
statusHistory,
|
||||
targetHealthCheck,
|
||||
targets
|
||||
} from "@server/db";
|
||||
import { eq } from "drizzle-orm";
|
||||
import logger from "@server/logger";
|
||||
import { fireSiteOfflineAlert } from "@server/lib/alerts";
|
||||
|
||||
@@ -1,8 +1,13 @@
|
||||
import { db, newts, sites, targetHealthCheck, targets, statusHistory } from "@server/db";
|
||||
import {
|
||||
hasActiveConnections,
|
||||
} from "#dynamic/routers/ws";
|
||||
import { eq, lt, isNull, and, or, ne, not } from "drizzle-orm";
|
||||
db,
|
||||
newts,
|
||||
sites,
|
||||
targetHealthCheck,
|
||||
targets,
|
||||
statusHistory
|
||||
} from "@server/db";
|
||||
import { hasActiveConnections } from "#dynamic/routers/ws";
|
||||
import { eq, lt, isNull, and, or, ne, not, inArray } from "drizzle-orm";
|
||||
import logger from "@server/logger";
|
||||
import { fireSiteOfflineAlert, fireSiteOnlineAlert } from "#dynamic/lib/alerts";
|
||||
|
||||
@@ -77,43 +82,11 @@ export const startNewtOfflineChecker = (): void => {
|
||||
.set({ online: false })
|
||||
.where(eq(sites.siteId, staleSite.siteId));
|
||||
|
||||
await db.insert(statusHistory).values({
|
||||
entityType: "site",
|
||||
entityId: staleSite.siteId,
|
||||
orgId: staleSite.orgId,
|
||||
status: "offline",
|
||||
timestamp: Math.floor(Date.now() / 1000),
|
||||
}).execute();
|
||||
|
||||
const healthChecksOnSite = await db
|
||||
.select()
|
||||
.from(targetHealthCheck)
|
||||
.innerJoin(
|
||||
targets,
|
||||
eq(targets.targetId, targetHealthCheck.targetId)
|
||||
)
|
||||
.innerJoin(sites, eq(sites.siteId, targets.siteId))
|
||||
.where(eq(sites.siteId, staleSite.siteId));
|
||||
|
||||
for (const healthCheck of healthChecksOnSite) {
|
||||
logger.info(
|
||||
`Marking health check ${healthCheck.targetHealthCheck.targetHealthCheckId} offline due to site ${staleSite.siteId} being marked offline`
|
||||
);
|
||||
await db
|
||||
.update(targetHealthCheck)
|
||||
.set({ hcHealth: "unknown" })
|
||||
.where(
|
||||
eq(
|
||||
targetHealthCheck.targetHealthCheckId,
|
||||
healthCheck.targetHealthCheck
|
||||
.targetHealthCheckId
|
||||
)
|
||||
);
|
||||
|
||||
// TODO: should we be firing an alert here when the health check goes to unknown?
|
||||
}
|
||||
|
||||
await fireSiteOfflineAlert(staleSite.orgId, staleSite.siteId, staleSite.name);
|
||||
await fireSiteOfflineAlert(
|
||||
staleSite.orgId,
|
||||
staleSite.siteId,
|
||||
staleSite.name
|
||||
);
|
||||
}
|
||||
|
||||
// this part only effects self hosted. Its not efficient but we dont expect people to have very many wireguard sites
|
||||
@@ -155,15 +128,11 @@ export const startNewtOfflineChecker = (): void => {
|
||||
.set({ online: false })
|
||||
.where(eq(sites.siteId, site.siteId));
|
||||
|
||||
await db.insert(statusHistory).values({
|
||||
entityType: "site",
|
||||
entityId: site.siteId,
|
||||
orgId: site.orgId,
|
||||
status: "offline",
|
||||
timestamp: Math.floor(Date.now() / 1000),
|
||||
}).execute();
|
||||
|
||||
await fireSiteOfflineAlert(site.orgId, site.siteId, site.name);
|
||||
await fireSiteOfflineAlert(
|
||||
site.orgId,
|
||||
site.siteId,
|
||||
site.name
|
||||
);
|
||||
} else if (
|
||||
lastBandwidthUpdate >= wireguardOfflineThreshold &&
|
||||
!site.online
|
||||
@@ -177,15 +146,11 @@ export const startNewtOfflineChecker = (): void => {
|
||||
.set({ online: true })
|
||||
.where(eq(sites.siteId, site.siteId));
|
||||
|
||||
await db.insert(statusHistory).values({
|
||||
entityType: "site",
|
||||
entityId: site.siteId,
|
||||
orgId: site.orgId,
|
||||
status: "online",
|
||||
timestamp: Math.floor(Date.now() / 1000),
|
||||
}).execute();
|
||||
|
||||
await fireSiteOnlineAlert(site.orgId, site.siteId, site.name);
|
||||
await fireSiteOnlineAlert(
|
||||
site.orgId,
|
||||
site.siteId,
|
||||
site.name
|
||||
);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { db } from "@server/db";
|
||||
import { sites, clients, olms, statusHistory } from "@server/db";
|
||||
import { sites, clients, olms } from "@server/db";
|
||||
import { and, eq, inArray } from "drizzle-orm";
|
||||
import logger from "@server/logger";
|
||||
import { fireSiteOnlineAlert } from "#dynamic/lib/alerts";
|
||||
@@ -147,13 +147,6 @@ async function flushSitePingsToDb(): Promise<void> {
|
||||
}, "flushSitePingsToDb");
|
||||
|
||||
for (const site of newlyOnlineSites) {
|
||||
await db.insert(statusHistory).values({
|
||||
entityType: "site",
|
||||
entityId: site.siteId,
|
||||
orgId: site.orgId,
|
||||
status: "online",
|
||||
timestamp: Math.floor(Date.now() / 1000),
|
||||
}).execute();
|
||||
await fireSiteOnlineAlert(site.orgId, site.siteId, site.name);
|
||||
}
|
||||
} catch (error) {
|
||||
|
||||
@@ -94,26 +94,13 @@ export const handleHealthcheckStatusMessage: MessageHandler = async (
|
||||
|
||||
const [targetCheck] = await db
|
||||
.select({
|
||||
targetId: targets.targetId,
|
||||
siteId: targets.siteId,
|
||||
targetId: targetHealthCheck.targetId,
|
||||
orgId: targetHealthCheck.orgId,
|
||||
targetHealthCheckId: targetHealthCheck.targetHealthCheckId,
|
||||
resourceOrgId: resources.orgId,
|
||||
resourceId: resources.resourceId,
|
||||
resourceName: resources.name,
|
||||
name: targetHealthCheck.name,
|
||||
hcHealth: targetHealthCheck.hcHealth
|
||||
})
|
||||
.from(targetHealthCheck)
|
||||
.innerJoin(sites, eq(targetHealthCheck.siteId, sites.siteId))
|
||||
.innerJoin(
|
||||
targets,
|
||||
eq(targetHealthCheck.targetId, targets.targetId)
|
||||
)
|
||||
.innerJoin(
|
||||
resources,
|
||||
eq(targets.resourceId, resources.resourceId)
|
||||
)
|
||||
.where(
|
||||
and(
|
||||
eq(targetHealthCheck.targetHealthCheckId, targetIdNum),
|
||||
@@ -147,92 +134,22 @@ export const handleHealthcheckStatusMessage: MessageHandler = async (
|
||||
| "healthy"
|
||||
| "unhealthy"
|
||||
})
|
||||
.where(eq(targetHealthCheck.targetId, targetCheck.targetId));
|
||||
|
||||
const orgId = targetCheck.orgId || targetCheck.resourceOrgId; // for backwards compatibility, check both orgId fields because the target health checks dont have the orgId
|
||||
if (!orgId) {
|
||||
logger.warn(
|
||||
`No org ID found for target ${targetId}, skipping status history logging`
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Log the state change to status history
|
||||
await db.insert(statusHistory).values({
|
||||
entityType: "healthCheck",
|
||||
entityId: targetCheck.targetHealthCheckId,
|
||||
orgId: orgId,
|
||||
status: healthStatus.status,
|
||||
timestamp: Math.floor(Date.now() / 1000)
|
||||
});
|
||||
|
||||
if (targetCheck.resourceId) {
|
||||
// Log the state change to status history for the resource as well
|
||||
// so we can show the resource status along with the site
|
||||
|
||||
// if the status is healthy we should check if ALL of the targets on the resource are currently healthy and if not then dont mark the resource as healthy yet, we want to wait until all targets are healthy to mark the resource as healthy
|
||||
let status = healthStatus.status;
|
||||
if (healthStatus.status === "healthy") {
|
||||
const otherTargets = await db
|
||||
.select({ hcHealth: targetHealthCheck.hcHealth })
|
||||
.from(targets)
|
||||
.innerJoin(
|
||||
targetHealthCheck,
|
||||
eq(targets.targetId, targetHealthCheck.targetId)
|
||||
)
|
||||
.where(
|
||||
and(
|
||||
eq(targets.resourceId, targetCheck.resourceId),
|
||||
ne(targets.targetId, targetCheck.targetId) // only check the other targets, not the one we just updated
|
||||
)
|
||||
);
|
||||
|
||||
const allHealthy = otherTargets.every(
|
||||
(t) => t.hcHealth === "healthy"
|
||||
);
|
||||
if (!allHealthy) {
|
||||
logger.debug(
|
||||
`Not marking resource ${targetCheck.resourceId} as healthy because not all targets are healthy`
|
||||
);
|
||||
status = "unhealthy";
|
||||
}
|
||||
}
|
||||
|
||||
await db.insert(statusHistory).values({
|
||||
entityType: "resource",
|
||||
entityId: targetCheck.resourceId,
|
||||
orgId: orgId,
|
||||
status: status,
|
||||
timestamp: Math.floor(Date.now() / 1000)
|
||||
});
|
||||
|
||||
if (status === "unhealthy") {
|
||||
await fireResourceUnhealthyAlert(
|
||||
orgId,
|
||||
targetCheck.resourceId,
|
||||
targetCheck.resourceName
|
||||
);
|
||||
} else if (status === "healthy") {
|
||||
await fireResourceHealthyAlert(
|
||||
orgId,
|
||||
targetCheck.resourceId,
|
||||
targetCheck.resourceName
|
||||
);
|
||||
}
|
||||
}
|
||||
.where(eq(targetHealthCheck.targetHealthCheckId, targetCheck.targetHealthCheckId));
|
||||
|
||||
// because we are checking above if there was a change we can fire the alert here because it changed
|
||||
if (healthStatus.status === "unhealthy") {
|
||||
await fireHealthCheckUnhealthyAlert(
|
||||
orgId,
|
||||
targetCheck.orgId,
|
||||
targetCheck.targetHealthCheckId,
|
||||
targetCheck.name ?? undefined
|
||||
targetCheck.name ?? undefined,
|
||||
targetCheck.targetId
|
||||
);
|
||||
} else if (healthStatus.status === "healthy") {
|
||||
await fireHealthCheckHealthyAlert(
|
||||
orgId,
|
||||
targetCheck.orgId,
|
||||
targetCheck.targetHealthCheckId,
|
||||
targetCheck.name ?? undefined
|
||||
targetCheck.name ?? undefined,
|
||||
targetCheck.targetId
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user