Add resource degraded

This commit is contained in:
Owen
2026-04-24 17:47:08 -07:00
parent 8e16ff07a9
commit 82212af643
16 changed files with 162 additions and 42 deletions

View File

@@ -23,6 +23,7 @@ import {
} from "@server/db";
import { eq } from "drizzle-orm";
import {
fireResourceDegradedAlert,
fireResourceHealthyAlert,
fireResourceUnhealthyAlert
} from "./resourceEvents";
@@ -217,6 +218,14 @@ async function handleResource(orgId: string, healthCheckTargetId?: number | null
undefined,
trx
);
} else if (health === "degraded") {
await fireResourceDegradedAlert(
orgId,
resource.resourceId,
resource.name,
undefined,
trx
);
}
}
}

View File

@@ -130,9 +130,9 @@ export async function fireResourceUnhealthyAlert(
}
/**
* Fire a `resource_toggle` alert for the given resource.
* Fire a `resource_degraded` alert for the given resource.
*
* Call this when a resource's enabled/disabled status is toggled so that any
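* Call this after a resource has been detected as degraded so that any
* matching `alertRules` can dispatch their email and webhook actions. It also
* records a `degraded` entry in `statusHistory` and fires a companion
* `resource_toggle` alert carrying `status: "degraded"`.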
*
* @param orgId - Organisation that owns the resource.
@@ -140,7 +140,7 @@ export async function fireResourceUnhealthyAlert(
* @param resourceName - Human-readable name shown in notifications (optional).
* @param extra - Any additional key/value pairs to include in the payload.
*/
export async function fireResourceToggleAlert(
export async function fireResourceDegradedAlert(
orgId: string,
resourceId: number,
resourceName?: string | null,
@@ -148,8 +148,16 @@ export async function fireResourceToggleAlert(
trx: Transaction | typeof db = db
): Promise<void> {
try {
await trx.insert(statusHistory).values({
entityType: "resource",
entityId: resourceId,
orgId: orgId,
status: "degraded",
timestamp: Math.floor(Date.now() / 1000)
});
await processAlerts({
eventType: "resource_toggle",
eventType: "resource_degraded",
orgId,
resourceId,
data: {
@@ -157,9 +165,20 @@ export async function fireResourceToggleAlert(
...extra
}
});
await processAlerts({
eventType: "resource_toggle",
orgId,
resourceId,
data: {
resourceId,
status: "degraded",
...(resourceName != null ? { resourceName } : {}),
...extra
}
});
} catch (err) {
logger.error(
`fireResourceToggleAlert: unexpected error for resourceId ${resourceId}`,
`fireResourceDegradedAlert: unexpected error for resourceId ${resourceId}`,
err
);
}

View File

@@ -88,6 +88,8 @@ function buildSubject(context: AlertContext): string {
return "[Alert] Resource Healthy";
case "resource_unhealthy":
return "[Alert] Resource Unhealthy";
case "resource_degraded":
return "[Alert] Resource Degraded";
case "resource_toggle":
return "[Alert] Resource Status Changed";
default: {

View File

@@ -12,7 +12,10 @@
*/
import logger from "@server/logger";
import { AlertContext, WebhookAlertConfig } from "@server/routers/alertRule/types";
import {
AlertContext,
WebhookAlertConfig
} from "@server/routers/alertRule/types";
const REQUEST_TIMEOUT_MS = 15_000;
const MAX_RETRIES = 3;
@@ -56,7 +59,10 @@ export async function sendAlertWebhook(
for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
const controller = new AbortController();
const timeoutHandle = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
const timeoutHandle = setTimeout(
() => controller.abort(),
REQUEST_TIMEOUT_MS
);
let response: Response;
try {
@@ -75,7 +81,9 @@ export async function sendAlertWebhook(
);
} else {
const msg = err instanceof Error ? err.message : String(err);
lastError = new Error(`Alert webhook: request to "${url}" failed ${msg}`);
lastError = new Error(
`Alert webhook: request to "${url}" failed ${msg}`
);
}
if (attempt < MAX_RETRIES) {
const delay = RETRY_BASE_DELAY_MS * 2 ** (attempt - 1);
@@ -111,11 +119,18 @@ export async function sendAlertWebhook(
continue;
}
logger.debug(`Alert webhook sent successfully to "${url}" for event "${context.eventType}" (attempt ${attempt}/${MAX_RETRIES})`);
logger.debug(
`Alert webhook sent successfully to "${url}" for event "${context.eventType}" (attempt ${attempt}/${MAX_RETRIES})`
);
return;
}
throw lastError ?? new Error(`Alert webhook: all ${MAX_RETRIES} attempts failed for "${url}"`);
throw (
lastError ??
new Error(
`Alert webhook: all ${MAX_RETRIES} attempts failed for "${url}"`
)
);
}
// ---------------------------------------------------------------------------
@@ -139,6 +154,8 @@ function deriveStatus(
case "health_check_unhealthy":
case "resource_unhealthy":
return "unhealthy";
case "resource_degraded":
return "degraded";
case "health_check_toggle":
case "resource_toggle":
return String(data.status ?? "unknown");
@@ -154,7 +171,9 @@ function deriveStatus(
// Header construction (mirrors HttpLogDestination.buildHeaders)
// ---------------------------------------------------------------------------
function buildHeaders(webhookConfig: WebhookAlertConfig): Record<string, string> {
function buildHeaders(
webhookConfig: WebhookAlertConfig
): Record<string, string> {
const headers: Record<string, string> = {
"Content-Type": "application/json"
};

View File

@@ -24,7 +24,8 @@ import { eq, and } from "drizzle-orm";
import {
fireResourceHealthyAlert,
fireResourceUnhealthyAlert,
fireResourceToggleAlert
fireResourceToggleAlert,
fireResourceDegradedAlert
} from "#private/lib/alerts/events/resourceEvents";
const paramsSchema = z.strictObject({
@@ -33,7 +34,12 @@ const paramsSchema = z.strictObject({
});
const bodySchema = z.strictObject({
eventType: z.enum(["resource_healthy", "resource_unhealthy", "resource_toggle"])
eventType: z.enum([
"resource_healthy",
"resource_unhealthy",
"resource_degraded",
"resource_toggle"
])
});
export type TriggerResourceAlertResponse = {
@@ -101,8 +107,8 @@ export async function triggerResourceAlert(
resourceId,
resource.name ?? undefined
);
} else {
await fireResourceToggleAlert(
} else if (eventType === "resource_degraded") {
await fireResourceDegradedAlert(
orgId,
resourceId,
resource.name ?? undefined

View File

@@ -33,7 +33,11 @@ import { encrypt } from "@server/lib/crypto";
import config from "@server/lib/config";
import { CreateAlertRuleResponse } from "@server/routers/alertRule/types";
export const SITE_EVENT_TYPES = ["site_online", "site_offline", "site_toggle"] as const;
export const SITE_EVENT_TYPES = [
"site_online",
"site_offline",
"site_toggle"
] as const;
export const HC_EVENT_TYPES = [
"health_check_healthy",
"health_check_unhealthy",
@@ -42,6 +46,7 @@ export const HC_EVENT_TYPES = [
/**
 * Alert event types that target a resource.
 *
 * The body schema's refinement checks `eventType` against this list to decide
 * whether the resource-specific validation applies (for example, requiring at
 * least one `resourceId` unless `allResources` is set).
 */
export const RESOURCE_EVENT_TYPES = [
"resource_healthy",
"resource_unhealthy",
"resource_degraded",
"resource_toggle"
] as const;
@@ -92,19 +97,24 @@ const bodySchema = z
const isHcEvent = (HC_EVENT_TYPES as readonly string[]).includes(
val.eventType
);
const isResourceEvent = (RESOURCE_EVENT_TYPES as readonly string[]).includes(
val.eventType
);
const isResourceEvent = (
RESOURCE_EVENT_TYPES as readonly string[]
).includes(val.eventType);
if (isSiteEvent && !val.allSites && val.siteIds.length === 0) {
ctx.addIssue({
code: z.ZodIssueCode.custom,
message: "At least one siteId is required for site event types when allSites is false",
message:
"At least one siteId is required for site event types when allSites is false",
path: ["siteIds"]
});
}
if (isHcEvent && !val.allHealthChecks && val.healthCheckIds.length === 0) {
if (
isHcEvent &&
!val.allHealthChecks &&
val.healthCheckIds.length === 0
) {
ctx.addIssue({
code: z.ZodIssueCode.custom,
message:
@@ -129,10 +139,15 @@ const bodySchema = z
});
}
if (isResourceEvent && !val.allResources && val.resourceIds.length === 0) {
if (
isResourceEvent &&
!val.allResources &&
val.resourceIds.length === 0
) {
ctx.addIssue({
code: z.ZodIssueCode.custom,
message: "At least one resourceId is required for resource event types when allResources is false",
message:
"At least one resourceId is required for resource event types when allResources is false",
path: ["resourceIds"]
});
}
@@ -148,7 +163,8 @@ const bodySchema = z
if (isResourceEvent && val.healthCheckIds.length > 0) {
ctx.addIssue({
code: z.ZodIssueCode.custom,
message: "healthCheckIds must not be set for resource event types",
message:
"healthCheckIds must not be set for resource event types",
path: ["healthCheckIds"]
});
}
@@ -164,7 +180,8 @@ const bodySchema = z
if (isHcEvent && val.resourceIds.length > 0) {
ctx.addIssue({
code: z.ZodIssueCode.custom,
message: "resourceIds must not be set for health check event types",
message:
"resourceIds must not be set for health check event types",
path: ["resourceIds"]
});
}
@@ -284,9 +301,7 @@ export async function createAlertRule(
// Create the email action pivot row and recipients if any recipients
// were supplied (userIds, roleIds, or raw emails).
const hasRecipients =
userIds.length > 0 ||
roleIds.length > 0 ||
emails.length > 0;
userIds.length > 0 || roleIds.length > 0 || emails.length > 0;
if (hasRecipients) {
const [emailActionRow] = await db

View File

@@ -76,6 +76,7 @@ const SITE_ALERT_EVENT_TYPES = [
/**
 * Resource-scoped alert event type identifiers, including `resource_degraded`.
 *
 * NOTE(review): this list has the same members as the server-side
 * `RESOURCE_EVENT_TYPES`; presumably the two must be kept in sync — confirm.
 */
const RESOURCE_ALERT_EVENT_TYPES = [
"resource_healthy",
"resource_unhealthy",
"resource_degraded",
"resource_toggle"
] as const;