mirror of
https://github.com/fosrl/pangolin.git
synced 2026-03-21 14:21:42 +00:00
Compare commits
3 Commits
1.16.2-s.1
...
1.16.2-s.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
17105f3a51 | ||
|
|
edcfbd26e4 | ||
|
|
0c4d9ea164 |
40
server/lib/sanitize.ts
Normal file
40
server/lib/sanitize.ts
Normal file
@@ -0,0 +1,40 @@
|
||||
/**
 * Sanitize a string field before inserting into a database TEXT column.
 *
 * Two passes are applied, in this order:
 *
 * 1. Lone UTF-16 surrogates – a high surrogate (\uD800-\uDBFF) that is not
 *    followed by a low surrogate, or a low surrogate (\uDC00-\uDFFF) that is
 *    not preceded by a high surrogate, is replaced with the Unicode
 *    replacement character U+FFFD. Such code units are legal inside a
 *    JavaScript string but cannot be encoded as UTF-8. Replacing (rather than
 *    deleting) them leaves a visible signal that the input was malformed.
 *    Well-formed surrogate pairs are left untouched.
 *
 * 2. Null bytes and control characters – every C0 control character in the
 *    range \x00-\x1F is removed, except the three that are legitimate in text
 *    payloads: HT (\x09), LF (\x0A) and CR (\x0D). DEL (\x7F) is removed as
 *    well. This covers the null bytes that bots and scanners routinely inject
 *    into URLs (e.g. `/path\u0000.jpg`).
 *
 * NOTE(review): the original author's note says both kinds of input surfaced
 * as `report_invalid_encoding` failures on insert (SQLite / Postgres); the
 * exact database-side behaviour is not visible from this module.
 *
 * @param value - The string to clean. `null` and `undefined` are accepted.
 * @returns The cleaned string, or `undefined` when `value` is `null` or
 *          `undefined`. A `string` argument always yields a `string`.
 */

// Hoisted so the patterns are built once instead of on every call.
// String.prototype.replace resets lastIndex for global patterns, so sharing
// these instances between calls is safe.
const LONE_SURROGATE_PATTERN =
    /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g;
const STRIPPED_CONTROL_CHAR_PATTERN = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g;

export function sanitizeString(value: string): string;
export function sanitizeString(
    value: string | null | undefined
): string | undefined;
export function sanitizeString(
    value: string | null | undefined
): string | undefined {
    // `== null` deliberately matches both null and undefined.
    if (value == null) return undefined;

    return (
        value
            // Replace lone high surrogates (not followed by a low surrogate)
            // and lone low surrogates (not preceded by a high surrogate).
            .replace(LONE_SURROGATE_PATTERN, "\uFFFD")
            // Strip null bytes, C0 control chars (except HT/LF/CR), and DEL.
            .replace(STRIPPED_CONTROL_CHAR_PATTERN, "")
    );
}
|
||||
@@ -81,6 +81,7 @@ import { verifyResourceAccessToken } from "@server/auth/verifyResourceAccessToke
|
||||
import semver from "semver";
|
||||
import { maxmindAsnLookup } from "@server/db/maxmindAsn";
|
||||
import { checkOrgAccessPolicy } from "@server/lib/checkOrgAccessPolicy";
|
||||
import { sanitizeString } from "@server/lib/sanitize";
|
||||
|
||||
// Zod schemas for request validation
|
||||
const getResourceByDomainParamsSchema = z.strictObject({
|
||||
@@ -1859,24 +1860,24 @@ hybridRouter.post(
|
||||
})
|
||||
.map((logEntry) => ({
|
||||
timestamp: logEntry.timestamp,
|
||||
orgId: logEntry.orgId,
|
||||
actorType: logEntry.actorType,
|
||||
actor: logEntry.actor,
|
||||
actorId: logEntry.actorId,
|
||||
metadata: logEntry.metadata,
|
||||
orgId: sanitizeString(logEntry.orgId),
|
||||
actorType: sanitizeString(logEntry.actorType),
|
||||
actor: sanitizeString(logEntry.actor),
|
||||
actorId: sanitizeString(logEntry.actorId),
|
||||
metadata: sanitizeString(logEntry.metadata),
|
||||
action: logEntry.action,
|
||||
resourceId: logEntry.resourceId,
|
||||
reason: logEntry.reason,
|
||||
location: logEntry.location,
|
||||
location: sanitizeString(logEntry.location),
|
||||
// userAgent: data.userAgent, // TODO: add this
|
||||
// headers: data.body.headers,
|
||||
// query: data.body.query,
|
||||
originalRequestURL: logEntry.originalRequestURL,
|
||||
scheme: logEntry.scheme,
|
||||
host: logEntry.host,
|
||||
path: logEntry.path,
|
||||
method: logEntry.method,
|
||||
ip: logEntry.ip,
|
||||
originalRequestURL: sanitizeString(logEntry.originalRequestURL) ?? "",
|
||||
scheme: sanitizeString(logEntry.scheme) ?? "",
|
||||
host: sanitizeString(logEntry.host) ?? "",
|
||||
path: sanitizeString(logEntry.path) ?? "",
|
||||
method: sanitizeString(logEntry.method) ?? "",
|
||||
ip: sanitizeString(logEntry.ip),
|
||||
tls: logEntry.tls
|
||||
}));
|
||||
|
||||
|
||||
@@ -5,25 +5,7 @@ import cache from "#dynamic/lib/cache";
|
||||
import { calculateCutoffTimestamp } from "@server/lib/cleanupLogs";
|
||||
import { stripPortFromHost } from "@server/lib/ip";
|
||||
|
||||
/**
 * Sanitize a string field by replacing lone UTF-16 surrogates (which cannot
 * be encoded as valid UTF-8) with the Unicode replacement character U+FFFD,
 * and stripping ASCII control characters that are invalid in most text
 * columns.
 *
 * Well-formed surrogate pairs are preserved. Of the C0 control characters
 * (\x00-\x1F) only HT (\x09), LF (\x0A) and CR (\x0D) are kept; DEL (\x7F)
 * is removed too.
 *
 * @param value - The string to clean. `null` and `undefined` are accepted.
 * @returns The cleaned string, or `undefined` when `value` is `null` or
 *          `undefined`.
 */
function sanitizeString(value: string | undefined | null): string | undefined {
    // `== null` deliberately matches both null and undefined.
    if (value == null) return undefined;

    return (
        value
            // Replace lone high surrogates (not followed by a low surrogate)
            // and lone low surrogates (not preceded by a high surrogate)
            .replace(
                /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g,
                "\uFFFD"
            )
            // Strip C0 control characters except HT (\x09), LF (\x0A), CR (\x0D)
            .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "")
    );
}
|
||||
import { sanitizeString } from "@server/lib/sanitize";
|
||||
|
||||
/**
|
||||
|
||||
|
||||
Reference in New Issue
Block a user