From 92c9cb9ea1e6dd28dce712120eb373aa322ccddf Mon Sep 17 00:00:00 2001 From: Christian Date: Fri, 13 Mar 2026 13:28:58 -0500 Subject: [PATCH 1/3] Replace regex with DOMParser in GPT document.write rewriting --- .../lib/src/integrations/gpt/script_guard.ts | 70 ++++---- .../integrations/gpt/script_guard.test.ts | 150 ++++++++++++++++++ 2 files changed, 190 insertions(+), 30 deletions(-) diff --git a/crates/js/lib/src/integrations/gpt/script_guard.ts b/crates/js/lib/src/integrations/gpt/script_guard.ts index c6c549d8..3aafeea8 100644 --- a/crates/js/lib/src/integrations/gpt/script_guard.ts +++ b/crates/js/lib/src/integrations/gpt/script_guard.ts @@ -199,43 +199,53 @@ function rewriteLinkHref( // Layer 1: document.write / document.writeln interception // --------------------------------------------------------------------------- -/** - * Regex that matches `src="..."` or `src='...'` attributes inside a - * `` - * - * Hostname verification still happens in [`maybeRewrite`], so URLs that merely - * contain the token in query text are left unchanged. - */ -const SCRIPT_SRC_RE = - /(]*?\bsrc\s*=\s*["'])([^"']*securepubads\.g\.doubleclick\.net[^"']*)(["'])/gi; - /** * Rewrite GPT domain URLs inside raw HTML strings passed to * `document.write` / `document.writeln`. + * + * Uses `DOMParser` for robust HTML parsing instead of regex so that + * edge-cases (unquoted attributes, unusual spacing, mixed quote styles) + * are handled by the browser's native parser. The raw `getAttribute` + * value is swapped in the original HTML string so the surrounding markup + * is preserved verbatim. + * + * If the GPT domain is present in the HTML but `DOMParser` is + * unavailable or throws, the function **fails closed** (returns an + * empty string) rather than passing the unproxied URL through. */ function rewriteHtmlString(html: string): string { - SCRIPT_SRC_RE.lastIndex = 0; - if (!SCRIPT_SRC_RE.test(html)) return html; - SCRIPT_SRC_RE.lastIndex = 0; - - return html.replace(SCRIPT_SRC_RE, (_match, prefix: string, url: string, suffix: string) => { - const { url: rewrittenUrl, didRewrite } = maybeRewrite(url); - if (!didRewrite) { - return `${prefix}${url}${suffix}`; + if (typeof DOMParser === 'undefined') { + log.warn( + `${LOG_PREFIX}: DOMParser unavailable, blocking document.write HTML that references GPT domain` + ); + return ''; + } + + try { + const doc = new DOMParser().parseFromString(html, 'text/html'); + const scripts = doc.querySelectorAll('script[src]'); + let result = html; + + for (const script of scripts) { + const rawSrc = script.getAttribute('src') ?? ''; + const { url: rewrittenUrl, didRewrite } = maybeRewrite(rawSrc); + if (!didRewrite) continue; + + log.info(`${LOG_PREFIX}: rewriting document.write script src`, { + original: rawSrc, + rewritten: rewrittenUrl, + }); + result = result.replaceAll(rawSrc, rewrittenUrl); } - log.info(`${LOG_PREFIX}: rewriting document.write script src`, { - original: url, - rewritten: rewrittenUrl, - }); - return `${prefix}${rewrittenUrl}${suffix}`; - }); + return result; + } catch (err) { + log.warn( + `${LOG_PREFIX}: failed to parse document.write HTML containing GPT domain, blocking`, + err + ); + return ''; + } } function installDocumentWritePatch(): void { diff --git a/crates/js/lib/test/integrations/gpt/script_guard.test.ts b/crates/js/lib/test/integrations/gpt/script_guard.test.ts index 6cec6369..53dbecb7 100644 --- a/crates/js/lib/test/integrations/gpt/script_guard.test.ts +++ b/crates/js/lib/test/integrations/gpt/script_guard.test.ts @@ -153,4 +153,154 @@ describe('GPT script guard', () => { '/integrations/gpt/pagead/managed/js/gpt/m202603020101/pubads_impl.js?foo=bar' ); }); + + // ----------------------------------------------------------------------- + // document.write edge-cases (DOMParser-based rewriting) + // ----------------------------------------------------------------------- + + it('rewrites document.write script src with single-quoted attribute', () => { + const nativeWriteSpy = vi.fn<(...args: string[]) => void>(); + document.write = nativeWriteSpy as unknown as typeof document.write; + + installGptGuard(); + + document.write( + "" + ); + + expect(nativeWriteSpy).toHaveBeenCalledTimes(1); + const [writtenHtml] = nativeWriteSpy.mock.calls[0] ?? []; + expect(writtenHtml).toContain(window.location.host); + expect(writtenHtml).toContain('/integrations/gpt/pagead/managed/js/gpt/current/pubads_impl.js'); + expect(writtenHtml).not.toContain('securepubads.g.doubleclick.net'); + }); + + it('rewrites document.write script src with extra whitespace around =', () => { + const nativeWriteSpy = vi.fn<(...args: string[]) => void>(); + document.write = nativeWriteSpy as unknown as typeof document.write; + + installGptGuard(); + + document.write( + '' + ); + + expect(nativeWriteSpy).toHaveBeenCalledTimes(1); + const [writtenHtml] = nativeWriteSpy.mock.calls[0] ?? []; + expect(writtenHtml).toContain(window.location.host); + expect(writtenHtml).toContain('/integrations/gpt/pagead/managed/js/gpt/current/pubads_impl.js'); + expect(writtenHtml).not.toContain('securepubads.g.doubleclick.net'); + }); + + it('rewrites multiple script tags in a single document.write call', () => { + const nativeWriteSpy = vi.fn<(...args: string[]) => void>(); + document.write = nativeWriteSpy as unknown as typeof document.write; + + installGptGuard(); + + document.write( + '' + + '' + ); + + expect(nativeWriteSpy).toHaveBeenCalledTimes(1); + const [writtenHtml] = nativeWriteSpy.mock.calls[0] ?? []; + expect(writtenHtml).toContain('/integrations/gpt/pagead/a.js'); + expect(writtenHtml).toContain('/integrations/gpt/pagead/b.js'); + expect(writtenHtml).not.toContain('securepubads.g.doubleclick.net'); + }); + + it('rewrites document.writeln the same as document.write', () => { + const nativeWritelnSpy = vi.fn<(...args: string[]) => void>(); + document.writeln = nativeWritelnSpy as unknown as typeof document.writeln; + + installGptGuard(); + + document.writeln( + '' + ); + + expect(nativeWritelnSpy).toHaveBeenCalledTimes(1); + const [writtenHtml] = nativeWritelnSpy.mock.calls[0] ?? []; + expect(writtenHtml).toContain(window.location.host); + expect(writtenHtml).toContain('/integrations/gpt/pagead/managed/js/gpt/current/pubads_impl.js'); + expect(writtenHtml).not.toContain('securepubads.g.doubleclick.net'); + }); + + it('passes through HTML with no GPT domain reference unchanged', () => { + const nativeWriteSpy = vi.fn<(...args: string[]) => void>(); + document.write = nativeWriteSpy as unknown as typeof document.write; + + installGptGuard(); + + const html = ''; + document.write(html); + + expect(nativeWriteSpy).toHaveBeenCalledWith(html); + }); + + it('rewrites protocol-relative GPT URLs in document.write', () => { + const nativeWriteSpy = vi.fn<(...args: string[]) => void>(); + document.write = nativeWriteSpy as unknown as typeof document.write; + + installGptGuard(); + + document.write( + '' + ); + + expect(nativeWriteSpy).toHaveBeenCalledTimes(1); + const [writtenHtml] = nativeWriteSpy.mock.calls[0] ?? []; + expect(writtenHtml).toContain(window.location.host); + expect(writtenHtml).toContain('/integrations/gpt/pagead/managed/js/gpt/current/pubads_impl.js'); + expect(writtenHtml).not.toContain('securepubads.g.doubleclick.net'); + }); + + // ----------------------------------------------------------------------- + // Fail-closed behaviour + // ----------------------------------------------------------------------- + + it('fails closed when DOMParser is unavailable', () => { + const nativeWriteSpy = vi.fn<(...args: string[]) => void>(); + document.write = nativeWriteSpy as unknown as typeof document.write; + + const originalDOMParser = globalThis.DOMParser; + // @ts-expect-error — simulating an environment without DOMParser + delete globalThis.DOMParser; + + try { + installGptGuard(); + + document.write(''); + + expect(nativeWriteSpy).toHaveBeenCalledTimes(1); + expect(nativeWriteSpy).toHaveBeenCalledWith(''); + } finally { + globalThis.DOMParser = originalDOMParser; + } + }); + + it('fails closed when DOMParser throws', () => { + const nativeWriteSpy = vi.fn<(...args: string[]) => void>(); + document.write = nativeWriteSpy as unknown as typeof document.write; + + const originalDOMParser = globalThis.DOMParser; + // @ts-expect-error — injecting a broken DOMParser + globalThis.DOMParser = class { + parseFromString() { + throw new Error('boom'); + } + }; + + try { + installGptGuard(); + + document.write(''); + + expect(nativeWriteSpy).toHaveBeenCalledTimes(1); + expect(nativeWriteSpy).toHaveBeenCalledWith(''); + } finally { + globalThis.DOMParser = originalDOMParser; + } + }); }); From c6c5b2f2b792b2c1ca2f1e1820d2cf5afb6ab2b7 Mon Sep 17 00:00:00 2001 From: Christian Date: Mon, 16 Mar 2026 09:08:32 -0500 Subject: [PATCH 2/3] Add GPT-domain fast-path guard and fix entity-encoded URL bypass in rewriteHtmlString --- .../lib/src/integrations/gpt/script_guard.ts | 30 +++++++++---- .../integrations/gpt/script_guard.test.ts | 42 +++++++++++++++++++ 2 files changed, 65 insertions(+), 7 deletions(-) diff --git a/crates/js/lib/src/integrations/gpt/script_guard.ts b/crates/js/lib/src/integrations/gpt/script_guard.ts index 3aafeea8..c1bc8994 100644 --- a/crates/js/lib/src/integrations/gpt/script_guard.ts +++ b/crates/js/lib/src/integrations/gpt/script_guard.ts @@ -204,16 +204,25 @@ function rewriteLinkHref( * `document.write` / `document.writeln`. * * Uses `DOMParser` for robust HTML parsing instead of regex so that - * edge-cases (unquoted attributes, unusual spacing, mixed quote styles) - * are handled by the browser's native parser. The raw `getAttribute` - * value is swapped in the original HTML string so the surrounding markup - * is preserved verbatim. + * edge-cases (unquoted attributes, unusual spacing, mixed quote styles, + * HTML-entity-encoded query parameters) are handled by the browser's + * native parser. GPT script `src` attributes are mutated in the parsed + * DOM and the result is serialized back to HTML. * * If the GPT domain is present in the HTML but `DOMParser` is * unavailable or throws, the function **fails closed** (returns an * empty string) rather than passing the unproxied URL through. + * + * Non-GPT HTML is always passed through unchanged regardless of + * `DOMParser` availability. */ function rewriteHtmlString(html: string): string { + // Fast-path: if the HTML does not reference the GPT domain at all, + // pass it through unchanged. This avoids unnecessary DOMParser + // overhead and, critically, prevents non-GPT document.write calls + // from being silently dropped when DOMParser is unavailable. + if (!html.includes(GPT_DOMAIN)) return html; + if (typeof DOMParser === 'undefined') { log.warn( `${LOG_PREFIX}: DOMParser unavailable, blocking document.write HTML that references GPT domain` @@ -224,7 +233,7 @@ function rewriteHtmlString(html: string): string { try { const doc = new DOMParser().parseFromString(html, 'text/html'); const scripts = doc.querySelectorAll('script[src]'); - let result = html; + let didRewriteAny = false; for (const script of scripts) { const rawSrc = script.getAttribute('src') ?? ''; @@ -235,10 +244,17 @@ function rewriteHtmlString(html: string): string { original: rawSrc, rewritten: rewrittenUrl, }); - result = result.replaceAll(rawSrc, rewrittenUrl); + // Mutate the parsed DOM so that HTML-entity-encoded attribute + // values (e.g. `&`) are handled correctly. Serializing the + // DOM back to HTML avoids the mismatch between decoded + // `getAttribute()` values and the raw HTML string. + script.setAttribute('src', rewrittenUrl); + didRewriteAny = true; } - return result; + // DOMParser wraps input in ……. + // Bare ' + ); + + expect(nativeWriteSpy).toHaveBeenCalledTimes(1); + const [writtenHtml] = nativeWriteSpy.mock.calls[0] ?? []; + expect(writtenHtml).toContain(window.location.host); + expect(writtenHtml).toContain('/integrations/gpt/pagead/managed/js/gpt/current/pubads_impl.js'); + expect(writtenHtml).not.toContain('securepubads.g.doubleclick.net'); + }); }); From 7376c18f0481dd10c19f9f1acf3594d07b4c3006 Mon Sep 17 00:00:00 2001 From: Christian Date: Wed, 18 Mar 2026 09:51:34 -0500 Subject: [PATCH 3/3] Supply non-placeholder secrets when building integration test WASM binary --- .github/actions/setup-integration-test-env/action.yml | 2 ++ scripts/integration-tests-browser.sh | 2 ++ scripts/integration-tests.sh | 2 ++ 3 files changed, 6 insertions(+) diff --git a/.github/actions/setup-integration-test-env/action.yml b/.github/actions/setup-integration-test-env/action.yml index 103f022f..b7656a8f 100644 --- a/.github/actions/setup-integration-test-env/action.yml +++ b/.github/actions/setup-integration-test-env/action.yml @@ -70,6 +70,8 @@ runs: shell: bash env: TRUSTED_SERVER__PUBLISHER__ORIGIN_URL: http://127.0.0.1:${{ inputs.origin-port }} + TRUSTED_SERVER__PUBLISHER__PROXY_SECRET: integration-test-proxy-secret + TRUSTED_SERVER__SYNTHETIC__SECRET_KEY: integration-test-secret-key TRUSTED_SERVER__PROXY__CERTIFICATE_CHECK: "false" run: cargo build --bin trusted-server-fastly --release --target wasm32-wasip1 diff --git a/scripts/integration-tests-browser.sh b/scripts/integration-tests-browser.sh index 3d2c7ede..900a305a 100755 --- a/scripts/integration-tests-browser.sh +++ b/scripts/integration-tests-browser.sh @@ -31,6 +31,8 @@ echo "==> Validating shared integration-test dependency versions..." # --- Build WASM binary --- echo "==> Building WASM binary (origin=http://127.0.0.1:$ORIGIN_PORT)..." TRUSTED_SERVER__PUBLISHER__ORIGIN_URL="http://127.0.0.1:$ORIGIN_PORT" \ +TRUSTED_SERVER__PUBLISHER__PROXY_SECRET="integration-test-proxy-secret" \ +TRUSTED_SERVER__SYNTHETIC__SECRET_KEY="integration-test-secret-key" \ TRUSTED_SERVER__PROXY__CERTIFICATE_CHECK=false \ cargo build --bin trusted-server-fastly --release --target wasm32-wasip1 diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index c7b64cde..566521f7 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -52,6 +52,8 @@ fi echo "==> Building WASM binary (origin=http://127.0.0.1:$ORIGIN_PORT)..." TRUSTED_SERVER__PUBLISHER__ORIGIN_URL="http://127.0.0.1:$ORIGIN_PORT" \ +TRUSTED_SERVER__PUBLISHER__PROXY_SECRET="integration-test-proxy-secret" \ +TRUSTED_SERVER__SYNTHETIC__SECRET_KEY="integration-test-secret-key" \ TRUSTED_SERVER__PROXY__CERTIFICATE_CHECK=false \ cargo build --bin trusted-server-fastly --release --target wasm32-wasip1