From ca92c2e8c08771bfc9dab8839cd491651e5a8d95 Mon Sep 17 00:00:00 2001 From: prk-Jr Date: Mon, 16 Mar 2026 14:44:54 +0530 Subject: [PATCH 1/2] Validate synthetic ID format on inbound header and cookie values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inbound synthetic IDs from the x-synthetic-id header and synthetic_id cookie were accepted without validation. An attacker could inject arbitrary strings — including very long values, special characters, or newlines — which were then set as response headers, cookies, and forwarded to third-party APIs. Adds a private is_valid_synthetic_id() validator enforcing the canonical format (64 lowercase hex chars + '.' + 6 alphanumeric chars). The length check is O(1) and runs first to bound all downstream work. Invalid values are silently discarded and a fresh ID is generated in their place; the raw value is never written to logs. Also adds a debug_assert! in generate_synthetic_id() to catch any future regression in the generator, moves VALID_SYNTHETIC_ID to test_support so it is shared across all test modules, and demotes synthetic ID values from INFO to DEBUG in log output to avoid recording pseudonymous identifiers in production log pipelines. Closes #412 --- CHANGELOG.md | 4 + crates/common/src/integrations/registry.rs | 11 +- crates/common/src/proxy.rs | 5 +- crates/common/src/synthetic.rs | 261 +++++++++++++++------ crates/common/src/test_support.rs | 4 + docs/guide/synthetic-ids.md | 4 +- 6 files changed, 215 insertions(+), 74 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c5014065..54ab8111 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Security + +- Validate synthetic ID format on inbound values from the `x-synthetic-id` header and `synthetic_id` cookie; values that do not match the expected format (`64-hex-hmac.6-alphanumeric-suffix`) are discarded and a fresh ID is generated rather than forwarded to response headers, cookies, or third-party APIs + ### Added - Implemented basic authentication for configurable endpoint paths (#73) diff --git a/crates/common/src/integrations/registry.rs b/crates/common/src/integrations/registry.rs index 158a1e29..a4a15b63 100644 --- a/crates/common/src/integrations/registry.rs +++ b/crates/common/src/integrations/registry.rs @@ -1310,8 +1310,15 @@ mod tests { let registry = IntegrationRegistry::from_routes(routes); let mut req = Request::get("https://test.example.com/integrations/test/synthetic"); - // Pre-existing cookie - req.set_header(header::COOKIE, "synthetic_id=existing_id_12345"); + // Pre-existing cookie with a valid-format synthetic ID + req.set_header( + header::COOKIE, + format!( + "{}={}", + crate::constants::COOKIE_SYNTHETIC_ID, + crate::test_support::tests::VALID_SYNTHETIC_ID + ), + ); let result = futures::executor::block_on(registry.handle_proxy( &Method::GET, diff --git a/crates/common/src/proxy.rs b/crates/common/src/proxy.rs index 99db6328..9b13884d 100644 --- a/crates/common/src/proxy.rs +++ b/crates/common/src/proxy.rs @@ -1291,7 +1291,8 @@ mod tests { sig ), ); - req.set_header(crate::constants::HEADER_X_SYNTHETIC_ID, "synthetic-123"); + let valid_synthetic_id = crate::test_support::tests::VALID_SYNTHETIC_ID; + req.set_header(crate::constants::HEADER_X_SYNTHETIC_ID, valid_synthetic_id); let resp = handle_first_party_click(&settings, req) .await @@ -1309,7 +1310,7 @@ mod tests { assert_eq!(pairs.remove("foo").as_deref(), Some("1")); assert_eq!( pairs.remove("synthetic_id").as_deref(), - Some("synthetic-123") + Some(valid_synthetic_id) ); assert!(pairs.is_empty()); } diff --git a/crates/common/src/synthetic.rs b/crates/common/src/synthetic.rs index 853c7141..7def2f51 100644 --- a/crates/common/src/synthetic.rs +++ b/crates/common/src/synthetic.rs @@ -25,6 +25,35 @@ type HmacSha256 = Hmac; const ALPHANUMERIC_CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; +/// Expected byte length of a valid synthetic ID: 64 hex chars + '.' + 6 alphanumeric chars. +const SYNTHETIC_ID_LEN: usize = 71; + +/// Validates that `value` matches the canonical synthetic ID format. +/// +/// The format is `.` where `` is exactly 64 **lowercase** hex +/// characters (HMAC-SHA256 output via [`hex::encode`]) and `` is exactly +/// 6 ASCII alphanumeric characters. Uppercase hex is rejected — the generator +/// never produces it and intermediaries that normalise case would produce an ID +/// that no longer matches its HMAC. +/// +/// The total length is checked first so that oversized attacker-supplied +/// strings are rejected in O(1) before any character scanning occurs. +fn is_valid_synthetic_id(value: &str) -> bool { + if value.len() != SYNTHETIC_ID_LEN { + return false; + } + match value.split_once('.') { + Some((hmac_part, suffix_part)) => { + hmac_part.len() == 64 + && hmac_part + .bytes() + .all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f')) + && suffix_part.bytes().all(|b| b.is_ascii_alphanumeric()) + } + None => false, + } +} + /// Normalizes an IP address for stable synthetic ID generation. /// /// For IPv6 addresses, masks to /64 prefix to handle Privacy Extensions @@ -96,7 +125,7 @@ pub fn generate_synthetic_id( message: "Failed to render synthetic ID template".to_string(), })?; - log::info!("Input string for fresh ID: {} {}", input_string, data); + log::debug!("Generating fresh synthetic ID from template inputs"); let mut mac = HmacSha256::new_from_slice(settings.synthetic.secret_key.as_bytes()) .change_context(TrustedServerError::SyntheticId { @@ -109,39 +138,60 @@ pub fn generate_synthetic_id( let random_suffix = generate_random_suffix(6); let synthetic_id = format!("{}.{}", hmac_hash, random_suffix); - log::info!("Generated fresh ID: {}", synthetic_id); + debug_assert!( + is_valid_synthetic_id(&synthetic_id), + "should generate a synthetic ID matching the expected format" + ); + + log::debug!("Generated fresh synthetic ID"); Ok(synthetic_id) } -/// Gets or creates a synthetic ID from the request. +/// Reads a validated synthetic ID from the request, if one is present. /// -/// Attempts to retrieve an existing synthetic ID from: -/// 1. The `x-synthetic-id` header -/// 2. The `synthetic_id` cookie +/// Checks the `x-synthetic-id` header first, then the `synthetic_id` cookie. +/// Values that do not match the canonical format (`<64-hex>.<6-alphanumeric>`) +/// are discarded and a warning is logged — the raw invalid value is never +/// included in log output. /// -/// If neither exists, generates a new synthetic ID. +/// Note: a non-UTF-8 `x-synthetic-id` header value is silently discarded and +/// the cookie is checked next, whereas a non-UTF-8 `Cookie` header propagates +/// as an error. +/// +/// Returns `Ok(None)` when no valid ID is found, allowing the caller to +/// generate a fresh one. /// /// # Errors /// -/// - [`TrustedServerError::Template`] if template rendering fails during generation -/// - [`TrustedServerError::SyntheticId`] if ID generation fails +/// - [`TrustedServerError::InvalidHeaderValue`] if the Cookie header contains invalid UTF-8 pub fn get_synthetic_id(req: &Request) -> Result, Report> { - if let Some(synthetic_id) = req + if let Some(raw) = req .get_header(HEADER_X_SYNTHETIC_ID) .and_then(|h| h.to_str().ok()) { - let id = synthetic_id.to_string(); - log::info!("Using existing Synthetic ID from header: {}", id); - return Ok(Some(id)); + if is_valid_synthetic_id(raw) { + log::info!("Using existing synthetic ID from header"); + return Ok(Some(raw.to_string())); + } + log::warn!( + "Rejecting synthetic ID from header: invalid format (len={})", + raw.len() + ); } match handle_request_cookies(req)? { Some(jar) => { if let Some(cookie) = jar.get(COOKIE_SYNTHETIC_ID) { - let id = cookie.value().to_string(); - log::info!("Using existing Trusted Server ID from cookie: {}", id); - return Ok(Some(id)); + let raw = cookie.value(); + if is_valid_synthetic_id(raw) { + log::info!("Using existing synthetic ID from cookie"); + return Ok(Some(raw.to_string())); + } + log::warn!( + "Rejecting synthetic ID from cookie: invalid format (len={})", + raw.len() + ); } } None => { @@ -152,17 +202,17 @@ pub fn get_synthetic_id(req: &Request) -> Result, Report bool { - let mut parts = value.split('.'); - let hmac_part = match parts.next() { - Some(part) => part, - None => return false, - }; - let suffix_part = match parts.next() { - Some(part) => part, - None => return false, - }; - if parts.next().is_some() { - return false; - } - if hmac_part.len() != 64 || suffix_part.len() != 6 { - return false; - } - if !hmac_part.chars().all(|c| c.is_ascii_hexdigit()) { - return false; - } - if !suffix_part.chars().all(|c| c.is_ascii_alphanumeric()) { - return false; - } - true - } - #[test] fn test_generate_synthetic_id() { let settings: Settings = create_test_settings(); @@ -263,60 +288,88 @@ mod tests { let synthetic_id = generate_synthetic_id(&settings, &req).expect("should generate synthetic ID"); - log::info!("Generated synthetic ID: {}", synthetic_id); assert!( - is_synthetic_id_format(&synthetic_id), + is_valid_synthetic_id(&synthetic_id), "should match synthetic ID format" ); } #[test] - fn test_is_synthetic_id_format_accepts_valid_value() { - let value = format!("{}.{}", "a".repeat(64), "Ab12z9"); + fn test_is_valid_synthetic_id_accepts_valid_value() { assert!( - is_synthetic_id_format(&value), - "should accept a valid synthetic ID format" + is_valid_synthetic_id(VALID_SYNTHETIC_ID), + "should accept a well-formed synthetic ID" ); } #[test] - fn test_is_synthetic_id_format_rejects_invalid_values() { + fn test_is_valid_synthetic_id_rejects_invalid_values() { let missing_suffix = "a".repeat(64); assert!( - !is_synthetic_id_format(&missing_suffix), + !is_valid_synthetic_id(&missing_suffix), "should reject missing suffix" ); let invalid_hex = format!("{}.{}", "a".repeat(63) + "g", "Ab12z9"); assert!( - !is_synthetic_id_format(&invalid_hex), + !is_valid_synthetic_id(&invalid_hex), "should reject non-hex HMAC content" ); let invalid_suffix = format!("{}.{}", "a".repeat(64), "ab-129"); assert!( - !is_synthetic_id_format(&invalid_suffix), + !is_valid_synthetic_id(&invalid_suffix), "should reject non-alphanumeric suffix" ); + // 74 bytes — caught by the length guard before any scan. let extra_segment = format!("{}.{}.{}", "a".repeat(64), "Ab12z9", "zz"); assert!( - !is_synthetic_id_format(&extra_segment), + !is_valid_synthetic_id(&extra_segment), "should reject extra segments" ); + + // 71 bytes, dot at position 64 (correct), but suffix contains a dot — caught by + // the suffix alphanumeric scan, not the length guard. + let dot_in_suffix = format!("{}.Ab12.z", "a".repeat(64)); + assert!( + !is_valid_synthetic_id(&dot_in_suffix), + "should reject dot within suffix" + ); + + let uppercase_hex = format!("{}.{}", "A".repeat(64), "Ab12z9"); + assert!( + !is_valid_synthetic_id(&uppercase_hex), + "should reject uppercase hex in HMAC part" + ); + + let oversized = "a".repeat(1000); + assert!( + !is_valid_synthetic_id(&oversized), + "should reject oversized input" + ); + + assert!(!is_valid_synthetic_id(""), "should reject empty string"); } #[test] fn test_get_synthetic_id_with_header() { let settings = create_test_settings(); - let req = create_test_request(vec![(HEADER_X_SYNTHETIC_ID, "existing_synthetic_id")]); + let req = create_test_request(vec![(HEADER_X_SYNTHETIC_ID, VALID_SYNTHETIC_ID)]); let synthetic_id = get_synthetic_id(&req).expect("should get synthetic ID"); - assert_eq!(synthetic_id, Some("existing_synthetic_id".to_string())); + assert_eq!( + synthetic_id, + Some(VALID_SYNTHETIC_ID.to_string()), + "should return the valid header ID" + ); let synthetic_id = get_or_generate_synthetic_id(&settings, &req) .expect("should reuse header synthetic ID"); - assert_eq!(synthetic_id, "existing_synthetic_id"); + assert_eq!( + synthetic_id, VALID_SYNTHETIC_ID, + "should reuse the valid header ID" + ); } #[test] @@ -324,22 +377,89 @@ mod tests { let settings = create_test_settings(); let req = create_test_request(vec![( header::COOKIE, - &format!("{}=existing_cookie_id", COOKIE_SYNTHETIC_ID), + &format!("{}={}", COOKIE_SYNTHETIC_ID, VALID_SYNTHETIC_ID), )]); let synthetic_id = get_synthetic_id(&req).expect("should get synthetic ID"); - assert_eq!(synthetic_id, Some("existing_cookie_id".to_string())); + assert_eq!( + synthetic_id, + Some(VALID_SYNTHETIC_ID.to_string()), + "should return the valid cookie ID" + ); let synthetic_id = get_or_generate_synthetic_id(&settings, &req) .expect("should reuse cookie synthetic ID"); - assert_eq!(synthetic_id, "existing_cookie_id"); + assert_eq!( + synthetic_id, VALID_SYNTHETIC_ID, + "should reuse the valid cookie ID" + ); + } + + #[test] + fn test_get_synthetic_id_rejects_invalid_header() { + let req = create_test_request(vec![(HEADER_X_SYNTHETIC_ID, "not-a-valid-id")]); + + let synthetic_id = get_synthetic_id(&req).expect("should not error on invalid header ID"); + assert!( + synthetic_id.is_none(), + "should discard invalid synthetic ID from header" + ); + } + + #[test] + fn test_get_synthetic_id_rejects_invalid_cookie() { + let req = create_test_request(vec![( + header::COOKIE, + &format!("{}=not-a-valid-id", COOKIE_SYNTHETIC_ID), + )]); + + let synthetic_id = get_synthetic_id(&req).expect("should not error on invalid cookie ID"); + assert!( + synthetic_id.is_none(), + "should discard invalid synthetic ID from cookie" + ); + } + + #[test] + fn test_get_synthetic_id_invalid_header_falls_through_to_valid_cookie() { + let req = create_test_request(vec![ + (HEADER_X_SYNTHETIC_ID, "not-a-valid-id"), + ( + header::COOKIE, + &format!("{}={}", COOKIE_SYNTHETIC_ID, VALID_SYNTHETIC_ID), + ), + ]); + + let synthetic_id = get_synthetic_id(&req).expect("should not error when cookie is valid"); + assert_eq!( + synthetic_id, + Some(VALID_SYNTHETIC_ID.to_string()), + "should fall through to valid cookie when header ID is invalid" + ); } #[test] fn test_get_synthetic_id_none() { let req = create_test_request(vec![]); let synthetic_id = get_synthetic_id(&req).expect("should handle missing ID"); - assert!(synthetic_id.is_none()); + assert!( + synthetic_id.is_none(), + "should return None when no ID present" + ); + } + + #[test] + fn test_get_or_generate_synthetic_id_generates_when_invalid_header() { + let settings = create_test_settings(); + // A string that is clearly not a valid synthetic ID (wrong format, wrong length) + let req = create_test_request(vec![(HEADER_X_SYNTHETIC_ID, "totally-invalid-id-value")]); + + let synthetic_id = get_or_generate_synthetic_id(&settings, &req) + .expect("should generate when header ID is invalid"); + assert!( + is_valid_synthetic_id(&synthetic_id), + "should generate a fresh valid ID when inbound ID is invalid" + ); } #[test] @@ -349,6 +469,9 @@ mod tests { let synthetic_id = get_or_generate_synthetic_id(&settings, &req) .expect("should get or generate synthetic ID"); - assert!(!synthetic_id.is_empty()); + assert!( + is_valid_synthetic_id(&synthetic_id), + "should generate a valid synthetic ID" + ); } } diff --git a/crates/common/src/test_support.rs b/crates/common/src/test_support.rs index 5e0d8f97..e3d73fa4 100644 --- a/crates/common/src/test_support.rs +++ b/crates/common/src/test_support.rs @@ -2,6 +2,10 @@ pub mod tests { use crate::settings::Settings; + /// A well-formed synthetic ID for use in tests: 64 lowercase hex chars + `'.'` + 6 alphanumeric. + pub const VALID_SYNTHETIC_ID: &str = + "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0a1b2.Ab12z9"; + #[must_use] pub fn crate_test_settings_str() -> String { r#" diff --git a/docs/guide/synthetic-ids.md b/docs/guide/synthetic-ids.md index 45914a90..b8a8b69f 100644 --- a/docs/guide/synthetic-ids.md +++ b/docs/guide/synthetic-ids.md @@ -12,10 +12,12 @@ Trusted Server surfaces the current synthetic ID via response headers and a firs ### HMAC-Based Generation -Synthetic IDs use HMAC (Hash-based Message Authentication Code) to generate a deterministic base from a configurable template, then append a short random suffix. +Synthetic IDs use HMAC (Hash-based Message Authentication Code) to generate a base from a configurable template, then append a short random alphanumeric suffix. The HMAC base is deterministic for a given set of template inputs; templates that include the built-in `{{random_uuid}}` variable produce a unique ID on every generation. **Format**: `64-hex-hmac`.`6-alphanumeric-suffix` +Inbound IDs (from the `x-synthetic-id` header or `synthetic_id` cookie) are validated against this format before use. Values that do not match — including oversized strings, special characters, or wrong structure — are discarded and a fresh ID is generated in their place. + **IP normalization**: IPv6 addresses are normalized to a /64 prefix before templating. ## Configuration From 9618dea807b0d938f038c3977dc26a1193c4a558 Mon Sep 17 00:00:00 2001 From: prk-Jr Date: Tue, 17 Mar 2026 13:14:36 +0530 Subject: [PATCH 2/2] Test that header takes precedence over cookie for synthetic ID --- crates/common/src/synthetic.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/crates/common/src/synthetic.rs b/crates/common/src/synthetic.rs index 7def2f51..22bd3eec 100644 --- a/crates/common/src/synthetic.rs +++ b/crates/common/src/synthetic.rs @@ -438,6 +438,24 @@ mod tests { ); } + #[test] + fn test_get_synthetic_id_header_takes_precedence_over_cookie() { + let cookie_id = "b2a1c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0b1a2.Zx98y7"; + let req = create_test_request(vec![ + (HEADER_X_SYNTHETIC_ID, VALID_SYNTHETIC_ID), + ( + header::COOKIE, + &format!("{}={}", COOKIE_SYNTHETIC_ID, cookie_id), + ), + ]); + let result = get_synthetic_id(&req).expect("should succeed"); + assert_eq!( + result, + Some(VALID_SYNTHETIC_ID.to_string()), + "should prefer header over cookie" + ); + } + #[test] fn test_get_synthetic_id_none() { let req = create_test_request(vec![]);