From a6d0e152549e8e34fa02d2d9bb478b1ab4b051b4 Mon Sep 17 00:00:00 2001 From: Anthony Lukach Date: Wed, 3 Jun 2026 21:01:47 -0700 Subject: [PATCH 1/3] feat(backend-federation): add CredentialCache (single-flight + proactive refresh) Roadmap item 2 of #56. Caches short-lived FederatedCredentials per credential identity so a proxy doesn't re-mint on every request: - proactive refresh once within a configurable lead window of expiry (so a credential never expires mid-use), - single-flight: concurrent callers for the same key await one in-flight fetch via a per-key futures::lock::Mutex, - runtime-agnostic: the caller passes `now` (no clock dep) and no async runtime is required. Closure-based get_or_fetch(key, now, fetch) keeps it flexible; invalidate(key) supports drop-and-refetch on backend rejection. Tested: miss/hit/refresh/ invalidate/key-isolation and concurrent single-flight. Rebased onto main: the federation crate landed on main as backend-federation (#57), so this folds the cache into that crate instead of re-adding a duplicate. Co-Authored-By: Claude Opus 4.8 (1M context) --- Cargo.lock | 2 + crates/backend-federation/Cargo.toml | 4 + crates/backend-federation/src/cache.rs | 261 +++++++++++++++++++++++++ crates/backend-federation/src/lib.rs | 2 + 4 files changed, 269 insertions(+) create mode 100644 crates/backend-federation/src/cache.rs diff --git a/Cargo.lock b/Cargo.lock index a20f437..fb70d67 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1149,10 +1149,12 @@ name = "multistore-backend-federation" version = "0.4.0" dependencies = [ "chrono", + "futures", "multistore", "quick-xml 0.37.5", "serde", "thiserror", + "tokio", "url", ] diff --git a/crates/backend-federation/Cargo.toml b/crates/backend-federation/Cargo.toml index b7ef2ea..5dd33eb 100644 --- a/crates/backend-federation/Cargo.toml +++ b/crates/backend-federation/Cargo.toml @@ -8,7 +8,11 @@ description = "Outbound credential federation (OIDC identity -> backend cloud ST [dependencies] multistore.workspace = true chrono.workspace = true +futures.workspace = true serde.workspace = true quick-xml.workspace = true url.workspace = true thiserror.workspace = true + +[dev-dependencies] +tokio = { workspace = true, features = ["rt", "macros"] } diff --git a/crates/backend-federation/src/cache.rs b/crates/backend-federation/src/cache.rs new file mode 100644 index 0000000..dec769b --- /dev/null +++ b/crates/backend-federation/src/cache.rs @@ -0,0 +1,261 @@ +//! A small credential cache with single-flight refresh. +//! +//! Federated credentials are short-lived, so a proxy that re-mints them on every +//! request would hammer the backend STS and add latency to every read. This +//! cache holds the current credentials per **credential identity** (an opaque +//! key the caller chooses — e.g. a role ARN, or the rendered OIDC subject) and: +//! +//! - serves a cached value while it's still comfortably valid, +//! - **proactively refreshes** once it's within `refresh_lead` of expiry (so a +//! credential never expires mid-use), and +//! - **single-flights** refreshes: concurrent callers for the same key await one +//! in-flight fetch rather than each launching their own. +//! +//! The cache is runtime-agnostic. It takes the current time as a parameter +//! rather than reading a clock (multistore targets both native and +//! `wasm32-unknown-unknown`, where `Utc::now()` isn't available without extra +//! features), and uses [`futures::lock::Mutex`] so it needs no async runtime. +//! +//! ```no_run +//! use chrono::{Duration, Utc}; +//! use multistore_backend_federation::CredentialCache; +//! # async fn fetch_somehow() -> Result { unimplemented!() } +//! # async fn example() -> Result<(), multistore_backend_federation::FederationError> { +//! let cache = CredentialCache::new(Duration::minutes(5)); +//! let creds = cache +//! .get_or_fetch("arn:aws:iam::123:role/r", Utc::now(), || async { fetch_somehow().await }) +//! .await?; +//! # let _ = creds; Ok(()) +//! # } +//! ``` + +use crate::credentials::FederatedCredentials; +use crate::error::FederationError; +use chrono::{DateTime, Duration, Utc}; +use futures::lock::Mutex as AsyncMutex; +use std::collections::HashMap; +use std::future::Future; +use std::sync::{Arc, Mutex}; + +type Slot = Arc>>; + +/// Caches short-lived [`FederatedCredentials`] per credential identity, with +/// proactive refresh and single-flight. +/// +/// Cheap to share behind an `Arc`; all methods take `&self`. +pub struct CredentialCache { + /// How long before expiry a cached credential is considered due for refresh. + refresh_lead: Duration, + /// One async-locked slot per key. The outer `Mutex` only guards insertion + /// into the map and is never held across an `.await`; the per-key + /// [`AsyncMutex`] is what serializes (single-flights) refreshes. + slots: Mutex>, +} + +impl CredentialCache { + /// Create a cache that refreshes credentials once they're within + /// `refresh_lead` of their expiry. + pub fn new(refresh_lead: Duration) -> Self { + Self { + refresh_lead, + slots: Mutex::new(HashMap::new()), + } + } + + /// Return cached credentials for `key` if still fresh, otherwise run `fetch` + /// (single-flighted) to obtain and cache new ones. + /// + /// `now` is the current time, supplied by the caller. A cached value is + /// considered fresh while `now < expiration - refresh_lead`. + /// + /// Single-flight: while one caller is running `fetch` for a key, concurrent + /// callers for that same key block on the per-key lock; when it releases + /// they observe the freshly-cached value and return it without calling their + /// own `fetch`. + pub async fn get_or_fetch( + &self, + key: &str, + now: DateTime, + fetch: F, + ) -> Result + where + F: FnOnce() -> Fut, + Fut: Future>, + { + let slot = self.slot(key); + let mut guard = slot.lock().await; + + if let Some(creds) = guard.as_ref() { + if self.is_fresh(creds, now) { + return Ok(creds.clone()); + } + } + + let fresh = fetch().await?; + *guard = Some(fresh.clone()); + Ok(fresh) + } + + /// Drop any cached credentials for `key`, forcing the next + /// [`get_or_fetch`](Self::get_or_fetch) to fetch. + /// + /// Useful when the backend rejects a still-"fresh" credential (e.g. a 403 + /// after an out-of-band revocation) and the caller wants to re-mint. + pub fn invalidate(&self, key: &str) { + self.slots + .lock() + .expect("credential cache mutex poisoned") + .remove(key); + } + + fn is_fresh(&self, creds: &FederatedCredentials, now: DateTime) -> bool { + now < creds.expiration - self.refresh_lead + } + + fn slot(&self, key: &str) -> Slot { + self.slots + .lock() + .expect("credential cache mutex poisoned") + .entry(key.to_string()) + .or_insert_with(|| Arc::new(AsyncMutex::new(None))) + .clone() + } +} + +impl Default for CredentialCache { + /// A cache that refreshes 5 minutes before expiry. + fn default() -> Self { + Self::new(Duration::minutes(5)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::TimeZone; + use std::sync::atomic::{AtomicUsize, Ordering}; + + fn at(hour: u32, min: u32) -> DateTime { + Utc.with_ymd_and_hms(2026, 6, 3, hour, min, 0).unwrap() + } + + fn creds(expiration: DateTime) -> FederatedCredentials { + FederatedCredentials { + access_key_id: "ASIA".into(), + secret_access_key: "secret".into(), + session_token: "session".into(), + expiration, + } + } + + #[tokio::test] + async fn fetches_on_miss() { + let cache = CredentialCache::new(Duration::minutes(5)); + let got = cache + .get_or_fetch("k", at(10, 0), || async { Ok(creds(at(11, 0))) }) + .await + .unwrap(); + assert_eq!(got.access_key_id, "ASIA"); + } + + #[tokio::test] + async fn reuses_while_fresh() { + let cache = CredentialCache::new(Duration::minutes(5)); + cache + .get_or_fetch("k", at(10, 0), || async { Ok(creds(at(11, 0))) }) + .await + .unwrap(); + // Well before the lead window (expiry 11:00, lead 5m → refresh at 10:55). + let got = cache + .get_or_fetch("k", at(10, 30), || async { + panic!("must not fetch while cached creds are fresh") + }) + .await + .unwrap(); + assert_eq!(got.expiration, at(11, 0)); + } + + #[tokio::test] + async fn refreshes_within_lead_window() { + let cache = CredentialCache::new(Duration::minutes(5)); + cache + .get_or_fetch("k", at(10, 0), || async { Ok(creds(at(11, 0))) }) + .await + .unwrap(); + // 10:56 is inside the 5-minute lead before the 11:00 expiry → refetch. + let got = cache + .get_or_fetch("k", at(10, 56), || async { Ok(creds(at(12, 0))) }) + .await + .unwrap(); + assert_eq!(got.expiration, at(12, 0)); + } + + #[tokio::test] + async fn invalidate_forces_refetch() { + let cache = CredentialCache::new(Duration::minutes(5)); + cache + .get_or_fetch("k", at(10, 0), || async { Ok(creds(at(11, 0))) }) + .await + .unwrap(); + cache.invalidate("k"); + let got = cache + .get_or_fetch("k", at(10, 1), || async { Ok(creds(at(13, 0))) }) + .await + .unwrap(); + assert_eq!(got.expiration, at(13, 0)); + } + + #[tokio::test] + async fn keys_are_isolated() { + let cache = CredentialCache::new(Duration::minutes(5)); + cache + .get_or_fetch("a", at(10, 0), || async { Ok(creds(at(11, 0))) }) + .await + .unwrap(); + let got = cache + .get_or_fetch("b", at(10, 0), || async { Ok(creds(at(12, 0))) }) + .await + .unwrap(); + assert_eq!(got.expiration, at(12, 0)); + } + + #[tokio::test] + async fn single_flights_concurrent_fetches() { + let cache = Arc::new(CredentialCache::new(Duration::minutes(5))); + let calls = Arc::new(AtomicUsize::new(0)); + let now = at(10, 0); + + let one = { + let cache = cache.clone(); + let calls = calls.clone(); + async move { + cache + .get_or_fetch("k", now, || async { + calls.fetch_add(1, Ordering::SeqCst); + // Yield while holding the per-key lock so the sibling + // future contends for it — exercising single-flight. + tokio::task::yield_now().await; + Ok(creds(at(11, 0))) + }) + .await + } + }; + let two = { + let cache = cache.clone(); + let calls = calls.clone(); + async move { + cache + .get_or_fetch("k", now, || async { + calls.fetch_add(1, Ordering::SeqCst); + Ok(creds(at(11, 0))) + }) + .await + } + }; + + let (a, b) = tokio::join!(one, two); + a.unwrap(); + b.unwrap(); + assert_eq!(calls.load(Ordering::SeqCst), 1, "fetch should run once"); + } +} diff --git a/crates/backend-federation/src/lib.rs b/crates/backend-federation/src/lib.rs index 1025899..9ded752 100644 --- a/crates/backend-federation/src/lib.rs +++ b/crates/backend-federation/src/lib.rs @@ -39,9 +39,11 @@ //! [`BucketConfig`]: multistore::types::BucketConfig pub mod aws; +mod cache; mod credentials; mod error; +pub use cache::CredentialCache; pub use credentials::FederatedCredentials; pub use error::FederationError; From f789939d231eeb0ce8b08934d02aefec29b9644f Mon Sep 17 00:00:00 2001 From: Anthony Lukach Date: Wed, 3 Jun 2026 22:41:31 -0700 Subject: [PATCH 2/3] refactor(cache): extract shared credential cache, wire oidc-provider onto it PR #61 added a single-flight, proactively-refreshing CredentialCache to backend-federation, but oidc-provider already had its own simpler get/put cache (no single-flight, calls Utc::now() directly). Two caches for one concept, and the weaker one was the one actually on the hot path. Consolidate onto one implementation: - New crate `multistore-credential-cache` holds a generic `CredentialCache` over any `T: Expiring`, plus the `Expiring` trait (with a blanket impl for `Arc`). This is where #61's single-flight + proactive-refresh + closure- based get_or_fetch primitive now lives, generalized off the concrete FederatedCredentials type. - backend-federation drops its own cache module and just implements `Expiring` for FederatedCredentials. - oidc-provider deletes its bespoke cache and rewires `get_credentials` onto `get_or_fetch`, gaining single-flight and proactive refresh for free. The clock is now injected (a `now` param) instead of `Utc::now()` inside the cache, keeping the cache runtime-agnostic; backend_auth supplies it at the one production call site (consistent with the rest of the wasm-path crates). Also document caching across runtimes: new docs/architecture/caching.md covers the in-memory (per-isolate) / Cache API (per-colo) / KV (global) / Durable Object tiers, the layering pattern, and external-cache security best practices; backend-auth.md links to it. Verified: all affected crates' tests pass; native build, wasm checks (cf-workers + example), fmt, and clippy clean; docs site builds (links valid). Co-Authored-By: Claude Opus 4.8 (1M context) --- Cargo.lock | 13 +- Cargo.toml | 3 + crates/backend-federation/Cargo.toml | 5 +- crates/backend-federation/src/credentials.rs | 8 ++ crates/backend-federation/src/lib.rs | 2 - crates/credential-cache/Cargo.toml | 13 ++ .../cache.rs => credential-cache/src/lib.rs} | 129 ++++++++++++------ crates/oidc-provider/Cargo.toml | 1 + crates/oidc-provider/src/backend_auth.rs | 7 +- crates/oidc-provider/src/cache.rs | 96 ------------- crates/oidc-provider/src/lib.rs | 66 +++++---- docs/.vitepress/config.ts | 4 + docs/architecture/caching.md | 102 ++++++++++++++ docs/auth/backend-auth.md | 7 +- 14 files changed, 278 insertions(+), 178 deletions(-) create mode 100644 crates/credential-cache/Cargo.toml rename crates/{backend-federation/src/cache.rs => credential-cache/src/lib.rs} (65%) delete mode 100644 crates/oidc-provider/src/cache.rs create mode 100644 docs/architecture/caching.md diff --git a/Cargo.lock b/Cargo.lock index fb70d67..b003b9f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1149,12 +1149,11 @@ name = "multistore-backend-federation" version = "0.4.0" dependencies = [ "chrono", - "futures", "multistore", + "multistore-credential-cache", "quick-xml 0.37.5", "serde", "thiserror", - "tokio", "url", ] @@ -1205,6 +1204,15 @@ dependencies = [ "worker", ] +[[package]] +name = "multistore-credential-cache" +version = "0.4.0" +dependencies = [ + "chrono", + "futures", + "tokio", +] + [[package]] name = "multistore-lambda" version = "0.4.0" @@ -1246,6 +1254,7 @@ dependencies = [ "chrono", "multistore", "multistore-backend-federation", + "multistore-credential-cache", "rand 0.8.5", "rsa", "serde", diff --git a/Cargo.toml b/Cargo.toml index 17f0ade..efdcc6b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ members = [ "crates/sts", "crates/oidc-provider", "crates/backend-federation", + "crates/credential-cache", "examples/server", "examples/lambda", "examples/cf-workers", @@ -20,6 +21,7 @@ default-members = [ "crates/sts", "crates/oidc-provider", "crates/backend-federation", + "crates/credential-cache", "examples/server", "examples/lambda", ] @@ -111,3 +113,4 @@ multistore-cf-workers = { path = "crates/cf-workers", version = "0.4.0" } multistore-oidc-provider = { path = "crates/oidc-provider", version = "0.4.0" } multistore-path-mapping = { path = "crates/path-mapping", version = "0.4.0" } multistore-backend-federation = { path = "crates/backend-federation", version = "0.4.0" } +multistore-credential-cache = { path = "crates/credential-cache", version = "0.4.0" } diff --git a/crates/backend-federation/Cargo.toml b/crates/backend-federation/Cargo.toml index 5dd33eb..6244cac 100644 --- a/crates/backend-federation/Cargo.toml +++ b/crates/backend-federation/Cargo.toml @@ -7,12 +7,9 @@ description = "Outbound credential federation (OIDC identity -> backend cloud ST [dependencies] multistore.workspace = true +multistore-credential-cache.workspace = true chrono.workspace = true -futures.workspace = true serde.workspace = true quick-xml.workspace = true url.workspace = true thiserror.workspace = true - -[dev-dependencies] -tokio = { workspace = true, features = ["rt", "macros"] } diff --git a/crates/backend-federation/src/credentials.rs b/crates/backend-federation/src/credentials.rs index 560f929..7cfe825 100644 --- a/crates/backend-federation/src/credentials.rs +++ b/crates/backend-federation/src/credentials.rs @@ -25,6 +25,14 @@ pub struct FederatedCredentials { pub expiration: DateTime, } +/// Lets these credentials be held in a [`multistore_credential_cache::CredentialCache`] +/// so a proxy can reuse them across requests instead of re-minting per request. +impl multistore_credential_cache::Expiring for FederatedCredentials { + fn expiration(&self) -> DateTime { + self.expiration + } +} + impl FederatedCredentials { /// Inject these credentials into a [`BucketConfig`] so the multistore /// backend signs requests with them instead of going anonymous. diff --git a/crates/backend-federation/src/lib.rs b/crates/backend-federation/src/lib.rs index 9ded752..1025899 100644 --- a/crates/backend-federation/src/lib.rs +++ b/crates/backend-federation/src/lib.rs @@ -39,11 +39,9 @@ //! [`BucketConfig`]: multistore::types::BucketConfig pub mod aws; -mod cache; mod credentials; mod error; -pub use cache::CredentialCache; pub use credentials::FederatedCredentials; pub use error::FederationError; diff --git a/crates/credential-cache/Cargo.toml b/crates/credential-cache/Cargo.toml new file mode 100644 index 0000000..cde6409 --- /dev/null +++ b/crates/credential-cache/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "multistore-credential-cache" +version.workspace = true +edition.workspace = true +license.workspace = true +description = "Single-flight, proactively-refreshing in-memory cache for short-lived credentials" + +[dependencies] +chrono.workspace = true +futures.workspace = true + +[dev-dependencies] +tokio = { workspace = true, features = ["rt", "macros"] } diff --git a/crates/backend-federation/src/cache.rs b/crates/credential-cache/src/lib.rs similarity index 65% rename from crates/backend-federation/src/cache.rs rename to crates/credential-cache/src/lib.rs index dec769b..f8ca2b8 100644 --- a/crates/backend-federation/src/cache.rs +++ b/crates/credential-cache/src/lib.rs @@ -1,9 +1,10 @@ //! A small credential cache with single-flight refresh. //! -//! Federated credentials are short-lived, so a proxy that re-mints them on every -//! request would hammer the backend STS and add latency to every read. This -//! cache holds the current credentials per **credential identity** (an opaque -//! key the caller chooses — e.g. a role ARN, or the rendered OIDC subject) and: +//! Short-lived credentials (backend STS sessions, cloud bearer tokens) are +//! expensive to mint: a proxy that re-minted them on every request would hammer +//! the issuing STS and add latency to every read. This cache holds the current +//! value per **credential identity** (an opaque key the caller chooses — e.g. a +//! role ARN, or the rendered OIDC subject) and: //! //! - serves a cached value while it's still comfortably valid, //! - **proactively refreshes** once it's within `refresh_lead` of expiry (so a @@ -11,48 +12,80 @@ //! - **single-flights** refreshes: concurrent callers for the same key await one //! in-flight fetch rather than each launching their own. //! +//! The cache is generic over any value that knows when it expires (implements +//! [`Expiring`]) and over the error its fetch closure returns, so it is shared +//! by every crate that mints short-lived credentials rather than each rolling +//! its own. +//! //! The cache is runtime-agnostic. It takes the current time as a parameter //! rather than reading a clock (multistore targets both native and -//! `wasm32-unknown-unknown`, where `Utc::now()` isn't available without extra -//! features), and uses [`futures::lock::Mutex`] so it needs no async runtime. +//! `wasm32-unknown-unknown`) and uses [`futures::lock::Mutex`] so it needs no +//! async runtime. +//! +//! ``` +//! use chrono::{DateTime, Duration, Utc}; +//! use multistore_credential_cache::{CredentialCache, Expiring}; +//! +//! #[derive(Clone)] +//! struct Creds { +//! token: String, +//! expiration: DateTime, +//! } +//! impl Expiring for Creds { +//! fn expiration(&self) -> DateTime { +//! self.expiration +//! } +//! } //! -//! ```no_run -//! use chrono::{Duration, Utc}; -//! use multistore_backend_federation::CredentialCache; -//! # async fn fetch_somehow() -> Result { unimplemented!() } -//! # async fn example() -> Result<(), multistore_backend_federation::FederationError> { +//! # async fn run() -> Result<(), ()> { //! let cache = CredentialCache::new(Duration::minutes(5)); //! let creds = cache -//! .get_or_fetch("arn:aws:iam::123:role/r", Utc::now(), || async { fetch_somehow().await }) +//! .get_or_fetch("arn:aws:iam::123:role/r", Utc::now(), || async { +//! Ok::<_, ()>(Creds { token: "t".into(), expiration: Utc::now() + Duration::hours(1) }) +//! }) //! .await?; //! # let _ = creds; Ok(()) //! # } //! ``` -use crate::credentials::FederatedCredentials; -use crate::error::FederationError; use chrono::{DateTime, Duration, Utc}; use futures::lock::Mutex as AsyncMutex; use std::collections::HashMap; use std::future::Future; use std::sync::{Arc, Mutex}; -type Slot = Arc>>; +/// A cached value that knows when it expires. +/// +/// The cache uses this to decide when a value is due for proactive refresh. +/// Implement it for any credential type you want to cache. +pub trait Expiring { + /// When this value expires. + fn expiration(&self) -> DateTime; +} -/// Caches short-lived [`FederatedCredentials`] per credential identity, with +/// Caching a shared value (`Arc`) is as cacheable as caching `T` itself. +impl Expiring for Arc { + fn expiration(&self) -> DateTime { + (**self).expiration() + } +} + +type Slot = Arc>>; + +/// Caches short-lived [`Expiring`] credentials per credential identity, with /// proactive refresh and single-flight. /// /// Cheap to share behind an `Arc`; all methods take `&self`. -pub struct CredentialCache { +pub struct CredentialCache { /// How long before expiry a cached credential is considered due for refresh. refresh_lead: Duration, /// One async-locked slot per key. The outer `Mutex` only guards insertion /// into the map and is never held across an `.await`; the per-key /// [`AsyncMutex`] is what serializes (single-flights) refreshes. - slots: Mutex>, + slots: Mutex>>, } -impl CredentialCache { +impl CredentialCache { /// Create a cache that refreshes credentials once they're within /// `refresh_lead` of their expiry. pub fn new(refresh_lead: Duration) -> Self { @@ -72,15 +105,15 @@ impl CredentialCache { /// callers for that same key block on the per-key lock; when it releases /// they observe the freshly-cached value and return it without calling their /// own `fetch`. - pub async fn get_or_fetch( + pub async fn get_or_fetch( &self, key: &str, now: DateTime, fetch: F, - ) -> Result + ) -> Result where F: FnOnce() -> Fut, - Fut: Future>, + Fut: Future>, { let slot = self.slot(key); let mut guard = slot.lock().await; @@ -108,11 +141,11 @@ impl CredentialCache { .remove(key); } - fn is_fresh(&self, creds: &FederatedCredentials, now: DateTime) -> bool { - now < creds.expiration - self.refresh_lead + fn is_fresh(&self, creds: &T, now: DateTime) -> bool { + now < creds.expiration() - self.refresh_lead } - fn slot(&self, key: &str) -> Slot { + fn slot(&self, key: &str) -> Slot { self.slots .lock() .expect("credential cache mutex poisoned") @@ -122,7 +155,7 @@ impl CredentialCache { } } -impl Default for CredentialCache { +impl Default for CredentialCache { /// A cache that refreshes 5 minutes before expiry. fn default() -> Self { Self::new(Duration::minutes(5)) @@ -135,15 +168,25 @@ mod tests { use chrono::TimeZone; use std::sync::atomic::{AtomicUsize, Ordering}; + #[derive(Clone)] + struct Creds { + id: String, + expiration: DateTime, + } + + impl Expiring for Creds { + fn expiration(&self) -> DateTime { + self.expiration + } + } + fn at(hour: u32, min: u32) -> DateTime { Utc.with_ymd_and_hms(2026, 6, 3, hour, min, 0).unwrap() } - fn creds(expiration: DateTime) -> FederatedCredentials { - FederatedCredentials { - access_key_id: "ASIA".into(), - secret_access_key: "secret".into(), - session_token: "session".into(), + fn creds(expiration: DateTime) -> Creds { + Creds { + id: "ASIA".into(), expiration, } } @@ -152,22 +195,22 @@ mod tests { async fn fetches_on_miss() { let cache = CredentialCache::new(Duration::minutes(5)); let got = cache - .get_or_fetch("k", at(10, 0), || async { Ok(creds(at(11, 0))) }) + .get_or_fetch("k", at(10, 0), || async { Ok::<_, ()>(creds(at(11, 0))) }) .await .unwrap(); - assert_eq!(got.access_key_id, "ASIA"); + assert_eq!(got.id, "ASIA"); } #[tokio::test] async fn reuses_while_fresh() { let cache = CredentialCache::new(Duration::minutes(5)); cache - .get_or_fetch("k", at(10, 0), || async { Ok(creds(at(11, 0))) }) + .get_or_fetch("k", at(10, 0), || async { Ok::<_, ()>(creds(at(11, 0))) }) .await .unwrap(); // Well before the lead window (expiry 11:00, lead 5m → refresh at 10:55). let got = cache - .get_or_fetch("k", at(10, 30), || async { + .get_or_fetch::<_, _, ()>("k", at(10, 30), || async { panic!("must not fetch while cached creds are fresh") }) .await @@ -179,12 +222,12 @@ mod tests { async fn refreshes_within_lead_window() { let cache = CredentialCache::new(Duration::minutes(5)); cache - .get_or_fetch("k", at(10, 0), || async { Ok(creds(at(11, 0))) }) + .get_or_fetch("k", at(10, 0), || async { Ok::<_, ()>(creds(at(11, 0))) }) .await .unwrap(); // 10:56 is inside the 5-minute lead before the 11:00 expiry → refetch. let got = cache - .get_or_fetch("k", at(10, 56), || async { Ok(creds(at(12, 0))) }) + .get_or_fetch("k", at(10, 56), || async { Ok::<_, ()>(creds(at(12, 0))) }) .await .unwrap(); assert_eq!(got.expiration, at(12, 0)); @@ -194,12 +237,12 @@ mod tests { async fn invalidate_forces_refetch() { let cache = CredentialCache::new(Duration::minutes(5)); cache - .get_or_fetch("k", at(10, 0), || async { Ok(creds(at(11, 0))) }) + .get_or_fetch("k", at(10, 0), || async { Ok::<_, ()>(creds(at(11, 0))) }) .await .unwrap(); cache.invalidate("k"); let got = cache - .get_or_fetch("k", at(10, 1), || async { Ok(creds(at(13, 0))) }) + .get_or_fetch("k", at(10, 1), || async { Ok::<_, ()>(creds(at(13, 0))) }) .await .unwrap(); assert_eq!(got.expiration, at(13, 0)); @@ -209,11 +252,11 @@ mod tests { async fn keys_are_isolated() { let cache = CredentialCache::new(Duration::minutes(5)); cache - .get_or_fetch("a", at(10, 0), || async { Ok(creds(at(11, 0))) }) + .get_or_fetch("a", at(10, 0), || async { Ok::<_, ()>(creds(at(11, 0))) }) .await .unwrap(); let got = cache - .get_or_fetch("b", at(10, 0), || async { Ok(creds(at(12, 0))) }) + .get_or_fetch("b", at(10, 0), || async { Ok::<_, ()>(creds(at(12, 0))) }) .await .unwrap(); assert_eq!(got.expiration, at(12, 0)); @@ -235,7 +278,7 @@ mod tests { // Yield while holding the per-key lock so the sibling // future contends for it — exercising single-flight. tokio::task::yield_now().await; - Ok(creds(at(11, 0))) + Ok::<_, ()>(creds(at(11, 0))) }) .await } @@ -247,7 +290,7 @@ mod tests { cache .get_or_fetch("k", now, || async { calls.fetch_add(1, Ordering::SeqCst); - Ok(creds(at(11, 0))) + Ok::<_, ()>(creds(at(11, 0))) }) .await } diff --git a/crates/oidc-provider/Cargo.toml b/crates/oidc-provider/Cargo.toml index 500b44d..ae8448b 100644 --- a/crates/oidc-provider/Cargo.toml +++ b/crates/oidc-provider/Cargo.toml @@ -13,6 +13,7 @@ gcp = [] [dependencies] multistore.workspace = true multistore-backend-federation.workspace = true +multistore-credential-cache.workspace = true async-trait.workspace = true thiserror.workspace = true serde.workspace = true diff --git a/crates/oidc-provider/src/backend_auth.rs b/crates/oidc-provider/src/backend_auth.rs index 1ebecc5..e910333 100644 --- a/crates/oidc-provider/src/backend_auth.rs +++ b/crates/oidc-provider/src/backend_auth.rs @@ -42,9 +42,14 @@ impl AwsBackendAuth { let subject = config.option("oidc_subject").unwrap_or("s3-proxy"); let exchange = AwsExchange::new(role_arn.to_string()); + // The cache is runtime-agnostic, so the clock is supplied here. Every + // wasm-targeting crate in this workspace already uses `Utc::now()` + // (e.g. JWT minting, JWKS, sealed tokens), so this adds no new runtime + // assumption; a fully clock-injected path could thread `now` from the + // request instead. let creds = self .provider - .get_credentials(role_arn, &exchange, subject, &[]) + .get_credentials(role_arn, &exchange, subject, &[], chrono::Utc::now()) .await?; let mut options = config.backend_options.clone(); diff --git a/crates/oidc-provider/src/cache.rs b/crates/oidc-provider/src/cache.rs deleted file mode 100644 index a4c7612..0000000 --- a/crates/oidc-provider/src/cache.rs +++ /dev/null @@ -1,96 +0,0 @@ -//! TTL credential cache. -//! -//! Caches [`CloudCredentials`] by key, evicting entries that are within a -//! safety margin of expiration. This avoids redundant STS calls when the -//! same backend is accessed repeatedly within a short window. - -use std::collections::HashMap; -use std::sync::{Arc, Mutex}; - -use chrono::{Duration, Utc}; - -use crate::CloudCredentials; - -/// Safety margin before expiration — credentials are considered expired -/// this many seconds before their actual `expires_at`. -const EXPIRY_MARGIN_SECS: i64 = 60; - -/// Thread-safe TTL cache for cloud credentials. -pub struct CredentialCache { - entries: Mutex>>, -} - -impl Default for CredentialCache { - fn default() -> Self { - Self::new() - } -} - -impl CredentialCache { - /// Create an empty credential cache. - pub fn new() -> Self { - Self { - entries: Mutex::new(HashMap::new()), - } - } - - /// Retrieve cached credentials if they are still valid. - pub fn get(&self, key: &str) -> Option> { - let entries = self.entries.lock().unwrap(); - if let Some(creds) = entries.get(key) { - let margin = Duration::seconds(EXPIRY_MARGIN_SECS); - if creds.expires_at > Utc::now() + margin { - return Some(creds.clone()); - } - } - None - } - - /// Store credentials in the cache. - pub fn put(&self, key: String, creds: Arc) { - let mut entries = self.entries.lock().unwrap(); - entries.insert(key, creds); - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn make_creds(expires_in_secs: i64) -> CloudCredentials { - CloudCredentials { - access_key_id: "AKID".into(), - secret_access_key: "secret".into(), - session_token: "token".into(), - expires_at: Utc::now() + Duration::seconds(expires_in_secs), - } - } - - #[test] - fn cache_returns_valid_entry() { - let cache = CredentialCache::new(); - let creds = Arc::new(make_creds(600)); - cache.put("role-a".into(), creds.clone()); - - let got = cache.get("role-a"); - assert!(got.is_some()); - assert_eq!(got.unwrap().access_key_id, "AKID"); - } - - #[test] - fn cache_evicts_expired_entry() { - let cache = CredentialCache::new(); - // Expires in 30 seconds — within the 60-second margin - let creds = Arc::new(make_creds(30)); - cache.put("role-b".into(), creds); - - let got = cache.get("role-b"); - assert!(got.is_none()); - } - - #[test] - fn cache_miss_for_unknown_key() { - let cache = CredentialCache::new(); - assert!(cache.get("unknown").is_none()); - } -} diff --git a/crates/oidc-provider/src/lib.rs b/crates/oidc-provider/src/lib.rs index 76dfb8b..f97ddc4 100644 --- a/crates/oidc-provider/src/lib.rs +++ b/crates/oidc-provider/src/lib.rs @@ -15,7 +15,6 @@ //! can provide its own implementation. pub mod backend_auth; -pub mod cache; pub mod discovery; pub mod exchange; pub mod jwks; @@ -24,7 +23,9 @@ pub mod route_handler; use std::sync::Arc; -use cache::CredentialCache; +use chrono::{DateTime, Utc}; +use multistore_credential_cache::{CredentialCache, Expiring}; + use exchange::CredentialExchange; use jwt::JwtSigner; @@ -55,6 +56,18 @@ impl std::fmt::Debug for CloudCredentials { } } +impl Expiring for CloudCredentials { + fn expiration(&self) -> DateTime { + self.expires_at + } +} + +/// Refresh cached credentials once they're within this long of expiry, so a +/// credential never expires mid-request. +fn refresh_lead() -> chrono::Duration { + chrono::Duration::seconds(60) +} + /// HTTP client abstraction for outbound requests (STS token exchange). /// /// Each runtime provides its own implementation — `reqwest` on native, @@ -74,7 +87,7 @@ pub trait HttpExchange: /// Top-level provider that combines signing, exchange, and caching. pub struct OidcCredentialProvider { signer: JwtSigner, - cache: CredentialCache, + cache: CredentialCache>, http: H, issuer: String, audience: String, @@ -90,7 +103,7 @@ impl OidcCredentialProvider { pub fn new(signer: JwtSigner, http: H, issuer: String, audience: String) -> Self { Self { signer, - cache: CredentialCache::new(), + cache: CredentialCache::new(refresh_lead()), http, issuer, audience, @@ -101,32 +114,29 @@ impl OidcCredentialProvider { /// /// `exchange` describes how to trade the self-signed JWT for cloud /// credentials (AWS, Azure, GCP). `cache_key` identifies the backend - /// for caching purposes (e.g. the role ARN). + /// for caching purposes (e.g. the role ARN). `now` is the current time, + /// supplied by the caller so the cache stays runtime-agnostic. + /// + /// Concurrent calls for the same `cache_key` are single-flighted: only one + /// JWT mint + exchange runs, and the rest await its result. pub async fn get_credentials>( &self, cache_key: &str, exchange: &E, subject: &str, extra_claims: &[(&str, &str)], + now: DateTime, ) -> Result, OidcProviderError> { - // Check cache first - if let Some(creds) = self.cache.get(cache_key) { - return Ok(creds); - } - - // Mint a JWT - let token = self - .signer - .sign(subject, &self.issuer, &self.audience, extra_claims)?; - - // Exchange it for cloud credentials - let creds: CloudCredentials = exchange.exchange(&self.http, &token).await?; - let creds = Arc::new(creds); - - // Cache - self.cache.put(cache_key.to_string(), creds.clone()); - - Ok(creds) + self.cache + .get_or_fetch(cache_key, now, || async { + // Cache miss (or due for refresh): mint a JWT and exchange it. + let token = + self.signer + .sign(subject, &self.issuer, &self.audience, extra_claims)?; + let creds = exchange.exchange(&self.http, &token).await?; + Ok(Arc::new(creds)) + }) + .await } /// Access the underlying signer (e.g. for JWKS generation). @@ -246,7 +256,7 @@ mod tests { let exchange = exchange::aws::AwsExchange::new("arn:aws:iam::123:role/Test".into()); let creds = provider - .get_credentials("role-a", &exchange, "my-sub", &[]) + .get_credentials("role-a", &exchange, "my-sub", &[], Utc::now()) .await .unwrap(); @@ -268,14 +278,14 @@ mod tests { // First call — hits mock HTTP let creds1 = provider - .get_credentials("role-a", &exchange, "sub", &[]) + .get_credentials("role-a", &exchange, "sub", &[], Utc::now()) .await .unwrap(); assert_eq!(http.calls(), 1); // Second call — should use cache, no additional HTTP call let creds2 = provider - .get_credentials("role-a", &exchange, "sub", &[]) + .get_credentials("role-a", &exchange, "sub", &[], Utc::now()) .await .unwrap(); assert_eq!(http.calls(), 1); @@ -295,11 +305,11 @@ mod tests { let exchange = exchange::aws::AwsExchange::new("arn:aws:iam::123:role/Test".into()); provider - .get_credentials("role-a", &exchange, "sub", &[]) + .get_credentials("role-a", &exchange, "sub", &[], Utc::now()) .await .unwrap(); provider - .get_credentials("role-b", &exchange, "sub", &[]) + .get_credentials("role-b", &exchange, "sub", &[], Utc::now()) .await .unwrap(); diff --git a/docs/.vitepress/config.ts b/docs/.vitepress/config.ts index 2c38670..6ba8de4 100644 --- a/docs/.vitepress/config.ts +++ b/docs/.vitepress/config.ts @@ -75,6 +75,10 @@ const adminSidebar = [ text: "Multi-Runtime Design", link: "/architecture/multi-runtime", }, + { + text: "Caching", + link: "/architecture/caching", + }, ], }, { diff --git a/docs/architecture/caching.md b/docs/architecture/caching.md new file mode 100644 index 0000000..4c8a2ad --- /dev/null +++ b/docs/architecture/caching.md @@ -0,0 +1,102 @@ +# Caching + +Multistore mints and fetches several kinds of short-lived data on the hot path — backend credentials, signing keys, config lookups. Re-doing that work on every request would add latency and hammer upstream services (STS, identity providers, config stores). This page covers what is cached, the shared credential-cache primitive, and — most importantly — how caching behaves differently on each runtime, with best practices for deploying it safely on Cloudflare Workers. + +## What gets cached + +| Cache | Crate | What it holds | Layer | +|-------|-------|---------------|-------| +| Credential cache | `multistore-credential-cache` | Short-lived backend/cloud credentials, keyed by credential identity | Outbound auth | +| JWKS cache | `multistore-sts` | Identity providers' public verification keys | Inbound auth | +| Config provider cache | example code (`CachedProvider`) | Bucket/role/credential config lookups | Configuration | + +These are independent layers — they protect different upstreams. This page focuses on the **credential cache**, since it is the most performance-sensitive and the most subtle to deploy correctly across runtimes. See [Caching Providers](/configuration/providers/cached) for config-lookup caching and [Backend Auth](/auth/backend-auth) for where credentials come from. + +## The credential cache + +`multistore-credential-cache` provides one shared `CredentialCache` used by every crate that mints short-lived credentials (e.g. [`multistore-oidc-provider`](/auth/backend-auth#oidc-backend-auth) caches the cloud credentials it exchanges for). Any credential type that implements the `Expiring` trait can be cached: + +```rust +use multistore_credential_cache::{CredentialCache, Expiring}; + +let cache = CredentialCache::new(chrono::Duration::minutes(5)); + +let creds = cache + .get_or_fetch(role_arn, now, || async { mint_via_sts().await }) + .await?; +``` + +It gives you three behaviours: + +- **Serve-while-fresh** — a cached value is returned directly while it is comfortably valid. +- **Proactive refresh** — once a value is within its *refresh lead* of expiry, the next access re-mints it, so a credential is never handed out about to expire mid-request. +- **Single-flight** — while one caller is minting for a key, concurrent callers for that *same* key await the in-flight result instead of each launching their own mint. This collapses a cold-cache burst into a single upstream call. + +Two design choices make it portable: + +- **Runtime-agnostic clock.** The caller passes `now` rather than the cache reading a clock, because `Utc::now()` is not available on `wasm32-unknown-unknown` without extra features. See [Multi-Runtime Design](/architecture/multi-runtime). +- **Closure-based `get_or_fetch`.** Because the cache calls *your* fetch closure on a miss, you can layer additional cache tiers (e.g. the Cloudflare Cache API) *inside* the closure without the cache crate ever depending on a runtime — see [Layering an external tier](#layering-an-external-tier). + +## Runtime caveats + +A credential cache is only as useful as the lifetime of the thing holding it. The same `CredentialCache` behaves very differently depending on the runtime and on where you construct it. + +> [!IMPORTANT] +> An in-memory cache only helps across requests if it lives in **persistent scope** (constructed once and reused), not rebuilt inside the per-request handler. If the provider holding the cache is created fresh on every request, every request starts with an empty cache and the cache does nothing. + +| Tier | Scope | Survives | Use for | +|------|-------|----------|---------| +| In-memory (`CredentialCache`) | Per-process (native) / **per-isolate** (Workers) | While the process/isolate is warm | The default; single-flight + proactive refresh | +| Cloudflare Cache API | **Per-colo** (data center) | Isolate cold starts within a colo | Sharing mints across isolates in one location | +| Workers KV | Global, eventually consistent | Everything (≈seconds to propagate) | Cross-colo sharing of short-lived creds | +| Durable Objects | Global, single owner per key | Everything | True cross-isolate single-flight | + +### Native (server) runtime + +The server runtime is a long-lived multi-threaded process. Construct the provider (and thus its `CredentialCache`) **once at startup** and share it across requests. The in-memory cache is then global to the process: one mint per credential lifetime, and single-flight collapses concurrent requests. This is the simple, fully-effective case. + +### Cloudflare Workers runtime + +Workers run in V8 **isolates**, not per-request containers. Global/module-scope state persists across requests handled by the same warm isolate — but: + +- The cache is **per-isolate**, and Cloudflare runs many isolates across many colos. With _N_ live isolates you get up to _N_ independent mints per credential lifetime, not one. +- Isolates cold-start empty and are evicted under memory pressure or idle. +- Single-flight only collapses concurrency *within* one isolate. + +Even so, this is a large win: a warm isolate serving thousands of requests for the same bucket reuses one credential instead of minting per request. To get *any* cross-request benefit, hoist the provider into module scope (e.g. a `OnceCell`) rather than rebuilding it inside the `fetch` handler. + +For sharing beyond a single isolate, layer an external tier. + +## Layering an external tier + +The Cloudflare Cache API is **colo-local**: shared across all isolates in one data center and surviving isolate cold-starts there. It is the cheapest way to stop every fresh isolate in a busy colo from re-minting. Because `get_or_fetch` calls your closure on a miss, the external tier lives *inside* the closure — keeping `multistore-credential-cache` free of any runtime dependency: + +```text +request + └─ L1: in-memory CredentialCache (per-isolate, single-flight, proactive refresh) + └─ on miss, the fetch closure does: + L2: Cache API (colo-local, shared across isolates in the colo) + └─ on miss, origin: STS / token exchange (mint) + └─ write back to L2 +``` + +This same shape works with Workers KV (global) as an L3, or Durable Objects when you need *global* single-flight (one DO instance per key serialises the mint across all isolates). + +### Best practices for an external credential cache + +> [!WARNING] +> An external cache value is a usable credential at rest. Treat it as a secret. + +- **Use a synthetic, non-routable cache key.** Namespace it under a host you control (e.g. `https://creds.internal/v1/`) so a client can never `fetch` credentials straight out of the cache. +- **Encrypt the stored value.** The proxy already holds a signing key; encrypting at rest means a leaked cache entry is not directly usable. +- **Keep TTLs short** and aligned with the credential lifetime — these are already short-lived credentials; do not extend their reach. +- **Align the external TTL with the in-memory refresh lead.** Set the external entry's `max-age` to `remaining_lifetime − refresh_lead`. Otherwise the in-memory layer enters its refresh window, reads a still-present-but-stale value from the external tier, and re-reads forever without ever minting fresh. +- **Write back without blocking the response** (e.g. `ctx.waitUntil(...)` on Workers) so populating the cache never adds latency. +- **Don't rely on presence.** External caches evict early; always re-check the embedded expiry rather than trusting that a hit is fresh. + +## See also + +- [Multi-Runtime Design](/architecture/multi-runtime) — why the cache is runtime-agnostic +- [Backend Auth](/auth/backend-auth) — what the credential cache stores and where it's minted +- [Cloudflare Workers Deployment](/deployment/cloudflare-workers) — deploying the Workers runtime +- [Caching Providers](/configuration/providers/cached) — caching config/credential *lookups* (a separate layer) diff --git a/docs/auth/backend-auth.md b/docs/auth/backend-auth.md index 6062e91..f4fba67 100644 --- a/docs/auth/backend-auth.md +++ b/docs/auth/backend-auth.md @@ -224,11 +224,14 @@ On subsequent requests, cached credentials are reused until they expire. When using OIDC backend auth, the proxy caches temporary credentials to avoid calling the cloud provider's STS on every request. Credentials are: - Keyed by the IAM role ARN -- Automatically refreshed when they expire -- Shared across concurrent requests to the same bucket +- **Proactively refreshed** shortly before they expire, so a credential is never handed out about to expire mid-request +- **Single-flighted** across concurrent requests to the same bucket — only one token exchange runs while the rest await its result This means the first request to an OIDC-backed bucket incurs a small latency cost for the credential exchange, but subsequent requests use cached credentials until they expire. +> [!NOTE] +> How effective this cache is depends on the runtime — an in-memory cache is per-process on the server but per-isolate on Cloudflare Workers. See [Caching](/architecture/caching) for the cross-runtime details and best practices (including layering the Cloudflare Cache API). + ## Choosing Between Static and OIDC | | Static Credentials | OIDC Backend Auth | From f4f5e097a3c6e2b319d2bba504e40b14275cd1eb Mon Sep 17 00:00:00 2001 From: Anthony Lukach Date: Fri, 19 Jun 2026 22:40:26 -0700 Subject: [PATCH 3/3] docs(caching): fix stale reference to the dissolved credential-cache crate The credential cache was folded into multistore-oidc-provider; the "Layering an external tier" section still named the deleted `multistore-credential-cache` crate. Refer to "the cache" instead. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/architecture/caching.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/architecture/caching.md b/docs/architecture/caching.md index 0df7eb1..6b219fc 100644 --- a/docs/architecture/caching.md +++ b/docs/architecture/caching.md @@ -64,7 +64,7 @@ For sharing beyond a single isolate, layer an external tier. ## Layering an external tier -The Cloudflare Cache API is **colo-local**: shared across all isolates in one data center and surviving isolate cold-starts there. It is the cheapest way to stop every fresh isolate in a busy colo from re-minting. Because `get_or_fetch` calls your closure on a miss, the external tier lives *inside* the closure — keeping `multistore-credential-cache` free of any runtime dependency: +The Cloudflare Cache API is **colo-local**: shared across all isolates in one data center and surviving isolate cold-starts there. It is the cheapest way to stop every fresh isolate in a busy colo from re-minting. Because `get_or_fetch` calls your closure on a miss, the external tier lives *inside* the closure — keeping the cache itself free of any runtime dependency: ```text request