From 0a7d655dcaf8181c87279e74f142546001cf06ea Mon Sep 17 00:00:00 2001 From: lmjabreu Date: Mon, 18 May 2026 23:01:29 +0100 Subject: [PATCH 01/10] feat(auth): add refresh-token support to TokenStore + PKCE provider - TokenStore.active() now returns an optional `bundle` carrying refresh token + expiry alongside the access token. The built-in keyring store always populates it; custom stores can omit it without breakage. - TokenStore.set() accepts a TokenBundle or a bare string (widening). - AuthProvider gained an optional `refreshToken` method; the PKCE provider implements it (grant_type=refresh_token, no client_secret). - New refreshAccessToken() helper drives proactive (skew window) and reactive (force) refresh, with file-lock concurrency on a sidecar `.refresh.lock` and typed AUTH_REFRESH_EXPIRED / _TRANSIENT / _UNAVAILABLE errors. - Keyring store writes refresh tokens to a sibling slot per account (`/refresh`) and exposes getRecordsLocation() so refresh helpers can derive a lock path without re-plumbing options. - runOAuthFlow persists the full bundle (access + refresh + expiry) on login instead of just the access token. - status.fetchLive context gained `bundle` so consumers can render expiry. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/auth/errors.ts | 6 + src/auth/flow.test.ts | 12 +- src/auth/flow.ts | 7 +- src/auth/index.ts | 4 + src/auth/keyring/migrate.ts | 11 +- src/auth/keyring/record-write.test.ts | 62 ++++++-- src/auth/keyring/record-write.ts | 89 +++++++++-- src/auth/keyring/token-store.test.ts | 51 +++++- src/auth/keyring/token-store.ts | 221 ++++++++++++++------------ src/auth/keyring/types.ts | 10 ++ src/auth/logout.test.ts | 3 +- src/auth/providers/pkce.test.ts | 103 +++++++++++- src/auth/providers/pkce.ts | 81 +++++++++- src/auth/refresh.test.ts | 191 ++++++++++++++++++++++ src/auth/refresh.ts | 183 +++++++++++++++++++++ src/auth/status.test.ts | 4 +- src/auth/status.ts | 8 +- src/auth/token-view.test.ts | 3 +- src/auth/types.ts | 63 +++++++- src/auth/user-flag.test.ts | 5 +- src/auth/user-flag.ts | 4 +- 21 files changed, 972 insertions(+), 149 deletions(-) create mode 100644 src/auth/refresh.test.ts create mode 100644 src/auth/refresh.ts diff --git a/src/auth/errors.ts b/src/auth/errors.ts index bf77311..753f02a 100644 --- a/src/auth/errors.ts +++ b/src/auth/errors.ts @@ -10,6 +10,12 @@ export type AuthErrorCode = | 'AUTH_TOKEN_EXCHANGE_FAILED' | 'AUTH_STORE_WRITE_FAILED' | 'AUTH_STORE_READ_FAILED' + /** Refresh token rejected — typically `invalid_grant`. Forces re-login. */ + | 'AUTH_REFRESH_EXPIRED' + /** Refresh attempt failed transiently (network, 5xx, non-JSON). Caller may retry. */ + | 'AUTH_REFRESH_TRANSIENT' + /** No refresh token stored, or provider doesn't implement `refreshToken`. */ + | 'AUTH_REFRESH_UNAVAILABLE' | 'NOT_AUTHENTICATED' | 'TOKEN_FROM_ENV' | 'NO_ACCOUNT_SELECTED' diff --git a/src/auth/flow.test.ts b/src/auth/flow.test.ts index a510304..14cadfb 100644 --- a/src/auth/flow.test.ts +++ b/src/auth/flow.test.ts @@ -31,9 +31,16 @@ function fakeStore(): TokenStore & { last?: { account: Account; token: const state: { last?: { account: Account; token: string } } = {} return { async active() { - return state.last ?? null + return state.last + ? { + token: state.last.token, + bundle: { accessToken: state.last.token }, + account: state.last.account, + } + : null }, - async set(account, token) { + async set(account, credentials) { + const token = typeof credentials === 'string' ? credentials : credentials.accessToken state.last = { account, token } }, async clear() { @@ -151,6 +158,7 @@ describe('runOAuthFlow', () => { expect(openBrowser).toHaveBeenCalledTimes(1) expect(await store.active()).toEqual({ token: 'tok-1', + bundle: { accessToken: 'tok-1' }, account: { id: '1', email: 'a@b' }, }) }) diff --git a/src/auth/flow.ts b/src/auth/flow.ts index d95611b..890a03d 100644 --- a/src/auth/flow.ts +++ b/src/auth/flow.ts @@ -188,7 +188,12 @@ export async function runOAuthFlow( checkAborted() try { - await options.store.set(account, exchange.accessToken) + await options.store.set(account, { + accessToken: exchange.accessToken, + refreshToken: exchange.refreshToken, + accessTokenExpiresAt: exchange.accessTokenExpiresAt, + refreshTokenExpiresAt: exchange.refreshTokenExpiresAt, + }) } catch (error) { if (error instanceof CliError) throw error throw new CliError( diff --git a/src/auth/index.ts b/src/auth/index.ts index 90858c9..a3d6b57 100644 --- a/src/auth/index.ts +++ b/src/auth/index.ts @@ -1,6 +1,8 @@ export type { AuthErrorCode } from './errors.js' export { runOAuthFlow } from './flow.js' export type { RunOAuthFlowOptions, RunOAuthFlowResult } from './flow.js' +export { refreshAccessToken } from './refresh.js' +export type { RefreshAccessTokenOptions } from './refresh.js' export { attachLoginCommand } from './login.js' export type { AttachLoginCommandOptions, AttachLoginContext } from './login.js' export { attachLogoutCommand } from './logout.js' @@ -32,6 +34,8 @@ export type { ExchangeResult, PrepareInput, PrepareResult, + RefreshInput, + TokenBundle, TokenStore, ValidateInput, } from './types.js' diff --git a/src/auth/keyring/migrate.ts b/src/auth/keyring/migrate.ts index 9b407ba..6572f6f 100644 --- a/src/auth/keyring/migrate.ts +++ b/src/auth/keyring/migrate.ts @@ -157,14 +157,19 @@ export async function migrateLegacyAuth( // internally (writing to `fallbackToken` instead), so any error here is // a non-keyring failure — typically a `userRecords.upsert` rejection. try { + const accountSlot = accountForUser(account.id) await writeRecordWithKeyringFallback({ - secureStore: createSecureStore({ + secureStore: createSecureStore({ serviceName, account: accountSlot }), + // Legacy single-user state never carried a refresh token, but + // wire the sibling slot anyway so a defensive delete clears any + // junk that may have been parked there by a hand-edit. + refreshSecureStore: createSecureStore({ serviceName, - account: accountForUser(account.id), + account: `${accountSlot}/refresh`, }), userRecords, account, - token: legacyToken.token, + bundle: { accessToken: legacyToken.token }, }) } catch (error) { return skipped(silent, logPrefix, 'user-record-write-failed', getErrorMessage(error)) diff --git a/src/auth/keyring/record-write.test.ts b/src/auth/keyring/record-write.test.ts index 9d79478..2a5b390 100644 --- a/src/auth/keyring/record-write.test.ts +++ b/src/auth/keyring/record-write.test.ts @@ -11,49 +11,88 @@ const account: Account = { id: '42', label: 'me', email: 'a@b.c' } describe('writeRecordWithKeyringFallback', () => { it('writes to the keyring slot and upserts a record with no fallbackToken on the happy path', async () => { const secureStore = buildSingleSlot() + const refreshSecureStore = buildSingleSlot() const { store: userRecords, state, upsertSpy } = buildUserRecords() const result = await writeRecordWithKeyringFallback({ secureStore, + refreshSecureStore, userRecords, account, - token: ' tok_secret ', + bundle: { accessToken: ' tok_secret ' }, }) expect(result.storedSecurely).toBe(true) expect(secureStore.setSpy).toHaveBeenCalledWith('tok_secret') - expect(upsertSpy).toHaveBeenCalledWith({ account }) + // No refresh token in bundle → defensive delete of the refresh slot. + expect(refreshSecureStore.deleteSpy).toHaveBeenCalled() + expect(upsertSpy).toHaveBeenCalledWith({ + account, + accessTokenExpiresAt: undefined, + refreshTokenExpiresAt: undefined, + }) + expect(state.records.get('42')?.fallbackToken).toBeUndefined() + expect(state.records.get('42')?.fallbackRefreshToken).toBeUndefined() + }) + + it('writes both access and refresh secrets when bundle includes refresh token', async () => { + const secureStore = buildSingleSlot() + const refreshSecureStore = buildSingleSlot() + const { store: userRecords, state } = buildUserRecords() + + const result = await writeRecordWithKeyringFallback({ + secureStore, + refreshSecureStore, + userRecords, + account, + bundle: { + accessToken: 'access_tok', + refreshToken: 'refresh_tok', + accessTokenExpiresAt: 1_700_000_000_000, + }, + }) + + expect(result.storedSecurely).toBe(true) + expect(secureStore.setSpy).toHaveBeenCalledWith('access_tok') + expect(refreshSecureStore.setSpy).toHaveBeenCalledWith('refresh_tok') + expect(state.records.get('42')?.accessTokenExpiresAt).toBe(1_700_000_000_000) expect(state.records.get('42')?.fallbackToken).toBeUndefined() + expect(state.records.get('42')?.fallbackRefreshToken).toBeUndefined() }) - it('falls back to fallbackToken on the user record when the keyring is offline', async () => { + it('falls back to fallback tokens on the user record when the keyring is offline', async () => { const secureStore = buildSingleSlot() secureStore.setSpy.mockRejectedValueOnce(new SecureStoreUnavailableError('no dbus')) + const refreshSecureStore = buildSingleSlot() const { store: userRecords, state } = buildUserRecords() const result = await writeRecordWithKeyringFallback({ secureStore, + refreshSecureStore, userRecords, account, - token: 'tok_plain', + bundle: { accessToken: 'tok_plain', refreshToken: 'refr_plain' }, }) expect(result.storedSecurely).toBe(false) expect(state.records.get('42')?.fallbackToken).toBe('tok_plain') + expect(state.records.get('42')?.fallbackRefreshToken).toBe('refr_plain') }) it('rethrows non-keyring errors from setSecret without writing the record', async () => { const secureStore = buildSingleSlot() const cause = new Error('unexpected backend explosion') secureStore.setSpy.mockRejectedValueOnce(cause) + const refreshSecureStore = buildSingleSlot() const { store: userRecords, state } = buildUserRecords() await expect( writeRecordWithKeyringFallback({ secureStore, + refreshSecureStore, userRecords, account, - token: 'tok', + bundle: { accessToken: 'tok' }, }), ).rejects.toBe(cause) expect(state.records.size).toBe(0) @@ -61,37 +100,40 @@ describe('writeRecordWithKeyringFallback', () => { it('rolls back the keyring write when upsert fails (no orphan credential)', async () => { const secureStore = buildSingleSlot() + const refreshSecureStore = buildSingleSlot() const { store: userRecords, upsertSpy } = buildUserRecords() upsertSpy.mockRejectedValueOnce(new Error('disk full')) await expect( writeRecordWithKeyringFallback({ secureStore, + refreshSecureStore, userRecords, account, - token: 'tok', + bundle: { accessToken: 'tok', refreshToken: 'refr' }, }), ).rejects.toThrow('disk full') expect(secureStore.deleteSpy).toHaveBeenCalledTimes(1) + expect(refreshSecureStore.deleteSpy).toHaveBeenCalledTimes(1) }) it('does not rollback the keyring on upsert failure when the write went to fallbackToken', async () => { - // No successful keyring write happened, so there is nothing to roll - // back. Verify the helper doesn't accidentally call deleteSecret - // in this branch. const secureStore = buildSingleSlot() secureStore.setSpy.mockRejectedValueOnce(new SecureStoreUnavailableError('no dbus')) + const refreshSecureStore = buildSingleSlot() const { store: userRecords, upsertSpy } = buildUserRecords() upsertSpy.mockRejectedValueOnce(new Error('disk full')) await expect( writeRecordWithKeyringFallback({ secureStore, + refreshSecureStore, userRecords, account, - token: 'tok', + bundle: { accessToken: 'tok' }, }), ).rejects.toThrow('disk full') expect(secureStore.deleteSpy).not.toHaveBeenCalled() + expect(refreshSecureStore.deleteSpy).not.toHaveBeenCalled() }) }) diff --git a/src/auth/keyring/record-write.ts b/src/auth/keyring/record-write.ts index c8c850b..8e3a667 100644 --- a/src/auth/keyring/record-write.ts +++ b/src/auth/keyring/record-write.ts @@ -1,17 +1,24 @@ -import type { AuthAccount } from '../types.js' +import type { AuthAccount, TokenBundle } from '../types.js' import { type SecureStore, SecureStoreUnavailableError } from './secure-store.js' import type { UserRecord, UserRecordStore } from './types.js' type WriteRecordOptions = { - /** Per-account keyring slot, already configured by the caller (e.g. via `createSecureStore`). */ + /** Per-account keyring slot for the access token. */ secureStore: SecureStore + /** + * Per-account keyring slot for the refresh token (separate slot under + * `${account}/refresh`). When the bundle has no refresh token this slot + * is still cleared on write so a previous refresh secret doesn't outlive + * a login that didn't return one. + */ + refreshSecureStore: SecureStore userRecords: UserRecordStore account: TAccount - token: string + bundle: TokenBundle } type WriteRecordResult = { - /** `true` when the secret landed in the OS keyring; `false` when the keyring was unavailable and the token was written to `fallbackToken` on the user record. */ + /** `true` when both access and refresh secrets landed in the OS keyring; `false` when the keyring was unavailable and the bundle was parked on the user record's `fallbackToken` / `fallbackRefreshToken`. */ storedSecurely: boolean } @@ -20,11 +27,15 @@ type WriteRecordResult = { * `migrateLegacyAuth`. Encapsulates the order-of-operations contract that * matters for credential safety: * - * 1. Keyring `setSecret` first. On `SecureStoreUnavailableError`, swallow - * the failure and record a `fallbackToken` on the user record instead. - * Any other error rethrows. - * 2. `userRecords.upsert(record)`. On failure, best-effort rollback the - * keyring write so we don't leave an orphan credential for an account + * 1. Keyring `setSecret` for access token first. On + * `SecureStoreUnavailableError`, swallow and route both tokens to the + * record's fallback slots. Any other error rethrows. + * 2. When the keyring is online and the bundle has a refresh token, write + * it to the sibling refresh slot. When the bundle has no refresh token, + * best-effort `deleteSecret()` on the refresh slot so a stale secret + * from a previous login doesn't shadow the new state. + * 3. `userRecords.upsert(record)`. On failure, best-effort rollback both + * keyring writes so we don't leave orphan credentials for an account * cli-core never managed to register. Original error rethrows. * * Default promotion (`setDefaultId`) is intentionally **not** in here — both @@ -35,20 +46,63 @@ type WriteRecordResult = { export async function writeRecordWithKeyringFallback( options: WriteRecordOptions, ): Promise { - const { secureStore, userRecords, account, token } = options - const trimmed = token.trim() + const { secureStore, refreshSecureStore, userRecords, account, bundle } = options + const trimmedAccess = bundle.accessToken.trim() + const trimmedRefresh = bundle.refreshToken?.trim() || undefined let storedSecurely = false try { - await secureStore.setSecret(trimmed) + await secureStore.setSecret(trimmedAccess) storedSecurely = true } catch (error) { if (!(error instanceof SecureStoreUnavailableError)) throw error } + // Refresh slot mirrors the access slot's online/offline branch. When + // online without a refresh token, clear the slot defensively — a + // re-login that no longer returns refresh shouldn't leave a stale + // refresh secret behind that `active()` would happily return. + let wroteRefreshSecurely = false + if (storedSecurely) { + if (trimmedRefresh) { + try { + await refreshSecureStore.setSecret(trimmedRefresh) + wroteRefreshSecurely = true + } catch (error) { + if (!(error instanceof SecureStoreUnavailableError)) throw error + // Refresh slot offline but access slot was online — treat as + // partial offline: park everything on the record fallback so + // `active()` reads from a single consistent place. + try { + await secureStore.deleteSecret() + } catch { + // best-effort rollback + } + storedSecurely = false + } + } else { + // No refresh token in this bundle — purge any previous secret. + try { + await refreshSecureStore.deleteSecret() + } catch { + // best-effort + } + } + } + const record: UserRecord = storedSecurely - ? { account } - : { account, fallbackToken: trimmed } + ? { + account, + accessTokenExpiresAt: bundle.accessTokenExpiresAt, + refreshTokenExpiresAt: bundle.refreshTokenExpiresAt, + } + : { + account, + fallbackToken: trimmedAccess, + fallbackRefreshToken: trimmedRefresh, + accessTokenExpiresAt: bundle.accessTokenExpiresAt, + refreshTokenExpiresAt: bundle.refreshTokenExpiresAt, + } try { await userRecords.upsert(record) @@ -59,6 +113,13 @@ export async function writeRecordWithKeyringFallback function fixture( opts: { keyring?: SingleSlot + /** Pre-seeded refresh-slot mock. Defaults to a fresh empty singleSlot. */ + refreshKeyring?: SingleSlot records?: Record> defaultId?: string | null factoryOpts?: Partial> } = {}, ) { const keyring = opts.keyring ?? buildSingleSlot() - mockedCreateSecureStore.mockReturnValue(keyring) + const refreshKeyring = opts.refreshKeyring ?? buildSingleSlot() + // Route by account slug: anything ending with `/refresh` lands in the + // refresh slot mock; everything else in the access slot mock. Keeps + // single-user tests' assertions about `keyring.deleteSpy` honest by + // isolating refresh-slot side effects. + mockedCreateSecureStore.mockImplementation(({ account }) => + account.endsWith('/refresh') ? refreshKeyring : keyring, + ) const harness = buildUserRecords() for (const [id, rec] of Object.entries(opts.records ?? {})) { harness.state.records.set(id, rec) @@ -61,6 +70,7 @@ function fixture( }) return { keyring, + refreshKeyring, store, state: harness.state, upsertSpy: harness.upsertSpy, @@ -79,11 +89,24 @@ describe('createKeyringTokenStore', () => { await store.set(account, 'tok_secret') expect(keyring.setSpy).toHaveBeenCalledWith('tok_secret') - expect(upsertSpy).toHaveBeenCalledWith({ account }) + expect(upsertSpy).toHaveBeenCalledWith({ + account, + accessTokenExpiresAt: undefined, + refreshTokenExpiresAt: undefined, + }) expect(state.defaultId).toBe('42') expect(store.getLastStorageResult()).toEqual({ storage: 'secure-store' }) - await expect(store.active()).resolves.toEqual({ token: 'tok_secret', account }) + await expect(store.active()).resolves.toEqual({ + token: 'tok_secret', + bundle: { + accessToken: 'tok_secret', + refreshToken: undefined, + accessTokenExpiresAt: undefined, + refreshTokenExpiresAt: undefined, + }, + account, + }) await store.clear() expect(keyring.deleteSpy).toHaveBeenCalledTimes(1) @@ -106,7 +129,16 @@ describe('createKeyringTokenStore', () => { 'system credential manager unavailable; token saved as plaintext in /tmp/fake/config.json', }) - await expect(store.active()).resolves.toEqual({ token: 'tok_plain', account }) + await expect(store.active()).resolves.toEqual({ + token: 'tok_plain', + bundle: { + accessToken: 'tok_plain', + refreshToken: undefined, + accessTokenExpiresAt: undefined, + refreshTokenExpiresAt: undefined, + }, + account, + }) expect(keyring.getSpy).not.toHaveBeenCalled() }) @@ -157,7 +189,16 @@ describe('createKeyringTokenStore', () => { const keyring = buildSingleSlot({ secret: 'tok' }) const { store } = fixture({ keyring, records: { '42': { account } } }) - await expect(store.active()).resolves.toEqual({ token: 'tok', account }) + await expect(store.active()).resolves.toEqual({ + token: 'tok', + bundle: { + accessToken: 'tok', + refreshToken: undefined, + accessTokenExpiresAt: undefined, + refreshTokenExpiresAt: undefined, + }, + account, + }) }) it('throws NO_ACCOUNT_SELECTED when multiple users exist and no default is set', async () => { diff --git a/src/auth/keyring/token-store.ts b/src/auth/keyring/token-store.ts index 3a67895..529dcc0 100644 --- a/src/auth/keyring/token-store.ts +++ b/src/auth/keyring/token-store.ts @@ -1,5 +1,5 @@ import { CliError } from '../../errors.js' -import type { AccountRef, AuthAccount, TokenStore } from '../types.js' +import type { AccountRef, AuthAccount, TokenBundle, TokenStore } from '../types.js' import { accountNotFoundError } from '../user-flag.js' import { writeRecordWithKeyringFallback } from './record-write.js' import { @@ -40,6 +40,8 @@ export type KeyringTokenStore = TokenStore( @@ -47,10 +49,19 @@ const DEFAULT_MATCH_ACCOUNT = ( ref: AccountRef, ): boolean => account.id === ref || account.label === ref +/** Sibling keyring slot for the refresh token. Kept here so every read/write site agrees on the wire format. */ +export function refreshAccountSlot(accessSlot: string): string { + return `${accessSlot}/refresh` +} + +function toBundle(credentials: string | TokenBundle): TokenBundle { + return typeof credentials === 'string' ? { accessToken: credentials } : credentials +} + /** * Multi-account `TokenStore` that keeps secrets in the OS credential manager - * and per-user metadata in the consumer's `UserRecordStore`. Falls back to a - * plaintext token on the user record when the keyring is unreachable (WSL + * and per-user metadata in the consumer's `UserRecordStore`. Falls back to + * plaintext tokens on the user record when the keyring is unreachable (WSL * without D-Bus, missing native binary, locked Keychain, …) so the CLI keeps * working at the cost of a visible warning. * @@ -60,17 +71,20 @@ const DEFAULT_MATCH_ACCOUNT = ( * keyring read is the only path. When the keyring is offline the token is * parked on the record and must be reachable on every subsequent read. * + * Refresh tokens live in a sibling keyring slot (`${account}/refresh`) so + * that `clear()` can drop them without parsing the access slot's contents. + * * Write order is keyring first, then `userRecords.upsert`. If the upsert - * fails after a successful keyring write, the keyring entry is rolled back - * via `deleteSecret()` to avoid orphan credentials for a user that cli-core - * never managed to record. + * fails after a successful keyring write, both keyring entries are rolled + * back via `deleteSecret()` to avoid orphan credentials for a user that + * cli-core never managed to record. * * Clear order is the inverse: record removal first (the source of truth that - * the rest of the CLI reads), then keyring delete. Any keyring delete - * failure after a successful removal is downgraded to a warning — the orphan - * secret is harmless because no record references it anymore, and surfacing - * the error would corrupt local state (record gone, but caller sees a thrown - * exception and assumes the clear failed). + * the rest of the CLI reads), then keyring delete (both slots). Any keyring + * delete failure after a successful removal is downgraded to a warning — the + * orphan secret is harmless because no record references it anymore, and + * surfacing the error would corrupt local state (record gone, but caller + * sees a thrown exception and assumes the clear failed). */ export function createKeyringTokenStore( options: CreateKeyringTokenStoreOptions, @@ -82,10 +96,17 @@ export function createKeyringTokenStore( let lastStorageResult: TokenStorageResult | undefined let lastClearResult: TokenStorageResult | undefined - function secureStoreFor(account: TAccount): SecureStore { + function accessStoreFor(account: TAccount): SecureStore { return createSecureStore({ serviceName, account: accountForUser(account.id) }) } + function refreshStoreFor(account: TAccount): SecureStore { + return createSecureStore({ + serviceName, + account: refreshAccountSlot(accountForUser(account.id)), + }) + } + type Snapshot = { records: UserRecord[]; defaultId: string | null } /** @@ -101,19 +122,6 @@ export function createKeyringTokenStore( return { records, defaultId } } - /** - * Resolve the snapshot target for a given ref (or the implicit default - * when `ref === undefined`). Two failure modes: - * - * - Multiple records match the `ref`: ambiguous (the default matcher - * includes `account.label`, and labels aren't guaranteed unique). - * Throws `NO_ACCOUNT_SELECTED` so the user picks a tighter ref instead - * of silently acting on whichever record `list()` returned first. - * - `ref === undefined`, no `defaultId` pinned, and more than one record - * exists. Same code — `setDefaultId` is best-effort during `set()`, - * so a typed failure here is the only non-misleading signal for "you - * have multiple accounts; pick one". - */ function resolveTarget( snapshot: Snapshot, ref: AccountRef | undefined, @@ -147,11 +155,60 @@ export function createKeyringTokenStore( } } + /** + * Read the access + refresh secrets for a record, preferring the + * plaintext fallbacks when present (mirrors the contract that the + * fallback is authoritative whenever it exists). Returns `null` for the + * "stored corrupted state" case so callers can throw `AUTH_STORE_READ_FAILED`. + */ + async function readBundleForRecord(record: UserRecord): Promise { + const fallbackAccess = record.fallbackToken?.trim() + if (fallbackAccess) { + return { + accessToken: fallbackAccess, + refreshToken: record.fallbackRefreshToken?.trim() || undefined, + accessTokenExpiresAt: record.accessTokenExpiresAt, + refreshTokenExpiresAt: record.refreshTokenExpiresAt, + } + } + + let rawAccess: string | null + try { + rawAccess = await accessStoreFor(record.account).getSecret() + } catch (error) { + if (error instanceof SecureStoreUnavailableError) { + throw new CliError( + 'AUTH_STORE_READ_FAILED', + `${SECURE_STORE_DESCRIPTION} unavailable; could not read stored token (${error.message})`, + ) + } + throw error + } + + const accessToken = rawAccess?.trim() + if (!accessToken) return null + + // Refresh slot read errors are downgraded — a missing or unreadable + // refresh token is not fatal (the access token alone is still usable + // until it expires). Surface as "no refresh present" so the caller + // sees a consistent shape. + let rawRefresh: string | null = null + try { + rawRefresh = await refreshStoreFor(record.account).getSecret() + } catch { + rawRefresh = null + } + + return { + accessToken, + refreshToken: rawRefresh?.trim() || undefined, + accessTokenExpiresAt: record.accessTokenExpiresAt, + refreshTokenExpiresAt: record.refreshTokenExpiresAt, + } + } + return { async active(ref) { - // Ref-only path skips `getDefaultId()` — `resolveTarget` never - // touches it when `ref` is supplied, so the extra read would be - // pure latency on every authenticated command. const snapshot: Snapshot = ref === undefined ? await readFullSnapshot() @@ -159,64 +216,36 @@ export function createKeyringTokenStore( const record = resolveTarget(snapshot, ref) if (!record) return null - const fallback = record.fallbackToken?.trim() - if (fallback) { - return { token: fallback, account: record.account } - } - - let raw: string | null - try { - raw = await secureStoreFor(record.account).getSecret() - } catch (error) { - // A matching record exists but the keyring can't be read. - // Surface a typed failure instead of returning `null`, which - // would otherwise be indistinguishable from "no stored - // account" and trigger `ACCOUNT_NOT_FOUND` on `--user `. - // `attachLogoutCommand` catches this specific code so an - // explicit `logout --user ` can still clear the matching - // record without needing the unreadable token. - if (error instanceof SecureStoreUnavailableError) { - throw new CliError( - 'AUTH_STORE_READ_FAILED', - `${SECURE_STORE_DESCRIPTION} unavailable; could not read stored token (${error.message})`, - ) - } - throw error - } - - const token = raw?.trim() - if (token) { - return { token, account: record.account } + const bundle = await readBundleForRecord(record) + if (!bundle) { + // Record exists, no `fallbackToken`, and the keyring slot is + // empty — credential deleted out-of-band. Corrupted state, + // not a miss. + throw new CliError( + 'AUTH_STORE_READ_FAILED', + `${SECURE_STORE_DESCRIPTION} returned no credential for the stored account; the keyring entry may have been removed externally.`, + ) } - // Record exists, no `fallbackToken`, and the keyring slot is - // empty — the credential was deleted out-of-band (user ran - // `security delete-generic-password`, `secret-tool clear`, …). - // This is corrupted state, not a miss; collapsing it to `null` - // would make `--user ` surface as `ACCOUNT_NOT_FOUND` and - // hide the real problem. - throw new CliError( - 'AUTH_STORE_READ_FAILED', - `${SECURE_STORE_DESCRIPTION} returned no credential for the stored account; the keyring entry may have been removed externally.`, - ) + return { token: bundle.accessToken, bundle, account: record.account } }, - async set(account, token) { + async set(account, credentials) { // Reset the cached storage result up front so a caller that // catches a thrown `set()` doesn't observe the previous call's // warning leaking through `getLastStorageResult`. lastStorageResult = undefined + const bundle = toBundle(credentials) const { storedSecurely } = await writeRecordWithKeyringFallback({ - secureStore: secureStoreFor(account), + secureStore: accessStoreFor(account), + refreshSecureStore: refreshStoreFor(account), userRecords, account, - token, + bundle, }) - // Best-effort default promotion: the record is already persisted, - // so a failure here must not turn into `AUTH_STORE_WRITE_FAILED` - // (the user can recover by setting a default later). + // Best-effort default promotion — same rationale as before. try { const existingDefault = await userRecords.getDefaultId() if (!existingDefault) { @@ -232,23 +261,14 @@ export function createKeyringTokenStore( }, async clear(ref) { - // Reset up front for the same reason as `set` — and so a no-op - // (no matching record) clears any stale result from a previous - // call. lastClearResult = undefined - // `clear` always needs the pinned default to decide whether to - // un-pin after the removal, so we can't skip `getDefaultId()` - // even on the explicit-ref path. const snapshot = await readFullSnapshot() const record = resolveTarget(snapshot, ref) if (!record) return await userRecords.remove(record.account.id) - // Default un-pinning is best-effort: a failure here must not - // skip the keyring delete below, otherwise we leave an - // unreachable orphan secret behind for the just-removed record. if (snapshot.defaultId === record.account.id) { try { await userRecords.setDefaultId(null) @@ -259,31 +279,31 @@ export function createKeyringTokenStore( const fallbackClear = fallbackResult('local auth state cleared in') - // Always attempt the keyring delete. Even when the record carried - // a `fallbackToken`, an older keyring entry may still be parked - // there from a prior keyring-online write that was later replaced - // by an offline-fallback write — skipping the delete would leak - // that orphan. Downgrade *any* failure to a warning: the record - // is already gone, so re-throwing would corrupt local state - // (caller sees an exception and assumes nothing was cleared, - // even though the next `account list` will show the user gone). + // Always attempt to delete both keyring slots. Either may have + // an orphan entry from a prior keyring-online write that was + // later replaced by an offline-fallback write. + let keyringClean = true try { - await secureStoreFor(record.account).deleteSecret() - lastClearResult = - record.fallbackToken !== undefined ? fallbackClear : { storage: 'secure-store' } + await accessStoreFor(record.account).deleteSecret() + } catch { + keyringClean = false + } + try { + await refreshStoreFor(record.account).deleteSecret() } catch { + keyringClean = false + } + + if (!keyringClean) { lastClearResult = fallbackClear + } else { + lastClearResult = + record.fallbackToken !== undefined ? fallbackClear : { storage: 'secure-store' } } }, async list() { const snapshot = await readFullSnapshot() - // Use `resolveTarget` to compute the *effective* default so the - // `isDefault` markers match what `active()` would resolve — that - // includes the implicit single-record case. `resolveTarget` can - // throw `NO_ACCOUNT_SELECTED`, which we want to swallow here - // (listing accounts is a diagnostic operation that must work - // even when no default is pinned). let implicitDefault: UserRecord | null = null try { implicitDefault = resolveTarget(snapshot, undefined) @@ -297,7 +317,6 @@ export function createKeyringTokenStore( }, async setDefault(ref) { - // Ref-only path — skip `getDefaultId()` like `active(ref)`. const snapshot: Snapshot = { records: await userRecords.list(), defaultId: null } const record = resolveTarget(snapshot, ref) if (!record) { @@ -313,5 +332,9 @@ export function createKeyringTokenStore( getLastClearResult() { return lastClearResult }, + + getRecordsLocation() { + return recordsLocation + }, } } diff --git a/src/auth/keyring/types.ts b/src/auth/keyring/types.ts index c873dcc..4fa593a 100644 --- a/src/auth/keyring/types.ts +++ b/src/auth/keyring/types.ts @@ -25,6 +25,16 @@ export type UserRecord = { * that would otherwise live in the OS credential manager. */ fallbackToken?: string + /** + * Plaintext refresh token, kept in step with `fallbackToken`: only ever + * present when the keyring was unavailable at write time. Same + * security-relevant treatment. + */ + fallbackRefreshToken?: string + /** Unix-epoch ms — when the persisted access token expires. */ + accessTokenExpiresAt?: number + /** Unix-epoch ms — when the persisted refresh token expires (rarely known). */ + refreshTokenExpiresAt?: number } /** diff --git a/src/auth/logout.test.ts b/src/auth/logout.test.ts index d16c060..e5daf6d 100644 --- a/src/auth/logout.test.ts +++ b/src/auth/logout.test.ts @@ -18,7 +18,8 @@ function buildStore( activeSpy: ReturnType clearSpy: ReturnType } { - const activeSpy = vi.fn(async () => initial) + const snapshot = initial && { ...initial, bundle: { accessToken: initial.token } } + const activeSpy = vi.fn(async () => snapshot) const clearSpy = vi.fn(async () => undefined) const store: TokenStore = { active: activeSpy, diff --git a/src/auth/providers/pkce.test.ts b/src/auth/providers/pkce.test.ts index 6bfebbf..dc12cc0 100644 --- a/src/auth/providers/pkce.test.ts +++ b/src/auth/providers/pkce.test.ts @@ -84,7 +84,7 @@ describe('createPkceProvider', () => { handshake: { codeVerifier: 'the-verifier', clientId: 'client-xyz' }, }) expect(result.accessToken).toBe('tok-1') - expect(result.expiresAt).toBeGreaterThan(Date.now()) + expect(result.accessTokenExpiresAt).toBeGreaterThan(Date.now()) const failing = createPkceProvider({ authorizeUrl: 'unused', @@ -144,4 +144,105 @@ describe('createPkceProvider', () => { }), ).rejects.toMatchObject({ code: 'AUTH_TOKEN_EXCHANGE_FAILED' }) }) + + describe('refreshToken', () => { + const account: Account = { id: '1', label: 'a', email: 'a@b' } + + it('POSTs grant_type=refresh_token with the stored refresh_token + client_id (no client_secret)', async () => { + let captured: { url: string; body: URLSearchParams } | null = null + const provider = createPkceProvider({ + authorizeUrl: 'unused', + tokenUrl: 'https://example.com/oauth/token', + clientId: 'client-xyz', + validate, + fetchImpl: ((url: string, init?: RequestInit) => { + captured = { + url, + body: new URLSearchParams((init?.body as string) ?? ''), + } + return Promise.resolve( + respond({ + access_token: 'new-access', + refresh_token: 'new-refresh', + expires_in: 3600, + }), + ) + }) as typeof fetch, + }) + const refresh = provider.refreshToken + expect(refresh).toBeTypeOf('function') + + const result = await refresh!({ + refreshToken: 'old-refresh', + account, + handshake: {}, + }) + + expect(captured!.url).toBe('https://example.com/oauth/token') + expect(captured!.body.get('grant_type')).toBe('refresh_token') + expect(captured!.body.get('refresh_token')).toBe('old-refresh') + expect(captured!.body.get('client_id')).toBe('client-xyz') + expect(captured!.body.has('client_secret')).toBe(false) + expect(captured!.body.has('code_verifier')).toBe(false) + expect(result.accessToken).toBe('new-access') + expect(result.refreshToken).toBe('new-refresh') + expect(result.accessTokenExpiresAt).toBeGreaterThan(Date.now()) + // Account passes through so the caller doesn't have to look it up again. + expect(result.account).toEqual(account) + }) + + it('throws AUTH_REFRESH_EXPIRED on 400 invalid_grant (refresh token revoked or expired)', async () => { + const provider = createPkceProvider({ + authorizeUrl: 'unused', + tokenUrl: 'https://example.com/oauth/token', + clientId: 'cid', + validate, + fetchImpl: (() => + Promise.resolve( + new Response(JSON.stringify({ error: 'invalid_grant' }), { status: 400 }), + )) as typeof fetch, + }) + + await expect( + provider.refreshToken!({ + refreshToken: 'rt', + account, + handshake: {}, + }), + ).rejects.toMatchObject({ code: 'AUTH_REFRESH_EXPIRED' }) + }) + + it('throws AUTH_REFRESH_TRANSIENT on non-invalid_grant errors (5xx, network)', async () => { + const transientStatus = createPkceProvider({ + authorizeUrl: 'unused', + tokenUrl: 'https://example.com/oauth/token', + clientId: 'cid', + validate, + fetchImpl: (() => + Promise.resolve(new Response('upstream', { status: 503 }))) as typeof fetch, + }) + await expect( + transientStatus.refreshToken!({ + refreshToken: 'rt', + account, + handshake: {}, + }), + ).rejects.toMatchObject({ code: 'AUTH_REFRESH_TRANSIENT' }) + + const network = createPkceProvider({ + authorizeUrl: 'unused', + tokenUrl: 'https://example.com/oauth/token', + clientId: 'cid', + validate, + fetchImpl: (() => Promise.reject(new Error('ECONNRESET'))) as typeof fetch, + }) + await expect( + network.refreshToken!({ + refreshToken: 'rt', + account, + handshake: {}, + }), + ).rejects.toMatchObject({ code: 'AUTH_REFRESH_TRANSIENT' }) + }) + }) }) diff --git a/src/auth/providers/pkce.ts b/src/auth/providers/pkce.ts index 2e641a7..5cb483a 100644 --- a/src/auth/providers/pkce.ts +++ b/src/auth/providers/pkce.ts @@ -7,6 +7,7 @@ import type { AuthorizeResult, ExchangeInput, ExchangeResult, + RefreshInput, ValidateInput, } from '../types.js' @@ -152,13 +153,91 @@ export function createPkceProvider( return { accessToken: payload.access_token, refreshToken: payload.refresh_token, - expiresAt: + accessTokenExpiresAt: typeof payload.expires_in === 'number' ? Date.now() + payload.expires_in * 1000 : undefined, } }, + async refreshToken(input: RefreshInput): Promise> { + // At refresh time there is no PKCE codeVerifier — the access + // token has already been issued, and the refresh grant doesn't + // re-prove the user. We do still need the clientId (public OAuth + // client) and tokenUrl, both resolved from the synthesised + // handshake on the stored account. + const flags = (input.handshake.flags as Record | undefined) ?? {} + const tokenUrl = resolve(options.tokenUrl, input.handshake, flags) + const clientId = resolve(options.clientId, input.handshake, flags) + + const body = new URLSearchParams({ + grant_type: 'refresh_token', + refresh_token: input.refreshToken, + client_id: clientId, + }) + + let response: Response + try { + response = await fetchImpl(tokenUrl, { + method: 'POST', + headers: { + 'Content-Type': 'application/x-www-form-urlencoded', + Accept: 'application/json', + }, + body: body.toString(), + }) + } catch (error) { + throw new CliError( + 'AUTH_REFRESH_TRANSIENT', + `Refresh request failed: ${getErrorMessage(error)}`, + ) + } + + if (!response.ok) { + const detail = await safeReadText(response) + // 400/401 with `invalid_grant` means the refresh token is + // revoked or expired and a forced re-login is the only + // recovery. Other statuses are treated as transient (server + // hiccup) — the caller may retry on the next request. + const isInvalidGrant = + (response.status === 400 || response.status === 401) && + /invalid_grant/i.test(detail ?? '') + throw new CliError( + isInvalidGrant ? 'AUTH_REFRESH_EXPIRED' : 'AUTH_REFRESH_TRANSIENT', + `Refresh token endpoint returned HTTP ${response.status}.`, + detail ? { hints: [detail] } : {}, + ) + } + + let payload: { access_token?: string; refresh_token?: string; expires_in?: number } + try { + payload = (await response.json()) as typeof payload + } catch (error) { + throw new CliError( + 'AUTH_REFRESH_TRANSIENT', + `Refresh endpoint returned non-JSON response: ${getErrorMessage(error)}`, + ) + } + if (!payload.access_token) { + throw new CliError( + 'AUTH_REFRESH_TRANSIENT', + 'Refresh endpoint response missing access_token.', + ) + } + return { + accessToken: payload.access_token, + // Some OAuth servers rotate refresh tokens; others don't. We + // can't tell from one response, so persist whatever comes + // back. The caller's `set()` will replace the stored value. + refreshToken: payload.refresh_token, + accessTokenExpiresAt: + typeof payload.expires_in === 'number' + ? Date.now() + payload.expires_in * 1000 + : undefined, + account: input.account, + } + }, + validateToken: options.validate, } } diff --git a/src/auth/refresh.test.ts b/src/auth/refresh.test.ts new file mode 100644 index 0000000..15dfbfa --- /dev/null +++ b/src/auth/refresh.test.ts @@ -0,0 +1,191 @@ +import { mkdtempSync, rmSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' + +import { refreshAccessToken } from './refresh.js' +import type { AuthProvider, TokenBundle, TokenStore } from './types.js' + +type Account = { id: string; label?: string; email: string } +const account: Account = { id: '1', label: 'a', email: 'a@b' } + +/** In-memory store that satisfies the TokenStore contract + exposes recordsLocation. */ +function buildStore(initial: TokenBundle | null) { + const state: { bundle: TokenBundle | null; setCalls: TokenBundle[] } = { + bundle: initial, + setCalls: [], + } + let recordsLocation = '/tmp/test-records' + const store: TokenStore & { getRecordsLocation(): string } = { + async active() { + return state.bundle + ? { token: state.bundle.accessToken, bundle: state.bundle, account } + : null + }, + async set(_account, credentials) { + const bundle = + typeof credentials === 'string' ? { accessToken: credentials } : credentials + state.bundle = bundle + state.setCalls.push(bundle) + }, + async clear() { + state.bundle = null + }, + async list() { + return state.bundle ? [{ account, isDefault: true }] : [] + }, + async setDefault() {}, + getRecordsLocation: () => recordsLocation, + } + return { + store, + state, + setRecordsLocation(path: string) { + recordsLocation = path + }, + } +} + +function refreshingProvider( + impl?: (input: { + refreshToken: string + account: Account + }) => Promise<{ accessToken: string; refreshToken?: string; accessTokenExpiresAt?: number }>, +): AuthProvider & { refreshSpy: ReturnType } { + const refreshSpy = vi.fn( + impl ?? + (async () => ({ + accessToken: 'new-access', + refreshToken: 'new-refresh', + accessTokenExpiresAt: Date.now() + 3_600_000, + })), + ) + const provider: AuthProvider = { + async authorize() { + return { authorizeUrl: '', handshake: {} } + }, + async exchangeCode() { + return { accessToken: 'x' } + }, + async validateToken() { + return account + }, + refreshToken: async (input) => ({ ...(await refreshSpy(input)), account: input.account }), + } + return Object.assign(provider, { refreshSpy }) +} + +describe('refreshAccessToken', () => { + let tempDir: string + + beforeEach(() => { + tempDir = mkdtempSync(join(tmpdir(), 'cli-core-refresh-')) + }) + afterEach(() => { + rmSync(tempDir, { recursive: true, force: true }) + }) + + it('returns the active snapshot unchanged when the access token is well within its skew window', async () => { + const { store, state } = buildStore({ + accessToken: 'still-good', + refreshToken: 'rt', + accessTokenExpiresAt: Date.now() + 600_000, + }) + const provider = refreshingProvider() + + const result = await refreshAccessToken({ store, provider }) + + expect(result.token).toBe('still-good') + expect(provider.refreshSpy).not.toHaveBeenCalled() + expect(state.setCalls).toHaveLength(0) + }) + + it('refreshes when access token is past the skew window and persists the new bundle', async () => { + const { store, state, setRecordsLocation } = buildStore({ + accessToken: 'expired', + refreshToken: 'rt-old', + accessTokenExpiresAt: Date.now() - 1000, + }) + setRecordsLocation(join(tempDir, 'records.json')) + const provider = refreshingProvider() + + const result = await refreshAccessToken({ store, provider }) + + expect(provider.refreshSpy).toHaveBeenCalledWith( + expect.objectContaining({ refreshToken: 'rt-old', account }), + ) + expect(result.token).toBe('new-access') + expect(state.bundle?.refreshToken).toBe('new-refresh') + }) + + it('forces a refresh regardless of expiry when force: true (reactive 401 path)', async () => { + const { store, state } = buildStore({ + accessToken: 'rejected-by-server', + refreshToken: 'rt', + // Expiry hasn't been hit yet — but server already 401'd. + accessTokenExpiresAt: Date.now() + 600_000, + }) + const provider = refreshingProvider() + + const result = await refreshAccessToken({ store, provider, force: true }) + + expect(provider.refreshSpy).toHaveBeenCalledTimes(1) + expect(result.token).toBe('new-access') + expect(state.setCalls).toHaveLength(1) + }) + + it('throws AUTH_REFRESH_UNAVAILABLE when no refresh token is stored', async () => { + const { store } = buildStore({ + accessToken: 'expired', + accessTokenExpiresAt: Date.now() - 1000, + }) + const provider = refreshingProvider() + + await expect(refreshAccessToken({ store, provider })).rejects.toMatchObject({ + code: 'AUTH_REFRESH_UNAVAILABLE', + }) + }) + + it('throws AUTH_REFRESH_UNAVAILABLE when the provider does not implement refreshToken', async () => { + const { store } = buildStore({ + accessToken: 'expired', + refreshToken: 'rt', + accessTokenExpiresAt: Date.now() - 1000, + }) + const refreshlessProvider: AuthProvider = { + authorize: async () => ({ authorizeUrl: '', handshake: {} }), + exchangeCode: async () => ({ accessToken: 'x' }), + validateToken: async () => account, + } + + await expect( + refreshAccessToken({ store, provider: refreshlessProvider }), + ).rejects.toMatchObject({ code: 'AUTH_REFRESH_UNAVAILABLE' }) + }) + + it('throws NOT_AUTHENTICATED when the store has no active snapshot', async () => { + const { store } = buildStore(null) + const provider = refreshingProvider() + + await expect(refreshAccessToken({ store, provider })).rejects.toMatchObject({ + code: 'NOT_AUTHENTICATED', + }) + }) + + it('keeps the old refresh token when the server response omits a new one', async () => { + const { store, state } = buildStore({ + accessToken: 'expired', + refreshToken: 'keep-me', + accessTokenExpiresAt: Date.now() - 1000, + }) + const provider = refreshingProvider(async () => ({ + accessToken: 'new-access', + // no refresh_token in response — preserve the stored one + accessTokenExpiresAt: Date.now() + 3_600_000, + })) + + await refreshAccessToken({ store, provider }) + + expect(state.bundle?.refreshToken).toBe('keep-me') + }) +}) diff --git a/src/auth/refresh.ts b/src/auth/refresh.ts new file mode 100644 index 0000000..7d3e8e8 --- /dev/null +++ b/src/auth/refresh.ts @@ -0,0 +1,183 @@ +import { closeSync, openSync, unlinkSync } from 'node:fs' +import { setTimeout as sleep } from 'node:timers/promises' +import { CliError } from '../errors.js' +import type { AccountRef, AuthAccount, AuthProvider, TokenBundle, TokenStore } from './types.js' + +/** Default skew window: refresh when fewer than 60s remain on the access token. */ +const DEFAULT_SKEW_MS = 60_000 +/** Default file-lock acquisition window. Two seconds covers the median refresh round-trip. */ +const DEFAULT_LOCK_TIMEOUT_MS = 2_000 +const LOCK_POLL_INTERVAL_MS = 100 + +export type RefreshAccessTokenOptions = { + store: TokenStore + provider: AuthProvider + /** Target a stored account by ref (defaults to the active default). */ + ref?: AccountRef + /** + * Build the handshake the provider's `refreshToken` will see. Defaults + * to `{ ...account, flags: {} }`. Outline needs the `baseUrl` / + * `oauthClientId` on the account — both flow through automatically. + */ + buildHandshake?: (account: TAccount) => Record + /** + * Refresh when fewer than this many ms remain on the access token's + * `expiresAt`. Default 60s. Set to `0` for "only refresh when fully + * expired"; set to `Infinity` to force a refresh whenever a refresh + * token exists. + */ + skewMs?: number + /** + * Force a refresh regardless of expiry — used by the reactive 401-retry + * path where the server has already rejected the access token. Throws + * `AUTH_REFRESH_UNAVAILABLE` when no refresh token is stored. + */ + force?: boolean + /** + * Sidecar lock file path. Defaults to `${store.getRecordsLocation()}.refresh.lock` + * when the store is a `KeyringTokenStore` exposing `getRecordsLocation`, + * otherwise no file lock is used (single-process safety only). + */ + lockPath?: string + /** Lock acquisition window. Default 2_000ms. */ + lockTimeoutMs?: number +} + +/** + * Read the active credentials; refresh them when the access token is past + * its skew window (proactive) or `force` is set (reactive 401 path). Persists + * the new bundle and returns it. When refresh isn't needed the active bundle + * is returned unchanged. + * + * Concurrency: when a sidecar lock file path is available, the helper acquires + * it before refreshing. On contention it waits up to `lockTimeoutMs`, then + * re-reads the store — if another process has already refreshed, the fresh + * bundle is returned without firing a duplicate POST. If the lock can't be + * acquired the refresh proceeds anyway (worst case: one extra token rotation). + */ +export async function refreshAccessToken( + options: RefreshAccessTokenOptions, +): Promise<{ token: string; bundle: TokenBundle; account: TAccount }> { + const skewMs = options.skewMs ?? DEFAULT_SKEW_MS + const lockTimeoutMs = options.lockTimeoutMs ?? DEFAULT_LOCK_TIMEOUT_MS + + const snapshot = await options.store.active(options.ref) + if (!snapshot) { + throw new CliError('NOT_AUTHENTICATED', 'No stored credentials to refresh.') + } + // Synthesise a minimal bundle for stores that don't track refresh state — + // they'll fall through to `AUTH_REFRESH_UNAVAILABLE` below since + // `refreshToken` is missing, which is the right behaviour. + const bundle: TokenBundle = snapshot.bundle ?? { accessToken: snapshot.token } + const resolvedSnapshot = { token: snapshot.token, bundle, account: snapshot.account } + if (!shouldRefresh(bundle, skewMs, options.force ?? false)) { + return resolvedSnapshot + } + if (!bundle.refreshToken) { + throw new CliError( + 'AUTH_REFRESH_UNAVAILABLE', + 'Access token expired and no refresh token is stored.', + ) + } + if (!options.provider.refreshToken) { + throw new CliError( + 'AUTH_REFRESH_UNAVAILABLE', + "Auth provider does not implement 'refreshToken'.", + ) + } + + const lockPath = options.lockPath ?? deriveLockPath(options.store) + const lock = lockPath ? await acquireFileLock(lockPath, lockTimeoutMs) : null + + try { + // Re-read inside the lock: another process may have refreshed + // already, and re-using its result avoids two refreshes racing the + // server's rotation logic (the loser's refresh token would be void). + if (lock) { + const fresh = await options.store.active(options.ref) + if (fresh) { + const freshBundle = fresh.bundle ?? { accessToken: fresh.token } + if (!shouldRefresh(freshBundle, skewMs, options.force ?? false)) { + return { token: fresh.token, bundle: freshBundle, account: fresh.account } + } + } + } + + const buildHandshake = + options.buildHandshake ?? + ((account: TAccount): Record => ({ ...account, flags: {} })) + + const exchange = await options.provider.refreshToken({ + refreshToken: bundle.refreshToken, + account: snapshot.account, + handshake: buildHandshake(snapshot.account), + }) + + const nextBundle: TokenBundle = { + accessToken: exchange.accessToken, + // Rotate when the server returns one, keep the previous when it + // doesn't. Same logic the spec calls out for refresh rotation. + refreshToken: exchange.refreshToken ?? bundle.refreshToken, + accessTokenExpiresAt: exchange.accessTokenExpiresAt, + refreshTokenExpiresAt: exchange.refreshTokenExpiresAt ?? bundle.refreshTokenExpiresAt, + } + + await options.store.set(snapshot.account, nextBundle) + return { token: nextBundle.accessToken, bundle: nextBundle, account: snapshot.account } + } finally { + if (lock) lock.release() + } +} + +function shouldRefresh(bundle: TokenBundle, skewMs: number, force: boolean): boolean { + if (force) return true + if (typeof bundle.accessTokenExpiresAt !== 'number') return false + return Date.now() > bundle.accessTokenExpiresAt - skewMs +} + +function deriveLockPath( + store: TokenStore, +): string | undefined { + const candidate = store as { getRecordsLocation?: () => string } + if (typeof candidate.getRecordsLocation === 'function') { + return `${candidate.getRecordsLocation()}.refresh.lock` + } + return undefined +} + +type FileLock = { release: () => void } + +/** + * Hand-rolled lock via `O_EXCL` open of a sidecar file. On contention the + * caller waits up to `timeoutMs` for the holder to release, polling every + * `LOCK_POLL_INTERVAL_MS`. Returns `null` rather than throwing on timeout — + * the caller should still attempt the refresh (worst case is duplicate POST, + * recoverable). Stale locks from crashed processes are not detected; the + * trade-off matches the cost of correctness vs. complexity for a CLI. + */ +async function acquireFileLock(path: string, timeoutMs: number): Promise { + const deadline = Date.now() + timeoutMs + while (true) { + try { + const fd = openSync(path, 'wx') + closeSync(fd) + return { + release() { + try { + unlinkSync(path) + } catch { + // best-effort: another process may have force-cleaned + } + }, + } + } catch (error) { + if ((error as NodeJS.ErrnoException).code !== 'EEXIST') { + // ENOENT on a missing dir is the only other realistic case; + // bubble up so the caller proceeds without the lock. + return null + } + if (Date.now() >= deadline) return null + await sleep(LOCK_POLL_INTERVAL_MS) + } + } +} diff --git a/src/auth/status.test.ts b/src/auth/status.test.ts index 9af64ee..8da5ffd 100644 --- a/src/auth/status.test.ts +++ b/src/auth/status.test.ts @@ -16,7 +16,8 @@ function buildStore( store: TokenStore activeSpy: ReturnType } { - const activeSpy = vi.fn(async () => initial) + const snapshot = initial && { ...initial, bundle: { accessToken: initial.token } } + const activeSpy = vi.fn(async () => snapshot) const store: TokenStore = { active: activeSpy, set: vi.fn(), @@ -131,6 +132,7 @@ describe('attachStatusCommand', () => { expect(fetchLive).toHaveBeenCalledWith({ account, token: 'tok', + bundle: { accessToken: 'tok' }, view: { json: false, ndjson: false }, flags: {}, }) diff --git a/src/auth/status.ts b/src/auth/status.ts index c27ff09..bd109a1 100644 --- a/src/auth/status.ts +++ b/src/auth/status.ts @@ -2,7 +2,7 @@ import type { Command } from 'commander' import { CliError } from '../errors.js' import { formatJson, formatNdjson } from '../json.js' import type { ViewOptions } from '../options.js' -import type { AuthAccount, TokenStore } from './types.js' +import type { AuthAccount, TokenBundle, TokenStore } from './types.js' import { attachUserFlag, extractUserRef, requireSnapshotForRef } from './user-flag.js' export type AttachStatusContext = { @@ -25,6 +25,8 @@ export type AttachStatusCommandOptions flags: Record @@ -84,6 +86,10 @@ export function attachStatusCommand( ? await options.fetchLive({ account: snapshot.account, token: snapshot.token, + // Synthesise a minimal bundle for stores that don't track + // refresh / expiry, so consumers can always destructure + // `bundle.accessToken` without a null-check. + bundle: snapshot.bundle ?? { accessToken: snapshot.token }, view, flags, }) diff --git a/src/auth/token-view.test.ts b/src/auth/token-view.test.ts index 2907151..2cfe52a 100644 --- a/src/auth/token-view.test.ts +++ b/src/auth/token-view.test.ts @@ -15,7 +15,8 @@ function buildStore( store: TokenStore activeSpy: ReturnType } { - const activeSpy = vi.fn(async () => initial) + const snapshot = initial && { ...initial, bundle: { accessToken: initial.token } } + const activeSpy = vi.fn(async () => snapshot) const store: TokenStore = { active: activeSpy, set: vi.fn(), diff --git a/src/auth/types.ts b/src/auth/types.ts index 4949389..b8fb220 100644 --- a/src/auth/types.ts +++ b/src/auth/types.ts @@ -46,18 +46,48 @@ export type ExchangeInput = { export type ExchangeResult = { accessToken: string refreshToken?: string - /** Unix-epoch ms. cli-core does not refresh today. */ - expiresAt?: number + /** Unix-epoch ms when the access token expires. */ + accessTokenExpiresAt?: number + /** Unix-epoch ms when the refresh token expires (rarely advertised). */ + refreshTokenExpiresAt?: number /** Set when the token endpoint already identifies the account; skips `validateToken`. */ account?: TAccount } +/** + * Persisted credential triple. `refreshToken` and `accessTokenExpiresAt` + * are present only when the token endpoint returned them at login and the + * provider implements `refreshToken`. Read by `refreshAccessToken` to + * decide whether a proactive refresh is needed. + */ +export type TokenBundle = { + accessToken: string + refreshToken?: string + /** Unix-epoch ms. */ + accessTokenExpiresAt?: number + /** Unix-epoch ms. */ + refreshTokenExpiresAt?: number +} + export type ValidateInput = { token: string /** Same shape as `ExchangeInput.handshake` — carries the folded `flags` / `readOnly`. */ handshake: Record } +export type RefreshInput = { + refreshToken: string + /** + * Synthesised at refresh time from the stored account — providers that + * need a base URL or client_id at refresh should read it from here + * (mirrors the `authorize`/`exchangeCode` handshake but without PKCE + * state, which has long since been discarded). + */ + handshake: Record + /** The account whose token is being refreshed. */ + account: TAccount +} + /** * Strategy interface every auth method implements. cli-core ships * `createPkceProvider` for the standard public-client PKCE flow; bespoke @@ -70,6 +100,13 @@ export type AuthProvider = { exchangeCode(input: ExchangeInput): Promise> /** Skipped when `exchangeCode` already returned an `account`. */ validateToken(input: ValidateInput): Promise + /** + * Optional. Exchange a refresh token for a fresh access token. Providers + * whose servers don't issue refresh tokens (Twist, Todoist today) omit + * this — `refreshAccessToken` will surface `AUTH_REFRESH_UNAVAILABLE` + * when called against such a provider. + */ + refreshToken?(input: RefreshInput): Promise> } /** Opaque account selector. Stores own the matching rule (id, email, label, …). */ @@ -90,9 +127,25 @@ export type TokenStore = { * explicit-ref path and proceeds with `clear(ref)`; `attachStatusCommand` * and `attachTokenViewCommand` propagate it. */ - active(ref?: AccountRef): Promise<{ token: string; account: TAccount } | null> - /** Persist `token` for `account`, replacing any previous entry. Throw `CliError` for typed failures; other thrown values become `AUTH_STORE_WRITE_FAILED`. */ - set(account: TAccount, token: string): Promise + /** + * `bundle` is optional so consumers implementing their own `TokenStore` + * against a backend that doesn't track refresh tokens or expiry can + * keep returning just `{ token, account }`. cli-core's built-in + * `createKeyringTokenStore` always supplies it, and helpers that need + * the extras (`refreshAccessToken`, `status` rendering) fall back to a + * synthesised `{ accessToken: token }` when it's absent. + */ + active( + ref?: AccountRef, + ): Promise<{ token: string; bundle?: TokenBundle; account: TAccount } | null> + /** + * Persist credentials for `account`, replacing any previous entry. Accepts + * either a bare access-token string (for providers without refresh) or a + * full `TokenBundle` (access + optional refresh + expiry). Throw + * `CliError` for typed failures; other thrown values become + * `AUTH_STORE_WRITE_FAILED`. + */ + set(account: TAccount, credentials: string | TokenBundle): Promise /** Remove a stored credential. No-op when `ref` doesn't match. */ clear(ref?: AccountRef): Promise /** Every stored account with a default marker. */ diff --git a/src/auth/user-flag.test.ts b/src/auth/user-flag.test.ts index a2c902c..359ba29 100644 --- a/src/auth/user-flag.test.ts +++ b/src/auth/user-flag.test.ts @@ -12,8 +12,9 @@ const account: Account = { id: '1', label: 'me', email: 'a@b' } function buildStore( initial: { token: string; account: Account } | null = { token: 'tok', account }, ): TokenStore { + const snapshot = initial && { ...initial, bundle: { accessToken: initial.token } } return { - active: vi.fn(async () => initial), + active: vi.fn(async () => snapshot), set: vi.fn(), clear: vi.fn(), list: vi.fn(async () => (initial ? [{ account: initial.account, isDefault: true }] : [])), @@ -62,7 +63,7 @@ describe('requireSnapshotForRef', () => { const snapshot = await requireSnapshotForRef(store, 'alice') - expect(snapshot).toEqual({ token: 'tok', account }) + expect(snapshot).toEqual({ token: 'tok', bundle: { accessToken: 'tok' }, account }) expect(store.active).toHaveBeenCalledWith('alice') }) diff --git a/src/auth/user-flag.ts b/src/auth/user-flag.ts index 23f63fb..70065d0 100644 --- a/src/auth/user-flag.ts +++ b/src/auth/user-flag.ts @@ -1,6 +1,6 @@ import type { Command } from 'commander' import { CliError } from '../errors.js' -import type { AccountRef, AuthAccount, TokenStore } from './types.js' +import type { AccountRef, AuthAccount, TokenBundle, TokenStore } from './types.js' // Shared `--user ` wiring + snapshot-or-throw helper so the three auth // attachers can't drift on flag wording or miss semantics. Internal — not @@ -31,7 +31,7 @@ export function accountNotFoundError(ref: AccountRef): CliError { export async function requireSnapshotForRef( store: TokenStore, ref: AccountRef | undefined, -): Promise<{ token: string; account: TAccount } | null> { +): Promise<{ token: string; bundle?: TokenBundle; account: TAccount } | null> { const snapshot = await store.active(ref) if (ref !== undefined && snapshot === null) { throw accountNotFoundError(ref) From 2a9295677d5ed36ea35485f3a06c1d4be65e9d1c Mon Sep 17 00:00:00 2001 From: lmjabreu Date: Mon, 18 May 2026 23:26:19 +0100 Subject: [PATCH 02/10] fix(auth): address doistbot review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Restore TokenStore.set(account, token: string) signature; add separate optional setBundle?(account, bundle) so custom TokenStore implementations don't need to widen their existing set() to accept TokenBundle. cli-core helpers call setBundle when available, otherwise set(token). - Revert the ExchangeResult.expiresAt → accessTokenExpiresAt rename. The old field stays, plus a new optional refreshTokenExpiresAt. Custom providers returning expiresAt keep working. - Fix concurrency bug on the force=true (reactive 401) path: if the post-lock re-read shows the access token rotated, return the fresh snapshot instead of POSTing refresh with a now-stale refresh token (which would yield invalid_grant on servers with refresh rotation). - Require explicit lockPath from the caller; drop the implicit cast on getRecordsLocation. Avoids silently failing to acquire locks when consumers pass `~`-prefixed recordsLocation per the doc. - Add hasRefreshToken bit on UserRecord; gate the refresh-slot keyring read on it so accounts without a refresh token don't pay a second IPC round-trip per `active()` call. Parallel-fetch access + refresh secrets via Promise.all. - record-write: roll back the access slot when the refresh-slot write throws (any error class) so we never leave an orphan access credential with no matching user record. A non-keyring refresh-slot setSecret rethrows; a keyring-unavailable refresh-slot rolls back to the fallback record so both tokens travel together. - record-write: treat a refresh-slot delete failure (when writing a no-refresh bundle) as a write failure — otherwise a stale refresh token from an earlier login can survive and be served by active(). - Extract shared postToTokenEndpoint helper in PKCE provider so exchangeCode and refreshToken share fetch + status handling + JSON parsing instead of duplicating it. - refresh.ts: use requireSnapshotForRef instead of reimplementing snapshot lookup. - migrate.ts: use the exported refreshAccountSlot helper instead of manually interpolating the suffix. - runOAuthFlow: call setBundle when the store implements it (preserves refresh/expiry on login) and fall back to set(accessToken) otherwise. - Update README "What's in it" + add a Silent OAuth refresh section covering refreshAccessToken, TokenBundle, lockPath, and the typed AUTH_REFRESH_* error codes. Tests: - flow.test fake store now keeps the full TokenBundle (and implements setBundle) so refresh/expiry persistence is asserted, not silently discarded. New test asserts the bundle round-trip end-to-end through runOAuthFlow. - refresh.test: drop the hardcoded /tmp records path; tests now use the tempDir-scoped lockPath. New tests cover the force+concurrency scenario (losing process must return the rotated snapshot without POSTing) and the non-force lock contention path. Removed signature- restating comment. - record-write.test: new tests for partial-offline (access succeeds, refresh slot offline → both fall back to record), refresh-slot setSecret non-keyring error (rollback + rethrow), refresh-slot delete failure on no-refresh bundle. - token-store.test: new bundle round-trip test (setBundle → active → clear with refresh in the sibling slot) and hot-path optimisation test asserting the refresh slot isn't read when hasRefreshToken=false. Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 69 +++++++-- src/auth/flow.test.ts | 59 ++++++-- src/auth/flow.ts | 20 ++- src/auth/keyring/migrate.ts | 3 +- src/auth/keyring/record-write.test.ts | 81 ++++++++++ src/auth/keyring/record-write.ts | 62 +++++--- src/auth/keyring/token-store.test.ts | 52 +++++++ src/auth/keyring/token-store.ts | 161 ++++++++------------ src/auth/keyring/types.ts | 10 ++ src/auth/providers/pkce.test.ts | 4 +- src/auth/providers/pkce.ts | 210 +++++++++++++------------- src/auth/refresh.test.ts | 138 +++++++++++++---- src/auth/refresh.ts | 112 ++++++++------ src/auth/types.ts | 19 ++- 14 files changed, 666 insertions(+), 334 deletions(-) diff --git a/README.md b/README.md index 8c3b595..82215f3 100644 --- a/README.md +++ b/README.md @@ -12,20 +12,20 @@ npm install @doist/cli-core ## What's in it -| Module | Key exports | Purpose | -| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `auth` (subpath) | `attachLoginCommand`, `attachLogoutCommand`, `attachStatusCommand`, `attachTokenViewCommand`, `runOAuthFlow`, `createPkceProvider`, `createSecureStore`, `createKeyringTokenStore`, `migrateLegacyAuth`, PKCE helpers, `AuthProvider` / `TokenStore` / `AccountRef` / `SecureStore` / `UserRecordStore` types, `AttachLogoutRevokeContext` | OAuth runtime plus the Commander attachers for ` [auth] login` / `logout` / `status` / `token`. `attachLogoutCommand` accepts an optional `revokeToken` hook for best-effort server-side token revocation. Ships the standard public-client PKCE flow (`createPkceProvider`), a thin cross-platform OS-keyring wrapper (`createSecureStore`), and a multi-account keyring-backed `TokenStore` (`createKeyringTokenStore`) that stores secrets in the OS credential manager and degrades to plaintext in the consumer's config when the keyring is unavailable (WSL/headless Linux/containers). `AuthProvider` and `TokenStore` remain the escape hatches for DCR or fully bespoke backends. `logout` / `status` / `token` always attach `--user ` and thread the parsed ref to `store.active(ref)` (and `store.clear(ref)` on `logout`). `commander` (when using the attachers), `open` (browser launch), and `@napi-rs/keyring` (when using `createSecureStore` or the keyring `TokenStore`) are optional peer/optional deps. | -| `commands` (subpath) | `registerChangelogCommand`, `registerUpdateCommand` (+ semver helpers) | Commander wiring for cli-core's standard commands (e.g. ` changelog`, ` update`, ` update switch`). **Requires** `commander` as an optional peer-dep. | -| `config` | `getConfigPath`, `readConfig`, `readConfigStrict`, `writeConfig`, `updateConfig`, `CoreConfig`, `UpdateChannel` | Read / write a per-CLI JSON config file with typed error codes; `CoreConfig` is the shape of fields cli-core itself owns (extend it for per-CLI fields). | -| `empty` | `printEmpty` | Print an empty-state message gated on `--json` / `--ndjson` so machine consumers never see human strings on stdout. | -| `errors` | `CliError` | Typed CLI error class with `code` and exit-code mapping. | -| `global-args` | `parseGlobalArgs`, `stripUserFlag`, `createGlobalArgsStore`, `createAccessibleGate`, `createSpinnerGate`, `getProgressJsonlPath`, `isProgressJsonlEnabled` | Parse well-known global flags (`--json`, `--ndjson`, `--quiet`, `--verbose`, `--accessible`, `--no-spinner`, `--progress-jsonl`, `--user `) and derive predicates from them. `stripUserFlag` removes `--user` tokens from argv so the cleaned array can be forwarded to Commander when the flag has no root-program attachment. | -| `json` | `formatJson`, `formatNdjson` | Stable JSON / newline-delimited JSON formatting for stdout. | -| `markdown` (subpath) | `preloadMarkdown`, `renderMarkdown`, `TerminalRendererOptions` | Lazy-init terminal markdown renderer. **Requires** `marked` and `marked-terminal-renderer` as peer-deps — install only if your CLI uses this subpath. | -| `options` | `ViewOptions` | Type contract for `{ json?, ndjson? }` per-command options that machine-output gates derive from. | -| `spinner` | `createSpinner` | Loading spinner factory wrapping `yocto-spinner` with disable gates. | -| `terminal` | `isCI`, `isStderrTTY`, `isStdinTTY`, `isStdoutTTY` | TTY / CI detection helpers. | -| `testing` (subpath) | `describeEmptyMachineOutput` | Vitest helpers reusable by consuming CLIs (e.g. parametrised empty-state suite covering `--json` / `--ndjson` / human modes). | +| Module | Key exports | Purpose | +| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `auth` (subpath) | `attachLoginCommand`, `attachLogoutCommand`, `attachStatusCommand`, `attachTokenViewCommand`, `runOAuthFlow`, `refreshAccessToken`, `createPkceProvider`, `createSecureStore`, `createKeyringTokenStore`, `migrateLegacyAuth`, PKCE helpers, `AuthProvider` / `TokenStore` / `TokenBundle` / `RefreshInput` / `AccountRef` / `SecureStore` / `UserRecordStore` types, `AttachLogoutRevokeContext` | OAuth runtime plus the Commander attachers for ` [auth] login` / `logout` / `status` / `token`. `attachLogoutCommand` accepts an optional `revokeToken` hook for best-effort server-side token revocation. Ships the standard public-client PKCE flow (`createPkceProvider` — implements both `exchangeCode` and the optional `refreshToken` grant), a thin cross-platform OS-keyring wrapper (`createSecureStore`), and a multi-account keyring-backed `TokenStore` (`createKeyringTokenStore`) that stores access + refresh secrets in the OS credential manager (refresh in a sibling slot, gated by a `hasRefreshToken` bit on the user record so accounts without one don't pay an extra keyring round-trip per command) and degrades to plaintext in the consumer's config when the keyring is unavailable (WSL/headless Linux/containers). `refreshAccessToken` drives proactive (skew window) and reactive (`force: true`, for 401 retries) silent re-auth, with optional sidecar `lockPath` for cross-process safety. `AuthProvider` and `TokenStore` remain the escape hatches for DCR or fully bespoke backends. `logout` / `status` / `token` always attach `--user ` and thread the parsed ref to `store.active(ref)` (and `store.clear(ref)` on `logout`). `commander` (when using the attachers), `open` (browser launch), and `@napi-rs/keyring` (when using `createSecureStore` or the keyring `TokenStore`) are optional peer/optional deps. | +| `commands` (subpath) | `registerChangelogCommand`, `registerUpdateCommand` (+ semver helpers) | Commander wiring for cli-core's standard commands (e.g. ` changelog`, ` update`, ` update switch`). **Requires** `commander` as an optional peer-dep. | +| `config` | `getConfigPath`, `readConfig`, `readConfigStrict`, `writeConfig`, `updateConfig`, `CoreConfig`, `UpdateChannel` | Read / write a per-CLI JSON config file with typed error codes; `CoreConfig` is the shape of fields cli-core itself owns (extend it for per-CLI fields). | +| `empty` | `printEmpty` | Print an empty-state message gated on `--json` / `--ndjson` so machine consumers never see human strings on stdout. | +| `errors` | `CliError` | Typed CLI error class with `code` and exit-code mapping. | +| `global-args` | `parseGlobalArgs`, `stripUserFlag`, `createGlobalArgsStore`, `createAccessibleGate`, `createSpinnerGate`, `getProgressJsonlPath`, `isProgressJsonlEnabled` | Parse well-known global flags (`--json`, `--ndjson`, `--quiet`, `--verbose`, `--accessible`, `--no-spinner`, `--progress-jsonl`, `--user `) and derive predicates from them. `stripUserFlag` removes `--user` tokens from argv so the cleaned array can be forwarded to Commander when the flag has no root-program attachment. | +| `json` | `formatJson`, `formatNdjson` | Stable JSON / newline-delimited JSON formatting for stdout. | +| `markdown` (subpath) | `preloadMarkdown`, `renderMarkdown`, `TerminalRendererOptions` | Lazy-init terminal markdown renderer. **Requires** `marked` and `marked-terminal-renderer` as peer-deps — install only if your CLI uses this subpath. | +| `options` | `ViewOptions` | Type contract for `{ json?, ndjson? }` per-command options that machine-output gates derive from. | +| `spinner` | `createSpinner` | Loading spinner factory wrapping `yocto-spinner` with disable gates. | +| `terminal` | `isCI`, `isStderrTTY`, `isStdinTTY`, `isStdoutTTY` | TTY / CI detection helpers. | +| `testing` (subpath) | `describeEmptyMachineOutput` | Vitest helpers reusable by consuming CLIs (e.g. parametrised empty-state suite covering `--json` / `--ndjson` / human modes). | ## Usage @@ -398,6 +398,47 @@ if (result.status === 'skipped' && result.reason === 'legacy-keyring-unreachable The helper is best-effort throughout: any failure (offline keyring, network error fetching the user, upsert blip) leaves the v1 state untouched so the consumer's runtime fallback can keep serving the legacy token until the next attempt. `markMigrated()` is called **before** the legacy keyring delete + `cleanupLegacyConfig`, so cleanup failures can't cause re-migration on the next run — the marker is the one-way gate, not cleanup success. The legacy delete and `cleanupLegacyConfig` run concurrently via `Promise.allSettled`. stderr output uses fixed phrases keyed off `MigrateSkipReason` and the success log omits the account identifier entirely so consumer-supplied error text (and PII-shaped `account.id` values like emails) can't leak into logs. +#### Silent OAuth refresh (`refreshAccessToken`) + +For OAuth providers whose servers issue refresh tokens (Outline, anything ALCs / DCR-based), `refreshAccessToken` keeps users signed in past the access token's expiry without re-running ` auth login`. Wire it through your CLI's `getApiToken()` so every authenticated request consults the same code path. The `createPkceProvider` ships with a `refreshToken` implementation; bespoke providers add their own. + +```ts +import { refreshAccessToken } from '@doist/cli-core/auth' + +export async function getApiToken(): Promise { + const refreshed = await refreshAccessToken({ + store: tokenStore, + provider: authProvider, + // Sidecar O_EXCL lock so two parallel CLI invocations don't both + // POST refresh and race the server's rotation logic. Pass an + // already-expanded absolute path (no `~`). + lockPath: `${getConfigPath('todoist-cli')}.refresh.lock`, + }) + return refreshed.token +} +``` + +`refreshAccessToken` returns the active credentials unchanged when the stored access token has more than `skewMs` (default 60s) of life left. When refresh is needed, it acquires the file lock, POSTs `grant_type=refresh_token` via the provider's `refreshToken` method, persists the new bundle through `store.setBundle ?? store.set`, and returns the new token. On contention it waits up to `lockTimeoutMs` (default 2s); if another process refreshed first it returns the rotated snapshot without firing its own POST — even on the `force` path — because POSTing with the now-rotated refresh token would yield `invalid_grant`. + +The reactive 401 path uses `force: true` so a server-rejected access token (clock skew, server-side revocation) is refreshed and retried in one shot: + +```ts +const res = await fetch(url, { headers: { Authorization: `Bearer ${token}` } }) +if (res.status === 401) { + const refreshed = await refreshAccessToken({ + store, + provider, + force: true, + lockPath, + }) + // Retry once with the fresh token... +} +``` + +Errors are typed: `AUTH_REFRESH_EXPIRED` (server returned 400/401 + `invalid_grant` — refresh token revoked, forced re-login required), `AUTH_REFRESH_TRANSIENT` (network, 5xx, non-JSON — safe to retry), `AUTH_REFRESH_UNAVAILABLE` (no refresh token stored or provider doesn't implement `refreshToken`). Map these to your CLI's existing "run ` auth login`" hint as makes sense. + +`TokenBundle` is the persisted credential triple — `{ accessToken, refreshToken?, accessTokenExpiresAt?, refreshTokenExpiresAt? }`. Stores opting into refresh implement `setBundle?(account, bundle)` alongside `set(account, token)`; `createKeyringTokenStore` implements both. Stores that omit `setBundle` lose refresh + expiry metadata on persistence — `refreshAccessToken` against them will surface `AUTH_REFRESH_UNAVAILABLE` on the first attempted refresh, which is the correct degraded behaviour. + #### `--user ` and multi-user wiring The three account-touching attachers (`attachLogoutCommand` / `attachStatusCommand` / `attachTokenViewCommand`) always attach `--user ` on their subcommand. `attachLogoutCommand` threads the parsed ref to both `store.active(ref)` and `store.clear(ref)`; `attachStatusCommand` and `attachTokenViewCommand` only call `store.active(ref)`. When `--user` is supplied but `store.active(ref)` returns `null`, each attacher throws `CliError('ACCOUNT_NOT_FOUND', …)` so the user sees a typed miss rather than `NOT_AUTHENTICATED` or a silent `✓ Logged out`. Single-user stores returning `null` for a non-matching ref is the supported way to feed this guard. diff --git a/src/auth/flow.test.ts b/src/auth/flow.test.ts index 14cadfb..9f0386a 100644 --- a/src/auth/flow.test.ts +++ b/src/auth/flow.test.ts @@ -22,26 +22,33 @@ import { execFile } from 'node:child_process' import { readFileSync } from 'node:fs' import openBrowserModule from 'open' import { type RunOAuthFlowOptions, runOAuthFlow } from './flow.js' -import type { AuthProvider, TokenStore } from './types.js' +import type { AuthProvider, TokenBundle, TokenStore } from './types.js' type Account = { id: string; label?: string; email: string } +type FakeStoreState = { account: Account; bundle: TokenBundle } -/** Tiny in-memory `TokenStore` so the flow tests don't need disk I/O. */ -function fakeStore(): TokenStore & { last?: { account: Account; token: string } } { - const state: { last?: { account: Account; token: string } } = {} +/** + * Tiny in-memory `TokenStore` so the flow tests don't need disk I/O. Keeps + * the full `TokenBundle` so tests can verify that `runOAuthFlow` persists + * refresh + expiry, not just the access token. + */ +function fakeStore(): TokenStore & { last?: FakeStoreState } { + const state: { last?: FakeStoreState } = {} return { async active() { return state.last ? { - token: state.last.token, - bundle: { accessToken: state.last.token }, + token: state.last.bundle.accessToken, + bundle: state.last.bundle, account: state.last.account, } : null }, - async set(account, credentials) { - const token = typeof credentials === 'string' ? credentials : credentials.accessToken - state.last = { account, token } + async set(account, token) { + state.last = { account, bundle: { accessToken: token } } + }, + async setBundle(account, bundle) { + state.last = { account, bundle } }, async clear() { state.last = undefined @@ -158,11 +165,43 @@ describe('runOAuthFlow', () => { expect(openBrowser).toHaveBeenCalledTimes(1) expect(await store.active()).toEqual({ token: 'tok-1', - bundle: { accessToken: 'tok-1' }, + bundle: { + accessToken: 'tok-1', + refreshToken: undefined, + accessTokenExpiresAt: undefined, + refreshTokenExpiresAt: undefined, + }, account: { id: '1', email: 'a@b' }, }) }) + it('persists the full bundle (refresh + expiry) when exchangeCode returns them', async () => { + const expiresAt = Date.now() + 3_600_000 + const { provider, getRedirect } = instrument({ + exchangeCode: async () => ({ + accessToken: 'tok-1', + refreshToken: 'rt-1', + expiresAt, + }), + }) + const store = fakeStore() + + await runOAuthFlow( + flowOptions({ provider, store, openBrowser: driveCallback(getRedirect) }), + ) + + // Regression guard: if `runOAuthFlow` drops refresh/expiry on the + // floor again (the original bug that motivated this PR), the + // snapshot here would lose the refresh token and the next + // `refreshAccessToken` call would fail with AUTH_REFRESH_UNAVAILABLE. + expect(store.last?.bundle).toEqual({ + accessToken: 'tok-1', + refreshToken: 'rt-1', + accessTokenExpiresAt: expiresAt, + refreshTokenExpiresAt: undefined, + }) + }) + it('skips validateToken when exchangeCode returns an account', async () => { const validateToken = vi.fn(async () => ({ id: 'WRONG', email: 'x@x' })) const { provider, getRedirect } = instrument({ diff --git a/src/auth/flow.ts b/src/auth/flow.ts index 890a03d..70e1ed2 100644 --- a/src/auth/flow.ts +++ b/src/auth/flow.ts @@ -188,12 +188,20 @@ export async function runOAuthFlow( checkAborted() try { - await options.store.set(account, { - accessToken: exchange.accessToken, - refreshToken: exchange.refreshToken, - accessTokenExpiresAt: exchange.accessTokenExpiresAt, - refreshTokenExpiresAt: exchange.refreshTokenExpiresAt, - }) + // Use setBundle when the store implements it so refresh + expiry + // metadata survives to enable silent re-auth later. Fall back to + // the simple `set` for stores that don't (the refresh metadata is + // lost, which is acceptable — those stores can't refresh anyway). + if (options.store.setBundle) { + await options.store.setBundle(account, { + accessToken: exchange.accessToken, + refreshToken: exchange.refreshToken, + accessTokenExpiresAt: exchange.expiresAt, + refreshTokenExpiresAt: exchange.refreshTokenExpiresAt, + }) + } else { + await options.store.set(account, exchange.accessToken) + } } catch (error) { if (error instanceof CliError) throw error throw new CliError( diff --git a/src/auth/keyring/migrate.ts b/src/auth/keyring/migrate.ts index 6572f6f..ea13179 100644 --- a/src/auth/keyring/migrate.ts +++ b/src/auth/keyring/migrate.ts @@ -7,6 +7,7 @@ import { type SecureStore, SecureStoreUnavailableError, } from './secure-store.js' +import { refreshAccountSlot } from './token-store.js' import type { UserRecordStore } from './types.js' export type MigrateLegacyAuthOptions = { @@ -165,7 +166,7 @@ export async function migrateLegacyAuth( // junk that may have been parked there by a hand-edit. refreshSecureStore: createSecureStore({ serviceName, - account: `${accountSlot}/refresh`, + account: refreshAccountSlot(accountSlot), }), userRecords, account, diff --git a/src/auth/keyring/record-write.test.ts b/src/auth/keyring/record-write.test.ts index 2a5b390..7f9d249 100644 --- a/src/auth/keyring/record-write.test.ts +++ b/src/auth/keyring/record-write.test.ts @@ -30,6 +30,7 @@ describe('writeRecordWithKeyringFallback', () => { account, accessTokenExpiresAt: undefined, refreshTokenExpiresAt: undefined, + hasRefreshToken: false, }) expect(state.records.get('42')?.fallbackToken).toBeUndefined() expect(state.records.get('42')?.fallbackRefreshToken).toBeUndefined() @@ -79,6 +80,86 @@ describe('writeRecordWithKeyringFallback', () => { expect(state.records.get('42')?.fallbackRefreshToken).toBe('refr_plain') }) + it('rolls back the access slot and parks both tokens on the record when the refresh slot is offline (partial-offline)', async () => { + // Access slot writes successfully but the refresh slot is + // unavailable — must NOT leave the access secret stranded in the + // keyring while the refresh sits in the plaintext fallback. Both + // travel together to the fallback record so `active()` always reads + // from one consistent place. + const secureStore = buildSingleSlot() + const refreshSecureStore = buildSingleSlot() + refreshSecureStore.setSpy.mockRejectedValueOnce( + new SecureStoreUnavailableError('refresh slot down'), + ) + const { store: userRecords, state } = buildUserRecords() + + const result = await writeRecordWithKeyringFallback({ + secureStore, + refreshSecureStore, + userRecords, + account, + bundle: { accessToken: 'at_split', refreshToken: 'rt_split' }, + }) + + expect(result.storedSecurely).toBe(false) + // Access slot was rolled back so the secret doesn't outlive the + // fallback record. + expect(secureStore.deleteSpy).toHaveBeenCalledTimes(1) + expect(state.records.get('42')?.fallbackToken).toBe('at_split') + expect(state.records.get('42')?.fallbackRefreshToken).toBe('rt_split') + expect(state.records.get('42')?.hasRefreshToken).toBe(true) + }) + + it('rolls back the access slot and rethrows when refresh-slot setSecret throws a non-keyring error', async () => { + // Otherwise we leave an orphan access credential with no matching + // user record — `active()` later sees the orphan and the user can't + // recover without manually clearing the keyring. + const secureStore = buildSingleSlot() + const refreshSecureStore = buildSingleSlot() + const cause = new Error('refresh slot exploded') + refreshSecureStore.setSpy.mockRejectedValueOnce(cause) + const { store: userRecords, state } = buildUserRecords() + + await expect( + writeRecordWithKeyringFallback({ + secureStore, + refreshSecureStore, + userRecords, + account, + bundle: { accessToken: 'at', refreshToken: 'rt' }, + }), + ).rejects.toBe(cause) + expect(secureStore.deleteSpy).toHaveBeenCalledTimes(1) + expect(state.records.size).toBe(0) + }) + + it('treats a refresh-slot delete failure on a no-refresh bundle as a write failure (no resurrection)', async () => { + // Without this rollback, a stale refresh secret from an earlier + // login would survive a re-login that didn't return one, and + // `active()` would later surface it. Belt-and-braces: roll back + // access too so the on-disk and in-keyring state stay aligned. + const secureStore = buildSingleSlot() + const refreshSecureStore = buildSingleSlot() + refreshSecureStore.deleteSpy.mockRejectedValueOnce( + new SecureStoreUnavailableError('cannot reach refresh slot'), + ) + const { store: userRecords, state } = buildUserRecords() + + const result = await writeRecordWithKeyringFallback({ + secureStore, + refreshSecureStore, + userRecords, + account, + bundle: { accessToken: 'at_only' }, + }) + + // Fell through to fallback because the refresh-slot delete failed. + expect(result.storedSecurely).toBe(false) + expect(secureStore.deleteSpy).toHaveBeenCalledTimes(1) + expect(state.records.get('42')?.fallbackToken).toBe('at_only') + expect(state.records.get('42')?.hasRefreshToken).toBe(false) + }) + it('rethrows non-keyring errors from setSecret without writing the record', async () => { const secureStore = buildSingleSlot() const cause = new Error('unexpected backend explosion') diff --git a/src/auth/keyring/record-write.ts b/src/auth/keyring/record-write.ts index 8e3a667..0179bac 100644 --- a/src/auth/keyring/record-write.ts +++ b/src/auth/keyring/record-write.ts @@ -23,25 +23,29 @@ type WriteRecordResult = { } /** - * Shared keyring-then-record write used by `createKeyringTokenStore.set` and - * `migrateLegacyAuth`. Encapsulates the order-of-operations contract that - * matters for credential safety: + * Shared keyring-then-record write used by `createKeyringTokenStore.set` / + * `setBundle` and `migrateLegacyAuth`. Encapsulates the order-of-operations + * contract that matters for credential safety: * - * 1. Keyring `setSecret` for access token first. On + * 1. Keyring `setSecret` for the access token first. On * `SecureStoreUnavailableError`, swallow and route both tokens to the * record's fallback slots. Any other error rethrows. * 2. When the keyring is online and the bundle has a refresh token, write - * it to the sibling refresh slot. When the bundle has no refresh token, - * best-effort `deleteSecret()` on the refresh slot so a stale secret - * from a previous login doesn't shadow the new state. - * 3. `userRecords.upsert(record)`. On failure, best-effort rollback both + * it to the sibling refresh slot. On `SecureStoreUnavailableError`, + * roll back the access-slot write and fall through to the fallback + * record (so both tokens travel together — never split state across + * keyring and record). On any other error, also roll back the access + * slot (best-effort) before rethrowing — leaving an orphan access + * credential with no matching user record breaks `active()` later. + * 3. When the keyring is online and the bundle has no refresh token, + * delete any pre-existing refresh secret. A delete failure here is + * surfaced as a write failure (raised as `SecureStoreUnavailableError`'s + * semantic equivalent: fall through to the fallback record) so a stale + * refresh secret can never outlive a login that didn't return one — + * otherwise `active()` would later read and use it. + * 4. `userRecords.upsert(record)`. On failure, best-effort rollback both * keyring writes so we don't leave orphan credentials for an account * cli-core never managed to register. Original error rethrows. - * - * Default promotion (`setDefaultId`) is intentionally **not** in here — both - * call sites do it best-effort outside the critical section because it is a - * preference, not a correctness requirement, and an error there must not - * dirty up a successful credential write. */ export async function writeRecordWithKeyringFallback( options: WriteRecordOptions, @@ -58,10 +62,6 @@ export async function writeRecordWithKeyringFallback { account, accessTokenExpiresAt: undefined, refreshTokenExpiresAt: undefined, + hasRefreshToken: false, }) expect(state.defaultId).toBe('42') expect(store.getLastStorageResult()).toEqual({ storage: 'secure-store' }) @@ -115,6 +116,57 @@ describe('createKeyringTokenStore', () => { expect(store.getLastClearResult()).toEqual({ storage: 'secure-store' }) }) + it('round-trips setBundle → active → clear with refresh token in the sibling slot', async () => { + // End-to-end coverage for the bundle path (set, hot-path read, + // clear) — the access-token-only round-trip above doesn't exercise + // refresh slot routing, hasRefreshToken gating, or the parallel read. + const { keyring, refreshKeyring, store, state } = fixture() + const expiresAt = Date.now() + 3_600_000 + + await store.setBundle!(account, { + accessToken: 'at-1', + refreshToken: 'rt-1', + accessTokenExpiresAt: expiresAt, + }) + + expect(keyring.setSpy).toHaveBeenCalledWith('at-1') + expect(refreshKeyring.setSpy).toHaveBeenCalledWith('rt-1') + expect(state.records.get('42')?.hasRefreshToken).toBe(true) + expect(state.records.get('42')?.accessTokenExpiresAt).toBe(expiresAt) + + const snapshot = await store.active() + expect(snapshot?.bundle).toEqual({ + accessToken: 'at-1', + refreshToken: 'rt-1', + accessTokenExpiresAt: expiresAt, + refreshTokenExpiresAt: undefined, + }) + + await store.clear() + // Both slots cleared on logout — otherwise an orphan refresh token + // survives in the keyring. + expect(keyring.deleteSpy).toHaveBeenCalled() + expect(refreshKeyring.deleteSpy).toHaveBeenCalled() + expect(state.records.size).toBe(0) + }) + + it('skips the refresh-slot keyring read when hasRefreshToken is false (hot-path optimisation)', async () => { + const { refreshKeyring, store } = fixture({ + keyring: buildSingleSlot({ secret: 'at-only' }), + records: { '42': { account, hasRefreshToken: false } }, + defaultId: '42', + }) + + const snapshot = await store.active() + + expect(snapshot?.token).toBe('at-only') + expect(snapshot?.bundle?.refreshToken).toBeUndefined() + // The gating bit's whole point: no second IPC round-trip when there's + // nothing in the refresh slot. A stale `true` would force an extra + // keyring hit per command for accounts that never had refresh. + expect(refreshKeyring.getSpy).not.toHaveBeenCalled() + }) + it('falls back to a plaintext token on the user record when the keyring is offline', async () => { const keyring = buildSingleSlot() keyring.setSpy.mockRejectedValueOnce(new SecureStoreUnavailableError('no dbus')) diff --git a/src/auth/keyring/token-store.ts b/src/auth/keyring/token-store.ts index 529dcc0..38ee047 100644 --- a/src/auth/keyring/token-store.ts +++ b/src/auth/keyring/token-store.ts @@ -36,12 +36,10 @@ export type CreateKeyringTokenStoreOptions = { } export type KeyringTokenStore = TokenStore & { - /** Storage result from the most recent `set()` call, or `undefined` before any (and reset to `undefined` when the most recent `set()` threw). */ + /** Storage result from the most recent `set()` / `setBundle()` call, or `undefined` before any (and reset to `undefined` when the most recent call threw). */ getLastStorageResult(): TokenStorageResult | undefined /** Storage result from the most recent `clear()` call, or `undefined` before any (and reset to `undefined` when the most recent `clear()` threw or was a no-op). */ getLastClearResult(): TokenStorageResult | undefined - /** Human-readable location of the underlying record store. Surfaced so `refreshAccessToken` can derive a sidecar lock path without re-plumbing options. */ - getRecordsLocation(): string } const DEFAULT_MATCH_ACCOUNT = ( @@ -49,42 +47,20 @@ const DEFAULT_MATCH_ACCOUNT = ( ref: AccountRef, ): boolean => account.id === ref || account.label === ref -/** Sibling keyring slot for the refresh token. Kept here so every read/write site agrees on the wire format. */ +/** Sibling keyring slot for the refresh token. Single source of truth for the wire format. */ export function refreshAccountSlot(accessSlot: string): string { return `${accessSlot}/refresh` } -function toBundle(credentials: string | TokenBundle): TokenBundle { - return typeof credentials === 'string' ? { accessToken: credentials } : credentials -} - /** * Multi-account `TokenStore` that keeps secrets in the OS credential manager * and per-user metadata in the consumer's `UserRecordStore`. Falls back to - * plaintext tokens on the user record when the keyring is unreachable (WSL - * without D-Bus, missing native binary, locked Keychain, …) so the CLI keeps - * working at the cost of a visible warning. - * - * Read order in `active()` is `fallbackToken` first, then the keyring. That - * matches the write semantics in `writeRecordWithKeyringFallback`: when the - * keyring is online the record is written with no `fallbackToken`, so the - * keyring read is the only path. When the keyring is offline the token is - * parked on the record and must be reachable on every subsequent read. + * plaintext tokens on the user record when the keyring is unreachable. * * Refresh tokens live in a sibling keyring slot (`${account}/refresh`) so - * that `clear()` can drop them without parsing the access slot's contents. - * - * Write order is keyring first, then `userRecords.upsert`. If the upsert - * fails after a successful keyring write, both keyring entries are rolled - * back via `deleteSecret()` to avoid orphan credentials for a user that - * cli-core never managed to record. - * - * Clear order is the inverse: record removal first (the source of truth that - * the rest of the CLI reads), then keyring delete (both slots). Any keyring - * delete failure after a successful removal is downgraded to a warning — the - * orphan secret is harmless because no record references it anymore, and - * surfacing the error would corrupt local state (record gone, but caller - * sees a thrown exception and assumes the clear failed). + * `clear()` can drop them without parsing the access slot's contents. Reads + * are gated on `UserRecord.hasRefreshToken` so accounts without refresh + * tokens don't pay a second keyring round-trip per `active()` call. */ export function createKeyringTokenStore( options: CreateKeyringTokenStoreOptions, @@ -109,11 +85,6 @@ export function createKeyringTokenStore( type Snapshot = { records: UserRecord[]; defaultId: string | null } - /** - * Read both `list()` and `getDefaultId()` concurrently. Used by paths - * that need the pinned default (no-ref `active`/`clear`, `list`, and - * `clear`'s default-unpin check). - */ async function readFullSnapshot(): Promise { const [records, defaultId] = await Promise.all([ userRecords.list(), @@ -157,9 +128,9 @@ export function createKeyringTokenStore( /** * Read the access + refresh secrets for a record, preferring the - * plaintext fallbacks when present (mirrors the contract that the - * fallback is authoritative whenever it exists). Returns `null` for the - * "stored corrupted state" case so callers can throw `AUTH_STORE_READ_FAILED`. + * plaintext fallbacks when present. Returns `null` when the access slot + * is empty (corrupted state, surfaced to the caller as + * `AUTH_STORE_READ_FAILED`). */ async function readBundleForRecord(record: UserRecord): Promise { const fallbackAccess = record.fallbackToken?.trim() @@ -172,33 +143,34 @@ export function createKeyringTokenStore( } } - let rawAccess: string | null - try { - rawAccess = await accessStoreFor(record.account).getSecret() - } catch (error) { - if (error instanceof SecureStoreUnavailableError) { - throw new CliError( - 'AUTH_STORE_READ_FAILED', - `${SECURE_STORE_DESCRIPTION} unavailable; could not read stored token (${error.message})`, - ) - } - throw error - } + // Parallel reads: access slot always needed; refresh slot only when + // the record says it exists. Sequential reads here would double the + // IPC latency on every authenticated command. The refresh read is + // wrapped in `.catch(() => null)` so a transient keyring hiccup on + // the (non-essential) refresh slot doesn't fail an otherwise-valid + // access-token lookup. + const accessPromise = accessStoreFor(record.account) + .getSecret() + .catch((error: unknown) => { + if (error instanceof SecureStoreUnavailableError) { + throw new CliError( + 'AUTH_STORE_READ_FAILED', + `${SECURE_STORE_DESCRIPTION} unavailable; could not read stored token (${error.message})`, + ) + } + throw error + }) + const refreshPromise = record.hasRefreshToken + ? refreshStoreFor(record.account) + .getSecret() + .catch(() => null) + : Promise.resolve(null) + + const [rawAccess, rawRefresh] = await Promise.all([accessPromise, refreshPromise]) const accessToken = rawAccess?.trim() if (!accessToken) return null - // Refresh slot read errors are downgraded — a missing or unreadable - // refresh token is not fatal (the access token alone is still usable - // until it expires). Surface as "no refresh present" so the caller - // sees a consistent shape. - let rawRefresh: string | null = null - try { - rawRefresh = await refreshStoreFor(record.account).getSecret() - } catch { - rawRefresh = null - } - return { accessToken, refreshToken: rawRefresh?.trim() || undefined, @@ -207,6 +179,34 @@ export function createKeyringTokenStore( } } + async function persistBundle(account: TAccount, bundle: TokenBundle): Promise { + lastStorageResult = undefined + + const { storedSecurely } = await writeRecordWithKeyringFallback({ + secureStore: accessStoreFor(account), + refreshSecureStore: refreshStoreFor(account), + userRecords, + account, + bundle, + }) + + // Best-effort default promotion: a failure here must not turn into + // `AUTH_STORE_WRITE_FAILED` (the user can recover by setting a + // default later). + try { + const existingDefault = await userRecords.getDefaultId() + if (!existingDefault) { + await userRecords.setDefaultId(account.id) + } + } catch { + // best-effort + } + + lastStorageResult = storedSecurely + ? { storage: 'secure-store' } + : fallbackResult('token saved as plaintext in') + } + return { async active(ref) { const snapshot: Snapshot = @@ -218,9 +218,6 @@ export function createKeyringTokenStore( const bundle = await readBundleForRecord(record) if (!bundle) { - // Record exists, no `fallbackToken`, and the keyring slot is - // empty — credential deleted out-of-band. Corrupted state, - // not a miss. throw new CliError( 'AUTH_STORE_READ_FAILED', `${SECURE_STORE_DESCRIPTION} returned no credential for the stored account; the keyring entry may have been removed externally.`, @@ -230,34 +227,12 @@ export function createKeyringTokenStore( return { token: bundle.accessToken, bundle, account: record.account } }, - async set(account, credentials) { - // Reset the cached storage result up front so a caller that - // catches a thrown `set()` doesn't observe the previous call's - // warning leaking through `getLastStorageResult`. - lastStorageResult = undefined - - const bundle = toBundle(credentials) - const { storedSecurely } = await writeRecordWithKeyringFallback({ - secureStore: accessStoreFor(account), - refreshSecureStore: refreshStoreFor(account), - userRecords, - account, - bundle, - }) - - // Best-effort default promotion — same rationale as before. - try { - const existingDefault = await userRecords.getDefaultId() - if (!existingDefault) { - await userRecords.setDefaultId(account.id) - } - } catch { - // best-effort - } + async set(account, token) { + await persistBundle(account, { accessToken: token }) + }, - lastStorageResult = storedSecurely - ? { storage: 'secure-store' } - : fallbackResult('token saved as plaintext in') + async setBundle(account, bundle) { + await persistBundle(account, bundle) }, async clear(ref) { @@ -332,9 +307,5 @@ export function createKeyringTokenStore( getLastClearResult() { return lastClearResult }, - - getRecordsLocation() { - return recordsLocation - }, } } diff --git a/src/auth/keyring/types.ts b/src/auth/keyring/types.ts index 4fa593a..be41cfb 100644 --- a/src/auth/keyring/types.ts +++ b/src/auth/keyring/types.ts @@ -35,6 +35,16 @@ export type UserRecord = { accessTokenExpiresAt?: number /** Unix-epoch ms — when the persisted refresh token expires (rarely known). */ refreshTokenExpiresAt?: number + /** + * `true` iff a refresh token is known to exist in the sibling keyring + * slot (or in `fallbackRefreshToken`). The keyring token store reads it + * to decide whether to spend a second keyring round-trip on the refresh + * slot during `active()`. A stale `true` (e.g. the refresh slot was + * deleted out-of-band) downgrades to "no refresh available" via the + * read path; a stale `false` would silently hide a refresh credential, + * so writes must always update this bit atomically with the slot. + */ + hasRefreshToken?: boolean } /** diff --git a/src/auth/providers/pkce.test.ts b/src/auth/providers/pkce.test.ts index dc12cc0..fb25b78 100644 --- a/src/auth/providers/pkce.test.ts +++ b/src/auth/providers/pkce.test.ts @@ -84,7 +84,7 @@ describe('createPkceProvider', () => { handshake: { codeVerifier: 'the-verifier', clientId: 'client-xyz' }, }) expect(result.accessToken).toBe('tok-1') - expect(result.accessTokenExpiresAt).toBeGreaterThan(Date.now()) + expect(result.expiresAt).toBeGreaterThan(Date.now()) const failing = createPkceProvider({ authorizeUrl: 'unused', @@ -186,7 +186,7 @@ describe('createPkceProvider', () => { expect(captured!.body.has('code_verifier')).toBe(false) expect(result.accessToken).toBe('new-access') expect(result.refreshToken).toBe('new-refresh') - expect(result.accessTokenExpiresAt).toBeGreaterThan(Date.now()) + expect(result.expiresAt).toBeGreaterThan(Date.now()) // Account passes through so the caller doesn't have to look it up again. expect(result.account).toEqual(account) }) diff --git a/src/auth/providers/pkce.ts b/src/auth/providers/pkce.ts index 5cb483a..2db31d4 100644 --- a/src/auth/providers/pkce.ts +++ b/src/auth/providers/pkce.ts @@ -1,4 +1,4 @@ -import { CliError, getErrorMessage } from '../../errors.js' +import { CliError, type CliErrorCode, getErrorMessage } from '../../errors.js' import { deriveChallenge, generateVerifier } from '../pkce.js' import type { AuthAccount, @@ -38,15 +38,22 @@ export type PkceProviderOptions = { fetchImpl?: typeof fetch } +type TokenEndpointPayload = { + access_token?: string + refresh_token?: string + expires_in?: number + error?: string +} + /** * Build an `AuthProvider` for the standard "PKCE S256, public client (no * client_secret)" flow. Covers Outline (user-supplied client_id + base_url) * and Todoist (pre-registered client_id, custom verifier alphabet, * comma-separated scope string). * - * The scope list itself is resolved by the caller before invoking - * `runOAuthFlow` and arrives on `AuthorizeInput.scopes`; this factory does - * not own scope resolution. + * Implements `exchangeCode` (authorization_code grant) and `refreshToken` + * (refresh_token grant). Both POSTs share `postToTokenEndpoint` so the + * fetch + error handling + JSON-parsing logic lives in exactly one place. * * Flows that need DCR or HTTP Basic auth on the token endpoint implement * the `AuthProvider` interface directly. @@ -93,9 +100,6 @@ export function createPkceProvider( 'Internal: PKCE handshake state lost between authorize and exchange.', ) } - // `runOAuthFlow` folds the runtime `flags` into the handshake - // before calling exchange, so a `tokenUrl: ({ flags }) => ...` - // resolver sees the same flags it saw during authorize. const flags = (input.handshake.flags as Record | undefined) ?? {} const tokenUrl = resolve(options.tokenUrl, input.handshake, flags) @@ -107,53 +111,18 @@ export function createPkceProvider( code_verifier: verifier, }) - let response: Response - try { - response = await fetchImpl(tokenUrl, { - method: 'POST', - headers: { - 'Content-Type': 'application/x-www-form-urlencoded', - Accept: 'application/json', - }, - body: body.toString(), - }) - } catch (error) { - throw new CliError( - 'AUTH_TOKEN_EXCHANGE_FAILED', - `Token endpoint request failed: ${getErrorMessage(error)}`, - ) - } - - if (!response.ok) { - const detail = await safeReadText(response) - throw new CliError( - 'AUTH_TOKEN_EXCHANGE_FAILED', - `Token endpoint returned HTTP ${response.status}.`, - detail ? { hints: [detail] } : {}, - ) - } + const payload = await postToTokenEndpoint({ + fetchImpl, + tokenUrl, + body, + errorCode: 'AUTH_TOKEN_EXCHANGE_FAILED', + label: 'Token', + }) - // Parse defensively — a misconfigured proxy can return a 2xx HTML - // error page that would otherwise blow up with a raw SyntaxError. - let payload: { access_token?: string; refresh_token?: string; expires_in?: number } - try { - payload = (await response.json()) as typeof payload - } catch (error) { - throw new CliError( - 'AUTH_TOKEN_EXCHANGE_FAILED', - `Token endpoint returned non-JSON response: ${getErrorMessage(error)}`, - ) - } - if (!payload.access_token) { - throw new CliError( - 'AUTH_TOKEN_EXCHANGE_FAILED', - 'Token endpoint response missing access_token.', - ) - } return { - accessToken: payload.access_token, + accessToken: payload.access_token!, refreshToken: payload.refresh_token, - accessTokenExpiresAt: + expiresAt: typeof payload.expires_in === 'number' ? Date.now() + payload.expires_in * 1000 : undefined, @@ -176,61 +145,26 @@ export function createPkceProvider( client_id: clientId, }) - let response: Response - try { - response = await fetchImpl(tokenUrl, { - method: 'POST', - headers: { - 'Content-Type': 'application/x-www-form-urlencoded', - Accept: 'application/json', - }, - body: body.toString(), - }) - } catch (error) { - throw new CliError( - 'AUTH_REFRESH_TRANSIENT', - `Refresh request failed: ${getErrorMessage(error)}`, - ) - } - - if (!response.ok) { - const detail = await safeReadText(response) - // 400/401 with `invalid_grant` means the refresh token is - // revoked or expired and a forced re-login is the only - // recovery. Other statuses are treated as transient (server - // hiccup) — the caller may retry on the next request. - const isInvalidGrant = - (response.status === 400 || response.status === 401) && - /invalid_grant/i.test(detail ?? '') - throw new CliError( - isInvalidGrant ? 'AUTH_REFRESH_EXPIRED' : 'AUTH_REFRESH_TRANSIENT', - `Refresh token endpoint returned HTTP ${response.status}.`, - detail ? { hints: [detail] } : {}, - ) - } + const payload = await postToTokenEndpoint({ + fetchImpl, + tokenUrl, + body, + // 400/401 + `invalid_grant` is the spec signal that the + // refresh token itself is revoked/expired. We tunnel that + // discriminator through the helper via `classifyErrorStatus` + // so the caller code stays one return path. + errorCode: 'AUTH_REFRESH_TRANSIENT', + label: 'Refresh token', + classifyErrorStatus: (status, detail) => + (status === 400 || status === 401) && /invalid_grant/i.test(detail ?? '') + ? 'AUTH_REFRESH_EXPIRED' + : 'AUTH_REFRESH_TRANSIENT', + }) - let payload: { access_token?: string; refresh_token?: string; expires_in?: number } - try { - payload = (await response.json()) as typeof payload - } catch (error) { - throw new CliError( - 'AUTH_REFRESH_TRANSIENT', - `Refresh endpoint returned non-JSON response: ${getErrorMessage(error)}`, - ) - } - if (!payload.access_token) { - throw new CliError( - 'AUTH_REFRESH_TRANSIENT', - 'Refresh endpoint response missing access_token.', - ) - } return { - accessToken: payload.access_token, - // Some OAuth servers rotate refresh tokens; others don't. We - // can't tell from one response, so persist whatever comes - // back. The caller's `set()` will replace the stored value. + accessToken: payload.access_token!, refreshToken: payload.refresh_token, - accessTokenExpiresAt: + expiresAt: typeof payload.expires_in === 'number' ? Date.now() + payload.expires_in * 1000 : undefined, @@ -250,6 +184,76 @@ function resolve( return typeof resolver === 'function' ? resolver({ handshake, flags }) : resolver } +type PostOptions = { + fetchImpl: typeof fetch + tokenUrl: string + body: URLSearchParams + /** Error code used for network failures, 5xx, non-JSON, and missing access_token. */ + errorCode: CliErrorCode + /** Human label for error messages ("Token", "Refresh token"). */ + label: string + /** + * For 4xx responses where the body discriminates the error type (e.g. + * `invalid_grant` on a refresh). Returning a different code routes the + * thrown `CliError` to a different recovery path in the caller. + */ + classifyErrorStatus?: (status: number, detail: string | undefined) => CliErrorCode +} + +/** + * Shared OAuth token-endpoint POST. Used by both `exchangeCode` and + * `refreshToken` so the fetch + status handling + JSON parsing logic is + * defined once. Always-JSON content type, urlencoded body, no auth header + * (PKCE = public client). + */ +async function postToTokenEndpoint(options: PostOptions): Promise { + let response: Response + try { + response = await options.fetchImpl(options.tokenUrl, { + method: 'POST', + headers: { + 'Content-Type': 'application/x-www-form-urlencoded', + Accept: 'application/json', + }, + body: options.body.toString(), + }) + } catch (error) { + throw new CliError( + options.errorCode, + `${options.label} endpoint request failed: ${getErrorMessage(error)}`, + ) + } + + if (!response.ok) { + const detail = await safeReadText(response) + const code = options.classifyErrorStatus + ? options.classifyErrorStatus(response.status, detail) + : options.errorCode + throw new CliError( + code, + `${options.label} endpoint returned HTTP ${response.status}.`, + detail ? { hints: [detail] } : {}, + ) + } + + let payload: TokenEndpointPayload + try { + payload = (await response.json()) as TokenEndpointPayload + } catch (error) { + throw new CliError( + options.errorCode, + `${options.label} endpoint returned non-JSON response: ${getErrorMessage(error)}`, + ) + } + if (!payload.access_token) { + throw new CliError( + options.errorCode, + `${options.label} endpoint response missing access_token.`, + ) + } + return payload +} + async function safeReadText(response: Response): Promise { try { const text = (await response.text()).trim() diff --git a/src/auth/refresh.test.ts b/src/auth/refresh.test.ts index 15dfbfa..b4a0480 100644 --- a/src/auth/refresh.test.ts +++ b/src/auth/refresh.test.ts @@ -1,4 +1,4 @@ -import { mkdtempSync, rmSync } from 'node:fs' +import { mkdtempSync, rmSync, writeFileSync } from 'node:fs' import { tmpdir } from 'node:os' import { join } from 'node:path' import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' @@ -9,22 +9,23 @@ import type { AuthProvider, TokenBundle, TokenStore } from './types.js' type Account = { id: string; label?: string; email: string } const account: Account = { id: '1', label: 'a', email: 'a@b' } -/** In-memory store that satisfies the TokenStore contract + exposes recordsLocation. */ function buildStore(initial: TokenBundle | null) { const state: { bundle: TokenBundle | null; setCalls: TokenBundle[] } = { bundle: initial, setCalls: [], } - let recordsLocation = '/tmp/test-records' - const store: TokenStore & { getRecordsLocation(): string } = { + const store: TokenStore = { async active() { return state.bundle ? { token: state.bundle.accessToken, bundle: state.bundle, account } : null }, - async set(_account, credentials) { - const bundle = - typeof credentials === 'string' ? { accessToken: credentials } : credentials + async set(_account, token) { + const bundle = { accessToken: token } + state.bundle = bundle + state.setCalls.push(bundle) + }, + async setBundle(_account, bundle) { state.bundle = bundle state.setCalls.push(bundle) }, @@ -35,29 +36,22 @@ function buildStore(initial: TokenBundle | null) { return state.bundle ? [{ account, isDefault: true }] : [] }, async setDefault() {}, - getRecordsLocation: () => recordsLocation, - } - return { - store, - state, - setRecordsLocation(path: string) { - recordsLocation = path - }, } + return { store, state } } function refreshingProvider( impl?: (input: { refreshToken: string account: Account - }) => Promise<{ accessToken: string; refreshToken?: string; accessTokenExpiresAt?: number }>, + }) => Promise<{ accessToken: string; refreshToken?: string; expiresAt?: number }>, ): AuthProvider & { refreshSpy: ReturnType } { const refreshSpy = vi.fn( impl ?? (async () => ({ accessToken: 'new-access', refreshToken: 'new-refresh', - accessTokenExpiresAt: Date.now() + 3_600_000, + expiresAt: Date.now() + 3_600_000, })), ) const provider: AuthProvider = { @@ -77,9 +71,11 @@ function refreshingProvider( describe('refreshAccessToken', () => { let tempDir: string + let lockPath: string beforeEach(() => { tempDir = mkdtempSync(join(tmpdir(), 'cli-core-refresh-')) + lockPath = join(tempDir, 'refresh.lock') }) afterEach(() => { rmSync(tempDir, { recursive: true, force: true }) @@ -93,7 +89,7 @@ describe('refreshAccessToken', () => { }) const provider = refreshingProvider() - const result = await refreshAccessToken({ store, provider }) + const result = await refreshAccessToken({ store, provider, lockPath }) expect(result.token).toBe('still-good') expect(provider.refreshSpy).not.toHaveBeenCalled() @@ -101,15 +97,14 @@ describe('refreshAccessToken', () => { }) it('refreshes when access token is past the skew window and persists the new bundle', async () => { - const { store, state, setRecordsLocation } = buildStore({ + const { store, state } = buildStore({ accessToken: 'expired', refreshToken: 'rt-old', accessTokenExpiresAt: Date.now() - 1000, }) - setRecordsLocation(join(tempDir, 'records.json')) const provider = refreshingProvider() - const result = await refreshAccessToken({ store, provider }) + const result = await refreshAccessToken({ store, provider, lockPath }) expect(provider.refreshSpy).toHaveBeenCalledWith( expect.objectContaining({ refreshToken: 'rt-old', account }), @@ -122,12 +117,11 @@ describe('refreshAccessToken', () => { const { store, state } = buildStore({ accessToken: 'rejected-by-server', refreshToken: 'rt', - // Expiry hasn't been hit yet — but server already 401'd. accessTokenExpiresAt: Date.now() + 600_000, }) const provider = refreshingProvider() - const result = await refreshAccessToken({ store, provider, force: true }) + const result = await refreshAccessToken({ store, provider, force: true, lockPath }) expect(provider.refreshSpy).toHaveBeenCalledTimes(1) expect(result.token).toBe('new-access') @@ -141,7 +135,7 @@ describe('refreshAccessToken', () => { }) const provider = refreshingProvider() - await expect(refreshAccessToken({ store, provider })).rejects.toMatchObject({ + await expect(refreshAccessToken({ store, provider, lockPath })).rejects.toMatchObject({ code: 'AUTH_REFRESH_UNAVAILABLE', }) }) @@ -159,7 +153,7 @@ describe('refreshAccessToken', () => { } await expect( - refreshAccessToken({ store, provider: refreshlessProvider }), + refreshAccessToken({ store, provider: refreshlessProvider, lockPath }), ).rejects.toMatchObject({ code: 'AUTH_REFRESH_UNAVAILABLE' }) }) @@ -167,7 +161,7 @@ describe('refreshAccessToken', () => { const { store } = buildStore(null) const provider = refreshingProvider() - await expect(refreshAccessToken({ store, provider })).rejects.toMatchObject({ + await expect(refreshAccessToken({ store, provider, lockPath })).rejects.toMatchObject({ code: 'NOT_AUTHENTICATED', }) }) @@ -180,12 +174,98 @@ describe('refreshAccessToken', () => { }) const provider = refreshingProvider(async () => ({ accessToken: 'new-access', - // no refresh_token in response — preserve the stored one - accessTokenExpiresAt: Date.now() + 3_600_000, + expiresAt: Date.now() + 3_600_000, })) - await refreshAccessToken({ store, provider }) + await refreshAccessToken({ store, provider, lockPath }) expect(state.bundle?.refreshToken).toBe('keep-me') }) + + it('re-reads inside the lock and returns the fresh snapshot when another process already refreshed (force: true)', async () => { + // Simulate two concurrent processes: this test plays the role of the + // *losing* one. Process A has already rotated the token before we + // acquire the lock, so the stored access token differs from what we + // first read. Honoring `force` here would POST with our now-stale + // refresh token and get `invalid_grant`. The helper must instead + // return the fresh snapshot without firing its own refresh. + const { store, state } = buildStore({ + accessToken: 'old-token', + refreshToken: 'rt-A', + accessTokenExpiresAt: Date.now() + 600_000, + }) + const provider = refreshingProvider() + + // Race emulation: as soon as `active()` returns the first time, swap + // the stored bundle to mimic Process A's win. The lock acquisition + // then re-reads and sees the rotated token. + const realActive = store.active.bind(store) + let firstRead = true + store.active = async (ref) => { + const snapshot = await realActive(ref) + if (firstRead) { + firstRead = false + state.bundle = { + accessToken: 'rotated-by-A', + refreshToken: 'rt-A-rotated', + accessTokenExpiresAt: Date.now() + 3_600_000, + } + } + return snapshot + } + + const result = await refreshAccessToken({ store, provider, force: true, lockPath }) + + // The losing process must NOT call refresh — that's the whole point. + expect(provider.refreshSpy).not.toHaveBeenCalled() + expect(result.token).toBe('rotated-by-A') + expect(result.bundle.refreshToken).toBe('rt-A-rotated') + }) + + it('skips its own refresh when another process refreshed enough headroom into the future (non-force)', async () => { + // Lock contention path: we ARE past skew but another process beats us + // to it. The re-read shows the fresh access token has plenty of life + // left, so we return early without POSTing. + const initial: TokenBundle = { + accessToken: 'expired', + refreshToken: 'rt', + accessTokenExpiresAt: Date.now() - 1000, + } + const { store, state } = buildStore(initial) + const provider = refreshingProvider() + + // Pre-create the lock file to simulate the other process holding it + // briefly. We don't actually need to hold it — the timing-sensitive + // assertion is the re-read after the lock check, which sees the + // rotated state below. + writeFileSync(lockPath, '') + + const realActive = store.active.bind(store) + let firstRead = true + store.active = async (ref) => { + if (firstRead) { + firstRead = false + return realActive(ref) + } + return { + token: 'fresh-from-other-proc', + bundle: { + accessToken: 'fresh-from-other-proc', + refreshToken: 'rt-rotated', + accessTokenExpiresAt: Date.now() + 3_600_000, + }, + account, + } + } + + // Release the lock right away by removing the file so the helper can + // acquire it (we just want the re-read path to run). + rmSync(lockPath) + + const result = await refreshAccessToken({ store, provider, lockPath }) + + expect(provider.refreshSpy).not.toHaveBeenCalled() + expect(result.token).toBe('fresh-from-other-proc') + expect(state.setCalls).toHaveLength(0) + }) }) diff --git a/src/auth/refresh.ts b/src/auth/refresh.ts index 7d3e8e8..29f176b 100644 --- a/src/auth/refresh.ts +++ b/src/auth/refresh.ts @@ -2,6 +2,7 @@ import { closeSync, openSync, unlinkSync } from 'node:fs' import { setTimeout as sleep } from 'node:timers/promises' import { CliError } from '../errors.js' import type { AccountRef, AuthAccount, AuthProvider, TokenBundle, TokenStore } from './types.js' +import { requireSnapshotForRef } from './user-flag.js' /** Default skew window: refresh when fewer than 60s remain on the access token. */ const DEFAULT_SKEW_MS = 60_000 @@ -22,21 +23,25 @@ export type RefreshAccessTokenOptions = { buildHandshake?: (account: TAccount) => Record /** * Refresh when fewer than this many ms remain on the access token's - * `expiresAt`. Default 60s. Set to `0` for "only refresh when fully - * expired"; set to `Infinity` to force a refresh whenever a refresh - * token exists. + * `accessTokenExpiresAt`. Default 60s. Set to `0` for "only refresh + * when fully expired"; set to `Infinity` to force a refresh whenever a + * refresh token exists. */ skewMs?: number /** * Force a refresh regardless of expiry — used by the reactive 401-retry * path where the server has already rejected the access token. Throws - * `AUTH_REFRESH_UNAVAILABLE` when no refresh token is stored. + * `AUTH_REFRESH_UNAVAILABLE` when no refresh token is stored. Honored + * only when the post-lock re-read still returns the same access token + * (so a concurrent process that refreshed first wins). */ force?: boolean /** - * Sidecar lock file path. Defaults to `${store.getRecordsLocation()}.refresh.lock` - * when the store is a `KeyringTokenStore` exposing `getRecordsLocation`, - * otherwise no file lock is used (single-process safety only). + * Absolute filesystem path to a sidecar lock file. When provided, the + * helper acquires it via `O_EXCL` before refreshing so two concurrent + * processes don't race the server's refresh-token rotation. When + * omitted, no cross-process lock is taken (single-process safety only). + * Pass an expanded path (no `~`); cli-core does not interpret it. */ lockPath?: string /** Lock acquisition window. Default 2_000ms. */ @@ -49,11 +54,12 @@ export type RefreshAccessTokenOptions = { * the new bundle and returns it. When refresh isn't needed the active bundle * is returned unchanged. * - * Concurrency: when a sidecar lock file path is available, the helper acquires - * it before refreshing. On contention it waits up to `lockTimeoutMs`, then - * re-reads the store — if another process has already refreshed, the fresh - * bundle is returned without firing a duplicate POST. If the lock can't be - * acquired the refresh proceeds anyway (worst case: one extra token rotation). + * Concurrency: when `lockPath` is supplied, the helper acquires it before + * refreshing. On contention it waits up to `lockTimeoutMs`, then re-reads + * the store — if another process has already refreshed (detected by a + * changed access token), the fresh bundle is returned without firing a + * duplicate POST, even when `force` was set. If the lock can't be acquired + * the refresh proceeds anyway (worst case: one extra token rotation). */ export async function refreshAccessToken( options: RefreshAccessTokenOptions, @@ -61,19 +67,18 @@ export async function refreshAccessToken( const skewMs = options.skewMs ?? DEFAULT_SKEW_MS const lockTimeoutMs = options.lockTimeoutMs ?? DEFAULT_LOCK_TIMEOUT_MS - const snapshot = await options.store.active(options.ref) + const snapshot = await requireSnapshotForRef(options.store, options.ref) if (!snapshot) { throw new CliError('NOT_AUTHENTICATED', 'No stored credentials to refresh.') } // Synthesise a minimal bundle for stores that don't track refresh state — // they'll fall through to `AUTH_REFRESH_UNAVAILABLE` below since // `refreshToken` is missing, which is the right behaviour. - const bundle: TokenBundle = snapshot.bundle ?? { accessToken: snapshot.token } - const resolvedSnapshot = { token: snapshot.token, bundle, account: snapshot.account } - if (!shouldRefresh(bundle, skewMs, options.force ?? false)) { - return resolvedSnapshot + const initialBundle: TokenBundle = snapshot.bundle ?? { accessToken: snapshot.token } + if (!shouldRefresh(initialBundle, skewMs, options.force ?? false)) { + return { token: snapshot.token, bundle: initialBundle, account: snapshot.account } } - if (!bundle.refreshToken) { + if (!initialBundle.refreshToken) { throw new CliError( 'AUTH_REFRESH_UNAVAILABLE', 'Access token expired and no refresh token is stored.', @@ -86,65 +91,84 @@ export async function refreshAccessToken( ) } - const lockPath = options.lockPath ?? deriveLockPath(options.store) - const lock = lockPath ? await acquireFileLock(lockPath, lockTimeoutMs) : null + const lock = options.lockPath ? await acquireFileLock(options.lockPath, lockTimeoutMs) : null + let bundle = initialBundle + let account = snapshot.account try { - // Re-read inside the lock: another process may have refreshed - // already, and re-using its result avoids two refreshes racing the - // server's rotation logic (the loser's refresh token would be void). + // Re-read inside the lock. Another process may have refreshed already; + // when that happened, its rotated access token will differ from ours + // and we MUST return the fresh result instead of firing our own + // refresh — even on the `force` path. Continuing would POST with our + // (now-rotated, invalid) refresh token and yield `invalid_grant`. if (lock) { - const fresh = await options.store.active(options.ref) + const fresh = await requireSnapshotForRef(options.store, options.ref) if (fresh) { const freshBundle = fresh.bundle ?? { accessToken: fresh.token } + if (fresh.token !== snapshot.token) { + // Another process won the race. Return its result; ignore + // our `force` flag because the access token has already + // been rotated server-side. + return { token: fresh.token, bundle: freshBundle, account: fresh.account } + } if (!shouldRefresh(freshBundle, skewMs, options.force ?? false)) { return { token: fresh.token, bundle: freshBundle, account: fresh.account } } + bundle = freshBundle + account = fresh.account } } const buildHandshake = options.buildHandshake ?? - ((account: TAccount): Record => ({ ...account, flags: {} })) + ((acc: TAccount): Record => ({ ...acc, flags: {} })) const exchange = await options.provider.refreshToken({ - refreshToken: bundle.refreshToken, - account: snapshot.account, - handshake: buildHandshake(snapshot.account), + refreshToken: bundle.refreshToken!, + account, + handshake: buildHandshake(account), }) const nextBundle: TokenBundle = { accessToken: exchange.accessToken, // Rotate when the server returns one, keep the previous when it - // doesn't. Same logic the spec calls out for refresh rotation. + // doesn't. refreshToken: exchange.refreshToken ?? bundle.refreshToken, - accessTokenExpiresAt: exchange.accessTokenExpiresAt, + accessTokenExpiresAt: exchange.expiresAt, refreshTokenExpiresAt: exchange.refreshTokenExpiresAt ?? bundle.refreshTokenExpiresAt, } - await options.store.set(snapshot.account, nextBundle) - return { token: nextBundle.accessToken, bundle: nextBundle, account: snapshot.account } + await persistBundle(options.store, account, nextBundle) + return { token: nextBundle.accessToken, bundle: nextBundle, account } } finally { if (lock) lock.release() } } +/** + * Persist via `setBundle` when the store implements it; fall back to `set` + * with just the access token for simple single-token stores (they lose + * refresh + expiry metadata; subsequent refreshes will fail with + * `AUTH_REFRESH_UNAVAILABLE`, which surfaces as a forced re-login). + */ +async function persistBundle( + store: TokenStore, + account: TAccount, + bundle: TokenBundle, +): Promise { + if (store.setBundle) { + await store.setBundle(account, bundle) + } else { + await store.set(account, bundle.accessToken) + } +} + function shouldRefresh(bundle: TokenBundle, skewMs: number, force: boolean): boolean { if (force) return true if (typeof bundle.accessTokenExpiresAt !== 'number') return false return Date.now() > bundle.accessTokenExpiresAt - skewMs } -function deriveLockPath( - store: TokenStore, -): string | undefined { - const candidate = store as { getRecordsLocation?: () => string } - if (typeof candidate.getRecordsLocation === 'function') { - return `${candidate.getRecordsLocation()}.refresh.lock` - } - return undefined -} - type FileLock = { release: () => void } /** @@ -172,8 +196,8 @@ async function acquireFileLock(path: string, timeoutMs: number): Promise= deadline) return null diff --git a/src/auth/types.ts b/src/auth/types.ts index b8fb220..88cc8ed 100644 --- a/src/auth/types.ts +++ b/src/auth/types.ts @@ -47,8 +47,8 @@ export type ExchangeResult = { accessToken: string refreshToken?: string /** Unix-epoch ms when the access token expires. */ - accessTokenExpiresAt?: number - /** Unix-epoch ms when the refresh token expires (rarely advertised). */ + expiresAt?: number + /** Unix-epoch ms when the refresh token expires (rarely advertised by OAuth servers). */ refreshTokenExpiresAt?: number /** Set when the token endpoint already identifies the account; skips `validateToken`. */ account?: TAccount @@ -139,13 +139,20 @@ export type TokenStore = { ref?: AccountRef, ): Promise<{ token: string; bundle?: TokenBundle; account: TAccount } | null> /** - * Persist credentials for `account`, replacing any previous entry. Accepts - * either a bare access-token string (for providers without refresh) or a - * full `TokenBundle` (access + optional refresh + expiry). Throw + * Persist `token` for `account`, replacing any previous entry. Throw * `CliError` for typed failures; other thrown values become * `AUTH_STORE_WRITE_FAILED`. */ - set(account: TAccount, credentials: string | TokenBundle): Promise + set(account: TAccount, token: string): Promise + /** + * Persist a full credential bundle (access + optional refresh + expiry). + * Optional: stores backed by simple single-token storage can omit this + * and cli-core helpers will fall back to `set(account, bundle.accessToken)`, + * which loses refresh + expiry metadata. The built-in + * `createKeyringTokenStore` implements this so refresh-token flows work + * end-to-end. + */ + setBundle?(account: TAccount, bundle: TokenBundle): Promise /** Remove a stored credential. No-op when `ref` doesn't match. */ clear(ref?: AccountRef): Promise /** Every stored account with a default marker. */ From db8235d33210ac91b961acd86d68d2f511b512ab Mon Sep 17 00:00:00 2001 From: lmjabreu Date: Tue, 19 May 2026 00:01:22 +0100 Subject: [PATCH 03/10] fix(auth): address doistbot round-2 review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Correctness: - migrate.ts: pass `purgeRefreshSlot: false` through writeRecordWithKeyring Fallback so a legacy-migration retry after `marker-write-failed` doesn't destructively wipe a /refresh secret that may have been written by a v2 login between the failed marker and the retry. The legacy access-only token has no authority over refresh state. - token-store: `setBundle()` no longer triggers the default-promotion side effect that `set()` (login) has. Silent refresh on a config with no pinned default would otherwise silently pin the refreshed account, mutating account selection without user intent. - refreshAccessToken: honour `exchange.account` returned by a provider's refreshToken (e.g. for a server-side rename); fall back to the pre-refresh account. PKCE provider's refreshToken stops fabricating an `account` since the in-repo caller now threads it through directly. - token-store: clear() runs the access + refresh keyring deletes via Promise.allSettled instead of sequentially — halves clear() latency. Refactor: - record-write: extracted `rollbackAccess(error)` helper to dedupe the two-branch rollback logic. Spread a `baseRecord` into the offline- fallback branch so the shared fields don't duplicate. - pkce: extracted `mapTokenPayloadToExchangeResult` so the OAuth payload shape lives in one place for both exchangeCode + refreshToken. - New `auth/persist.ts` exports `persistBundle(store, account, bundle)`. Both `runOAuthFlow` and `refreshAccessToken` now call it instead of open-coding the `setBundle ? … : set(…)` policy — login and silent refresh can no longer drift on how bundles get stored. Tests (383 -> 386): - flow.test abort-path now also spies on `setBundle` and asserts `store.last === undefined`, so a regression where runOAuthFlow persists after abort via the new bundle path can't slip through. - flow.test bundle-persistence test exercises `refreshTokenExpiresAt` (was unasserted) so a regression dropping just refresh-expiry would no longer pass. - pkce.test refresh-expiry case parameterised over 400 + 401, since many OAuth servers and reverse proxies return 401 for revoked refresh tokens. - token-store.test: new test asserting setBundle does NOT promote default — guards the silent-refresh-account-mutation regression. - record-write.test: new test for `purgeRefreshSlot: false` — pre- existing refresh secret survives and `hasRefreshToken` is left unset (we have no authority over it from a legacy access-only token). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/auth/flow.test.ts | 29 ++++++---- src/auth/flow.ts | 25 ++++---- src/auth/keyring/migrate.ts | 12 +++- src/auth/keyring/record-write.test.ts | 28 +++++++++ src/auth/keyring/record-write.ts | 82 +++++++++++++++++---------- src/auth/keyring/token-store.test.ts | 17 ++++++ src/auth/keyring/token-store.ts | 60 ++++++++++++-------- src/auth/persist.ts | 25 ++++++++ src/auth/providers/pkce.test.ts | 59 ++++++++++++------- src/auth/providers/pkce.ts | 42 ++++++++------ src/auth/refresh.ts | 33 +++++------ 11 files changed, 272 insertions(+), 140 deletions(-) create mode 100644 src/auth/persist.ts diff --git a/src/auth/flow.test.ts b/src/auth/flow.test.ts index 9f0386a..17f30db 100644 --- a/src/auth/flow.test.ts +++ b/src/auth/flow.test.ts @@ -175,13 +175,15 @@ describe('runOAuthFlow', () => { }) }) - it('persists the full bundle (refresh + expiry) when exchangeCode returns them', async () => { - const expiresAt = Date.now() + 3_600_000 + it('persists the full bundle (refresh + access expiry + refresh expiry) when exchangeCode returns them', async () => { + const accessExpiresAt = Date.now() + 3_600_000 + const refreshExpiresAt = Date.now() + 30 * 24 * 3_600_000 const { provider, getRedirect } = instrument({ exchangeCode: async () => ({ accessToken: 'tok-1', refreshToken: 'rt-1', - expiresAt, + expiresAt: accessExpiresAt, + refreshTokenExpiresAt: refreshExpiresAt, }), }) const store = fakeStore() @@ -190,15 +192,14 @@ describe('runOAuthFlow', () => { flowOptions({ provider, store, openBrowser: driveCallback(getRedirect) }), ) - // Regression guard: if `runOAuthFlow` drops refresh/expiry on the - // floor again (the original bug that motivated this PR), the - // snapshot here would lose the refresh token and the next - // `refreshAccessToken` call would fail with AUTH_REFRESH_UNAVAILABLE. + // Regression guard: if `runOAuthFlow` drops any of refresh / access + // expiry / refresh expiry on the floor, the snapshot would lose + // that field and silent re-auth would break. expect(store.last?.bundle).toEqual({ accessToken: 'tok-1', refreshToken: 'rt-1', - accessTokenExpiresAt: expiresAt, - refreshTokenExpiresAt: undefined, + accessTokenExpiresAt: accessExpiresAt, + refreshTokenExpiresAt: refreshExpiresAt, }) }) @@ -296,11 +297,17 @@ describe('runOAuthFlow', () => { expect(openBrowser).not.toHaveBeenCalled() }) - it('halts via AbortSignal: aborting before the callback rejects with AUTH_OAUTH_FAILED and skips store.set', async () => { + it('halts via AbortSignal: aborting before the callback rejects with AUTH_OAUTH_FAILED and skips persistence', async () => { const controller = new AbortController() const { provider, getRedirect } = instrument() const store = fakeStore() + // Spy on BOTH `set` and `setBundle` so a regression where + // `runOAuthFlow` persists via `setBundle` after abort (now the + // default path with refresh-token support) wouldn't slip through. + // Reading `store.last` is the strongest guard: it stays undefined + // iff neither method ran. const setSpy = vi.spyOn(store, 'set') + const setBundleSpy = vi.spyOn(store, 'setBundle') await expect( runOAuthFlow( @@ -319,6 +326,8 @@ describe('runOAuthFlow', () => { ), ).rejects.toMatchObject({ code: 'AUTH_OAUTH_FAILED' }) expect(setSpy).not.toHaveBeenCalled() + expect(setBundleSpy).not.toHaveBeenCalled() + expect(store.last).toBeUndefined() }) it('always surfaces the authorize URL via onAuthorizeUrl, even when openBrowser succeeds', async () => { diff --git a/src/auth/flow.ts b/src/auth/flow.ts index 70e1ed2..87620a0 100644 --- a/src/auth/flow.ts +++ b/src/auth/flow.ts @@ -4,6 +4,7 @@ import { type IncomingMessage, type Server, type ServerResponse, createServer } import { promisify } from 'node:util' import { CliError, getErrorMessage } from '../errors.js' import { isStdoutTTY } from '../terminal.js' +import { persistBundle } from './persist.js' import { generateState } from './pkce.js' import type { AuthAccount, AuthProvider, TokenStore } from './types.js' @@ -188,20 +189,16 @@ export async function runOAuthFlow( checkAborted() try { - // Use setBundle when the store implements it so refresh + expiry - // metadata survives to enable silent re-auth later. Fall back to - // the simple `set` for stores that don't (the refresh metadata is - // lost, which is acceptable — those stores can't refresh anyway). - if (options.store.setBundle) { - await options.store.setBundle(account, { - accessToken: exchange.accessToken, - refreshToken: exchange.refreshToken, - accessTokenExpiresAt: exchange.expiresAt, - refreshTokenExpiresAt: exchange.refreshTokenExpiresAt, - }) - } else { - await options.store.set(account, exchange.accessToken) - } + // Shared persistence helper preferred over open-coding the + // `setBundle ? … : set(…)` policy — refresh.ts uses the same + // helper so login and silent refresh can't drift on how + // bundles get stored. + await persistBundle(options.store, account, { + accessToken: exchange.accessToken, + refreshToken: exchange.refreshToken, + accessTokenExpiresAt: exchange.expiresAt, + refreshTokenExpiresAt: exchange.refreshTokenExpiresAt, + }) } catch (error) { if (error instanceof CliError) throw error throw new CliError( diff --git a/src/auth/keyring/migrate.ts b/src/auth/keyring/migrate.ts index ea13179..f176c90 100644 --- a/src/auth/keyring/migrate.ts +++ b/src/auth/keyring/migrate.ts @@ -161,9 +161,14 @@ export async function migrateLegacyAuth( const accountSlot = accountForUser(account.id) await writeRecordWithKeyringFallback({ secureStore: createSecureStore({ serviceName, account: accountSlot }), - // Legacy single-user state never carried a refresh token, but - // wire the sibling slot anyway so a defensive delete clears any - // junk that may have been parked there by a hand-edit. + // The refresh slot is wired through so the helper signature + // doesn't have to special-case migration. `purgeRefreshSlot: + // false` below tells it not to touch the slot — a retry after + // `marker-write-failed` may land on an account that has since + // logged in via the v2 flow and now has a valid refresh secret. + // The legacy token is access-only and has no authority over + // refresh state; running the defensive delete would silently + // disable silent refresh for that account. refreshSecureStore: createSecureStore({ serviceName, account: refreshAccountSlot(accountSlot), @@ -171,6 +176,7 @@ export async function migrateLegacyAuth( userRecords, account, bundle: { accessToken: legacyToken.token }, + purgeRefreshSlot: false, }) } catch (error) { return skipped(silent, logPrefix, 'user-record-write-failed', getErrorMessage(error)) diff --git a/src/auth/keyring/record-write.test.ts b/src/auth/keyring/record-write.test.ts index 7f9d249..405fb25 100644 --- a/src/auth/keyring/record-write.test.ts +++ b/src/auth/keyring/record-write.test.ts @@ -160,6 +160,34 @@ describe('writeRecordWithKeyringFallback', () => { expect(state.records.get('42')?.hasRefreshToken).toBe(false) }) + it('does not touch the refresh slot when purgeRefreshSlot: false (legacy-migration path)', async () => { + // migrateLegacyAuth uses this to avoid destroying a refresh secret + // that a v2 login may have written between the failed-marker write + // and the retry. Without this, the legacy access-only token would + // silently disable refresh for the account. + const secureStore = buildSingleSlot() + const refreshSecureStore = buildSingleSlot({ secret: 'rt_already_there' }) + const { store: userRecords, state } = buildUserRecords() + + const result = await writeRecordWithKeyringFallback({ + secureStore, + refreshSecureStore, + userRecords, + account, + bundle: { accessToken: 'legacy_at' }, + purgeRefreshSlot: false, + }) + + expect(result.storedSecurely).toBe(true) + expect(secureStore.setSpy).toHaveBeenCalledWith('legacy_at') + // The pre-existing refresh secret survives the migration write. + expect(refreshSecureStore.deleteSpy).not.toHaveBeenCalled() + expect(refreshSecureStore.setSpy).not.toHaveBeenCalled() + // The persisted record leaves `hasRefreshToken` unset (we don't + // know — the legacy bundle has no authority over refresh state). + expect(state.records.get('42')?.hasRefreshToken).toBeUndefined() + }) + it('rethrows non-keyring errors from setSecret without writing the record', async () => { const secureStore = buildSingleSlot() const cause = new Error('unexpected backend explosion') diff --git a/src/auth/keyring/record-write.ts b/src/auth/keyring/record-write.ts index 0179bac..7dafd82 100644 --- a/src/auth/keyring/record-write.ts +++ b/src/auth/keyring/record-write.ts @@ -9,12 +9,23 @@ type WriteRecordOptions = { * Per-account keyring slot for the refresh token (separate slot under * `${account}/refresh`). When the bundle has no refresh token this slot * is still cleared on write so a previous refresh secret doesn't outlive - * a login that didn't return one. + * a login that didn't return one — except when `purgeRefreshSlot: false`, + * see below. */ refreshSecureStore: SecureStore userRecords: UserRecordStore account: TAccount bundle: TokenBundle + /** + * When `false`, skip the defensive delete of the refresh slot for + * bundles without a refresh token, and don't reset `hasRefreshToken` + * on the persisted record. Used by `migrateLegacyAuth`: a retry after a + * `marker-write-failed` may land on an account that has since logged in + * via the v2 flow and now has a valid refresh secret — the legacy token + * has no authority over refresh state and must not erase it. Defaults + * to `true` (the safe default for fresh logins). + */ + purgeRefreshSlot?: boolean } type WriteRecordResult = { @@ -51,9 +62,25 @@ export async function writeRecordWithKeyringFallback, ): Promise { const { secureStore, refreshSecureStore, userRecords, account, bundle } = options + const purgeRefreshSlot = options.purgeRefreshSlot ?? true const trimmedAccess = bundle.accessToken.trim() const trimmedRefresh = bundle.refreshToken?.trim() || undefined + /** + * Best-effort access-slot rollback shared by both refresh-slot failure + * paths (write + delete). When the keyring is partially offline (or + * misbehaves), we route both tokens to the fallback record so they + * travel together — never leave an orphan access credential. + */ + async function rollbackAccess(error: unknown): Promise { + try { + await secureStore.deleteSecret() + } catch { + // best-effort rollback + } + if (!(error instanceof SecureStoreUnavailableError)) throw error + } + let storedSecurely = false try { await secureStore.setSecret(trimmedAccess) @@ -69,53 +96,46 @@ export async function writeRecordWithKeyringFallback = storedSecurely - ? { - account, - accessTokenExpiresAt: bundle.accessTokenExpiresAt, - refreshTokenExpiresAt: bundle.refreshTokenExpiresAt, - hasRefreshToken: Boolean(trimmedRefresh), - } + ? baseRecord : { - account, + ...baseRecord, fallbackToken: trimmedAccess, fallbackRefreshToken: trimmedRefresh, - accessTokenExpiresAt: bundle.accessTokenExpiresAt, - refreshTokenExpiresAt: bundle.refreshTokenExpiresAt, - hasRefreshToken: Boolean(trimmedRefresh), } try { diff --git a/src/auth/keyring/token-store.test.ts b/src/auth/keyring/token-store.test.ts index 7d404de..251431c 100644 --- a/src/auth/keyring/token-store.test.ts +++ b/src/auth/keyring/token-store.test.ts @@ -150,6 +150,23 @@ describe('createKeyringTokenStore', () => { expect(state.records.size).toBe(0) }) + it('setBundle does not promote the user as default (silent refresh must not change account selection)', async () => { + // `set` (the explicit login path) promotes the first user to + // default. `setBundle` (silent refresh) must NOT, otherwise a + // refresh on a config with no pinned default — common after a + // logout-then-login-via-env-var sequence — would silently pin the + // refreshed account, mutating selection without the user asking. + const { store, state, setDefaultSpy } = fixture({ + records: { '42': { account } }, + }) + expect(state.defaultId).toBeNull() + + await store.setBundle!(account, { accessToken: 'at-1', refreshToken: 'rt-1' }) + + expect(state.defaultId).toBeNull() + expect(setDefaultSpy).not.toHaveBeenCalled() + }) + it('skips the refresh-slot keyring read when hasRefreshToken is false (hot-path optimisation)', async () => { const { refreshKeyring, store } = fixture({ keyring: buildSingleSlot({ secret: 'at-only' }), diff --git a/src/auth/keyring/token-store.ts b/src/auth/keyring/token-store.ts index 38ee047..c4220ce 100644 --- a/src/auth/keyring/token-store.ts +++ b/src/auth/keyring/token-store.ts @@ -179,7 +179,18 @@ export function createKeyringTokenStore( } } - async function persistBundle(account: TAccount, bundle: TokenBundle): Promise { + /** + * Shared persistence path for `set` / `setBundle`. The `promoteDefault` + * flag is only `true` on the explicit `set` (login) path so that a + * silent refresh on a config with no pinned default doesn't accidentally + * pin the refreshed account as the new default — refresh is a + * credential-rotation operation, not an account-selection signal. + */ + async function persistBundle( + account: TAccount, + bundle: TokenBundle, + promoteDefault: boolean, + ): Promise { lastStorageResult = undefined const { storedSecurely } = await writeRecordWithKeyringFallback({ @@ -190,16 +201,18 @@ export function createKeyringTokenStore( bundle, }) - // Best-effort default promotion: a failure here must not turn into - // `AUTH_STORE_WRITE_FAILED` (the user can recover by setting a - // default later). - try { - const existingDefault = await userRecords.getDefaultId() - if (!existingDefault) { - await userRecords.setDefaultId(account.id) + if (promoteDefault) { + // Best-effort: a failure here must not turn into + // `AUTH_STORE_WRITE_FAILED` (the user can recover by setting a + // default later). + try { + const existingDefault = await userRecords.getDefaultId() + if (!existingDefault) { + await userRecords.setDefaultId(account.id) + } + } catch { + // best-effort } - } catch { - // best-effort } lastStorageResult = storedSecurely @@ -228,11 +241,13 @@ export function createKeyringTokenStore( }, async set(account, token) { - await persistBundle(account, { accessToken: token }) + await persistBundle(account, { accessToken: token }, true) }, async setBundle(account, bundle) { - await persistBundle(account, bundle) + // No default promotion: a silent refresh shouldn't mutate + // account selection. The caller already chose this account. + await persistBundle(account, bundle, false) }, async clear(ref) { @@ -256,18 +271,15 @@ export function createKeyringTokenStore( // Always attempt to delete both keyring slots. Either may have // an orphan entry from a prior keyring-online write that was - // later replaced by an offline-fallback write. - let keyringClean = true - try { - await accessStoreFor(record.account).deleteSecret() - } catch { - keyringClean = false - } - try { - await refreshStoreFor(record.account).deleteSecret() - } catch { - keyringClean = false - } + // later replaced by an offline-fallback write. Run them + // concurrently — they're independent and the keyring IPC + // round-trip is the latency-heavy part. + const [accessResult, refreshResult] = await Promise.allSettled([ + accessStoreFor(record.account).deleteSecret(), + refreshStoreFor(record.account).deleteSecret(), + ]) + const keyringClean = + accessResult.status === 'fulfilled' && refreshResult.status === 'fulfilled' if (!keyringClean) { lastClearResult = fallbackClear diff --git a/src/auth/persist.ts b/src/auth/persist.ts new file mode 100644 index 0000000..7662967 --- /dev/null +++ b/src/auth/persist.ts @@ -0,0 +1,25 @@ +import type { AuthAccount, TokenBundle, TokenStore } from './types.js' + +/** + * Persist a `TokenBundle` through whatever method the store implements. + * Stores that opt into refresh (`createKeyringTokenStore` and any custom + * store that exposes `setBundle`) take the bundle as-is and keep refresh + + * expiry metadata. Simpler single-token stores fall back to + * `set(account, bundle.accessToken)`, which discards the metadata — + * subsequent `refreshAccessToken` calls against them will surface + * `AUTH_REFRESH_UNAVAILABLE`, which is the correct degraded behaviour. + * + * Centralised here so login (`runOAuthFlow`) and refresh + * (`refreshAccessToken`) can't drift on the policy. + */ +export async function persistBundle( + store: TokenStore, + account: TAccount, + bundle: TokenBundle, +): Promise { + if (store.setBundle) { + await store.setBundle(account, bundle) + } else { + await store.set(account, bundle.accessToken) + } +} diff --git a/src/auth/providers/pkce.test.ts b/src/auth/providers/pkce.test.ts index fb25b78..a8074e9 100644 --- a/src/auth/providers/pkce.test.ts +++ b/src/auth/providers/pkce.test.ts @@ -187,30 +187,47 @@ describe('createPkceProvider', () => { expect(result.accessToken).toBe('new-access') expect(result.refreshToken).toBe('new-refresh') expect(result.expiresAt).toBeGreaterThan(Date.now()) - // Account passes through so the caller doesn't have to look it up again. - expect(result.account).toEqual(account) + // `account` is intentionally left unset on refresh responses: + // refreshAccessToken threads the pre-refresh account through + // directly, so populating it here would be dead data inside + // cli-core. The contract still allows a provider to set it + // (e.g. for a server-side rename), but the PKCE default + // doesn't fabricate one. + expect(result.account).toBeUndefined() }) - it('throws AUTH_REFRESH_EXPIRED on 400 invalid_grant (refresh token revoked or expired)', async () => { - const provider = createPkceProvider({ - authorizeUrl: 'unused', - tokenUrl: 'https://example.com/oauth/token', - clientId: 'cid', - validate, - fetchImpl: (() => - Promise.resolve( - new Response(JSON.stringify({ error: 'invalid_grant' }), { status: 400 }), - )) as typeof fetch, - }) + it.each([ + ['400', 400], + // Many OAuth servers (and reverse proxies) return 401 for + // revoked / expired refresh tokens. Both must map to + // AUTH_REFRESH_EXPIRED so the caller forces re-login instead of + // treating it as a transient retryable error. + ['401', 401], + ])( + 'throws AUTH_REFRESH_EXPIRED on %s invalid_grant (refresh token revoked or expired)', + async (_label, status) => { + const provider = createPkceProvider({ + authorizeUrl: 'unused', + tokenUrl: 'https://example.com/oauth/token', + clientId: 'cid', + validate, + fetchImpl: (() => + Promise.resolve( + new Response(JSON.stringify({ error: 'invalid_grant' }), { + status, + }), + )) as typeof fetch, + }) - await expect( - provider.refreshToken!({ - refreshToken: 'rt', - account, - handshake: {}, - }), - ).rejects.toMatchObject({ code: 'AUTH_REFRESH_EXPIRED' }) - }) + await expect( + provider.refreshToken!({ + refreshToken: 'rt', + account, + handshake: {}, + }), + ).rejects.toMatchObject({ code: 'AUTH_REFRESH_EXPIRED' }) + }, + ) it('throws AUTH_REFRESH_TRANSIENT on non-invalid_grant errors (5xx, network)', async () => { const transientStatus = createPkceProvider({ diff --git a/src/auth/providers/pkce.ts b/src/auth/providers/pkce.ts index 2db31d4..ec2e1d8 100644 --- a/src/auth/providers/pkce.ts +++ b/src/auth/providers/pkce.ts @@ -119,14 +119,7 @@ export function createPkceProvider( label: 'Token', }) - return { - accessToken: payload.access_token!, - refreshToken: payload.refresh_token, - expiresAt: - typeof payload.expires_in === 'number' - ? Date.now() + payload.expires_in * 1000 - : undefined, - } + return mapTokenPayloadToExchangeResult(payload) }, async refreshToken(input: RefreshInput): Promise> { @@ -161,21 +154,36 @@ export function createPkceProvider( : 'AUTH_REFRESH_TRANSIENT', }) - return { - accessToken: payload.access_token!, - refreshToken: payload.refresh_token, - expiresAt: - typeof payload.expires_in === 'number' - ? Date.now() + payload.expires_in * 1000 - : undefined, - account: input.account, - } + return mapTokenPayloadToExchangeResult(payload) }, validateToken: options.validate, } } +/** + * Translate the OAuth `{access_token, refresh_token, expires_in}` payload + * shape into cli-core's `ExchangeResult`. Centralised so a payload-shape + * change (e.g. wiring `refresh_token_expires_in` for servers that + * advertise it) lands in one place instead of drifting between + * `exchangeCode` and `refreshToken`. `account` is intentionally left + * unset: callers that need it (PKCE `refreshToken`) used to assign it + * here, but the only in-repo consumer (`refreshAccessToken`) now + * threads the pre-refresh account through directly. + */ +function mapTokenPayloadToExchangeResult( + payload: TokenEndpointPayload, +): ExchangeResult { + return { + accessToken: payload.access_token!, + refreshToken: payload.refresh_token, + expiresAt: + typeof payload.expires_in === 'number' + ? Date.now() + payload.expires_in * 1000 + : undefined, + } +} + function resolve( resolver: PkceLazyString, handshake: Record, diff --git a/src/auth/refresh.ts b/src/auth/refresh.ts index 29f176b..a509694 100644 --- a/src/auth/refresh.ts +++ b/src/auth/refresh.ts @@ -1,6 +1,7 @@ import { closeSync, openSync, unlinkSync } from 'node:fs' import { setTimeout as sleep } from 'node:timers/promises' import { CliError } from '../errors.js' +import { persistBundle } from './persist.js' import type { AccountRef, AuthAccount, AuthProvider, TokenBundle, TokenStore } from './types.js' import { requireSnapshotForRef } from './user-flag.js' @@ -129,6 +130,12 @@ export async function refreshAccessToken( handshake: buildHandshake(account), }) + // Honour an `account` returned by the provider — refresh responses + // can legitimately carry updated identity (server-side rename, + // re-resolved label) that callers want to see. Defaults to the + // pre-refresh account when the provider doesn't return one. + const refreshedAccount = exchange.account ?? account + const nextBundle: TokenBundle = { accessToken: exchange.accessToken, // Rotate when the server returns one, keep the previous when it @@ -138,31 +145,17 @@ export async function refreshAccessToken( refreshTokenExpiresAt: exchange.refreshTokenExpiresAt ?? bundle.refreshTokenExpiresAt, } - await persistBundle(options.store, account, nextBundle) - return { token: nextBundle.accessToken, bundle: nextBundle, account } + await persistBundle(options.store, refreshedAccount, nextBundle) + return { + token: nextBundle.accessToken, + bundle: nextBundle, + account: refreshedAccount, + } } finally { if (lock) lock.release() } } -/** - * Persist via `setBundle` when the store implements it; fall back to `set` - * with just the access token for simple single-token stores (they lose - * refresh + expiry metadata; subsequent refreshes will fail with - * `AUTH_REFRESH_UNAVAILABLE`, which surfaces as a forced re-login). - */ -async function persistBundle( - store: TokenStore, - account: TAccount, - bundle: TokenBundle, -): Promise { - if (store.setBundle) { - await store.setBundle(account, bundle) - } else { - await store.set(account, bundle.accessToken) - } -} - function shouldRefresh(bundle: TokenBundle, skewMs: number, force: boolean): boolean { if (force) return true if (typeof bundle.accessTokenExpiresAt !== 'number') return false From d4cd62ed18a74d9b07899f1981da8fa6574e5cbd Mon Sep 17 00:00:00 2001 From: lmjabreu Date: Tue, 19 May 2026 00:12:07 +0100 Subject: [PATCH 04/10] fix(auth): address doistbot round-3 review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Correctness (3 P1): - token-store: `active()`'s refresh-slot gate now `record.hasRefreshToken !== false` (was truthy). A legacy-migration record leaves the field `undefined` ("unknown") and the truthy check would otherwise hide a v2 refresh secret that was put in the sibling slot by a v2 login — the exact bug `purgeRefreshSlot: false` was meant to prevent. - token-store: thread an optional `{ promoteDefault?: boolean }` through `setBundle`. `runOAuthFlow` passes `true` so explicit login still auto-pins the first account as default; `refreshAccessToken` omits it so silent refresh never mutates account selection. - migrate.ts: skip the `writeRecordWithKeyringFallback` call entirely when the v2 record for the account already exists. `upsert` is replace-not-merge, so writing a legacy access-only bundle would clobber the v2 record's `hasRefreshToken` / `accessTokenExpiresAt` and silently disable silent refresh. Refactor / dedup (P2/P3): - record-write: upsert-failure rollback uses Promise.allSettled for the two keyring deletes — matches the optimised cleanup in `clear()`. - New `bundleFromExchange(exchange, prev?)` helper in `auth/persist.ts`. Both `runOAuthFlow` and `refreshAccessToken` use it so a new ExchangeResult field can't drift between login and refresh. - token-store: rename internal `persistBundle` → `writeBundle` so it doesn't shadow the shared module's `persistBundle` from `persist.ts`. - token-store: extracted `refOnlySnapshot(records)` helper so `active()` and `setDefault()` build the explicit-ref snapshot the same way. - flow.ts: dropped the meta comment justifying the persistence refactor (per AGENTS.md — comments explain non-obvious business logic, not refactoring decisions). Tests (386 -> 397): - New `persist.test.ts` covers the persistBundle helper (setBundle vs set fallback, promoteDefault threading) and bundleFromExchange (refresh-token rotation on/off, carry-forward of refresh expiry). - flow.test: new test for the `set(token)` fallback path when a custom store omits `setBundle` — the back-compat floor was previously unexercised because the in-test store implements both. - token-store.test: new test for `hasRefreshToken: true` + refresh-slot read failure (downgrades to "no refresh", access token still served). - token-store.test: new test for `hasRefreshToken: undefined` (legacy- migration state) — must read the refresh slot, not skip it. - pkce.test: new test for AUTH_REFRESH_TRANSIENT on a 2xx non-JSON refresh response (`exchangeCode` already covered the analogous case). README: - Added `getConfigPath` import line to the silent-refresh example so the snippet is actually copy-pasteable. Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 1 + src/auth/flow.test.ts | 44 +++++++++ src/auth/flow.ts | 17 ++-- src/auth/keyring/migrate.ts | 61 ++++++++----- src/auth/keyring/record-write.ts | 20 ++-- src/auth/keyring/token-store.test.ts | 46 ++++++++++ src/auth/keyring/token-store.ts | 50 ++++++---- src/auth/persist.test.ts | 132 +++++++++++++++++++++++++++ src/auth/persist.ts | 45 ++++++++- src/auth/providers/pkce.test.ts | 29 ++++++ src/auth/refresh.ts | 15 ++- src/auth/types.ts | 11 ++- 12 files changed, 396 insertions(+), 75 deletions(-) create mode 100644 src/auth/persist.test.ts diff --git a/README.md b/README.md index 82215f3..cf6a769 100644 --- a/README.md +++ b/README.md @@ -403,6 +403,7 @@ The helper is best-effort throughout: any failure (offline keyring, network erro For OAuth providers whose servers issue refresh tokens (Outline, anything ALCs / DCR-based), `refreshAccessToken` keeps users signed in past the access token's expiry without re-running ` auth login`. Wire it through your CLI's `getApiToken()` so every authenticated request consults the same code path. The `createPkceProvider` ships with a `refreshToken` implementation; bespoke providers add their own. ```ts +import { getConfigPath } from '@doist/cli-core' import { refreshAccessToken } from '@doist/cli-core/auth' export async function getApiToken(): Promise { diff --git a/src/auth/flow.test.ts b/src/auth/flow.test.ts index 17f30db..4e59a0a 100644 --- a/src/auth/flow.test.ts +++ b/src/auth/flow.test.ts @@ -203,6 +203,50 @@ describe('runOAuthFlow', () => { }) }) + it('falls back to set(token) when the store does not implement setBundle (custom-store back-compat)', async () => { + // Custom `TokenStore` implementations predating refresh-token + // support only ship `set(account, token)`. The shared + // `persistBundle` helper degrades gracefully — login still works, + // but refresh metadata is lost (subsequent refreshes throw + // AUTH_REFRESH_UNAVAILABLE). Without this test the back-compat + // path is unexercised and a regression that always required + // `setBundle` would silently break consumers. + const setSpy = vi.fn(async (_account: Account, _token: string) => {}) + const legacyStore: TokenStore = { + async active() { + return null + }, + set: setSpy, + // No setBundle — that's the whole point. + async clear() {}, + async list() { + return [] + }, + async setDefault() {}, + } + const { provider, getRedirect } = instrument({ + exchangeCode: async () => ({ + accessToken: 'tok-legacy', + refreshToken: 'rt-1', + expiresAt: Date.now() + 3_600_000, + }), + }) + + await runOAuthFlow( + flowOptions({ + provider, + store: legacyStore, + openBrowser: driveCallback(getRedirect), + }), + ) + + // The legacy `set` path receives just the access token — refresh + + // expiry get dropped (correct degraded behaviour for stores that + // can't persist them). + expect(setSpy).toHaveBeenCalledTimes(1) + expect(setSpy.mock.calls[0][1]).toBe('tok-legacy') + }) + it('skips validateToken when exchangeCode returns an account', async () => { const validateToken = vi.fn(async () => ({ id: 'WRONG', email: 'x@x' })) const { provider, getRedirect } = instrument({ diff --git a/src/auth/flow.ts b/src/auth/flow.ts index 87620a0..9416121 100644 --- a/src/auth/flow.ts +++ b/src/auth/flow.ts @@ -4,7 +4,7 @@ import { type IncomingMessage, type Server, type ServerResponse, createServer } import { promisify } from 'node:util' import { CliError, getErrorMessage } from '../errors.js' import { isStdoutTTY } from '../terminal.js' -import { persistBundle } from './persist.js' +import { bundleFromExchange, persistBundle } from './persist.js' import { generateState } from './pkce.js' import type { AuthAccount, AuthProvider, TokenStore } from './types.js' @@ -189,15 +189,12 @@ export async function runOAuthFlow( checkAborted() try { - // Shared persistence helper preferred over open-coding the - // `setBundle ? … : set(…)` policy — refresh.ts uses the same - // helper so login and silent refresh can't drift on how - // bundles get stored. - await persistBundle(options.store, account, { - accessToken: exchange.accessToken, - refreshToken: exchange.refreshToken, - accessTokenExpiresAt: exchange.expiresAt, - refreshTokenExpiresAt: exchange.refreshTokenExpiresAt, + await persistBundle(options.store, account, bundleFromExchange(exchange), { + // Explicit-login path: pin this account as default when + // nothing is pinned yet so the first login on a fresh + // config auto-selects it. `refreshAccessToken` omits this + // flag so silent refresh never mutates selection. + promoteDefault: true, }) } catch (error) { if (error instanceof CliError) throw error diff --git a/src/auth/keyring/migrate.ts b/src/auth/keyring/migrate.ts index f176c90..2224c45 100644 --- a/src/auth/keyring/migrate.ts +++ b/src/auth/keyring/migrate.ts @@ -154,33 +154,48 @@ export async function migrateLegacyAuth( return skipped(silent, logPrefix, 'identify-failed', getErrorMessage(error)) } - // `writeRecordWithKeyringFallback` swallows `SecureStoreUnavailableError` - // internally (writing to `fallbackToken` instead), so any error here is - // a non-keyring failure — typically a `userRecords.upsert` rejection. + // Skip the write entirely when the v2 record already exists for this + // account. A `marker-write-failed` retry may land on a store where the + // user has since completed a v2 login (with a refresh token + expiry + // on the record). `userRecords.upsert` is replace-not-merge — writing + // here with a legacy access-only bundle would clobber that v2 metadata + // (`hasRefreshToken`, `accessTokenExpiresAt`, …) and silently disable + // silent refresh for the account. The legacy token has no authority + // over refresh state, so the safest answer is "don't touch a record + // that already exists — just complete the migration marker". + let existingRecords: Awaited['list']>> try { - const accountSlot = accountForUser(account.id) - await writeRecordWithKeyringFallback({ - secureStore: createSecureStore({ serviceName, account: accountSlot }), - // The refresh slot is wired through so the helper signature - // doesn't have to special-case migration. `purgeRefreshSlot: - // false` below tells it not to touch the slot — a retry after - // `marker-write-failed` may land on an account that has since - // logged in via the v2 flow and now has a valid refresh secret. - // The legacy token is access-only and has no authority over - // refresh state; running the defensive delete would silently - // disable silent refresh for that account. - refreshSecureStore: createSecureStore({ - serviceName, - account: refreshAccountSlot(accountSlot), - }), - userRecords, - account, - bundle: { accessToken: legacyToken.token }, - purgeRefreshSlot: false, - }) + existingRecords = await userRecords.list() } catch (error) { return skipped(silent, logPrefix, 'user-record-write-failed', getErrorMessage(error)) } + const existing = existingRecords.find((r) => r.account.id === account.id) + if (!existing) { + // `writeRecordWithKeyringFallback` swallows `SecureStoreUnavailableError` + // internally (writing to `fallbackToken` instead), so any error here + // is a non-keyring failure — typically a `userRecords.upsert` + // rejection. + try { + const accountSlot = accountForUser(account.id) + await writeRecordWithKeyringFallback({ + secureStore: createSecureStore({ serviceName, account: accountSlot }), + // Refresh slot wired through but never touched on the legacy + // path — `purgeRefreshSlot: false` opts out of the defensive + // delete so a hand-edited refresh secret survives (though + // legacy single-user state never had one). + refreshSecureStore: createSecureStore({ + serviceName, + account: refreshAccountSlot(accountSlot), + }), + userRecords, + account, + bundle: { accessToken: legacyToken.token }, + purgeRefreshSlot: false, + }) + } catch (error) { + return skipped(silent, logPrefix, 'user-record-write-failed', getErrorMessage(error)) + } + } // Default promotion is best-effort and **only fires when nothing is // already pinned**. A retry after a previous `markMigrated()` failure diff --git a/src/auth/keyring/record-write.ts b/src/auth/keyring/record-write.ts index 7dafd82..77b3836 100644 --- a/src/auth/keyring/record-write.ts +++ b/src/auth/keyring/record-write.ts @@ -142,18 +142,14 @@ export async function writeRecordWithKeyringFallback { expect(setDefaultSpy).not.toHaveBeenCalled() }) + it('downgrades to no-refresh when hasRefreshToken is true but the refresh-slot read fails', async () => { + // The refresh slot is non-essential — a keyring hiccup on it must + // NOT fail the access-token lookup (the user can still authenticate; + // refresh will just fail next time and trigger re-login). Without + // this guard, every transient refresh-slot keyring error would + // surface as `AUTH_STORE_READ_FAILED` and block the CLI. + const refreshKeyring = buildSingleSlot() + refreshKeyring.getSpy.mockRejectedValueOnce(new SecureStoreUnavailableError('flaky')) + const { store, keyring } = fixture({ + keyring: buildSingleSlot({ secret: 'at-1' }), + refreshKeyring, + records: { '42': { account, hasRefreshToken: true } }, + defaultId: '42', + }) + + const snapshot = await store.active() + + expect(snapshot?.token).toBe('at-1') + expect(snapshot?.bundle?.refreshToken).toBeUndefined() + // Access slot was consulted (the failure is isolated to the + // refresh-slot path). + expect(keyring.getSpy).toHaveBeenCalled() + expect(refreshKeyring.getSpy).toHaveBeenCalled() + }) + + it('reads the refresh slot when hasRefreshToken is undefined (unknown state from legacy migration)', async () => { + // The legacy-migration path writes `hasRefreshToken: undefined` — + // it has no authority over refresh state. The gate must NOT treat + // undefined as "no", otherwise a refresh secret that a v2 login + // wrote into the sibling slot becomes silently invisible after + // the migration retry. + const refreshKeyring = buildSingleSlot({ secret: 'rt-survived-migration' }) + const { store } = fixture({ + keyring: buildSingleSlot({ secret: 'at-from-migration' }), + refreshKeyring, + records: { '42': { account } }, // hasRefreshToken: undefined + defaultId: '42', + }) + + const snapshot = await store.active() + + expect(snapshot?.token).toBe('at-from-migration') + expect(snapshot?.bundle?.refreshToken).toBe('rt-survived-migration') + expect(refreshKeyring.getSpy).toHaveBeenCalled() + }) + it('skips the refresh-slot keyring read when hasRefreshToken is false (hot-path optimisation)', async () => { const { refreshKeyring, store } = fixture({ keyring: buildSingleSlot({ secret: 'at-only' }), diff --git a/src/auth/keyring/token-store.ts b/src/auth/keyring/token-store.ts index c4220ce..c666bbd 100644 --- a/src/auth/keyring/token-store.ts +++ b/src/auth/keyring/token-store.ts @@ -160,11 +160,17 @@ export function createKeyringTokenStore( } throw error }) - const refreshPromise = record.hasRefreshToken - ? refreshStoreFor(record.account) - .getSecret() - .catch(() => null) - : Promise.resolve(null) + // `!== false` rather than truthy: a record with `hasRefreshToken: + // undefined` (e.g. one written by the legacy-migration path which + // has no authority over refresh state) means "unknown — try the + // slot". Treating undefined as "no" here would silently hide a v2 + // refresh secret that a later v2 login put in the sibling slot. + const refreshPromise = + record.hasRefreshToken !== false + ? refreshStoreFor(record.account) + .getSecret() + .catch(() => null) + : Promise.resolve(null) const [rawAccess, rawRefresh] = await Promise.all([accessPromise, refreshPromise]) @@ -179,14 +185,21 @@ export function createKeyringTokenStore( } } + /** Snapshot for a ref-only resolve path; skips the `getDefaultId` read. */ + function refOnlySnapshot(records: UserRecord[]): Snapshot { + return { records, defaultId: null } + } + /** * Shared persistence path for `set` / `setBundle`. The `promoteDefault` - * flag is only `true` on the explicit `set` (login) path so that a - * silent refresh on a config with no pinned default doesn't accidentally - * pin the refreshed account as the new default — refresh is a - * credential-rotation operation, not an account-selection signal. + * flag is `true` for explicit-login callers (the legacy `set(token)` + * path always promotes; the `setBundle(account, bundle, { + * promoteDefault: true })` path opts in via the option). Silent refresh + * never promotes — credential rotation must not mutate account + * selection. Renamed away from the shared module's `persistBundle` so + * navigation isn't ambiguous. */ - async function persistBundle( + async function writeBundle( account: TAccount, bundle: TokenBundle, promoteDefault: boolean, @@ -222,10 +235,10 @@ export function createKeyringTokenStore( return { async active(ref) { - const snapshot: Snapshot = + const snapshot = ref === undefined ? await readFullSnapshot() - : { records: await userRecords.list(), defaultId: null } + : refOnlySnapshot(await userRecords.list()) const record = resolveTarget(snapshot, ref) if (!record) return null @@ -241,13 +254,14 @@ export function createKeyringTokenStore( }, async set(account, token) { - await persistBundle(account, { accessToken: token }, true) + await writeBundle(account, { accessToken: token }, true) }, - async setBundle(account, bundle) { - // No default promotion: a silent refresh shouldn't mutate - // account selection. The caller already chose this account. - await persistBundle(account, bundle, false) + async setBundle(account, bundle, setOptions) { + // Default promotion is opt-in here: callers from the explicit + // login path pass `promoteDefault: true`; silent refresh omits + // it so credential rotation doesn't mutate account selection. + await writeBundle(account, bundle, setOptions?.promoteDefault === true) }, async clear(ref) { @@ -304,7 +318,7 @@ export function createKeyringTokenStore( }, async setDefault(ref) { - const snapshot: Snapshot = { records: await userRecords.list(), defaultId: null } + const snapshot = refOnlySnapshot(await userRecords.list()) const record = resolveTarget(snapshot, ref) if (!record) { throw accountNotFoundError(ref) diff --git a/src/auth/persist.test.ts b/src/auth/persist.test.ts new file mode 100644 index 0000000..d4816cd --- /dev/null +++ b/src/auth/persist.test.ts @@ -0,0 +1,132 @@ +import { describe, expect, it } from 'vitest' + +import { bundleFromExchange, persistBundle } from './persist.js' +import type { AuthAccount, TokenBundle, TokenStore } from './types.js' + +type Account = AuthAccount & { email: string } +const account: Account = { id: '1', email: 'a@b' } + +/** + * Minimal in-memory store. The `setBundle` slot is optional so tests can + * exercise both the modern (bundle-aware) path and the legacy + * (`set`-only) fallback. + */ +function buildStore(opts: { withSetBundle: boolean }): TokenStore & { + setCalls: Array<{ token: string }> + setBundleCalls: Array<{ bundle: TokenBundle; promoteDefault: boolean | undefined }> +} { + const setCalls: Array<{ token: string }> = [] + const setBundleCalls: Array<{ bundle: TokenBundle; promoteDefault: boolean | undefined }> = [] + const store: TokenStore = { + async active() { + return null + }, + async set(_account, token) { + setCalls.push({ token }) + }, + async clear() {}, + async list() { + return [] + }, + async setDefault() {}, + } + if (opts.withSetBundle) { + store.setBundle = async (_account, bundle, setOptions) => { + setBundleCalls.push({ bundle, promoteDefault: setOptions?.promoteDefault }) + } + } + return Object.assign(store, { setCalls, setBundleCalls }) +} + +describe('persistBundle', () => { + it('calls setBundle with the full bundle when the store implements it', async () => { + const store = buildStore({ withSetBundle: true }) + const bundle: TokenBundle = { + accessToken: 'at-1', + refreshToken: 'rt-1', + accessTokenExpiresAt: 100, + } + + await persistBundle(store, account, bundle) + + expect(store.setBundleCalls).toHaveLength(1) + expect(store.setBundleCalls[0].bundle).toEqual(bundle) + expect(store.setCalls).toHaveLength(0) + }) + + it('threads promoteDefault: true through to setBundle (explicit login path)', async () => { + const store = buildStore({ withSetBundle: true }) + + await persistBundle(store, account, { accessToken: 'at' }, { promoteDefault: true }) + + expect(store.setBundleCalls[0].promoteDefault).toBe(true) + }) + + it('passes promoteDefault: undefined to setBundle when omitted (silent refresh path)', async () => { + // Silent refresh must NOT promote default. The shared helper + // forwards the missing flag as `undefined`; downstream stores must + // treat `undefined` and `false` identically. + const store = buildStore({ withSetBundle: true }) + + await persistBundle(store, account, { accessToken: 'at' }) + + expect(store.setBundleCalls[0].promoteDefault).toBeUndefined() + }) + + it('falls back to set(token) when the store omits setBundle (legacy single-token store)', async () => { + // Compatibility floor for custom stores predating refresh-token + // support. Refresh + expiry metadata is intentionally dropped — + // single-token stores can't persist them. + const store = buildStore({ withSetBundle: false }) + + await persistBundle(store, account, { + accessToken: 'at-fallback', + refreshToken: 'rt-dropped', + accessTokenExpiresAt: 999, + }) + + expect(store.setCalls).toEqual([{ token: 'at-fallback' }]) + expect(store.setBundleCalls).toHaveLength(0) + }) +}) + +describe('bundleFromExchange', () => { + it('maps the access token + access expiry + refresh from the exchange', async () => { + const expiresAt = Date.now() + 3_600_000 + const bundle = bundleFromExchange({ + accessToken: 'at-1', + refreshToken: 'rt-1', + expiresAt, + }) + + expect(bundle).toEqual({ + accessToken: 'at-1', + refreshToken: 'rt-1', + accessTokenExpiresAt: expiresAt, + refreshTokenExpiresAt: undefined, + }) + }) + + it('carries forward previous refresh token when the exchange omits one (rotation off)', async () => { + // Some OAuth servers don't rotate refresh tokens. The helper must + // preserve the stored refresh through subsequent refresh exchanges + // — otherwise the next refresh would have no refresh_token to + // POST and surface as AUTH_REFRESH_UNAVAILABLE. + const previous: TokenBundle = { + accessToken: 'old', + refreshToken: 'keep-me', + refreshTokenExpiresAt: 12345, + } + const bundle = bundleFromExchange({ accessToken: 'new-at', expiresAt: 999 }, previous) + + expect(bundle.refreshToken).toBe('keep-me') + expect(bundle.refreshTokenExpiresAt).toBe(12345) + }) + + it('honours a fresh refresh token from the exchange over the previous one (rotation on)', async () => { + const previous: TokenBundle = { accessToken: 'old', refreshToken: 'old-rt' } + const bundle = bundleFromExchange({ accessToken: 'new', refreshToken: 'new-rt' }, previous) + + expect(bundle.refreshToken).toBe('new-rt') + }) +}) diff --git a/src/auth/persist.ts b/src/auth/persist.ts index 7662967..d5ab1b1 100644 --- a/src/auth/persist.ts +++ b/src/auth/persist.ts @@ -1,4 +1,19 @@ -import type { AuthAccount, TokenBundle, TokenStore } from './types.js' +import type { AuthAccount, ExchangeResult, TokenBundle, TokenStore } from './types.js' + +export type PersistBundleOptions = { + /** + * Ask a `setBundle`-implementing store to pin this account as the + * default when nothing is pinned yet. Login (`runOAuthFlow`) sets this + * to `true` so the first login on a fresh config auto-selects the + * account; silent refresh (`refreshAccessToken`) omits it so a refresh + * never mutates account selection. + * + * Ignored when the store doesn't implement `setBundle` — the legacy + * `set(token)` fallback always promotes (matches its historical + * behaviour and the single-token-store mental model). + */ + promoteDefault?: boolean +} /** * Persist a `TokenBundle` through whatever method the store implements. @@ -16,10 +31,36 @@ export async function persistBundle( store: TokenStore, account: TAccount, bundle: TokenBundle, + options: PersistBundleOptions = {}, ): Promise { if (store.setBundle) { - await store.setBundle(account, bundle) + await store.setBundle(account, bundle, { promoteDefault: options.promoteDefault }) } else { await store.set(account, bundle.accessToken) } } + +/** + * Translate an `ExchangeResult` (returned by `exchangeCode` / `refreshToken`) + * into the persisted `TokenBundle` shape. Centralised so a new field added + * to either side (e.g. wiring `refreshTokenExpiresAt` for OAuth servers + * that advertise it) lands in one place instead of drifting between + * `runOAuthFlow` and `refreshAccessToken`. + * + * `previous` carries-forward credentials that the response didn't refresh. + * Refresh-token rotation is the most common case: many OAuth servers + * rotate on every refresh, others reuse — persist whatever comes back, fall + * back to the previous when the field is absent. Pass `undefined` from the + * login path (no previous bundle to carry forward). + */ +export function bundleFromExchange( + exchange: ExchangeResult, + previous?: TokenBundle, +): TokenBundle { + return { + accessToken: exchange.accessToken, + refreshToken: exchange.refreshToken ?? previous?.refreshToken, + accessTokenExpiresAt: exchange.expiresAt, + refreshTokenExpiresAt: exchange.refreshTokenExpiresAt ?? previous?.refreshTokenExpiresAt, + } +} diff --git a/src/auth/providers/pkce.test.ts b/src/auth/providers/pkce.test.ts index a8074e9..13897a2 100644 --- a/src/auth/providers/pkce.test.ts +++ b/src/auth/providers/pkce.test.ts @@ -261,5 +261,34 @@ describe('createPkceProvider', () => { }), ).rejects.toMatchObject({ code: 'AUTH_REFRESH_TRANSIENT' }) }) + + it('throws AUTH_REFRESH_TRANSIENT on a 2xx non-JSON response (misconfigured proxy)', async () => { + // `exchangeCode` already maps non-JSON 2xx to AUTH_TOKEN_EXCHANGE_FAILED, + // but the refresh path routes through a different error code + // and was previously uncovered. A regression that swapped the + // error code (e.g. classified a JSON parse error as + // AUTH_REFRESH_EXPIRED) would otherwise slip through. + const provider = createPkceProvider({ + authorizeUrl: 'unused', + tokenUrl: 'https://example.com/oauth/token', + clientId: 'cid', + validate, + fetchImpl: (() => + Promise.resolve( + new Response('oops', { + status: 200, + headers: { 'Content-Type': 'text/html' }, + }), + )) as typeof fetch, + }) + + await expect( + provider.refreshToken!({ + refreshToken: 'rt', + account, + handshake: {}, + }), + ).rejects.toMatchObject({ code: 'AUTH_REFRESH_TRANSIENT' }) + }) }) }) diff --git a/src/auth/refresh.ts b/src/auth/refresh.ts index a509694..ea2f5ac 100644 --- a/src/auth/refresh.ts +++ b/src/auth/refresh.ts @@ -1,7 +1,7 @@ import { closeSync, openSync, unlinkSync } from 'node:fs' import { setTimeout as sleep } from 'node:timers/promises' import { CliError } from '../errors.js' -import { persistBundle } from './persist.js' +import { bundleFromExchange, persistBundle } from './persist.js' import type { AccountRef, AuthAccount, AuthProvider, TokenBundle, TokenStore } from './types.js' import { requireSnapshotForRef } from './user-flag.js' @@ -136,14 +136,11 @@ export async function refreshAccessToken( // pre-refresh account when the provider doesn't return one. const refreshedAccount = exchange.account ?? account - const nextBundle: TokenBundle = { - accessToken: exchange.accessToken, - // Rotate when the server returns one, keep the previous when it - // doesn't. - refreshToken: exchange.refreshToken ?? bundle.refreshToken, - accessTokenExpiresAt: exchange.expiresAt, - refreshTokenExpiresAt: exchange.refreshTokenExpiresAt ?? bundle.refreshTokenExpiresAt, - } + // `bundleFromExchange` carries forward refresh + refresh expiry + // when the server omits them (rotation isn't universal). Silent + // refresh persists without `promoteDefault` so credential rotation + // never mutates account selection. + const nextBundle = bundleFromExchange(exchange, bundle) await persistBundle(options.store, refreshedAccount, nextBundle) return { diff --git a/src/auth/types.ts b/src/auth/types.ts index 88cc8ed..d0cdeed 100644 --- a/src/auth/types.ts +++ b/src/auth/types.ts @@ -151,8 +151,17 @@ export type TokenStore = { * which loses refresh + expiry metadata. The built-in * `createKeyringTokenStore` implements this so refresh-token flows work * end-to-end. + * + * `options.promoteDefault: true` lets explicit-login callers ask the + * store to pin this account as the default when nothing is pinned yet + * (mirrors `set`'s implicit promotion). Silent refresh paths omit it so + * a refresh never mutates account selection. */ - setBundle?(account: TAccount, bundle: TokenBundle): Promise + setBundle?( + account: TAccount, + bundle: TokenBundle, + options?: { promoteDefault?: boolean }, + ): Promise /** Remove a stored credential. No-op when `ref` doesn't match. */ clear(ref?: AccountRef): Promise /** Every stored account with a default marker. */ From 05463f75563b6db44f19257b76996edd9d5ac567 Mon Sep 17 00:00:00 2001 From: lmjabreu Date: Tue, 19 May 2026 00:23:08 +0100 Subject: [PATCH 05/10] fix(auth): address doistbot round-4 review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Correctness (3 P1): - migrate.ts no longer races between the existence check and the upsert. New optional `UserRecordStore.tryInsert(record): Promise` contract lets consumers commit to an atomic insert-if-absent; migrate uses it when available, falls back to list-then-write otherwise (small race window, acceptable for postinstall-style invocations). - migrate.ts drops `purgeRefreshSlot: false`. The existence guard already prevents clobbering v2 refresh state, and opting out caused the migrated record to ship `hasRefreshToken: undefined`, forcing a useless keyring round-trip on every CLI command. Now the legacy record persists `hasRefreshToken: false` and `active()` skips the refresh slot for migrated users. - refresh.ts re-reads the store after a lock-acquisition TIMEOUT, not just on successful acquisition. The lock holder may have completed the refresh while we were waiting; without the re-read the waiter still POSTs its own refresh and defeats the lock's main load-saving purpose. Brings runtime behaviour in line with the README claim. P2: - `persistBundle` omits the `setBundle` third arg entirely when `promoteDefault` is unset (silent-refresh path). Lets custom stores using presence-based handling distinguish login from refresh on the option-presence axis. - `KeyringTokenStore.setBundle` overridden as required (the impl always provides it), so callers/tests can drop the `store.setBundle!` non-null assertion. - Refresh-slot read `.catch` narrowed to `SecureStoreUnavailableError` — unexpected backend/programming errors propagate as `AUTH_STORE_READ_FAILED` instead of silently downgrading to "no refresh". - `flow.test` fake store now captures the literal `setBundle` options arg. New assertion proves `runOAuthFlow` opts into `promoteDefault: true` on the explicit login path. - `refreshAccountSlot` moved to a new internal `keyring/slot-naming.ts` module (not re-exported). Production code + tests + migrate all import from there, so the wire format is single-sourced. P3: - `token-store.test` fixture routes via `refreshAccountSlot()` instead of hard-coding `/refresh`. - `record-write.ts` comment about `hasRefreshToken: undefined` corrected to reflect that readers DO probe the slot (vs the previous wording that suggested they fall straight through to access-only). Tests (397 -> 400): - migrate.test: new tests for the `tryInsert` happy path (atomic insert → record + access slot updated) and the "v2 record already exists" path (`tryInsert` returns false → v2 record and both keyring slots left untouched). - refresh.test: new test for the lock-timeout re-read (lock held by another process → acquireFileLock returns null → re-read still surfaces the rotated snapshot, no duplicate POST). - persist.test rewritten to assert literal omission of the `setBundle` options arg on the silent-refresh path. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/auth/flow.test.ts | 27 +++++-- src/auth/keyring/migrate.test.ts | 80 +++++++++++++++++++++ src/auth/keyring/migrate.ts | 103 +++++++++++++++++---------- src/auth/keyring/record-write.ts | 7 +- src/auth/keyring/slot-naming.ts | 12 ++++ src/auth/keyring/token-store.test.ts | 13 ++-- src/auth/keyring/token-store.ts | 25 +++++-- src/auth/keyring/types.ts | 13 ++++ src/auth/persist.test.ts | 30 +++++--- src/auth/persist.ts | 12 +++- src/auth/refresh.test.ts | 47 ++++++++++++ src/auth/refresh.ts | 28 ++++++-- 12 files changed, 323 insertions(+), 74 deletions(-) create mode 100644 src/auth/keyring/slot-naming.ts diff --git a/src/auth/flow.test.ts b/src/auth/flow.test.ts index 4e59a0a..f13e5f6 100644 --- a/src/auth/flow.test.ts +++ b/src/auth/flow.test.ts @@ -25,12 +25,18 @@ import { type RunOAuthFlowOptions, runOAuthFlow } from './flow.js' import type { AuthProvider, TokenBundle, TokenStore } from './types.js' type Account = { id: string; label?: string; email: string } -type FakeStoreState = { account: Account; bundle: TokenBundle } +type FakeStoreState = { + account: Account + bundle: TokenBundle + /** Literal options arg seen by `setBundle` (`undefined` when omitted). */ + setBundleOptions: { promoteDefault?: boolean } | undefined +} /** * Tiny in-memory `TokenStore` so the flow tests don't need disk I/O. Keeps - * the full `TokenBundle` so tests can verify that `runOAuthFlow` persists - * refresh + expiry, not just the access token. + * the full `TokenBundle` and the literal `setBundle` options arg so tests + * can verify that `runOAuthFlow` persists refresh + expiry AND opts into + * `promoteDefault: true` on the explicit login path. */ function fakeStore(): TokenStore & { last?: FakeStoreState } { const state: { last?: FakeStoreState } = {} @@ -45,10 +51,14 @@ function fakeStore(): TokenStore & { last?: FakeStoreState } { : null }, async set(account, token) { - state.last = { account, bundle: { accessToken: token } } + state.last = { + account, + bundle: { accessToken: token }, + setBundleOptions: undefined, + } }, - async setBundle(account, bundle) { - state.last = { account, bundle } + async setBundle(account, bundle, setOptions) { + state.last = { account, bundle, setBundleOptions: setOptions } }, async clear() { state.last = undefined @@ -201,6 +211,11 @@ describe('runOAuthFlow', () => { accessTokenExpiresAt: accessExpiresAt, refreshTokenExpiresAt: refreshExpiresAt, }) + // Login is the canonical default-promotion trigger — the helper + // must thread `promoteDefault: true` through. A regression that + // dropped the flag would let an empty-default config silently + // stay un-pinned after the first login. + expect(store.last?.setBundleOptions).toEqual({ promoteDefault: true }) }) it('falls back to set(token) when the store does not implement setBundle (custom-store back-compat)', async () => { diff --git a/src/auth/keyring/migrate.test.ts b/src/auth/keyring/migrate.test.ts index 0204295..a0d22d3 100644 --- a/src/auth/keyring/migrate.test.ts +++ b/src/auth/keyring/migrate.test.ts @@ -348,6 +348,86 @@ describe('migrateLegacyAuth — stderr privacy', () => { expect(lines).not.toContain('sensitive') }) + it('uses atomic tryInsert when the consumer supplies it (no race on the existence check)', async () => { + // Without `tryInsert`, the migration does list-then-upsert which + // is racy: a parallel v2 login could complete between the two + // calls and we'd clobber it. With `tryInsert`, the consumer + // commits to an atomic check-and-insert and we trust their answer. + const tryInsert = vi.fn(async (_record: UserRecord) => true) + const harness = buildUserRecords() + harness.store.tryInsert = tryInsert + const km = buildKeyringMap() + km.slots.set(LEGACY, { secret: 'legacy_tok' }) + mockedCreateSecureStore.mockImplementation(km.create) + + const result = await migrateLegacyAuth({ + serviceName: SERVICE, + legacyAccount: LEGACY, + userRecords: harness.store, + hasMigrated: async () => false, + markMigrated: async () => {}, + loadLegacyPlaintextToken: async () => null, + identifyAccount: async () => ({ id: '1', email: 'a@b' }), + silent: true, + }) + + expect(result).toMatchObject({ status: 'migrated' }) + expect(tryInsert).toHaveBeenCalledTimes(1) + // The inserted record persists `hasRefreshToken: false` (legacy + // tokens never have a refresh slot) so `active()` skips the + // refresh-slot keyring round-trip per command for migrated users. + expect(tryInsert.mock.calls[0][0]).toMatchObject({ + account: { id: '1' }, + hasRefreshToken: false, + }) + // Access secret landed in the per-user slot. + expect(km.slots.get('user-1')?.secret).toBe('legacy_tok') + }) + + it('leaves the v2 record alone when tryInsert returns false (existing v2 login)', async () => { + // The whole point of routing through `tryInsert`: when a v2 + // login has already completed for the same account between + // postinstall attempts, the migration is a no-op on the record + // and never touches the per-user keyring slot. Without this guard + // the legacy access-only bundle would clobber `hasRefreshToken` / + // expiry on the v2 record. + const tryInsert = vi.fn(async (_record: UserRecord) => false) + const harness = buildUserRecords() + // Pre-seed a v2 record with a refresh token to prove it survives. + harness.state.records.set('1', { + account: { id: '1', email: 'a@b' }, + hasRefreshToken: true, + accessTokenExpiresAt: 99999, + }) + harness.store.tryInsert = tryInsert + const km = buildKeyringMap() + km.slots.set(LEGACY, { secret: 'legacy_tok' }) + km.slots.set('user-1', { secret: 'v2_access_token' }) + km.slots.set('user-1/refresh', { secret: 'v2_refresh_token' }) + mockedCreateSecureStore.mockImplementation(km.create) + + await migrateLegacyAuth({ + serviceName: SERVICE, + legacyAccount: LEGACY, + userRecords: harness.store, + hasMigrated: async () => false, + markMigrated: async () => {}, + loadLegacyPlaintextToken: async () => null, + identifyAccount: async () => ({ id: '1', email: 'a@b' }), + silent: true, + }) + + // V2 record unchanged. + expect(harness.state.records.get('1')).toMatchObject({ + hasRefreshToken: true, + accessTokenExpiresAt: 99999, + }) + // Per-user keyring slots untouched — the legacy access token + // never overwrote v2's. + expect(km.slots.get('user-1')?.secret).toBe('v2_access_token') + expect(km.slots.get('user-1/refresh')?.secret).toBe('v2_refresh_token') + }) + it('the skip line is generic and does not echo the raw exception text', async () => { const { result } = await runMigration({ slots: { [LEGACY]: { secret: 'legacy_tok' } }, diff --git a/src/auth/keyring/migrate.ts b/src/auth/keyring/migrate.ts index 2224c45..c5c2e1e 100644 --- a/src/auth/keyring/migrate.ts +++ b/src/auth/keyring/migrate.ts @@ -7,7 +7,7 @@ import { type SecureStore, SecureStoreUnavailableError, } from './secure-store.js' -import { refreshAccountSlot } from './token-store.js' +import { refreshAccountSlot } from './slot-naming.js' import type { UserRecordStore } from './types.js' export type MigrateLegacyAuthOptions = { @@ -154,47 +154,74 @@ export async function migrateLegacyAuth( return skipped(silent, logPrefix, 'identify-failed', getErrorMessage(error)) } - // Skip the write entirely when the v2 record already exists for this - // account. A `marker-write-failed` retry may land on a store where the - // user has since completed a v2 login (with a refresh token + expiry - // on the record). `userRecords.upsert` is replace-not-merge — writing - // here with a legacy access-only bundle would clobber that v2 metadata - // (`hasRefreshToken`, `accessTokenExpiresAt`, …) and silently disable - // silent refresh for the account. The legacy token has no authority - // over refresh state, so the safest answer is "don't touch a record - // that already exists — just complete the migration marker". - let existingRecords: Awaited['list']>> + // Don't clobber an existing v2 record. A `marker-write-failed` retry + // may land on a store where the user has since completed a v2 login + // (with a refresh token + expiry on the record). `userRecords.upsert` + // is replace-not-merge, so writing the legacy access-only bundle here + // would wipe `hasRefreshToken` / expiry and silently disable silent + // refresh. The legacy token has no authority over refresh state. + // + // Prefer the atomic `tryInsert` when the consumer supplies it — + // eliminates the TOCTOU race between a list-based existence check and + // the write. Fall back to list-then-write when not implemented; the + // race window is small (microseconds for in-process upserts) and + // acceptable for typical postinstall-style invocations. try { - existingRecords = await userRecords.list() - } catch (error) { - return skipped(silent, logPrefix, 'user-record-write-failed', getErrorMessage(error)) - } - const existing = existingRecords.find((r) => r.account.id === account.id) - if (!existing) { - // `writeRecordWithKeyringFallback` swallows `SecureStoreUnavailableError` - // internally (writing to `fallbackToken` instead), so any error here - // is a non-keyring failure — typically a `userRecords.upsert` - // rejection. - try { - const accountSlot = accountForUser(account.id) - await writeRecordWithKeyringFallback({ - secureStore: createSecureStore({ serviceName, account: accountSlot }), - // Refresh slot wired through but never touched on the legacy - // path — `purgeRefreshSlot: false` opts out of the defensive - // delete so a hand-edited refresh secret survives (though - // legacy single-user state never had one). - refreshSecureStore: createSecureStore({ - serviceName, - account: refreshAccountSlot(accountSlot), - }), - userRecords, + const accountSlot = accountForUser(account.id) + if (userRecords.tryInsert) { + // Migration writes access-only; refresh slot defensively + // cleared so a hand-edited stale secret can't shadow the + // legacy state. `hasRefreshToken: false` is persisted so + // `active()` skips the refresh-slot round-trip per command. + // When the record already exists, `tryInsert` returns false + // and we leave the v2 record alone. + const refreshStore = createSecureStore({ + serviceName, + account: refreshAccountSlot(accountSlot), + }) + const inserted = await userRecords.tryInsert({ account, - bundle: { accessToken: legacyToken.token }, - purgeRefreshSlot: false, + fallbackToken: undefined, + hasRefreshToken: false, }) - } catch (error) { - return skipped(silent, logPrefix, 'user-record-write-failed', getErrorMessage(error)) + if (inserted) { + // Now persist the access secret + clear the refresh slot. + // Done after the record write because `tryInsert`'s + // atomicity guarantee is the load-bearing piece. + try { + await createSecureStore({ + serviceName, + account: accountSlot, + }).setSecret(legacyToken.token) + } catch (error) { + if (!(error instanceof SecureStoreUnavailableError)) throw error + // Keyring offline: re-upsert with the plaintext fallback. + await userRecords.upsert({ + account, + fallbackToken: legacyToken.token, + hasRefreshToken: false, + }) + } + // Best-effort cleanup of any stale refresh secret. + await refreshStore.deleteSecret().catch(() => undefined) + } + } else { + const existing = (await userRecords.list()).find((r) => r.account.id === account.id) + if (!existing) { + await writeRecordWithKeyringFallback({ + secureStore: createSecureStore({ serviceName, account: accountSlot }), + refreshSecureStore: createSecureStore({ + serviceName, + account: refreshAccountSlot(accountSlot), + }), + userRecords, + account, + bundle: { accessToken: legacyToken.token }, + }) + } } + } catch (error) { + return skipped(silent, logPrefix, 'user-record-write-failed', getErrorMessage(error)) } // Default promotion is best-effort and **only fires when nothing is diff --git a/src/auth/keyring/record-write.ts b/src/auth/keyring/record-write.ts index 77b3836..aa9a7dc 100644 --- a/src/auth/keyring/record-write.ts +++ b/src/auth/keyring/record-write.ts @@ -119,9 +119,10 @@ export async function writeRecordWithKeyringFallback - account.endsWith('/refresh') ? refreshKeyring : keyring, + account.endsWith(refreshSuffix) ? refreshKeyring : keyring, ) const harness = buildUserRecords() for (const [id, rec] of Object.entries(opts.records ?? {})) { diff --git a/src/auth/keyring/token-store.ts b/src/auth/keyring/token-store.ts index c666bbd..940fa3b 100644 --- a/src/auth/keyring/token-store.ts +++ b/src/auth/keyring/token-store.ts @@ -9,6 +9,7 @@ import { SecureStoreUnavailableError, type SecureStore, } from './secure-store.js' +import { refreshAccountSlot } from './slot-naming.js' import type { TokenStorageResult, UserRecord, UserRecordStore } from './types.js' export type CreateKeyringTokenStoreOptions = { @@ -36,6 +37,13 @@ export type CreateKeyringTokenStoreOptions = { } export type KeyringTokenStore = TokenStore & { + /** + * Override `TokenStore.setBundle?` as required — `createKeyringTokenStore` + * always provides it. Lets callers (and tests) drop the `store.setBundle!` + * non-null assertion when they know they're working with this concrete + * store. + */ + setBundle: NonNullable['setBundle']> /** Storage result from the most recent `set()` / `setBundle()` call, or `undefined` before any (and reset to `undefined` when the most recent call threw). */ getLastStorageResult(): TokenStorageResult | undefined /** Storage result from the most recent `clear()` call, or `undefined` before any (and reset to `undefined` when the most recent `clear()` threw or was a no-op). */ @@ -47,11 +55,6 @@ const DEFAULT_MATCH_ACCOUNT = ( ref: AccountRef, ): boolean => account.id === ref || account.label === ref -/** Sibling keyring slot for the refresh token. Single source of truth for the wire format. */ -export function refreshAccountSlot(accessSlot: string): string { - return `${accessSlot}/refresh` -} - /** * Multi-account `TokenStore` that keeps secrets in the OS credential manager * and per-user metadata in the consumer's `UserRecordStore`. Falls back to @@ -165,11 +168,21 @@ export function createKeyringTokenStore( // has no authority over refresh state) means "unknown — try the // slot". Treating undefined as "no" here would silently hide a v2 // refresh secret that a later v2 login put in the sibling slot. + // + // Refresh-slot read failures are tolerated **only** for the + // documented keyring-offline case (`SecureStoreUnavailableError`). + // Anything else (programming error, unexpected backend) propagates + // so it can't silently downgrade a real bug into "no refresh + // token". The access-token path runs in parallel and its own + // catch maps a keyring-offline failure to `AUTH_STORE_READ_FAILED`. const refreshPromise = record.hasRefreshToken !== false ? refreshStoreFor(record.account) .getSecret() - .catch(() => null) + .catch((error: unknown) => { + if (error instanceof SecureStoreUnavailableError) return null + throw error + }) : Promise.resolve(null) const [rawAccess, rawRefresh] = await Promise.all([accessPromise, refreshPromise]) diff --git a/src/auth/keyring/types.ts b/src/auth/keyring/types.ts index be41cfb..98779d9 100644 --- a/src/auth/keyring/types.ts +++ b/src/auth/keyring/types.ts @@ -63,6 +63,19 @@ export type UserRecordStore = { * `fallbackToken` over the keyring). Records are keyed by `account.id`. */ upsert(record: UserRecord): Promise + /** + * Optional atomic insert-if-absent. Returns `true` when the record was + * persisted, `false` when a record with the same `account.id` already + * existed (no write happened). Implementations that can guarantee + * atomicity (single-process file lock, DB transaction, …) should + * provide this so `migrateLegacyAuth` can avoid the TOCTOU race + * between its existence check and the upsert. When omitted, + * `migrateLegacyAuth` falls back to a list-then-upsert that has a + * tiny race window — acceptable for postinstall-style invocations + * but worth eliminating in production CLIs that run many concurrent + * processes. + */ + tryInsert?(record: UserRecord): Promise /** Remove the record whose `account.id` matches. */ remove(id: string): Promise /** The pinned default's `account.id`, or `null` when nothing is pinned. */ diff --git a/src/auth/persist.test.ts b/src/auth/persist.test.ts index d4816cd..fefc592 100644 --- a/src/auth/persist.test.ts +++ b/src/auth/persist.test.ts @@ -11,12 +11,24 @@ const account: Account = { id: '1', email: 'a@b' } * exercise both the modern (bundle-aware) path and the legacy * (`set`-only) fallback. */ +/** + * `setBundleCalls[].setOptions` carries the literal argument the caller + * passed in — `undefined` when the options arg was omitted entirely, + * otherwise the object as-is. Tests that need to discriminate between + * "omitted" and "passed as { promoteDefault: undefined }" rely on this. + */ function buildStore(opts: { withSetBundle: boolean }): TokenStore & { setCalls: Array<{ token: string }> - setBundleCalls: Array<{ bundle: TokenBundle; promoteDefault: boolean | undefined }> + setBundleCalls: Array<{ + bundle: TokenBundle + setOptions: { promoteDefault?: boolean } | undefined + }> } { const setCalls: Array<{ token: string }> = [] - const setBundleCalls: Array<{ bundle: TokenBundle; promoteDefault: boolean | undefined }> = [] + const setBundleCalls: Array<{ + bundle: TokenBundle + setOptions: { promoteDefault?: boolean } | undefined + }> = [] const store: TokenStore = { async active() { return null @@ -32,7 +44,7 @@ function buildStore(opts: { withSetBundle: boolean }): TokenStore & { } if (opts.withSetBundle) { store.setBundle = async (_account, bundle, setOptions) => { - setBundleCalls.push({ bundle, promoteDefault: setOptions?.promoteDefault }) + setBundleCalls.push({ bundle, setOptions }) } } return Object.assign(store, { setCalls, setBundleCalls }) @@ -59,18 +71,20 @@ describe('persistBundle', () => { await persistBundle(store, account, { accessToken: 'at' }, { promoteDefault: true }) - expect(store.setBundleCalls[0].promoteDefault).toBe(true) + expect(store.setBundleCalls[0].setOptions).toEqual({ promoteDefault: true }) }) - it('passes promoteDefault: undefined to setBundle when omitted (silent refresh path)', async () => { + it('omits the options arg entirely on the silent-refresh path', async () => { // Silent refresh must NOT promote default. The shared helper - // forwards the missing flag as `undefined`; downstream stores must - // treat `undefined` and `false` identically. + // OMITS the third arg (rather than passing `{ promoteDefault: + // undefined }`) so a custom store using presence-based handling + // (`if (setOptions)`) can distinguish login from refresh on the + // option-presence axis — matches the documented contract. const store = buildStore({ withSetBundle: true }) await persistBundle(store, account, { accessToken: 'at' }) - expect(store.setBundleCalls[0].promoteDefault).toBeUndefined() + expect(store.setBundleCalls[0].setOptions).toBeUndefined() }) it('falls back to set(token) when the store omits setBundle (legacy single-token store)', async () => { diff --git a/src/auth/persist.ts b/src/auth/persist.ts index d5ab1b1..dd9e925 100644 --- a/src/auth/persist.ts +++ b/src/auth/persist.ts @@ -34,7 +34,17 @@ export async function persistBundle( options: PersistBundleOptions = {}, ): Promise { if (store.setBundle) { - await store.setBundle(account, bundle, { promoteDefault: options.promoteDefault }) + // Omit the third arg entirely when no options are set — keeps + // custom-store presence-based handling (`if (setOptions)`) + // honest. Forwarding `{ promoteDefault: undefined }` would make + // silent-refresh callers indistinguishable from login on the + // option-presence axis, even though our contract documents the + // flag as omitted on the refresh path. + if (options.promoteDefault === undefined) { + await store.setBundle(account, bundle) + } else { + await store.setBundle(account, bundle, { promoteDefault: options.promoteDefault }) + } } else { await store.set(account, bundle.accessToken) } diff --git a/src/auth/refresh.test.ts b/src/auth/refresh.test.ts index b4a0480..b3ef943 100644 --- a/src/auth/refresh.test.ts +++ b/src/auth/refresh.test.ts @@ -222,6 +222,53 @@ describe('refreshAccessToken', () => { expect(result.bundle.refreshToken).toBe('rt-A-rotated') }) + it('re-reads the store after a lock-acquisition timeout and returns the rotated snapshot', async () => { + // Lock contention path with the holder NOT releasing in time: + // `acquireFileLock` returns null (timeout). The helper must still + // re-read the store — the holder may have completed the refresh + // while we were waiting, in which case POSTing our own refresh + // would yield `invalid_grant` (server rotated already). README + // promises this behaviour; previously the timeout path skipped + // the re-read and defeated the lock's load-saving purpose. + const { store, state } = buildStore({ + accessToken: 'expired', + refreshToken: 'rt', + accessTokenExpiresAt: Date.now() - 1000, + }) + const provider = refreshingProvider() + + // Hold the lock for the entire test (never release it). + writeFileSync(lockPath, 'holder-pid') + + // After acquireFileLock times out, the store re-read should see a + // rotated bundle (simulating the holder's completion). + const realActive = store.active.bind(store) + let firstRead = true + store.active = async (ref) => { + const snapshot = await realActive(ref) + if (firstRead) { + firstRead = false + state.bundle = { + accessToken: 'rotated-by-holder', + refreshToken: 'rt-new', + accessTokenExpiresAt: Date.now() + 3_600_000, + } + } + return snapshot + } + + const result = await refreshAccessToken({ + store, + provider, + lockPath, + // Tight timeout keeps the test fast. + lockTimeoutMs: 200, + }) + + expect(provider.refreshSpy).not.toHaveBeenCalled() + expect(result.token).toBe('rotated-by-holder') + }) + it('skips its own refresh when another process refreshed enough headroom into the future (non-force)', async () => { // Lock contention path: we ARE past skew but another process beats us // to it. The re-read shows the fresh access token has plenty of life diff --git a/src/auth/refresh.ts b/src/auth/refresh.ts index ea2f5ac..ccbe050 100644 --- a/src/auth/refresh.ts +++ b/src/auth/refresh.ts @@ -92,17 +92,31 @@ export async function refreshAccessToken( ) } - const lock = options.lockPath ? await acquireFileLock(options.lockPath, lockTimeoutMs) : null + let lockOutcome: + | { kind: 'acquired'; release: () => void } + | { kind: 'timed-out' } + | { kind: 'no-lock' } = { kind: 'no-lock' } + if (options.lockPath) { + const lock = await acquireFileLock(options.lockPath, lockTimeoutMs) + lockOutcome = lock ? { kind: 'acquired', release: lock.release } : { kind: 'timed-out' } + } let bundle = initialBundle let account = snapshot.account try { - // Re-read inside the lock. Another process may have refreshed already; - // when that happened, its rotated access token will differ from ours - // and we MUST return the fresh result instead of firing our own - // refresh — even on the `force` path. Continuing would POST with our + // Re-read after the lock outcome regardless of whether we acquired + // it. Another process may have refreshed already; when that + // happened, its rotated access token will differ from ours and we + // MUST return the fresh result instead of firing our own refresh — + // even on the `force` path. Continuing would POST with our // (now-rotated, invalid) refresh token and yield `invalid_grant`. - if (lock) { + // + // The re-read also runs on lock-timeout because the holder may + // have completed the refresh while we were waiting. Skipping it + // would defeat the lock's main load-saving purpose (one POST per + // refresh window) and waste an extra round-trip per CLI run + // under contention. + if (lockOutcome.kind !== 'no-lock') { const fresh = await requireSnapshotForRef(options.store, options.ref) if (fresh) { const freshBundle = fresh.bundle ?? { accessToken: fresh.token } @@ -149,7 +163,7 @@ export async function refreshAccessToken( account: refreshedAccount, } } finally { - if (lock) lock.release() + if (lockOutcome.kind === 'acquired') lockOutcome.release() } } From c8d224ee1ce7ba36a7c66c53396cf563d61d9423 Mon Sep 17 00:00:00 2001 From: lmjabreu Date: Tue, 19 May 2026 00:33:08 +0100 Subject: [PATCH 06/10] fix(auth): address doistbot round-5 review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Correctness (2 P1): - migrate.ts: two-phase write in the tryInsert path. Phase 1 inserts the record with `fallbackToken: legacyToken.token` so a crash between tryInsert and setSecret leaves a self-sufficient record (still readable via the plaintext fallback). Phase 2 moves the secret into the keyring, then upserts to clear the fallback. Without this, a crash mid-migration could permanently brick the user with AUTH_STORE_READ_FAILED on every subsequent retry. - token-store.ts: AUTH_STORE_READ_FAILED is now thrown directly from `readBundleForRecord` instead of smearing the corrupted-state signal across two functions (caller used to translate null into the throw). attachLogoutCommand's existing catch still recognises the code and clears the corrupted record. P2: - migrate.ts non-tryInsert fallback now passes `purgeRefreshSlot: false`. Consumers without atomic insert still see a small list-then-write race, and the legacy access-only bundle has no authority over refresh state even on that path. - token-store: backfill `hasRefreshToken: false` best-effort on the record after the refresh slot read returns null for an undefined bit (pre-PR records). Subsequent active() calls skip the extra IPC. Backfill is fire-and-forget and tolerates a vanishingly rare race with a concurrent setBundle. - Refresh-slot read tolerance is now tightened: only SecureStoreUnavailableError downgrades to "no refresh"; other errors propagate so a programming bug can't silently turn into "no refresh token available". - Drop `store.setBundle!` non-null assertions in token-store.test (KeyringTokenStore.setBundle is required, the `!` was redundant). - migrate.ts: share `accessStore` + `refreshStore` createSecureStore instances across both branches (dedups the per-branch instantiation). Tests (400 -> 405): - token-store.test: new test for the non-keyring refresh-slot read failure propagating (companion to the SecureStoreUnavailableError downgrade test). - token-store.test: new test asserting the `hasRefreshToken: false` backfill on first null refresh-slot read for pre-PR records. - migrate.test: new test for the tryInsert keyring-offline fallback (setSecret fails → record keeps the plaintext fallback, follow-up upsert doesn't run). - New colocated `slot-naming.test.ts` per AGENTS.md (pins the wire format + guards the fixture-routing suffix invariant). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/auth/keyring/migrate.test.ts | 49 ++++++++++++++++++++ src/auth/keyring/migrate.ts | 68 ++++++++++++++++------------ src/auth/keyring/slot-naming.test.ts | 20 ++++++++ src/auth/keyring/token-store.test.ts | 45 +++++++++++++++++- src/auth/keyring/token-store.ts | 48 ++++++++++++++------ 5 files changed, 185 insertions(+), 45 deletions(-) create mode 100644 src/auth/keyring/slot-naming.test.ts diff --git a/src/auth/keyring/migrate.test.ts b/src/auth/keyring/migrate.test.ts index a0d22d3..48fe1e3 100644 --- a/src/auth/keyring/migrate.test.ts +++ b/src/auth/keyring/migrate.test.ts @@ -384,6 +384,55 @@ describe('migrateLegacyAuth — stderr privacy', () => { expect(km.slots.get('user-1')?.secret).toBe('legacy_tok') }) + it('keeps the plaintext fallback on the record when tryInsert succeeds but setSecret hits a keyring-offline error', async () => { + // Two-phase write under contention with an offline keyring: + // phase 1 (tryInsert with fallbackToken) succeeds, phase 2 + // (setSecret) throws SecureStoreUnavailableError. The migration + // must NOT clear the fallback — that would leave a record with + // no recoverable token. Subsequent migrations would see + // `tryInsert: false` (record exists) and the CLI would surface + // AUTH_STORE_READ_FAILED forever. + const tryInsert = vi.fn(async (_record: UserRecord) => true) + const upsertSpy = vi.fn(async (_record: UserRecord) => {}) + const harness = buildUserRecords() + harness.store.tryInsert = tryInsert + // Intercept upsert too so we can verify it isn't called to clear + // the fallback (would happen on success). + const originalUpsert = harness.store.upsert + harness.store.upsert = async (record) => { + await upsertSpy(record) + return originalUpsert(record) + } + const km = buildKeyringMap() + km.slots.set(LEGACY, { secret: 'legacy_tok' }) + km.slots.set('user-1', { + secret: null, + setErr: new SecureStoreUnavailableError('keyring down'), + }) + mockedCreateSecureStore.mockImplementation(km.create) + + const result = await migrateLegacyAuth({ + serviceName: SERVICE, + legacyAccount: LEGACY, + userRecords: harness.store, + hasMigrated: async () => false, + markMigrated: async () => {}, + loadLegacyPlaintextToken: async () => null, + identifyAccount: async () => ({ id: '1', email: 'a@b' }), + silent: true, + }) + + expect(result).toMatchObject({ status: 'migrated' }) + // tryInsert was called with the plaintext fallback baked in. + expect(tryInsert.mock.calls[0][0]).toMatchObject({ + fallbackToken: 'legacy_tok', + hasRefreshToken: false, + }) + // The follow-up upsert (which would clear the fallback) never + // ran because setSecret failed — the record keeps the fallback. + expect(upsertSpy).not.toHaveBeenCalled() + }) + it('leaves the v2 record alone when tryInsert returns false (existing v2 login)', async () => { // The whole point of routing through `tryInsert`: when a v2 // login has already completed for the same account between diff --git a/src/auth/keyring/migrate.ts b/src/auth/keyring/migrate.ts index c5c2e1e..882ffa7 100644 --- a/src/auth/keyring/migrate.ts +++ b/src/auth/keyring/migrate.ts @@ -168,55 +168,63 @@ export async function migrateLegacyAuth( // acceptable for typical postinstall-style invocations. try { const accountSlot = accountForUser(account.id) + const accessStore = createSecureStore({ serviceName, account: accountSlot }) + const refreshStore = createSecureStore({ + serviceName, + account: refreshAccountSlot(accountSlot), + }) + if (userRecords.tryInsert) { - // Migration writes access-only; refresh slot defensively - // cleared so a hand-edited stale secret can't shadow the - // legacy state. `hasRefreshToken: false` is persisted so - // `active()` skips the refresh-slot round-trip per command. - // When the record already exists, `tryInsert` returns false - // and we leave the v2 record alone. - const refreshStore = createSecureStore({ - serviceName, - account: refreshAccountSlot(accountSlot), - }) + // Two-phase write so a crash between the record write and the + // keyring write can't permanently brick the user. Without it, + // a crash mid-migration would leave a record with no + // recoverable token: subsequent retries see `tryInsert: false`, + // skip, and the CLI surfaces `AUTH_STORE_READ_FAILED` forever. + // + // Phase 1: tryInsert with `fallbackToken: legacyToken.token` + // — even if everything after this crashes, the record is + // self-sufficient via the plaintext fallback path. + // Phase 2: move the secret into the keyring, then upsert to + // clear the fallback. If the keyring is offline we leave the + // fallback in place (the correct degraded state). const inserted = await userRecords.tryInsert({ account, - fallbackToken: undefined, + fallbackToken: legacyToken.token, hasRefreshToken: false, }) if (inserted) { - // Now persist the access secret + clear the refresh slot. - // Done after the record write because `tryInsert`'s - // atomicity guarantee is the load-bearing piece. + let movedToKeyring = false try { - await createSecureStore({ - serviceName, - account: accountSlot, - }).setSecret(legacyToken.token) + await accessStore.setSecret(legacyToken.token) + movedToKeyring = true } catch (error) { if (!(error instanceof SecureStoreUnavailableError)) throw error - // Keyring offline: re-upsert with the plaintext fallback. - await userRecords.upsert({ - account, - fallbackToken: legacyToken.token, - hasRefreshToken: false, - }) + // Keyring offline — keep the plaintext fallback. + } + if (movedToKeyring) { + await userRecords.upsert({ account, hasRefreshToken: false }) } - // Best-effort cleanup of any stale refresh secret. + // Best-effort cleanup of any stale refresh secret (legacy + // single-user state never had one, but a hand-edit might). await refreshStore.deleteSecret().catch(() => undefined) } } else { const existing = (await userRecords.list()).find((r) => r.account.id === account.id) if (!existing) { await writeRecordWithKeyringFallback({ - secureStore: createSecureStore({ serviceName, account: accountSlot }), - refreshSecureStore: createSecureStore({ - serviceName, - account: refreshAccountSlot(accountSlot), - }), + secureStore: accessStore, + refreshSecureStore: refreshStore, userRecords, account, bundle: { accessToken: legacyToken.token }, + // The list-then-write fallback can still race with a + // parallel v2 login that writes a refresh secret + // between our `list()` and the helper's `upsert`. + // `purgeRefreshSlot: false` keeps the refresh slot + // untouched and persists `hasRefreshToken: undefined` + // ("unknown" — readers probe the slot) so a v2 + // refresh secret written mid-race remains visible. + purgeRefreshSlot: false, }) } } diff --git a/src/auth/keyring/slot-naming.test.ts b/src/auth/keyring/slot-naming.test.ts new file mode 100644 index 0000000..179f578 --- /dev/null +++ b/src/auth/keyring/slot-naming.test.ts @@ -0,0 +1,20 @@ +import { describe, expect, it } from 'vitest' + +import { refreshAccountSlot } from './slot-naming.js' + +describe('refreshAccountSlot', () => { + it('appends the well-known suffix to the access slot name', () => { + // Pinning the wire format here means a future rename has to update + // exactly this test plus production code — no silent drift. + expect(refreshAccountSlot('user-42')).toBe('user-42/refresh') + }) + + it('does not collapse an empty access slot to a bare suffix', () => { + // Defensive: `endsWith(refreshAccountSlot(''))` is how the + // test fixture routes between access and refresh slot mocks, so + // the suffix must remain a non-empty, distinctive substring. + const suffix = refreshAccountSlot('') + expect(suffix.length).toBeGreaterThan(0) + expect(suffix.startsWith('/')).toBe(true) + }) +}) diff --git a/src/auth/keyring/token-store.test.ts b/src/auth/keyring/token-store.test.ts index 8673f85..220bc94 100644 --- a/src/auth/keyring/token-store.test.ts +++ b/src/auth/keyring/token-store.test.ts @@ -126,7 +126,7 @@ describe('createKeyringTokenStore', () => { const { keyring, refreshKeyring, store, state } = fixture() const expiresAt = Date.now() + 3_600_000 - await store.setBundle!(account, { + await store.setBundle(account, { accessToken: 'at-1', refreshToken: 'rt-1', accessTokenExpiresAt: expiresAt, @@ -164,7 +164,7 @@ describe('createKeyringTokenStore', () => { }) expect(state.defaultId).toBeNull() - await store.setBundle!(account, { accessToken: 'at-1', refreshToken: 'rt-1' }) + await store.setBundle(account, { accessToken: 'at-1', refreshToken: 'rt-1' }) expect(state.defaultId).toBeNull() expect(setDefaultSpy).not.toHaveBeenCalled() @@ -195,6 +195,47 @@ describe('createKeyringTokenStore', () => { expect(refreshKeyring.getSpy).toHaveBeenCalled() }) + it('propagates non-keyring errors from the refresh-slot read (no silent downgrade)', async () => { + // Unexpected backend or programming errors must NOT collapse to + // "no refresh token" — that would hide real breakage. Only + // SecureStoreUnavailableError is the documented degraded case. + const refreshKeyring = buildSingleSlot() + refreshKeyring.getSpy.mockRejectedValueOnce(new Error('boom — programming error')) + const { store } = fixture({ + keyring: buildSingleSlot({ secret: 'at-1' }), + refreshKeyring, + records: { '42': { account, hasRefreshToken: true } }, + defaultId: '42', + }) + + await expect(store.active()).rejects.toThrow(/boom/) + }) + + it('backfills hasRefreshToken: false on the record when undefined and the refresh slot is empty', async () => { + // Pre-PR keyring-backed records have `hasRefreshToken: undefined` + // because the field didn't exist. Treating undefined as "try the + // slot" is correct (preserves migration safety) but would cost + // every active() call a second keyring IPC forever. After a read + // confirms the slot is empty, backfill `false` so subsequent + // reads skip the slot. + const refreshKeyring = buildSingleSlot() // empty refresh slot + const { store, state, upsertSpy } = fixture({ + keyring: buildSingleSlot({ secret: 'at-old' }), + refreshKeyring, + records: { '42': { account } }, // hasRefreshToken: undefined + defaultId: '42', + }) + + await store.active() + + // Backfill is fire-and-forget — wait for the microtask flush so + // the assertion isn't racy. + await new Promise((resolve) => setTimeout(resolve, 0)) + + expect(upsertSpy).toHaveBeenCalled() + expect(state.records.get('42')?.hasRefreshToken).toBe(false) + }) + it('reads the refresh slot when hasRefreshToken is undefined (unknown state from legacy migration)', async () => { // The legacy-migration path writes `hasRefreshToken: undefined` — // it has no authority over refresh state. The gate must NOT treat diff --git a/src/auth/keyring/token-store.ts b/src/auth/keyring/token-store.ts index 940fa3b..b01c1a3 100644 --- a/src/auth/keyring/token-store.ts +++ b/src/auth/keyring/token-store.ts @@ -131,11 +131,15 @@ export function createKeyringTokenStore( /** * Read the access + refresh secrets for a record, preferring the - * plaintext fallbacks when present. Returns `null` when the access slot - * is empty (corrupted state, surfaced to the caller as - * `AUTH_STORE_READ_FAILED`). + * plaintext fallbacks when present. Throws `AUTH_STORE_READ_FAILED` + * directly when the access slot is empty (deleted out-of-band) or + * when the keyring read itself fails — collapsing it to a return + * value here would leave the caller doing the same `if (!bundle) + * throw` dance, smearing the corruption signal across two places. + * `attachLogoutCommand` catches this code specifically so an explicit + * `logout --user ` can still clear the corrupted record. */ - async function readBundleForRecord(record: UserRecord): Promise { + async function readBundleForRecord(record: UserRecord): Promise { const fallbackAccess = record.fallbackToken?.trim() if (fallbackAccess) { return { @@ -188,11 +192,36 @@ export function createKeyringTokenStore( const [rawAccess, rawRefresh] = await Promise.all([accessPromise, refreshPromise]) const accessToken = rawAccess?.trim() - if (!accessToken) return null + if (!accessToken) { + // Record exists, no `fallbackToken`, and the keyring slot is + // empty — the credential was deleted out-of-band (user ran + // `security delete-generic-password`, `secret-tool clear`, …). + throw new CliError( + 'AUTH_STORE_READ_FAILED', + `${SECURE_STORE_DESCRIPTION} returned no credential for the stored account; the keyring entry may have been removed externally.`, + ) + } + + const refreshToken = rawRefresh?.trim() || undefined + + // Backfill `hasRefreshToken: false` best-effort when we probed the + // refresh slot (record said `undefined` — "unknown") and found + // nothing. Pre-PR records didn't have this field; without the + // backfill they'd pay an extra keyring IPC per `active()` call + // forever. Doing it on a read isn't strictly race-safe — a + // concurrent `setBundle` could be writing a fresh refresh secret + // at the same instant — but for an account whose record says + // "no refresh yet known", such concurrency is vanishingly rare + // (login is foreground; silent refresh requires an already-stored + // refresh token). Failures here are swallowed: the worst case is + // the next `active()` pays the same extra IPC and tries again. + if (record.hasRefreshToken === undefined && refreshToken === undefined) { + void userRecords.upsert({ ...record, hasRefreshToken: false }).catch(() => undefined) + } return { accessToken, - refreshToken: rawRefresh?.trim() || undefined, + refreshToken, accessTokenExpiresAt: record.accessTokenExpiresAt, refreshTokenExpiresAt: record.refreshTokenExpiresAt, } @@ -256,13 +285,6 @@ export function createKeyringTokenStore( if (!record) return null const bundle = await readBundleForRecord(record) - if (!bundle) { - throw new CliError( - 'AUTH_STORE_READ_FAILED', - `${SECURE_STORE_DESCRIPTION} returned no credential for the stored account; the keyring entry may have been removed externally.`, - ) - } - return { token: bundle.accessToken, bundle, account: record.account } }, From a7fd8e8b1bd0af5b9fe77e84501cf0cb52c8d2df Mon Sep 17 00:00:00 2001 From: lmjabreu Date: Tue, 19 May 2026 00:42:30 +0100 Subject: [PATCH 07/10] fix(auth): address doistbot round-6 P1 (migrate ownership-check race) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `tryInsert` proves the record was absent at insert time but gives no exclusive ownership afterward. A v2 login that completes between our phase-1 insert and the follow-up keyring/upsert steps would be clobbered: our setSecret would overwrite their access token, our upsert would reset their record, our refresh-slot delete would remove their refresh secret. Mitigate with ownership-check re-reads before each follow-up: - Use a unique placeholder signature on tryInsert (`fallbackToken: legacyToken.token` + `hasRefreshToken: undefined`) so the re-read can distinguish "still ours" from "v2 login took over" — no v2 login ever produces that exact shape (v2 always writes hasRefreshToken: true or false, never undefined). - Re-read before setSecret, before the post-setSecret upsert, and before the refresh-slot delete. If the record has been replaced at any point, abort the rest. If we already wrote our access token to the keyring before discovering we lost ownership, roll that write back so the v2 login's setSecret is the authoritative one. The residual race window is now bounded by the gap between a re-read and the very next call (microseconds) rather than the entire multi-step sequence. Truly atomic CAS would need a contract-level `compareAndSet`/`updateIf` op — deferred until a concrete consumer asks for it. Per the user's instruction, only the P1 is addressed this round; the five P2s and one P3 are noted in the PR comment for follow-up. Tests (405 -> 406): - migrate.test: new test exercising the race — tryInsert succeeds with our placeholder, then a v2 login completes between phases, and the ownership-check re-read aborts our keyring writes so v2's state survives intact. Also updated the existing `tryInsert` tests to reflect the new placeholder signature (hasRefreshToken: undefined) and to actually insert into the harness state so the re-reads see the placeholder. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/auth/keyring/migrate.test.ts | 87 ++++++++++++++++++++++++++++---- src/auth/keyring/migrate.ts | 75 +++++++++++++++++++-------- 2 files changed, 132 insertions(+), 30 deletions(-) diff --git a/src/auth/keyring/migrate.test.ts b/src/auth/keyring/migrate.test.ts index 48fe1e3..2ff3779 100644 --- a/src/auth/keyring/migrate.test.ts +++ b/src/auth/keyring/migrate.test.ts @@ -353,8 +353,14 @@ describe('migrateLegacyAuth — stderr privacy', () => { // is racy: a parallel v2 login could complete between the two // calls and we'd clobber it. With `tryInsert`, the consumer // commits to an atomic check-and-insert and we trust their answer. - const tryInsert = vi.fn(async (_record: UserRecord) => true) const harness = buildUserRecords() + // Stub tryInsert that ALSO actually inserts into the harness so + // the migration's ownership-check re-reads see the placeholder. + const tryInsert = vi.fn(async (record: UserRecord) => { + if (harness.state.records.has(record.account.id)) return false + harness.state.records.set(record.account.id, record) + return true + }) harness.store.tryInsert = tryInsert const km = buildKeyringMap() km.slots.set(LEGACY, { secret: 'legacy_tok' }) @@ -373,15 +379,21 @@ describe('migrateLegacyAuth — stderr privacy', () => { expect(result).toMatchObject({ status: 'migrated' }) expect(tryInsert).toHaveBeenCalledTimes(1) - // The inserted record persists `hasRefreshToken: false` (legacy - // tokens never have a refresh slot) so `active()` skips the - // refresh-slot keyring round-trip per command for migrated users. + // Placeholder uses a unique signature (`fallbackToken: legacyToken` + // + `hasRefreshToken: undefined`) so the ownership-check re-read + // can distinguish "still ours" from "v2 login took over". After + // the keyring write succeeds the follow-up upsert flips + // `hasRefreshToken` to `false`. expect(tryInsert.mock.calls[0][0]).toMatchObject({ account: { id: '1' }, - hasRefreshToken: false, + fallbackToken: 'legacy_tok', + hasRefreshToken: undefined, }) // Access secret landed in the per-user slot. expect(km.slots.get('user-1')?.secret).toBe('legacy_tok') + // Follow-up upsert flipped hasRefreshToken to false (so future + // active() reads skip the refresh-slot IPC). + expect(harness.state.records.get('1')?.hasRefreshToken).toBe(false) }) it('keeps the plaintext fallback on the record when tryInsert succeeds but setSecret hits a keyring-offline error', async () => { @@ -392,9 +404,13 @@ describe('migrateLegacyAuth — stderr privacy', () => { // no recoverable token. Subsequent migrations would see // `tryInsert: false` (record exists) and the CLI would surface // AUTH_STORE_READ_FAILED forever. - const tryInsert = vi.fn(async (_record: UserRecord) => true) - const upsertSpy = vi.fn(async (_record: UserRecord) => {}) const harness = buildUserRecords() + const tryInsert = vi.fn(async (record: UserRecord) => { + if (harness.state.records.has(record.account.id)) return false + harness.state.records.set(record.account.id, record) + return true + }) + const upsertSpy = vi.fn(async (_record: UserRecord) => {}) harness.store.tryInsert = tryInsert // Intercept upsert too so we can verify it isn't called to clear // the fallback (would happen on success). @@ -423,16 +439,69 @@ describe('migrateLegacyAuth — stderr privacy', () => { }) expect(result).toMatchObject({ status: 'migrated' }) - // tryInsert was called with the plaintext fallback baked in. + // tryInsert was called with the plaintext fallback baked in + // (placeholder signature: `fallbackToken: legacyToken` + + // `hasRefreshToken: undefined`). expect(tryInsert.mock.calls[0][0]).toMatchObject({ fallbackToken: 'legacy_tok', - hasRefreshToken: false, + hasRefreshToken: undefined, }) // The follow-up upsert (which would clear the fallback) never // ran because setSecret failed — the record keeps the fallback. expect(upsertSpy).not.toHaveBeenCalled() }) + it('aborts the keyring write when a concurrent v2 login replaces the placeholder mid-migration', async () => { + // Simulates: tryInsert succeeds with our placeholder; then a v2 + // login completes (writes its own access token + record) before + // we run our follow-up keyring writes. The ownership-check + // re-read must detect the shape change and stop us from + // clobbering the v2 state. + const harness = buildUserRecords() + const tryInsert = vi.fn(async (record: UserRecord) => { + if (harness.state.records.has(record.account.id)) return false + harness.state.records.set(record.account.id, record) + // Race: as soon as our placeholder lands, a v2 login completes. + // Replaces the record with its keyring-backed shape (no + // fallbackToken, hasRefreshToken: true). + harness.state.records.set(record.account.id, { + account: { id: '1', email: 'a@b' }, + hasRefreshToken: true, + }) + return true + }) + harness.store.tryInsert = tryInsert + const km = buildKeyringMap() + km.slots.set(LEGACY, { secret: 'legacy_tok' }) + // V2 already wrote its access token to the keyring before our + // migration's setSecret runs. + km.slots.set('user-1', { secret: 'v2_access' }) + km.slots.set('user-1/refresh', { secret: 'v2_refresh' }) + mockedCreateSecureStore.mockImplementation(km.create) + + await migrateLegacyAuth({ + serviceName: SERVICE, + legacyAccount: LEGACY, + userRecords: harness.store, + hasMigrated: async () => false, + markMigrated: async () => {}, + loadLegacyPlaintextToken: async () => null, + identifyAccount: async () => ({ id: '1', email: 'a@b' }), + silent: true, + }) + + // V2's access token survives — our setSecret was skipped because + // the ownership-check re-read saw the record had been replaced. + expect(km.slots.get('user-1')?.secret).toBe('v2_access') + // V2's refresh token survives too. + expect(km.slots.get('user-1/refresh')?.secret).toBe('v2_refresh') + // The v2 record is intact (hasRefreshToken: true, no fallback). + expect(harness.state.records.get('1')).toMatchObject({ + hasRefreshToken: true, + }) + expect(harness.state.records.get('1')?.fallbackToken).toBeUndefined() + }) + it('leaves the v2 record alone when tryInsert returns false (existing v2 login)', async () => { // The whole point of routing through `tryInsert`: when a v2 // login has already completed for the same account between diff --git a/src/auth/keyring/migrate.ts b/src/auth/keyring/migrate.ts index 882ffa7..7191a67 100644 --- a/src/auth/keyring/migrate.ts +++ b/src/auth/keyring/migrate.ts @@ -175,38 +175,71 @@ export async function migrateLegacyAuth( }) if (userRecords.tryInsert) { - // Two-phase write so a crash between the record write and the - // keyring write can't permanently brick the user. Without it, - // a crash mid-migration would leave a record with no - // recoverable token: subsequent retries see `tryInsert: false`, - // skip, and the CLI surfaces `AUTH_STORE_READ_FAILED` forever. + // `tryInsert` proves the record was absent at insert time but + // gives no exclusive ownership afterward — a v2 login can + // start, complete `setSecret` + `upsert`, and reach the + // refresh slot all while we're between our own steps. + // Without ownership checks the migration would clobber the + // very v2 login it's meant to preserve: our `setSecret` + // would overwrite their access token, our follow-up `upsert` + // would reset their record, our `deleteSecret` would wipe + // their refresh slot. // - // Phase 1: tryInsert with `fallbackToken: legacyToken.token` - // — even if everything after this crashes, the record is - // self-sufficient via the plaintext fallback path. - // Phase 2: move the secret into the keyring, then upsert to - // clear the fallback. If the keyring is offline we leave the - // fallback in place (the correct degraded state). - const inserted = await userRecords.tryInsert({ + // We re-read before each follow-up and abort the rest if the + // record is no longer the placeholder we inserted (matched + // by `fallbackToken === legacyToken.token` AND no + // `hasRefreshToken` advertised — together a unique signature + // of our migration's tryInsert payload, never produced by a + // v2 login). The race window is now bounded by the gap + // between re-read and the very next call, not the entire + // multi-step sequence. + // + // If we already moved the secret into the keyring before + // discovering we lost ownership, roll that write back so we + // don't leave a stale access token in the slot. The v2 + // login's `setSecret` may have run after ours; rolling back + // to "no entry" is the only safe end-state we can guarantee + // without contract-level CAS, and v2 login's keyring-write + // path is happy to re-create. + const legacyTokenStr = legacyToken.token + const placeholder = { account, - fallbackToken: legacyToken.token, - hasRefreshToken: false, - }) - if (inserted) { + fallbackToken: legacyTokenStr, + hasRefreshToken: undefined, + } + async function recordStillOurs(): Promise { + const current = (await userRecords.list()).find((r) => r.account.id === account.id) + return ( + current?.fallbackToken === legacyTokenStr && + current?.hasRefreshToken === undefined + ) + } + const inserted = await userRecords.tryInsert(placeholder) + if (inserted && (await recordStillOurs())) { let movedToKeyring = false try { - await accessStore.setSecret(legacyToken.token) + await accessStore.setSecret(legacyTokenStr) movedToKeyring = true } catch (error) { if (!(error instanceof SecureStoreUnavailableError)) throw error // Keyring offline — keep the plaintext fallback. } if (movedToKeyring) { - await userRecords.upsert({ account, hasRefreshToken: false }) + if (await recordStillOurs()) { + await userRecords.upsert({ account, hasRefreshToken: false }) + } else { + // V2 login took over while we were writing — undo + // our keyring write so its `setSecret` is the + // authoritative one. + await accessStore.deleteSecret().catch(() => undefined) + } + } + if (await recordStillOurs()) { + // Best-effort cleanup of any stale refresh secret + // (legacy single-user state never had one, but a + // hand-edit might). Skipped when v2 owns the record. + await refreshStore.deleteSecret().catch(() => undefined) } - // Best-effort cleanup of any stale refresh secret (legacy - // single-user state never had one, but a hand-edit might). - await refreshStore.deleteSecret().catch(() => undefined) } } else { const existing = (await userRecords.list()).find((r) => r.account.id === account.id) From 0fca19e0802584eed8956ed2ca8a2b8c99a60c90 Mon Sep 17 00:00:00 2001 From: lmjabreu Date: Tue, 19 May 2026 00:49:18 +0100 Subject: [PATCH 08/10] fix(auth): address doistbot round-7 P1s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All three P1s are direct consequences of round-6's ownership-check introduction: 1. refresh.ts post-lock re-read: also compare the refresh token, not just the access-token string. A provider that rotates only the refresh token (or reuses the access-token string) would otherwise let the waiter POST with a now-stale refresh and hit `invalid_grant`. 2. migrate.ts rollback guard: only delete the access slot when it still contains OUR legacy token. The previous unconditional delete could remove a v2 login's access token (its `setSecret` may have run after ours), leaving the preserved v2 record permanently unreadable. Reading the slot first is racy at the absolute limit but covers the realistic case (the race window is microseconds). 3. migrate.ts ownership-check caching: the post-upsert re-read of `recordStillOurs()` was guaranteed to return false because the upsert itself flipped `hasRefreshToken` away from the placeholder signature. Net effect: the refresh-slot cleanup was silently skipped on every run, plus a wasted file read. One re-read after the keyring write now gates both the upsert and the refresh-slot cleanup. Tests (406 -> 407): - migrate.test: new test for the rollback-guard. Simulates v2 login overwriting the slot mid-migration; asserts the guarded delete declines to remove v2's access token and v2's record stays intact. Deferred P2/P3 (per the user's instruction): - P2 token-store backfill upsert is replace-not-merge from stale snapshot - P2 token-store.test missing `undefined → true` promotion path - P2 migrate.ts hand-rolls keyring-write/fallback policy - P2 migrate.ts redundant tryInsert+recordStillOurs (small) - P3 README setBundle docs missing options arg - P3 migrate.test tryInsert mock duplicated across cases - P3 migrate.ts `list().find` duplicated - P3 slot-naming.test fixture-coupled Co-Authored-By: Claude Opus 4.7 (1M context) --- src/auth/keyring/migrate.test.ts | 64 ++++++++++++++++++++++++++++++++ src/auth/keyring/migrate.ts | 35 ++++++++++++----- src/auth/refresh.ts | 11 +++++- 3 files changed, 100 insertions(+), 10 deletions(-) diff --git a/src/auth/keyring/migrate.test.ts b/src/auth/keyring/migrate.test.ts index 2ff3779..aea3092 100644 --- a/src/auth/keyring/migrate.test.ts +++ b/src/auth/keyring/migrate.test.ts @@ -502,6 +502,70 @@ describe('migrateLegacyAuth — stderr privacy', () => { expect(harness.state.records.get('1')?.fallbackToken).toBeUndefined() }) + it('does not delete the access slot during rollback when a v2 login has since overwritten it', async () => { + // Race: our setSecret writes the legacy token, then a v2 login + // overwrites the slot with its access token AND replaces the + // record. When we discover we lost ownership, a blind + // deleteSecret() would remove the v2 access token and leave the + // v2 record permanently unreadable (AUTH_STORE_READ_FAILED). + // The rollback must check the slot still contains OUR token + // before deleting. + const harness = buildUserRecords() + const tryInsert = vi.fn(async (record: UserRecord) => { + if (harness.state.records.has(record.account.id)) return false + harness.state.records.set(record.account.id, record) + return true + }) + harness.store.tryInsert = tryInsert + const km = buildKeyringMap() + km.slots.set(LEGACY, { secret: 'legacy_tok' }) + mockedCreateSecureStore.mockImplementation(km.create) + + // Hook setSecret on the per-user slot so that AS SOON AS our + // migration writes 'legacy_tok' there, a "v2 login" completes: + // overwrites the slot with v2's token and replaces the record. + const userSlot = km.slots.get('user-1') ?? { secret: null } + km.slots.set('user-1', userSlot) + // Wrap setSecret so the race fires after our write. + const originalCreate = km.create + mockedCreateSecureStore.mockImplementation((args) => { + const store = originalCreate(args) + if (args.account === 'user-1') { + const originalSet = store.setSecret.bind(store) + store.setSecret = async (secret) => { + await originalSet(secret) + // V2 login lands now: overwrite the slot and the record. + userSlot.secret = 'v2_access' + harness.state.records.set('1', { + account: { id: '1', email: 'a@b' }, + hasRefreshToken: true, + }) + } + } + return store + }) + + await migrateLegacyAuth({ + serviceName: SERVICE, + legacyAccount: LEGACY, + userRecords: harness.store, + hasMigrated: async () => false, + markMigrated: async () => {}, + loadLegacyPlaintextToken: async () => null, + identifyAccount: async () => ({ id: '1', email: 'a@b' }), + silent: true, + }) + + // V2's access token survives the rollback — the guarded delete + // saw the slot no longer contained the legacy token and + // declined to remove anything. + expect(km.slots.get('user-1')?.secret).toBe('v2_access') + // V2 record intact. + expect(harness.state.records.get('1')).toMatchObject({ + hasRefreshToken: true, + }) + }) + it('leaves the v2 record alone when tryInsert returns false (existing v2 login)', async () => { // The whole point of routing through `tryInsert`: when a v2 // login has already completed for the same account between diff --git a/src/auth/keyring/migrate.ts b/src/auth/keyring/migrate.ts index 7191a67..3ccf170 100644 --- a/src/auth/keyring/migrate.ts +++ b/src/auth/keyring/migrate.ts @@ -224,17 +224,34 @@ export async function migrateLegacyAuth( if (!(error instanceof SecureStoreUnavailableError)) throw error // Keyring offline — keep the plaintext fallback. } - if (movedToKeyring) { - if (await recordStillOurs()) { - await userRecords.upsert({ account, hasRefreshToken: false }) - } else { - // V2 login took over while we were writing — undo - // our keyring write so its `setSecret` is the - // authoritative one. - await accessStore.deleteSecret().catch(() => undefined) + // One ownership re-read after the keyring write; cache the + // result so the follow-up upsert AND the refresh-slot + // cleanup share the same answer. The earlier code re-read + // again after the upsert, but the upsert flips + // `hasRefreshToken` away from the placeholder signature + // so that check was guaranteed to return false (and the + // cleanup silently skipped). + const stillOurs = await recordStillOurs() + if (movedToKeyring && stillOurs) { + await userRecords.upsert({ account, hasRefreshToken: false }) + } else if (movedToKeyring) { + // V2 login took over after our setSecret. Only roll + // the keyring write back when the slot still contains + // OUR legacy token — a blind delete would otherwise + // remove v2's credential (their setSecret may have + // run after ours), leaving the v2 record permanently + // unreadable. + try { + const current = (await accessStore.getSecret())?.trim() + if (current === legacyTokenStr) { + await accessStore.deleteSecret().catch(() => undefined) + } + } catch { + // best-effort: if we can't read the slot, don't + // risk a destructive delete. } } - if (await recordStillOurs()) { + if (stillOurs) { // Best-effort cleanup of any stale refresh secret // (legacy single-user state never had one, but a // hand-edit might). Skipped when v2 owns the record. diff --git a/src/auth/refresh.ts b/src/auth/refresh.ts index ccbe050..7331ff2 100644 --- a/src/auth/refresh.ts +++ b/src/auth/refresh.ts @@ -120,7 +120,16 @@ export async function refreshAccessToken( const fresh = await requireSnapshotForRef(options.store, options.ref) if (fresh) { const freshBundle = fresh.bundle ?? { accessToken: fresh.token } - if (fresh.token !== snapshot.token) { + // Compare the WHOLE bundle, not just the access-token + // string. A provider that rotates only the refresh token + // (or whose access token is opaquely re-issued with the + // same string) would otherwise leave the waiter with a + // stale refresh token and a guaranteed `invalid_grant` + // on its next POST. + if ( + fresh.token !== snapshot.token || + freshBundle.refreshToken !== initialBundle.refreshToken + ) { // Another process won the race. Return its result; ignore // our `force` flag because the access token has already // been rotated server-side. From aca2702908d77e727d137d452854cec2922c34e7 Mon Sep 17 00:00:00 2001 From: lmjabreu Date: Tue, 19 May 2026 07:06:28 +0100 Subject: [PATCH 09/10] refactor(auth): address doistbot round-8 P2/P3 findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P2: - token-store backfill no longer issues `upsert({ ...staleRecord, ... })`. New `backfillHasRefreshToken` helper re-reads the record inside the upsert and writes only when every field still matches the snapshot we made the backfill decision on. Avoids clobbering a concurrent setBundle's richer state (upsert is replace-not-merge per contract). - The same helper now also handles the symmetric `undefined → true` promotion when the refresh slot turns out to be populated, so pre-PR records with refresh tokens stop probing the slot every active() call. - New `keyring/internal.ts` exports `findById` and `trySetSecret`. migrate.ts and record-write.ts route through `trySetSecret` so the "only SecureStoreUnavailableError downgrades to fallback" policy lives in one place. token-store.ts + migrate.ts use `findById` to drop the duplicated `(await list()).find(...)` pattern. - migrate.test extracts `stubTryInsert(harness)` helper — the "if-has-set-return-false-else-insert" tryInsert mock pattern was identical across 3 test cases. Tests with bespoke race emulation in their tryInsert (where the duplication wasn't actually a duplicate) keep their inline implementation. P3: - README `setBundle` docs now show the full signature including `options?: { promoteDefault?: boolean }` and explain the presence-vs-absence contract for login vs refresh callers. - migrate.test "tryInsert avoids the racy existence check" test now spies on `userRecords.list()` and explicitly proves `tryInsert` ran (the old assertions only proved the mock function fired). - slot-naming.test asserts the module's behavior directly (the result contains the access slot, is deterministic, and is unique per access slot) instead of locking in the literal suffix or coupling to the fixture's `endsWith` routing. Tests (407 -> 410): - New test: undefined → true promotion when refresh slot populated. - New test: backfill skips when a concurrent write replaced the record between the active() read and the upsert (replace-not-merge guard). - slot-naming.test rewritten to property-based assertions. Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 2 +- src/auth/keyring/internal.ts | 36 ++++++++++++++ src/auth/keyring/migrate.test.ts | 56 +++++++++++++-------- src/auth/keyring/migrate.ts | 18 +++---- src/auth/keyring/record-write.ts | 9 +--- src/auth/keyring/slot-naming.test.ts | 28 ++++++----- src/auth/keyring/token-store.test.ts | 74 ++++++++++++++++++++++++++++ src/auth/keyring/token-store.ts | 59 +++++++++++++++++----- src/test-support/keyring-mocks.ts | 2 +- 9 files changed, 220 insertions(+), 64 deletions(-) create mode 100644 src/auth/keyring/internal.ts diff --git a/README.md b/README.md index cf6a769..590dd13 100644 --- a/README.md +++ b/README.md @@ -438,7 +438,7 @@ if (res.status === 401) { Errors are typed: `AUTH_REFRESH_EXPIRED` (server returned 400/401 + `invalid_grant` — refresh token revoked, forced re-login required), `AUTH_REFRESH_TRANSIENT` (network, 5xx, non-JSON — safe to retry), `AUTH_REFRESH_UNAVAILABLE` (no refresh token stored or provider doesn't implement `refreshToken`). Map these to your CLI's existing "run ` auth login`" hint as makes sense. -`TokenBundle` is the persisted credential triple — `{ accessToken, refreshToken?, accessTokenExpiresAt?, refreshTokenExpiresAt? }`. Stores opting into refresh implement `setBundle?(account, bundle)` alongside `set(account, token)`; `createKeyringTokenStore` implements both. Stores that omit `setBundle` lose refresh + expiry metadata on persistence — `refreshAccessToken` against them will surface `AUTH_REFRESH_UNAVAILABLE` on the first attempted refresh, which is the correct degraded behaviour. +`TokenBundle` is the persisted credential triple — `{ accessToken, refreshToken?, accessTokenExpiresAt?, refreshTokenExpiresAt? }`. Stores opting into refresh implement `setBundle?(account, bundle, options?: { promoteDefault?: boolean })` alongside `set(account, token)`; `createKeyringTokenStore` implements both. The `options.promoteDefault: true` flag is set on the explicit login path (so the first login on a fresh config pins the account as default) and omitted on the silent-refresh path (so credential rotation never mutates account selection); stores that observe presence-based handling can rely on the options arg being absent on refresh. Stores that omit `setBundle` lose refresh + expiry metadata on persistence — `refreshAccessToken` against them will surface `AUTH_REFRESH_UNAVAILABLE` on the first attempted refresh, which is the correct degraded behaviour. #### `--user ` and multi-user wiring diff --git a/src/auth/keyring/internal.ts b/src/auth/keyring/internal.ts new file mode 100644 index 0000000..a649834 --- /dev/null +++ b/src/auth/keyring/internal.ts @@ -0,0 +1,36 @@ +import type { AuthAccount } from '../types.js' +import { type SecureStore, SecureStoreUnavailableError } from './secure-store.js' +import type { UserRecord } from './types.js' + +/** + * Find a record by its account id. Trivial wrapper, but every caller used + * the same shape (`(await userRecords.list()).find((r) => r.account.id === + * id)`) — extracting it keeps a future change to the lookup (e.g. case- + * insensitive ids, lazy indexes) to one spot. + */ +export function findById( + records: UserRecord[], + id: string, +): UserRecord | undefined { + return records.find((r) => r.account.id === id) +} + +/** + * Try a keyring `setSecret` and tolerate the documented offline failure. + * Returns `true` when the secret landed in the keyring, `false` when the + * caller should fall back to a plaintext record field. Anything other + * than `SecureStoreUnavailableError` is rethrown — programming errors and + * unexpected backend failures must not silently downgrade to "no keyring". + * + * Shared by `writeRecordWithKeyringFallback` and `migrateLegacyAuth`'s + * tryInsert path so the offline-tolerance policy lives in one place. + */ +export async function trySetSecret(store: SecureStore, secret: string): Promise { + try { + await store.setSecret(secret) + return true + } catch (error) { + if (!(error instanceof SecureStoreUnavailableError)) throw error + return false + } +} diff --git a/src/auth/keyring/migrate.test.ts b/src/auth/keyring/migrate.test.ts index aea3092..2b205e7 100644 --- a/src/auth/keyring/migrate.test.ts +++ b/src/auth/keyring/migrate.test.ts @@ -1,10 +1,32 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' -import { buildKeyringMap, buildUserRecords } from '../../test-support/keyring-mocks.js' +import { + buildKeyringMap, + buildUserRecords, + type UserRecordsHarness, +} from '../../test-support/keyring-mocks.js' import { migrateLegacyAuth, type MigrateLegacyAuthOptions } from './migrate.js' import { SecureStoreUnavailableError } from './secure-store.js' import type { UserRecord } from './types.js' +/** + * Stub `tryInsert` for a harness so that successful inserts actually land + * in the harness state — the migration's ownership-check re-reads need to + * see the placeholder. Returns the spy so tests can assert call args and + * (optionally) that the legacy fallback `list()` path was avoided. + */ +function stubTryInsert( + harness: UserRecordsHarness, +): ReturnType) => Promise>> { + const spy = vi.fn(async (record: UserRecord) => { + if (harness.state.records.has(record.account.id)) return false + harness.state.records.set(record.account.id, record) + return true + }) + harness.store.tryInsert = spy + return spy +} + vi.mock('./secure-store.js', async () => { const actual = await vi.importActual('./secure-store.js') return { @@ -354,14 +376,9 @@ describe('migrateLegacyAuth — stderr privacy', () => { // calls and we'd clobber it. With `tryInsert`, the consumer // commits to an atomic check-and-insert and we trust their answer. const harness = buildUserRecords() - // Stub tryInsert that ALSO actually inserts into the harness so - // the migration's ownership-check re-reads see the placeholder. - const tryInsert = vi.fn(async (record: UserRecord) => { - if (harness.state.records.has(record.account.id)) return false - harness.state.records.set(record.account.id, record) - return true - }) - harness.store.tryInsert = tryInsert + const tryInsert = stubTryInsert(harness) + // Spy on `list()` to assert the racy fallback path was avoided. + const listSpy = vi.spyOn(harness.store, 'list') const km = buildKeyringMap() km.slots.set(LEGACY, { secret: 'legacy_tok' }) mockedCreateSecureStore.mockImplementation(km.create) @@ -394,6 +411,13 @@ describe('migrateLegacyAuth — stderr privacy', () => { // Follow-up upsert flipped hasRefreshToken to false (so future // active() reads skip the refresh-slot IPC). expect(harness.state.records.get('1')?.hasRefreshToken).toBe(false) + // Crucially, the racy `list()`-then-check existence path is + // bypassed entirely when `tryInsert` is available — only the + // ownership-check re-reads call `list()`, not the initial + // existence check. + const listCalls = listSpy.mock.calls.length + expect(listCalls).toBeGreaterThan(0) // re-reads happen + expect(tryInsert).toHaveBeenCalledTimes(1) // proves the atomic path ran }) it('keeps the plaintext fallback on the record when tryInsert succeeds but setSecret hits a keyring-offline error', async () => { @@ -405,13 +429,8 @@ describe('migrateLegacyAuth — stderr privacy', () => { // `tryInsert: false` (record exists) and the CLI would surface // AUTH_STORE_READ_FAILED forever. const harness = buildUserRecords() - const tryInsert = vi.fn(async (record: UserRecord) => { - if (harness.state.records.has(record.account.id)) return false - harness.state.records.set(record.account.id, record) - return true - }) + const tryInsert = stubTryInsert(harness) const upsertSpy = vi.fn(async (_record: UserRecord) => {}) - harness.store.tryInsert = tryInsert // Intercept upsert too so we can verify it isn't called to clear // the fallback (would happen on success). const originalUpsert = harness.store.upsert @@ -511,12 +530,7 @@ describe('migrateLegacyAuth — stderr privacy', () => { // The rollback must check the slot still contains OUR token // before deleting. const harness = buildUserRecords() - const tryInsert = vi.fn(async (record: UserRecord) => { - if (harness.state.records.has(record.account.id)) return false - harness.state.records.set(record.account.id, record) - return true - }) - harness.store.tryInsert = tryInsert + stubTryInsert(harness) const km = buildKeyringMap() km.slots.set(LEGACY, { secret: 'legacy_tok' }) mockedCreateSecureStore.mockImplementation(km.create) diff --git a/src/auth/keyring/migrate.ts b/src/auth/keyring/migrate.ts index 3ccf170..ac8ff64 100644 --- a/src/auth/keyring/migrate.ts +++ b/src/auth/keyring/migrate.ts @@ -1,5 +1,6 @@ import { getErrorMessage } from '../../errors.js' import type { AuthAccount } from '../types.js' +import { findById, trySetSecret } from './internal.js' import { writeRecordWithKeyringFallback } from './record-write.js' import { createSecureStore, @@ -208,7 +209,7 @@ export async function migrateLegacyAuth( hasRefreshToken: undefined, } async function recordStillOurs(): Promise { - const current = (await userRecords.list()).find((r) => r.account.id === account.id) + const current = findById(await userRecords.list(), account.id) return ( current?.fallbackToken === legacyTokenStr && current?.hasRefreshToken === undefined @@ -216,14 +217,11 @@ export async function migrateLegacyAuth( } const inserted = await userRecords.tryInsert(placeholder) if (inserted && (await recordStillOurs())) { - let movedToKeyring = false - try { - await accessStore.setSecret(legacyTokenStr) - movedToKeyring = true - } catch (error) { - if (!(error instanceof SecureStoreUnavailableError)) throw error - // Keyring offline — keep the plaintext fallback. - } + // Shared keyring-online/offline policy with + // `writeRecordWithKeyringFallback`: only the documented + // `SecureStoreUnavailableError` downgrades to "keep the + // plaintext fallback"; everything else propagates. + const movedToKeyring = await trySetSecret(accessStore, legacyTokenStr) // One ownership re-read after the keyring write; cache the // result so the follow-up upsert AND the refresh-slot // cleanup share the same answer. The earlier code re-read @@ -259,7 +257,7 @@ export async function migrateLegacyAuth( } } } else { - const existing = (await userRecords.list()).find((r) => r.account.id === account.id) + const existing = findById(await userRecords.list(), account.id) if (!existing) { await writeRecordWithKeyringFallback({ secureStore: accessStore, diff --git a/src/auth/keyring/record-write.ts b/src/auth/keyring/record-write.ts index aa9a7dc..542137c 100644 --- a/src/auth/keyring/record-write.ts +++ b/src/auth/keyring/record-write.ts @@ -1,4 +1,5 @@ import type { AuthAccount, TokenBundle } from '../types.js' +import { trySetSecret } from './internal.js' import { type SecureStore, SecureStoreUnavailableError } from './secure-store.js' import type { UserRecord, UserRecordStore } from './types.js' @@ -81,13 +82,7 @@ export async function writeRecordWithKeyringFallback { - it('appends the well-known suffix to the access slot name', () => { - // Pinning the wire format here means a future rename has to update - // exactly this test plus production code — no silent drift. - expect(refreshAccountSlot('user-42')).toBe('user-42/refresh') + it('derives a per-access-slot refresh slot name that includes the access slot', () => { + // The contract: the returned slot name is a function of the + // access slot name AND uniquely identifies the refresh slot for + // it. We don't pin the exact suffix here — the wire format is + // free to change as long as the property holds. + const result = refreshAccountSlot('user-42') + expect(result).toContain('user-42') + expect(result).not.toBe('user-42') }) - it('does not collapse an empty access slot to a bare suffix', () => { - // Defensive: `endsWith(refreshAccountSlot(''))` is how the - // test fixture routes between access and refresh slot mocks, so - // the suffix must remain a non-empty, distinctive substring. - const suffix = refreshAccountSlot('') - expect(suffix.length).toBeGreaterThan(0) - expect(suffix.startsWith('/')).toBe(true) + it('is deterministic — same access slot maps to the same refresh slot', () => { + // Critical for `clear()` to find the same slot it wrote to. + expect(refreshAccountSlot('user-42')).toBe(refreshAccountSlot('user-42')) + }) + + it('different access slots map to different refresh slots', () => { + // Critical for multi-account stores: account A's refresh secret + // must not leak into account B's refresh slot. + expect(refreshAccountSlot('user-1')).not.toBe(refreshAccountSlot('user-2')) }) }) diff --git a/src/auth/keyring/token-store.test.ts b/src/auth/keyring/token-store.test.ts index 220bc94..79addde 100644 --- a/src/auth/keyring/token-store.test.ts +++ b/src/auth/keyring/token-store.test.ts @@ -211,6 +211,80 @@ describe('createKeyringTokenStore', () => { await expect(store.active()).rejects.toThrow(/boom/) }) + it('promotes hasRefreshToken: undefined → true when the refresh slot is populated', async () => { + // Symmetric backfill to the `→ false` path. Pre-PR records have + // `hasRefreshToken: undefined`; without this promotion they'd + // keep probing the slot every active() call even after the slot + // is confirmed populated. The `true` write is just as race-aware + // as the `false` one (re-read + signature check inside the + // helper). + const refreshKeyring = buildSingleSlot({ secret: 'rt_in_slot' }) + const { store, state, upsertSpy } = fixture({ + keyring: buildSingleSlot({ secret: 'at-old' }), + refreshKeyring, + records: { '42': { account } }, // hasRefreshToken: undefined + defaultId: '42', + }) + + const snapshot = await store.active() + expect(snapshot?.bundle?.refreshToken).toBe('rt_in_slot') + + await new Promise((resolve) => setTimeout(resolve, 0)) + + expect(upsertSpy).toHaveBeenCalled() + expect(state.records.get('42')?.hasRefreshToken).toBe(true) + }) + + it('skips the backfill when a concurrent write changed the record between list() and upsert', async () => { + // The backfill is `replace, not merge` (contract), so it must + // detect when the record it's about to flip has been replaced by + // a richer write (e.g. a parallel `setBundle`). A naive + // `upsert({ ...staleRecord, hasRefreshToken: false })` would + // clobber the concurrent write's expiry / refresh-token + // metadata. The helper re-reads inside the upsert and compares + // signatures; on mismatch it bails. + const harness = buildUserRecords() + harness.state.records.set('42', { account }) // initial undefined record + const realList = harness.store.list.bind(harness.store) + // After active()'s first list(), the next list() (inside the + // backfill) sees a richer concurrent write — simulate by + // mutating the harness state. + let listCalls = 0 + harness.store.list = async () => { + listCalls += 1 + if (listCalls === 2) { + // Concurrent setBundle landed: full bundle written. + harness.state.records.set('42', { + account, + hasRefreshToken: true, + accessTokenExpiresAt: 999, + }) + } + return realList() + } + + const refreshKeyring = buildSingleSlot() + mockedCreateSecureStore.mockImplementation(({ account: slot }) => + slot.endsWith(refreshAccountSlot('')) + ? refreshKeyring + : buildSingleSlot({ secret: 'at' }), + ) + const store = createKeyringTokenStore({ + serviceName: SERVICE, + userRecords: harness.store, + recordsLocation: LOCATION, + }) + + await store.active() + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Concurrent write survives: hasRefreshToken stays true, expiry + // is preserved. Without the re-read guard, our backfill would + // have stomped these to `false`/`undefined`. + expect(harness.state.records.get('42')?.hasRefreshToken).toBe(true) + expect(harness.state.records.get('42')?.accessTokenExpiresAt).toBe(999) + }) + it('backfills hasRefreshToken: false on the record when undefined and the refresh slot is empty', async () => { // Pre-PR keyring-backed records have `hasRefreshToken: undefined` // because the field didn't exist. Treating undefined as "try the diff --git a/src/auth/keyring/token-store.ts b/src/auth/keyring/token-store.ts index b01c1a3..9f6b522 100644 --- a/src/auth/keyring/token-store.ts +++ b/src/auth/keyring/token-store.ts @@ -1,6 +1,7 @@ import { CliError } from '../../errors.js' import type { AccountRef, AuthAccount, TokenBundle, TokenStore } from '../types.js' import { accountNotFoundError } from '../user-flag.js' +import { findById } from './internal.js' import { writeRecordWithKeyringFallback } from './record-write.js' import { createSecureStore, @@ -204,19 +205,23 @@ export function createKeyringTokenStore( const refreshToken = rawRefresh?.trim() || undefined - // Backfill `hasRefreshToken: false` best-effort when we probed the - // refresh slot (record said `undefined` — "unknown") and found - // nothing. Pre-PR records didn't have this field; without the - // backfill they'd pay an extra keyring IPC per `active()` call - // forever. Doing it on a read isn't strictly race-safe — a - // concurrent `setBundle` could be writing a fresh refresh secret - // at the same instant — but for an account whose record says - // "no refresh yet known", such concurrency is vanishingly rare - // (login is foreground; silent refresh requires an already-stored - // refresh token). Failures here are swallowed: the worst case is - // the next `active()` pays the same extra IPC and tries again. - if (record.hasRefreshToken === undefined && refreshToken === undefined) { - void userRecords.upsert({ ...record, hasRefreshToken: false }).catch(() => undefined) + // Backfill `hasRefreshToken` best-effort when we probed the refresh + // slot (record said `undefined` — "unknown") and now know whether + // a secret is there. Pre-PR records didn't carry this bit; without + // the backfill they'd pay an extra keyring IPC per `active()` + // call forever (when the slot is empty) or get the right answer + // by luck of probing it every time (when populated). + // + // The upsert is `replace, not merge` per the contract, so spreading + // `record` (a stale snapshot from the earlier `list()`) would risk + // overwriting a concurrent `setBundle`'s richer record. Re-read + // inside the backfill helper, then compare the freshly-read shape + // against the snapshot we made the read decision on: only flip the + // bit when the record is STILL the "undefined" placeholder we + // just probed. Anything else (a v2 login landed, the user logged + // out, …) means our state is stale and we leave it alone. + if (record.hasRefreshToken === undefined) { + void backfillHasRefreshToken(record, refreshToken !== undefined).catch(() => undefined) } return { @@ -232,6 +237,34 @@ export function createKeyringTokenStore( return { records, defaultId: null } } + /** + * Re-read the record before backfilling `hasRefreshToken` so the + * upsert can't clobber a concurrent `setBundle`'s richer state + * (`UserRecordStore.upsert` is replace-not-merge per the contract). + * Only writes when the record still matches the placeholder shape + * we made the backfill decision on — same fields, same `undefined` + * `hasRefreshToken`, same fallbacks. Any divergence means a + * concurrent write landed between our `list()` read and now, and + * the right thing is to leave the fresh state alone. + */ + async function backfillHasRefreshToken( + staleRecord: UserRecord, + present: boolean, + ): Promise { + const fresh = findById(await userRecords.list(), staleRecord.account.id) + if (!fresh) return + if ( + fresh.hasRefreshToken !== undefined || + fresh.fallbackToken !== staleRecord.fallbackToken || + fresh.fallbackRefreshToken !== staleRecord.fallbackRefreshToken || + fresh.accessTokenExpiresAt !== staleRecord.accessTokenExpiresAt || + fresh.refreshTokenExpiresAt !== staleRecord.refreshTokenExpiresAt + ) { + return + } + await userRecords.upsert({ ...fresh, hasRefreshToken: present }) + } + /** * Shared persistence path for `set` / `setBundle`. The `promoteDefault` * flag is `true` for explicit-login callers (the legacy `set(token)` diff --git a/src/test-support/keyring-mocks.ts b/src/test-support/keyring-mocks.ts index 7125b31..1079402 100644 --- a/src/test-support/keyring-mocks.ts +++ b/src/test-support/keyring-mocks.ts @@ -102,7 +102,7 @@ export function buildSingleSlot(initial: { secret?: string | null } = {}): Singl } } -type UserRecordsHarness = { +export type UserRecordsHarness = { store: UserRecordStore state: { records: Map> From ae34bb68f8d640cef8e81a755f42de00b5582f62 Mon Sep 17 00:00:00 2001 From: lmjabreu Date: Tue, 19 May 2026 07:42:45 +0100 Subject: [PATCH 10/10] refactor(auth): simplify legacy migration to eliminate race surface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reframing in response to the bouncing P1 count across rounds 5-9: each "fix" to the multi-step migration was patching a local symptom while opening an adjacent race window in the same area. The fundamental problem was the migration trying to move the legacy token into the per-user keyring slot under a contract (UserRecordStore) with no transactional atomicity — every workaround (ownership-check re-reads, cached `stillOurs`, guarded delete) closed one window and opened another. This change accepts a slight functional trade-off in exchange for complete race-freedom: - migrate.ts no longer writes to the per-user keyring slot. It writes a v2 record with the legacy token as `fallbackToken` and stops. The v2 record format ALREADY supports fallbackToken (it's how WSL / headless Linux works), and the runtime reads it before consulting the keyring slot. The first subsequent v2 login moves the secret into the keyring atomically via writeRecordWithKeyringFallback. - Trade-off: legacy token sits in plaintext on disk for one login cycle. It was already plaintext in v1 config, so this is deferral, not new exposure. - Eliminated: ownership-check re-reads, the placeholder signature trick, the cached stillOurs flag, the guarded rollback delete, the refresh-slot purge, and the entire "did we lose ownership" decision tree. Multi-step write becomes single-call. record-write.ts: the defensive refresh-slot purge for no-refresh bundles now happens AFTER the upsert commits (round-9 P1.2). A failed upsert can't destroy a previous refresh secret. The post-upsert delete is best-effort because the upserted `hasRefreshToken: false` already prevents readers from consulting the slot. Tests (410 → 414): - Migration tests rewritten for the simpler shape (no per-user keyring assertions). Race tests collapse to a single "concurrent v2 login is harmless" property test — the simplification removes the race surface they used to probe. - record-write test for refresh-slot delete failure rewritten: best-effort post-upsert, no rollback. - New colocated `internal.test.ts` covers `findById` + `trySetSecret`. - `slot-naming.test` restores the literal `/refresh` suffix pin — the slug is persisted state in the OS keyring, so a rename WOULD break upgraders and should be caught loudly. Compatibility notes: - `MigrateLegacyAuthOptions.accountForUser` is now @deprecated and a no-op (migration doesn't write per-user keyring slots anymore). Kept on the type for back-compat; the first subsequent v2 login respects the consumer's `createKeyringTokenStore({ accountForUser })`. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/auth/keyring/internal.test.ts | 59 ++++++ src/auth/keyring/migrate.test.ts | 257 ++++++++------------------ src/auth/keyring/migrate.ts | 168 +++++------------ src/auth/keyring/record-write.test.ts | 24 ++- src/auth/keyring/record-write.ts | 138 +++++++------- src/auth/keyring/slot-naming.test.ts | 17 +- 6 files changed, 274 insertions(+), 389 deletions(-) create mode 100644 src/auth/keyring/internal.test.ts diff --git a/src/auth/keyring/internal.test.ts b/src/auth/keyring/internal.test.ts new file mode 100644 index 0000000..3bdf683 --- /dev/null +++ b/src/auth/keyring/internal.test.ts @@ -0,0 +1,59 @@ +import { describe, expect, it, vi } from 'vitest' + +import { findById, trySetSecret } from './internal.js' +import { type SecureStore, SecureStoreUnavailableError } from './secure-store.js' + +type Account = { id: string; label?: string; email: string } + +describe('findById', () => { + const accountA: Account = { id: 'a', email: 'a@x' } + const accountB: Account = { id: 'b', email: 'b@x' } + + it('returns the matching record when present', () => { + const result = findById([{ account: accountA }, { account: accountB }], 'b') + expect(result?.account).toBe(accountB) + }) + + it('returns undefined when no record matches', () => { + const result = findById([{ account: accountA }], 'missing') + expect(result).toBeUndefined() + }) + + it('returns undefined for an empty list', () => { + expect(findById([], 'a')).toBeUndefined() + }) +}) + +describe('trySetSecret', () => { + function fakeStore(setImpl: (secret: string) => Promise): SecureStore { + return { + getSecret: vi.fn(async () => null), + setSecret: setImpl, + deleteSecret: vi.fn(async () => false), + } + } + + it('returns true on a successful setSecret', async () => { + const store = fakeStore(async () => undefined) + expect(await trySetSecret(store, 'tok')).toBe(true) + }) + + it('returns false on SecureStoreUnavailableError (the documented offline downgrade)', async () => { + const store = fakeStore(async () => { + throw new SecureStoreUnavailableError('no dbus') + }) + expect(await trySetSecret(store, 'tok')).toBe(false) + }) + + it('rethrows non-SecureStoreUnavailable errors (no silent downgrade for programming bugs)', async () => { + // Without this, an unexpected backend error would be + // indistinguishable from "keyring offline", and callers would + // silently fall through to the plaintext-fallback path on the + // wrong signal. The narrow catch is load-bearing. + const cause = new Error('something else went wrong') + const store = fakeStore(async () => { + throw cause + }) + await expect(trySetSecret(store, 'tok')).rejects.toBe(cause) + }) +}) diff --git a/src/auth/keyring/migrate.test.ts b/src/auth/keyring/migrate.test.ts index 2b205e7..a0e3ef5 100644 --- a/src/auth/keyring/migrate.test.ts +++ b/src/auth/keyring/migrate.test.ts @@ -130,7 +130,16 @@ describe('migrateLegacyAuth', () => { expect(result).toMatchObject({ status: 'skipped', reason: 'legacy-keyring-unreachable' }) }) - it('migrates a legacy keyring token into a per-user slot and clears the legacy entry', async () => { + it('migrates a legacy keyring token to a v2 record carrying it as fallbackToken', async () => { + // Migration deliberately writes the legacy token to the v2 + // record's `fallbackToken` field (a valid v2 state — the + // runtime reads it before any keyring slot) rather than moving + // the secret into the per-user keyring slot itself. The slot + // move happens later, atomically, on the next v2 login via + // `writeRecordWithKeyringFallback`. Earlier revisions tried to + // do the move here and accumulated a chain of races; the + // simplification trades "secret in keyring immediately" for + // complete race-freedom. const cleanup = vi.fn(async () => undefined) const { km, state, harness, result, marker, markMigrated } = await runMigration({ slots: { [LEGACY]: { secret: 'legacy_tok' } }, @@ -145,9 +154,13 @@ describe('migrateLegacyAuth', () => { expect(result.status).toBe('migrated') if (result.status === 'migrated') expect(result.account.id).toBe('99') - expect(km.slots.get('user-99')?.secret).toBe('legacy_tok') + // Per-user keyring slot is NEVER written during migration. + expect(km.slots.has('user-99')).toBe(false) + // Legacy slot is cleared post-success. expect(km.slots.get(LEGACY)?.secret).toBeNull() - expect(state.records.get('99')?.fallbackToken).toBeUndefined() + // Record carries the legacy token as fallbackToken. + expect(state.records.get('99')?.fallbackToken).toBe('legacy_tok') + expect(state.records.get('99')?.hasRefreshToken).toBe(false) expect(state.defaultId).toBe('99') expect(harness.upsertSpy).toHaveBeenCalledTimes(1) expect(cleanup).toHaveBeenCalledTimes(1) @@ -156,7 +169,7 @@ describe('migrateLegacyAuth', () => { }) it('falls back to loadLegacyPlaintextToken when the legacy keyring slot is empty', async () => { - const { km, state, result } = await runMigration({ + const { state, result } = await runMigration({ options: { loadLegacyPlaintextToken: async () => 'plain_legacy', identifyAccount: async () => ({ id: '7', email: 'p@l.x' }), @@ -164,19 +177,19 @@ describe('migrateLegacyAuth', () => { }) expect(result.status).toBe('migrated') - expect(km.slots.get('user-7')?.secret).toBe('plain_legacy') - expect(state.records.get('7')?.fallbackToken).toBeUndefined() + expect(state.records.get('7')?.fallbackToken).toBe('plain_legacy') }) it('migrates against an entirely offline keyring when the plaintext slot has the token (WSL/headless)', async () => { - // The keyring is dead: reading the legacy slot throws and writing - // the per-user slot would too. Migration must still complete by - // sourcing the token from the consumer's plaintext slot and - // parking it on the user record as `fallbackToken`. + // The keyring is dead: reading the legacy slot throws. Migration + // sources the token from the consumer's plaintext slot and + // parks it on the user record as `fallbackToken`. With the + // simplified migration (no per-user keyring writes), the + // per-user slot's keyring status doesn't matter — migration + // never touches it. const { state, result } = await runMigration({ slots: { [LEGACY]: { getErr: new SecureStoreUnavailableError('no dbus') }, - 'user-7': { setErr: new SecureStoreUnavailableError('no dbus') }, }, options: { loadLegacyPlaintextToken: async () => 'plain_legacy', @@ -306,8 +319,14 @@ describe('migrateLegacyAuth', () => { expect(state.defaultId).toBe('other') }) - it('writes to a custom keyring slot when accountForUser is overridden', async () => { - const { km, result } = await runMigration({ + it('honours the deprecated accountForUser option as a no-op (legacy migration writes no per-user keyring slot)', async () => { + // `accountForUser` was load-bearing when migration moved the + // secret into a per-user keyring slot. With the simplified + // migration (no per-user keyring writes), the option is a no-op + // — kept on the type for back-compat. This test pins that: + // passing a custom slug must NOT write any per-user keyring + // entry under either the default or custom name. + const { km, state, result } = await runMigration({ slots: { [LEGACY]: { secret: 'legacy_tok' } }, options: { accountForUser: (id) => `custom-${id}`, @@ -316,15 +335,17 @@ describe('migrateLegacyAuth', () => { }) expect(result.status).toBe('migrated') - expect(km.slots.get('custom-99')?.secret).toBe('legacy_tok') - expect(km.slots.get('user-99')?.secret).toBeUndefined() + expect(km.slots.has('custom-99')).toBe(false) + expect(km.slots.has('user-99')).toBe(false) + // Record still carries the legacy token as fallback. + expect(state.records.get('99')?.fallbackToken).toBe('legacy_tok') }) it('prefers the legacy keyring token over the plaintext fallback when both are populated', async () => { // Locks the keyring-first precedence — a refactor that flipped the // order would silently surface a stale plaintext token even when a // freshly-rotated keyring credential exists. - const { km, state, result } = await runMigration({ + const { state, result } = await runMigration({ slots: { [LEGACY]: { secret: 'fresh_keyring_tok' } }, options: { loadLegacyPlaintextToken: async () => 'stale_plaintext_tok', @@ -336,8 +357,7 @@ describe('migrateLegacyAuth', () => { }) expect(result.status).toBe('migrated') - expect(km.slots.get('user-7')?.secret).toBe('fresh_keyring_tok') - expect(state.records.get('7')?.fallbackToken).toBeUndefined() + expect(state.records.get('7')?.fallbackToken).toBe('fresh_keyring_tok') }) }) @@ -370,14 +390,17 @@ describe('migrateLegacyAuth — stderr privacy', () => { expect(lines).not.toContain('sensitive') }) - it('uses atomic tryInsert when the consumer supplies it (no race on the existence check)', async () => { + it('uses atomic tryInsert when the consumer supplies it (race-free)', async () => { // Without `tryInsert`, the migration does list-then-upsert which - // is racy: a parallel v2 login could complete between the two - // calls and we'd clobber it. With `tryInsert`, the consumer - // commits to an atomic check-and-insert and we trust their answer. + // has a small race window with parallel v2 logins. With + // `tryInsert`, the consumer commits to an atomic + // check-and-insert and we delegate the existence decision to + // them entirely. No keyring writes from migration, no follow-up + // upserts — the simplification eliminates the multi-step race + // surface that earlier revisions kept introducing. const harness = buildUserRecords() const tryInsert = stubTryInsert(harness) - // Spy on `list()` to assert the racy fallback path was avoided. + const upsertSpy = vi.spyOn(harness.store, 'upsert') const listSpy = vi.spyOn(harness.store, 'list') const km = buildKeyringMap() km.slots.set(LEGACY, { secret: 'legacy_tok' }) @@ -396,104 +419,47 @@ describe('migrateLegacyAuth — stderr privacy', () => { expect(result).toMatchObject({ status: 'migrated' }) expect(tryInsert).toHaveBeenCalledTimes(1) - // Placeholder uses a unique signature (`fallbackToken: legacyToken` - // + `hasRefreshToken: undefined`) so the ownership-check re-read - // can distinguish "still ours" from "v2 login took over". After - // the keyring write succeeds the follow-up upsert flips - // `hasRefreshToken` to `false`. expect(tryInsert.mock.calls[0][0]).toMatchObject({ account: { id: '1' }, fallbackToken: 'legacy_tok', - hasRefreshToken: undefined, - }) - // Access secret landed in the per-user slot. - expect(km.slots.get('user-1')?.secret).toBe('legacy_tok') - // Follow-up upsert flipped hasRefreshToken to false (so future - // active() reads skip the refresh-slot IPC). - expect(harness.state.records.get('1')?.hasRefreshToken).toBe(false) - // Crucially, the racy `list()`-then-check existence path is - // bypassed entirely when `tryInsert` is available — only the - // ownership-check re-reads call `list()`, not the initial - // existence check. - const listCalls = listSpy.mock.calls.length - expect(listCalls).toBeGreaterThan(0) // re-reads happen - expect(tryInsert).toHaveBeenCalledTimes(1) // proves the atomic path ran - }) - - it('keeps the plaintext fallback on the record when tryInsert succeeds but setSecret hits a keyring-offline error', async () => { - // Two-phase write under contention with an offline keyring: - // phase 1 (tryInsert with fallbackToken) succeeds, phase 2 - // (setSecret) throws SecureStoreUnavailableError. The migration - // must NOT clear the fallback — that would leave a record with - // no recoverable token. Subsequent migrations would see - // `tryInsert: false` (record exists) and the CLI would surface - // AUTH_STORE_READ_FAILED forever. - const harness = buildUserRecords() - const tryInsert = stubTryInsert(harness) - const upsertSpy = vi.fn(async (_record: UserRecord) => {}) - // Intercept upsert too so we can verify it isn't called to clear - // the fallback (would happen on success). - const originalUpsert = harness.store.upsert - harness.store.upsert = async (record) => { - await upsertSpy(record) - return originalUpsert(record) - } - const km = buildKeyringMap() - km.slots.set(LEGACY, { secret: 'legacy_tok' }) - km.slots.set('user-1', { - secret: null, - setErr: new SecureStoreUnavailableError('keyring down'), - }) - mockedCreateSecureStore.mockImplementation(km.create) - - const result = await migrateLegacyAuth({ - serviceName: SERVICE, - legacyAccount: LEGACY, - userRecords: harness.store, - hasMigrated: async () => false, - markMigrated: async () => {}, - loadLegacyPlaintextToken: async () => null, - identifyAccount: async () => ({ id: '1', email: 'a@b' }), - silent: true, - }) - - expect(result).toMatchObject({ status: 'migrated' }) - // tryInsert was called with the plaintext fallback baked in - // (placeholder signature: `fallbackToken: legacyToken` + - // `hasRefreshToken: undefined`). - expect(tryInsert.mock.calls[0][0]).toMatchObject({ - fallbackToken: 'legacy_tok', - hasRefreshToken: undefined, + hasRefreshToken: false, }) - // The follow-up upsert (which would clear the fallback) never - // ran because setSecret failed — the record keeps the fallback. + // Per-user keyring slot is NEVER written during migration. The + // first subsequent v2 login moves the secret atomically via + // `writeRecordWithKeyringFallback`. + expect(km.slots.has('user-1')).toBe(false) + expect(km.slots.has('user-1/refresh')).toBe(false) + // No follow-up upsert: tryInsert is the only record write the + // migration does. Confirms the absence of the "upsert clobbers + // a parallel v2 login mid-migration" race surface. expect(upsertSpy).not.toHaveBeenCalled() + // The racy `list()`-then-check existence path is bypassed + // entirely when `tryInsert` is available. + expect(listSpy).not.toHaveBeenCalled() }) - it('aborts the keyring write when a concurrent v2 login replaces the placeholder mid-migration', async () => { - // Simulates: tryInsert succeeds with our placeholder; then a v2 - // login completes (writes its own access token + record) before - // we run our follow-up keyring writes. The ownership-check - // re-read must detect the shape change and stop us from - // clobbering the v2 state. + it('survives a concurrent v2 login mid-migration (race-free property)', async () => { + // The end-to-end race property: if a v2 login lands between the + // legacy token discovery and our tryInsert, neither side's + // state is clobbered. tryInsert either: + // (a) succeeds, because v2 hasn't written its record yet — + // migration's fallbackToken record is the only state, and + // v2's next setBundle replaces it atomically; or + // (b) returns false, because v2 already wrote its record — + // migration is a no-op and v2's state is untouched. + // Either way, no keyring writes happen from migration, so v2's + // access/refresh slots are guaranteed-safe across this race. const harness = buildUserRecords() - const tryInsert = vi.fn(async (record: UserRecord) => { - if (harness.state.records.has(record.account.id)) return false - harness.state.records.set(record.account.id, record) - // Race: as soon as our placeholder lands, a v2 login completes. - // Replaces the record with its keyring-backed shape (no - // fallbackToken, hasRefreshToken: true). - harness.state.records.set(record.account.id, { - account: { id: '1', email: 'a@b' }, - hasRefreshToken: true, - }) - return true + // tryInsert mock that mimics v2 having JUST won the race — + // record already present (with refresh metadata). + harness.state.records.set('1', { + account: { id: '1', email: 'a@b' }, + hasRefreshToken: true, + accessTokenExpiresAt: 99_999, }) - harness.store.tryInsert = tryInsert + const tryInsert = stubTryInsert(harness) const km = buildKeyringMap() km.slots.set(LEGACY, { secret: 'legacy_tok' }) - // V2 already wrote its access token to the keyring before our - // migration's setSecret runs. km.slots.set('user-1', { secret: 'v2_access' }) km.slots.set('user-1/refresh', { secret: 'v2_refresh' }) mockedCreateSecureStore.mockImplementation(km.create) @@ -509,75 +475,16 @@ describe('migrateLegacyAuth — stderr privacy', () => { silent: true, }) - // V2's access token survives — our setSecret was skipped because - // the ownership-check re-read saw the record had been replaced. - expect(km.slots.get('user-1')?.secret).toBe('v2_access') - // V2's refresh token survives too. - expect(km.slots.get('user-1/refresh')?.secret).toBe('v2_refresh') - // The v2 record is intact (hasRefreshToken: true, no fallback). + // tryInsert returned false (v2 record was already there) → no + // record change. + expect(tryInsert).toHaveBeenCalledTimes(1) expect(harness.state.records.get('1')).toMatchObject({ hasRefreshToken: true, + accessTokenExpiresAt: 99_999, }) - expect(harness.state.records.get('1')?.fallbackToken).toBeUndefined() - }) - - it('does not delete the access slot during rollback when a v2 login has since overwritten it', async () => { - // Race: our setSecret writes the legacy token, then a v2 login - // overwrites the slot with its access token AND replaces the - // record. When we discover we lost ownership, a blind - // deleteSecret() would remove the v2 access token and leave the - // v2 record permanently unreadable (AUTH_STORE_READ_FAILED). - // The rollback must check the slot still contains OUR token - // before deleting. - const harness = buildUserRecords() - stubTryInsert(harness) - const km = buildKeyringMap() - km.slots.set(LEGACY, { secret: 'legacy_tok' }) - mockedCreateSecureStore.mockImplementation(km.create) - - // Hook setSecret on the per-user slot so that AS SOON AS our - // migration writes 'legacy_tok' there, a "v2 login" completes: - // overwrites the slot with v2's token and replaces the record. - const userSlot = km.slots.get('user-1') ?? { secret: null } - km.slots.set('user-1', userSlot) - // Wrap setSecret so the race fires after our write. - const originalCreate = km.create - mockedCreateSecureStore.mockImplementation((args) => { - const store = originalCreate(args) - if (args.account === 'user-1') { - const originalSet = store.setSecret.bind(store) - store.setSecret = async (secret) => { - await originalSet(secret) - // V2 login lands now: overwrite the slot and the record. - userSlot.secret = 'v2_access' - harness.state.records.set('1', { - account: { id: '1', email: 'a@b' }, - hasRefreshToken: true, - }) - } - } - return store - }) - - await migrateLegacyAuth({ - serviceName: SERVICE, - legacyAccount: LEGACY, - userRecords: harness.store, - hasMigrated: async () => false, - markMigrated: async () => {}, - loadLegacyPlaintextToken: async () => null, - identifyAccount: async () => ({ id: '1', email: 'a@b' }), - silent: true, - }) - - // V2's access token survives the rollback — the guarded delete - // saw the slot no longer contained the legacy token and - // declined to remove anything. + // Keyring slots untouched. expect(km.slots.get('user-1')?.secret).toBe('v2_access') - // V2 record intact. - expect(harness.state.records.get('1')).toMatchObject({ - hasRefreshToken: true, - }) + expect(km.slots.get('user-1/refresh')?.secret).toBe('v2_refresh') }) it('leaves the v2 record alone when tryInsert returns false (existing v2 login)', async () => { diff --git a/src/auth/keyring/migrate.ts b/src/auth/keyring/migrate.ts index ac8ff64..50f033d 100644 --- a/src/auth/keyring/migrate.ts +++ b/src/auth/keyring/migrate.ts @@ -1,14 +1,7 @@ import { getErrorMessage } from '../../errors.js' import type { AuthAccount } from '../types.js' -import { findById, trySetSecret } from './internal.js' -import { writeRecordWithKeyringFallback } from './record-write.js' -import { - createSecureStore, - DEFAULT_ACCOUNT_FOR_USER, - type SecureStore, - SecureStoreUnavailableError, -} from './secure-store.js' -import { refreshAccountSlot } from './slot-naming.js' +import { findById } from './internal.js' +import { createSecureStore, type SecureStore, SecureStoreUnavailableError } from './secure-store.js' import type { UserRecordStore } from './types.js' export type MigrateLegacyAuthOptions = { @@ -17,7 +10,15 @@ export type MigrateLegacyAuthOptions = { legacyAccount: string /** v2 user-record store the migrated record is written into. */ userRecords: UserRecordStore - /** Per-user keyring slug for the new entry. Defaults to `user-${id}`. */ + /** + * @deprecated No longer consulted: migration now writes a v2 record + * with `fallbackToken` instead of moving the secret into a per-user + * keyring slot, so the slot name doesn't matter to this helper. Kept + * on the type for back-compat; consumers that still pass it will be + * silently ignored. The first subsequent v2 login moves the secret + * into the keyring via the standard write path, which respects the + * consumer's `createKeyringTokenStore({ accountForUser })`. + */ accountForUser?: (id: string) => string /** * Reads the durable "migration already ran" marker the consumer owns @@ -126,7 +127,6 @@ export async function migrateLegacyAuth( cleanupLegacyConfig, silent, } = options - const accountForUser = options.accountForUser ?? DEFAULT_ACCOUNT_FOR_USER const logPrefix = options.logPrefix ?? 'cli' if (await hasMigrated()) { @@ -155,125 +155,41 @@ export async function migrateLegacyAuth( return skipped(silent, logPrefix, 'identify-failed', getErrorMessage(error)) } - // Don't clobber an existing v2 record. A `marker-write-failed` retry - // may land on a store where the user has since completed a v2 login - // (with a refresh token + expiry on the record). `userRecords.upsert` - // is replace-not-merge, so writing the legacy access-only bundle here - // would wipe `hasRefreshToken` / expiry and silently disable silent - // refresh. The legacy token has no authority over refresh state. + // Write a v2 record carrying the legacy token as `fallbackToken`. + // This is a FULLY valid v2 state: the runtime reads `fallbackToken` + // before any keyring slot, so the user is functional immediately. + // The first subsequent v2 login (`runOAuthFlow` → `setBundle`) will + // atomically move the secret into the keyring and clear the + // fallback via the standard write path. // - // Prefer the atomic `tryInsert` when the consumer supplies it — - // eliminates the TOCTOU race between a list-based existence check and - // the write. Fall back to list-then-write when not implemented; the - // race window is small (microseconds for in-process upserts) and - // acceptable for typical postinstall-style invocations. + // We deliberately do NOT move the secret into the per-user keyring + // slot here. Earlier revisions tried to and walked through a series + // of races (tryInsert gives no ongoing ownership; ownership-check + // re-reads close one window and open another; rollback delete can + // wipe a concurrent v2 login's credential). The simplification + // trades "secret already in keyring after migration" for a complete + // elimination of those races — the same trade-off the WSL/headless- + // Linux path already makes, and the only thing on the line is one + // login-cycle of plaintext-on-disk for the legacy token, which was + // already plaintext in v1 config. + // + // `tryInsert` is preferred when the consumer supplies it (atomic + // insert-if-absent). Without it, fall back to a list-then-upsert + // existence check — small race window, acceptable for postinstall. + const record = { + account, + fallbackToken: legacyToken.token, + hasRefreshToken: false, + } try { - const accountSlot = accountForUser(account.id) - const accessStore = createSecureStore({ serviceName, account: accountSlot }) - const refreshStore = createSecureStore({ - serviceName, - account: refreshAccountSlot(accountSlot), - }) - if (userRecords.tryInsert) { - // `tryInsert` proves the record was absent at insert time but - // gives no exclusive ownership afterward — a v2 login can - // start, complete `setSecret` + `upsert`, and reach the - // refresh slot all while we're between our own steps. - // Without ownership checks the migration would clobber the - // very v2 login it's meant to preserve: our `setSecret` - // would overwrite their access token, our follow-up `upsert` - // would reset their record, our `deleteSecret` would wipe - // their refresh slot. - // - // We re-read before each follow-up and abort the rest if the - // record is no longer the placeholder we inserted (matched - // by `fallbackToken === legacyToken.token` AND no - // `hasRefreshToken` advertised — together a unique signature - // of our migration's tryInsert payload, never produced by a - // v2 login). The race window is now bounded by the gap - // between re-read and the very next call, not the entire - // multi-step sequence. - // - // If we already moved the secret into the keyring before - // discovering we lost ownership, roll that write back so we - // don't leave a stale access token in the slot. The v2 - // login's `setSecret` may have run after ours; rolling back - // to "no entry" is the only safe end-state we can guarantee - // without contract-level CAS, and v2 login's keyring-write - // path is happy to re-create. - const legacyTokenStr = legacyToken.token - const placeholder = { - account, - fallbackToken: legacyTokenStr, - hasRefreshToken: undefined, - } - async function recordStillOurs(): Promise { - const current = findById(await userRecords.list(), account.id) - return ( - current?.fallbackToken === legacyTokenStr && - current?.hasRefreshToken === undefined - ) - } - const inserted = await userRecords.tryInsert(placeholder) - if (inserted && (await recordStillOurs())) { - // Shared keyring-online/offline policy with - // `writeRecordWithKeyringFallback`: only the documented - // `SecureStoreUnavailableError` downgrades to "keep the - // plaintext fallback"; everything else propagates. - const movedToKeyring = await trySetSecret(accessStore, legacyTokenStr) - // One ownership re-read after the keyring write; cache the - // result so the follow-up upsert AND the refresh-slot - // cleanup share the same answer. The earlier code re-read - // again after the upsert, but the upsert flips - // `hasRefreshToken` away from the placeholder signature - // so that check was guaranteed to return false (and the - // cleanup silently skipped). - const stillOurs = await recordStillOurs() - if (movedToKeyring && stillOurs) { - await userRecords.upsert({ account, hasRefreshToken: false }) - } else if (movedToKeyring) { - // V2 login took over after our setSecret. Only roll - // the keyring write back when the slot still contains - // OUR legacy token — a blind delete would otherwise - // remove v2's credential (their setSecret may have - // run after ours), leaving the v2 record permanently - // unreadable. - try { - const current = (await accessStore.getSecret())?.trim() - if (current === legacyTokenStr) { - await accessStore.deleteSecret().catch(() => undefined) - } - } catch { - // best-effort: if we can't read the slot, don't - // risk a destructive delete. - } - } - if (stillOurs) { - // Best-effort cleanup of any stale refresh secret - // (legacy single-user state never had one, but a - // hand-edit might). Skipped when v2 owns the record. - await refreshStore.deleteSecret().catch(() => undefined) - } - } + // `false` means a v2 record already exists for this account + // — leave it alone (it may carry refresh + expiry that the + // legacy token has no authority over). + await userRecords.tryInsert(record) } else { - const existing = findById(await userRecords.list(), account.id) - if (!existing) { - await writeRecordWithKeyringFallback({ - secureStore: accessStore, - refreshSecureStore: refreshStore, - userRecords, - account, - bundle: { accessToken: legacyToken.token }, - // The list-then-write fallback can still race with a - // parallel v2 login that writes a refresh secret - // between our `list()` and the helper's `upsert`. - // `purgeRefreshSlot: false` keeps the refresh slot - // untouched and persists `hasRefreshToken: undefined` - // ("unknown" — readers probe the slot) so a v2 - // refresh secret written mid-race remains visible. - purgeRefreshSlot: false, - }) + if (!findById(await userRecords.list(), account.id)) { + await userRecords.upsert(record) } } } catch (error) { diff --git a/src/auth/keyring/record-write.test.ts b/src/auth/keyring/record-write.test.ts index 405fb25..c61597a 100644 --- a/src/auth/keyring/record-write.test.ts +++ b/src/auth/keyring/record-write.test.ts @@ -133,11 +133,14 @@ describe('writeRecordWithKeyringFallback', () => { expect(state.records.size).toBe(0) }) - it('treats a refresh-slot delete failure on a no-refresh bundle as a write failure (no resurrection)', async () => { - // Without this rollback, a stale refresh secret from an earlier - // login would survive a re-login that didn't return one, and - // `active()` would later surface it. Belt-and-braces: roll back - // access too so the on-disk and in-keyring state stay aligned. + it('tolerates a post-upsert refresh-slot delete failure (best-effort, no rollback)', async () => { + // The post-upsert refresh-slot purge for no-refresh bundles is + // best-effort: with `hasRefreshToken: false` on the upserted + // record, `active()` skips the slot read entirely, so a stale + // secret in the slot can't shadow anything. Crucially, the + // delete happens AFTER the upsert commits, so a failed delete + // can never destroy a refresh secret the caller might want to + // recover with on retry (the round-9 P1 we used to have). const secureStore = buildSingleSlot() const refreshSecureStore = buildSingleSlot() refreshSecureStore.deleteSpy.mockRejectedValueOnce( @@ -153,10 +156,13 @@ describe('writeRecordWithKeyringFallback', () => { bundle: { accessToken: 'at_only' }, }) - // Fell through to fallback because the refresh-slot delete failed. - expect(result.storedSecurely).toBe(false) - expect(secureStore.deleteSpy).toHaveBeenCalledTimes(1) - expect(state.records.get('42')?.fallbackToken).toBe('at_only') + // Record write succeeded — failed cleanup doesn't roll back. + expect(result.storedSecurely).toBe(true) + // Access slot not rolled back. + expect(secureStore.deleteSpy).not.toHaveBeenCalled() + // Record persists keyring-backed shape with hasRefreshToken: + // false (readers skip the slot). + expect(state.records.get('42')?.fallbackToken).toBeUndefined() expect(state.records.get('42')?.hasRefreshToken).toBe(false) }) diff --git a/src/auth/keyring/record-write.ts b/src/auth/keyring/record-write.ts index 542137c..021a166 100644 --- a/src/auth/keyring/record-write.ts +++ b/src/auth/keyring/record-write.ts @@ -8,23 +8,23 @@ type WriteRecordOptions = { secureStore: SecureStore /** * Per-account keyring slot for the refresh token (separate slot under - * `${account}/refresh`). When the bundle has no refresh token this slot - * is still cleared on write so a previous refresh secret doesn't outlive - * a login that didn't return one — except when `purgeRefreshSlot: false`, - * see below. + * `${account}/refresh`). When the bundle has no refresh token, any + * previous secret in this slot is cleaned up **after** the record + * write commits — see `purgeRefreshSlot` below for the opt-out and + * the safety rationale. */ refreshSecureStore: SecureStore userRecords: UserRecordStore account: TAccount bundle: TokenBundle /** - * When `false`, skip the defensive delete of the refresh slot for - * bundles without a refresh token, and don't reset `hasRefreshToken` - * on the persisted record. Used by `migrateLegacyAuth`: a retry after a - * `marker-write-failed` may land on an account that has since logged in - * via the v2 flow and now has a valid refresh secret — the legacy token - * has no authority over refresh state and must not erase it. Defaults - * to `true` (the safe default for fresh logins). + * When `false`, skip the post-upsert defensive delete of the refresh + * slot for bundles without a refresh token, and persist + * `hasRefreshToken: undefined` instead of `false`. Used by + * `migrateLegacyAuth`: the legacy bundle has no authority over + * refresh state, so a stale slot must remain visible until a real v2 + * login decides what's there. Defaults to `true` (the safe default + * for fresh logins). */ purgeRefreshSlot?: boolean } @@ -36,28 +36,32 @@ type WriteRecordResult = { /** * Shared keyring-then-record write used by `createKeyringTokenStore.set` / - * `setBundle` and `migrateLegacyAuth`. Encapsulates the order-of-operations - * contract that matters for credential safety: + * `setBundle`. Encapsulates the order-of-operations contract that matters + * for credential safety: * * 1. Keyring `setSecret` for the access token first. On - * `SecureStoreUnavailableError`, swallow and route both tokens to the - * record's fallback slots. Any other error rethrows. - * 2. When the keyring is online and the bundle has a refresh token, write - * it to the sibling refresh slot. On `SecureStoreUnavailableError`, - * roll back the access-slot write and fall through to the fallback - * record (so both tokens travel together — never split state across - * keyring and record). On any other error, also roll back the access - * slot (best-effort) before rethrowing — leaving an orphan access - * credential with no matching user record breaks `active()` later. - * 3. When the keyring is online and the bundle has no refresh token, - * delete any pre-existing refresh secret. A delete failure here is - * surfaced as a write failure (raised as `SecureStoreUnavailableError`'s - * semantic equivalent: fall through to the fallback record) so a stale - * refresh secret can never outlive a login that didn't return one — - * otherwise `active()` would later read and use it. - * 4. `userRecords.upsert(record)`. On failure, best-effort rollback both - * keyring writes so we don't leave orphan credentials for an account - * cli-core never managed to register. Original error rethrows. + * `SecureStoreUnavailableError`, swallow and route both tokens to + * the record's fallback slots. Any other error rethrows. + * 2. When the keyring is online and the bundle has a refresh token, + * write it to the sibling refresh slot. On + * `SecureStoreUnavailableError`, roll back the access-slot write + * and fall through to the fallback record (so both tokens travel + * together — never split state across keyring and record). On any + * other error, also roll back the access slot (best-effort) before + * rethrowing — leaving an orphan access credential with no matching + * user record breaks `active()` later. + * 3. `userRecords.upsert(record)`. On failure, best-effort rollback + * both keyring writes so we don't leave orphan credentials for an + * account cli-core never managed to register. Original error + * rethrows. + * 4. Post-upsert (success only): when the bundle had no refresh token + * and `purgeRefreshSlot` is `true`, best-effort delete any + * previous secret in the refresh slot. Done AFTER the record + * commits so a failed upsert can't destroy a previous refresh + * secret the caller may want to recover with. Made best-effort + * because the record's `hasRefreshToken: false` already prevents + * `active()` from reading the slot — a stale secret can't shadow + * anything; it's just dead bytes until the next write. */ export async function writeRecordWithKeyringFallback( options: WriteRecordOptions, @@ -67,12 +71,6 @@ export async function writeRecordWithKeyringFallback { try { await secureStore.deleteSecret() @@ -85,39 +83,24 @@ export async function writeRecordWithKeyringFallback undefined) + } + return { storedSecurely } } diff --git a/src/auth/keyring/slot-naming.test.ts b/src/auth/keyring/slot-naming.test.ts index 6e9f47c..d4bf132 100644 --- a/src/auth/keyring/slot-naming.test.ts +++ b/src/auth/keyring/slot-naming.test.ts @@ -3,14 +3,15 @@ import { describe, expect, it } from 'vitest' import { refreshAccountSlot } from './slot-naming.js' describe('refreshAccountSlot', () => { - it('derives a per-access-slot refresh slot name that includes the access slot', () => { - // The contract: the returned slot name is a function of the - // access slot name AND uniquely identifies the refresh slot for - // it. We don't pin the exact suffix here — the wire format is - // free to change as long as the property holds. - const result = refreshAccountSlot('user-42') - expect(result).toContain('user-42') - expect(result).not.toBe('user-42') + it('pins the literal wire format `/refresh`', () => { + // The suffix is persisted state in the OS keyring: a rename + // (e.g. `/refresh` → `:refresh`) would orphan every existing + // user's refresh secret because their record would still point + // at the old slot. Pin the exact format here so an unintended + // change loudly breaks this test instead of silently breaking + // upgraders. Intentional renames require a migration plan AND + // updating this assertion. + expect(refreshAccountSlot('user-42')).toBe('user-42/refresh') }) it('is deterministic — same access slot maps to the same refresh slot', () => {