release(v4.2.1): fix concurrent-ratchet desync via OutboundQueue waiter cursor
Pull-mode httpClient + drainer + parallel RPCs against the same peer deteriorated after ~10s with `DecryptionError`. Two bugs combined:

- `OutboundQueue.enqueue` woke `drain` waiters with a `since=0` snapshot, replaying already-processed events into `Shade.acceptTransferEnvelope` → `manager.decrypt` twice. The duplicate consumed an already-used skipped key and corrupted the Double Ratchet receive chain.
- `ratchetDecrypt` then propagated the corruption: a same-DH message behind the chain with no cached skipped key fell through to `kdfChainKey` on the ahead state and rewound `chain.counter`, permanently desyncing the chain.

Fix `OutboundQueue` to honor each waiter's `since`, and harden `ratchetDecrypt` so any future duplicate fails cleanly without mutating state. Adds regression coverage at all three layers.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@shade/transfer",
|
||||
"version": "4.2.0",
|
||||
"version": "4.2.1",
|
||||
"type": "module",
|
||||
"main": "src/index.ts",
|
||||
"types": "src/index.ts",
|
||||
|
||||
@@ -81,6 +81,14 @@ const DEFAULT_IDLE_EVICTION_MS = 10 * 60 * 1000;
|
||||
interface PendingWaiter {
|
||||
resolve(events: QueuedEvent[]): void;
|
||||
reject(err: Error): void;
|
||||
/**
|
||||
* The waiter's `since` cursor — only events with `id > since` should
|
||||
* be delivered when this waiter is resolved. Without this, an
|
||||
* enqueue that arrives while a poller is waiting would replay
|
||||
* already-processed events, causing the receiver to double-decrypt
|
||||
* (and corrupt ratchet state).
|
||||
*/
|
||||
since: number;
|
||||
timer: ReturnType<typeof setTimeout>;
|
||||
abortHandler?: () => void;
|
||||
signal?: AbortSignal;
|
||||
@@ -140,16 +148,21 @@ export class OutboundQueue {
|
||||
// last polled id; the @shade/transfer engine handles missing seqs
|
||||
// by re-sending on resume.
|
||||
while (state.events.length > this.maxEvents) state.events.shift();
|
||||
// Wake all waiters with whatever has accumulated.
|
||||
const drained = this.collect(state, 0);
|
||||
if (drained.length > 0) {
|
||||
// Wake each waiter with events newer than ITS OWN `since`. Using a
|
||||
// shared snapshot from `since=0` would replay events the waiter has
|
||||
// already processed once a fresh enqueue arrived mid-poll, which on
|
||||
// the receiver side double-dispatches an envelope into shade.receive
|
||||
// → manager.decrypt and consumes the same skipped-key twice (the
|
||||
// second dispatch corrupts the ratchet chain).
|
||||
if (state.waiters.length > 0) {
|
||||
const waiters = state.waiters.splice(0);
|
||||
for (const w of waiters) {
|
||||
clearTimeout(w.timer);
|
||||
if (w.abortHandler !== undefined && w.signal !== undefined) {
|
||||
w.signal.removeEventListener('abort', w.abortHandler);
|
||||
}
|
||||
w.resolve(drained);
|
||||
const wDrained = this.collect(state, w.since);
|
||||
w.resolve(wDrained);
|
||||
}
|
||||
}
|
||||
return event;
|
||||
@@ -181,7 +194,7 @@ export class OutboundQueue {
|
||||
// Empty drain on timeout — that's the "no new events" signal.
|
||||
resolve([]);
|
||||
}, blockMs);
|
||||
const waiter: PendingWaiter = { resolve, reject, timer };
|
||||
const waiter: PendingWaiter = { resolve, reject, since, timer };
|
||||
if (signal !== undefined) {
|
||||
const handler = () => {
|
||||
const idx = state.waiters.indexOf(waiter);
|
||||
|
||||
60
packages/shade-transfer/tests/outbound-queue.test.ts
Normal file
60
packages/shade-transfer/tests/outbound-queue.test.ts
Normal file
@@ -0,0 +1,60 @@
|
||||
import { describe, expect, test } from 'bun:test';
|
||||
import { OutboundQueue } from '../src/index.js';
|
||||
|
||||
/**
|
||||
* Regression coverage for the long-poll waiter `since` cursor.
|
||||
*
|
||||
* The bug being guarded against: when `enqueue` woke a pending
|
||||
* `drain` waiter, it used a `since=0` snapshot and replayed every
|
||||
* event that had ever been queued — including the ones the waiter
|
||||
* had already processed in a previous poll. Downstream the queue
|
||||
* fed `Shade.acceptTransferEnvelope`, so the duplicate replay
|
||||
* dispatched the same envelope into `manager.decrypt` twice. The
|
||||
* second decrypt consumed an already-used skipped key, fell into
|
||||
* the stale-counter branch of `ratchetDecrypt`, and corrupted the
|
||||
* Double Ratchet receive chain — surfacing as
|
||||
* `DecryptionError: wrong key or tampered data` on every
|
||||
* subsequent message.
|
||||
*/
|
||||
describe('OutboundQueue — waiter since cursor', () => {
|
||||
test('mid-poll enqueue must not replay events the waiter already saw', async () => {
|
||||
const queue = new OutboundQueue({ idleEvictionMs: 0 });
|
||||
const peer = 'alice';
|
||||
const e1 = queue.enqueue(peer, { kind: 'envelope', bytes: new Uint8Array([1]) });
|
||||
const e2 = queue.enqueue(peer, { kind: 'envelope', bytes: new Uint8Array([2]) });
|
||||
|
||||
// First poll drains both events (no blocking — they're already there).
|
||||
const first = await queue.drain(peer, 0, 0);
|
||||
expect(first.map((e) => e.id)).toEqual([e1.id, e2.id]);
|
||||
|
||||
// Now the waiter polls past the last seen id. It blocks because
|
||||
// there are no events newer than `since`. Concurrently a fresh
|
||||
// event gets enqueued — that's the path the bug fired on.
|
||||
const blockMs = 5_000;
|
||||
const polling = queue.drain(peer, e2.id, blockMs);
|
||||
// Yield so `drain` actually parks on the waiter list before we
|
||||
// race the enqueue against it.
|
||||
await Promise.resolve();
|
||||
const e3 = queue.enqueue(peer, { kind: 'envelope', bytes: new Uint8Array([3]) });
|
||||
const woken = await polling;
|
||||
|
||||
// Pre-fix: would resolve with [e1, e2, e3] (a `since=0` snapshot
|
||||
// drained verbatim). Post-fix: only the events newer than the
|
||||
// waiter's recorded `since` come through.
|
||||
expect(woken.map((e) => e.id)).toEqual([e3.id]);
|
||||
});
|
||||
|
||||
test('parked waiter at the head still gets the new event when others have polled past it', async () => {
|
||||
const queue = new OutboundQueue({ idleEvictionMs: 0 });
|
||||
const peer = 'alice';
|
||||
const e1 = queue.enqueue(peer, { kind: 'envelope', bytes: new Uint8Array([1]) });
|
||||
|
||||
// A waiter that parks past the head — there are no events newer
|
||||
// than e1.id, so it has to block.
|
||||
const polling = queue.drain(peer, e1.id, 5_000);
|
||||
await Promise.resolve();
|
||||
const e2 = queue.enqueue(peer, { kind: 'envelope', bytes: new Uint8Array([2]) });
|
||||
const woken = await polling;
|
||||
expect(woken.map((e) => e.id)).toEqual([e2.id]);
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user