Files
Shade/packages/shade-transport-webrtc/src/transport.ts
Sterister e6fdf31b49
Some checks failed
Test / test (push) Has been cancelled
Cross-platform vectors / TypeScript vectors (bun) (push) Has been cancelled
Cross-platform vectors / Kotlin vectors (gradle) (push) Has been cancelled
Docker build and publish / docker (push) Has been cancelled
Publish / publish (push) Has been cancelled
release(v4.0.0): Shade GA — V3.x consolidation + audit prep
V3.1 → V3.12 consolidated and tagged for the first GA release. Wire
format unchanged from 0.4.x — 4.0 peers interoperate with 0.4.x peers
byte-for-byte. The version bump is semantic: audit-cycle complete,
opt-in surface fully exposed, threat model refreshed for every new
surface.

Highlights:
- All 24 @shade/* packages bumped to 4.0.0 in lockstep.
- CHANGELOG 4.0.0 section is the canonical manifest of what landed.
- THREAT-MODEL extended (§10 fingerprint gates, §11 WebRTC P2P, §12
  Web-Worker boundary) + residual-risks table refreshed.
- OpenAPI now covers all 27 routes: prekey, transfer, KT, inbox,
  bridge, observer, /metrics, /healthz, /ready.
- MIGRATION 0.3.x → 4.0 documented + smoke-tested against
  shade migrate-storage on a real SQLite DB.
- docs/audit/REVIEW-BUNDLE.md + SCOPE.md ready for external reviewer.
- scripts/soak.ts harness for the GA-stable 2-week soak window.
- All V*.md plans archived under docs/archive/ with Status: Done.
- Voice/Video carved out into V5.0; 4.0 audit focuses on the frozen
  non-realtime stack.

Tests: TS 1000/1000 + Kotlin 11/11 cross-platform vectors green.
Docker: gt.zyon.no/stian/shade-prekey:4.0.0 builds and reports
  version 4.0.0 on /health.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 18:35:35 +02:00

211 lines
7.5 KiB
TypeScript

/**
* `ITransferTransport` adapter that ships chunks over a WebRTC
* `DataChannel`.
*
* `probe` → opens (or reuses) the peer connection and asserts the
* data channel is `open`. Throws on failure so the
* caller-side `FallbackTransferTransport` can demote us
* to HTTP.
* `sendChunk` → encodes a `0x01` frame, sends, awaits the matching
* `0x81 chunk-ack` (or `0xFE error`).
* `fetchResumeState` → encodes a `0x02 resume-query`, awaits `0x82
* resume-state`. Returns `null` when the peer answers
* with `'not found'`.
*
* Identical Ack contract to `ShadeTransferHttpTransport` so the upstream
* `TransferEngine` pipeline (lane queues, retries, resume) doesn't care
* which transport is in use.
*/
import {
TransferAbortError,
TransferTransportError,
type ChunkAck,
type ChunkSendOptions,
type ITransferTransport,
type TransferResumeState,
} from '@shade/transfer';
import { WebRtcDataChannelError } from './errors.js';
import {
encodeChunkFrame,
encodeResumeQueryFrame,
randomRequestId,
streamIdStringToBytes,
WIRE_CHUNK,
WIRE_CHUNK_ACK,
WIRE_ERROR,
WIRE_RESUME_QUERY,
WIRE_RESUME_STATE,
} from './wire.js';
import type { WebRtcConnectionManager } from './manager.js';
/**
 * Construction options for `WebRtcTransferTransport`.
 */
export interface WebRtcTransferTransportOptions {
/**
 * Connection manager the transport asks for a peer connection
 * (`getOrCreate(peerAddress)`) before every probe, chunk send and
 * resume query.
 */
manager: WebRtcConnectionManager;
/**
 * Per-request timeout in ms, passed to the connection's `request` call
 * for each chunk and resume query. Default 30s (30 000 ms) when omitted.
 */
requestTimeoutMs?: number;
/**
 * Backpressure threshold — if `bufferedAmount` on the data channel
 * exceeds this value, sends pause until it drains to the threshold or
 * below. Default 4 MiB when omitted; the spec recommends ≤ 16 MiB to
 * avoid SCTP stalls.
 * NOTE(review): the 16 MiB figure is carried over from the original
 * comment — confirm which spec it refers to.
 */
backpressureThresholdBytes?: number;
}
/**
 * Largest single frame, in bytes, that the transport will hand to the data
 * channel: 256 KiB.
 *
 * SCTP data channels cap the size of an individual message, and this value
 * follows Chrome's safe upper bound. Shade's default chunkSize is 1 MiB, so
 * shipping full chunks would require fragmenting and reassembling frames;
 * until that exists, a frame above this cap is rejected with a clear error
 * rather than being sent.
 */
export const DEFAULT_MAX_DATACHANNEL_MESSAGE = 256 << 10; // 256 KiB = 262 144 bytes
/**
 * `ITransferTransport` implementation that carries transfer frames over a
 * WebRTC data channel.
 *
 * Every operation resolves the peer's connection through
 * `options.manager.getOrCreate(peerAddress)`. Chunk sends and resume queries
 * are request/response exchanges keyed by a freshly generated request id and
 * bounded by `requestTimeoutMs`.
 */
export class WebRtcTransferTransport implements ITransferTransport {
  /** Longest time `awaitDrain` waits for the send buffer to fall to the threshold. */
  private static readonly DRAIN_TIMEOUT_MS = 30_000;
  /** Delay between `bufferedAmount` polls while waiting for the buffer to drain. */
  private static readonly DRAIN_POLL_INTERVAL_MS = 25;
  /** A stream id must decode to exactly this many bytes. */
  private static readonly STREAM_ID_BYTES = 16;

  private readonly requestTimeoutMs: number;
  private readonly backpressureBytes: number;

  /**
   * @param options Connection manager plus optional tuning. The request
   *   timeout defaults to 30 000 ms and the backpressure threshold to 4 MiB.
   */
  constructor(private readonly options: WebRtcTransferTransportOptions) {
    this.requestTimeoutMs = options.requestTimeoutMs ?? 30_000;
    this.backpressureBytes = options.backpressureThresholdBytes ?? 4 * 1024 * 1024;
  }

  /**
   * Checks that a connection to `peerAddress` can be obtained from the manager.
   *
   * @throws TransferTransportError wrapping whatever the manager threw.
   */
  async probe(peerAddress: string): Promise<void> {
    try {
      await this.options.manager.getOrCreate(peerAddress);
    } catch (err) {
      throw new TransferTransportError(
        `webrtc probe failed: ${WebRtcTransferTransport.describeError(err)}`,
      );
    }
  }

  /**
   * Sends one chunk envelope to the peer and waits for its acknowledgement.
   *
   * @param peerAddress Peer to send to; resolved through the connection manager.
   * @param streamId Stream identifier; must decode to 16 bytes.
   * @param laneId Lane the chunk belongs to.
   * @param seq Sequence number; a `number` is converted with `BigInt(seq)`.
   * @param bytes Envelope payload placed in the frame.
   * @param options Optional send options; `options.signal` aborts the send.
   * @returns The `lastSeq` / `bytesReceived` pair reported by the peer's ack.
   * @throws TransferAbortError if the signal is aborted before the frame is
   *   sent, while waiting for the buffer to drain, or while awaiting the ack.
   * @throws TransferTransportError if the stream id or frame size is invalid,
   *   the peer answers with an error frame, or the response type is unexpected.
   */
  async sendChunk(
    peerAddress: string,
    streamId: string,
    laneId: number,
    seq: number | bigint,
    bytes: Uint8Array,
    options?: ChunkSendOptions,
  ): Promise<ChunkAck> {
    const signal = options?.signal;
    if (signal?.aborted) throw new TransferAbortError('aborted before send');

    // Validate and encode before touching the network: an oversized frame or a
    // malformed stream id can never succeed, so there is no point connecting
    // or waiting for the send buffer to drain first.
    const requestId = randomRequestId();
    const frame = encodeChunkFrame({
      type: WIRE_CHUNK,
      requestId,
      streamId: WebRtcTransferTransport.decodeStreamId(streamId),
      laneId,
      seq: typeof seq === 'bigint' ? seq : BigInt(seq),
      envelope: bytes,
    });
    if (frame.length > DEFAULT_MAX_DATACHANNEL_MESSAGE) {
      throw new TransferTransportError(
        `frame too large for data channel (${frame.length} > ${DEFAULT_MAX_DATACHANNEL_MESSAGE}); reduce chunkSize`,
      );
    }

    const conn = await this.options.manager.getOrCreate(peerAddress);
    // Backpressure: block while the send buffer is above the threshold.
    await this.awaitDrain(conn, signal);
    // The signal may have fired while connecting or during the final poll sleep.
    if (signal?.aborted) throw new TransferAbortError('aborted before send');

    const response = await this.raceAbort(
      conn.request(frame, requestId, this.requestTimeoutMs),
      signal,
    );

    if (response.type === WIRE_ERROR) {
      throw new TransferTransportError(`webrtc sendChunk error: ${response.json}`);
    }
    if (response.type !== WIRE_CHUNK_ACK) {
      throw new TransferTransportError(
        `unexpected webrtc response type 0x${response.type.toString(16)}`,
      );
    }
    return {
      lastSeq: response.lastSeq,
      bytesReceived: response.bytesReceived,
    };
  }

  /**
   * Asks the peer for the resume state of a stream.
   *
   * @param peerAddress Peer to query; resolved through the connection manager.
   * @param streamId Stream identifier; must decode to 16 bytes.
   * @returns The parsed resume state, or `null` when the peer answers with an
   *   error frame whose JSON `error` string contains `'not found'`.
   * @throws TransferTransportError if the stream id is invalid, the peer
   *   answers with any other error, the response type is unexpected, or the
   *   resume-state payload is not a JSON object.
   */
  async fetchResumeState(
    peerAddress: string,
    streamId: string,
  ): Promise<TransferResumeState | null> {
    const requestId = randomRequestId();
    const frame = encodeResumeQueryFrame({
      type: WIRE_RESUME_QUERY,
      requestId,
      streamId: WebRtcTransferTransport.decodeStreamId(streamId),
    });

    const conn = await this.options.manager.getOrCreate(peerAddress);
    const response = await conn.request(frame, requestId, this.requestTimeoutMs);

    if (response.type === WIRE_ERROR) {
      // Convention: 'not found' → null; anything else throws.
      if (WebRtcTransferTransport.isNotFoundError(response.json)) return null;
      throw new TransferTransportError(`fetchResumeState failed: ${response.json}`);
    }
    if (response.type !== WIRE_RESUME_STATE) {
      throw new TransferTransportError(
        `unexpected webrtc response type 0x${response.type.toString(16)}`,
      );
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(response.json);
    } catch (err) {
      throw new TransferTransportError(
        `fetchResumeState bad JSON: ${WebRtcTransferTransport.describeError(err)}`,
      );
    }
    // A payload such as `null` parses fine but must not be returned: `null`
    // is this method's "not found" signal, and scalars/arrays are not a state.
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      throw new TransferTransportError(
        'fetchResumeState bad JSON: expected a JSON object',
      );
    }
    return parsed as TransferResumeState;
  }

  /**
   * Wait until the data channel's `bufferedAmount` is at or below the
   * configured threshold.
   *
   * The channel is read from the connection's `dc` field through a cast; when
   * that field is missing or `null` the wait is skipped entirely, so
   * backpressure is best-effort. Polling is used instead of
   * `bufferedamountlow` events because those require setting
   * `bufferedAmountLowThreshold`, which the IDataChannel shim doesn't
   * standardise yet.
   *
   * @throws TransferAbortError if `signal` is aborted while waiting.
   * @throws WebRtcDataChannelError if the buffer stays above the threshold
   *   for more than 30 seconds.
   */
  private async awaitDrain(
    conn: { sendRaw: (b: Uint8Array) => void },
    signal?: AbortSignal,
  ): Promise<void> {
    const dc = (conn as unknown as { dc: { bufferedAmount: number } | null }).dc;
    if (dc == null) return;

    const startedAt = Date.now();
    while (dc.bufferedAmount > this.backpressureBytes) {
      if (signal?.aborted) throw new TransferAbortError('aborted while waiting for drain');
      if (Date.now() - startedAt > WebRtcTransferTransport.DRAIN_TIMEOUT_MS) {
        throw new WebRtcDataChannelError(
          `bufferedAmount stayed above threshold for 30s (${dc.bufferedAmount} bytes)`,
        );
      }
      await new Promise<void>((resolve) =>
        setTimeout(resolve, WebRtcTransferTransport.DRAIN_POLL_INTERVAL_MS),
      );
    }
  }

  /**
   * Settles with `pending`, unless `signal` aborts first, in which case it
   * rejects with `TransferAbortError`.
   *
   * The underlying request is not cancelled — this only stops waiting for
   * it. Handlers stay attached to `pending`, so a later rejection of the
   * abandoned request does not surface as an unhandled rejection. Callers
   * must check `signal.aborted` themselves before calling: an already-fired
   * signal does not emit `abort` again.
   */
  private raceAbort<T>(pending: Promise<T>, signal: AbortSignal | undefined): Promise<T> {
    if (signal === undefined) return pending;
    return new Promise<T>((resolve, reject) => {
      const onAbort = (): void => {
        reject(new TransferAbortError('aborted while awaiting chunk ack'));
      };
      signal.addEventListener('abort', onAbort, { once: true });
      pending.then(
        (value) => {
          signal.removeEventListener('abort', onAbort);
          resolve(value);
        },
        (err: unknown) => {
          signal.removeEventListener('abort', onAbort);
          reject(err);
        },
      );
    });
  }

  /** Decodes a stream id string and enforces the 16-byte length requirement. */
  private static decodeStreamId(streamId: string) {
    const decoded = streamIdStringToBytes(streamId);
    if (decoded.length !== WebRtcTransferTransport.STREAM_ID_BYTES) {
      throw new TransferTransportError(`streamId must decode to 16 bytes`);
    }
    return decoded;
  }

  /** True when `json` is an object whose `error` string contains `'not found'`. */
  private static isNotFoundError(json: string): boolean {
    try {
      const parsed: unknown = JSON.parse(json);
      if (typeof parsed !== 'object' || parsed === null) return false;
      const { error } = parsed as { error?: unknown };
      return typeof error === 'string' && error.includes('not found');
    } catch {
      // Unparseable error payloads are treated as a real failure by the caller.
      return false;
    }
  }

  /** Best-effort message for a caught value, which may not be an `Error`. */
  private static describeError(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }
}