Some checks failed
Test / test (push) Has been cancelled
Cross-platform vectors / TypeScript vectors (bun) (push) Has been cancelled
Cross-platform vectors / Kotlin vectors (gradle) (push) Has been cancelled
Docker build and publish / docker (push) Has been cancelled
Publish / publish (push) Has been cancelled
V3.1 → V3.12 consolidated and tagged for the first GA release. Wire format unchanged from 0.4.x — 4.0 peers interoperate with 0.4.x peers byte-for-byte. The version bump is semantic: audit-cycle complete, opt-in surface fully exposed, threat model refreshed for every new surface. Highlights: - All 24 @shade/* packages bumped to 4.0.0 in lockstep. - CHANGELOG 4.0.0 section is the canonical manifest of what landed. - THREAT-MODEL extended (§10 fingerprint gates, §11 WebRTC P2P, §12 Web-Worker boundary) + residual-risks table refreshed. - OpenAPI now covers all 27 routes: prekey, transfer, KT, inbox, bridge, observer, /metrics, /healthz, /ready. - MIGRATION 0.3.x → 4.0 documented + smoke-tested against shade migrate-storage on a real SQLite DB. - docs/audit/REVIEW-BUNDLE.md + SCOPE.md ready for external reviewer. - scripts/soak.ts harness for the GA-stable 2-week soak window. - All V*.md plans archived under docs/archive/ with Status: Done. - Voice/Video carved out into V5.0; 4.0 audit focuses on the frozen non-realtime stack. Tests: TS 1000/1000 + Kotlin 11/11 cross-platform vectors green. Docker: gt.zyon.no/stian/shade-prekey:4.0.0 builds and reports version 4.0.0 on /health. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
211 lines
7.5 KiB
TypeScript
211 lines
7.5 KiB
TypeScript
/**
|
|
* `ITransferTransport` adapter that ships chunks over a WebRTC
|
|
* `DataChannel`.
|
|
*
|
|
* `probe` → opens (or reuses) the peer connection and asserts the
|
|
* data channel is `open`. Throws on failure so the
|
|
* caller-side `FallbackTransferTransport` can demote us
|
|
* to HTTP.
|
|
* `sendChunk` → encodes a `0x01` frame, sends, awaits the matching
|
|
* `0x81 chunk-ack` (or `0xFE error`).
|
|
* `fetchResumeState` → encodes a `0x02 resume-query`, awaits `0x82
|
|
* resume-state`. Returns `null` when the peer answers
|
|
* with `'not found'`.
|
|
*
|
|
* Identical Ack contract to `ShadeTransferHttpTransport` so the upstream
|
|
* `TransferEngine` pipeline (lane queues, retries, resume) doesn't care
|
|
* which transport is in use.
|
|
*/
|
|
|
|
import {
|
|
TransferAbortError,
|
|
TransferTransportError,
|
|
type ChunkAck,
|
|
type ChunkSendOptions,
|
|
type ITransferTransport,
|
|
type TransferResumeState,
|
|
} from '@shade/transfer';
|
|
import { WebRtcDataChannelError } from './errors.js';
|
|
import {
|
|
encodeChunkFrame,
|
|
encodeResumeQueryFrame,
|
|
randomRequestId,
|
|
streamIdStringToBytes,
|
|
WIRE_CHUNK,
|
|
WIRE_CHUNK_ACK,
|
|
WIRE_ERROR,
|
|
WIRE_RESUME_QUERY,
|
|
WIRE_RESUME_STATE,
|
|
} from './wire.js';
|
|
import type { WebRtcConnectionManager } from './manager.js';
|
|
|
|
/** Construction options for `WebRtcTransferTransport`. */
export interface WebRtcTransferTransportOptions {
  /** Connection manager the transport asks for a per-peer connection. */
  manager: WebRtcConnectionManager;

  /** How long a single request may wait for its response, in ms (default 30 000). */
  requestTimeoutMs?: number;

  /**
   * Backpressure ceiling in bytes (default 4 MiB). While the data channel's
   * `bufferedAmount` is above this value, new sends wait for it to drain.
   * Keeping the value at or below 16 MiB is recommended to avoid SCTP stalls.
   */
  backpressureThresholdBytes?: number;
}
|
|
|
|
/**
 * Largest single frame, in bytes, that the transport will hand to the data
 * channel: 256 KiB, chosen to match Chrome's safe per-message upper bound.
 *
 * Shade's default chunkSize is 1 MiB, so shipping full-size chunks would
 * require fragmenting and reassembling frames. That is not implemented yet;
 * an envelope whose frame exceeds this cap is rejected with a clear error.
 */
export const DEFAULT_MAX_DATACHANNEL_MESSAGE = 1 << 18;
|
|
|
|
/**
 * `ITransferTransport` implementation that exchanges request/response frames
 * with a peer over the connection handed out by a `WebRtcConnectionManager`.
 */
export class WebRtcTransferTransport implements ITransferTransport {
  /** Upper bound on how long `awaitDrain` waits for the send buffer to empty. */
  private static readonly DRAIN_TIMEOUT_MS = 30_000;
  /** Interval between `bufferedAmount` polls while waiting for drain. */
  private static readonly DRAIN_POLL_MS = 25;
  /** Byte length a decoded stream id must have to be put on the wire. */
  private static readonly STREAM_ID_BYTES = 16;

  private readonly requestTimeoutMs: number;
  private readonly backpressureBytes: number;

  /**
   * @param options Manager plus optional tuning knobs. `requestTimeoutMs`
   *   defaults to 30 000 ms and `backpressureThresholdBytes` to 4 MiB.
   */
  constructor(private readonly options: WebRtcTransferTransportOptions) {
    this.requestTimeoutMs = options.requestTimeoutMs ?? 30_000;
    this.backpressureBytes = options.backpressureThresholdBytes ?? 4 * 1024 * 1024;
  }

  /**
   * Checks that a connection to `peerAddress` can be obtained from the manager.
   *
   * NOTE(review): whether `getOrCreate` also guarantees the data channel is
   * open is decided by the manager, which is not visible from here.
   *
   * @throws TransferTransportError wrapping whatever `getOrCreate` rejected with.
   */
  async probe(peerAddress: string): Promise<void> {
    try {
      await this.options.manager.getOrCreate(peerAddress);
    } catch (err) {
      throw new TransferTransportError(
        `webrtc probe failed: ${WebRtcTransferTransport.describeError(err)}`,
      );
    }
  }

  /**
   * Sends one chunk frame to the peer and waits for its acknowledgement.
   *
   * Input validation and frame encoding happen before any connection work, so
   * malformed input fails immediately instead of after a connection attempt or
   * a drain wait.
   *
   * @param peerAddress Peer to send to; passed to the connection manager.
   * @param streamId    Stream identifier; must decode to exactly 16 bytes.
   * @param laneId      Lane identifier, encoded into the frame as-is.
   * @param seq         Chunk sequence number; a `number` must be an integer.
   * @param bytes       Envelope payload carried by the frame.
   * @param options     Optional `signal` used to abort the send.
   * @returns `lastSeq` and `bytesReceived` copied from the peer's chunk-ack.
   * @throws TransferAbortError if the signal is aborted before the send, while
   *   waiting for drain, or while the response is pending.
   * @throws TransferTransportError for an invalid `seq` or `streamId`, a frame
   *   larger than `DEFAULT_MAX_DATACHANNEL_MESSAGE`, an error frame from the
   *   peer, or an unexpected response type.
   * @throws WebRtcDataChannelError if the send buffer does not drain in time.
   */
  async sendChunk(
    peerAddress: string,
    streamId: string,
    laneId: number,
    seq: number | bigint,
    bytes: Uint8Array,
    options?: ChunkSendOptions,
  ): Promise<ChunkAck> {
    const signal = options?.signal;
    WebRtcTransferTransport.throwIfAborted(signal, 'aborted before send');

    // BigInt() throws a bare RangeError for NaN or fractional numbers; report
    // that as a transport error like every other input problem.
    if (typeof seq === 'number' && !Number.isInteger(seq)) {
      throw new TransferTransportError(`seq must be an integer, got ${seq}`);
    }

    const streamIdBytes = streamIdStringToBytes(streamId);
    if (streamIdBytes.length !== WebRtcTransferTransport.STREAM_ID_BYTES) {
      throw new TransferTransportError(`streamId must decode to 16 bytes`);
    }

    const requestId = randomRequestId();
    const frame = encodeChunkFrame({
      type: WIRE_CHUNK,
      requestId,
      streamId: streamIdBytes,
      laneId,
      seq: BigInt(seq),
      envelope: bytes,
    });
    if (frame.length > DEFAULT_MAX_DATACHANNEL_MESSAGE) {
      throw new TransferTransportError(
        `frame too large for data channel (${frame.length} > ${DEFAULT_MAX_DATACHANNEL_MESSAGE}); reduce chunkSize`,
      );
    }

    const conn = await this.options.manager.getOrCreate(peerAddress);

    // Backpressure: hold the send while the channel's buffer is over threshold.
    await this.awaitDrain(conn, signal);

    // Connecting and draining are both awaited, so the caller may have aborted
    // in the meantime; do not put the frame on the wire in that case.
    WebRtcTransferTransport.throwIfAborted(signal, 'aborted before send');

    const response = await WebRtcTransferTransport.raceAbort(
      conn.request(frame, requestId, this.requestTimeoutMs),
      signal,
      'aborted while awaiting chunk-ack',
    );

    if (response.type === WIRE_ERROR) {
      throw new TransferTransportError(`webrtc sendChunk error: ${response.json}`);
    }
    if (response.type !== WIRE_CHUNK_ACK) {
      throw new TransferTransportError(
        `unexpected webrtc response type 0x${response.type.toString(16)}`,
      );
    }
    return {
      lastSeq: response.lastSeq,
      bytesReceived: response.bytesReceived,
    };
  }

  /**
   * Asks the peer for the resume state of `streamId`.
   *
   * @param peerAddress Peer to query; passed to the connection manager.
   * @param streamId    Stream identifier; must decode to exactly 16 bytes.
   * @returns The JSON-decoded payload of the resume-state frame, or `null`
   *   when the peer replies with an error frame whose JSON `error` string
   *   contains `'not found'`. The payload is not validated beyond parsing.
   * @throws TransferTransportError for an invalid `streamId`, any other error
   *   frame, an unexpected response type, or an unparseable payload.
   */
  async fetchResumeState(
    peerAddress: string,
    streamId: string,
  ): Promise<TransferResumeState | null> {
    // Same wire constraint that sendChunk enforces on the stream id.
    const streamIdBytes = streamIdStringToBytes(streamId);
    if (streamIdBytes.length !== WebRtcTransferTransport.STREAM_ID_BYTES) {
      throw new TransferTransportError(`streamId must decode to 16 bytes`);
    }

    const conn = await this.options.manager.getOrCreate(peerAddress);
    const requestId = randomRequestId();
    const frame = encodeResumeQueryFrame({
      type: WIRE_RESUME_QUERY,
      requestId,
      streamId: streamIdBytes,
    });
    const response = await conn.request(frame, requestId, this.requestTimeoutMs);

    if (response.type === WIRE_ERROR) {
      // Convention: 'not found' → null; anything else throws.
      if (WebRtcTransferTransport.isNotFoundError(response.json)) return null;
      throw new TransferTransportError(`fetchResumeState failed: ${response.json}`);
    }
    if (response.type !== WIRE_RESUME_STATE) {
      throw new TransferTransportError(
        `unexpected webrtc response type 0x${response.type.toString(16)}`,
      );
    }
    try {
      return JSON.parse(response.json) as TransferResumeState;
    } catch (err) {
      throw new TransferTransportError(
        `fetchResumeState bad JSON: ${WebRtcTransferTransport.describeError(err)}`,
      );
    }
  }

  /**
   * Waits until the connection's send buffer is at or below the configured
   * backpressure threshold.
   *
   * The buffer size is read from a `dc` field on the connection through a
   * cast, because the declared parameter shape does not expose it. When that
   * field is `null` or missing, backpressure is skipped entirely.
   *
   * Polling is used instead of `bufferedamountlow` events because those need
   * `bufferedAmountLowThreshold`, which the IDataChannel shim does not
   * standardise yet.
   *
   * @throws TransferAbortError if `signal` is aborted while waiting.
   * @throws WebRtcDataChannelError if the buffer stays above the threshold
   *   for more than 30 s.
   */
  private async awaitDrain(
    conn: { sendRaw: (b: Uint8Array) => void },
    signal?: AbortSignal,
  ): Promise<void> {
    const dc = (conn as unknown as { dc?: { bufferedAmount: number } | null }).dc;
    if (dc == null) return;

    const startedAt = Date.now();
    while (dc.bufferedAmount > this.backpressureBytes) {
      WebRtcTransferTransport.throwIfAborted(signal, 'aborted while waiting for drain');
      if (Date.now() - startedAt > WebRtcTransferTransport.DRAIN_TIMEOUT_MS) {
        throw new WebRtcDataChannelError(
          `bufferedAmount stayed above threshold for 30s (${dc.bufferedAmount} bytes)`,
        );
      }
      await new Promise<void>((resolve) =>
        setTimeout(resolve, WebRtcTransferTransport.DRAIN_POLL_MS),
      );
    }
  }

  /** Throws `TransferAbortError(message)` when `signal` is already aborted. */
  private static throwIfAborted(signal: AbortSignal | undefined, message: string): void {
    if (signal?.aborted) throw new TransferAbortError(message);
  }

  /**
   * Settles with `pending`, unless `signal` aborts first, in which case it
   * rejects with `TransferAbortError(message)`.
   *
   * The underlying operation is not cancelled (there is no cancel handle); it
   * settles later on its own and that outcome is ignored. `Promise.race` keeps
   * a handler attached to it, so a late rejection is not reported as unhandled.
   */
  private static async raceAbort<T>(
    pending: Promise<T>,
    signal: AbortSignal | undefined,
    message: string,
  ): Promise<T> {
    if (signal === undefined) return pending;

    let onAbort: () => void = () => undefined;
    const aborted = new Promise<never>((_resolve, reject) => {
      onAbort = () => reject(new TransferAbortError(message));
    });
    signal.addEventListener('abort', onAbort, { once: true });
    try {
      return await Promise.race([pending, aborted]);
    } finally {
      // Always detach so a long-lived signal does not accumulate listeners.
      signal.removeEventListener('abort', onAbort);
    }
  }

  /** True when `json` parses to an object whose string `error` contains 'not found'. */
  private static isNotFoundError(json: string): boolean {
    try {
      const parsed: unknown = JSON.parse(json);
      if (typeof parsed !== 'object' || parsed === null) return false;
      const reason = (parsed as { error?: unknown }).error;
      return typeof reason === 'string' && reason.includes('not found');
    } catch {
      // Unparseable error payload: treat as a generic failure, not 'not found'.
      return false;
    }
  }

  /** Best-effort message for a caught value that may not be an `Error`. */
  private static describeError(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }
}
|