From 4e87195739f2a5d9a05451b48773c8afdc680765 Mon Sep 17 00:00:00 2001 From: akiyamn Date: Sun, 24 Sep 2023 23:22:21 +1000 Subject: Initial commit (by create-cloudflare CLI) --- node_modules/capnp-ts/src/serialization/packing.ts | 372 +++++++++++++++++++++ 1 file changed, 372 insertions(+) create mode 100644 node_modules/capnp-ts/src/serialization/packing.ts (limited to 'node_modules/capnp-ts/src/serialization/packing.ts') diff --git a/node_modules/capnp-ts/src/serialization/packing.ts b/node_modules/capnp-ts/src/serialization/packing.ts new file mode 100644 index 0000000..31e9b6d --- /dev/null +++ b/node_modules/capnp-ts/src/serialization/packing.ts @@ -0,0 +1,372 @@ +/** + * @author jdiaz5513 + */ + +import { PACK_SPAN_THRESHOLD } from "../constants"; +import { MSG_PACK_NOT_WORD_ALIGNED } from "../errors"; + +/** + * When packing a message there are two tags that are interpreted in a special way: `0x00` and `0xff`. + * + * @enum {number} + */ + +const enum PackedTag { + /** + * The tag is followed by a single byte which indicates a count of consecutive zero-valued words, minus 1. E.g. if the + * tag 0x00 is followed by 0x05, the sequence unpacks to 6 words of zero. + * + * Or, put another way: the tag is first decoded as if it were not special. Since none of the bits are set, it is + * followed by no bytes and expands to a word full of zeros. After that, the next byte is interpreted as a count of + * additional words that are also all-zero. + */ + + ZERO = 0x00, + + /** + * The tag is followed by the bytes of the word (as if it weren’t special), but after those bytes is another byte with + * value N. Following that byte is N unpacked words that should be copied directly. + * + * These unpacked words may contain zeroes; in this implementation a minimum of PACK_SPAN_THRESHOLD zero bytes are + * written before ending the span. + * + * The purpose of this rule is to minimize the impact of packing on data that doesn’t contain any zeros – in + * particular, long text blobs. Because of this rule, the worst-case space overhead of packing is 2 bytes per 2 KiB of + * input (256 words = 2KiB). + */ + + SPAN = 0xff, +} + +/** + * Compute the Hamming weight (number of bits set to 1) of a number. Used to figure out how many bytes follow a tag byte + * while computing the size of a packed message. + * + * WARNING: Using this with floating point numbers will void your warranty. + * + * @param {number} x A real integer. + * @returns {number} The hamming weight (integer). + */ + +export function getHammingWeight(x: number): number { + // Thanks, HACKMEM! + + let w = x - ((x >> 1) & 0x55555555); + w = (w & 0x33333333) + ((w >> 2) & 0x33333333); + return (((w + (w >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24; +} + +export type byte = number; + +/** + * Compute the tag byte from the 8 bytes of a 64-bit word. + * + * @param {byte} a The first byte. + * @param {byte} b The second byte. + * @param {byte} c The third byte. + * @param {byte} d The fourth byte. + * @param {byte} e The fifth byte. + * @param {byte} f The sixth byte. + * @param {byte} g The seventh byte. + * @param {byte} h The eighth byte (phew!). + * @returns {number} The tag byte. + */ + +export function getTagByte(a: byte, b: byte, c: byte, d: byte, e: byte, f: byte, g: byte, h: byte): number { + // Yes, it's pretty. Don't touch it. + + return ( + (a === 0 ? 0 : 0b00000001) | + (b === 0 ? 0 : 0b00000010) | + (c === 0 ? 0 : 0b00000100) | + (d === 0 ? 0 : 0b00001000) | + (e === 0 ? 0 : 0b00010000) | + (f === 0 ? 0 : 0b00100000) | + (g === 0 ? 0 : 0b01000000) | + (h === 0 ? 0 : 0b10000000) + ); +} + +/** + * Efficiently calculate the length of a packed Cap'n Proto message. + * + * @export + * @param {ArrayBuffer} packed The packed message. + * @returns {number} The length of the unpacked message in bytes. + */ + +export function getUnpackedByteLength(packed: ArrayBuffer): number { + const p = new Uint8Array(packed); + let wordLength = 0; + let lastTag = 0x77; + + for (let i = 0; i < p.byteLength; ) { + const tag = p[i]; + + if (lastTag === PackedTag.ZERO) { + wordLength += tag; + + i++; + + lastTag = 0x77; + } else if (lastTag === PackedTag.SPAN) { + wordLength += tag; + + i += tag * 8 + 1; + + lastTag = 0x77; + } else { + wordLength++; + + i += getHammingWeight(tag) + 1; + + lastTag = tag; + } + } + + return wordLength * 8; +} + +/** + * Compute the number of zero bytes that occur in a given 64-bit word, provided as eight separate bytes. + * + * @param {byte} a The first byte. + * @param {byte} b The second byte. + * @param {byte} c The third byte. + * @param {byte} d The fourth byte. + * @param {byte} e The fifth byte. + * @param {byte} f The sixth byte. + * @param {byte} g The seventh byte. + * @param {byte} h The eighth byte (phew!). + * @returns {number} The number of these bytes that are zero. + */ + +export function getZeroByteCount(a: byte, b: byte, c: byte, d: byte, e: byte, f: byte, g: byte, h: byte): number { + return ( + (a === 0 ? 1 : 0) + + (b === 0 ? 1 : 0) + + (c === 0 ? 1 : 0) + + (d === 0 ? 1 : 0) + + (e === 0 ? 1 : 0) + + (f === 0 ? 1 : 0) + + (g === 0 ? 1 : 0) + + (h === 0 ? 1 : 0) + ); +} + +/** + * Pack a section of a Cap'n Proto message into a compressed format. This will efficiently compress zero bytes (which + * are common in idiomatic Cap'n Proto messages) into a compact form. + * + * For stream-framed messages this is called once for the frame header and once again for each segment in the message. + * + * The returned array buffer is trimmed to the exact size of the packed message with a single copy operation at the end. + * This should be decent on CPU time but does require quite a lot of memory (a normal array is filled up with each + * packed byte until the packing is complete). + * + * @export + * @param {ArrayBuffer} unpacked The message to pack. + * @param {number} [byteOffset] Starting byte offset to read bytes from, defaults to 0. + * @param {number} [byteLength] Total number of bytes to read, defaults to the remainder of the buffer contents. + * @returns {ArrayBuffer} A packed version of the message. + */ + +export function pack(unpacked: ArrayBuffer, byteOffset = 0, byteLength?: number): ArrayBuffer { + if (unpacked.byteLength % 8 !== 0) throw new Error(MSG_PACK_NOT_WORD_ALIGNED); + + const src = new Uint8Array(unpacked, byteOffset, byteLength); + + // TODO: Maybe we should do this with buffers? This costs more than 8x the final compressed size in temporary RAM. + + const dst: number[] = []; + + /* Just have to be sure it's neither ZERO nor SPAN. */ + + let lastTag = 0x77; + + /** This is where we need to remember to write the SPAN tag (0xff). */ + + let spanTagOffset = NaN; + + /** How many words have been copied during the current span. */ + + let spanWordLength = 0; + + /** + * When this hits zero, we've had PACK_SPAN_THRESHOLD zero bytes pass by and it's time to bail from the span. + */ + + let spanThreshold = PACK_SPAN_THRESHOLD; + + for (let srcByteOffset = 0; srcByteOffset < src.byteLength; srcByteOffset += 8) { + /** Read in the entire word. Yes, this feels silly but it's fast! */ + + const a = src[srcByteOffset]; + const b = src[srcByteOffset + 1]; + const c = src[srcByteOffset + 2]; + const d = src[srcByteOffset + 3]; + const e = src[srcByteOffset + 4]; + const f = src[srcByteOffset + 5]; + const g = src[srcByteOffset + 6]; + const h = src[srcByteOffset + 7]; + + const tag = getTagByte(a, b, c, d, e, f, g, h); + + /** If this is true we'll skip the normal word write logic after the switch statement. */ + + let skipWriteWord = true; + + switch (lastTag) { + case PackedTag.ZERO: + // We're writing a span of words with all zeroes in them. See if we need to bail out of the fast path. + + if (tag !== PackedTag.ZERO || spanWordLength >= 0xff) { + // There's a bit in there or we got too many zeroes. Damn, we need to bail. + + dst.push(spanWordLength); + spanWordLength = 0; + + skipWriteWord = false; + } else { + // Kay, let's quickly inc this and go. + + spanWordLength++; + } + + break; + + case PackedTag.SPAN: { + // We're writing a span of nonzero words. + + const zeroCount = getZeroByteCount(a, b, c, d, e, f, g, h); + + // See if we need to bail now. + + spanThreshold -= zeroCount; + + if (spanThreshold <= 0 || spanWordLength >= 0xff) { + // Alright, time to get packing again. Write the number of words we skipped to the beginning of the span. + + dst[spanTagOffset] = spanWordLength; + spanWordLength = 0; + + spanThreshold = PACK_SPAN_THRESHOLD; + + // We have to write this word normally. + + skipWriteWord = false; + } else { + // Just write this word verbatim. + + dst.push(a, b, c, d, e, f, g, h); + + spanWordLength++; + } + + break; + } + default: + // Didn't get a special tag last time, let's write this as normal. + + skipWriteWord = false; + + break; + } + + // A goto is fast, idk why people keep hatin'. + if (skipWriteWord) continue; + + dst.push(tag); + lastTag = tag; + + if (a !== 0) dst.push(a); + if (b !== 0) dst.push(b); + if (c !== 0) dst.push(c); + if (d !== 0) dst.push(d); + if (e !== 0) dst.push(e); + if (f !== 0) dst.push(f); + if (g !== 0) dst.push(g); + if (h !== 0) dst.push(h); + + // Record the span tag offset if needed, making sure to actually leave room for it. + + if (tag === PackedTag.SPAN) { + spanTagOffset = dst.length; + + dst.push(0); + } + } + + // We're done. If we were writing a span let's finish it. + + if (lastTag === PackedTag.ZERO) { + dst.push(spanWordLength); + } else if (lastTag === PackedTag.SPAN) { + dst[spanTagOffset] = spanWordLength; + } + + return new Uint8Array(dst).buffer; +} + +/** + * Unpack a compressed Cap'n Proto message into a new ArrayBuffer. + * + * Unlike the `pack` function, this is able to efficiently determine the exact size needed for the output buffer and + * runs considerably more efficiently. + * + * @export + * @param {ArrayBuffer} packed An array buffer containing the packed message. + * @returns {ArrayBuffer} The unpacked message. + */ + +export function unpack(packed: ArrayBuffer): ArrayBuffer { + // We have no choice but to read the packed buffer one byte at a time. + + const src = new Uint8Array(packed); + const dst = new Uint8Array(new ArrayBuffer(getUnpackedByteLength(packed))); + + /** The last tag byte that we've seen - it starts at a "neutral" value. */ + + let lastTag = 0x77; + + for (let srcByteOffset = 0, dstByteOffset = 0; srcByteOffset < src.byteLength; ) { + const tag = src[srcByteOffset]; + + if (lastTag === PackedTag.ZERO) { + // We have a span of zeroes. New array buffers are guaranteed to be initialized to zero so we just seek ahead. + + dstByteOffset += tag * 8; + + srcByteOffset++; + + lastTag = 0x77; + } else if (lastTag === PackedTag.SPAN) { + // We have a span of unpacked bytes. Copy them verbatim from the source buffer. + + const spanByteLength = tag * 8; + + dst.set(src.subarray(srcByteOffset + 1, srcByteOffset + 1 + spanByteLength), dstByteOffset); + + dstByteOffset += spanByteLength; + srcByteOffset += 1 + spanByteLength; + + lastTag = 0x77; + } else { + // Okay, a normal tag. Let's read past the tag and copy bytes that have a bit set in the tag. + + srcByteOffset++; + + for (let i = 1; i <= 0b10000000; i <<= 1) { + // We only need to actually touch `dst` if there's a nonzero byte (it's already initialized to zeroes). + + if ((tag & i) !== 0) dst[dstByteOffset] = src[srcByteOffset++]; + + dstByteOffset++; + } + + lastTag = tag; + } + } + + return dst.buffer; +} -- cgit v1.2.3