summaryrefslogtreecommitdiff
path: root/node_modules/capnp-ts/src/serialization/packing.ts
diff options
context:
space:
mode:
authorakiyamn2023-09-24 23:22:21 +1000
committerakiyamn2023-09-24 23:22:21 +1000
commit4e87195739f2a5d9a05451b48773c8afdc680765 (patch)
tree9cba501844a4a11dcbdffc4050ed8189561c55ed /node_modules/capnp-ts/src/serialization/packing.ts
downloadprice-tracker-worker-4e87195739f2a5d9a05451b48773c8afdc680765.tar.gz
price-tracker-worker-4e87195739f2a5d9a05451b48773c8afdc680765.zip
Initial commit (by create-cloudflare CLI)
Diffstat (limited to 'node_modules/capnp-ts/src/serialization/packing.ts')
-rw-r--r--node_modules/capnp-ts/src/serialization/packing.ts372
1 files changed, 372 insertions, 0 deletions
diff --git a/node_modules/capnp-ts/src/serialization/packing.ts b/node_modules/capnp-ts/src/serialization/packing.ts
new file mode 100644
index 0000000..31e9b6d
--- /dev/null
+++ b/node_modules/capnp-ts/src/serialization/packing.ts
@@ -0,0 +1,372 @@
+/**
+ * @author jdiaz5513
+ */
+
+import { PACK_SPAN_THRESHOLD } from "../constants";
+import { MSG_PACK_NOT_WORD_ALIGNED } from "../errors";
+
+/**
+ * When packing a message there are two tags that are interpreted in a special way: `0x00` and `0xff`.
+ *
+ * @enum {number}
+ */
+
+const enum PackedTag {
+ /**
+ * The tag is followed by a single byte which indicates a count of consecutive zero-valued words, minus 1. E.g. if the
+ * tag 0x00 is followed by 0x05, the sequence unpacks to 6 words of zero.
+ *
+ * Or, put another way: the tag is first decoded as if it were not special. Since none of the bits are set, it is
+ * followed by no bytes and expands to a word full of zeros. After that, the next byte is interpreted as a count of
+ * additional words that are also all-zero.
+ */
+
+ ZERO = 0x00,
+
+ /**
+ * The tag is followed by the bytes of the word (as if it weren’t special), but after those bytes is another byte with
+ * value N. Following that byte is N unpacked words that should be copied directly.
+ *
+ * These unpacked words may contain zeroes; in this implementation a minimum of PACK_SPAN_THRESHOLD zero bytes are
+ * written before ending the span.
+ *
+ * The purpose of this rule is to minimize the impact of packing on data that doesn’t contain any zeros – in
+ * particular, long text blobs. Because of this rule, the worst-case space overhead of packing is 2 bytes per 2 KiB of
+ * input (256 words = 2KiB).
+ */
+
+ SPAN = 0xff,
+}
+
+/**
+ * Compute the Hamming weight (number of bits set to 1) of a number. Used to figure out how many bytes follow a tag byte
+ * while computing the size of a packed message.
+ *
+ * WARNING: Using this with floating point numbers will void your warranty.
+ *
+ * @param {number} x A real integer.
+ * @returns {number} The hamming weight (integer).
+ */
+
+export function getHammingWeight(x: number): number {
+ // Thanks, HACKMEM!
+
+ let w = x - ((x >> 1) & 0x55555555);
+ w = (w & 0x33333333) + ((w >> 2) & 0x33333333);
+ return (((w + (w >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24;
+}
+
+export type byte = number;
+
+/**
+ * Compute the tag byte from the 8 bytes of a 64-bit word.
+ *
+ * @param {byte} a The first byte.
+ * @param {byte} b The second byte.
+ * @param {byte} c The third byte.
+ * @param {byte} d The fourth byte.
+ * @param {byte} e The fifth byte.
+ * @param {byte} f The sixth byte.
+ * @param {byte} g The seventh byte.
+ * @param {byte} h The eighth byte (phew!).
+ * @returns {number} The tag byte.
+ */
+
+export function getTagByte(a: byte, b: byte, c: byte, d: byte, e: byte, f: byte, g: byte, h: byte): number {
+ // Yes, it's pretty. Don't touch it.
+
+ return (
+ (a === 0 ? 0 : 0b00000001) |
+ (b === 0 ? 0 : 0b00000010) |
+ (c === 0 ? 0 : 0b00000100) |
+ (d === 0 ? 0 : 0b00001000) |
+ (e === 0 ? 0 : 0b00010000) |
+ (f === 0 ? 0 : 0b00100000) |
+ (g === 0 ? 0 : 0b01000000) |
+ (h === 0 ? 0 : 0b10000000)
+ );
+}
+
+/**
+ * Efficiently calculate the length of a packed Cap'n Proto message.
+ *
+ * @export
+ * @param {ArrayBuffer} packed The packed message.
+ * @returns {number} The length of the unpacked message in bytes.
+ */
+
+export function getUnpackedByteLength(packed: ArrayBuffer): number {
+ const p = new Uint8Array(packed);
+ let wordLength = 0;
+ let lastTag = 0x77;
+
+ for (let i = 0; i < p.byteLength; ) {
+ const tag = p[i];
+
+ if (lastTag === PackedTag.ZERO) {
+ wordLength += tag;
+
+ i++;
+
+ lastTag = 0x77;
+ } else if (lastTag === PackedTag.SPAN) {
+ wordLength += tag;
+
+ i += tag * 8 + 1;
+
+ lastTag = 0x77;
+ } else {
+ wordLength++;
+
+ i += getHammingWeight(tag) + 1;
+
+ lastTag = tag;
+ }
+ }
+
+ return wordLength * 8;
+}
+
+/**
+ * Compute the number of zero bytes that occur in a given 64-bit word, provided as eight separate bytes.
+ *
+ * @param {byte} a The first byte.
+ * @param {byte} b The second byte.
+ * @param {byte} c The third byte.
+ * @param {byte} d The fourth byte.
+ * @param {byte} e The fifth byte.
+ * @param {byte} f The sixth byte.
+ * @param {byte} g The seventh byte.
+ * @param {byte} h The eighth byte (phew!).
+ * @returns {number} The number of these bytes that are zero.
+ */
+
+export function getZeroByteCount(a: byte, b: byte, c: byte, d: byte, e: byte, f: byte, g: byte, h: byte): number {
+ return (
+ (a === 0 ? 1 : 0) +
+ (b === 0 ? 1 : 0) +
+ (c === 0 ? 1 : 0) +
+ (d === 0 ? 1 : 0) +
+ (e === 0 ? 1 : 0) +
+ (f === 0 ? 1 : 0) +
+ (g === 0 ? 1 : 0) +
+ (h === 0 ? 1 : 0)
+ );
+}
+
+/**
+ * Pack a section of a Cap'n Proto message into a compressed format. This will efficiently compress zero bytes (which
+ * are common in idiomatic Cap'n Proto messages) into a compact form.
+ *
+ * For stream-framed messages this is called once for the frame header and once again for each segment in the message.
+ *
+ * The returned array buffer is trimmed to the exact size of the packed message with a single copy operation at the end.
+ * This should be decent on CPU time but does require quite a lot of memory (a normal array is filled up with each
+ * packed byte until the packing is complete).
+ *
+ * @export
+ * @param {ArrayBuffer} unpacked The message to pack.
+ * @param {number} [byteOffset] Starting byte offset to read bytes from, defaults to 0.
+ * @param {number} [byteLength] Total number of bytes to read, defaults to the remainder of the buffer contents.
+ * @returns {ArrayBuffer} A packed version of the message.
+ */
+
+export function pack(unpacked: ArrayBuffer, byteOffset = 0, byteLength?: number): ArrayBuffer {
+ if (unpacked.byteLength % 8 !== 0) throw new Error(MSG_PACK_NOT_WORD_ALIGNED);
+
+ const src = new Uint8Array(unpacked, byteOffset, byteLength);
+
+ // TODO: Maybe we should do this with buffers? This costs more than 8x the final compressed size in temporary RAM.
+
+ const dst: number[] = [];
+
+ /* Just have to be sure it's neither ZERO nor SPAN. */
+
+ let lastTag = 0x77;
+
+ /** This is where we need to remember to write the SPAN tag (0xff). */
+
+ let spanTagOffset = NaN;
+
+ /** How many words have been copied during the current span. */
+
+ let spanWordLength = 0;
+
+ /**
+ * When this hits zero, we've had PACK_SPAN_THRESHOLD zero bytes pass by and it's time to bail from the span.
+ */
+
+ let spanThreshold = PACK_SPAN_THRESHOLD;
+
+ for (let srcByteOffset = 0; srcByteOffset < src.byteLength; srcByteOffset += 8) {
+ /** Read in the entire word. Yes, this feels silly but it's fast! */
+
+ const a = src[srcByteOffset];
+ const b = src[srcByteOffset + 1];
+ const c = src[srcByteOffset + 2];
+ const d = src[srcByteOffset + 3];
+ const e = src[srcByteOffset + 4];
+ const f = src[srcByteOffset + 5];
+ const g = src[srcByteOffset + 6];
+ const h = src[srcByteOffset + 7];
+
+ const tag = getTagByte(a, b, c, d, e, f, g, h);
+
+ /** If this is true we'll skip the normal word write logic after the switch statement. */
+
+ let skipWriteWord = true;
+
+ switch (lastTag) {
+ case PackedTag.ZERO:
+ // We're writing a span of words with all zeroes in them. See if we need to bail out of the fast path.
+
+ if (tag !== PackedTag.ZERO || spanWordLength >= 0xff) {
+ // There's a bit in there or we got too many zeroes. Damn, we need to bail.
+
+ dst.push(spanWordLength);
+ spanWordLength = 0;
+
+ skipWriteWord = false;
+ } else {
+ // Kay, let's quickly inc this and go.
+
+ spanWordLength++;
+ }
+
+ break;
+
+ case PackedTag.SPAN: {
+ // We're writing a span of nonzero words.
+
+ const zeroCount = getZeroByteCount(a, b, c, d, e, f, g, h);
+
+ // See if we need to bail now.
+
+ spanThreshold -= zeroCount;
+
+ if (spanThreshold <= 0 || spanWordLength >= 0xff) {
+ // Alright, time to get packing again. Write the number of words we skipped to the beginning of the span.
+
+ dst[spanTagOffset] = spanWordLength;
+ spanWordLength = 0;
+
+ spanThreshold = PACK_SPAN_THRESHOLD;
+
+ // We have to write this word normally.
+
+ skipWriteWord = false;
+ } else {
+ // Just write this word verbatim.
+
+ dst.push(a, b, c, d, e, f, g, h);
+
+ spanWordLength++;
+ }
+
+ break;
+ }
+ default:
+ // Didn't get a special tag last time, let's write this as normal.
+
+ skipWriteWord = false;
+
+ break;
+ }
+
+ // A goto is fast, idk why people keep hatin'.
+ if (skipWriteWord) continue;
+
+ dst.push(tag);
+ lastTag = tag;
+
+ if (a !== 0) dst.push(a);
+ if (b !== 0) dst.push(b);
+ if (c !== 0) dst.push(c);
+ if (d !== 0) dst.push(d);
+ if (e !== 0) dst.push(e);
+ if (f !== 0) dst.push(f);
+ if (g !== 0) dst.push(g);
+ if (h !== 0) dst.push(h);
+
+ // Record the span tag offset if needed, making sure to actually leave room for it.
+
+ if (tag === PackedTag.SPAN) {
+ spanTagOffset = dst.length;
+
+ dst.push(0);
+ }
+ }
+
+ // We're done. If we were writing a span let's finish it.
+
+ if (lastTag === PackedTag.ZERO) {
+ dst.push(spanWordLength);
+ } else if (lastTag === PackedTag.SPAN) {
+ dst[spanTagOffset] = spanWordLength;
+ }
+
+ return new Uint8Array(dst).buffer;
+}
+
+/**
+ * Unpack a compressed Cap'n Proto message into a new ArrayBuffer.
+ *
+ * Unlike the `pack` function, this is able to efficiently determine the exact size needed for the output buffer and
+ * runs considerably more efficiently.
+ *
+ * @export
+ * @param {ArrayBuffer} packed An array buffer containing the packed message.
+ * @returns {ArrayBuffer} The unpacked message.
+ */
+
+export function unpack(packed: ArrayBuffer): ArrayBuffer {
+ // We have no choice but to read the packed buffer one byte at a time.
+
+ const src = new Uint8Array(packed);
+ const dst = new Uint8Array(new ArrayBuffer(getUnpackedByteLength(packed)));
+
+ /** The last tag byte that we've seen - it starts at a "neutral" value. */
+
+ let lastTag = 0x77;
+
+ for (let srcByteOffset = 0, dstByteOffset = 0; srcByteOffset < src.byteLength; ) {
+ const tag = src[srcByteOffset];
+
+ if (lastTag === PackedTag.ZERO) {
+ // We have a span of zeroes. New array buffers are guaranteed to be initialized to zero so we just seek ahead.
+
+ dstByteOffset += tag * 8;
+
+ srcByteOffset++;
+
+ lastTag = 0x77;
+ } else if (lastTag === PackedTag.SPAN) {
+ // We have a span of unpacked bytes. Copy them verbatim from the source buffer.
+
+ const spanByteLength = tag * 8;
+
+ dst.set(src.subarray(srcByteOffset + 1, srcByteOffset + 1 + spanByteLength), dstByteOffset);
+
+ dstByteOffset += spanByteLength;
+ srcByteOffset += 1 + spanByteLength;
+
+ lastTag = 0x77;
+ } else {
+ // Okay, a normal tag. Let's read past the tag and copy bytes that have a bit set in the tag.
+
+ srcByteOffset++;
+
+ for (let i = 1; i <= 0b10000000; i <<= 1) {
+ // We only need to actually touch `dst` if there's a nonzero byte (it's already initialized to zeroes).
+
+ if ((tag & i) !== 0) dst[dstByteOffset] = src[srcByteOffset++];
+
+ dstByteOffset++;
+ }
+
+ lastTag = tag;
+ }
+ }
+
+ return dst.buffer;
+}