prefix-compression-test/test.html

<!doctype html>
<html>
  <head>
    <meta charset="utf-8"/>
    <title>Compression test</title>
    <script src="LZMA-JS/src/lzma_worker.js"></script>
    <script src="node-lz4/build/lz4.js"></script>
    <script src="lz-string/libs/lz-string.js"></script>
    <script >
    var Buffer = require('buffer').Buffer
    var LZ4 = require('lz4')

    /**
     * Wraps a base compressor so that a fixed, shared prefix (a "dictionary")
     * is compressed together with every message, and the leading bytes of the
     * compressed output that are identical to the compressed prefix alone are
     * stripped again.
     *
     * Wire format produced by compress():
     *   byte 0   - number of trailing bytes of the compressed prefix that did
     *              NOT match and must be dropped before re-attaching the rest
     *   byte 1.. - the compressed bytes following the shared leading part
     *
     * The header is a single byte, so only counts in 0..255 survive a round
     * trip; larger counts wrap modulo 256 and decompress() will not restore
     * the original text.
     */
    class PrefixCompressor {
      /**
       * @param {string} prefix Text prepended to every string before it is
       *     handed to the base compressor.
       * @param {function(string): Uint8Array} compress Base compressor.
       * @param {function(Uint8Array): string} decompress Base decompressor.
       * @param {number} bytesToIgnore Count of leading compressed bytes that
       *     are assumed to be shared without being compared.
       */
      constructor(prefix, compress, decompress, bytesToIgnore) {
        this.prefix = prefix;
        this.prefixCompressed = compress(prefix);
        this.bytesToIgnore = bytesToIgnore;
        this.baseCompress = compress;
        this.baseDecompress = decompress;
      }

      /**
       * Compresses `str` with the prefix prepended and removes the leading
       * bytes shared with the compressed prefix.
       *
       * @param {string} str Text to compress.
       * @returns {Uint8Array} Freshly allocated array: header byte followed by
       *     the unshared remainder of the compressed data.
       */
      compress(str) {
        const c = this.baseCompress(this.prefix + str);
        const known = this.prefixCompressed;

        // Never start beyond either array, otherwise the lengths computed
        // below would become negative.
        let shared = Math.min(this.bytesToIgnore, known.length, c.length);
        while (shared < known.length && known[shared] === c[shared]) {
          shared++;
        }

        const omittedBytes = known.length - shared;
        // Build a new array instead of writing the header into a view of `c`:
        // with zero shared bytes there is no spare byte in front of the data
        // to hold it, and the base compressor's buffer stays untouched.
        const res = new Uint8Array(c.length - shared + 1);
        res[0] = omittedBytes;
        res.set(c.subarray(shared), 1);
        return res;
      }

      /**
       * Reverses compress(): re-attaches the shared part of the compressed
       * prefix, runs the base decompressor and strips the prefix text.
       *
       * @param {Uint8Array} arr Output of compress().
       * @returns {string} The decompressed text without the prefix.
       */
      decompress(arr) {
        const omittedBytes = arr[0];
        const head = this.prefixCompressed
            .subarray(0, this.prefixCompressed.length - omittedBytes);
        const tail = arr.subarray(1);

        const c = new Uint8Array(head.length + tail.length);
        c.set(head, 0);
        c.set(tail, head.length);

        const d = this.baseDecompress(c);
        return d.substring(this.prefix.length);
      }
    }

    /**
     * Loads the example files and the prefix dictionary, runs every
     * compressor over every example (plain and through PrefixCompressor) and
     * renders the sizes and round-trip results into the #results table.
     *
     * Per algorithm eight columns are added: plain size, plain round-trip
     * flag, size with prefix stripping, its round-trip flag, and - only when
     * the prefix round trip failed - the size of compress(prefix + text)
     * followed by the best "matched / skipped / remaining" byte counts found.
     *
     * @returns {Promise<void>} Settles when the table is filled in; rejects
     *     when a resource cannot be loaded.
     */
    async function testCompression () {
      /** Fetches `url` as text and rejects on a non-2xx HTTP status. */
      async function fetchText(url) {
        const response = await fetch(url);
        if (!response.ok) {
          throw new Error(`Failed to load ${url}: HTTP ${response.status}`);
        }
        return response.text();
      }

      /** Loads all example files into a Map of filename -> text. */
      async function loadExampleFiles() {
        const filesToLoad = ['rfc4566-example'];
        for (let i = 1; i <= 44; i++) {
          filesToLoad.push('draft-ietf-rtcweb-sdp-12-ex' + i);
        }
        // The downloads are independent, so run them concurrently;
        // Promise.all keeps the results in request order.
        const texts = await Promise.all(
            filesToLoad.map(filename => fetchText('test-files/' + filename)));
        return new Map(
            filesToLoad.map((filename, index) => [filename, texts[index]]));
      }

      /** Loads the text used as the shared compression prefix. */
      function loadDictionary() {
        return fetchText('test-dictionary/base');
      }

      /**
       * Reports whether `decompress(compressed)` yields `expected`. A
       * decompressor that throws counts as a failed round trip, so a single
       * bad result shows up as a cross instead of aborting the whole table.
       */
      function roundTrips(decompress, compressed, expected) {
        try {
          return decompress(compressed) === expected;
        } catch (err) {
          console.warn('Decompression threw:', err);
          return false;
        }
      }

      const [exampleFiles, prefix] =
          await Promise.all([loadExampleFiles(), loadDictionary()]);

      const decoder = new TextDecoder("utf-8");
      // https://github.com/pierrec/node-lz4
      const LZ4Pre = new PrefixCompressor(prefix, str => {
        const input = new Buffer(str);
        // Allocate the bound reported by the library, then trim to the size
        // that was actually written.
        const scratch = new Buffer(LZ4.encodeBound(input.length));
        const compressedSize = LZ4.encodeBlock(input, scratch);
        return scratch.slice(0, compressedSize);
      }, arr => {
        // NOTE(review): fixed 10000-byte output buffer - presumably large
        // enough for prefix + any example file; confirm.
        const scratch = new Buffer(10000);
        const size = LZ4.decodeBlock(arr, scratch);
        return decoder.decode(scratch.slice(0, size));
      }, 0);

      // https://github.com/LZMA-JS/LZMA-JS
      const LZMAPre = new PrefixCompressor(prefix, str => {
        const res = new Uint8Array(LZMA.compress(str, 9));
        // Clear uncompressed size as it will be invalid.
        for (let i = 5; i < 13; i++) res[i] = 255;
        return res;
      }, arr => {
        return LZMA.decompress(arr).toString();
      }, 12);

      // https://github.com/pieroxy/lz-string
      const LZStringPre = new PrefixCompressor(prefix, str => {
        return LZString.compressToUint8Array(str);
      }, arr => {
        return LZString.decompressFromUint8Array(arr);
      }, 0);

      const compressors = {lzma: LZMAPre, lz4: LZ4Pre, lzString: LZStringPre};

      const resultTable = document.getElementById('results');
      const headerRow = resultTable.querySelector('thead tr');
      const prefixRow = document.getElementById('prefix-row');
      const tbody = resultTable.querySelector('tbody');

      const prefixSize = document.createElement('td');
      prefixSize.innerText = prefix.length;
      prefixRow.appendChild(prefixSize);

      function appendHeader(str) {
        const th = document.createElement('th');
        th.innerText = str;
        headerRow.appendChild(th);
      }
      function appendPrefixCell(str) {
        const td = document.createElement('td');
        if (str) td.innerText = str;
        prefixRow.appendChild(td);
      }

      for (const [name, algo] of Object.entries(compressors)) {
        appendHeader(name);
        appendHeader('?');
        appendHeader(name + " w/ prefix");
        appendHeader('?');
        appendHeader(name + " incl. prefix");
        appendHeader("matched");
        appendHeader("skipped");
        appendHeader("remaining");

        const prefixCompressed = algo.baseCompress(prefix);
        const prefixCompressedLen = prefixCompressed.length;
        appendPrefixCell(prefixCompressedLen);
        // Pad the prefix row so it spans all eight columns of this algorithm.
        for (let i = 0; i < 7; i++) appendPrefixCell();

        for (const [filename, str] of exampleFiles) {
          // One row per file, created by the first algorithm and extended
          // by the following ones.
          const rowId = 'row-' + filename;
          let row = document.getElementById(rowId);
          if (!row) {
            row = document.createElement('tr');
            row.id = rowId;

            const rowHeader = document.createElement('th');
            rowHeader.innerText = filename;
            row.appendChild(rowHeader);

            const size = document.createElement('td');
            size.innerText = str.length;
            row.appendChild(size);

            tbody.appendChild(row);
          }
          // Appends one cell to the current row: booleans become a
          // check mark / cross, undefined becomes an empty cell.
          const appendCell = (value) => {
            const td = document.createElement('td');
            let text = value;
            if (value === undefined) text = '';
            else if (value === true) text = '✔️';
            else if (value === false) text = '❌';
            td.innerText = text;
            row.appendChild(td);
          };

          const plain = algo.baseCompress(str);
          appendCell(plain.length);
          appendCell(roundTrips(data => algo.baseDecompress(data), plain, str));
          let bestLength = plain.length;

          const stripped = algo.compress(str);
          const decompressionSuccessful =
              roundTrips(data => algo.decompress(data), stripped, str);
          appendCell(stripped.length);
          appendCell(decompressionSuccessful);

          if (!decompressionSuccessful) {
            // Diagnostics: look for a run inside the compressed prefix that
            // also appears at the same offsets in compress(prefix + str) and
            // would beat the plain compressed size.
            let bestCells = ['', '', ''];
            const withPrefix = algo.baseCompress(prefix + str);
            appendCell(withPrefix.length);
            for (let j = 0; j < prefixCompressedLen; j++) {
              let i = j;
              while (i < prefixCompressedLen
                     && prefixCompressed[i] === withPrefix[i]) {
                i++;
              }
              const truncatedLength = withPrefix.length - i + j;
              if (truncatedLength < bestLength) {
                bestLength = truncatedLength;
                bestCells = [i - j, j, truncatedLength];
              }
              // Resume after the mismatching byte (the loop header adds 1).
              j = i;
            }
            for (const cell of bestCells) appendCell(cell);
          } else {
            for (let k = 0; k < 4; k++) appendCell();
          }
        }
      }
    }
    // This script sits in <head>, i.e. it executes before the results table
    // further down the document has been parsed. Start the benchmark only once
    // the DOM is available, and report a rejected run instead of leaving the
    // promise unhandled.
    const startCompressionTest = () => {
      testCompression().catch(err => {
        console.error('Compression test failed:', err);
      });
    };
    if (document.readyState === 'loading') {
      document.addEventListener('DOMContentLoaded', startCompressionTest);
    } else {
      startCompressionTest();
    }
    </script>
    <style>
      td {
        text-align: right;
      }
      table {
        border-spacing: 0;
        border-collapse: collapse;
      }
      table, tr, th, td {
        border: 1px solid #DDD;
      }
      tr:nth-child(2n) {
        background-color: #f6f8fa;
      }
      thead th {
        position: sticky;
        top: 0;
        background: white;
      }
    </style>
  </head>
  <body>
    <table id="results">
      <thead>
        <tr>
          <th>Filename</th>
          <th>Size</th>
        </tr>
      </thead>
      <tbody>
        <tr id="prefix-row">
          <th>PREFIX</th>
        </tr>
      </tbody>
    </table>
  </body>
</html>