Last active
May 24, 2021 08:37
-
-
Save xdsopl/43d2d00e28b095e7eb3feaae276123fe to your computer and use it in GitHub Desktop.
Variable length integer encoding and run length encoding
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| Bitwise stream container | |
| Copyright 2018 Ahmet Inan <inan@aicodix.de> | |
| */ | |
| #pragma once | |
| namespace CODE { | |
| template <int SIZE> | |
| class Bitstream | |
| { | |
| public: | |
| static const int BITS = SIZE; | |
| static const int BYTES = (SIZE + 7) / 8; | |
| private: | |
| uint8_t buf_[BYTES]; | |
| int pos_ = 0; | |
| public: | |
| void seek(int pos) | |
| { | |
| pos_ = pos; | |
| } | |
| int tell() | |
| { | |
| return pos_; | |
| } | |
| uint8_t *data() | |
| { | |
| return buf_; | |
| } | |
| void reset() | |
| { | |
| pos_ = 0; | |
| for (int i = 0; i < BYTES; ++i) | |
| buf_[i] = 0; | |
| } | |
| void putbit(bool b) | |
| { | |
| int bit = pos_ % 8; | |
| int byte = pos_ / 8; | |
| ++pos_; | |
| uint8_t mask = 1 << bit; | |
| uint8_t tmp = ~mask & buf_[byte]; | |
| buf_[byte] = tmp | b << bit; | |
| } | |
| bool getbit() | |
| { | |
| int bit = pos_ % 8; | |
| int byte = pos_ / 8; | |
| ++pos_; | |
| return (buf_[byte] >> bit) & 1; | |
| } | |
| void putbyte(const uint8_t b) | |
| { | |
| int bit = pos_ % 8; | |
| int byte = pos_ / 8; | |
| pos_ += 8; | |
| if (bit) { | |
| uint8_t mask = (1 << bit) - 1; | |
| uint8_t lsb = mask & buf_[byte]; | |
| buf_[byte++] = lsb | b << bit; | |
| uint8_t msb = ~mask & buf_[byte]; | |
| buf_[byte] = msb | b >> (8 - bit); | |
| } else { | |
| buf_[byte] = b; | |
| } | |
| } | |
| uint8_t getbyte() | |
| { | |
| int bit = pos_ % 8; | |
| int byte = pos_ / 8; | |
| pos_ += 8; | |
| if (bit) { | |
| uint8_t lsb = buf_[byte++] >> bit; | |
| uint8_t msb = buf_[byte] << (8 - bit); | |
| return msb | lsb; | |
| } | |
| return buf_[byte]; | |
| } | |
| void writebytes(const uint8_t *buf, int len) | |
| { | |
| int bit = pos_ % 8; | |
| int byte = pos_ / 8; | |
| pos_ += 8 * len; | |
| if (bit) { | |
| uint8_t mask = (1 << bit) - 1; | |
| for (int i = 0; i < len; ++i) { | |
| uint8_t lsb = mask & buf_[byte]; | |
| buf_[byte++] = lsb | buf[i] << bit; | |
| uint8_t msb = ~mask & buf_[byte]; | |
| buf_[byte] = msb | buf[i] >> (8 - bit); | |
| } | |
| } else { | |
| for (int i = 0; i < len; ++i) | |
| buf_[byte++] = buf[i]; | |
| } | |
| } | |
| void readbytes(uint8_t *buf, int len) | |
| { | |
| int bit = pos_ % 8; | |
| int byte = pos_ / 8; | |
| pos_ += 8 * len; | |
| if (bit) { | |
| for (int i = 0; i < len; ++i) { | |
| uint8_t lsb = buf_[byte++] >> bit; | |
| uint8_t msb = buf_[byte] << (8 - bit); | |
| buf[i] = msb | lsb; | |
| } | |
| } else { | |
| for (int i = 0; i < len; ++i) | |
| buf[i] = buf_[byte++]; | |
| } | |
| } | |
| template <typename TYPE> | |
| void writebits(TYPE b, int num) | |
| { | |
| for (int sum = 0; num;) { | |
| int bit = pos_ % 8; | |
| int byte = pos_ / 8; | |
| int copy = std::min(8 - bit, num); | |
| uint8_t mask = (1 << copy) - 1; | |
| uint8_t src = (mask & (b >> sum)) << bit; | |
| uint8_t dst = ~(mask << bit) & buf_[byte]; | |
| buf_[byte++] = dst | src; | |
| pos_ += copy; | |
| num -= copy; | |
| sum += copy; | |
| } | |
| } | |
| template <typename TYPE> | |
| void readbits(TYPE *b, int num) | |
| { | |
| TYPE a = 0; | |
| for (int sum = 0; num;) { | |
| int bit = pos_ % 8; | |
| int byte = pos_ / 8; | |
| int copy = std::min(8 - bit, num); | |
| uint8_t mask = (1 << copy) - 1; | |
| TYPE tmp = mask & (buf_[byte++] >> bit); | |
| a |= tmp << sum; | |
| pos_ += copy; | |
| num -= copy; | |
| sum += copy; | |
| } | |
| *b = a; | |
| } | |
| }; | |
| } | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| CXXFLAGS = -std=c++11 -W -Wall -Ofast -fno-exceptions -fno-rtti -march=native | |
| CXX = clang++ -stdlib=libc++ | |
| #CXX = g++ | |
| .PHONY: all | |
| all: testbench | |
| test: testbench | |
| ./testbench | |
| testbench: testbench.cc | |
| $(CXX) $(CXXFLAGS) $< -o $@ | |
| .PHONY: clean | |
| clean: | |
| rm -f testbench | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| Test bench for variable length integer encoding and run length encoding | |
| Copyright 2021 Ahmet Inan <xdsopl@gmail.com> | |
| */ | |
| #include <functional> | |
| #include <iostream> | |
| #include <cassert> | |
| #include <random> | |
| #include "vli.hh" | |
| #include "bitstream.hh" | |
| int main() | |
| { | |
| const int N = 16384; | |
| typedef CODE::Bitstream<N*8> Bits; | |
| Bits enc; | |
| VarLenInt<Bits> vli(&enc); | |
| if (0) { | |
| for (int i = 0; i < 1<<18; ++i) { | |
| enc.seek(0); | |
| vli(i); | |
| std::cout << i << " " << enc.tell() << std::endl; | |
| enc.seek(0); | |
| int o = vli(); | |
| assert(i == o); | |
| } | |
| return 0; | |
| } | |
| std::random_device rd; | |
| typedef std::default_random_engine generator; | |
| typedef std::geometric_distribution<int> geometric; | |
| auto geom = std::bind(geometric(0.9), generator(rd())); | |
| typedef std::uniform_int_distribution<int> uniform; | |
| auto rand = std::bind(uniform(0, 255), generator(rd())); | |
| uint8_t orig[N]; | |
| for (int i = 0; i < N; ++i) | |
| orig[i] = geom(); | |
| for (int i = 0; i < 10; ++i) | |
| orig[rand()] = rand(); | |
| std::cerr << "raw bits: " << N*8 << std::endl; | |
| if (0) { | |
| for (int i = 0; i < N; ++i) | |
| std::cout << " " << (int)orig[i]; | |
| std::cout << std::endl; | |
| } | |
| for (int i = 0; i < N; ++i) | |
| vli(orig[i]); | |
| std::cerr << "enc bits: " << enc.tell() << std::endl; | |
| std::cerr << "enc size: " << (100 * enc.tell()) / (N*8) << '%' << std::endl; | |
| enc.seek(0); | |
| uint8_t dec[N]; | |
| for (int i = 0; i < N; ++i) | |
| dec[i] = vli(); | |
| for (int i = 0; i < N; ++i) | |
| assert(orig[i] == dec[i]); | |
| uint8_t rle[N]; | |
| int R = 0; | |
| for (int i = 0; i < N; ++i) { | |
| if (orig[i]) { | |
| assert(R < N); | |
| rle[R++] = orig[i]; | |
| } else { | |
| assert(R < N); | |
| rle[R++] = 0; | |
| int k = i + 1; | |
| while (k < N && k-i < 256 && !orig[k]) | |
| ++k; | |
| --k; | |
| assert(R < N); | |
| rle[R++] = k - i; | |
| i = k; | |
| } | |
| } | |
| std::cerr << "rle bits: " << R*8 << std::endl; | |
| std::cerr << "rle size: " << (100 * R*8) / (N*8) << '%' << std::endl; | |
| if (0) { | |
| for (int i = 0; i < R; ++i) | |
| std::cout << " " << (int)rle[i]; | |
| std::cout << std::endl; | |
| } | |
| for (int i = 0, j = 0; i < R; ++i) { | |
| if (rle[i]) { | |
| assert(j < N); | |
| assert(orig[j] == rle[i]); | |
| ++j; | |
| } else { | |
| ++i; | |
| assert(i < R); | |
| for (int k = 0; k <= rle[i]; ++k) { | |
| assert(j < N); | |
| assert(orig[j] == 0); | |
| ++j; | |
| } | |
| } | |
| } | |
| enc.seek(0); | |
| for (int i = 0; i < R; ++i) | |
| vli(rle[i]); | |
| std::cerr << "enc bits: " << enc.tell() << std::endl; | |
| std::cerr << "enc size: " << (100 * enc.tell()) / (R*8) << '%' << std::endl; | |
| enc.seek(0); | |
| for (int i = 0; i < R; ++i) | |
| dec[i] = vli(); | |
| for (int i = 0; i < R; ++i) | |
| assert(rle[i] == dec[i]); | |
| return 0; | |
| } | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| Variable length integer coding | |
| Copyright 2021 Ahmet Inan <xdsopl@gmail.com> | |
| */ | |
| #pragma once | |
| template <typename BITS> | |
| class VarLenInt | |
| { | |
| BITS *bits; | |
| public: | |
| VarLenInt(BITS *bits) : bits(bits) {} | |
| void operator()(unsigned val) | |
| { | |
| unsigned cnt = 0, top = 1; | |
| while (top <= val) { | |
| cnt += 1; | |
| top = 1 << cnt; | |
| assert(bits->tell() < BITS::BITS); | |
| bits->putbit(1); | |
| } | |
| assert(bits->tell() < BITS::BITS); | |
| bits->putbit(0); | |
| if (cnt > 0) { | |
| cnt -= 1; | |
| val -= top/2; | |
| assert(bits->tell()+cnt <= BITS::BITS); | |
| bits->writebits(val, cnt); | |
| } | |
| } | |
| unsigned operator()() | |
| { | |
| unsigned val = 0, cnt = 0, top = 1; | |
| assert(bits->tell() < BITS::BITS); | |
| while (bits->getbit()) { | |
| cnt += 1; | |
| top = 1 << cnt; | |
| assert(bits->tell() < BITS::BITS); | |
| } | |
| if (cnt > 0) { | |
| cnt -= 1; | |
| assert(bits->tell()+cnt <= BITS::BITS); | |
| bits->readbits(&val, cnt); | |
| val += top/2; | |
| } | |
| return val; | |
| } | |
| }; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment