Last active
May 24, 2021 09:12
-
-
Save xdsopl/45d5a7c056bc393aa7c767831c103ee9 to your computer and use it in GitHub Desktop.
Run length encoding of mostly zero bits
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| Bitwise stream container | |
| Copyright 2018 Ahmet Inan <inan@aicodix.de> | |
| */ | |
| #pragma once | |
| namespace CODE { | |
| template <int SIZE> | |
| class Bitstream | |
| { | |
| public: | |
| static const int BITS = SIZE; | |
| static const int BYTES = (SIZE + 7) / 8; | |
| private: | |
| uint8_t buf_[BYTES]; | |
| int pos_ = 0; | |
| public: | |
| void seek(int pos) | |
| { | |
| pos_ = pos; | |
| } | |
| int tell() | |
| { | |
| return pos_; | |
| } | |
| uint8_t *data() | |
| { | |
| return buf_; | |
| } | |
| void reset() | |
| { | |
| pos_ = 0; | |
| for (int i = 0; i < BYTES; ++i) | |
| buf_[i] = 0; | |
| } | |
| void putbit(bool b) | |
| { | |
| int bit = pos_ % 8; | |
| int byte = pos_ / 8; | |
| ++pos_; | |
| uint8_t mask = 1 << bit; | |
| uint8_t tmp = ~mask & buf_[byte]; | |
| buf_[byte] = tmp | b << bit; | |
| } | |
| bool getbit() | |
| { | |
| int bit = pos_ % 8; | |
| int byte = pos_ / 8; | |
| ++pos_; | |
| return (buf_[byte] >> bit) & 1; | |
| } | |
| void putbyte(const uint8_t b) | |
| { | |
| int bit = pos_ % 8; | |
| int byte = pos_ / 8; | |
| pos_ += 8; | |
| if (bit) { | |
| uint8_t mask = (1 << bit) - 1; | |
| uint8_t lsb = mask & buf_[byte]; | |
| buf_[byte++] = lsb | b << bit; | |
| uint8_t msb = ~mask & buf_[byte]; | |
| buf_[byte] = msb | b >> (8 - bit); | |
| } else { | |
| buf_[byte] = b; | |
| } | |
| } | |
| uint8_t getbyte() | |
| { | |
| int bit = pos_ % 8; | |
| int byte = pos_ / 8; | |
| pos_ += 8; | |
| if (bit) { | |
| uint8_t lsb = buf_[byte++] >> bit; | |
| uint8_t msb = buf_[byte] << (8 - bit); | |
| return msb | lsb; | |
| } | |
| return buf_[byte]; | |
| } | |
| void writebytes(const uint8_t *buf, int len) | |
| { | |
| int bit = pos_ % 8; | |
| int byte = pos_ / 8; | |
| pos_ += 8 * len; | |
| if (bit) { | |
| uint8_t mask = (1 << bit) - 1; | |
| for (int i = 0; i < len; ++i) { | |
| uint8_t lsb = mask & buf_[byte]; | |
| buf_[byte++] = lsb | buf[i] << bit; | |
| uint8_t msb = ~mask & buf_[byte]; | |
| buf_[byte] = msb | buf[i] >> (8 - bit); | |
| } | |
| } else { | |
| for (int i = 0; i < len; ++i) | |
| buf_[byte++] = buf[i]; | |
| } | |
| } | |
| void readbytes(uint8_t *buf, int len) | |
| { | |
| int bit = pos_ % 8; | |
| int byte = pos_ / 8; | |
| pos_ += 8 * len; | |
| if (bit) { | |
| for (int i = 0; i < len; ++i) { | |
| uint8_t lsb = buf_[byte++] >> bit; | |
| uint8_t msb = buf_[byte] << (8 - bit); | |
| buf[i] = msb | lsb; | |
| } | |
| } else { | |
| for (int i = 0; i < len; ++i) | |
| buf[i] = buf_[byte++]; | |
| } | |
| } | |
| template <typename TYPE> | |
| void writebits(TYPE b, int num) | |
| { | |
| for (int sum = 0; num;) { | |
| int bit = pos_ % 8; | |
| int byte = pos_ / 8; | |
| int copy = std::min(8 - bit, num); | |
| uint8_t mask = (1 << copy) - 1; | |
| uint8_t src = (mask & (b >> sum)) << bit; | |
| uint8_t dst = ~(mask << bit) & buf_[byte]; | |
| buf_[byte++] = dst | src; | |
| pos_ += copy; | |
| num -= copy; | |
| sum += copy; | |
| } | |
| } | |
| template <typename TYPE> | |
| void readbits(TYPE *b, int num) | |
| { | |
| TYPE a = 0; | |
| for (int sum = 0; num;) { | |
| int bit = pos_ % 8; | |
| int byte = pos_ / 8; | |
| int copy = std::min(8 - bit, num); | |
| uint8_t mask = (1 << copy) - 1; | |
| TYPE tmp = mask & (buf_[byte++] >> bit); | |
| a |= tmp << sum; | |
| pos_ += copy; | |
| num -= copy; | |
| sum += copy; | |
| } | |
| *b = a; | |
| } | |
| }; | |
| } | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| CXXFLAGS = -std=c++11 -W -Wall -Ofast -fno-exceptions -fno-rtti -march=native | |
| CXX = clang++ -stdlib=libc++ | |
| #CXX = g++ | |
| .PHONY: all | |
| all: testbench | |
| test: testbench | |
| ./testbench | |
| testbench: testbench.cc *.hh | |
| $(CXX) $(CXXFLAGS) $< -o $@ | |
| .PHONY: clean | |
| clean: | |
| rm -f testbench | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| Rice coding | |
| Copyright 2021 Ahmet Inan <xdsopl@gmail.com> | |
| */ | |
| #pragma once | |
| template <typename BITS> | |
| class Rice | |
| { | |
| BITS *bits; | |
| unsigned order; | |
| public: | |
| Rice(BITS *bits) : bits(bits), order(0) {} | |
| void reset() | |
| { | |
| order = 0; | |
| } | |
| void operator()(unsigned num) | |
| { | |
| while (num >= 1 << order) { | |
| assert(bits->tell() < BITS::BITS); | |
| bits->putbit(0); | |
| num -= 1 << order; | |
| order += 1; | |
| } | |
| assert(bits->tell()+1+order < BITS::BITS); | |
| bits->putbit(1); | |
| bits->writebits(num, order); | |
| order -= 1; | |
| if (order < 0) | |
| order = 0; | |
| } | |
| unsigned operator()() | |
| { | |
| unsigned num, sum = 0; | |
| assert(bits->tell() < BITS::BITS); | |
| while (!bits->getbit()) { | |
| sum += 1 << order; | |
| order += 1; | |
| assert(bits->tell() < BITS::BITS); | |
| } | |
| assert(bits->tell()+order <= BITS::BITS); | |
| bits->readbits(&num, order); | |
| order -= 1; | |
| if (order < 0) | |
| order = 0; | |
| return num + sum; | |
| } | |
| }; | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| Run length encoding of mostly zero bits | |
| Copyright 2021 Ahmet Inan <xdsopl@gmail.com> | |
| */ | |
| #pragma once | |
| template <typename BITS, typename VLI> | |
| class RunLenEnc | |
| { | |
| VLI vli; | |
| unsigned count; | |
| public: | |
| RunLenEnc(BITS *bits) : vli(bits), count(0) {} | |
| void flush() | |
| { | |
| vli(count); | |
| count = 0; | |
| vli.reset(); | |
| } | |
| void start() | |
| { | |
| vli.reset(); | |
| count = vli(); | |
| } | |
| bool operator()() | |
| { | |
| if (count--) | |
| return 0; | |
| count = vli(); | |
| return 1; | |
| } | |
| void operator()(bool val) | |
| { | |
| if (val) { | |
| vli(count); | |
| count = 0; | |
| } else { | |
| ++count; | |
| } | |
| } | |
| }; | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| Test bench for run length encoding of mostly zero bits | |
| Copyright 2021 Ahmet Inan <xdsopl@gmail.com> | |
| */ | |
| #include <functional> | |
| #include <iostream> | |
| #include <cassert> | |
| #include <random> | |
| #include "rle.hh" | |
| #include "vli.hh" | |
| #include "rice.hh" | |
| #include "bitstream.hh" | |
| int main() | |
| { | |
| const int N = 1 << 20; | |
| std::random_device rd; | |
| typedef std::default_random_engine generator; | |
| typedef std::bernoulli_distribution bernoulli; | |
| auto bern = std::bind(bernoulli(0.01), generator(rd())); | |
| bool orig[N]; | |
| for (int i = 0; i < N; ++i) | |
| orig[i] = bern(); | |
| int ones = 0; | |
| for (int i = 0; i < N; ++i) | |
| ones += orig[i]; | |
| int zeros = 0; | |
| for (int i = 0; i < N; ++i) | |
| zeros += !orig[i]; | |
| std::cerr << "uncoded bits: " << N << std::endl; | |
| std::cerr << "distribution: " << (1000 * ones) / zeros << "‰" << std::endl; | |
| if (0) { | |
| for (int i = 0; i < N; ++i) | |
| std::cout << orig[i]; | |
| std::cout << std::endl; | |
| } | |
| typedef CODE::Bitstream<N> Bits; | |
| Bits bits; | |
| #if 1 | |
| typedef Rice<Bits> Vli; | |
| #else | |
| typedef VarLenInt<Bits> Vli; | |
| #endif | |
| RunLenEnc<Bits, Vli> rle(&bits); | |
| for (int i = 0; i < N; ++i) | |
| rle(orig[i]); | |
| rle.flush(); | |
| std::cerr << "encoded bits: " << bits.tell() << std::endl; | |
| std::cerr << "encoded size: " << (100 * bits.tell()) / N << '%' << std::endl; | |
| if (0) { | |
| bits.seek(0); | |
| for (int i = 0; i < N; ++i) | |
| std::cout << bits.getbit(); | |
| std::cout << std::endl; | |
| } | |
| bits.seek(0); | |
| bool dec[N]; | |
| rle.start(); | |
| for (int i = 0; i < N; ++i) | |
| dec[i] = rle(); | |
| if (0) { | |
| for (int i = 0; i < N; ++i) | |
| std::cout << dec[i]; | |
| std::cout << std::endl; | |
| } | |
| for (int i = 0; i < N; ++i) | |
| assert(orig[i] == dec[i]); | |
| return 0; | |
| } | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| Variable length integer coding | |
| Copyright 2021 Ahmet Inan <xdsopl@gmail.com> | |
| */ | |
| #pragma once | |
| template <typename BITS> | |
| class VarLenInt | |
| { | |
| BITS *bits; | |
| public: | |
| VarLenInt(BITS *bits) : bits(bits) {} | |
| void reset() {} | |
| void operator()(unsigned val) | |
| { | |
| unsigned cnt = 0, top = 1; | |
| while (top <= val) { | |
| cnt += 1; | |
| top = 1 << cnt; | |
| assert(bits->tell() < BITS::BITS); | |
| bits->putbit(1); | |
| } | |
| assert(bits->tell() < BITS::BITS); | |
| bits->putbit(0); | |
| if (cnt > 0) { | |
| cnt -= 1; | |
| val -= top/2; | |
| assert(bits->tell()+cnt <= BITS::BITS); | |
| bits->writebits(val, cnt); | |
| } | |
| } | |
| unsigned operator()() | |
| { | |
| unsigned val = 0, cnt = 0, top = 1; | |
| assert(bits->tell() < BITS::BITS); | |
| while (bits->getbit()) { | |
| cnt += 1; | |
| top = 1 << cnt; | |
| assert(bits->tell() < BITS::BITS); | |
| } | |
| if (cnt > 0) { | |
| cnt -= 1; | |
| assert(bits->tell()+cnt <= BITS::BITS); | |
| bits->readbits(&val, cnt); | |
| val += top/2; | |
| } | |
| return val; | |
| } | |
| }; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment