Skip to content

Instantly share code, notes, and snippets.

@xdsopl
Last active May 24, 2021 09:12
Show Gist options
  • Select an option

  • Save xdsopl/45d5a7c056bc393aa7c767831c103ee9 to your computer and use it in GitHub Desktop.

Select an option

Save xdsopl/45d5a7c056bc393aa7c767831c103ee9 to your computer and use it in GitHub Desktop.
Run length encoding of mostly zero bits
/*
Bitwise stream container
Copyright 2018 Ahmet Inan <inan@aicodix.de>
*/
#pragma once
namespace CODE {
template <int SIZE>
class Bitstream
{
public:
static const int BITS = SIZE;
static const int BYTES = (SIZE + 7) / 8;
private:
uint8_t buf_[BYTES];
int pos_ = 0;
public:
void seek(int pos)
{
pos_ = pos;
}
int tell()
{
return pos_;
}
uint8_t *data()
{
return buf_;
}
void reset()
{
pos_ = 0;
for (int i = 0; i < BYTES; ++i)
buf_[i] = 0;
}
void putbit(bool b)
{
int bit = pos_ % 8;
int byte = pos_ / 8;
++pos_;
uint8_t mask = 1 << bit;
uint8_t tmp = ~mask & buf_[byte];
buf_[byte] = tmp | b << bit;
}
bool getbit()
{
int bit = pos_ % 8;
int byte = pos_ / 8;
++pos_;
return (buf_[byte] >> bit) & 1;
}
void putbyte(const uint8_t b)
{
int bit = pos_ % 8;
int byte = pos_ / 8;
pos_ += 8;
if (bit) {
uint8_t mask = (1 << bit) - 1;
uint8_t lsb = mask & buf_[byte];
buf_[byte++] = lsb | b << bit;
uint8_t msb = ~mask & buf_[byte];
buf_[byte] = msb | b >> (8 - bit);
} else {
buf_[byte] = b;
}
}
uint8_t getbyte()
{
int bit = pos_ % 8;
int byte = pos_ / 8;
pos_ += 8;
if (bit) {
uint8_t lsb = buf_[byte++] >> bit;
uint8_t msb = buf_[byte] << (8 - bit);
return msb | lsb;
}
return buf_[byte];
}
void writebytes(const uint8_t *buf, int len)
{
int bit = pos_ % 8;
int byte = pos_ / 8;
pos_ += 8 * len;
if (bit) {
uint8_t mask = (1 << bit) - 1;
for (int i = 0; i < len; ++i) {
uint8_t lsb = mask & buf_[byte];
buf_[byte++] = lsb | buf[i] << bit;
uint8_t msb = ~mask & buf_[byte];
buf_[byte] = msb | buf[i] >> (8 - bit);
}
} else {
for (int i = 0; i < len; ++i)
buf_[byte++] = buf[i];
}
}
void readbytes(uint8_t *buf, int len)
{
int bit = pos_ % 8;
int byte = pos_ / 8;
pos_ += 8 * len;
if (bit) {
for (int i = 0; i < len; ++i) {
uint8_t lsb = buf_[byte++] >> bit;
uint8_t msb = buf_[byte] << (8 - bit);
buf[i] = msb | lsb;
}
} else {
for (int i = 0; i < len; ++i)
buf[i] = buf_[byte++];
}
}
template <typename TYPE>
void writebits(TYPE b, int num)
{
for (int sum = 0; num;) {
int bit = pos_ % 8;
int byte = pos_ / 8;
int copy = std::min(8 - bit, num);
uint8_t mask = (1 << copy) - 1;
uint8_t src = (mask & (b >> sum)) << bit;
uint8_t dst = ~(mask << bit) & buf_[byte];
buf_[byte++] = dst | src;
pos_ += copy;
num -= copy;
sum += copy;
}
}
template <typename TYPE>
void readbits(TYPE *b, int num)
{
TYPE a = 0;
for (int sum = 0; num;) {
int bit = pos_ % 8;
int byte = pos_ / 8;
int copy = std::min(8 - bit, num);
uint8_t mask = (1 << copy) - 1;
TYPE tmp = mask & (buf_[byte++] >> bit);
a |= tmp << sum;
pos_ += copy;
num -= copy;
sum += copy;
}
*b = a;
}
};
}
CXXFLAGS = -std=c++11 -W -Wall -Ofast -fno-exceptions -fno-rtti -march=native
CXX = clang++ -stdlib=libc++
#CXX = g++
.PHONY: all
all: testbench
test: testbench
./testbench
testbench: testbench.cc *.hh
$(CXX) $(CXXFLAGS) $< -o $@
.PHONY: clean
clean:
rm -f testbench
/*
Rice coding
Copyright 2021 Ahmet Inan <xdsopl@gmail.com>
*/
#pragma once
template <typename BITS>
class Rice
{
BITS *bits;
unsigned order;
public:
Rice(BITS *bits) : bits(bits), order(0) {}
void reset()
{
order = 0;
}
void operator()(unsigned num)
{
while (num >= 1 << order) {
assert(bits->tell() < BITS::BITS);
bits->putbit(0);
num -= 1 << order;
order += 1;
}
assert(bits->tell()+1+order < BITS::BITS);
bits->putbit(1);
bits->writebits(num, order);
order -= 1;
if (order < 0)
order = 0;
}
unsigned operator()()
{
unsigned num, sum = 0;
assert(bits->tell() < BITS::BITS);
while (!bits->getbit()) {
sum += 1 << order;
order += 1;
assert(bits->tell() < BITS::BITS);
}
assert(bits->tell()+order <= BITS::BITS);
bits->readbits(&num, order);
order -= 1;
if (order < 0)
order = 0;
return num + sum;
}
};
/*
Run length encoding of mostly zero bits
Copyright 2021 Ahmet Inan <xdsopl@gmail.com>
*/
#pragma once
template <typename BITS, typename VLI>
class RunLenEnc
{
VLI vli;
unsigned count;
public:
RunLenEnc(BITS *bits) : vli(bits), count(0) {}
void flush()
{
vli(count);
count = 0;
vli.reset();
}
void start()
{
vli.reset();
count = vli();
}
bool operator()()
{
if (count--)
return 0;
count = vli();
return 1;
}
void operator()(bool val)
{
if (val) {
vli(count);
count = 0;
} else {
++count;
}
}
};
/*
Test bench for run length encoding of mostly zero bits
Copyright 2021 Ahmet Inan <xdsopl@gmail.com>
*/
#include <functional>
#include <iostream>
#include <cassert>
#include <random>
#include "rle.hh"
#include "vli.hh"
#include "rice.hh"
#include "bitstream.hh"
int main()
{
const int N = 1 << 20;
std::random_device rd;
typedef std::default_random_engine generator;
typedef std::bernoulli_distribution bernoulli;
auto bern = std::bind(bernoulli(0.01), generator(rd()));
bool orig[N];
for (int i = 0; i < N; ++i)
orig[i] = bern();
int ones = 0;
for (int i = 0; i < N; ++i)
ones += orig[i];
int zeros = 0;
for (int i = 0; i < N; ++i)
zeros += !orig[i];
std::cerr << "uncoded bits: " << N << std::endl;
std::cerr << "distribution: " << (1000 * ones) / zeros << "‰" << std::endl;
if (0) {
for (int i = 0; i < N; ++i)
std::cout << orig[i];
std::cout << std::endl;
}
typedef CODE::Bitstream<N> Bits;
Bits bits;
#if 1
typedef Rice<Bits> Vli;
#else
typedef VarLenInt<Bits> Vli;
#endif
RunLenEnc<Bits, Vli> rle(&bits);
for (int i = 0; i < N; ++i)
rle(orig[i]);
rle.flush();
std::cerr << "encoded bits: " << bits.tell() << std::endl;
std::cerr << "encoded size: " << (100 * bits.tell()) / N << '%' << std::endl;
if (0) {
bits.seek(0);
for (int i = 0; i < N; ++i)
std::cout << bits.getbit();
std::cout << std::endl;
}
bits.seek(0);
bool dec[N];
rle.start();
for (int i = 0; i < N; ++i)
dec[i] = rle();
if (0) {
for (int i = 0; i < N; ++i)
std::cout << dec[i];
std::cout << std::endl;
}
for (int i = 0; i < N; ++i)
assert(orig[i] == dec[i]);
return 0;
}
/*
Variable length integer coding
Copyright 2021 Ahmet Inan <xdsopl@gmail.com>
*/
#pragma once
template <typename BITS>
class VarLenInt
{
BITS *bits;
public:
VarLenInt(BITS *bits) : bits(bits) {}
void reset() {}
void operator()(unsigned val)
{
unsigned cnt = 0, top = 1;
while (top <= val) {
cnt += 1;
top = 1 << cnt;
assert(bits->tell() < BITS::BITS);
bits->putbit(1);
}
assert(bits->tell() < BITS::BITS);
bits->putbit(0);
if (cnt > 0) {
cnt -= 1;
val -= top/2;
assert(bits->tell()+cnt <= BITS::BITS);
bits->writebits(val, cnt);
}
}
unsigned operator()()
{
unsigned val = 0, cnt = 0, top = 1;
assert(bits->tell() < BITS::BITS);
while (bits->getbit()) {
cnt += 1;
top = 1 << cnt;
assert(bits->tell() < BITS::BITS);
}
if (cnt > 0) {
cnt -= 1;
assert(bits->tell()+cnt <= BITS::BITS);
bits->readbits(&val, cnt);
val += top/2;
}
return val;
}
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment