Skip to content

Instantly share code, notes, and snippets.

@xdsopl
Last active May 24, 2021 08:37
Show Gist options
  • Select an option

  • Save xdsopl/43d2d00e28b095e7eb3feaae276123fe to your computer and use it in GitHub Desktop.

Select an option

Save xdsopl/43d2d00e28b095e7eb3feaae276123fe to your computer and use it in GitHub Desktop.
Variable length integer encoding and run length encoding
/*
Bitwise stream container
Copyright 2018 Ahmet Inan <inan@aicodix.de>
*/
#pragma once
namespace CODE {
template <int SIZE>
class Bitstream
{
public:
static const int BITS = SIZE;
static const int BYTES = (SIZE + 7) / 8;
private:
uint8_t buf_[BYTES];
int pos_ = 0;
public:
void seek(int pos)
{
pos_ = pos;
}
int tell()
{
return pos_;
}
uint8_t *data()
{
return buf_;
}
void reset()
{
pos_ = 0;
for (int i = 0; i < BYTES; ++i)
buf_[i] = 0;
}
void putbit(bool b)
{
int bit = pos_ % 8;
int byte = pos_ / 8;
++pos_;
uint8_t mask = 1 << bit;
uint8_t tmp = ~mask & buf_[byte];
buf_[byte] = tmp | b << bit;
}
bool getbit()
{
int bit = pos_ % 8;
int byte = pos_ / 8;
++pos_;
return (buf_[byte] >> bit) & 1;
}
void putbyte(const uint8_t b)
{
int bit = pos_ % 8;
int byte = pos_ / 8;
pos_ += 8;
if (bit) {
uint8_t mask = (1 << bit) - 1;
uint8_t lsb = mask & buf_[byte];
buf_[byte++] = lsb | b << bit;
uint8_t msb = ~mask & buf_[byte];
buf_[byte] = msb | b >> (8 - bit);
} else {
buf_[byte] = b;
}
}
uint8_t getbyte()
{
int bit = pos_ % 8;
int byte = pos_ / 8;
pos_ += 8;
if (bit) {
uint8_t lsb = buf_[byte++] >> bit;
uint8_t msb = buf_[byte] << (8 - bit);
return msb | lsb;
}
return buf_[byte];
}
void writebytes(const uint8_t *buf, int len)
{
int bit = pos_ % 8;
int byte = pos_ / 8;
pos_ += 8 * len;
if (bit) {
uint8_t mask = (1 << bit) - 1;
for (int i = 0; i < len; ++i) {
uint8_t lsb = mask & buf_[byte];
buf_[byte++] = lsb | buf[i] << bit;
uint8_t msb = ~mask & buf_[byte];
buf_[byte] = msb | buf[i] >> (8 - bit);
}
} else {
for (int i = 0; i < len; ++i)
buf_[byte++] = buf[i];
}
}
void readbytes(uint8_t *buf, int len)
{
int bit = pos_ % 8;
int byte = pos_ / 8;
pos_ += 8 * len;
if (bit) {
for (int i = 0; i < len; ++i) {
uint8_t lsb = buf_[byte++] >> bit;
uint8_t msb = buf_[byte] << (8 - bit);
buf[i] = msb | lsb;
}
} else {
for (int i = 0; i < len; ++i)
buf[i] = buf_[byte++];
}
}
template <typename TYPE>
void writebits(TYPE b, int num)
{
for (int sum = 0; num;) {
int bit = pos_ % 8;
int byte = pos_ / 8;
int copy = std::min(8 - bit, num);
uint8_t mask = (1 << copy) - 1;
uint8_t src = (mask & (b >> sum)) << bit;
uint8_t dst = ~(mask << bit) & buf_[byte];
buf_[byte++] = dst | src;
pos_ += copy;
num -= copy;
sum += copy;
}
}
template <typename TYPE>
void readbits(TYPE *b, int num)
{
TYPE a = 0;
for (int sum = 0; num;) {
int bit = pos_ % 8;
int byte = pos_ / 8;
int copy = std::min(8 - bit, num);
uint8_t mask = (1 << copy) - 1;
TYPE tmp = mask & (buf_[byte++] >> bit);
a |= tmp << sum;
pos_ += copy;
num -= copy;
sum += copy;
}
*b = a;
}
};
}
CXXFLAGS = -std=c++11 -W -Wall -Ofast -fno-exceptions -fno-rtti -march=native
CXX = clang++ -stdlib=libc++
#CXX = g++
.PHONY: all
all: testbench
test: testbench
./testbench
testbench: testbench.cc
$(CXX) $(CXXFLAGS) $< -o $@
.PHONY: clean
clean:
rm -f testbench
/*
Test bench for variable length integer encoding and run length encoding
Copyright 2021 Ahmet Inan <xdsopl@gmail.com>
*/
#include <functional>
#include <iostream>
#include <cassert>
#include <random>
#include "vli.hh"
#include "bitstream.hh"
int main()
{
const int N = 16384;
typedef CODE::Bitstream<N*8> Bits;
Bits enc;
VarLenInt<Bits> vli(&enc);
if (0) {
for (int i = 0; i < 1<<18; ++i) {
enc.seek(0);
vli(i);
std::cout << i << " " << enc.tell() << std::endl;
enc.seek(0);
int o = vli();
assert(i == o);
}
return 0;
}
std::random_device rd;
typedef std::default_random_engine generator;
typedef std::geometric_distribution<int> geometric;
auto geom = std::bind(geometric(0.9), generator(rd()));
typedef std::uniform_int_distribution<int> uniform;
auto rand = std::bind(uniform(0, 255), generator(rd()));
uint8_t orig[N];
for (int i = 0; i < N; ++i)
orig[i] = geom();
for (int i = 0; i < 10; ++i)
orig[rand()] = rand();
std::cerr << "raw bits: " << N*8 << std::endl;
if (0) {
for (int i = 0; i < N; ++i)
std::cout << " " << (int)orig[i];
std::cout << std::endl;
}
for (int i = 0; i < N; ++i)
vli(orig[i]);
std::cerr << "enc bits: " << enc.tell() << std::endl;
std::cerr << "enc size: " << (100 * enc.tell()) / (N*8) << '%' << std::endl;
enc.seek(0);
uint8_t dec[N];
for (int i = 0; i < N; ++i)
dec[i] = vli();
for (int i = 0; i < N; ++i)
assert(orig[i] == dec[i]);
uint8_t rle[N];
int R = 0;
for (int i = 0; i < N; ++i) {
if (orig[i]) {
assert(R < N);
rle[R++] = orig[i];
} else {
assert(R < N);
rle[R++] = 0;
int k = i + 1;
while (k < N && k-i < 256 && !orig[k])
++k;
--k;
assert(R < N);
rle[R++] = k - i;
i = k;
}
}
std::cerr << "rle bits: " << R*8 << std::endl;
std::cerr << "rle size: " << (100 * R*8) / (N*8) << '%' << std::endl;
if (0) {
for (int i = 0; i < R; ++i)
std::cout << " " << (int)rle[i];
std::cout << std::endl;
}
for (int i = 0, j = 0; i < R; ++i) {
if (rle[i]) {
assert(j < N);
assert(orig[j] == rle[i]);
++j;
} else {
++i;
assert(i < R);
for (int k = 0; k <= rle[i]; ++k) {
assert(j < N);
assert(orig[j] == 0);
++j;
}
}
}
enc.seek(0);
for (int i = 0; i < R; ++i)
vli(rle[i]);
std::cerr << "enc bits: " << enc.tell() << std::endl;
std::cerr << "enc size: " << (100 * enc.tell()) / (R*8) << '%' << std::endl;
enc.seek(0);
for (int i = 0; i < R; ++i)
dec[i] = vli();
for (int i = 0; i < R; ++i)
assert(rle[i] == dec[i]);
return 0;
}
/*
Variable length integer coding
Copyright 2021 Ahmet Inan <xdsopl@gmail.com>
*/
#pragma once
template <typename BITS>
class VarLenInt
{
BITS *bits;
public:
VarLenInt(BITS *bits) : bits(bits) {}
void operator()(unsigned val)
{
unsigned cnt = 0, top = 1;
while (top <= val) {
cnt += 1;
top = 1 << cnt;
assert(bits->tell() < BITS::BITS);
bits->putbit(1);
}
assert(bits->tell() < BITS::BITS);
bits->putbit(0);
if (cnt > 0) {
cnt -= 1;
val -= top/2;
assert(bits->tell()+cnt <= BITS::BITS);
bits->writebits(val, cnt);
}
}
unsigned operator()()
{
unsigned val = 0, cnt = 0, top = 1;
assert(bits->tell() < BITS::BITS);
while (bits->getbit()) {
cnt += 1;
top = 1 << cnt;
assert(bits->tell() < BITS::BITS);
}
if (cnt > 0) {
cnt -= 1;
assert(bits->tell()+cnt <= BITS::BITS);
bits->readbits(&val, cnt);
val += top/2;
}
return val;
}
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment