Major refactoring (#86)

Major code refactoring.
This commit is contained in:
jeefo 2019-07-01 21:10:00 +03:00
commit 68f2f010fd
24 changed files with 8524 additions and 8166 deletions

View file

@ -9,327 +9,295 @@
#pragma once

#include <string.h>
static constexpr int MAXBUF = 4096, PADDING = 18, THRESHOLD = 2, NIL = MAXBUF;
// see https://github.com/encode84/ulz/
class FastLZ final : NonCopyable {
public:
static constexpr int EXCESS = 16;
static constexpr int WINDOW_BITS = 17;
static constexpr int WINDOW_SIZE = cr::bit (WINDOW_BITS);
static constexpr int WINDOW_MASK = WINDOW_SIZE - 1;
class Compress {
protected:
unsigned long int m_csize;
static constexpr int MIN_MATCH = 4;
static constexpr int MAX_CHAIN = cr::bit (5);
uint8 m_buffer[MAXBUF + PADDING - 1];
int m_matchPos;
int m_matchLen;
int m_left[MAXBUF + 1];
int m_right[MAXBUF + 257];
int m_parent[MAXBUF + 1];
static constexpr int HASH_BITS = 19;
static constexpr int HASH_SIZE = cr::bit (HASH_BITS);
static constexpr int NIL = -1;
static constexpr int UNCOMPRESS_RESULT_FAILED = -1;
private:
void initTrees (void) {
for (int i = MAXBUF + 1; i <= MAXBUF + 256; i++) {
m_right[i] = NIL;
}
int *m_hashTable;
int *m_prevTable;
for (int j = 0; j < MAXBUF; j++) {
m_parent[j] = NIL;
}
public:
FastLZ (void) {
m_hashTable = new int[HASH_SIZE];
m_prevTable = new int[WINDOW_SIZE];
}
void insert (int node) {
int i;
int compare = 1;
uint8 *key = &m_buffer[node];
int temp = MAXBUF + 1 + key[0];
m_right[node] = m_left[node] = NIL;
m_matchLen = 0;
for (;;) {
if (compare >= 0) {
if (m_right[temp] != NIL) {
temp = m_right[temp];
}
else {
m_right[temp] = node;
m_parent[node] = temp;
return;
}
}
else {
if (m_left[temp] != NIL) {
temp = m_left[temp];
}
else {
m_left[temp] = node;
m_parent[node] = temp;
return;
}
}
for (i = 1; i < PADDING; i++) {
if ((compare = key[i] - m_buffer[temp + i]) != 0) {
break;
}
}
if (i > m_matchLen) {
m_matchPos = temp;
if ((m_matchLen = i) >= PADDING) {
break;
}
}
}
m_parent[node] = m_parent[temp];
m_left[node] = m_left[temp];
m_right[node] = m_right[temp];
m_parent[m_left[temp]] = node;
m_parent[m_right[temp]] = node;
if (m_right[m_parent[temp]] == temp) {
m_right[m_parent[temp]] = node;
}
else {
m_left[m_parent[temp]] = node;
}
m_parent[temp] = NIL;
}
void erase (int node) {
int temp;
if (m_parent[node] == NIL) {
return; // not in tree
}
if (m_right[node] == NIL) {
temp = m_left[node];
}
else if (m_left[node] == NIL) {
temp = m_right[node];
}
else {
temp = m_left[node];
if (m_right[temp] != NIL) {
do {
temp = m_right[temp];
} while (m_right[temp] != NIL);
m_right[m_parent[temp]] = m_left[temp];
m_parent[m_left[temp]] = m_parent[temp];
m_left[temp] = m_left[node];
m_parent[m_left[node]] = temp;
}
m_right[temp] = m_right[node];
m_parent[m_right[node]] = temp;
}
m_parent[temp] = m_parent[node];
if (m_right[m_parent[node]] == node) {
m_right[m_parent[node]] = temp;
}
else {
m_left[m_parent[node]] = temp;
}
m_parent[node] = NIL;
~FastLZ (void) {
delete [] m_hashTable;
delete [] m_prevTable;
}
public:
Compress (void) : m_csize (0), m_matchPos (0), m_matchLen (0) {
memset (m_right, 0, sizeof (m_right));
memset (m_left, 0, sizeof (m_left));
memset (m_parent, 0, sizeof (m_parent));
memset (m_buffer, 0, sizeof (m_buffer));
}
~Compress (void) = default;
int encode_ (const char *fileName, uint8 *header, int headerSize, uint8 *buffer, int bufferSize) {
File fp (fileName, "wb");
if (!fp.isValid ()) {
return -1;
int compress (uint8 *in, int inLength, uint8 *out) {
for (int i = 0; i < HASH_SIZE; i++) {
m_hashTable[i] = NIL;
}
int i, length, node, ptr, last, cbp, bp = 0;
uint8 cb[17] = {0, }, mask, bit;
uint8 *op = out;
fp.write (header, headerSize, 1);
initTrees ();
int anchor = 0;
int cur = 0;
cb[0] = 0;
cbp = mask = 1;
ptr = 0;
node = MAXBUF - PADDING;
while (cur < inLength) {
const int maxMatch = inLength - cur;
for (i = ptr; i < node; i++)
m_buffer[i] = ' ';
int bestLength = 0;
int dist = 0;
for (length = 0; (length < PADDING) && (bp < bufferSize); length++) {
bit = buffer[bp++];
m_buffer[node + length] = bit;
}
if (maxMatch >= MIN_MATCH) {
const int limit = cr::max (cur - WINDOW_SIZE, NIL);
if (length == 0) {
return -1;
}
int chainLength = MAX_CHAIN;
int lookup = m_hashTable[hash32 (&in[cur])];
for (i = 1; i <= PADDING; i++) {
insert (node - i);
}
insert (node);
while (lookup > limit) {
if (in[lookup + bestLength] == in[cur + bestLength] && load32 (&in[lookup]) == load32 (&in[cur])) {
int length = MIN_MATCH;
do {
if (m_matchLen > length) {
m_matchLen = length;
}
if (m_matchLen <= THRESHOLD) {
m_matchLen = 1;
while (length < maxMatch && in[lookup + length] == in[cur + length]) {
length++;
}
cb[0] |= mask;
cb[cbp++] = m_buffer[node];
}
else {
cb[cbp++] = (uint8) (m_matchPos & 0xff);
cb[cbp++] = (uint8) (((m_matchPos >> 4) & 0xf0) | (m_matchLen - (THRESHOLD + 1)));
}
if (length > bestLength) {
bestLength = length;
dist = cur - lookup;
if ((mask <<= 1) == 0) {
for (i = 0; i < cbp; i++) {
fp.putch (cb[i]);
}
m_csize += cbp;
cb[0] = 0;
cbp = mask = 1;
}
last = m_matchLen;
for (i = 0; (i < last) && (bp < bufferSize); i++) {
bit = buffer[bp++];
erase (ptr);
m_buffer[ptr] = bit;
if (ptr < PADDING - 1) {
m_buffer[ptr + MAXBUF] = bit;
}
ptr = (ptr + 1) & (MAXBUF - 1);
node = (node + 1) & (MAXBUF - 1);
insert (node);
}
while (i++ < last) {
erase (ptr);
ptr = (ptr + 1) & (MAXBUF - 1);
node = (node + 1) & (MAXBUF - 1);
if (length--) {
insert (node);
}
}
} while (length > 0);
if (cbp > 1) {
for (i = 0; i < cbp; i++) {
fp.putch (cb[i]);
}
m_csize += cbp;
}
fp.close ();
return m_csize;
}
int decode_ (const char *fileName, int headerSize, uint8 *buffer, int bufferSize) {
int i, j, k, node;
unsigned int flags;
int bp = 0;
uint8 bit;
File fp (fileName, "rb");
if (!fp.isValid ()) {
return -1;
}
fp.seek (headerSize, SEEK_SET);
node = MAXBUF - PADDING;
for (i = 0; i < node; i++) {
m_buffer[i] = ' ';
}
flags = 0;
for (;;) {
if (((flags >>= 1) & 256) == 0) {
int read = fp.getch ();
if (read == EOF) {
break;
}
bit = static_cast <uint8> (read);
flags = bit | 0xff00;
}
if (flags & 1) {
int read = fp.getch ();
if (read == EOF) {
break;
}
bit = static_cast <uint8> (read);
buffer[bp++] = bit;
if (bp > bufferSize) {
return -1;
}
m_buffer[node++] = bit;
node &= (MAXBUF - 1);
}
else {
if ((i = fp.getch ()) == EOF) {
break;
}
if ((j = fp.getch ()) == EOF) {
break;
}
i |= ((j & 0xf0) << 4);
j = (j & 0x0f) + THRESHOLD;
for (k = 0; k <= j; k++) {
bit = m_buffer[(i + k) & (MAXBUF - 1)];
buffer[bp++] = bit;
if (bp > bufferSize) {
return -1;
if (length == maxMatch) {
break;
}
}
}
m_buffer[node++] = bit;
node &= (MAXBUF - 1);
if (--chainLength == 0) {
break;
}
lookup = m_prevTable[lookup & WINDOW_MASK];
}
}
if (bestLength == MIN_MATCH && (cur - anchor) >= (7 + 128)) {
bestLength = 0;
}
if (bestLength >= MIN_MATCH && bestLength < maxMatch && (cur - anchor) != 6) {
const int next = cur + 1;
const int targetLength = bestLength + 1;
const int limit = cr::max (next - WINDOW_SIZE, NIL);
int chainLength = MAX_CHAIN;
int lookup = m_hashTable[hash32 (&in[next])];
while (lookup > limit) {
if (in[lookup + bestLength] == in[next + bestLength] && load32 (&in[lookup]) == load32 (&in[next])) {
int length = MIN_MATCH;
while (length < targetLength && in[lookup + length] == in[next + length]) {
length++;
}
if (length == targetLength) {
bestLength = 0;
break;
}
}
if (--chainLength == 0) {
break;
}
lookup = m_prevTable[lookup & WINDOW_MASK];
}
}
if (bestLength >= MIN_MATCH) {
const int length = bestLength - MIN_MATCH;
const int token = ((dist >> 12) & 16) + cr::min (length, 15);
if (anchor != cur) {
const int run = cur - anchor;
if (run >= 7) {
add (op, (7 << 5) + token);
encode (op, run - 7);
}
else {
add (op, (run << 5) + token);
}
copy (op, &in[anchor], run);
op += run;
}
else {
add (op, token);
}
if (length >= 15) {
encode (op, length - 15);
}
store16 (op, dist);
op += 2;
while (bestLength-- != 0) {
const uint32 hash = hash32 (&in[cur]);
m_prevTable[cur & WINDOW_MASK] = m_hashTable[hash];
m_hashTable[hash] = cur++;
}
anchor = cur;
}
else {
const uint32 hash = hash32 (&in[cur]);
m_prevTable[cur & WINDOW_MASK] = m_hashTable[hash];
m_hashTable[hash] = cur++;
}
}
if (anchor != cur) {
const int run = cur - anchor;
if (run >= 7) {
add (op, 7 << 5);
encode (op, run - 7);
}
else {
add (op, run << 5);
}
copy (op, &in[anchor], run);
op += run;
}
return op - out;
}
int uncompress (uint8 *in, int inLength, uint8 *out, int outLength) {
uint8 *op = out;
uint8 *ip = in;
const uint8 *opEnd = op + outLength;
const uint8 *ipEnd = ip + inLength;
while (ip < ipEnd) {
const int token = *ip++;
if (token >= 32) {
int run = token >> 5;
if (run == 7) {
run += decode (ip);
}
if ((opEnd - op) < run || (ipEnd - ip) < run) {
return UNCOMPRESS_RESULT_FAILED;
}
copy (op, ip, run);
op += run;
ip += run;
if (ip >= ipEnd) {
break;
}
}
int length = (token & 15) + MIN_MATCH;
if (length == (15 + MIN_MATCH)) {
length += decode (ip);
}
if ((opEnd - op) < length) {
return UNCOMPRESS_RESULT_FAILED;
}
const int dist = ((token & 16) << 12) + load16 (ip);
ip += 2;
uint8 *cp = op - dist;
if ((op - out) < dist) {
return UNCOMPRESS_RESULT_FAILED;
}
if (dist >= 8) {
copy (op, cp, length);
op += length;
}
else
{
for (int i = 0; i < 4; i++) {
*op++ = *cp++;
}
while (length-- != 4) {
*op++ = *cp++;
}
}
}
fp.close ();
return bp;
return (ip == ipEnd) ? op - out : UNCOMPRESS_RESULT_FAILED;
}
// external decoder
static int decode (const char *fileName, int headerSize, uint8 *buffer, int bufferSize) {
static Compress compressor;
return compressor.decode_ (fileName, headerSize, buffer, bufferSize);
private:
inline uint16 load16 (void *ptr) {
return *reinterpret_cast <const uint16 *> (ptr);
}
// external encoder
static int encode (const char *fileName, uint8 *header, int headerSize, uint8 *buffer, int bufferSize) {
static Compress compressor;
return compressor.encode_ (fileName, header, headerSize, buffer, bufferSize);
inline uint32 load32 (void *ptr) {
return *reinterpret_cast <const uint32 *> (ptr);
}
};
inline void store16 (void *ptr, int val) {
*reinterpret_cast <uint16 *> (ptr) = static_cast <uint16> (val);
}
inline void copy64 (void *dst, void *src) {
*reinterpret_cast <uint64 *> (dst) = *reinterpret_cast <const uint64 *> (src);
}
inline uint32 hash32 (void *ptr) {
return (load32 (ptr) * 0x9E3779B9) >> (32 - HASH_BITS);
}
inline void copy (uint8 *dst, uint8 *src, int count) {
copy64 (dst, src);
for (int i = 8; i < count; i += 8) {
copy64 (dst + i, src + i);
}
}
inline void add (uint8 *&dst, int val) {
*dst++ = static_cast <uint8> (val);
}
inline void encode (uint8 *&ptr, uint32 val) {
while (val >= 128) {
val -= 128;
*ptr++ = 128 + (val & 127);
val >>= 7;
}
*ptr++ = static_cast <uint8> (val);
}
inline uint32 decode (uint8 *&ptr) {
uint32 val = 0;
for (int i = 0; i <= 21; i += 7) {
const uint32 cur = *ptr++;
val += cur << i;
if (cur < 128) {
break;
}
}
return val;
}
};