parent
b50d6b198c
commit
68f2f010fd
24 changed files with 8524 additions and 8166 deletions
|
|
@ -9,327 +9,295 @@
|
|||
|
||||
#pragma once

#include <cstring>
|
||||
|
||||
static constexpr int MAXBUF = 4096, PADDING = 18, THRESHOLD = 2, NIL = MAXBUF;
|
||||
// see https://github.com/encode84/ulz/
|
||||
class FastLZ final : NonCopyable {
|
||||
public:
|
||||
static constexpr int EXCESS = 16;
|
||||
static constexpr int WINDOW_BITS = 17;
|
||||
static constexpr int WINDOW_SIZE = cr::bit (WINDOW_BITS);
|
||||
static constexpr int WINDOW_MASK = WINDOW_SIZE - 1;
|
||||
|
||||
class Compress {
|
||||
protected:
|
||||
unsigned long int m_csize;
|
||||
static constexpr int MIN_MATCH = 4;
|
||||
static constexpr int MAX_CHAIN = cr::bit (5);
|
||||
|
||||
uint8 m_buffer[MAXBUF + PADDING - 1];
|
||||
int m_matchPos;
|
||||
int m_matchLen;
|
||||
|
||||
int m_left[MAXBUF + 1];
|
||||
int m_right[MAXBUF + 257];
|
||||
int m_parent[MAXBUF + 1];
|
||||
static constexpr int HASH_BITS = 19;
|
||||
static constexpr int HASH_SIZE = cr::bit (HASH_BITS);
|
||||
static constexpr int NIL = -1;
|
||||
static constexpr int UNCOMPRESS_RESULT_FAILED = -1;
|
||||
|
||||
private:
|
||||
void initTrees (void) {
|
||||
for (int i = MAXBUF + 1; i <= MAXBUF + 256; i++) {
|
||||
m_right[i] = NIL;
|
||||
}
|
||||
int *m_hashTable;
|
||||
int *m_prevTable;
|
||||
|
||||
for (int j = 0; j < MAXBUF; j++) {
|
||||
m_parent[j] = NIL;
|
||||
}
|
||||
public:
|
||||
FastLZ (void) {
|
||||
m_hashTable = new int[HASH_SIZE];
|
||||
m_prevTable = new int[WINDOW_SIZE];
|
||||
}
|
||||
|
||||
void insert (int node) {
|
||||
int i;
|
||||
|
||||
int compare = 1;
|
||||
|
||||
uint8 *key = &m_buffer[node];
|
||||
int temp = MAXBUF + 1 + key[0];
|
||||
|
||||
m_right[node] = m_left[node] = NIL;
|
||||
m_matchLen = 0;
|
||||
|
||||
for (;;) {
|
||||
if (compare >= 0) {
|
||||
if (m_right[temp] != NIL) {
|
||||
temp = m_right[temp];
|
||||
}
|
||||
else {
|
||||
m_right[temp] = node;
|
||||
m_parent[node] = temp;
|
||||
return;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (m_left[temp] != NIL) {
|
||||
temp = m_left[temp];
|
||||
}
|
||||
else {
|
||||
m_left[temp] = node;
|
||||
m_parent[node] = temp;
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 1; i < PADDING; i++) {
|
||||
if ((compare = key[i] - m_buffer[temp + i]) != 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (i > m_matchLen) {
|
||||
m_matchPos = temp;
|
||||
|
||||
if ((m_matchLen = i) >= PADDING) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_parent[node] = m_parent[temp];
|
||||
m_left[node] = m_left[temp];
|
||||
m_right[node] = m_right[temp];
|
||||
m_parent[m_left[temp]] = node;
|
||||
m_parent[m_right[temp]] = node;
|
||||
|
||||
if (m_right[m_parent[temp]] == temp) {
|
||||
m_right[m_parent[temp]] = node;
|
||||
}
|
||||
else {
|
||||
m_left[m_parent[temp]] = node;
|
||||
}
|
||||
m_parent[temp] = NIL;
|
||||
}
|
||||
|
||||
void erase (int node) {
|
||||
int temp;
|
||||
|
||||
if (m_parent[node] == NIL) {
|
||||
return; // not in tree
|
||||
}
|
||||
|
||||
if (m_right[node] == NIL) {
|
||||
temp = m_left[node];
|
||||
}
|
||||
else if (m_left[node] == NIL) {
|
||||
temp = m_right[node];
|
||||
}
|
||||
else {
|
||||
temp = m_left[node];
|
||||
|
||||
if (m_right[temp] != NIL) {
|
||||
do {
|
||||
temp = m_right[temp];
|
||||
} while (m_right[temp] != NIL);
|
||||
|
||||
m_right[m_parent[temp]] = m_left[temp];
|
||||
m_parent[m_left[temp]] = m_parent[temp];
|
||||
m_left[temp] = m_left[node];
|
||||
m_parent[m_left[node]] = temp;
|
||||
}
|
||||
|
||||
m_right[temp] = m_right[node];
|
||||
m_parent[m_right[node]] = temp;
|
||||
}
|
||||
m_parent[temp] = m_parent[node];
|
||||
|
||||
if (m_right[m_parent[node]] == node) {
|
||||
m_right[m_parent[node]] = temp;
|
||||
}
|
||||
else {
|
||||
m_left[m_parent[node]] = temp;
|
||||
}
|
||||
m_parent[node] = NIL;
|
||||
// Frees the two tables allocated with new[] in the constructor.
~FastLZ (void) {
   delete [] m_prevTable;
   delete [] m_hashTable;
}
|
||||
|
||||
public:
|
||||
Compress (void) : m_csize (0), m_matchPos (0), m_matchLen (0) {
|
||||
memset (m_right, 0, sizeof (m_right));
|
||||
memset (m_left, 0, sizeof (m_left));
|
||||
memset (m_parent, 0, sizeof (m_parent));
|
||||
memset (m_buffer, 0, sizeof (m_buffer));
|
||||
}
|
||||
|
||||
~Compress (void) = default;
|
||||
|
||||
int encode_ (const char *fileName, uint8 *header, int headerSize, uint8 *buffer, int bufferSize) {
|
||||
File fp (fileName, "wb");
|
||||
|
||||
if (!fp.isValid ()) {
|
||||
return -1;
|
||||
// Compresses `inLength` bytes starting at `in` and writes the encoded stream to `out`.
//
// Stream layout produced here (one token byte per sequence):
//   bits 7..5  literal run length (7 means "7 + varint that follows")
//   bit  4     bit 16 of the match distance
//   bits 3..0  match length - MIN_MATCH (15 means "15 + varint that follows")
// followed by the literal bytes, the optional match-length varint and the low
// 16 bits of the distance. A trailing literal-only token has no match part.
//
// Returns the number of bytes written to `out`. The function receives no output
// capacity and performs no bounds checks on `out`.
// NOTE(review): hash32 ()/load32 () read four bytes at the current position even
// when fewer than four input bytes remain; presumably callers pad both buffers
// by EXCESS bytes - confirm against the call sites.
int compress (uint8 *in, int inLength, uint8 *out) {
   // forget every position remembered by a previous call
   for (int i = 0; i < HASH_SIZE; i++) {
      m_hashTable[i] = NIL;
   }
   uint8 *op = out;

   int anchor = 0; // first input byte not yet emitted (start of the pending literal run)
   int cur = 0; // input position currently being examined

   while (cur < inLength) {
      const int maxMatch = inLength - cur;

      int bestLength = 0;
      int dist = 0;

      if (maxMatch >= MIN_MATCH) {
         // oldest position still reachable by the distance encoding
         const int limit = cr::max (cur - WINDOW_SIZE, NIL);

         int chainLength = MAX_CHAIN;
         int lookup = m_hashTable[hash32 (&in[cur])];

         // walk the chain of earlier positions that share this hash
         while (lookup > limit) {
            // cheap rejection first: the byte that would extend the best match, then the first four bytes
            if (in[lookup + bestLength] == in[cur + bestLength] && load32 (&in[lookup]) == load32 (&in[cur])) {
               int length = MIN_MATCH;

               while (length < maxMatch && in[lookup + length] == in[cur + length]) {
                  length++;
               }

               if (length > bestLength) {
                  bestLength = length;
                  dist = cur - lookup;

                  // cannot do better than matching everything that is left
                  if (length == maxMatch) {
                     break;
                  }
               }
            }

            if (--chainLength == 0) {
               break;
            }
            lookup = m_prevTable[lookup & WINDOW_MASK];
         }
      }

      // a minimum-length match after a long literal run is dropped
      // (presumably it does not pay for the token it would cost - TODO confirm)
      if (bestLength == MIN_MATCH && (cur - anchor) >= (7 + 128)) {
         bestLength = 0;
      }

      // lazy evaluation: if the next position offers a match at least one byte longer,
      // emit the current byte as a literal instead and take the longer match next round
      if (bestLength >= MIN_MATCH && bestLength < maxMatch && (cur - anchor) != 6) {
         const int next = cur + 1;
         const int targetLength = bestLength + 1;
         const int limit = cr::max (next - WINDOW_SIZE, NIL);

         int chainLength = MAX_CHAIN;
         int lookup = m_hashTable[hash32 (&in[next])];

         while (lookup > limit) {
            if (in[lookup + bestLength] == in[next + bestLength] && load32 (&in[lookup]) == load32 (&in[next])) {
               int length = MIN_MATCH;

               while (length < targetLength && in[lookup + length] == in[next + length]) {
                  length++;
               }

               if (length == targetLength) {
                  bestLength = 0;
                  break;
               }
            }

            if (--chainLength == 0) {
               break;
            }
            lookup = m_prevTable[lookup & WINDOW_MASK];
         }
      }

      if (bestLength >= MIN_MATCH) {
         const int length = bestLength - MIN_MATCH;
         const int token = ((dist >> 12) & 16) + cr::min (length, 15);

         // flush the pending literals together with the match token
         if (anchor != cur) {
            const int run = cur - anchor;

            if (run >= 7) {
               add (op, (7 << 5) + token);
               encode (op, run - 7);
            }
            else {
               add (op, (run << 5) + token);
            }
            copy (op, &in[anchor], run);
            op += run;
         }
         else {
            add (op, token);
         }

         if (length >= 15) {
            encode (op, length - 15);
         }
         store16 (op, dist);
         op += 2;

         // register every position covered by the match so later data can refer to it
         while (bestLength-- != 0) {
            const uint32 hash = hash32 (&in[cur]);

            m_prevTable[cur & WINDOW_MASK] = m_hashTable[hash];
            m_hashTable[hash] = cur++;
         }
         anchor = cur;
      }
      else {
         // no usable match: remember this position and leave the byte in the literal run
         const uint32 hash = hash32 (&in[cur]);

         m_prevTable[cur & WINDOW_MASK] = m_hashTable[hash];
         m_hashTable[hash] = cur++;
      }
   }

   // trailing literals that were never followed by a match
   if (anchor != cur) {
      const int run = cur - anchor;

      if (run >= 7) {
         add (op, 7 << 5);
         encode (op, run - 7);
      }
      else {
         add (op, run << 5);
      }
      copy (op, &in[anchor], run);
      op += run;
   }
   return op - out;
}
|
||||
|
||||
// Decodes the `inLength`-byte stream at `in` into `out`, which can hold `outLength` bytes.
//
// Returns the number of bytes produced, or UNCOMPRESS_RESULT_FAILED when the
// stream is truncated, would overflow `out`, refers to data before the start of
// `out`, or does not end exactly on the input boundary.
int uncompress (uint8 *in, int inLength, uint8 *out, int outLength) {
   uint8 *op = out;
   uint8 *ip = in;

   const uint8 *opEnd = op + outLength;
   const uint8 *ipEnd = ip + inLength;

   while (ip < ipEnd) {
      const int token = *ip++;

      // upper three bits: literal run length, 7 means a varint extension follows
      if (token >= 32) {
         int run = token >> 5;

         if (run == 7) {
            run += decode (ip);
         }

         if ((opEnd - op) < run || (ipEnd - ip) < run) {
            return UNCOMPRESS_RESULT_FAILED;
         }
         copy (op, ip, run);

         op += run;
         ip += run;

         // a literal run may be the last thing in the stream (no match part)
         if (ip >= ipEnd) {
            break;
         }
      }

      // lower four bits: match length, 15 means a varint extension follows
      int length = (token & 15) + MIN_MATCH;

      if (length == (15 + MIN_MATCH)) {
         length += decode (ip);
      }

      if ((opEnd - op) < length) {
         return UNCOMPRESS_RESULT_FAILED;
      }

      // the 16-bit distance must still be inside the input
      if ((ipEnd - ip) < 2) {
         return UNCOMPRESS_RESULT_FAILED;
      }
      const int dist = ((token & 16) << 12) + load16 (ip);
      ip += 2;

      // a distance of zero would read bytes not yet written; one beyond the
      // produced output would read before the start of `out`
      if (dist == 0 || (op - out) < dist) {
         return UNCOMPRESS_RESULT_FAILED;
      }
      uint8 *cp = op - dist;

      if (dist >= 8) {
         copy (op, cp, length);
         op += length;
      }
      else {
         // source and destination are closer than eight bytes: copy byte by byte
         // so that already written output is replicated
         for (int i = 0; i < length; i++) {
            *op++ = *cp++;
         }
      }
   }
   return (ip == ipEnd) ? static_cast <int> (op - out) : UNCOMPRESS_RESULT_FAILED;
}
|
||||
|
||||
// external decoder
|
||||
static int decode (const char *fileName, int headerSize, uint8 *buffer, int bufferSize) {
|
||||
static Compress compressor;
|
||||
return compressor.decode_ (fileName, headerSize, buffer, bufferSize);
|
||||
private:
|
||||
// Reads a 16-bit value (host byte order) from `ptr`, which does not have to be aligned.
// memcpy is used instead of dereferencing a casted pointer, which is undefined
// behaviour for misaligned addresses.
inline uint16 load16 (void *ptr) {
   uint16 value;
   std::memcpy (&value, ptr, sizeof (value));

   return value;
}
|
||||
|
||||
// external encoder
|
||||
static int encode (const char *fileName, uint8 *header, int headerSize, uint8 *buffer, int bufferSize) {
|
||||
static Compress compressor;
|
||||
return compressor.encode_ (fileName, header, headerSize, buffer, bufferSize);
|
||||
// Reads a 32-bit value (host byte order) from `ptr`, which does not have to be aligned.
// memcpy is used instead of dereferencing a casted pointer, which is undefined
// behaviour for misaligned addresses.
inline uint32 load32 (void *ptr) {
   uint32 value;
   std::memcpy (&value, ptr, sizeof (value));

   return value;
}
|
||||
};
|
||||
|
||||
inline void store16 (void *ptr, int val) {
|
||||
*reinterpret_cast <uint16 *> (ptr) = static_cast <uint16> (val);
|
||||
}
|
||||
|
||||
inline void copy64 (void *dst, void *src) {
|
||||
*reinterpret_cast <uint64 *> (dst) = *reinterpret_cast <const uint64 *> (src);
|
||||
}
|
||||
|
||||
// Hashes the four bytes at `ptr` to a HASH_BITS-wide table index: the loaded
// word is multiplied by 0x9E3779B9 and the top HASH_BITS bits are kept.
inline uint32 hash32 (void *ptr) {
   const uint32 word = load32 (ptr);
   const uint32 mixed = word * 0x9E3779B9;

   return mixed >> (32 - HASH_BITS);
}
|
||||
|
||||
// Copies exactly `count` bytes from `src` to `dst`, front to back.
//
// Whole 8-byte chunks go through copy64 (); the remaining tail is copied byte by
// byte, so nothing outside [dst, dst + count) is written and nothing outside
// [src, src + count) is read. A `count` of zero or less copies nothing.
// When the ranges overlap with `dst` at least eight bytes after `src`, every
// chunk only reads bytes that were already written, so earlier output is
// replicated forward.
inline void copy (uint8 *dst, uint8 *src, int count) {
   int done = 0;

   for (; count - done >= 8; done += 8) {
      copy64 (dst + done, src + done);
   }

   for (; done < count; done++) {
      dst[done] = src[done];
   }
}
|
||||
|
||||
// Stores the low eight bits of `val` at `dst` and advances `dst` by one byte.
inline void add (uint8 *&dst, int val) {
   dst[0] = static_cast <uint8> (val);
   ++dst;
}
|
||||
|
||||
// Appends `val` to the stream as a variable-length integer and advances `ptr`.
// While the value is 128 or more, 128 is subtracted, the low seven bits are
// written with the high bit set, and the value is shifted right by seven.
// The final byte (below 128) terminates the sequence.
inline void encode (uint8 *&ptr, uint32 val) {
   for (; val >= 128; val >>= 7) {
      val -= 128;
      *ptr++ = static_cast <uint8> (128 + (val & 127));
   }
   *ptr++ = static_cast <uint8> (val);
}
|
||||
|
||||
// Reads a variable-length integer written by encode () and advances `ptr`.
// Each byte is added to the result shifted left by 0, 7, 14 and 21 bits in turn;
// reading stops after a byte below 128 or after four bytes, whichever comes first.
// No input bounds are checked here.
inline uint32 decode (uint8 *&ptr) {
   uint32 result = 0;
   int shift = 0;

   do {
      const uint32 byte = *ptr++;
      result += byte << shift;

      if (byte < 128) {
         break;
      }
      shift += 7;
   } while (shift <= 21);

   return result;
}
|
||||
};
|
||||
Loading…
Add table
Add a link
Reference in a new issue