Skip to content

Commit 7d670c1

Browse files
committed
Added 64-bit FOR packing
1 parent b30ab9f commit 7d670c1

File tree

8 files changed

+10060
-6
lines changed

8 files changed

+10060
-6
lines changed

README.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,38 @@ uncompress(compresseddata, recoverydata, nvalue);
2929
There is a similar API with ``turbocompress`` and ``turbouncompress`` with the difference
3030
that ``compresseddata`` uses a ``uint8_t`` pointer type.
3131
32+
```C
33+
#include "turbocompression.h"
34+
35+
...
36+
37+
uint32_t * inputdata = ... // length values
38+
uint8_t * compresseddata = ... // enough data
39+
uint8_t *out = turbocompress(inputdata, length, compresseddata);
40+
// compressed data lies between compresseddata and out
41+
uint32_t nvalue = 0;
42+
uint32_t * recoverydata = ... // available buffer with at least length elements
43+
turbouncompress(compresseddata, recoverydata, nvalue);
44+
// nvalue will be equal to length
45+
```
46+
47+
We can also compress 64-bit arrays:
48+
49+
```C
50+
#include "turbocompression.h"
51+
52+
...
53+
54+
uint64_t * inputdata = ... // length values
55+
uint8_t * compresseddata = ... // enough data
56+
uint8_t *out = turbocompress64(inputdata, length, compresseddata);
57+
// compressed data lies between compresseddata and out
58+
uint32_t nvalue = 0;
59+
uint64_t * recoverydata = ... // available buffer with at least length elements
60+
turbouncompress64(compresseddata, recoverydata, nvalue);
61+
// nvalue will be equal to length
62+
```
63+
3264
## Usage:
3365
3466
To run a simple benchmark, do

include/turbocompression.h

Lines changed: 92 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,26 @@
44
#define TURBOCOMPRESSION_H_
55

66
#include "turbopacking32.h"
7+
#include "turbopacking64.h"
8+
79
#include "util.h"
810

911
/**
1012
* "turbo" FOR packing uses an optimization to bit packing due to
1113
* TurboPFor: we use 64-bit words as much as possible.
1214
*
13-
* Currently the implementation assumes that the integer arrays to be compressed
14-
* are in multiples of 32, remaining integers are not compressed. Thus using this
15-
* code on arrays smaller than 32 is wasteful. (This limitation will be removed
16-
* in the future.)
1715
*/
1816

1917
/**
2018
* Compresses "length" values from "in" to "out" and returns a pointer to the end
2119
* of the compressed stream.
2220
* The format is "number of values, minimal value, maximal value, followed by
2321
* packed data".
22+
*
23+
* Currently the implementation assumes that the integer arrays to be compressed
24+
* are in multiples of 32; remaining integers are not compressed. Thus using this
25+
* code on arrays smaller than 32 is wasteful. (This limitation will be removed
26+
* in the future.)
2427
*/
2528
inline uint8_t *turbocompress(const uint32_t *in, uint32_t length,
2629
uint8_t *out) {
@@ -92,4 +95,89 @@ inline const uint8_t *turbouncompress(const uint8_t *in, uint32_t *out,
9295
return in;
9396
}
9497

98+
99+
100+
/**
101+
* Compresses "length" values from "in" to "out" and return a pointer to the end
102+
* of the compressed stream.
103+
* The format is "number of values, minimal value, maximal value, followed by
104+
* packed data".
105+
*
106+
* Currently the implementation assumes that the integer arrays to be compressed
107+
* are in multiples of 32, remaining integers are not compressed. Thus using this
108+
* code on arrays smaller than 32 is wasteful. (This limitation will be removed
109+
* in the future.)
110+
*/
111+
inline uint8_t *turbocompress64(const uint64_t *in, uint32_t length,
112+
uint8_t *out) {
113+
memcpy(out, &length, sizeof(length));
114+
out += sizeof(length);
115+
if (length == 0)
116+
return out;
117+
uint64_t m = in[0];
118+
uint64_t M = in[0];
119+
for (uint32_t i = 1; i < length; ++i) {
120+
if (in[i] > M)
121+
M = in[i];
122+
if (in[i] < m)
123+
m = in[i];
124+
}
125+
int b = bits64(static_cast<uint64_t>(M - m));
126+
memcpy(out, &m, sizeof(m));
127+
out += sizeof(m);
128+
memcpy(out, &M, sizeof(M));
129+
out += sizeof(M);
130+
131+
uint32_t k = 0;
132+
for (; k + 32 <= length; k += 32) {
133+
funcForPackArr64[b](m, &in, &out);
134+
}
135+
// we could pack the rest, but we don't bother
136+
memcpy(out, in, (length - k) * sizeof(uint64_t));
137+
out += (length - k) * sizeof(uint64_t);
138+
return out;
139+
}
140+
141+
/*
142+
* uncompress FOR data found in "in".
143+
* The format is "number of values, minimal value, maximal value, followed by
144+
* packed data".
145+
* The 'nvalue' variable receives the number of decoded values (initial value is
146+
* ignored)
147+
* The values are stored in "out".
148+
* We return a pointer to the end of the compressed input stream.
149+
*/
150+
inline const uint8_t *turbouncompress64(const uint8_t *in, uint64_t *out,
151+
uint32_t &nvalue) {
152+
memcpy(&nvalue, in, sizeof(nvalue));
153+
in += sizeof(nvalue);
154+
if (nvalue == 0)
155+
return in;
156+
uint64_t m, M;
157+
memcpy(&m, in, sizeof(m));
158+
in += sizeof(m);
159+
memcpy(&M, in, sizeof(M));
160+
in += sizeof(M);
161+
int b = bits(static_cast<uint64_t>(M - m));
162+
#ifdef _OPENMP
163+
#pragma omp parallel for
164+
#endif
165+
for (uint32_t k = 0; k < nvalue / 32; ++k) {
166+
// could code as funcForUnpackArr[b](m,&in,&out); but it hurts
167+
// parallelization
168+
const uint8_t *input = in + 32 * b * k / 8;
169+
uint64_t *output = out + k * 32;
170+
funcForUnpackArr64[b](m, &input, &output);
171+
}
172+
in = in + (32 * b / 8) * (nvalue / 32);
173+
out = out + 32 * (nvalue / 32);
174+
175+
// we could pack the rest, but we don't bother
176+
uint32_t leftover = nvalue - nvalue / 32 * 32;
177+
memcpy(out, in, leftover * sizeof(uint64_t));
178+
in += leftover * sizeof(uint64_t);
179+
return in;
180+
}
181+
182+
95183
#endif /* TURBOCOMPRESSION_H_ */

include/turbopacking32.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
#define UINT64_C(c) (c ## ULL)
88
#endif
99

10+
/**
11+
* (c) Daniel Lemire
12+
* Apache License 2.0
13+
*/
1014
/** turbopacking32 starts here **/
1115
/**
1216
* this code mimics the way TurboPFor packs short arrays of integers.

0 commit comments

Comments
 (0)