|
4 | 4 | #define TURBOCOMPRESSION_H_ |
5 | 5 |
|
6 | 6 | #include "turbopacking32.h" |
| 7 | +#include "turbopacking64.h" |
| 8 | + |
7 | 9 | #include "util.h" |
8 | 10 |
|
9 | 11 | /** |
10 | 12 | * "turbo" FOR packing uses an optimization to bit packing due to |
11 | 13 | * TurboPFor: we use 64-bit words as much as possible. |
12 | 14 | * |
13 | | -* Currently the implementation assumes that the integer arrays to be compressed |
14 | | -* are in multiples of 32, remaining integers are not compressed. Thus using this |
15 | | -* code on arrays smaller than 32 is wasteful. (This limitation will be removed |
16 | | -* in the future.) |
17 | 15 | */ |
18 | 16 |
|
19 | 17 | /** |
20 | 18 | * Compresses "length" values from "in" to "out" and returns a pointer to the end |
21 | 19 | * of the compressed stream. |
22 | 20 | * The format is "number of values, minimal value, maximal value, followed by |
23 | 21 | * packed data". |
| 22 | +* |
| 23 | +* Currently the implementation assumes that the integer arrays to be compressed |
| 24 | +* are in multiples of 32, remaining integers are not compressed. Thus using this |
| 25 | +* code on arrays smaller than 32 is wasteful. (This limitation will be removed |
| 26 | +* in the future.) |
24 | 27 | */ |
25 | 28 | inline uint8_t *turbocompress(const uint32_t *in, uint32_t length, |
26 | 29 | uint8_t *out) { |
@@ -92,4 +95,89 @@ inline const uint8_t *turbouncompress(const uint8_t *in, uint32_t *out, |
92 | 95 | return in; |
93 | 96 | } |
94 | 97 |
|
| 98 | + |
| 99 | + |
| 100 | +/** |
| 101 | +* Compresses "length" values from "in" to "out" and return a pointer to the end |
| 102 | +* of the compressed stream. |
| 103 | +* The format is "number of values, minimal value, maximal value, followed by |
| 104 | +* packed data". |
| 105 | +* |
| 106 | +* Currently the implementation assumes that the integer arrays to be compressed |
| 107 | +* are in multiples of 32, remaining integers are not compressed. Thus using this |
| 108 | +* code on arrays smaller than 32 is wasteful. (This limitation will be removed |
| 109 | +* in the future.) |
| 110 | +*/ |
| 111 | +inline uint8_t *turbocompress64(const uint64_t *in, uint32_t length, |
| 112 | + uint8_t *out) { |
| 113 | + memcpy(out, &length, sizeof(length)); |
| 114 | + out += sizeof(length); |
| 115 | + if (length == 0) |
| 116 | + return out; |
| 117 | + uint64_t m = in[0]; |
| 118 | + uint64_t M = in[0]; |
| 119 | + for (uint32_t i = 1; i < length; ++i) { |
| 120 | + if (in[i] > M) |
| 121 | + M = in[i]; |
| 122 | + if (in[i] < m) |
| 123 | + m = in[i]; |
| 124 | + } |
| 125 | + int b = bits64(static_cast<uint64_t>(M - m)); |
| 126 | + memcpy(out, &m, sizeof(m)); |
| 127 | + out += sizeof(m); |
| 128 | + memcpy(out, &M, sizeof(M)); |
| 129 | + out += sizeof(M); |
| 130 | + |
| 131 | + uint32_t k = 0; |
| 132 | + for (; k + 32 <= length; k += 32) { |
| 133 | + funcForPackArr64[b](m, &in, &out); |
| 134 | + } |
| 135 | + // we could pack the rest, but we don't bother |
| 136 | + memcpy(out, in, (length - k) * sizeof(uint64_t)); |
| 137 | + out += (length - k) * sizeof(uint64_t); |
| 138 | + return out; |
| 139 | +} |
| 140 | + |
| 141 | +/* |
| 142 | +* uncompress FOR data found in "in". |
| 143 | +* The format is "number of values, minimal value, maximal value, followed by |
| 144 | +* packed data". |
| 145 | +* The 'nvalue' variable receives the number of decoded values (initial value is |
| 146 | +* ignored) |
| 147 | +* The values are stored in "out". |
| 148 | +* We return a pointer to the end of the compressed input stream. |
| 149 | +*/ |
| 150 | +inline const uint8_t *turbouncompress64(const uint8_t *in, uint64_t *out, |
| 151 | + uint32_t &nvalue) { |
| 152 | + memcpy(&nvalue, in, sizeof(nvalue)); |
| 153 | + in += sizeof(nvalue); |
| 154 | + if (nvalue == 0) |
| 155 | + return in; |
| 156 | + uint64_t m, M; |
| 157 | + memcpy(&m, in, sizeof(m)); |
| 158 | + in += sizeof(m); |
| 159 | + memcpy(&M, in, sizeof(M)); |
| 160 | + in += sizeof(M); |
| 161 | + int b = bits(static_cast<uint64_t>(M - m)); |
| 162 | +#ifdef _OPENMP |
| 163 | +#pragma omp parallel for |
| 164 | +#endif |
| 165 | + for (uint32_t k = 0; k < nvalue / 32; ++k) { |
| 166 | + // could code as funcForUnpackArr[b](m,&in,&out); but it hurts |
| 167 | + // parallelization |
| 168 | + const uint8_t *input = in + 32 * b * k / 8; |
| 169 | + uint64_t *output = out + k * 32; |
| 170 | + funcForUnpackArr64[b](m, &input, &output); |
| 171 | + } |
| 172 | + in = in + (32 * b / 8) * (nvalue / 32); |
| 173 | + out = out + 32 * (nvalue / 32); |
| 174 | + |
| 175 | + // we could pack the rest, but we don't bother |
| 176 | + uint32_t leftover = nvalue - nvalue / 32 * 32; |
| 177 | + memcpy(out, in, leftover * sizeof(uint64_t)); |
| 178 | + in += leftover * sizeof(uint64_t); |
| 179 | + return in; |
| 180 | +} |
| 181 | + |
| 182 | + |
95 | 183 | #endif /* COMPRESSION_H_ */ |
0 commit comments