Skip to content

Commit 4257308

Browse files
perf+refactor: avoid extra copy during decompress
1 parent a6583a9 commit 4257308

File tree

4 files changed

+38
-49
lines changed

4 files changed

+38
-49
lines changed

include/decompress_segmentation.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ void DecompressChannel(const uint32_t* input,
4141
const ptrdiff_t volume_size[3],
4242
const ptrdiff_t block_size[3],
4343
const ptrdiff_t strides[4],
44-
std::vector<Label>* output,
44+
Label* output,
4545
const ptrdiff_t channel);
4646

4747
// Decodes multiple channels.
@@ -66,7 +66,7 @@ void DecompressChannels(const uint32_t* input,
6666
const ptrdiff_t volume_size[4],
6767
const ptrdiff_t block_size[3],
6868
const ptrdiff_t strides[4],
69-
std::vector<Label>* output);
69+
Label* output);
7070

7171
} // namespace compress_segmentation
7272

src/compress_segmentation.cc

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,11 @@ void EncodeBlock(const Label* input, const ptrdiff_t input_strides[3],
108108
Label previous_value = input[0] + 1;
109109
{
110110
auto* input_z = input;
111-
for (size_t z = 0; z < actual_size[2]; ++z) {
111+
for (size_t z = 0; z < static_cast<size_t>(actual_size[2]); ++z) {
112112
auto* input_y = input_z;
113-
for (size_t y = 0; y < actual_size[1]; ++y) {
113+
for (size_t y = 0; y < static_cast<size_t>(actual_size[1]); ++y) {
114114
auto* input_x = input_y;
115-
for (size_t x = 0; x < actual_size[0]; ++x) {
115+
for (size_t x = 0; x < static_cast<size_t>(actual_size[0]); ++x) {
116116
auto value = *input_x;
117117
// If this value matches the previous value, we can skip the more
118118
// expensive hash table lookup.
@@ -170,11 +170,11 @@ void EncodeBlock(const Label* input, const ptrdiff_t input_strides[3],
170170
// Write encoded representation.
171171
{
172172
auto* input_z = input;
173-
for (size_t z = 0; z < actual_size[2]; ++z) {
173+
for (size_t z = 0; z < static_cast<size_t>(actual_size[2]); ++z) {
174174
auto* input_y = input_z;
175-
for (size_t y = 0; y < actual_size[1]; ++y) {
175+
for (size_t y = 0; y < static_cast<size_t>(actual_size[1]); ++y) {
176176
auto* input_x = input_y;
177-
for (size_t x = 0; x < actual_size[0]; ++x) {
177+
for (size_t x = 0; x < static_cast<size_t>(actual_size[0]); ++x) {
178178
auto value = *input_x;
179179
uint32_t index = seen_values.at(value);
180180
size_t output_offset = x + block_size[0] * (y + block_size[1] * z);
@@ -225,12 +225,10 @@ int CompressChannel(const Label* input, const ptrdiff_t input_strides[3],
225225
block[0] + grid_size[0] * (block[1] + grid_size[1] * block[2]);
226226
ptrdiff_t actual_size[3];
227227
ptrdiff_t input_offset = 0;
228-
size_t num_values = 1;
229228
for (size_t i = 0; i < 3; ++i) {
230229
auto pos = block[i] * block_size[i];
231230
actual_size[i] = std::min(block_size[i], volume_size[i] - pos);
232231
input_offset += pos * input_strides[i];
233-
num_values *= actual_size[i];
234232
}
235233
const size_t encoded_value_base_offset = output->size() - base_offset;
236234
size_t encoded_bits, table_offset;
@@ -257,7 +255,7 @@ int CompressChannels(const Label* input, const ptrdiff_t input_strides[4],
257255
const ptrdiff_t block_size[3],
258256
std::vector<uint32_t>* output) {
259257
output->resize(volume_size[3]);
260-
for (size_t channel_i = 0; channel_i < volume_size[3]; ++channel_i) {
258+
for (size_t channel_i = 0; channel_i < static_cast<size_t>(volume_size[3]); ++channel_i) {
261259
(*output)[channel_i] = output->size();
262260
int error = CompressChannel(input + input_strides[3] * channel_i, input_strides,
263261
volume_size, block_size, output);

src/compressed_segmentation.pyx

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ cdef extern from "decompress_segmentation.h" namespace "compress_segmentation":
4949
const ptrdiff_t volume_size[4],
5050
const ptrdiff_t block_size[3],
5151
const ptrdiff_t strides[4],
52-
vector[Label]* output
52+
Label* output
5353
)
5454

5555
DEFAULT_BLOCK_SIZE = (8,8,8)
@@ -172,29 +172,28 @@ cdef decompress_helper(
172172
strides[2] = volsize[3]
173173
strides[3] = 1
174174

175-
cdef vector[UINT] *output = new vector[UINT]()
175+
voxels = reduce(operator.mul, volume_size)
176176

177-
DecompressChannels(
178-
uintencodedptr,
179-
volsize,
180-
blksize,
181-
strides,
182-
output
183-
)
184-
185-
cdef UINT* output_ptr = <UINT*>&output[0][0]
186-
cdef UINT[:] vec_view = <UINT[:output.size()]>output_ptr
177+
cdef np.ndarray[UINT] output = np.zeros([voxels], dtype=dtype)
187178

188-
# possible double free issue
189-
# The buffer gets loaded into numpy, but not the vector<uint64_t>
190-
# So when numpy clears the buffer, the vector object remains
191-
# Maybe we should make a copy of the vector into a regular array.
179+
if sizeof(UINT) == 4:
180+
DecompressChannels[uint32_t](
181+
uintencodedptr,
182+
volsize,
183+
blksize,
184+
strides,
185+
<uint32_t*>&output[0]
186+
)
187+
else:
188+
DecompressChannels[uint64_t](
189+
uintencodedptr,
190+
volsize,
191+
blksize,
192+
strides,
193+
<uint64_t*>&output[0]
194+
)
192195

193-
# This construct is required by python 2.
194-
# Python 3 can just do np.frombuffer(vec_view, ...)
195-
buf = bytearray(vec_view[:])
196-
del output
197-
return np.frombuffer(buf, dtype=dtype).reshape( volume_size, order=order )
196+
return output.reshape( volume_size, order=order )
198197

199198
@cython.binding(True)
200199
def decompress(

src/decompress_segmentation.cc

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,6 @@
88
#include "decompress_segmentation.h"
99

1010
#include <algorithm>
11-
#include <unordered_map>
12-
#include <iostream>
13-
14-
using std::min;
1511

1612
namespace compress_segmentation {
1713

@@ -23,7 +19,7 @@ void DecompressChannel(
2319
const ptrdiff_t volume_size[3],
2420
const ptrdiff_t block_size[3],
2521
const ptrdiff_t strides[4],
26-
std::vector<Label>* output,
22+
Label* output,
2723
const ptrdiff_t channel
2824
) {
2925
const size_t table_entry_size = (sizeof(Label) + sizeof(uint32_t) - 1) / sizeof(uint32_t);
@@ -49,13 +45,13 @@ void DecompressChannel(
4945

5046
// find absolute positions in output array (+ base_offset)
5147
size_t xmin = block[0]*block_size[0];
52-
size_t xmax = min(xmin + block_size[0], size_t(volume_size[0]));
48+
size_t xmax = std::min(xmin + block_size[0], size_t(volume_size[0]));
5349

5450
size_t ymin = block[1]*block_size[1];
55-
size_t ymax = min(ymin + block_size[1], size_t(volume_size[1]));
51+
size_t ymax = std::min(ymin + block_size[1], size_t(volume_size[1]));
5652

5753
size_t zmin = block[2]*block_size[2];
58-
size_t zmax = min(zmin + block_size[2], size_t(volume_size[2]));
54+
size_t zmax = std::min(zmin + block_size[2], size_t(volume_size[2]));
5955

6056
uint64_t bitmask = (1 << encoded_bits) - 1;
6157
for (size_t z = zmin; z < zmax; ++z) {
@@ -78,7 +74,7 @@ void DecompressChannel(
7874
if (table_entry_size == 2) {
7975
val |= uint64_t(input[tableoffset + bitval*table_entry_size+1]) << 32;
8076
}
81-
(*output)[outindex] = val;
77+
output[outindex] = val;
8278
bitpos += encoded_bits;
8379
}
8480
}
@@ -94,7 +90,7 @@ void DecompressChannels(
9490
const ptrdiff_t volume_size[4],
9591
const ptrdiff_t block_size[3],
9692
const ptrdiff_t strides[4],
97-
std::vector<Label>* output
93+
Label* output
9894
) {
9995

10096
/*
@@ -111,11 +107,7 @@ void DecompressChannels(
111107
segmentation channel, the compressed segmentation data is simply prefixed with a
112108
single 1 value (encoded as a little-endian 32-bit unsigned integer).
113109
*/
114-
115-
size_t voxels = volume_size[0] * volume_size[1] * volume_size[2];
116-
output->resize(voxels * volume_size[3]);
117-
118-
for (size_t channel_i = 0; channel_i < volume_size[3]; ++channel_i) {
110+
for (size_t channel_i = 0; channel_i < static_cast<size_t>(volume_size[3]); ++channel_i) {
119111
DecompressChannel(
120112
input + input[channel_i], volume_size,
121113
block_size, strides, output, channel_i
@@ -128,13 +120,13 @@ void DecompressChannels(
128120
const uint32_t* input, const ptrdiff_t volume_size[3], \
129121
const ptrdiff_t block_size[3], \
130122
const ptrdiff_t strides[4], \
131-
std::vector<Label>* output, \
123+
Label* output, \
132124
const ptrdiff_t channel); \
133125
template void DecompressChannels<Label>( \
134126
const uint32_t* input, const ptrdiff_t volume_size[4], \
135127
const ptrdiff_t block_size[3], \
136128
const ptrdiff_t strides[4], \
137-
std::vector<Label>* output); \
129+
Label* output); \
138130
/**/
139131

140132
DO_INSTANTIATE(uint32_t)

0 commit comments

Comments
 (0)