Seg. fault with perf mode BALANCED and balance = 0 #3

@martaiborra

Description

When running the following example, which runs Btune on the first 10 chunks of the temp dataset, with:

BTUNE_PERF_MODE="BALANCED" BTUNE_BALANCE=0 BLOSC_TRACE=1 BTUNE_MODELS_DIR=../models_sample/ LD_LIBRARY_PATH=$CONDA_PREFIX/lib64 BTUNE_TRACE=1 BTUNE_USE_INFERENCE=3 ./btune_example temp.b2nd out.b2frame

#include "btune.h"
#include <blosc2/tuners-registry.h>

#define KB  1024.
#define MB  (1024*KB)

#define CHUNKSIZE (64 * 1024)
#define BLOCKSIZE ( 8 * 1024)


static int compress(const char* in_fname, const char* out_fname) {

    // Open input file
    blosc2_schunk *schunk_in = blosc2_schunk_open(in_fname);
    if (schunk_in == NULL) {
        fprintf(stderr, "Input file cannot be open.\n");
        return 1;
    }

    // compression params
    blosc2_cparams cparams = BLOSC2_CPARAMS_DEFAULTS;
    cparams.nthreads = 16; // Btune may lower this
    cparams.typesize = schunk_in->typesize;

    // btune
    btune_config btune_config = BTUNE_CONFIG_DEFAULTS;
    //btune_config.perf_mode = BTUNE_PERF_BALANCED;
    btune_config.comp_balance = 0.;
    btune_config.use_inference = 3;
    btune_config.models_dir = "../models_sample/";
    cparams.tuner_id = BLOSC_BTUNE;
    cparams.tuner_params = &btune_config;

    // Create super chunk
    remove(out_fname);
    blosc2_dparams dparams = BLOSC2_DPARAMS_DEFAULTS;
    dparams.nthreads = 1;
    blosc2_storage storage = {
        .cparams=&cparams,
        .dparams=&dparams,
        .contiguous=true,
        .urlpath=(char*)out_fname
    };
    blosc2_schunk* schunk_out = blosc2_schunk_new(&storage);
    if (schunk_out == NULL) {
        fprintf(stderr, "Output file cannot be created.\n");
        return 1;
    }

    // Statistics
    blosc_timestamp_t t0;
    blosc_set_timestamp(&t0);

    // Compress
    int chunksize = schunk_in->chunksize;
    void *data = malloc(chunksize);
    int nchunks = 10; // schunk_in->nchunks; 10 chunks are enough to reproduce
    for (int nchunk = 0; nchunk < nchunks; nchunk++) {
        int size = blosc2_schunk_decompress_chunk(schunk_in, nchunk, data, chunksize);
        if (size < 0) {
            fprintf(stderr, "Error decompressing chunk from the input file.\n");
            return 1;
        }
        if (blosc2_schunk_append_buffer(schunk_out, data, size) < 0) {
            fprintf(stderr, "Error appending data to the destination file.\n");
            return 1;
        }
    }

    // Statistics
    blosc_timestamp_t t1;
    blosc_set_timestamp(&t1);
    int64_t nbytes = schunk_out->nbytes;
    int64_t cbytes = schunk_out->cbytes;
    double ttotal = blosc_elapsed_secs(t0, t1);
    printf("Compression ratio: %.1f MB -> %.1f MB (%.1fx)\n",
            (float)nbytes / MB, (float)cbytes / MB, (1. * (float)nbytes) / (float)cbytes);
    printf("Compression time: %.3g s, %.1f MB/s\n",
            ttotal, (float)nbytes / (ttotal * MB));

    // Decompress
    int dsize;
    blosc_set_timestamp(&t0);
    for (int nchunk = nchunks - 1; nchunk >= 0; nchunk--) {
        dsize = blosc2_schunk_decompress_chunk(schunk_out, nchunk, data, chunksize);
        if (dsize < 0) {
            printf("Decompression error.  Error code: %d\n", dsize);
            return dsize;
        }
    }
    blosc_set_timestamp(&t1);
    ttotal = blosc_elapsed_secs(t0, t1);
    printf("Decompression time: %.3g s, %.1f MB/s\n",
           ttotal, (double)nbytes / (ttotal * MB));

    // Free resources
    free(data);
    blosc2_schunk_free(schunk_in);
    blosc2_schunk_free(schunk_out);
    return 0;
}


int main(int argc, char* argv[]) {
    blosc2_init();

    // Input parameters
    if (argc < 3 || (argc % 2 != 1)) {
        fprintf(stderr, "btune_example <input file> <output.b2frame>\n");
        return 1;
    }

    for (int i = 0; i < (argc - 1) / 2; i++) {
        const char* in_fname = argv[1 + i*2];
        const char* out_fname = argv[1 + i*2 + 1];
        compress(in_fname, out_fname);
    }

    blosc2_destroy();

    return 0;
}

The output is the following:

-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
BTune version: 1.0.0
Performance Mode: BALANCED, Compression balance: 0.000000, Bandwidth: 20 GB/s
Behaviour: Waits - 0, Softs - 5, Hards - 11, Repeat Mode - REPEAT_ALL
INFO: Model files found in the '../models_sample//' directory
TRACE: time load model: 0.000053
TRACE: Inference category=6 codec=1 filter=2 clevel=5 splitmode=2 time entropy=0.002945 inference=0.000015
|    Codec   | Filter | Split | C.Level | Blocksize | Shufflesize | C.Threads | D.Threads |   Score   |  C.Ratio   |   BTune State   | Readapt | Winner
|        lz4 |      2 |     1 |       5 |         0 |           4 |        16 |         1 |   0.00954 |      5.15x |    CODEC_FILTER |    HARD | W
TRACE: Inference category=6 codec=1 filter=2 clevel=5 splitmode=2 time entropy=0.000600 inference=0.000008
|        lz4 |      2 |     1 |       5 |         0 |           4 |        16 |         1 |   0.00917 |      5.19x |    CODEC_FILTER |    HARD | W
TRACE: Inference category=6 codec=1 filter=2 clevel=5 splitmode=2 time entropy=0.000611 inference=0.000008
|        lz4 |      2 |     1 |       5 |         0 |           4 |        16 |         1 |   0.00947 |      5.84x |    CODEC_FILTER |    HARD | -
|        lz4 |      2 |     1 |       5 |         0 |           4 |        16 |         1 |   0.00902 |      4.04x |    CODEC_FILTER |    HARD | -
|        lz4 |      2 |     0 |       5 |         0 |           4 |        16 |         1 |    0.0084 |      4.87x |    CODEC_FILTER |    HARD | -
|        lz4 |      2 |     1 |       5 |         0 |           4 |        16 |         1 |   0.00848 |      5.59x |    THREADS_COMP |    HARD | W
|        lz4 |      2 |     1 |       5 |         0 |           4 |        16 |         2 |   0.00549 |      4.48x |  THREADS_DECOMP |    HARD | W
|        lz4 |      2 |     1 |       5 |         0 |           4 |        16 |         3 |   0.00431 |      5.41x |  THREADS_DECOMP |    HARD | W
|        lz4 |      2 |     1 |       5 |         0 |           4 |        16 |         4 |   0.00366 |      5.93x |  THREADS_DECOMP |    HARD | W
|        lz4 |      2 |     1 |       5 |         0 |           4 |        16 |         5 |    0.0037 |      5.03x |  THREADS_DECOMP |    HARD | W
Compression ratio: 320.0 MB -> 62.9 MB (5.1x)
Compression time: 0.219 s, 1464.3 MB/s
Segmentation fault (core dumped)

From the output, the compression seems to work fine, but decompressing the result gives a segmentation fault. Valgrind points to the blosc_d function. This may be because Btune constantly changes the decompression nthreads, even though Btune is not applied when decompressing the dataset.
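
As a possible workaround while this is investigated (just a sketch, untested, and assuming the crash really comes from the decompression context that Btune has been retuning inside schunk_out), the chunks could be decompressed with a separate context created from fixed dparams, for example:

// Sketch of a possible workaround (assumption, untested): decompress with a
// context whose nthreads is fixed, instead of the schunk's internal dctx
// that Btune may have been retuning during compression.
blosc2_dparams fixed_dparams = BLOSC2_DPARAMS_DEFAULTS;
fixed_dparams.nthreads = 1;
blosc2_context *dctx = blosc2_create_dctx(fixed_dparams);

for (int nchunk = nchunks - 1; nchunk >= 0; nchunk--) {
    uint8_t *chunk;
    bool needs_free;
    // Get the compressed chunk as-is from the output super-chunk
    int csize = blosc2_schunk_get_chunk(schunk_out, nchunk, &chunk, &needs_free);
    if (csize < 0) {
        printf("Error getting chunk %d\n", nchunk);
        break;
    }
    // Decompress it with the fixed-threads context
    int dsize = blosc2_decompress_ctx(dctx, chunk, csize, data, chunksize);
    if (needs_free) {
        free(chunk);
    }
    if (dsize < 0) {
        printf("Decompression error.  Error code: %d\n", dsize);
        break;
    }
}
blosc2_free_ctx(dctx);

If that loop works where the original one crashes, it would at least confirm that the problem is in the schunk's own decompression context rather than in the frame data itself.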
