From 322b484ac6efcd7546767ef4cc1d19acb22fd217 Mon Sep 17 00:00:00 2001 From: liquidaty Date: Sat, 11 Nov 2023 09:42:18 -0800 Subject: [PATCH] compare: fix non-reporting of non-matching trailing rows --- app/compare.c | 88 +++++++++++++++++++++++------ app/compare.h | 4 +- app/compare_internal.h | 7 ++- app/compare_sort.c | 6 +- app/test/Makefile | 3 + app/test/expected/test-compare.out5 | 4 +- app/test/expected/test-compare.out8 | 6 ++ app/test/expected/test-compare.out9 | 5 ++ 8 files changed, 98 insertions(+), 25 deletions(-) create mode 100644 app/test/expected/test-compare.out9 diff --git a/app/compare.c b/app/compare.c index 10b915ed..d74d6d1a 100644 --- a/app/compare.c +++ b/app/compare.c @@ -138,7 +138,8 @@ static void zsv_compare_json_row_end(struct zsv_compare_data *data) { static void zsv_compare_output_tuple(struct zsv_compare_data *data, struct zsv_compare_input *key_input, const unsigned char *colname, - struct zsv_cell *values // in original input order + struct zsv_cell *values, // in original input order + char is_key ) { // print ID | Column | Value 1 | ... | Value N if(data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON) @@ -159,7 +160,7 @@ static void zsv_compare_output_tuple(struct zsv_compare_data *data, for(unsigned i = 0; i < data->input_count; i++) { struct zsv_compare_input *input = &data->inputs[i]; - if(input->done || !input->row_loaded) { // no data for this input + if((input->done || !input->row_loaded) && !is_key) { // no data for this input zsv_compare_output_str(data, NULL, ZSV_WRITER_SAME_ROW, 0); } else { struct zsv_cell *value = &values[i]; @@ -182,6 +183,34 @@ static void zsv_compare_output_tuple(struct zsv_compare_data *data, zsv_compare_json_row_end(data); } +static const unsigned char *zsv_compare_combined_key_names(struct zsv_compare_data *data) { + if(!data->combined_key_names) { + size_t len = 2; + + for(unsigned key_ix = 0; key_ix < data->key_count; key_ix++) { + struct zsv_compare_key *key = &data->keys[key_ix]; + if(key && key->name) + len += strlen(key->name) + 1; + } + if((data->combined_key_names = calloc(1, len))) { + unsigned char *start = NULL; + for(unsigned key_ix = 0; key_ix < data->key_count; key_ix++) { + struct zsv_compare_key *key = &data->keys[key_ix]; + if(key && key->name) { + if(start) { + *start = (unsigned char)'|'; + start++; + } else + start = data->combined_key_names; + strcpy((char *)start, key->name); + start += strlen((char *)start); + } + } + } + } + return data->combined_key_names; +} + static void zsv_compare_print_row(struct zsv_compare_data *data, const unsigned last_ix // last input ix in inputs_to_sort ) { @@ -198,19 +227,25 @@ static void zsv_compare_print_row(struct zsv_compare_data *data, #define ZSV_COMPARE_MISSING "Missing" - if(last_ix + 1 < data->input_count) { +// if(last_ix + 1 < data->input_count) { // if we don't have data from every input, then output "Missing" for missing inputs - for(unsigned i = last_ix + 1; i < data->input_count; i++) { + char got_missing = 0; + for(unsigned i = 0; i < data->input_count; i++) { struct zsv_compare_input *input = data->inputs_to_sort[i]; - unsigned input_ix = input->index; - values[input_ix].str = (unsigned char *)ZSV_COMPARE_MISSING; - values[input_ix].len = strlen(ZSV_COMPARE_MISSING); + if(i > last_ix) { + got_missing = 1; + unsigned input_ix = input->index; + values[input_ix].str = (unsigned char *)ZSV_COMPARE_MISSING; + values[input_ix].len = strlen(ZSV_COMPARE_MISSING); + } } - zsv_compare_output_tuple(data, key_input, (unsigned char *)"", values); - - // reset values - memset(values, 0, data->input_count * sizeof(*values)); - } + if(got_missing) { + const unsigned char *key_names = data->print_key_col_names ? zsv_compare_combined_key_names(data) : (const unsigned char *)""; + zsv_compare_output_tuple(data, key_input, key_names, values, 1); + // reset values + memset(values, 0, data->input_count * sizeof(*values)); + } +// } // for each output column zsv_compare_unique_colname *output_col = data->output_colnames_first; @@ -237,13 +272,13 @@ static void zsv_compare_print_row(struct zsv_compare_data *data, if(!output_col) output_col = input->output_colnames[input_col_ix]; values[input_ix] = data->get_cell(input, input_col_ix); - if(i > 0 && !different && data->cmp(data->cmp_ctx, values[first_input_ix], values[input_ix])) + if(i > 0 && !different && data->cmp(data->cmp_ctx, values[first_input_ix], values[input_ix], data, input_col_ix)) different = 1; } } if(different) - zsv_compare_output_tuple(data, key_input, output_col->name, values); + zsv_compare_output_tuple(data, key_input, output_col->name, values, 0); } free(values); } @@ -283,7 +318,8 @@ static enum zsv_compare_status zsv_compare_set_inputs(struct zsv_compare_data *d return zsv_compare_status_ok; } -static int zsv_compare_cell(void *ctx, struct zsv_cell c1, struct zsv_cell c2); +static int zsv_compare_cell(void *ctx, struct zsv_cell c1, struct zsv_cell c2, + void *data, unsigned col_ix); static void zsv_compare_output_begin(struct zsv_compare_data *data) { if(data->writer.type == ZSV_COMPARE_OUTPUT_TYPE_JSON) { @@ -390,7 +426,11 @@ input_init_unsorted(struct zsv_compare_data *data, zsv_compare_handle zsv_compare_new() { zsv_compare_handle z = calloc(1, sizeof(*z)); - zsv_compare_set_comparison(z, zsv_compare_cell, z); +#if defined(ZSV_COMPARE_CMP_FUNC) && defined(ZSV_COMPARE_CMP_CTX) + zsv_compare_set_comparison(z, ZSV_COMPARE_CMP_FUNC, ZSV_COMPARE_CMP_CTX); +#else + zsv_compare_set_comparison(z, zsv_compare_cell, NULL); +#endif z->output_colnames_next = &z->output_colnames; z->next_row = zsv_compare_next_unsorted_row; @@ -431,6 +471,7 @@ static void zsv_compare_data_free(struct zsv_compare_data *data) { for(unsigned i = 0; i < data->input_count; i++) zsv_compare_input_free(&data->inputs[i]); free(data->inputs); + free(data->combined_key_names); free(data->inputs_to_sort); for(unsigned i = 0; i < data->writer.properties.used; i++) free(data->writer.properties.names[i]); @@ -466,8 +507,11 @@ void zsv_compare_set_comparison(struct zsv_compare_data *data, data->cmp_ctx = cmp_ctx; } -static int zsv_compare_cell(void *ctx, struct zsv_cell c1, struct zsv_cell c2) { +static int zsv_compare_cell(void *ctx, struct zsv_cell c1, struct zsv_cell c2, + void *data, unsigned col_ix) { (void)(ctx); + (void)(data); + (void)(col_ix); return zsv_strincmp(c1.str, c1.len, c2.str, c2.len); } @@ -478,7 +522,11 @@ static enum zsv_compare_status zsv_compare_advance(struct zsv_compare_data *data for(unsigned i = 0; i < data->input_count; i++) { struct zsv_compare_input *input = &data->inputs[i]; if(input->done) continue; - if(input->row_loaded) continue; + + if(input->row_loaded) { + got = 1; + continue; + } if(data->next_row(input) != zsv_status_row) input->done = 1; else { @@ -563,6 +611,8 @@ static int compare_usage() { " --json : output as JSON", " --json-compact : output as compact JSON", " --json-object : output as an array of objects", + " --print-key-colname : when outputting key column diffs,", + " print column name instead of ", "", "NOTES", "", @@ -655,6 +705,8 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op } else if(!strcmp(arg, "--json-compact")) { data->writer.type = ZSV_COMPARE_OUTPUT_TYPE_JSON; data->writer.compact = 1; + } else if(!strcmp(arg, "--print-key-colname")) { + data->print_key_col_names = 1; } else input_filenames[input_count++] = arg; } diff --git a/app/compare.h b/app/compare.h index 16206158..3ba41a35 100644 --- a/app/compare.h +++ b/app/compare.h @@ -12,7 +12,9 @@ enum zsv_compare_status { typedef struct zsv_compare_data *zsv_compare_handle; -typedef int (*zsv_compare_cell_func)(void *ctx, struct zsv_cell, struct zsv_cell); +typedef int (*zsv_compare_cell_func)(void *ctx, struct zsv_cell, struct zsv_cell, + void *struct_zsv_compare_data, + unsigned input_col_ix); zsv_compare_handle zsv_compare_new(); // enum zsv_compare_status zsv_compare_set_inputs(zsv_compare_handle, unsigned input_count, unsigned key_count); diff --git a/app/compare_internal.h b/app/compare_internal.h index 7a0a7954..52bb49c3 100644 --- a/app/compare_internal.h +++ b/app/compare_internal.h @@ -51,8 +51,9 @@ struct zsv_compare_input { sqlite3_stmt *sort_stmt; unsigned char row_loaded:1; + unsigned char missing:1; unsigned char done:1; - unsigned char _:6; + unsigned char _:5; }; struct zsv_compare_key { @@ -77,6 +78,7 @@ struct zsv_compare_data { unsigned key_count; struct zsv_compare_key *keys; + unsigned char *combined_key_names; size_t row_count; // only matters if no ID columns are specified @@ -125,7 +127,8 @@ struct zsv_compare_data { unsigned char sort:1; unsigned char sort_in_memory:1; - unsigned char _:6; + unsigned char print_key_col_names:1; + unsigned char _:5; }; #endif diff --git a/app/compare_sort.c b/app/compare_sort.c index 1527dda5..32d7ae99 100644 --- a/app/compare_sort.c +++ b/app/compare_sort.c @@ -50,10 +50,12 @@ static int zsv_compare_sort_stmt_prep(sqlite3 *db, sqlite3_stmt **stmtp, static enum zsv_compare_status input_init_sorted(struct zsv_compare_data *data, struct zsv_compare_input *input, - struct zsv_opts *opts, + struct zsv_opts *_opts, + struct zsv_prop_handler *_prop_handler, const char *opts_used ) { - (void)(opts); + (void)(_opts); + (void)(_prop_handler); char *err_msg = NULL; int rc = zsv_compare_sort_prep_table(data, input->path, opts_used, 0, &err_msg, input->index); if(err_msg) { diff --git a/app/test/Makefile b/app/test/Makefile index 10c9164a..845a8468 100644 --- a/app/test/Makefile +++ b/app/test/Makefile @@ -486,3 +486,6 @@ test-compare: test-%: ${BUILD_DIR}/bin/zsv_%${EXE} @(${PREFIX} $< compare/t1.csv compare/t7.csv compare/t3.csv --json-object -k c ${REDIRECT1} ${TMP_DIR}/$@.out8 && \ ${CMP} ${TMP_DIR}/$@.out8 expected/$@.out8 && ${TEST_PASS} || ${TEST_FAIL}) + + @(${PREFIX} $< compare/t1.csv compare/t7.csv compare/t3.csv --print-key-colname -k c ${REDIRECT1} ${TMP_DIR}/$@.out9 && \ + ${CMP} ${TMP_DIR}/$@.out9 expected/$@.out9 && ${TEST_PASS} || ${TEST_FAIL}) diff --git a/app/test/expected/test-compare.out5 b/app/test/expected/test-compare.out5 index a4b0bba2..2ac231dc 100644 --- a/app/test/expected/test-compare.out5 +++ b/app/test/expected/test-compare.out5 @@ -2,5 +2,5 @@ C,Column,compare/t1.csv,compare/t5.csv,compare/t6-unsorted.csv C1,,,,Missing C9-NONMATCHING,,Missing,,Missing X2,B,B2,BB,BB -C9-NONMATCHING,,,, -C1,,,, +C9-NONMATCHING,,Missing,Missing, +C1,,Missing,Missing, diff --git a/app/test/expected/test-compare.out8 b/app/test/expected/test-compare.out8 index 37cfedad..d0dea36d 100644 --- a/app/test/expected/test-compare.out8 +++ b/app/test/expected/test-compare.out8 @@ -16,5 +16,11 @@ "Column": "B", "compare/t7.csv": "", "compare/t3.csv": "BB" + }, + { + "c": "X2", + "Column": "", + "compare/t7.csv": "Missing", + "compare/t3.csv": "Missing" } ] diff --git a/app/test/expected/test-compare.out9 b/app/test/expected/test-compare.out9 new file mode 100644 index 00000000..c8f26063 --- /dev/null +++ b/app/test/expected/test-compare.out9 @@ -0,0 +1,5 @@ +c,Column,compare/t1.csv,compare/t7.csv,compare/t3.csv +C1,B,X1,B1,X1 +C2,c,Missing,, +C2,B,,,BB +X2,c,,Missing,Missing