From 9de405c3ea2756840f40c9296e9c992708a81675 Mon Sep 17 00:00:00 2001 From: liquidaty Date: Tue, 13 Aug 2024 13:25:25 -0700 Subject: [PATCH] fix cum_scanned_length tracking for files larger than initial read buff add zsv_peek() --- app/Makefile | 2 +- app/builtin/help.c | 2 +- app/select.c | 3 ++- include/zsv/api.h | 3 +++ include/zsv/utils/dirs.h | 2 +- src/zsv.c | 12 ++++++++++-- 6 files changed, 18 insertions(+), 6 deletions(-) diff --git a/app/Makefile b/app/Makefile index bae75373..1b13ee53 100644 --- a/app/Makefile +++ b/app/Makefile @@ -260,7 +260,7 @@ MORE_SOURCE+= ${YAJL_INCLUDE} ${YAJL_HELPER_INCLUDE} -I${JQ_INCLUDE_DIR} MORE_LIBS+=${JQ_LIB} ${LDFLAGS_JQ} help: - @echo "To build: ${MAKE} [DEBUG=1] [clean] [clean-all] [BINDIR=${BINDIR}] [JQ_PREFIX=/usr/local] " + @echo "To build: ${MAKE} [DEBUG=1] [clean] [clean-all] [BINDIR=${BINDIR}] [JQ_PREFIX=/usr/local] " @echo @echo "If JQ_PREFIX is not defined, libjq will be built in the build dir" @echo diff --git a/app/builtin/help.c b/app/builtin/help.c index 277effd9..3b078a35 100644 --- a/app/builtin/help.c +++ b/app/builtin/help.c @@ -35,7 +35,7 @@ static int main_help(int argc, const char *argv[]) { "", "Options common to all commands except `prop`, `rm` and `jq`:", #ifdef ZSV_EXTRAS - " -L,--limit-rows : limit processing to the given number of rows (including any header row(s))", + " -L,--limit-rows : limit processing to the given number of rows (including any header row(s))", #endif " -c,--max-column-count : set the maximum number of columns parsed per row. defaults to 1024", " -r,--max-row-size : set the minimum supported maximum row size. 
defaults to 64k", diff --git a/app/select.c b/app/select.c index ea8618d8..340bf395 100644 --- a/app/select.c +++ b/app/select.c @@ -962,7 +962,8 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op while(status == zsv_status_ok && !zsv_signal_interrupted && !data.cancelled) status = zsv_parse_more(data.parser); - zsv_finish(data.parser); + if(status == zsv_status_no_more_input) + status = zsv_finish(data.parser); zsv_delete(data.parser); } } diff --git a/include/zsv/api.h b/include/zsv/api.h index 9bc05640..58ce3b1e 100644 --- a/include/zsv/api.h +++ b/include/zsv/api.h @@ -41,6 +41,9 @@ * - zsv_delete(): dispose the parser ******************************************************************************/ +ZSV_EXPORT +int zsv_peek(zsv_parser); + /** * Create a zsv parser. Typically, passed options will at least include a * a `row_handler()` callback. Many, but not all, options can be subsequently diff --git a/include/zsv/utils/dirs.h b/include/zsv/utils/dirs.h index ed120300..d78b0886 100644 --- a/include/zsv/utils/dirs.h +++ b/include/zsv/utils/dirs.h @@ -73,7 +73,7 @@ struct zsv_foreach_dirent_handle { unsigned char verbose:1; unsigned char is_dir:1; /* non-zero if this entry is a directory */ - unsigned char no_recurse:1; /* set to 1 when handling a dir to prevent recursing into it */ + unsigned char no_recurse:1; /* set to 1 when handling a dir to prevent recursing into it */ unsigned char _:5; }; diff --git a/src/zsv.c b/src/zsv.c index 10809d58..4f21fe1d 100644 --- a/src/zsv.c +++ b/src/zsv.c @@ -58,10 +58,11 @@ inline static size_t scanner_pre_parse(struct zsv_scanner *scanner) { scanner->old_bytes_read = 0; } - scanner->cum_scanned_length += scanner->scanned_length; + scanner->cum_scanned_length += scanner->scanned_length - scanner->partial_row_length; size_t capacity = scanner->buff.size - scanner->partial_row_length; - if(VERY_UNLIKELY(capacity == 0)) { // our row size was too small to fit a single row of data + 
if(VERY_UNLIKELY(capacity == 0)) { + // our row size was too small to fit a single row of data fprintf(stderr, "Warning: row %zu truncated\n", scanner->data_row_count); if(scanner->mode == ZSV_MODE_FIXED) { if(VERY_UNLIKELY(row_fx(scanner, scanner->buff.buff, 0, scanner->buff.size))) @@ -310,6 +311,13 @@ ZSV_EXPORT enum zsv_status zsv_set_fixed_offsets(zsv_parser parser, size_t count return zsv_status_ok; } +ZSV_EXPORT +int zsv_peek(zsv_parser z) { /* Read-only peek: yields the buffer byte at index scanned_length + 1, or -1 when that index is not below buff.size. Nothing in the parser is modified. NOTE(review): the bound is buff.size rather than a count of bytes actually read -- confirm the peeked byte is always valid data. NOTE(review): if buff.buff elements are plain (signed) char, a byte >= 0x80 could come back negative and 0xFF would be indistinguishable from the -1 sentinel -- confirm the element type. */ + if(z->scanned_length + 1 < z->buff.size) + return z->buff.buff[z->scanned_length+1]; + return -1; /* index scanned_length + 1 is at or past buff.size: nothing to peek */ +} + /** * Create a zsv parser * @param opts