Skip to content

fix cum_scanned_length tracking for files larger than initial read buff #186

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion app/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ MORE_SOURCE+= ${YAJL_INCLUDE} ${YAJL_HELPER_INCLUDE} -I${JQ_INCLUDE_DIR}
MORE_LIBS+=${JQ_LIB} ${LDFLAGS_JQ}

help:
@echo "To build: ${MAKE} [DEBUG=1] [clean] [clean-all] [BINDIR=${BINDIR}] [JQ_PREFIX=/usr/local] <install|all|test>"
@echo "To build: ${MAKE} [DEBUG=1] [clean] [clean-all] [BINDIR=${BINDIR}] [JQ_PREFIX=/usr/local] <install|all|install-util-lib|test>"
@echo
@echo "If JQ_PREFIX is not defined, libjq will be built in the build dir"
@echo
Expand Down
2 changes: 1 addition & 1 deletion app/builtin/help.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ static int main_help(int argc, const char *argv[]) {
"",
"Options common to all commands except `prop`, `rm` and `jq`:",
#ifdef ZSV_EXTRAS
" -L,--limit-rows <n>: limit processing to the given number of rows (including any header row(s))",
" -L,--limit-rows <n> : limit processing to the given number of rows (including any header row(s))",
#endif
" -c,--max-column-count <n>: set the maximum number of columns parsed per row. defaults to 1024",
" -r,--max-row-size <n> : set the minimum supported maximum row size. defaults to 64k",
Expand Down
2 changes: 1 addition & 1 deletion app/ext_example/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ endif

DEBUG=0
ifeq ($(DEBUG),0)
CFLAGS+= -O3 -DNDEBUG -std=gnu11 -Wno-gnu-statement-expression -Wshadow -Wall -Wextra -Wno-missing-braces -pedantic -DSTDC_HEADERS -D_GNU_SOURCE -lm -mavx2 -ftree-vectorize -flto
CFLAGS+= -O3 -DNDEBUG -std=gnu11 -Wno-gnu-statement-expression -Wshadow -Wall -Wextra -Wno-missing-braces -pedantic -DSTDC_HEADERS -D_GNU_SOURCE -lm -ftree-vectorize -flto
else
CFLAGS += -g
endif
Expand Down
4 changes: 2 additions & 2 deletions app/ext_example/test/expected/zsvext-test-3.out
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Usage:
zsv license [<extension_id>]

Options common to all commands except `prop`, `rm` and `jq`:
-L,--limit-rows <n>: limit processing to the given number of rows (including any header row(s))
-L,--limit-rows <n> : limit processing to the given number of rows (including any header row(s))
-c,--max-column-count <n>: set the maximum number of columns parsed per row. defaults to 1024
-r,--max-row-size <n> : set the minimum supported maximum row size. defaults to 64k
-B,--buff-size <n> : set internal buffer size. defaults to 256k
Expand Down Expand Up @@ -76,7 +76,7 @@ Usage:
zsv license [<extension_id>]

Options common to all commands except `prop`, `rm` and `jq`:
-L,--limit-rows <n>: limit processing to the given number of rows (including any header row(s))
-L,--limit-rows <n> : limit processing to the given number of rows (including any header row(s))
-c,--max-column-count <n>: set the maximum number of columns parsed per row. defaults to 1024
-r,--max-row-size <n> : set the minimum supported maximum row size. defaults to 64k
-B,--buff-size <n> : set internal buffer size. defaults to 256k
Expand Down
3 changes: 2 additions & 1 deletion app/select.c
Original file line number Diff line number Diff line change
Expand Up @@ -962,7 +962,8 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
while(status == zsv_status_ok
&& !zsv_signal_interrupted && !data.cancelled)
status = zsv_parse_more(data.parser);
zsv_finish(data.parser);
if(status == zsv_status_no_more_input)
status = zsv_finish(data.parser);
zsv_delete(data.parser);
}
}
Expand Down
4 changes: 2 additions & 2 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -405,8 +405,8 @@ test -n "$MAKE" || fail "$0: cannot find a MAKE tool"
# Find a C compiler to use
#
printf "checking for C compiler... "
for c in cc gcc gcc-11 gcc-10 gcc-9 gcc-8 gcc-7 clang; do trycc1 "$c"; done
for c in cc gcc gcc-11 gcc-10 gcc-9 gcc-8 gcc-7 clang; do trycc2 "$c"; done
for c in cc gcc gcc-14 gcc-13 gcc-12 gcc-11 gcc-10 gcc-9 gcc-8 gcc-7 clang; do trycc1 "$c"; done
for c in cc gcc gcc-14 gcc-13 gcc-12 gcc-11 gcc-10 gcc-9 gcc-8 gcc-7 clang; do trycc2 "$c"; done
printf "%s\n" "$CC"
test -n "$CC" || fail "$0: cannot find a C compiler"

Expand Down
3 changes: 3 additions & 0 deletions include/zsv/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@
* - zsv_delete(): dispose the parser
******************************************************************************/

ZSV_EXPORT
int zsv_peek(zsv_parser);

/**
* Create a zsv parser. Typically, passed options will at least include a
* `row_handler()` callback. Many, but not all, options can be subsequently
Expand Down
2 changes: 1 addition & 1 deletion include/zsv/utils/dirs.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ struct zsv_foreach_dirent_handle {

unsigned char verbose:1;
unsigned char is_dir:1; /* non-zero if this entry is a directory */
unsigned char no_recurse:1; /* set to 1 when handling a dir to prevent recursing into it */
unsigned char no_recurse:1; /* set to 1 when handling a dir to prevent recursing into it */
unsigned char _:5;
};

Expand Down
1 change: 1 addition & 0 deletions scripts/ci-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ if [ "$RUN_TESTS" = true ]; then
echo "[INF] Tests completed successfully!"

echo "[INF] Configuring example extension and running example extension tests"
echo "[INF] (cd app/ext_example && $MAKE CONFIGFILE=../../config.mk test)"
(cd app/ext_example && "$MAKE" CONFIGFILE=../../config.mk test)
echo "[INF] Tests completed successfully!"
fi
Expand Down
12 changes: 10 additions & 2 deletions src/zsv.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,11 @@ inline static size_t scanner_pre_parse(struct zsv_scanner *scanner) {
scanner->old_bytes_read = 0;
}

scanner->cum_scanned_length += scanner->scanned_length;
scanner->cum_scanned_length += scanner->scanned_length - scanner->partial_row_length;

size_t capacity = scanner->buff.size - scanner->partial_row_length;
if(VERY_UNLIKELY(capacity == 0)) { // our row size was too small to fit a single row of data
if(VERY_UNLIKELY(capacity == 0)) {
// our row size was too small to fit a single row of data
fprintf(stderr, "Warning: row %zu truncated\n", scanner->data_row_count);
if(scanner->mode == ZSV_MODE_FIXED) {
if(VERY_UNLIKELY(row_fx(scanner, scanner->buff.buff, 0, scanner->buff.size)))
Expand Down Expand Up @@ -310,6 +311,13 @@ ZSV_EXPORT enum zsv_status zsv_set_fixed_offsets(zsv_parser parser, size_t count
return zsv_status_ok;
}

/**
 * Look ahead in the parser's internal buffer without modifying parser state.
 *
 * Reads the element of `buff.buff` at index `scanned_length + 1`.
 *
 * @param z parser handle to inspect
 * @return the buffer element at that index, or -1 when the index is not
 *         below `buff.size`
 *
 * NOTE(review): the bound used is `buff.size`; if that is the buffer's
 * capacity rather than the number of bytes actually read, the returned value
 * may be stale data -- confirm against the scanner struct definition.
 */
ZSV_EXPORT
int zsv_peek(zsv_parser z) {
  /* nothing to peek at: the look-ahead index falls outside the buffer */
  if(z->scanned_length + 1 >= z->buff.size)
    return -1;
  return z->buff.buff[z->scanned_length + 1];
}

/**
* Create a zsv parser
* @param opts
Expand Down
Loading