From 685acdcd2e5e5b73e7ef0f3244b6970f516c3283 Mon Sep 17 00:00:00 2001 From: Alex Argatoff Date: Mon, 3 Nov 2025 10:26:14 -0800 Subject: [PATCH 01/17] fixed tests, breaking methods, added macros for print debugging --- .../desktopFileInterface.c | 2 +- .../desktopFileInterface.h | 2 +- src/query-interface/activeRules.c | 12 +++++ src/query-interface/sort/flash_minsort.c | 2 + .../sort/flash_minsort_sublist.c | 2 + src/query-interface/sort/sortWrapper.c | 50 +++++++++++++++---- src/query-interface/sort/sortWrapper.h | 8 +++ test/test_sort/test_sort_query_interface.cpp | 30 ++--------- 8 files changed, 69 insertions(+), 39 deletions(-) diff --git a/lib/Desktop-File-Interface/desktopFileInterface.c b/lib/Desktop-File-Interface/desktopFileInterface.c index 86ffefb3..7fb87b2f 100644 --- a/lib/Desktop-File-Interface/desktopFileInterface.c +++ b/lib/Desktop-File-Interface/desktopFileInterface.c @@ -5,7 +5,7 @@ typedef struct { FILE *file; } FILE_INFO; -void *setupFile(char *filename) { +void *setupFile(const char *filename) { FILE_INFO *fileInfo = malloc(sizeof(FILE_INFO)); int nameLen = strlen(filename); fileInfo->filename = calloc(1, nameLen + 1); diff --git a/lib/Desktop-File-Interface/desktopFileInterface.h b/lib/Desktop-File-Interface/desktopFileInterface.h index 25b20b9a..1baf1467 100644 --- a/lib/Desktop-File-Interface/desktopFileInterface.h +++ b/lib/Desktop-File-Interface/desktopFileInterface.h @@ -17,7 +17,7 @@ extern "C" { /* File functions */ embedDBFileInterface *getFileInterface(); embedDBFileInterface *getMockEraseFileInterface(); -void *setupFile(char *filename); +void *setupFile(const char *filename); void tearDownFile(void *file); #ifdef __cplusplus diff --git a/src/query-interface/activeRules.c b/src/query-interface/activeRules.c index e7c63064..8fb1ba16 100644 --- a/src/query-interface/activeRules.c +++ b/src/query-interface/activeRules.c @@ -71,7 +71,9 @@ void executeRules(embedDBState* state, void* key, void* data) { handleCustomQuery(state, 
state->rules[i], key, data); break; default: +#ifdef PRINT_ERRORS printf("ERROR: Unsupported rule type\n"); +#endif } } } @@ -150,7 +152,9 @@ embedDBOperator* createOperator(embedDBState* state, activeRule* rule, void*** a it->minKey = minKeyPtr; } } else { +#ifdef PRINT_ERRORS printf("ERROR: Unsupported key size\n"); +#endif return NULL; } @@ -174,7 +178,9 @@ embedDBOperator* createOperator(embedDBState* state, activeRule* rule, void*** a aggFunc = createMinAggregate(rule->colNum, rule->schema->columnSizes[rule->colNum]); break; default: +#ifdef PRINT_ERRORS printf("ERROR: Unsupported rule type\n"); +#endif } embedDBAggregateFunc* aggFuncs = (embedDBAggregateFunc*)malloc(1 * sizeof(embedDBAggregateFunc)); @@ -218,7 +224,9 @@ void executeComparison(activeRule* rule, void* aggregateValue, Comparator compar if (comparisonResult != 0) rule->callback(aggregateValue, data, rule->context); break; default: +#ifdef PRINT_ERRORS printf("ERROR: Unsupported operation\n"); +#endif } } @@ -237,7 +245,9 @@ void handleGetMinMax(embedDBState* state, activeRule* rule, void* key, void* dat int64_t minmax = GetMinMax64(state, rule, key); executeComparison(rule, &minmax, int64Comparator, data); } else { +#ifdef PRINT_ERRORS printf("ERROR: Unsupported column size\n"); +#endif } } @@ -257,6 +267,8 @@ void handleCustomQuery(embedDBState* state, activeRule* rule, void* key, void* d executeComparison(rule, result, doubleComparator, data); break; default: +#ifdef PRINT_ERRORS printf("ERROR: Unsupported return type\n"); +#endif } } diff --git a/src/query-interface/sort/flash_minsort.c b/src/query-interface/sort/flash_minsort.c index ac1435a2..65e05156 100644 --- a/src/query-interface/sort/flash_minsort.c +++ b/src/query-interface/sort/flash_minsort.c @@ -70,7 +70,9 @@ void readPageMinSort(MinSortState *ms, int pageNum, external_sort_t *es, metrics // Read page into the buffer if (0 == is->fileInterface->read(ms->buffer, pageNum, es->page_size, fp)) { +#ifdef DEBUG printf("MINSORT: Failed to 
read block.\n"); +#endif } metric->num_reads++; diff --git a/src/query-interface/sort/flash_minsort_sublist.c b/src/query-interface/sort/flash_minsort_sublist.c index d2ce3722..c7cf54e2 100644 --- a/src/query-interface/sort/flash_minsort_sublist.c +++ b/src/query-interface/sort/flash_minsort_sublist.c @@ -58,7 +58,9 @@ void readPage_sublist(MinSortStateSublist *ms, int pageNum, external_sort_t *es, // Read page into the buffer if (0 == is->fileInterface->read(ms->buffer, pageNum, es->page_size, fp)) { +#ifdef DEBUG printf("MINSORT SUBLIST: Failed to read block.\n"); +#endif } metric->num_reads++; diff --git a/src/query-interface/sort/sortWrapper.c b/src/query-interface/sort/sortWrapper.c index 576c0120..8df58662 100644 --- a/src/query-interface/sort/sortWrapper.c +++ b/src/query-interface/sort/sortWrapper.c @@ -7,31 +7,29 @@ // External declaration for setupFile function extern void *setupFile(const char *filename); -// Forward declaration for pure in-memory sort (no file I/O) -file_iterator_state_t *startPureMemorySort(sortData *data, embedDBOperator *op); - -/** - * @brief Pure in-memory sort that avoids file I/O completely for very small datasets - * @param data Sort configuration data - * @param op The operator to read data from - * @return file_iterator_state_t* Iterator for reading sorted results from memory - */ file_iterator_state_t *startPureMemorySort(sortData *data, embedDBOperator *op) { +#ifdef DEBUG printf("DEBUG: Starting pure in-memory sort\n"); - +#endif int record_count = 0; while (exec(op->input)) { record_count++; if (record_count > 10) { // Safety limit +#ifdef PRINT_ERRORS printf("ERROR: Too many records for pure in-memory sort\n"); +#endif return NULL; } } +#ifdef DEBUG printf("DEBUG: Found %d records for pure in-memory sort\n", record_count); +#endif if (record_count == 0) { +#ifdef DEBUG printf("DEBUG: No records to sort\n"); +#endif file_iterator_state_t *iteratorState = malloc(sizeof(file_iterator_state_t)); if (iteratorState == NULL) { 
return NULL; @@ -49,7 +47,9 @@ file_iterator_state_t *startPureMemorySort(sortData *data, embedDBOperator *op) void *buffer = malloc(record_count * data->recordSize); if (buffer == NULL) { +#ifdef PRINT_ERRORS printf("ERROR: Failed to allocate memory for pure in-memory sort\n"); +#endif return NULL; } @@ -64,23 +64,31 @@ file_iterator_state_t *startPureMemorySort(sortData *data, embedDBOperator *op) records_read++; } +#ifdef DEBUG printf("DEBUG: Read %d records into memory buffer\n", records_read); +#endif // Sort the records in memory using quicksort metrics_t metrics = {0}; int sort_result = in_memory_quick_sort(buffer, records_read, data->recordSize, data->keyOffset, data->compareFn, &metrics); if (sort_result != 0) { +#ifdef PRINT_ERRORS printf("ERROR: In-memory sort failed\n"); +#endif free(buffer); return NULL; } +#ifdef DEBUG printf("DEBUG: Pure in-memory sort completed successfully\n"); +#endif file_iterator_state_t *iteratorState = malloc(sizeof(file_iterator_state_t)); if (iteratorState == NULL) { +#ifdef PRINT_ERRORS printf("ERROR: Failed to allocate iterator state\n"); +#endif free(buffer); return NULL; } @@ -115,7 +123,9 @@ int8_t writePageWithHeader(void *buffer, const uint32_t blockIndex, const uint32 fileInterface->write(buffer, blockIndex, pageSize, file); if (fileInterface->error(file)) { +#ifdef PRINT_ERRORS printf("ERROR: SORT: Failed to write unsorted data"); +#endif return 1; } @@ -142,8 +152,10 @@ uint32_t loadRowData(sortData *data, embedDBOperator *op, void *unsortedFile) { void *buffer = malloc(PAGE_SIZE); if (buffer == NULL) { +#ifdef PRINT_ERRORS printf("ERROR: SORT: buffer malloc failed"); - return 0; +#endif + return 1; } // Write row data to file @@ -163,7 +175,9 @@ uint32_t loadRowData(sortData *data, embedDBOperator *op, void *unsortedFile) { uint32_t rowOffset = count % valuesPerPage * data->recordSize + BLOCK_HEADER_SIZE; if (rowOffset + data->recordSize > PAGE_SIZE) { +#ifdef PRINT_ERRORS printf("ERROR: SORT: error calculating 
row offset"); +#endif free(buffer); buffer = NULL; return 0; @@ -217,7 +231,9 @@ void prepareSort(embedDBOperator *op) { // For Arduino Due, use pure in-memory sort to completely avoid SD card I/O issues data->fileIterator = startPureMemorySort(data, op); if (data->fileIterator == NULL) { +#ifdef PRINT_ERRORS printf("ERROR: Pure memory sort failed\n"); +#endif return; } return; @@ -250,7 +266,9 @@ void prepareSort(embedDBOperator *op) { // Start sorting file_iterator_state_t *iteratorState = startSort(data, unsortedFile, sortedFile); if (iteratorState == NULL) { +#ifdef PRINT_ERRORS printf("ERROR: Sort failed"); +#endif return; } @@ -328,16 +346,24 @@ file_iterator_state_t *startSort(sortData *data, void *unsortedFile, void *sorte int err; // Use simpler sort for Arduino with small datasets #ifdef ARDUINO +#ifdef DEBUG printf("DEBUG: Starting Arduino sort with %d records\n", data->count); +#endif if (data->count <= 100) { // Use flash_minsort for all datasets on Arduino (more memory efficient) +#ifdef DEBUG printf("DEBUG: Using flash_minsort for small dataset\n"); +#endif err = flash_minsort(iteratorState, tuple_buffer, sortedFile, buffer, buffer_max_pages * es.page_size, &es, &result_file_ptr, &metrics, data->compareFn); } else { +#ifdef DEBUG printf("DEBUG: Using flash_minsort for large dataset\n"); +#endif // Use flash_minsort for larger datasets (more memory efficient than adaptive_sort) err = flash_minsort(iteratorState, tuple_buffer, sortedFile, buffer, buffer_max_pages * es.page_size, &es, &result_file_ptr, &metrics, data->compareFn); } +#ifdef DEBUG printf("DEBUG: Arduino sort completed with error code: %d\n", err); +#endif #else // Use adaptive sort on desktop int8_t runGenOnly = false; // Run full sort operation @@ -404,7 +430,9 @@ uint8_t readNextRecord(void *data, void *buffer) { iteratorState->fileInterface->readRel(((sortData *)data)->readBuffer, PAGE_SIZE, 1, iteratorState->file); if (((sortData *)data)->fileInterface->error(iteratorState->file)) { 
+#ifdef PRINT_ERRORS printf("ERROR: SORT: next record read failed"); +#endif return 2; } } diff --git a/src/query-interface/sort/sortWrapper.h b/src/query-interface/sort/sortWrapper.h index 1dcdf9ac..ddfb5455 100644 --- a/src/query-interface/sort/sortWrapper.h +++ b/src/query-interface/sort/sortWrapper.h @@ -55,6 +55,14 @@ metrics_t initMetric(); */ uint32_t loadRowData(sortData *data, embedDBOperator *op, void *unsortedFile); +/** + * @brief Pure in-memory sort that avoids file I/O completely for very small datasets + * @param data Sort configuration data + * @param op The operator to read data from + * @return file_iterator_state_t* Iterator for reading sorted results from memory + */ +file_iterator_state_t *startPureMemorySort(sortData *data, embedDBOperator *op); + /** * @brief The data given in the unsortedFile is sorted and stored in the sortedFile * diff --git a/test/test_sort/test_sort_query_interface.cpp b/test/test_sort/test_sort_query_interface.cpp index 85c5a509..181b8000 100644 --- a/test/test_sort/test_sort_query_interface.cpp +++ b/test/test_sort/test_sort_query_interface.cpp @@ -6,31 +6,11 @@ #endif -#ifdef ARDUINO -// For Arduino, setupFile is not used since we use pure memory sort -// But we need to define it for linking compatibility -#ifdef __cplusplus -extern "C" { -#endif - -void* setupSDFile(char* filename); - -void* setupFile(const char* filename) { - return setupSDFile((char*)filename); -} - -#ifdef __cplusplus -} -#endif -#endif - #define STORAGE_TYPE 0 -#ifdef ARDUINO -// pio test --environment due --filter "test_sort" - #if defined(MEMBOARD) && STORAGE_TYPE == 1 #include "dataflashFileInterface.h" +#include "memboardTestSetup.h" #endif #if defined(MEGA) @@ -41,26 +21,24 @@ void* setupFile(const char* filename) { #include "dueTestSetup.h" #endif +#ifdef ARDUINO #include "SDFileInterface.h" -#define FILE_TYPE SD_FILE #define getFileInterface getSDInterface +#define setupFile setupSDFile #define tearDownFile tearDownSDFile - +#define 
DATA_FILE_PATH "dataFile.bin" #define clock millis #define DATA_FILE_PATH_UWA "dataFileUWA.bin" #define INDEX_FILE_PATH_UWA "indexFileUWA.bin" #define DATA_FILE_PATH_SEA "dataFileSEA.bin" #define INDEX_FILE_PATH_SEA "indexFileSEA.bin" - #else - #define FILE_TYPE FILE #include "desktopFileInterface.h" #define DATA_FILE_PATH_UWA "build/artifacts/dataFileUWA.bin" #define INDEX_FILE_PATH_UWA "build/artifacts/indexFileUWA.bin" #define DATA_FILE_PATH_SEA "build/artifacts/dataFileSEA.bin" #define INDEX_FILE_PATH_SEA "build/artifacts/indexFileSEA.bin" - #endif #include "unity.h" From 0d055c3afb07c7c307f6cab397db808c2695eff5 Mon Sep 17 00:00:00 2001 From: xelArga Date: Mon, 3 Nov 2025 16:39:19 -0800 Subject: [PATCH 02/17] added pointer for the file interface to setup files --- .../desktopFileInterface.c | 4 +++ lib/SD-File-Interface/SDFileInterface.c | 2 ++ src/embedDB/embedDB.h | 5 +++- src/query-interface/sort/sortWrapper.c | 25 +++++++------------ src/query-interface/sort/sortWrapper.h | 7 ------ 5 files changed, 19 insertions(+), 24 deletions(-) diff --git a/lib/Desktop-File-Interface/desktopFileInterface.c b/lib/Desktop-File-Interface/desktopFileInterface.c index 7fb87b2f..b11ada0d 100644 --- a/lib/Desktop-File-Interface/desktopFileInterface.c +++ b/lib/Desktop-File-Interface/desktopFileInterface.c @@ -135,6 +135,8 @@ embedDBFileInterface *getFileInterface() { fileInterface->writeRel = FILE_WRITE_REL; fileInterface->seek = FILE_SEEK; fileInterface->tell = FILE_TELL; + fileInterface->setup = setupFile; + fileInterface->teardown = tearDownFile; return fileInterface; } @@ -152,5 +154,7 @@ embedDBFileInterface *getMockEraseFileInterface() { fileInterface->writeRel = FILE_WRITE_REL; fileInterface->seek = FILE_SEEK; fileInterface->tell = FILE_TELL; + fileInterface->setup = setupFile; + fileInterface->teardown = tearDownFile; return fileInterface; } diff --git a/lib/SD-File-Interface/SDFileInterface.c b/lib/SD-File-Interface/SDFileInterface.c index 58da4602..888d7468 100644 
--- a/lib/SD-File-Interface/SDFileInterface.c +++ b/lib/SD-File-Interface/SDFileInterface.c @@ -134,5 +134,7 @@ embedDBFileInterface *getSDInterface() { fileInterface->erase = FILE_ERASE; fileInterface->open = FILE_OPEN; fileInterface->flush = FILE_FLUSH; + fileInterface->setup = setupSDFile; + fileInterface->teardown = tearDownSDFile; return fileInterface; } diff --git a/src/embedDB/embedDB.h b/src/embedDB/embedDB.h index 68964504..1fdbec1b 100644 --- a/src/embedDB/embedDB.h +++ b/src/embedDB/embedDB.h @@ -230,7 +230,10 @@ typedef struct { /** * */ - int32_t (*tell)(void *file); + int32_t(*tell)(void* file); + + void* (*setup)(const char* filename); + void (*teardown)(void* file); } embedDBFileInterface; diff --git a/src/query-interface/sort/sortWrapper.c b/src/query-interface/sort/sortWrapper.c index 8df58662..ef13183d 100644 --- a/src/query-interface/sort/sortWrapper.c +++ b/src/query-interface/sort/sortWrapper.c @@ -1,12 +1,10 @@ #include "sortWrapper.h" #include "query-interface/sort/in_memory_sort.h" +#include "unistd.h" #define PRINT_METRIC -// External declaration for setupFile function -extern void *setupFile(const char *filename); - file_iterator_state_t *startPureMemorySort(sortData *data, embedDBOperator *op) { #ifdef DEBUG printf("DEBUG: Starting pure in-memory sort\n"); @@ -227,21 +225,16 @@ void prepareSort(embedDBOperator *op) { data->keySize = -1 * data->keySize; } -#ifdef ARDUINO - // For Arduino Due, use pure in-memory sort to completely avoid SD card I/O issues - data->fileIterator = startPureMemorySort(data, op); - if (data->fileIterator == NULL) { -#ifdef PRINT_ERRORS - printf("ERROR: Pure memory sort failed\n"); -#endif - return; - } - return; -#endif + char tmp1[] = "/tmp/embedsort_unsortedXXXXXX"; + char tmp2[] = "/tmp/embedsort_sortedXXXXXX"; + int fd1 = mkstemp(tmp1); + int fd2 = mkstemp(tmp2); + if (fd1 >= 0) close(fd1); + if (fd2 >= 0) close(fd2); // Set up files - void *unsortedFile = setupFile(SORT_DATA_LOCATION); - void *sortedFile = 
setupFile(SORT_ORDER_LOCATION); + void* unsortedFile = data->fileInterface->setup(tmp1); + void* sortedFile = data->fileInterface->setup(tmp2); if (unsortedFile == NULL || sortedFile == NULL) { #ifdef PRINT_ERRORS diff --git a/src/query-interface/sort/sortWrapper.h b/src/query-interface/sort/sortWrapper.h index ddfb5455..4096f86c 100644 --- a/src/query-interface/sort/sortWrapper.h +++ b/src/query-interface/sort/sortWrapper.h @@ -11,13 +11,6 @@ #include "flash_minsort.h" #include "in_memory_sort.h" -#if defined(DESKTOP) -#include -#endif - -#define SORT_DATA_LOCATION "sort_data.bin" -#define SORT_ORDER_LOCATION "sort_order.bin" - typedef struct embedDBOperator embedDBOperator; typedef struct sortData { From 7ae8a0391d9a30b0dbd0d18cc9963923a7d0528c Mon Sep 17 00:00:00 2001 From: Alex Argatoff Date: Mon, 17 Nov 2025 10:35:53 -0800 Subject: [PATCH 03/17] fixed null check for sort wrapper --- src/query-interface/sort/sortWrapper.c | 17 ++++++++++++----- src/query-interface/sort/sortWrapper.h | 2 +- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/query-interface/sort/sortWrapper.c b/src/query-interface/sort/sortWrapper.c index ef13183d..12347c76 100644 --- a/src/query-interface/sort/sortWrapper.c +++ b/src/query-interface/sort/sortWrapper.c @@ -232,13 +232,20 @@ void prepareSort(embedDBOperator *op) { if (fd1 >= 0) close(fd1); if (fd2 >= 0) close(fd2); - // Set up files - void* unsortedFile = data->fileInterface->setup(tmp1); - void* sortedFile = data->fileInterface->setup(tmp2); + // Set up files using the configured file interface factory + if (data->fileInterface == NULL || data->fileInterface->setup == NULL) { +#ifdef PRINT_ERRORS + printf("ERROR: File interface or setup function not provided while initializing ORDER BY operator\n"); +#endif + return; + } + + void *unsortedFile = data->fileInterface->setup(tmp1); + void *sortedFile = data->fileInterface->setup(tmp2); if (unsortedFile == NULL || sortedFile == NULL) { #ifdef PRINT_ERRORS - 
printf("ERROR: Failed to open files while initializing ORDER BY operator"); + printf("ERROR: Failed to allocate file handles while initializing ORDER BY operator\n"); #endif return; } @@ -322,7 +329,7 @@ file_iterator_state_t *startSort(sortData *data, void *unsortedFile, void *sorte iteratorState->file = unsortedFile; iteratorState->recordsRead = 0; - iteratorState->totalRecords = data->count; // Total records from the previous while loop + iteratorState->totalRecords = data->count; iteratorState->recordSize = es.record_size; iteratorState->fileInterface = data->fileInterface; iteratorState->currentRecord = 0; diff --git a/src/query-interface/sort/sortWrapper.h b/src/query-interface/sort/sortWrapper.h index 4096f86c..533052f7 100644 --- a/src/query-interface/sort/sortWrapper.h +++ b/src/query-interface/sort/sortWrapper.h @@ -28,7 +28,7 @@ typedef struct sortData { } sortData; /** - * @brief Initalizes default metric values + * @brief Initializes default metric values * * @return metrics_t */ From a9eeea1802810d0719c52f41e1341f4a5656b6b2 Mon Sep 17 00:00:00 2001 From: Alex Argatoff Date: Wed, 26 Nov 2025 11:03:31 -0800 Subject: [PATCH 04/17] modifications to sort and tmp file handling --- .../desktopFileInterface.c | 35 ++ lib/SD-File-Interface/SDFileInterface.c | 7 + platformio.ini | 3 +- src/embedDB/embedDB.h | 10 + src/query-interface/sort/sortWrapper.c | 307 +++++++++--------- test/test_sort/test_sort_query_interface.cpp | 65 +--- 6 files changed, 224 insertions(+), 203 deletions(-) diff --git a/lib/Desktop-File-Interface/desktopFileInterface.c b/lib/Desktop-File-Interface/desktopFileInterface.c index b11ada0d..510daf84 100644 --- a/lib/Desktop-File-Interface/desktopFileInterface.c +++ b/lib/Desktop-File-Interface/desktopFileInterface.c @@ -1,5 +1,7 @@ #include "desktopFileInterface.h" +static char tempPathBuffer[256]; + typedef struct { char *filename; FILE *file; @@ -121,6 +123,37 @@ int32_t FILE_TELL(void *file) { return ftell(fileInfo->file); } +char 
*tempFilePath(void) { + static char tempPathBuffer[256]; + +#if defined(_WIN32) || defined(_WIN64) + char *path = _tempnam(NULL, "embeddb_"); + if (path != NULL) { + strncpy(tempPathBuffer, path, sizeof(tempPathBuffer) - 1); + tempPathBuffer[sizeof(tempPathBuffer) - 1] = '\0'; + free(path); + return tempPathBuffer; + } + + /* Fallback */ + snprintf(tempPathBuffer, sizeof(tempPathBuffer), + "embeddb_%lu.tmp", (unsigned long)rand()); + return tempPathBuffer; + +#else + /* POSIX systems */ + snprintf(tempPathBuffer, sizeof(tempPathBuffer), + "/tmp/embeddb_%luXXXXXX", (unsigned long)rand()); + + int fd = mkstemp(tempPathBuffer); + if (fd >= 0) { + close(fd); + } + + return tempPathBuffer; +#endif +} + embedDBFileInterface *getFileInterface() { embedDBFileInterface *fileInterface = malloc(sizeof(embedDBFileInterface)); fileInterface->close = FILE_CLOSE; @@ -137,6 +170,7 @@ embedDBFileInterface *getFileInterface() { fileInterface->tell = FILE_TELL; fileInterface->setup = setupFile; fileInterface->teardown = tearDownFile; + fileInterface->tempFilePath = tempFilePath; return fileInterface; } @@ -156,5 +190,6 @@ embedDBFileInterface *getMockEraseFileInterface() { fileInterface->tell = FILE_TELL; fileInterface->setup = setupFile; fileInterface->teardown = tearDownFile; + fileInterface->tempFilePath = tempFilePath; return fileInterface; } diff --git a/lib/SD-File-Interface/SDFileInterface.c b/lib/SD-File-Interface/SDFileInterface.c index 888d7468..8adae28d 100644 --- a/lib/SD-File-Interface/SDFileInterface.c +++ b/lib/SD-File-Interface/SDFileInterface.c @@ -136,5 +136,12 @@ embedDBFileInterface *getSDInterface() { fileInterface->flush = FILE_FLUSH; fileInterface->setup = setupSDFile; fileInterface->teardown = tearDownSDFile; + fileInterface->tempFilePath = sdfat_tempFilePath; return fileInterface; } + +char* sdfat_tempFilePath(void) { + static char tempPathBuffer[32]; + snprintf(tempPathBuffer, sizeof(tempPathBuffer), "TMP%lu.DAT", random()); + return tempPathBuffer; +} diff 
--git a/platformio.ini b/platformio.ini index e685579d..c810d597 100644 --- a/platformio.ini +++ b/platformio.ini @@ -20,9 +20,10 @@ build_src_filter = lib_ignore = Dataflash, Dataflash-File-Interface, Dataflash-Wrapper, Distribution, Due, Mega, Memboard, SD-File-Interface, SD-Test, SD-Wrapper, SdFat, Serial-Wrapper, Unity-Desktop build_flags = -lm - -DPRINT_ERRORS + -DPRINT_ERRORS extra_scripts = pre:scripts/create_build_folder.py lib_deps = +build_type = debug [env:desktop-dist] diff --git a/src/embedDB/embedDB.h b/src/embedDB/embedDB.h index 1fdbec1b..328fb9ed 100644 --- a/src/embedDB/embedDB.h +++ b/src/embedDB/embedDB.h @@ -232,8 +232,18 @@ typedef struct { */ int32_t(*tell)(void* file); + /** + * @brief Pointer to external function for file setup + */ void* (*setup)(const char* filename); + /** + * @brief Pointer to external function for file teardown + */ void (*teardown)(void* file); + /** + * @brief Pointer to platform specific tmp file path + */ + char* (*tempFilePath)(void); } embedDBFileInterface; diff --git a/src/query-interface/sort/sortWrapper.c b/src/query-interface/sort/sortWrapper.c index 12347c76..6344d214 100644 --- a/src/query-interface/sort/sortWrapper.c +++ b/src/query-interface/sort/sortWrapper.c @@ -5,6 +5,12 @@ #define PRINT_METRIC +/** + * @brief Pure in-memory sort that avoids file I/O completely for very small datasets + * @param data Sort configuration data + * @param op The operator to read data from + * @return file_iterator_state_t* Iterator for reading sorted results from memory + */ file_iterator_state_t *startPureMemorySort(sortData *data, embedDBOperator *op) { #ifdef DEBUG printf("DEBUG: Starting pure in-memory sort\n"); @@ -225,14 +231,13 @@ void prepareSort(embedDBOperator *op) { data->keySize = -1 * data->keySize; } - char tmp1[] = "/tmp/embedsort_unsortedXXXXXX"; - char tmp2[] = "/tmp/embedsort_sortedXXXXXX"; - int fd1 = mkstemp(tmp1); - int fd2 = mkstemp(tmp2); - if (fd1 >= 0) close(fd1); - if (fd2 >= 0) close(fd2); 
+#ifdef ARDUINO + data->fileIterator = startPureMemorySort(data, op); + if (data->fileIterator == NULL) { + printf("ERROR: Pure memory sort failed\n"); + return; +#else - // Set up files using the configured file interface factory if (data->fileInterface == NULL || data->fileInterface->setup == NULL) { #ifdef PRINT_ERRORS printf("ERROR: File interface or setup function not provided while initializing ORDER BY operator\n"); @@ -240,6 +245,9 @@ void prepareSort(embedDBOperator *op) { return; } + const char *tmp1 = data->fileInterface->tempFilePath(); + const char *tmp2 = data->fileInterface->tempFilePath(); + void *unsortedFile = data->fileInterface->setup(tmp1); void *sortedFile = data->fileInterface->setup(tmp2); @@ -276,93 +284,94 @@ void prepareSort(embedDBOperator *op) { iteratorState->file = sortedFile; data->fileInterface->close(unsortedFile); data->fileIterator = iteratorState; -} +#endif + } -/** - * @brief The data given in the unsortedFile is sorted and stored in the sortedFile - * - * @param fileInterface The file interface - * @param unsortedFile The file that is loaded with row data - * @param sortedFile An empty file - * @param recordSize The size of the records - * @param count The total number of records stored in unsortedFile - * @return file_iterator_state_t* An iterator that is used to retrieve the sorted records - */ -file_iterator_state_t *startSort(sortData *data, void *unsortedFile, void *sortedFile) { - // Initialize external_sort_t structure - external_sort_t es; - es.key_size = data->keySize; - es.value_size = data->recordSize; - es.record_size = data->recordSize; - es.key_offset = data->keyOffset; - es.headerSize = BLOCK_HEADER_SIZE; - es.page_size = PAGE_SIZE; - es.num_pages = (uint32_t)ceil((float)data->count / ((es.page_size - es.headerSize) / es.record_size)); + /** + * @brief The data given in the unsortedFile is sorted and stored in the sortedFile + * + * @param fileInterface The file interface + * @param unsortedFile The file that 
is loaded with row data + * @param sortedFile An empty file + * @param recordSize The size of the records + * @param count The total number of records stored in unsortedFile + * @return file_iterator_state_t* An iterator that is used to retrieve the sorted records + */ + file_iterator_state_t *startSort(sortData * data, void *unsortedFile, void *sortedFile) { + // Initialize external_sort_t structure + external_sort_t es; + es.key_size = data->keySize; + es.value_size = data->recordSize; + es.record_size = data->recordSize; + es.key_offset = data->keyOffset; + es.headerSize = BLOCK_HEADER_SIZE; + es.page_size = PAGE_SIZE; + es.num_pages = (uint32_t)ceil((float)data->count / ((es.page_size - es.headerSize) / es.record_size)); // Reduce buffer size for Arduino #ifdef ARDUINO - const int buffer_max_pages = 1; // Reduced to minimum for Arduino + const int buffer_max_pages = 1; // Reduced to minimum for Arduino #else const int buffer_max_pages = 4; #endif - char *buffer = malloc(buffer_max_pages * es.page_size + es.record_size); - char *tuple_buffer = buffer + es.page_size * buffer_max_pages; + char *buffer = malloc(buffer_max_pages * es.page_size + es.record_size); + char *tuple_buffer = buffer + es.page_size * buffer_max_pages; - if (buffer == NULL) { + if (buffer == NULL) { #ifdef PRINT_ERRORS - printf("ERROR: SORT: buffer malloc failed m\n"); + printf("ERROR: SORT: buffer malloc failed m\n"); #endif - return NULL; - } + return NULL; + } - // Prepare the file iterator data for sorting - file_iterator_state_t *iteratorState = malloc(sizeof(file_iterator_state_t)); - if (iteratorState == NULL) { + // Prepare the file iterator data for sorting + file_iterator_state_t *iteratorState = malloc(sizeof(file_iterator_state_t)); + if (iteratorState == NULL) { #ifdef PRINT_ERRORS - printf("Error: SORT: iterator malloc failed\n"); + printf("Error: SORT: iterator malloc failed\n"); #endif - free(buffer); - buffer = NULL; - return NULL; - } + free(buffer); + buffer = NULL; + 
return NULL; + } - iteratorState->file = unsortedFile; - iteratorState->recordsRead = 0; - iteratorState->totalRecords = data->count; - iteratorState->recordSize = es.record_size; - iteratorState->fileInterface = data->fileInterface; - iteratorState->currentRecord = 0; - iteratorState->recordsLeftInBlock = 0; - iteratorState->resultFile = 0; + iteratorState->file = unsortedFile; + iteratorState->recordsRead = 0; + iteratorState->totalRecords = data->count; + iteratorState->recordSize = es.record_size; + iteratorState->fileInterface = data->fileInterface; + iteratorState->currentRecord = 0; + iteratorState->recordsLeftInBlock = 0; + iteratorState->resultFile = 0; - data->fileIterator = iteratorState; + data->fileIterator = iteratorState; - // Metrics - metrics_t metrics = initMetric(); + // Metrics + metrics_t metrics = initMetric(); - long result_file_ptr = 0; + long result_file_ptr = 0; - int err; + int err; // Use simpler sort for Arduino with small datasets #ifdef ARDUINO #ifdef DEBUG - printf("DEBUG: Starting Arduino sort with %d records\n", data->count); + printf("DEBUG: Starting Arduino sort with %d records\n", data->count); #endif - if (data->count <= 100) { // Use flash_minsort for all datasets on Arduino (more memory efficient) + if (data->count <= 100) { // Use flash_minsort for all datasets on Arduino (more memory efficient) #ifdef DEBUG - printf("DEBUG: Using flash_minsort for small dataset\n"); + printf("DEBUG: Using flash_minsort for small dataset\n"); #endif - err = flash_minsort(iteratorState, tuple_buffer, sortedFile, buffer, buffer_max_pages * es.page_size, &es, &result_file_ptr, &metrics, data->compareFn); - } else { + err = flash_minsort(iteratorState, tuple_buffer, sortedFile, buffer, buffer_max_pages * es.page_size, &es, &result_file_ptr, &metrics, data->compareFn); + } else { #ifdef DEBUG - printf("DEBUG: Using flash_minsort for large dataset\n"); + printf("DEBUG: Using flash_minsort for large dataset\n"); #endif - // Use flash_minsort for 
larger datasets (more memory efficient than adaptive_sort) - err = flash_minsort(iteratorState, tuple_buffer, sortedFile, buffer, buffer_max_pages * es.page_size, &es, &result_file_ptr, &metrics, data->compareFn); - } + // Use flash_minsort for larger datasets (more memory efficient than adaptive_sort) + err = flash_minsort(iteratorState, tuple_buffer, sortedFile, buffer, buffer_max_pages * es.page_size, &es, &result_file_ptr, &metrics, data->compareFn); + } #ifdef DEBUG - printf("DEBUG: Arduino sort completed with error code: %d\n", err); + printf("DEBUG: Arduino sort completed with error code: %d\n", err); #endif #else // Use adaptive sort on desktop @@ -372,114 +381,114 @@ file_iterator_state_t *startSort(sortData *data, void *unsortedFile, void *sorte #endif #ifdef PRINT_METRIC - printf("\tComplete. Comparisons: %d Writes: %d Reads: %d Memcpys: %d\n", metrics.num_compar, metrics.num_writes, metrics.num_reads, metrics.num_memcpys); + printf("\tComplete. Comparisons: %d Writes: %d Reads: %d Memcpys: %d\n", metrics.num_compar, metrics.num_writes, metrics.num_reads, metrics.num_memcpys); #endif - iteratorState->resultFile = result_file_ptr; + iteratorState->resultFile = result_file_ptr; #ifdef PRINT_ERRORS - if (8 == err) { - printf("Out of memory!\n"); - } else if (10 == err) { - printf("File Read Error!\n"); - } else if (9 == err) { - printf("File Write Error!\n"); - } + if (8 == err) { + printf("Out of memory!\n"); + } else if (10 == err) { + printf("File Read Error!\n"); + } else if (9 == err) { + printf("File Write Error!\n"); + } #endif - // Reset file iterator - iteratorState->recordsRead = 0; - iteratorState->currentRecord = 0; - - // Clean up - free(buffer); - buffer = NULL; - return iteratorState; -} - -/** - * @brief Reads the next record from the sorted file - * - * @param data The ORDER BY operator data - * @param buffer A buffer that is the size of one record - * @return uint8_t 0: if read was successful. 
other wise none zero - */ -uint8_t readNextRecord(void *data, void *buffer) { - file_iterator_state_t *iteratorState = ((sortData *)data)->fileIterator; + // Reset file iterator + iteratorState->recordsRead = 0; + iteratorState->currentRecord = 0; - if (iteratorState->recordsRead >= iteratorState->totalRecords) { - return 1; // No more records left to read + // Clean up + free(buffer); + buffer = NULL; + return iteratorState; } + /** + * @brief Reads the next record from the sorted file + * + * @param data The ORDER BY operator data + * @param buffer A buffer that is the size of one record + * @return uint8_t 0: if read was successful. other wise none zero + */ + uint8_t readNextRecord(void *data, void *buffer) { + file_iterator_state_t *iteratorState = ((sortData *)data)->fileIterator; + + if (iteratorState->recordsRead >= iteratorState->totalRecords) { + return 1; // No more records left to read + } + #ifdef ARDUINO - // For pure memory sort on Arduino, read directly from memory buffer - if (iteratorState->file != NULL && iteratorState->resultFile == 0) { - memcpy(buffer, (char *)iteratorState->file + iteratorState->recordsRead * iteratorState->recordSize, - iteratorState->recordSize); - iteratorState->recordsRead++; - iteratorState->currentRecord++; - return 0; - } + // For pure memory sort on Arduino, read directly from memory buffer + if (iteratorState->file != NULL && iteratorState->resultFile == 0) { + memcpy(buffer, (char *)iteratorState->file + iteratorState->recordsRead * iteratorState->recordSize, + iteratorState->recordSize); + iteratorState->recordsRead++; + iteratorState->currentRecord++; + return 0; + } #endif - uint32_t recordPerPage = (PAGE_SIZE - BLOCK_HEADER_SIZE) / iteratorState->recordSize; + uint32_t recordPerPage = (PAGE_SIZE - BLOCK_HEADER_SIZE) / iteratorState->recordSize; - // Read next page if current buffer is empty - if (iteratorState->currentRecord % recordPerPage == 0 || iteratorState->recordsRead == 0) { - 
iteratorState->fileInterface->seek(iteratorState->currentRecord / recordPerPage * PAGE_SIZE + iteratorState->resultFile, iteratorState->file); - iteratorState->fileInterface->readRel(((sortData *)data)->readBuffer, PAGE_SIZE, 1, iteratorState->file); + // Read next page if current buffer is empty + if (iteratorState->currentRecord % recordPerPage == 0 || iteratorState->recordsRead == 0) { + iteratorState->fileInterface->seek(iteratorState->currentRecord / recordPerPage * PAGE_SIZE + iteratorState->resultFile, iteratorState->file); + iteratorState->fileInterface->readRel(((sortData *)data)->readBuffer, PAGE_SIZE, 1, iteratorState->file); - if (((sortData *)data)->fileInterface->error(iteratorState->file)) { + if (((sortData *)data)->fileInterface->error(iteratorState->file)) { #ifdef PRINT_ERRORS - printf("ERROR: SORT: next record read failed"); + printf("ERROR: SORT: next record read failed"); #endif - return 2; + return 2; + } } - } - // Copy result to ouput buffer - memcpy(buffer, ((sortData *)data)->readBuffer + BLOCK_HEADER_SIZE + iteratorState->recordSize * (iteratorState->currentRecord % recordPerPage), iteratorState->recordSize); - iteratorState->recordsRead++; - iteratorState->currentRecord++; + // Copy result to ouput buffer + memcpy(buffer, ((sortData *)data)->readBuffer + BLOCK_HEADER_SIZE + iteratorState->recordSize * (iteratorState->currentRecord % recordPerPage), iteratorState->recordSize); + iteratorState->recordsRead++; + iteratorState->currentRecord++; #ifdef DEBUG - printf("DEBUG: ROWDATA from file:\n"); - for (int i = 0; i < iteratorState->recordSize - SORT_KEY_SIZE; i++) { - printf("%2x ", ((uint8_t *)buffer)[i]); - } - printf("\n"); + printf("DEBUG: ROWDATA from file:\n"); + for (int i = 0; i < iteratorState->recordSize - SORT_KEY_SIZE; i++) { + printf("%2x ", ((uint8_t *)buffer)[i]); + } + printf("\n"); #endif - return 0; -} + return 0; + } -void closeSort(file_iterator_state_t *iteratorState) { + void closeSort(file_iterator_state_t * 
iteratorState) { #ifdef ARDUINO - // For pure memory sort, we need to free the memory buffer - if (iteratorState->file != NULL && iteratorState->resultFile == 0) { - free(iteratorState->file); - iteratorState->file = NULL; - return; - } + // For pure memory sort, we need to free the memory buffer + if (iteratorState->file != NULL && iteratorState->resultFile == 0) { + free(iteratorState->file); + iteratorState->file = NULL; + return; + } #endif - if (iteratorState->file != NULL) { - iteratorState->fileInterface->close(iteratorState->file); - iteratorState->file = NULL; + if (iteratorState->file != NULL) { + iteratorState->fileInterface->close(iteratorState->file); + iteratorState->file = NULL; + } } -} -/** - * @brief Initalizes default metric values - * - * @return metrics_t - */ -metrics_t initMetric() { - metrics_t metrics; - metrics.num_reads = 0; - metrics.num_compar = 0; - metrics.num_memcpys = 0; - metrics.num_runs = 0; - metrics.num_writes = 0; - return metrics; -} + /** + * @brief Initalizes default metric values + * + * @return metrics_t + */ + metrics_t initMetric() { + metrics_t metrics; + metrics.num_reads = 0; + metrics.num_compar = 0; + metrics.num_memcpys = 0; + metrics.num_runs = 0; + metrics.num_writes = 0; + return metrics; + } diff --git a/test/test_sort/test_sort_query_interface.cpp b/test/test_sort/test_sort_query_interface.cpp index 181b8000..ab6a3b0d 100644 --- a/test/test_sort/test_sort_query_interface.cpp +++ b/test/test_sort/test_sort_query_interface.cpp @@ -63,8 +63,12 @@ void setUp() { /* Setup files */ char dataPath[] = DATA_FILE_PATH_UWA, indexPath[] = INDEX_FILE_PATH_UWA; stateUWA->fileInterface = getFileInterface(); - stateUWA->dataFile = setupFile(dataPath); - stateUWA->indexFile = setupFile(indexPath); + + stateUWA->dataFile = stateUWA->fileInterface->setup(dataPath); + stateUWA->indexFile = stateUWA->fileInterface->setup(indexPath); +#ifdef ARDUINO + stateUWA->fileInterface->tempFilePath +#endif stateUWA->bufferSizeInBlocks = 4; 
stateUWA->buffer = malloc(stateUWA->bufferSizeInBlocks * stateUWA->pageSize); @@ -125,10 +129,11 @@ void insertNValues(embedDBState* state, int n, int mode) { } break; case 1: + key = 1; for (int i = n; i >= 0; i--) { - key = i; value = i; embedDBPut(state, &key, &value); + key++; } break; default: @@ -141,7 +146,7 @@ void runTestSequentialValues() { #ifdef ARDUINO insertNValues(stateUWA, 1, 0); #else - insertNValues(stateUWA, 10, 0); + insertNValues(stateUWA, 10, 1); #endif embedDBIterator it; @@ -163,8 +168,8 @@ void runTestSequentialValues() { int recordCount = 0; while (exec(orderByOp)) { - TEST_ASSERT_GREATER_OR_EQUAL_UINT32_MESSAGE(previous, ((uint32_t)recordBuffer[1]) / 10.0, "Sort value is not greater than or equal to previous value."); - previous = ((uint32_t)recordBuffer[1]) / 10.0; + TEST_ASSERT_GREATER_OR_EQUAL_UINT32_MESSAGE(previous, ((uint32_t)recordBuffer[1]), "Sort value is not greater than or equal to previous value."); + previous = ((uint32_t)recordBuffer[1]); recordCount++; // Safety break to prevent infinite loop @@ -178,45 +183,6 @@ void runTestSequentialValues() { } void runTestUsingUWA500k() { - printf("Advanced Query Example.\n"); - embedDBState* stateUWA = (embedDBState*)malloc(sizeof(embedDBState)); - stateUWA->keySize = 4; - stateUWA->dataSize = 12; - stateUWA->compareKey = int32Comparator; - stateUWA->compareData = int32Comparator; - stateUWA->pageSize = 512; - stateUWA->eraseSizeInPages = 4; - stateUWA->numDataPages = 20000; - stateUWA->numIndexPages = 1000; - stateUWA->numSplinePoints = 30; - - if (STORAGE_TYPE == 1) { - TEST_FAIL_MESSAGE("Dataflash is not currently supported. 
Defaulting to SD card interface."); - } - - /* Setup files */ - char dataPath[] = DATA_FILE_PATH_UWA, indexPath[] = INDEX_FILE_PATH_UWA; - stateUWA->fileInterface = getFileInterface(); - stateUWA->dataFile = setupFile(dataPath); - stateUWA->indexFile = setupFile(indexPath); - - stateUWA->bufferSizeInBlocks = 4; - stateUWA->buffer = malloc(stateUWA->bufferSizeInBlocks * stateUWA->pageSize); - stateUWA->parameters = EMBEDDB_USE_BMAP | EMBEDDB_USE_INDEX | EMBEDDB_RESET_DATA; - stateUWA->bitmapSize = 2; - stateUWA->inBitmap = inBitmapInt16; - stateUWA->updateBitmap = updateBitmapInt16; - stateUWA->buildBitmapFromRange = buildBitmapInt16FromRange; - int8_t initResult = embedDBInit(stateUWA, 1); - if (initResult != 0) { - TEST_FAIL_MESSAGE("There was an error setting up the state of the UWA dataset."); - } - - int8_t colSizes[] = {4, 4, 4, 4}; - int8_t colSignedness[] = {embedDB_COLUMN_UNSIGNED, embedDB_COLUMN_SIGNED, embedDB_COLUMN_SIGNED, embedDB_COLUMN_SIGNED}; - ColumnType colTypes[] = {embedDB_COLUMN_UINT32, embedDB_COLUMN_INT32, embedDB_COLUMN_INT32, embedDB_COLUMN_INT32}; - embedDBSchema* baseSchema = embedDBCreateSchema(4, colSizes, colSignedness, colTypes); - // Insert data const char datafileName[] = "data/uwa500K.bin"; insertData(stateUWA, datafileName); @@ -244,19 +210,12 @@ void runTestUsingUWA500k() { orderByOp->close(orderByOp); embedDBFreeOperatorRecursive(&orderByOp); - // Close embedDB - embedDBClose(stateUWA); - tearDownFile(stateUWA->dataFile); - tearDownFile(stateUWA->indexFile); - free(stateUWA->fileInterface); - free(stateUWA->buffer); - free(stateUWA); - embedDBFreeSchema(&baseSchema); } int runUnityTests() { UNITY_BEGIN(); RUN_TEST(runTestSequentialValues); + RUN_TEST(runTestUsingUWA500k); return UNITY_END(); } From 8185a1e2d8874120610f28553645aaf79f9f60b7 Mon Sep 17 00:00:00 2001 From: xelArga Date: Wed, 26 Nov 2025 17:09:50 -0800 Subject: [PATCH 05/17] fixed some typos and added check for data input --- src/embedDB/embedDB.c | 54 
++++++++++---------- src/embedDB/embedDB.h | 10 ++-- src/embedDBExample.h | 2 +- src/query-interface/advancedQueries.c | 2 +- src/query-interface/sort/sortWrapper.c | 4 +- test/test_sort/test_sort_query_interface.cpp | 6 +++ 6 files changed, 42 insertions(+), 36 deletions(-) diff --git a/src/embedDB/embedDB.c b/src/embedDB/embedDB.c index 6ce7c6c0..8a4ffec6 100644 --- a/src/embedDB/embedDB.c +++ b/src/embedDB/embedDB.c @@ -61,7 +61,7 @@ int8_t embedDBInitVarDataFromFile(embedDBState *state); int8_t shiftRecordLevelConsistencyBlocks(embedDBState *state); void embedDBInitSplineFromFile(embedDBState *state); int32_t getMaxError(embedDBState *state, void *buffer); -void updateMaxiumError(embedDBState *state, void *buffer); +void updateMaximumError(embedDBState *state, void *buffer); int8_t embedDBSetupVarDataStream(embedDBState *state, void *key, embedDBVarDataStream **varData, id_t recordNumber); uint32_t cleanSpline(embedDBState *state, uint32_t minPageNumber); void readToWriteBuf(embedDBState *state); @@ -212,7 +212,7 @@ int8_t embedDBInit(embedDBState *state, size_t indexMaxError) { return -1; } - /* Initalize the spline structure if being used */ + /* Initialize the spline structure if being used */ if (!EMBEDDB_USING_BINARY_SEARCH(state->parameters)) { if (state->numSplinePoints < 4) { #ifdef PRINT_ERRORS @@ -341,7 +341,7 @@ int8_t embedDBInitDataFromFile(embedDBState *state) { hasData = true; maxLogicalPageId = logicalPageId; physicalPageId++; - updateMaxiumError(state, buffer); + updateMaximumError(state, buffer); count++; i = 2; } else { @@ -362,7 +362,7 @@ int8_t embedDBInitDataFromFile(embedDBState *state) { if (validData && logicalPageId == maxLogicalPageId + 1) { maxLogicalPageId = logicalPageId; physicalPageId++; - updateMaxiumError(state, buffer); + updateMaximumError(state, buffer); moreToRead = !(readPage(state, physicalPageId)); count++; } else { @@ -385,7 +385,7 @@ int8_t embedDBInitDataFromFile(embedDBState *state) { physicalPageId = (physicalPageId 
+ pagesToBlockBoundary) % state->numDataPages; moreToRead = !(readPage(state, physicalPageId)); - /* there should have been more to read becuase the file should not be empty at this point if it was not empty at the previous block */ + /* there should have been more to read because the file should not be empty at this point if it was not empty at the previous block */ if (!moreToRead) { return -1; } @@ -442,7 +442,7 @@ int8_t embedDBInitDataFromFileWithRecordLevelConsistency(embedDBState *state) { hasPermanentData = true; maxLogicalPageId = logicalPageId; physicalPageId++; - updateMaxiumError(state, buffer); + updateMaximumError(state, buffer); count++; i = 4; } else { @@ -460,7 +460,7 @@ int8_t embedDBInitDataFromFileWithRecordLevelConsistency(embedDBState *state) { if (validData && logicalPageId == maxLogicalPageId + 1) { maxLogicalPageId = logicalPageId; physicalPageId++; - updateMaxiumError(state, buffer); + updateMaximumError(state, buffer); moreToRead = !(readPage(state, physicalPageId)); count++; } else { @@ -494,7 +494,7 @@ int8_t embedDBInitDataFromFileWithRecordLevelConsistency(embedDBState *state) { /* record-level consistency recovery algorithm */ uint32_t numPagesRead = 0; uint32_t numPagesToRead = blockSize * 2; - uint32_t rlcMaxLogicialPageNumber = UINT32_MAX; + uint32_t rlcMaxLogicalPageNumber = UINT32_MAX; uint32_t rlcMaxRecordCount = UINT32_MAX; uint32_t rlcMaxPage = UINT32_MAX; moreToRead = !(readPage(state, physicalPageId)); @@ -507,7 +507,7 @@ int8_t embedDBInitDataFromFileWithRecordLevelConsistency(embedDBState *state) { uint32_t numRecords = EMBEDDB_GET_COUNT(buffer); if (rlcMaxRecordCount == UINT32_MAX || numRecords > rlcMaxRecordCount) { rlcMaxRecordCount = numRecords; - rlcMaxLogicialPageNumber = logicalPageId; + rlcMaxLogicalPageNumber = logicalPageId; rlcMaxPage = numPagesRead; } } @@ -516,11 +516,11 @@ int8_t embedDBInitDataFromFileWithRecordLevelConsistency(embedDBState *state) { numPagesRead++; } - /* need to find larged record-level 
consistency page to place back into the buffer and either one or both of the record-level consistency pages */ + /* need to find large record-level consistency page to place back into the buffer and either one or both of the record-level consistency pages */ uint32_t eraseStartingPage = 0; uint32_t eraseEndingPage = 0; uint32_t numBlocksToErase = 0; - if (rlcMaxLogicialPageNumber == UINT32_MAX) { + if (rlcMaxLogicalPageNumber == UINT32_MAX) { eraseStartingPage = state->rlcPhysicalStartingPage % state->numDataPages; numBlocksToErase = 2; } else { @@ -657,7 +657,7 @@ int8_t embedDBInitIndex(embedDBState *state) { int8_t embedDBInitIndexFromFile(embedDBState *state) { id_t logicalIndexPageId = 0; - id_t maxLogicaIndexPageId = 0; + id_t maxLogicalIndexPageId = 0; id_t physicalIndexPageId = 0; /* This will become zero if there is no more to read */ @@ -669,13 +669,13 @@ int8_t embedDBInitIndexFromFile(embedDBState *state) { while (moreToRead && count < state->numIndexPages) { memcpy(&logicalIndexPageId, buffer, sizeof(id_t)); - if (count == 0 || logicalIndexPageId == maxLogicaIndexPageId + 1) { - maxLogicaIndexPageId = logicalIndexPageId; + if (count == 0 || logicalIndexPageId == maxLogicalIndexPageId + 1) { + maxLogicalIndexPageId = logicalIndexPageId; physicalIndexPageId++; moreToRead = !(readIndexPage(state, physicalIndexPageId)); count++; } else { - haveWrappedInMemory = logicalIndexPageId == maxLogicaIndexPageId - state->numIndexPages + 1; + haveWrappedInMemory = logicalIndexPageId == maxLogicalIndexPageId - state->numIndexPages + 1; break; } } @@ -683,20 +683,20 @@ int8_t embedDBInitIndexFromFile(embedDBState *state) { if (count == 0) return 0; - state->nextIdxPageId = maxLogicaIndexPageId + 1; + state->nextIdxPageId = maxLogicalIndexPageId + 1; id_t physicalPageIDOfSmallestData = 0; if (haveWrappedInMemory) { physicalPageIDOfSmallestData = logicalIndexPageId % state->numIndexPages; } readIndexPage(state, physicalPageIDOfSmallestData); 
memcpy(&(state->minIndexPageId), buffer, sizeof(id_t)); - state->numAvailIndexPages = state->numIndexPages + state->minIndexPageId - maxLogicaIndexPageId - 1; + state->numAvailIndexPages = state->numIndexPages + state->minIndexPageId - maxLogicalIndexPageId - 1; return 0; } int8_t embedDBInitVarData(embedDBState *state) { - // Initialize variable data outpt buffer + // Initialize variable data output buffer initBufferPage(state, EMBEDDB_VAR_WRITE_BUFFER(state->parameters)); state->variableDataHeaderSize = state->keySize + sizeof(id_t); @@ -797,7 +797,7 @@ int8_t embedDBInitVarDataFromFile(embedDBState *state) { physicalVariablePageId = (physicalVariablePageId + pagesToBlockBoundary) % state->numVarPages; moreToRead = !(readVariablePage(state, physicalVariablePageId)); - /* there should have been more to read becuase the file should not be empty at this point if it was not empty at the previous block */ + /* there should have been more to read because the file should not be empty at this point if it was not empty at the previous block */ if (!moreToRead) { return -1; } @@ -1055,7 +1055,7 @@ int8_t embedDBPut(embedDBState *state, void *key, void *data) { memcpy((void *)((int8_t *)buf + EMBEDDB_IDX_HEADER_SIZE + state->bitmapSize * idxcount), bm, state->bitmapSize); } - updateMaxiumError(state, state->buffer); + updateMaximumError(state, state->buffer); count = 0; initBufferPage(state, 0); @@ -1139,7 +1139,7 @@ int8_t shiftRecordLevelConsistencyBlocks(embedDBState *state) { uint32_t eraseStartingPage = state->rlcPhysicalStartingPage; uint32_t eraseEndingPage = 0; - /* if we have wraped, we need to erase an additional block as the block we are shifting into is not empty */ + /* if we have wrapped, we need to erase an additional block as the block we are shifting into is not empty */ bool haveWrapped = (state->minDataPageId % state->numDataPages) == ((state->rlcPhysicalStartingPage + numRecordLevelConsistencyPages) % state->numDataPages); uint32_t numBlocksToErase = 
haveWrapped ? 2 : 3; @@ -1175,7 +1175,7 @@ int8_t shiftRecordLevelConsistencyBlocks(embedDBState *state) { return 0; } -void updateMaxiumError(embedDBState *state, void *buffer) { +void updateMaximumError(embedDBState *state, void *buffer) { // Calculate error within the page int32_t maxError = getMaxError(state, buffer); if (state->maxError < maxError) { @@ -1637,7 +1637,7 @@ void embedDBCloseIterator(embedDBIterator *it) { /** * @brief Flushes output buffer. * @param state algorithm state structure - * @returns 0 if successul and a non-zero value otherwise + * @returns 0 if successful and a non-zero value otherwise */ int8_t embedDBFlushVar(embedDBState *state) { /* Check if we actually have any variable data in the buffer */ @@ -1667,7 +1667,7 @@ int8_t embedDBFlushVar(embedDBState *state) { /** * @brief Flushes output buffer. * @param state algorithm state structure - * @returns 0 if successul and a non-zero value otherwise + * @returns 0 if successful and a non-zero value otherwise */ int8_t embedDBFlush(embedDBState *state) { // As the first buffer is the data write buffer, no address change is required @@ -1859,7 +1859,7 @@ int8_t embedDBNextVar(embedDBState *state, embedDBIterator *it, void *key, void * @param state embedDB algorithm state structure * @param key Key for the record * @param varData Return variable for variable data as a embedDBVarDataStream (Unallocated). Returns NULL if no variable data. **Be sure to free the stream after you are done with it** - * @return Returns 0 if sucessfull or no variable data for the record, 1 if the records variable data was overwritten, 2 if the page failed to read, and 3 if the memorey failed to allocate. + * @return Returns 0 if successful or no variable data for the record, 1 if the records variable data was overwritten, 2 if the page failed to read, and 3 if the memory failed to allocate. 
*/ int8_t embedDBSetupVarDataStream(embedDBState *state, void *key, embedDBVarDataStream **varData, id_t recordNumber) { void *dataBuf = (int8_t *)state->buffer + state->pageSize * EMBEDDB_DATA_READ_BUFFER; @@ -2099,7 +2099,7 @@ int8_t writeTemporaryPage(embedDBState *state, void *buffer) { /** * @brief Calculates the number of spline points not in use by embedDB and deletes them * @param state embedDB algorithm state structure - * @param key The minimim key embedDB still needs points for + * @param key The minimum key embedDB still needs points for * @return Returns the number of points deleted */ uint32_t cleanSpline(embedDBState *state, uint32_t minPageNumber) { @@ -2179,7 +2179,7 @@ id_t writeVariablePage(embedDBState *state, void *buffer) { return -1; } - // Make sure the address being witten to wraps around + // Make sure the address being written to wraps around id_t physicalPageId = state->nextVarPageId % state->numVarPages; // Erase data if needed diff --git a/src/embedDB/embedDB.h b/src/embedDB/embedDB.h index 328fb9ed..47565d7c 100644 --- a/src/embedDB/embedDB.h +++ b/src/embedDB/embedDB.h @@ -176,7 +176,7 @@ typedef struct { /** * @brief Erases a span of paes from file - * @param startPage The first page to earse + * @param startPage The first page to erase * @param pageSize The page to erase up to (exclusive) * @param file The file data that was stored in embedDBState->dataFile etc * @return 1 for success and 0 for failure @@ -268,7 +268,7 @@ typedef struct { id_t nextIdxPageId; /* Next logical page id for index. Page id is an incrementing value and may not always be same as physical page id. 
*/ id_t nextVarPageId; /* Page number of next var page to be written */ uint32_t nextRLCPhysicalPageLocation; /* Physical page number for the location for the next record-level-consistency page */ - uint32_t rlcPhysicalStartingPage; /* Physical page number for the starting page of the record-level consistnecy pages */ + uint32_t rlcPhysicalStartingPage; /* Physical page number for the starting page of the record-level consistency pages */ id_t currentVarLoc; /* Current variable address offset to write at (bytes from beginning of file) */ void *buffer; /* Pre-allocated memory buffer for use by algorithm */ spline *spl; /* Spline model */ @@ -441,14 +441,14 @@ uint32_t embedDBVarDataStreamRead(embedDBState *state, embedDBVarDataStream *str /** * @brief Flushes output buffer. * @param state algorithm state structure - * @returns 0 if successul and a non-zero value otherwise + * @returns 0 if successful and a non-zero value otherwise */ int8_t embedDBFlush(embedDBState *state); /** * @brief Flushes output buffer. * @param state algorithm state structure - * @returns 0 if successul and a non-zero value otherwise + * @returns 0 if successful and a non-zero value otherwise */ int8_t embedDBFlushVar(embedDBState *state); @@ -501,7 +501,7 @@ id_t writeIndexPage(embedDBState *state, void *buffer); id_t writeVariablePage(embedDBState *state, void *buffer); /** - * @brief Writes a temporary page when using record-levek-consistency to storage. + * @brief Writes a temporary page when using record-level-consistency to storage. * @param state embedDB algorithm state structure * @param pageNum Page number to read * @return Returns 0 for success and non-zero value for an error. 
diff --git a/src/embedDBExample.h b/src/embedDBExample.h index ea4a3a58..8da2ab57 100644 --- a/src/embedDBExample.h +++ b/src/embedDBExample.h @@ -2,7 +2,7 @@ /** * @file embedDBExample.h * @author EmbedDB Team (See Authors.md) - * @brief This file includes and example for insterting and retrieving sequential records for EmbeDB. + * @brief This file includes and example for inserting and retrieving sequential records for EmbedDB. * @copyright Copyright 2023 * EmbedDB Team * @par Redistribution and use in source and binary forms, with or without diff --git a/src/query-interface/advancedQueries.c b/src/query-interface/advancedQueries.c index f0903708..77954853 100644 --- a/src/query-interface/advancedQueries.c +++ b/src/query-interface/advancedQueries.c @@ -897,7 +897,7 @@ void closeKeyJoin(embedDBOperator* op) { } /** - * @brief Creates an operator for perfoming an equijoin on the keys (sorted and distinct) of two tables + * @brief Creates an operator for performing an equi-join on the keys (sorted and distinct) of two tables */ embedDBOperator* createKeyJoinOperator(embedDBOperator* input1, embedDBOperator* input2) { embedDBOperator* op = malloc(sizeof(embedDBOperator)); diff --git a/src/query-interface/sort/sortWrapper.c b/src/query-interface/sort/sortWrapper.c index 6344d214..2127dec1 100644 --- a/src/query-interface/sort/sortWrapper.c +++ b/src/query-interface/sort/sortWrapper.c @@ -141,7 +141,7 @@ int8_t writePageWithHeader(void *buffer, const uint32_t blockIndex, const uint32 * * @param data The operator data * @param op The previous operator - * @param unsortedFile A prexisting file that the row data will be writen to + * @param unsortedFile A preexisting file that the row data will be written to * @param recordSize The size of the data * @param keySize The size of the key * @param keyOffset The offset of the key with in the record (# of bytes) @@ -479,7 +479,7 @@ void prepareSort(embedDBOperator *op) { } /** - * @brief Initalizes default metric values + * 
@brief Initializes default metric values * * @return metrics_t */ diff --git a/test/test_sort/test_sort_query_interface.cpp b/test/test_sort/test_sort_query_interface.cpp index ab6a3b0d..842034b7 100644 --- a/test/test_sort/test_sort_query_interface.cpp +++ b/test/test_sort/test_sort_query_interface.cpp @@ -135,6 +135,12 @@ void insertNValues(embedDBState* state, int n, int mode) { embedDBPut(state, &key, &value); key++; } + for (int i = 0, data = 10; i <= n; i++) { + key = i + 1; + embedDBGet(state, (void*)&key, (void*)&value); + TEST_ASSERT_MESSAGE(value == data, "value isn't equal to extracted data"); + data--; + } break; default: break; From 5f854f2d332faf5849021d8ec63d7702a86abef1 Mon Sep 17 00:00:00 2001 From: Alex Argatoff Date: Mon, 1 Dec 2025 10:07:01 -0800 Subject: [PATCH 06/17] temp files are removed, fixed test with limit set, changed column to be sorted which fixed a test.. --- .../desktopFileInterface.c | 47 +++++++++++++++---- lib/SD-File-Interface/SDFileInterface.c | 27 +++++++++++ src/benchmarks/sortBenchmark.h | 4 +- src/embedDB/embedDB.h | 13 +++-- src/query-interface/advancedQueries.c | 1 + src/query-interface/sort/sortWrapper.c | 15 ++++-- test/test_sort/test_sort_query_interface.cpp | 8 ++-- 7 files changed, 92 insertions(+), 23 deletions(-) diff --git a/lib/Desktop-File-Interface/desktopFileInterface.c b/lib/Desktop-File-Interface/desktopFileInterface.c index 510daf84..8347c9ae 100644 --- a/lib/Desktop-File-Interface/desktopFileInterface.c +++ b/lib/Desktop-File-Interface/desktopFileInterface.c @@ -1,7 +1,5 @@ #include "desktopFileInterface.h" -static char tempPathBuffer[256]; - typedef struct { char *filename; FILE *file; @@ -24,6 +22,31 @@ void tearDownFile(void *file) { free(file); } +int8_t FILE_REMOVE(void *file) { + if (file == NULL) return 0; + FILE_INFO *fileInfo = (FILE_INFO *)file; + + if (fileInfo->file != NULL) { + fclose(fileInfo->file); + fileInfo->file = NULL; + } + + int8_t result = 1; + if (fileInfo->filename != NULL) { + if 
(remove(fileInfo->filename) != 0) { + result = 0; +#ifdef PRINT_ERRORS + perror("ERROR: Failed to remove temp file"); +#endif + } + free(fileInfo->filename); + fileInfo->filename = NULL; + + free(fileInfo); + return result; + } +} + int8_t FILE_READ(void *buffer, uint32_t pageNum, uint32_t pageSize, void *file) { FILE_INFO *fileInfo = (FILE_INFO *)file; fseek(fileInfo->file, pageSize * pageNum, SEEK_SET); @@ -132,14 +155,12 @@ char *tempFilePath(void) { strncpy(tempPathBuffer, path, sizeof(tempPathBuffer) - 1); tempPathBuffer[sizeof(tempPathBuffer) - 1] = '\0'; free(path); - return tempPathBuffer; + } else { + /* Fallback */ + snprintf(tempPathBuffer, sizeof(tempPathBuffer), + "embeddb_%lu.tmp", (unsigned long)rand()); } - /* Fallback */ - snprintf(tempPathBuffer, sizeof(tempPathBuffer), - "embeddb_%lu.tmp", (unsigned long)rand()); - return tempPathBuffer; - #else /* POSIX systems */ snprintf(tempPathBuffer, sizeof(tempPathBuffer), @@ -149,9 +170,13 @@ char *tempFilePath(void) { if (fd >= 0) { close(fd); } - - return tempPathBuffer; #endif + + char *out = malloc(strlen(tempPathBuffer) + 1); + if (out) { + strcpy(out, tempPathBuffer); + } + return out; } embedDBFileInterface *getFileInterface() { @@ -170,6 +195,7 @@ embedDBFileInterface *getFileInterface() { fileInterface->tell = FILE_TELL; fileInterface->setup = setupFile; fileInterface->teardown = tearDownFile; + fileInterface->removeFile = FILE_REMOVE; fileInterface->tempFilePath = tempFilePath; return fileInterface; } @@ -190,6 +216,7 @@ embedDBFileInterface *getMockEraseFileInterface() { fileInterface->tell = FILE_TELL; fileInterface->setup = setupFile; fileInterface->teardown = tearDownFile; + fileInterface->removeFile = FILE_REMOVE; fileInterface->tempFilePath = tempFilePath; return fileInterface; } diff --git a/lib/SD-File-Interface/SDFileInterface.c b/lib/SD-File-Interface/SDFileInterface.c index 8adae28d..e2a9b749 100644 --- a/lib/SD-File-Interface/SDFileInterface.c +++ 
b/lib/SD-File-Interface/SDFileInterface.c @@ -57,6 +57,32 @@ void tearDownSDFile(void *file) { free(file); } +int8_t SD_FILE_REMOVE(void *file) { + if (file == NULL) return 0; + SD_FILE_INFO *fileInfo = (SD_FILE_INFO *)file; + + if (fileInfo->sdFile != NULL) { + sd_fclose(fileInfo->sdFile); + fileInfo->sdFile = NULL; + } + + int8_t result = 1; + if (fileInfo->filename != NULL) { + /* Try to use C remove as fallback; replace with sd-specific remove if available */ + if (remove(fileInfo->filename) != 0) { + result = 0; +#ifdef PRINT_ERRORS + perror("ERROR: Failed to remove SD temp file"); +#endif + } + free(fileInfo->filename); + fileInfo->filename = NULL; + } + + free(fileInfo); + return result; +} + int8_t FILE_READ(void *buffer, uint32_t pageNum, uint32_t pageSize, void *file) { SD_FILE_INFO *fileInfo = (SD_FILE_INFO *)file; sd_fseek(fileInfo->sdFile, pageSize * pageNum, SEEK_SET); @@ -136,6 +162,7 @@ embedDBFileInterface *getSDInterface() { fileInterface->flush = FILE_FLUSH; fileInterface->setup = setupSDFile; fileInterface->teardown = tearDownSDFile; + fileInterface->removeFile = SD_FILE_REMOVE; fileInterface->tempFilePath = sdfat_tempFilePath; return fileInterface; } diff --git a/src/benchmarks/sortBenchmark.h b/src/benchmarks/sortBenchmark.h index 33a0ca49..174c4cd7 100644 --- a/src/benchmarks/sortBenchmark.h +++ b/src/benchmarks/sortBenchmark.h @@ -163,7 +163,7 @@ void sort_order_last(int32_t numValues, embedDBState* stateUWA, embedDBSchema* b embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); embedDBOperator* orderByOp = createOrderByOperator(stateUWA, projColsOrderBy, 1, numValues, merge_sort_int32_comparator); orderByOp->init(orderByOp); - int32_t* recordBuffer = orderByOp->recordBuffer; + int32_t* recordBuffer = (int32_t*)orderByOp->recordBuffer; for (uint32_t i = 0; i < 10; i++) { if (!exec(orderByOp)) { @@ -188,7 +188,7 @@ void sort_order_first(int32_t numValues, embedDBState* stateUWA, embedDBSchema* uint8_t 
projColsOB[] = {0, 1}; embedDBOperator* projColsOrderBy = createProjectionOperator(orderByOp, 2, projColsOB); projColsOrderBy->init(projColsOrderBy); - int32_t* recordBuffer = projColsOrderBy->recordBuffer; + int32_t* recordBuffer = (int32_t*)projColsOrderBy->recordBuffer; for (uint32_t i = 0; i < 10; i++) { if (!exec(projColsOrderBy)) { diff --git a/src/embedDB/embedDB.h b/src/embedDB/embedDB.h index 47565d7c..7b97b80c 100644 --- a/src/embedDB/embedDB.h +++ b/src/embedDB/embedDB.h @@ -230,20 +230,25 @@ typedef struct { /** * */ - int32_t(*tell)(void* file); + int32_t (*tell)(void *file); /** * @brief Pointer to external function for file setup */ - void* (*setup)(const char* filename); + void *(*setup)(const char *filename); /** * @brief Pointer to external function for file teardown */ - void (*teardown)(void* file); + void (*teardown)(void *file); /** * @brief Pointer to platform specific tmp file path */ - char* (*tempFilePath)(void); + char *(*tempFilePath)(void); + + /** + * @brief Pointer to file for deletion + */ + int8_t (*removeFile)(void *file); } embedDBFileInterface; diff --git a/src/query-interface/advancedQueries.c b/src/query-interface/advancedQueries.c index 77954853..0b085f05 100644 --- a/src/query-interface/advancedQueries.c +++ b/src/query-interface/advancedQueries.c @@ -536,6 +536,7 @@ void closeOrderBy(embedDBOperator* op) { * @param dbState The database state * @param input The operator that this operator can pull records from * @param colNum The column that is being sorted on + * @param limit The first values to be read and sorted - not like a true limit at the moment * @param compareFn The function being used to make comparisons between row data */ embedDBOperator* createOrderByOperator(embedDBState* dbState, embedDBOperator* input, int8_t colNum, int32_t limit, int8_t (*compareFn)(void* a, void* b)) { diff --git a/src/query-interface/sort/sortWrapper.c b/src/query-interface/sort/sortWrapper.c index 2127dec1..a754a347 100644 --- 
a/src/query-interface/sort/sortWrapper.c +++ b/src/query-interface/sort/sortWrapper.c @@ -245,11 +245,13 @@ void prepareSort(embedDBOperator *op) { return; } - const char *tmp1 = data->fileInterface->tempFilePath(); - const char *tmp2 = data->fileInterface->tempFilePath(); + char *tmp1 = data->fileInterface->tempFilePath(); + char *tmp2 = data->fileInterface->tempFilePath(); void *unsortedFile = data->fileInterface->setup(tmp1); void *sortedFile = data->fileInterface->setup(tmp2); + free(tmp1); + free(tmp2); if (unsortedFile == NULL || sortedFile == NULL) { #ifdef PRINT_ERRORS @@ -283,6 +285,9 @@ void prepareSort(embedDBOperator *op) { // Finish iteratorState->file = sortedFile; data->fileInterface->close(unsortedFile); + if (data->fileInterface->removeFile) { + data->fileInterface->removeFile(unsortedFile); + } data->fileIterator = iteratorState; #endif } @@ -435,7 +440,8 @@ void prepareSort(embedDBOperator *op) { // Read next page if current buffer is empty if (iteratorState->currentRecord % recordPerPage == 0 || iteratorState->recordsRead == 0) { - iteratorState->fileInterface->seek(iteratorState->currentRecord / recordPerPage * PAGE_SIZE + iteratorState->resultFile, iteratorState->file); + uint32_t pageOffset = (iteratorState->currentRecord / recordPerPage) * PAGE_SIZE; + iteratorState->fileInterface->seek(pageOffset, iteratorState->file); iteratorState->fileInterface->readRel(((sortData *)data)->readBuffer, PAGE_SIZE, 1, iteratorState->file); if (((sortData *)data)->fileInterface->error(iteratorState->file)) { @@ -474,6 +480,9 @@ void prepareSort(embedDBOperator *op) { if (iteratorState->file != NULL) { iteratorState->fileInterface->close(iteratorState->file); + if (iteratorState->fileInterface->removeFile) { + iteratorState->fileInterface->removeFile(iteratorState->file); + } iteratorState->file = NULL; } } diff --git a/test/test_sort/test_sort_query_interface.cpp b/test/test_sort/test_sort_query_interface.cpp index 842034b7..b6bf8337 100644 --- 
a/test/test_sort/test_sort_query_interface.cpp +++ b/test/test_sort/test_sort_query_interface.cpp @@ -70,7 +70,7 @@ void setUp() { stateUWA->fileInterface->tempFilePath #endif - stateUWA->bufferSizeInBlocks = 4; + stateUWA->bufferSizeInBlocks = 4; stateUWA->buffer = malloc(stateUWA->bufferSizeInBlocks * stateUWA->pageSize); stateUWA->parameters = EMBEDDB_USE_BMAP | EMBEDDB_USE_INDEX | EMBEDDB_RESET_DATA; stateUWA->bitmapSize = 2; @@ -165,7 +165,7 @@ void runTestSequentialValues() { embedDBOperator* scanOpOrderBy = createTableScanOperator(stateUWA, &it, baseSchema); uint8_t projColsOB[] = {0, 1}; embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); - embedDBOperator* orderByOp = createOrderByOperator(stateUWA, projColsOrderBy, 1, 3, int32Comparator); + embedDBOperator* orderByOp = createOrderByOperator(stateUWA, projColsOrderBy, 1, -1, int32Comparator); orderByOp->init(orderByOp); @@ -201,9 +201,9 @@ void runTestUsingUWA500k() { embedDBInitIterator(stateUWA, &it); embedDBOperator* scanOpOrderBy = createTableScanOperator(stateUWA, &it, baseSchema); - uint8_t projColsOB[] = {0, 1}; + uint8_t projColsOB[] = {0, 2}; embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); - embedDBOperator* orderByOp = createOrderByOperator(stateUWA, projColsOrderBy, 1, -1, int32Comparator); + embedDBOperator* orderByOp = createOrderByOperator(stateUWA, projColsOrderBy, 2, -1, int32Comparator); orderByOp->init(orderByOp); int32_t* recordBuffer = (int32_t*)orderByOp->recordBuffer; uint32_t previous = 0; From 20ea1de3e95e05db5e7a59b35dc0b001330e1286 Mon Sep 17 00:00:00 2001 From: xelArga Date: Mon, 1 Dec 2025 13:11:09 -0800 Subject: [PATCH 07/17] sorting and temp files seemingly working --- test/test_sort/test_sort_query_interface.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_sort/test_sort_query_interface.cpp b/test/test_sort/test_sort_query_interface.cpp index b6bf8337..89abd881 
100644 --- a/test/test_sort/test_sort_query_interface.cpp +++ b/test/test_sort/test_sort_query_interface.cpp @@ -201,9 +201,9 @@ void runTestUsingUWA500k() { embedDBInitIterator(stateUWA, &it); embedDBOperator* scanOpOrderBy = createTableScanOperator(stateUWA, &it, baseSchema); - uint8_t projColsOB[] = {0, 2}; + uint8_t projColsOB[] = {0, 3}; embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); - embedDBOperator* orderByOp = createOrderByOperator(stateUWA, projColsOrderBy, 2, -1, int32Comparator); + embedDBOperator* orderByOp = createOrderByOperator(stateUWA, projColsOrderBy, 3, -1, int32Comparator); orderByOp->init(orderByOp); int32_t* recordBuffer = (int32_t*)orderByOp->recordBuffer; uint32_t previous = 0; From f55d5ac8285228b7d2c6b09c58e0ebf3b482d09f Mon Sep 17 00:00:00 2001 From: xelArga Date: Mon, 1 Dec 2025 16:45:06 -0800 Subject: [PATCH 08/17] added binary debugger for test data --- .../test_buffered_read_iterator.cpp | 8 +- .../test_embedDB_data_recovery.cpp | 16 +-- .../test_embedDB_multiple_instances.cpp | 28 +++--- test/test_sort/test_sort_query_interface.cpp | 99 ++++++++++--------- 4 files changed, 81 insertions(+), 70 deletions(-) diff --git a/test/test_buffered_read_iterator/test_buffered_read_iterator.cpp b/test/test_buffered_read_iterator/test_buffered_read_iterator.cpp index da6a6264..e5e76f8b 100644 --- a/test/test_buffered_read_iterator/test_buffered_read_iterator.cpp +++ b/test/test_buffered_read_iterator/test_buffered_read_iterator.cpp @@ -123,7 +123,7 @@ void embedDBIterator_should_return_records_in_storage_and_in_write_buffer(void) while (embedDBNext(state, &it, &itKey, itData)) { uint32_t actualDataValue; memcpy(&actualDataValue, itData, sizeof(int)); - snprintf(message, 100, "embedDBIterator returned the wrong data value for key %li.", key); + snprintf(message, 100, "embedDBIterator returned the wrong data value for key %u.", key); TEST_ASSERT_EQUAL_UINT32_MESSAGE(expectedDataValue, actualDataValue, 
message); expectedDataValue += 5; numRecordsReturned += 1; @@ -169,7 +169,7 @@ void embedDBIterator_should_return_records_in_storage_and_in_write_buffer_with_f /* test data and keys are returned correctly */ while (embedDBNext(state, &it, &actualKeyValue, returnedDataValue)) { TEST_ASSERT_EQUAL_UINT32_MESSAGE(expectedKeyValue, actualKeyValue, "embedDBIterator returned an unexpected key value"); - snprintf(message, 100, "embedDBIterator did not return the correct data for key %li).", expectedKeyValue); + snprintf(message, 100, "embedDBIterator did not return the correct data for key %u).", expectedKeyValue); memcpy(&actualDataValue, returnedDataValue, sizeof(float)); TEST_ASSERT_EQUAL_FLOAT_MESSAGE(expectedDataValue, actualDataValue, message); expectedKeyValue += 3; @@ -216,7 +216,7 @@ void embedDBIterator_should_return_keys_in_write_buffer_when_no_data_has_been_fl while (embedDBNext(state, &it, &actualKeyValue, returnedDataBuffer)) { TEST_ASSERT_EQUAL_UINT32_MESSAGE(key, actualKeyValue, "embedDBIterator returned an unexpected key value"); memcpy(&acutalDataValue, returnedDataBuffer, sizeof(uint32_t)); - snprintf(message, 100, "embedDBIterator did not return the correct data for key %li).", key); + snprintf(message, 100, "embedDBIterator did not return the correct data for key %u).", key); TEST_ASSERT_EQUAL_UINT32_MESSAGE(data, acutalDataValue, message); data += 15; key += 1; @@ -261,7 +261,7 @@ void embedDBIterator_should_filter_and_rechieve_records_by_data_value(void) { /* assert returned records have correct values */ while (embedDBNext(state, &it, &itKey, itData)) { TEST_ASSERT_EQUAL_UINT32_MESSAGE(expectedKeyValue, itKey, "embedDBIterator returned a key value which should have been filtered out"); - snprintf(message, 100, "embedDBIterator did not return the correct data for key %li).", expectedKeyValue); + snprintf(message, 100, "embedDBIterator did not return the correct data for key %u).", expectedKeyValue); TEST_ASSERT_EQUAL_UINT32_MESSAGE(expectedDataValue, 
itData[0], message); expectedKeyValue += 1; expectedDataValue += 5; diff --git a/test/test_embedDB_data_recovery/test_embedDB_data_recovery.cpp b/test/test_embedDB_data_recovery/test_embedDB_data_recovery.cpp index cf12045b..eff41b6e 100644 --- a/test/test_embedDB_data_recovery/test_embedDB_data_recovery.cpp +++ b/test/test_embedDB_data_recovery/test_embedDB_data_recovery.cpp @@ -224,9 +224,9 @@ void embedDB_inserts_correctly_into_data_file_after_reload() { /* Records inserted before reload */ for (int i = 0; i < 3654; i++) { int8_t getResult = embedDBGet(state, &key, recordBuffer); - snprintf(message, 100, "EmbedDB get encountered an error fetching the data for key %li.", key); + snprintf(message, 100, "EmbedDB get encountered an error fetching the data for key %u.", key); TEST_ASSERT_EQUAL_INT8_MESSAGE(0, getResult, message); - snprintf(message, 100, "EmbedDB get did not return correct data for a record inserted before reloading (key %li).", key); + snprintf(message, 100, "EmbedDB get did not return correct data for a record inserted before reloading (key %u).", key); TEST_ASSERT_EQUAL_MEMORY_MESSAGE(&data, ((int64_t *)recordBuffer), state->dataSize, message); key++; data++; @@ -235,9 +235,9 @@ void embedDB_inserts_correctly_into_data_file_after_reload() { data = 11; for (int i = 0; i < 42; i++) { int8_t getResult = embedDBGet(state, &key, recordBuffer); - snprintf(message, 100, "EmbedDB get encountered an error fetching the data for key %li.", key); + snprintf(message, 100, "EmbedDB get encountered an error fetching the data for key %u.", key); TEST_ASSERT_EQUAL_INT8_MESSAGE(0, getResult, message); - snprintf(message, 100, "EmbedDB get did not return correct data for a record inserted after reloading (key %li).", key); + snprintf(message, 100, "EmbedDB get did not return correct data for a record inserted after reloading (key %u).", key); TEST_ASSERT_EQUAL_MEMORY_MESSAGE(&data, ((int64_t *)recordBuffer), state->dataSize, message); key++; data++; @@ -258,9 +258,9 
@@ void embedDB_correctly_gets_records_after_reload_with_wrapped_data() { /* Records inserted before reload */ for (int i = 0; i < 3678; i++) { getResult = embedDBGet(state, &key, recordBuffer); - snprintf(message, 100, "EmbedDB get encountered an error fetching the data for key %li.", key); + snprintf(message, 100, "EmbedDB get encountered an error fetching the data for key %u.", key); TEST_ASSERT_EQUAL_INT8_MESSAGE(0, getResult, message); - snprintf(message, 100, "EmbedDB get did not return correct data for a record inserted before reloading (key %li).", key); + snprintf(message, 100, "EmbedDB get did not return correct data for a record inserted before reloading (key %u).", key); TEST_ASSERT_EQUAL_MEMORY_MESSAGE(&data, ((int64_t *)recordBuffer), state->dataSize, message); key++; data++; @@ -293,9 +293,9 @@ void embedDB_queries_correctly_with_non_liner_data_after_reload() { uint32_t i; for (i = 0; i < 3822; i++) { int8_t getResult = embedDBGet(state, &key, recordBuffer); - snprintf(message, 80, "EmbedDB get encountered an error fetching the data for key %li.", key); + snprintf(message, 80, "EmbedDB get encountered an error fetching the data for key %u.", key); TEST_ASSERT_EQUAL_INT8_MESSAGE(0, getResult, message); - snprintf(message, 100, "EmbedDB get did not return correct data for a record inserted before reloading (key %li).", key); + snprintf(message, 100, "EmbedDB get did not return correct data for a record inserted before reloading (key %u).", key); TEST_ASSERT_EQUAL_MEMORY_MESSAGE(&data, recordBuffer, sizeof(int64_t), message); key += increment; data += 1; diff --git a/test/test_embedDB_multiple_instances/test_embedDB_multiple_instances.cpp b/test/test_embedDB_multiple_instances/test_embedDB_multiple_instances.cpp index 27e92b8b..46fe1ead 100644 --- a/test/test_embedDB_multiple_instances/test_embedDB_multiple_instances.cpp +++ b/test/test_embedDB_multiple_instances/test_embedDB_multiple_instances.cpp @@ -67,9 +67,9 @@ #else #include 
"desktopFileInterface.h" #define FILE_TYPE FILE -#define DATA_FILE_PATH "build/artifacts/dataFile%li.bin" -#define INDEX_FILE_PATH "build/artifacts/indexFile%li.bin" -#define VAR_DATA_FILE_PATH "build/artifacts/varFile%li.bin" +#define DATA_FILE_PATH "build/artifacts/dataFile%u.bin" +#define INDEX_FILE_PATH "build/artifacts/indexFile%u.bin" +#define VAR_DATA_FILE_PATH "build/artifacts/varFile%u.bin" #endif #include "unity.h" @@ -128,9 +128,9 @@ void queryRecords(embedDBState *state, int32_t numberOfRecords, int32_t starting char message[120]; for (int32_t i = 0; i < numberOfRecords; i++) { int8_t getResult = embedDBGet(state, &key, &dataBuffer); - snprintf(message, 120, "embedDBGet returned a non-zero value when getting key %li from state %li", key, i); + snprintf(message, 120, "embedDBGet returned a non-zero value when getting key %u from state %u", key, i); TEST_ASSERT_EQUAL_INT8_MESSAGE(0, getResult, message); - snprintf(message, 120, "embedDBGet did not return the correct data for key %li from state %li", key, i); + snprintf(message, 120, "embedDBGet did not return the correct data for key %u from state %u", key, i); TEST_ASSERT_EQUAL_INT32_MESSAGE(data, dataBuffer, message); key++; data++; @@ -151,7 +151,7 @@ void insertRecordsFromFile(embedDBState *state, const char *fileName, int32_t nu for (int16_t i = 0; i < count; i++) { void *buf = (infileBuffer + headerSize + i * state->recordSize); int8_t putResult = embedDBPut(state, buf, (void *)((int8_t *)buf + 4)); - snprintf(message, 100, "embedDBPut returned non-zero value for insert of key %li", *((uint32_t *)buf)); + snprintf(message, 100, "embedDBPut returned non-zero value for insert of key %u", *((uint32_t *)buf)); TEST_ASSERT_EQUAL_INT8_MESSAGE(0, putResult, message); numInserted++; if (numInserted >= numRecords) { @@ -182,7 +182,7 @@ void insertRecordsFromFileWithVarData(embedDBState *state, const char *fileName, memcpy(&key, buf, sizeof(uint32_t)); snprintf(varData, 30, "Hello world %li", key); int8_t 
putResult = embedDBPutVar(state, buf, (void *)((int8_t *)buf + 4), varData, strlen(varData)); - snprintf(message, 100, "embedDBPut returned non-zero value for insert of key %li", key); + snprintf(message, 100, "embedDBPut returned non-zero value for insert of key %u", key); TEST_ASSERT_EQUAL_INT8_MESSAGE(0, putResult, message); numInserted++; if (numInserted >= numRecords) { @@ -213,9 +213,9 @@ void queryRecordsFromFile(embedDBState *state, const char *fileName, int32_t num int8_t getResult = embedDBGet(state, buf, dataBuffer); uint32_t key = 0; memcpy(&key, buf, sizeof(uint32_t)); - snprintf(message, 100, "embedDBGet was not able to find the data for key %li", key); + snprintf(message, 100, "embedDBGet was not able to find the data for key %u", key); TEST_ASSERT_EQUAL_INT8_MESSAGE(0, getResult, message); - snprintf(message, 100, "embedDBGet did not return the correct data for key %li", key); + snprintf(message, 100, "embedDBGet did not return the correct data for key %u", key); TEST_ASSERT_EQUAL_MEMORY_MESSAGE((int8_t *)buf + 4, dataBuffer, state->dataSize, message); numRead++; if (numRead >= numRecords) @@ -246,17 +246,17 @@ void queryRecordsFromFileWithVarData(embedDBState *state, const char *fileName, void *buf = (infileBuffer + headerSize + i * (state->keySize + state->dataSize)); uint32_t key = 0; memcpy(&key, buf, sizeof(uint32_t)); - snprintf(varDataExpected, 30, "Hello world %li", key); + snprintf(varDataExpected, 30, "Hello world %u", key); embedDBVarDataStream *stream = NULL; int8_t getResult = embedDBGetVar(state, buf, dataBuffer, &stream); - snprintf(message, 100, "embedDBGetVar was not able to find the data for key %li", key); + snprintf(message, 100, "embedDBGetVar was not able to find the data for key %u", key); TEST_ASSERT_EQUAL_INT8_MESSAGE(0, getResult, message); - snprintf(message, 100, "embedDBGetBar did not return the correct data for key %li", key); + snprintf(message, 100, "embedDBGetBar did not return the correct data for key %u", key); 
TEST_ASSERT_EQUAL_MEMORY_MESSAGE((int8_t *)buf + 4, dataBuffer, state->dataSize, message); uint32_t streamBytesRead = embedDBVarDataStreamRead(state, stream, varDataBuffer, strlen(varDataExpected)); - snprintf(message, 100, "embedDBGetVar did not return the correct number of bytes read for key %li.", key); + snprintf(message, 100, "embedDBGetVar did not return the correct number of bytes read for key %u.", key); TEST_ASSERT_EQUAL_UINT32_MESSAGE(strlen(varDataExpected), streamBytesRead, message); - snprintf(message, 100, "embedDBGetVar did not return the correct variable data for key %li", key); + snprintf(message, 100, "embedDBGetVar did not return the correct variable data for key %u", key); TEST_ASSERT_EQUAL_MEMORY_MESSAGE(varDataExpected, varDataBuffer, strlen(varDataExpected), message); numRead++; diff --git a/test/test_sort/test_sort_query_interface.cpp b/test/test_sort/test_sort_query_interface.cpp index 89abd881..4af03e92 100644 --- a/test/test_sort/test_sort_query_interface.cpp +++ b/test/test_sort/test_sort_query_interface.cpp @@ -43,46 +43,46 @@ #include "unity.h" -embedDBState* stateUWA; +embedDBState* state; embedDBSchema* baseSchema; void setUp() { if (STORAGE_TYPE == 1) { TEST_FAIL_MESSAGE("Dataflash is not currently supported. 
Defaulting to SD card interface."); } - stateUWA = (embedDBState*)malloc(sizeof(embedDBState)); - stateUWA->keySize = 4; - stateUWA->dataSize = 12; - stateUWA->compareKey = int32Comparator; - stateUWA->compareData = int32Comparator; - stateUWA->pageSize = 512; - stateUWA->eraseSizeInPages = 4; - stateUWA->numDataPages = 20000; - stateUWA->numIndexPages = 1000; - stateUWA->numSplinePoints = 30; + state = (embedDBState*)malloc(sizeof(embedDBState)); + state->keySize = 4; + state->dataSize = 12; + state->compareKey = int32Comparator; + state->compareData = int32Comparator; + state->pageSize = 512; + state->eraseSizeInPages = 4; + state->numDataPages = 20000; + state->numIndexPages = 1000; + state->numSplinePoints = 30; /* Setup files */ char dataPath[] = DATA_FILE_PATH_UWA, indexPath[] = INDEX_FILE_PATH_UWA; - stateUWA->fileInterface = getFileInterface(); + state->fileInterface = getFileInterface(); - stateUWA->dataFile = stateUWA->fileInterface->setup(dataPath); - stateUWA->indexFile = stateUWA->fileInterface->setup(indexPath); + state->dataFile = state->fileInterface->setup(dataPath); + state->indexFile = state->fileInterface->setup(indexPath); #ifdef ARDUINO - stateUWA->fileInterface->tempFilePath + state->fileInterface->tempFilePath #endif - stateUWA->bufferSizeInBlocks = 4; - stateUWA->buffer = malloc(stateUWA->bufferSizeInBlocks * stateUWA->pageSize); - stateUWA->parameters = EMBEDDB_USE_BMAP | EMBEDDB_USE_INDEX | EMBEDDB_RESET_DATA; - stateUWA->bitmapSize = 2; - stateUWA->inBitmap = inBitmapInt16; - stateUWA->updateBitmap = updateBitmapInt16; - stateUWA->buildBitmapFromRange = buildBitmapInt16FromRange; - int8_t initResult = embedDBInit(stateUWA, 1); + state->bufferSizeInBlocks = 4; + state->buffer = malloc(state->bufferSizeInBlocks * state->pageSize); + state->parameters = EMBEDDB_USE_BMAP | EMBEDDB_USE_INDEX | EMBEDDB_RESET_DATA; + state->bitmapSize = 2; + state->inBitmap = inBitmapInt16; + state->updateBitmap = updateBitmapInt16; + 
state->buildBitmapFromRange = buildBitmapInt16FromRange; + int8_t initResult = embedDBInit(state, 1); if (initResult != 0) { TEST_FAIL_MESSAGE("There was an error setting up the state of the UWA dataset."); } - stateUWA->rules = NULL; - stateUWA->numRules = 0; + state->rules = NULL; + state->numRules = 0; int8_t colSizes[] = {4, 12}; int8_t colSignedness[] = {embedDB_COLUMN_UNSIGNED, embedDB_COLUMN_UNSIGNED}; @@ -91,12 +91,12 @@ void setUp() { } void tearDown() { - embedDBClose(stateUWA); - tearDownFile(stateUWA->dataFile); - tearDownFile(stateUWA->indexFile); - free(stateUWA->fileInterface); - free(stateUWA->buffer); - free(stateUWA); + embedDBClose(state); + tearDownFile(state->dataFile); + tearDownFile(state->indexFile); + free(state->fileInterface); + free(state->buffer); + free(state); embedDBFreeSchema(&baseSchema); } @@ -147,12 +147,22 @@ void insertNValues(embedDBState* state, int n, int mode) { } } +void debugBinData(embedDBOperator* scanTableOp, uint32_t numValues, uint32_t col) { + scanTableOp->init(scanTableOp); + int32_t* buffer = (int32_t*)scanTableOp->recordBuffer; + for (int i = 0; i < numValues; ++i) { + exec(scanTableOp); + printf("%u ", (uint32_t)buffer[col]); + } + fflush(stdout); +} + void runTestSequentialValues() { // Insert test data #ifdef ARDUINO - insertNValues(stateUWA, 1, 0); + insertNValues(state, 1, 0); #else - insertNValues(stateUWA, 10, 1); + insertNValues(state, 10, 1); #endif embedDBIterator it; @@ -160,12 +170,12 @@ void runTestSequentialValues() { it.maxKey = NULL; it.minData = NULL; it.maxData = NULL; - embedDBInitIterator(stateUWA, &it); + embedDBInitIterator(state, &it); - embedDBOperator* scanOpOrderBy = createTableScanOperator(stateUWA, &it, baseSchema); + embedDBOperator* scanOpOrderBy = createTableScanOperator(state, &it, baseSchema); uint8_t projColsOB[] = {0, 1}; embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); - embedDBOperator* orderByOp = createOrderByOperator(stateUWA, 
projColsOrderBy, 1, -1, int32Comparator); + embedDBOperator* orderByOp = createOrderByOperator(state, projColsOrderBy, 1, -1, int32Comparator); orderByOp->init(orderByOp); @@ -188,22 +198,23 @@ void runTestSequentialValues() { embedDBFreeOperatorRecursive(&orderByOp); } -void runTestUsingUWA500k() { +void runTestUsingSEA100k() { // Insert data - const char datafileName[] = "data/uwa500K.bin"; - insertData(stateUWA, datafileName); + const char datafileName[] = "data/sea100K.bin"; + insertData(state, datafileName); embedDBIterator it; it.minKey = NULL; it.maxKey = NULL; it.minData = NULL; it.maxData = NULL; - embedDBInitIterator(stateUWA, &it); + embedDBInitIterator(state, &it); - embedDBOperator* scanOpOrderBy = createTableScanOperator(stateUWA, &it, baseSchema); - uint8_t projColsOB[] = {0, 3}; + embedDBOperator* scanOpOrderBy = createTableScanOperator(state, &it, baseSchema); + debugBinData(scanOpOrderBy, 20, 1); + uint8_t projColsOB[] = { 0, 1 }; embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); - embedDBOperator* orderByOp = createOrderByOperator(stateUWA, projColsOrderBy, 3, -1, int32Comparator); + embedDBOperator* orderByOp = createOrderByOperator(state, projColsOrderBy, 1, -1, int32Comparator); orderByOp->init(orderByOp); int32_t* recordBuffer = (int32_t*)orderByOp->recordBuffer; uint32_t previous = 0; @@ -221,7 +232,7 @@ void runTestUsingUWA500k() { int runUnityTests() { UNITY_BEGIN(); RUN_TEST(runTestSequentialValues); - RUN_TEST(runTestUsingUWA500k); + RUN_TEST(runTestUsingSEA100k); return UNITY_END(); } From 1c7e621341218644f4c62723c65b2ecd471a9243 Mon Sep 17 00:00:00 2001 From: Alex Argatoff Date: Thu, 8 Jan 2026 10:09:45 -0800 Subject: [PATCH 09/17] added debug print class and enabled debugging macros, on top of numerous attempts at finding the problem --- src/query-interface/sort/adaptive_sort.c | 173 +++++++++++-------- src/query-interface/sort/debug_print.c | 25 +++ src/query-interface/sort/debug_print.h | 8 + 
src/query-interface/sort/flash_minsort.c | 18 +- src/query-interface/sort/sortWrapper.c | 146 +++++++++++----- test/test_sort/test_sort_query_interface.cpp | 56 +++--- 6 files changed, 282 insertions(+), 144 deletions(-) create mode 100644 src/query-interface/sort/debug_print.c create mode 100644 src/query-interface/sort/debug_print.h diff --git a/src/query-interface/sort/adaptive_sort.c b/src/query-interface/sort/adaptive_sort.c index ed7d6ad1..22bc6dcd 100644 --- a/src/query-interface/sort/adaptive_sort.c +++ b/src/query-interface/sort/adaptive_sort.c @@ -50,10 +50,13 @@ #include "in_memory_sort.h" #include "no_output_heap.h" -// #define DEBUG 1 -// #define DEBUG_OUTPUT 1 -// #define DEBUG_READ 1 -// #define DEBUG_HEAP 0 +#include "debug_print.h" + + #define DEBUG 1 + #define DEBUG_OUTPUT 1 + #define DEBUG_READ 1 +#define DEBUG_HEAP 0 +#define ADAPTIVE_SORT_PRINT // #define ADAPTIVE_SORT_PRINT_FINISH @@ -67,22 +70,24 @@ void print_heap(char* buffer, int32_t heap_start_offset, int heap_size, int list int j; for (aa = 0; aa < 1; aa++) { addr = buffer + heap_start_offset; - printf("heap: "); + debug_log("heap: "); for (j = 0; j < heap_size; j++) - printf(" %d", *(int32_t*)(addr - j * es->record_size)); - printf("| "); + debug_log(" %d", *(int32_t*)(addr - j * es->record_size)); + debug_log("| "); } - printf(" "); + debug_log(" "); + // Prints the list for (aa = 0; aa < 1; aa++) { addr = buffer + es->page_size; - printf("list: "); + debug_log("list: "); for (j = 0; j < list_size; j++) - printf(" %d", *(int32_t*)(addr + j * es->record_size)); - printf("| "); + debug_log(" %d", *(int32_t*)(addr + j * es->record_size)); + debug_log("| "); } - printf("\n"); + debug_log("\n"); + } /** @@ -145,7 +150,7 @@ int adaptive_sort( if (optimistic) { // Do FLASH MinSort init first #ifdef DEBUG - printf("*Optimistic*\n"); + debug_log("*Optimistic*\n"); #endif MinSortState ms; @@ -161,18 +166,18 @@ int adaptive_sort( int32_t nobSortCost = numPasses * (10 + writeToReadRatio) / 10; 
#ifdef DEBUG - printf("Adaptive calculation.\n"); - printf("NOB sort cost. # runs: %d", numSublist); - printf(" # passes: %d cost: %d\n", numPasses, nobSortCost); - printf("MinSort cost. Num sublists: %d ", numSublist); - printf(" Avg. distinct/sublist: %d\n", avgDistinct / 10); + debug_log("Adaptive calculation.\n"); + debug_log("NOB sort cost. # runs: %d", numSublist); + debug_log(" # passes: %d cost: %d\n", numPasses, nobSortCost); + debug_log("MinSort cost. Num sublists: %d ", numSublist); + debug_log(" Avg. distinct/sublist: %d\n", avgDistinct / 10); #endif if (avgDistinct < nobSortCost) // if (true) { #ifdef DEBUG - printf("Performing MinSort Optimistic\n"); + debug_log("Performing MinSort Optimistic\n"); #endif int16_t count = 0; @@ -201,10 +206,11 @@ int adaptive_sort( metric->num_writes++; #ifdef DEBUG_OUTPUT - printf("Wrote output block. Block index: %d\n", blockIndex); + debug_log("Wrote output block. Block index: %d\n", blockIndex); for (int k = 0; k < values_per_page; k++) { - printf("%3d: 1 Output Record: %d\n", k, outputBuffer + es->headerSize + k * es->record_size + es->key_offset); + debug_log("%3d: 1 Output Record: %d\n", k, outputBuffer + es->headerSize + k * es->record_size + es->key_offset); } + #endif } } @@ -223,10 +229,11 @@ int adaptive_sort( metric->num_writes++; #ifdef DEBUG_OUTPUT - printf("Wrote output block. Block index: %d\n", blockIndex); + debug_log("Wrote output block. 
Block index: %d\n", blockIndex); for (int k = 0; k < values_per_page; k++) { - printf("%3d: 2 Output Record: %d\n", k, *(uint32_t*)(outputBuffer + es->headerSize + k * es->record_size + es->key_offset)); + debug_log("%3d: 2 Output Record: %d\n", k, *(uint32_t*)(outputBuffer + es->headerSize + k * es->record_size + es->key_offset)); } + #endif } @@ -299,6 +306,7 @@ int adaptive_sort( #ifdef DEBUG_HEAP print_heap(buffer, heapStartOffset, heapSize, listSize, es); + #endif if (recordsRead > 1) { @@ -323,7 +331,8 @@ int adaptive_sort( // Track number of distinct values per sublist avgDistinct = avgDistinct + (numDistinctInRun - avgDistinct / 10) * 10 / numSublist; #ifdef DEBUG - printf("Number of distinct values in sublist: %d Running average: %d\n", numDistinctInRun, avgDistinct / 10); + debug_log("Number of distinct values in sublist: %d Running average: %d\n", numDistinctInRun, avgDistinct / 10); + #endif numDistinctInRun = 1; @@ -340,8 +349,9 @@ int adaptive_sort( // Check if we've read all records from the current page if (recordsRead == 0) { // Check if there are any records left - if (recordsLeft <= 0) + if (recordsLeft <= 0) { break; + } // Just copy over from heap memcpy(buffer + es->headerSize + i * es->record_size, buffer + heapStartOffset, es->record_size); /* Heap into input/output block */ @@ -368,7 +378,8 @@ int adaptive_sort( // Track number of distinct values per sublist avgDistinct = avgDistinct + (numDistinctInRun - avgDistinct / 10) * 10 / numSublist; #ifdef DEBUG - printf("Number of distinct values in sublist: %d Running average: %d\n", numDistinctInRun, avgDistinct / 10); + debug_log("Number of distinct values in sublist: %d Running average: %d\n", numDistinctInRun, avgDistinct / 10); + #endif numDistinctInRun = 1; @@ -471,12 +482,13 @@ int adaptive_sort( } #ifdef DEBUG_OUTPUT - printf("Wrote block. 
Sublist: %d ", numSublist); - printf(" Idx: %d\n", sublistSize); - // printf("Offset: %lu\n", ftell(outputFile)-es->page_size); + debug_log("Wrote block. Sublist: %d ", numSublist); + debug_log(" Idx: %d\n", sublistSize); + //debug_log("Offset: %lu\n", ftell(outputFile) - es->page_size); for (int k = 0; k < tuplesPerPage; k++) { - printf("%3d: 3 Output Record: %d\n", k, *(uint32_t*)(buffer + es->headerSize + k * es->record_size + es->key_offset)); + debug_log("%3d: 3 Output Record: %d\n", k, *(uint32_t*)(buffer + es->headerSize + k * es->record_size + es->key_offset)); } + #endif metric->num_writes += 1; @@ -487,13 +499,15 @@ int adaptive_sort( // free(lastOutputKey); numSublist = metric->num_runs; #ifdef ADAPTIVE_SORT_PRINT - printf("Gen time: %d\n", metric->genTime); + debug_log("Gen time: %d\n", metric->genTime); + #endif // Track number of distinct values per sublist avgDistinct = avgDistinct + (numDistinctInRun - avgDistinct / 10) * 10 / numSublist; #ifdef ADAPTIVE_SORT_PRINT - printf("Final number of distinct values in sublist: %d Average: %d\n", numDistinctInRun, avgDistinct); + debug_log("Final number of distinct values in sublist: %d Average: %d\n", numDistinctInRun, avgDistinct); + #endif numDistinctInRun = 0; } /* end pessmistic */ @@ -523,11 +537,12 @@ int adaptive_sort( int32_t nobSortCost = numPasses * (10 + writeToReadRatio) / 10; #ifdef ADAPTIVE_SORT_PRINT - printf("Adaptive calculation.\n"); - printf("NOB sort cost. # runs: %d", numSublist); - printf(" # passes: %d cost: %d\n", numPasses, nobSortCost); - printf("MinSort cost. Num sublists: %d ", numSublist); - printf(" Avg. distinct/sublist: %d\n", avgDistinct / 10); + debug_log("Adaptive calculation.\n"); + debug_log("NOB sort cost. # runs: %d", numSublist); + debug_log(" # passes: %d cost: %d\n", numPasses, nobSortCost); + debug_log("MinSort cost. Num sublists: %d ", numSublist); + debug_log(" Avg. 
distinct/sublist: %d\n", avgDistinct / 10); + #endif // Make decision to use either no output buffer sort or MinSort @@ -540,7 +555,7 @@ int adaptive_sort( if (sublistVersionPossible) { // Use better performing version of minsort #ifdef ADAPTIVE_SORT_PRINT - printf("Performing MinSort with sorted sublists\n"); + debug_log("Performing MinSort with sorted sublists\n"); #endif ((file_iterator_state_t*)iteratorState)->file = outputFile; *resultFilePtr = 0; @@ -549,7 +564,7 @@ int adaptive_sort( } else { // Use normal version of minsort. Do not have enough space to index a value per sublist. Assumes data is not sorted in each region #ifdef ADAPTIVE_SORT_PRINT - printf("Performing MinSort\n"); + debug_log("Performing MinSort\n"); #endif ((file_iterator_state_t*)iteratorState)->file = outputFile; flash_minsort(iteratorState, tupleBuffer, outputFile, buffer, bufferSizeBytes, es, resultFilePtr, metric, compareFn); @@ -603,7 +618,7 @@ int adaptive_sort( // if (numSublist >= 32 && numSublist <= 64)// && avgDistinct/10 < 32) // { // // Switch to MinSort to finish off - // printf("Finishing sort with MinSort with sorted sublists\n"); + // debug_log("Finishing sort with MinSort with sorted sublists\n"); // ((file_iterator_state_t*) iteratorState)->file = outputFile; // // *resultFilePtr = lastMergeStart; // // fflush(outputFile); @@ -621,7 +636,8 @@ int adaptive_sort( lastWritePos = 0; } #ifdef ADAPTIVE_SORT_PRINT - printf("Pass number: %u Comparisons: %lu MemCopies: %lu TransferIn: %lu TransferOut: %lu TransferOther: %lu Other: %lu\n", passNumber, metric->num_compar, metric->num_memcpys, numShiftIntoOutput, numShiftOutOutput, numShiftOtherBlock, other); + debug_log("Pass number: %u Comparisons: %lu MemCopies: %lu TransferIn: %lu TransferOut: %lu TransferOther: %lu Other: %lu\n", passNumber, metric->num_compar, metric->num_memcpys, numShiftIntoOutput, numShiftOutOutput, numShiftOtherBlock, other); + #endif passNumber++; @@ -698,7 +714,8 @@ int adaptive_sort( #ifdef DEBUG void* 
buffer0Rec = (void*)buffer + es->headerSize; void* currentRec = (void*)buffer + i * es->page_size + es->headerSize; - printf("Swapping in buffer 0. Current key: %d New key: %d\n", *(uint32_t*)(buffer0Rec + es->key_offset), *(uint32_t*)(currentRec + es->key_offset)); + debug_log("Swapping in buffer 0. Current key: %d New key: %d\n", *(uint32_t*)(buffer0Rec + es->key_offset), *(uint32_t*)(currentRec + es->key_offset)); + #endif // Perform swap sublsBlkPos[i] = sublsFilePtr[0]; /* Note: Using subls_blk_pos[i] as a temp variable during swap */ // TODO: Update swap to not be variable length @@ -739,8 +756,9 @@ int adaptive_sort( #ifdef DEBUG_READ void* firstRec = (void*)buffer + i * es->page_size + es->headerSize; void* lastRec = (void*)buffer + i * es->page_size + es->headerSize + (*((int16_t*)(buffer + i * es->page_size + BLOCK_COUNT_OFFSET)) - 1) * es->record_size; - printf("Read Sublist: %d Block: %d NumRec: %d First key: %d Last key: %d\n", i, (int32_t) * (buffer + i * es->page_size), - *((int16_t*)(buffer + i * es->page_size + BLOCK_COUNT_OFFSET)), *(uint32_t*)(firstRec + es->key_offset), *(uint32_t*)(lastRec + es->key_offset)); + debug_log("Read Sublist: %d Block: %d NumRec: %d First key: %d Last key: %d\n", i, (int32_t) * (buffer + i * es->page_size), + *((int16_t*)(buffer + i * es->page_size + BLOCK_COUNT_OFFSET)), *(uint32_t*)(firstRec + es->key_offset), *(uint32_t*)(lastRec + es->key_offset)); + #endif // Initialize record1 to start of each block and record2 to empty record1[i] = i * es->page_size + es->headerSize; @@ -811,21 +829,22 @@ int adaptive_sort( #ifdef DEBUG void* buf = (void*)buffer + resultRecOffset; - printf("Smallest Record: %d From list: %d\n", *(uint32_t*)(buf + es->key_offset), resultBlock); - printf("List status: 0: (%d, %d) 1: (%d, %d) 2: (%d, %d) ResultList: %d\n", record1[0], record2[0], - record1[1], record2[1], record1[2], record2[2], resultBlock); + debug_log("Smallest Record: %d From list: %d\n", *(uint32_t*)(buf + es->key_offset), 
resultBlock); + debug_log("List status: 0: (%d, %d) 1: (%d, %d) 2: (%d, %d) ResultList: %d\n", record1[0], record2[0], + record1[1], record2[1], record1[2], record2[2], resultBlock); if (*(uint32_t*)(buf + es->key_offset) == 27391) { /* Output all block contents */ for (int l = 0; l < 2; l++) { - printf("Current block: %d # records: %d\n", l, tuplesPerPage); + debug_log("Current block: %d # records: %d\n", l, tuplesPerPage); for (int k = 0; k < tuplesPerPage; k++) { void* buf = (void*)(buffer + es->headerSize + k * es->record_size + l * es->page_size); - printf("%d: Record: %d Address: %p\n", k, buf + es->key_size, buf); + debug_log("%d: Record: %d Address: %p\n", k, buf + es->key_size, buf); } } - printf("HERE\n"); + debug_log("HERE\n"); } + #endif /* Add smallest tuple to output position in buffer (may already be in output buffer) */ @@ -846,7 +865,8 @@ int adaptive_sort( numShiftOutOutput++; #ifdef DEBUG void* buf = (void*)(buffer + record1[OUTPUT_BLOCK_ID]); - printf("Output record moved to list %d Key: %d\n", resultBlock, *(uint32_t*)(buf + es->key_size)); + debug_log("Output record moved to list %d Key: %d\n", resultBlock, *(uint32_t*)(buf + es->key_size)); + #endif /* Move result record into output block (record1[output_block]==record2[output_block]) */ metric->num_memcpys++; @@ -940,11 +960,12 @@ int adaptive_sort( record2[OUTPUT_BLOCK_ID] = -1; metric->num_writes++; #ifdef DEBUG_OUTPUT - printf("Wrote output block: %d # records: %d\n", *((int32_t*)buffer), tuplesPerPage); + debug_log("Wrote output block: %d # records: %d\n", *((int32_t*)buffer), tuplesPerPage); for (int k = 0; k < tuplesPerPage; k++) { void* buf = (void*)(buffer + es->headerSize + k * es->record_size); - printf("%3d: 4 Output Record: %d Address: %p\n", k, *(uint32_t*)(buf + es->key_offset), buf); + debug_log("%3d: 4 Output Record: %d Address: %p\n", k, *(uint32_t*)(buf + es->key_offset), buf); } + #endif } @@ -990,17 +1011,18 @@ int adaptive_sort( if (destBlk > bufferSizeInBlocks) { #ifdef 
ADAPTIVE_SORT_PRINT - printf("Incorrect destination block. List 1: (%d, %d) List 2: (%d, %d) List 3: (%d, %d) ResultList: %d\n", record1[0], record2[0], - record1[1], record2[1], record1[2], record2[2], resultBlock); + debug_log("Incorrect destination block. List 1: (%d, %d) List 2: (%d, %d) List 3: (%d, %d) ResultList: %d\n", record1[0], record2[0], + record1[1], record2[1], record1[2], record2[2], resultBlock); /* Output all block contents */ for (int l = 0; l < 3; l++) { - printf("Current block: %d # records: %d\n", l, tuplesPerPage); + debug_log("Current block: %d # records: %d\n", l, tuplesPerPage); for (int k = 0; k < tuplesPerPage; k++) { void* buf = (void*)(buffer + es->headerSize + k * es->record_size + l * es->page_size); - printf("%d: Record: %d Address: %p\n", k, buf + es->key_offset, buf); + debug_log("%d: Record: %d Address: %p\n", k, buf + es->key_offset, buf); } } + #endif } } @@ -1018,7 +1040,8 @@ int adaptive_sort( for (i = 0; i < numTransferThisPass; i++) { #ifdef DEBUG void* buf = (void*)(buffer + originPtr); - printf("Empty output block case. Moved output record back from list %d Key: %d\n", resultBlock, *(uint32_t*)(buf + es->key_offset)); + debug_log("Empty output block case. 
Moved output record back from list %d Key: %d\n", resultBlock, *(uint32_t*)(buf + es->key_offset)); + #endif numShiftIntoOutput++; /* Get top value from heap */ @@ -1039,7 +1062,8 @@ int adaptive_sort( record1[destBlk] = record1[destBlk] - es->record_size; #ifdef DEBUG void* buf = (void*)(buffer + originPtr); - printf("Moved output record back from list %d Key: %d\n", resultBlock, buf + es->key_offset); + debug_log("Moved output record back from list %d Key: %d\n", resultBlock, buf + es->key_offset); + #endif numShiftIntoOutput++; @@ -1049,7 +1073,8 @@ int adaptive_sort( metric->num_compar++; #ifdef DEBUG void* buf = (void*)(buffer + insert_ptr + es->record_size); - printf("Compare with list %d Key: %d\n", resultBlock, buf + es->key_offset); + debug_log("Compare with list %d Key: %d\n", resultBlock, buf + es->key_offset); + #endif if (0 < es->compare_fcn(buffer + originPtr + es->key_offset, buffer + insert_ptr + es->record_size + es->key_offset)) { /* shift next_val down */ @@ -1076,7 +1101,8 @@ int adaptive_sort( #ifdef DEBUG void* buf = (void*)(buffer + originPtr); - printf("Moved output record to list %d Key: %d\n", destBlk, buf + es->key_offset); + debug_log("Moved output record to list %d Key: %d\n", destBlk, buf + es->key_offset); + #endif numShiftOtherBlock++; @@ -1113,11 +1139,12 @@ int adaptive_sort( record2[resultBlock] = -1; record1[resultBlock] = resultBlock * es->page_size + es->headerSize; #ifdef DEBUG_READ - printf("Read block sublist: %d\n", resultBlock); + debug_log("Read block sublist: %d\n", resultBlock); void* firstRec = (void*)buffer + resultBlock * es->page_size + es->headerSize; void* lastRec = (void*)buffer + resultBlock * es->page_size + es->headerSize + (*((int16_t*)(buffer + resultBlock * es->page_size + BLOCK_COUNT_OFFSET)) - 1) * es->record_size; - printf("Read Sublist: %d Block: %d NumRec: %d First key: %d Last key: %d\n", resultBlock, (int32_t) * (buffer + resultBlock * es->page_size), - *((int16_t*)(buffer + resultBlock * 
es->page_size + BLOCK_COUNT_OFFSET)), firstRec + es->key_offset, lastRec + es->key_offset); + debug_log("Read Sublist: %d Block: %d NumRec: %d First key: %d Last key: %d\n", resultBlock, (int32_t) * (buffer + resultBlock * es->page_size), + *((int16_t*)(buffer + resultBlock * es->page_size + BLOCK_COUNT_OFFSET)), firstRec + es->key_offset, lastRec + es->key_offset); + #endif } } /* end if is the non output block empty */ @@ -1184,7 +1211,8 @@ int adaptive_sort( /* move the record */ #ifdef DEBUG void* buf = (void*)(buffer + outputCursor); - printf("Output list empty so moved record in output to list %d Key: %d\n", destBlk, *(uint32_t*)(buf + es->key_offset)); + debug_log("Output list empty so moved record in output to list %d Key: %d\n", destBlk, *(uint32_t*)(buf + es->key_offset)); + #endif numShiftOutOutput++; metric->num_memcpys++; @@ -1215,11 +1243,12 @@ int adaptive_sort( int16_t numRecords = *((int16_t*)(buffer + BLOCK_COUNT_OFFSET)); #ifdef DEBUG_READ - printf("Read block sublist: 0\n"); + debug_log("Read block sublist: 0\n"); void* firstRec = (void*)buffer + es->headerSize; void* lastRec = (void*)buffer + es->headerSize + (*((int16_t*)(buffer + BLOCK_COUNT_OFFSET)) - 1) * es->record_size; - printf("Read Sublist: %d Block: %d NumRec: %d First key: %d Last key: %d\n", 0, (int32_t) * (buffer + 0 * es->page_size), - *((int16_t*)(buffer + BLOCK_COUNT_OFFSET)), firstRec + es->key_offset, lastRec + es->key_offset); + debug_log("Read Sublist: %d Block: %d NumRec: %d First key: %d Last key: %d\n", 0, (int32_t) * (buffer + 0 * es->page_size), + *((int16_t*)(buffer + BLOCK_COUNT_OFFSET)), firstRec + es->key_offset, lastRec + es->key_offset); + #endif metric->num_reads += 1; @@ -1304,11 +1333,12 @@ int adaptive_sort( metric->num_writes += 1; #ifdef DEBUG_OUTPUT - printf("Wrote output block here.\n"); + debug_log("Wrote output block here.\n"); for (int k = 0; k < tuplesPerPage; k++) { void* buf = (void*)(buffer + es->headerSize + k * es->record_size); - printf("%3d: 5 
Output Record: %d Address: %p\n", k, *(uint32_t*)(buf + es->key_offset), buf); // TODO: Update to no use test_record_t + debug_log("%3d: 5 Output Record: %d Address: %p\n", k, *(uint32_t*)(buf + es->key_offset), buf); // TODO: Update to no use test_record_t } + #endif } @@ -1320,7 +1350,8 @@ int adaptive_sort( } /* end of merge */ *resultFilePtr = lastMergeStart; #ifdef ADAPTIVE_SORT_PRINT_FINISH - printf("Complete. Comparisons: %u Writes: %u Reads: %u Memcpys:\n", metric->num_compar, metric->num_writes, metric->num_reads, metric->num_memcpys); + debug_log("Complete. Comparisons: %u Writes: %u Reads: %u Memcpys:\n", metric->num_compar, metric->num_writes, metric->num_reads, metric->num_memcpys); + #endif /* cleanup */ diff --git a/src/query-interface/sort/debug_print.c b/src/query-interface/sort/debug_print.c new file mode 100644 index 00000000..1b37383f --- /dev/null +++ b/src/query-interface/sort/debug_print.c @@ -0,0 +1,25 @@ +#include "debug_print.h" + +#include +#include +#include + +#if defined(_WIN32) || defined(_WIN64) +#include +#define write _write +#else +#include +#endif + +void debug_log(const char *format, ...) 
{ + char buf[256]; + va_list ap; + va_start(ap, format); + int n = vsnprintf(buf, sizeof(buf), format, ap); + va_end(ap); + if (n > 0) { + if (n > (int)sizeof(buf)) n = sizeof(buf); + /* Use low-level write to avoid stdio buffering/locks that can block under some debuggers/targets */ + (void)write(2, buf, n); + } +} diff --git a/src/query-interface/sort/debug_print.h b/src/query-interface/sort/debug_print.h new file mode 100644 index 00000000..5c8f739d --- /dev/null +++ b/src/query-interface/sort/debug_print.h @@ -0,0 +1,8 @@ +#ifndef DEBUG_PRINT_H_ +#define DEBUG_PRINT_H_ + +#include + +void debug_log(const char *format, ...); + +#endif // DEBUG_PRINT_H_ diff --git a/src/query-interface/sort/flash_minsort.c b/src/query-interface/sort/flash_minsort.c index 65e05156..737c6e44 100644 --- a/src/query-interface/sort/flash_minsort.c +++ b/src/query-interface/sort/flash_minsort.c @@ -49,6 +49,8 @@ This is no output sort with block headers and iterator input. Heap used when mov #include "in_memory_sort.h" +#include "debug_print.h" + // #define DEBUG 1 // #define DEBUG_OUTPUT 1 // #define DEBUG_READ 1 @@ -71,7 +73,7 @@ void readPageMinSort(MinSortState *ms, int pageNum, external_sort_t *es, metrics // Read page into the buffer if (0 == is->fileInterface->read(ms->buffer, pageNum, es->page_size, fp)) { #ifdef DEBUG - printf("MINSORT: Failed to read block.\n"); + debug_log("MINSORT: Failed to read block.\n"); #endif } @@ -149,7 +151,7 @@ void init_MinSort(MinSortState *ms, external_sort_t *es, metrics_t *metric, int8 ms->min_initialized = (int8_t *)(ms->min + es->key_size * ms->numRegions); #ifdef DEBUG - printf("Memory overhead: %d Max regions: %d\r\n", 2 * SORT_KEY_SIZE + INT_SIZE, j); + // printf("Memory overhead: %d Max regions: %d\r\n", 2 * SORT_KEY_SIZE + INT_SIZE, j); printf("Page size: %d, Memory size: %d Record size: %d, Number of records: %lu, Number of blocks: %d, Blocks per region: %d Regions: %d\r\n", es->page_size, ms->memoryAvailable, ms->record_size, 
ms->num_records, ms->numBlocks, ms->blocks_per_region, ms->numRegions); #endif @@ -275,9 +277,9 @@ char *next_MinSort(MinSortState *ms, external_sort_t *es, void *tupleBuffer, met memcpy(tupleBuffer, &(ms->buffer[ms->record_size * i + es->headerSize]), ms->record_size); metric->num_memcpys++; #ifdef DEBUG - test_record_t *buf = (test_record_t *)(ms->buffer + es->headerSize + i * es->record_size); - buf = (test_record_t *)tupleBuffer; - printf("Returning tuple: %d\n", buf->key); + test_record_t *buf = (test_record_t *)(ms->buffer + es->headerSize + i * es->record_size); + buf = (test_record_t *)tupleBuffer; + debug_log("Returning tuple: %d\n", buf->key); #endif i++; // Move to the next record ms->tuplesOut++; @@ -297,7 +299,7 @@ char *next_MinSort(MinSortState *ms, external_sort_t *es, void *tupleBuffer, met done: #ifdef DEBUG - printf("Updating minimum in region\r\n"); + debug_log("Updating minimum in region\r\n"); #endif // After processing the current block, scan the rest of the region to find a smaller record if possible @@ -329,7 +331,7 @@ char *next_MinSort(MinSortState *ms, external_sort_t *es, void *tupleBuffer, met if (compareFn(dataVal, ms->current) == 0) { ms->nextIdx = k * ms->records_per_block + i; #ifdef DEBUG - printf("Next tuple at: %d k: %d i: %d\r\n", ms->nextIdx, k, i); + debug_log("Next tuple at: %d k: %d i: %d\r\n", ms->nextIdx, k, i); #endif goto done2; } @@ -359,7 +361,7 @@ char *next_MinSort(MinSortState *ms, external_sort_t *es, void *tupleBuffer, met } #ifdef DEBUG - printf("Updated minimum in block to: %d\r\n", ms->min[ms->regionIdx]); + debug_log("Updated minimum in block to: %d\r\n", ms->min[ms->regionIdx]); #endif } diff --git a/src/query-interface/sort/sortWrapper.c b/src/query-interface/sort/sortWrapper.c index a754a347..f9dc2326 100644 --- a/src/query-interface/sort/sortWrapper.c +++ b/src/query-interface/sort/sortWrapper.c @@ -1,9 +1,13 @@ #include "sortWrapper.h" +#include "debug_print.h" #include 
"query-interface/sort/in_memory_sort.h" #include "unistd.h" + #define PRINT_METRIC +#define DEBUG +#define PRINT_ERRORS /** * @brief Pure in-memory sort that avoids file I/O completely for very small datasets @@ -13,26 +17,26 @@ */ file_iterator_state_t *startPureMemorySort(sortData *data, embedDBOperator *op) { #ifdef DEBUG - printf("DEBUG: Starting pure in-memory sort\n"); + debug_log("DEBUG: Starting pure in-memory sort\n"); #endif int record_count = 0; while (exec(op->input)) { record_count++; if (record_count > 10) { // Safety limit #ifdef PRINT_ERRORS - printf("ERROR: Too many records for pure in-memory sort\n"); + debug_log("ERROR: Too many records for pure in-memory sort\n"); #endif return NULL; } } #ifdef DEBUG - printf("DEBUG: Found %d records for pure in-memory sort\n", record_count); + debug_log("DEBUG: Found %d records for pure in-memory sort\n", record_count); #endif if (record_count == 0) { #ifdef DEBUG - printf("DEBUG: No records to sort\n"); + debug_log("DEBUG: No records to sort\n"); #endif file_iterator_state_t *iteratorState = malloc(sizeof(file_iterator_state_t)); if (iteratorState == NULL) { @@ -52,7 +56,7 @@ file_iterator_state_t *startPureMemorySort(sortData *data, embedDBOperator *op) void *buffer = malloc(record_count * data->recordSize); if (buffer == NULL) { #ifdef PRINT_ERRORS - printf("ERROR: Failed to allocate memory for pure in-memory sort\n"); + debug_log("ERROR: Failed to allocate memory for pure in-memory sort\n"); #endif return NULL; } @@ -69,7 +73,7 @@ file_iterator_state_t *startPureMemorySort(sortData *data, embedDBOperator *op) } #ifdef DEBUG - printf("DEBUG: Read %d records into memory buffer\n", records_read); + debug_log("DEBUG: Read %d records into memory buffer\n", records_read); #endif // Sort the records in memory using quicksort @@ -78,20 +82,20 @@ file_iterator_state_t *startPureMemorySort(sortData *data, embedDBOperator *op) if (sort_result != 0) { #ifdef PRINT_ERRORS - printf("ERROR: In-memory sort failed\n"); + 
debug_log("ERROR: In-memory sort failed\n"); #endif free(buffer); return NULL; } #ifdef DEBUG - printf("DEBUG: Pure in-memory sort completed successfully\n"); + debug_log("DEBUG: Pure in-memory sort completed successfully\n"); #endif file_iterator_state_t *iteratorState = malloc(sizeof(file_iterator_state_t)); if (iteratorState == NULL) { #ifdef PRINT_ERRORS - printf("ERROR: Failed to allocate iterator state\n"); + debug_log("ERROR: Failed to allocate iterator state\n"); #endif free(buffer); return NULL; @@ -128,7 +132,7 @@ int8_t writePageWithHeader(void *buffer, const uint32_t blockIndex, const uint32 if (fileInterface->error(file)) { #ifdef PRINT_ERRORS - printf("ERROR: SORT: Failed to write unsorted data"); + debug_log("ERROR: SORT: Failed to write unsorted data"); #endif return 1; } @@ -153,17 +157,23 @@ uint32_t loadRowData(sortData *data, embedDBOperator *op, void *unsortedFile) { int32_t blockIndex = 0; int16_t valuesPerPage = (PAGE_SIZE - BLOCK_HEADER_SIZE) / data->recordSize; +#ifdef DEBUG + debug_log("DEBUG loadRowData: PAGE_SIZE=%d, BLOCK_HEADER_SIZE=%d, recordSize=%d, valuesPerPage=%d\n", + PAGE_SIZE, BLOCK_HEADER_SIZE, data->recordSize, valuesPerPage); +#endif + void *buffer = malloc(PAGE_SIZE); if (buffer == NULL) { #ifdef PRINT_ERRORS - printf("ERROR: SORT: buffer malloc failed"); + debug_log("ERROR: SORT: buffer malloc failed"); #endif return 1; } // Write row data to file while (exec(op->input)) { + // Write page to file when full if (count % valuesPerPage == 0 && count != 0) { if (writePageWithHeader(buffer, blockIndex, valuesPerPage, PAGE_SIZE, data->fileInterface, unsortedFile)) { @@ -173,14 +183,16 @@ uint32_t loadRowData(sortData *data, embedDBOperator *op, void *unsortedFile) { } blockIndex++; + + memset(buffer, 0, PAGE_SIZE); } // Offset of the data in the page - uint32_t rowOffset = count % valuesPerPage * data->recordSize + BLOCK_HEADER_SIZE; + uint32_t rowOffset = (count % valuesPerPage) * data->recordSize + BLOCK_HEADER_SIZE; if 
(rowOffset + data->recordSize > PAGE_SIZE) { #ifdef PRINT_ERRORS - printf("ERROR: SORT: error calculating row offset"); + debug_log("ERROR: SORT: error calculating row offset"); #endif free(buffer); buffer = NULL; @@ -190,6 +202,13 @@ uint32_t loadRowData(sortData *data, embedDBOperator *op, void *unsortedFile) { // Write data to buffer memcpy((uint8_t *)buffer + rowOffset, op->input->recordBuffer, data->recordSize); +#ifdef DEBUG + if (count < 100 || count % 1000 == 0) { + int32_t *keyPtr = (int32_t *)(op->input->recordBuffer + data->keyOffset); + debug_log("DEBUG loadRowData: count=%d, rowOffset=%d, key=%d\n", count, rowOffset, *keyPtr); + } +#endif + count++; // temp limit for debugging @@ -207,6 +226,10 @@ uint32_t loadRowData(sortData *data, embedDBOperator *op, void *unsortedFile) { data->fileInterface->flush(unsortedFile); +#ifdef DEBUG + debug_log("DEBUG loadRowData: finished, totalRecords=%d\n", count); +#endif + // Clean up free(buffer); buffer = NULL; @@ -234,13 +257,13 @@ void prepareSort(embedDBOperator *op) { #ifdef ARDUINO data->fileIterator = startPureMemorySort(data, op); if (data->fileIterator == NULL) { - printf("ERROR: Pure memory sort failed\n"); + debug_log("ERROR: Pure memory sort failed\n"); return; #else if (data->fileInterface == NULL || data->fileInterface->setup == NULL) { #ifdef PRINT_ERRORS - printf("ERROR: File interface or setup function not provided while initializing ORDER BY operator\n"); + debug_log("ERROR: File interface or setup function not provided while initializing ORDER BY operator\n"); #endif return; } @@ -255,7 +278,7 @@ void prepareSort(embedDBOperator *op) { if (unsortedFile == NULL || sortedFile == NULL) { #ifdef PRINT_ERRORS - printf("ERROR: Failed to allocate file handles while initializing ORDER BY operator\n"); + debug_log("ERROR: Failed to allocate file handles while initializing ORDER BY operator\n"); #endif return; } @@ -265,7 +288,7 @@ void prepareSort(embedDBOperator *op) { if (!unsortedOpen || !sortedOpen) { 
#ifdef PRINT_ERRORS - printf("ERROR: Failed to open files while initializing ORDER BY operator"); + debug_log("ERROR: Failed to open files while initializing ORDER BY operator"); #endif return; } @@ -277,7 +300,7 @@ void prepareSort(embedDBOperator *op) { file_iterator_state_t *iteratorState = startSort(data, unsortedFile, sortedFile); if (iteratorState == NULL) { #ifdef PRINT_ERRORS - printf("ERROR: Sort failed"); + debug_log("ERROR: Sort failed"); #endif return; } @@ -325,7 +348,7 @@ void prepareSort(embedDBOperator *op) { if (buffer == NULL) { #ifdef PRINT_ERRORS - printf("ERROR: SORT: buffer malloc failed m\n"); + debug_log("ERROR: SORT: buffer malloc failed m\n"); #endif return NULL; } @@ -334,7 +357,7 @@ void prepareSort(embedDBOperator *op) { file_iterator_state_t *iteratorState = malloc(sizeof(file_iterator_state_t)); if (iteratorState == NULL) { #ifdef PRINT_ERRORS - printf("Error: SORT: iterator malloc failed\n"); + debug_log("Error: SORT: iterator malloc failed\n"); #endif free(buffer); buffer = NULL; @@ -361,22 +384,22 @@ void prepareSort(embedDBOperator *op) { // Use simpler sort for Arduino with small datasets #ifdef ARDUINO #ifdef DEBUG - printf("DEBUG: Starting Arduino sort with %d records\n", data->count); + debug_log("DEBUG: Starting Arduino sort with %d records\n", data->count); #endif if (data->count <= 100) { // Use flash_minsort for all datasets on Arduino (more memory efficient) #ifdef DEBUG - printf("DEBUG: Using flash_minsort for small dataset\n"); + debug_log("DEBUG: Using flash_minsort for small dataset\n"); #endif err = flash_minsort(iteratorState, tuple_buffer, sortedFile, buffer, buffer_max_pages * es.page_size, &es, &result_file_ptr, &metrics, data->compareFn); } else { #ifdef DEBUG - printf("DEBUG: Using flash_minsort for large dataset\n"); + debug_log("DEBUG: Using flash_minsort for large dataset\n"); #endif // Use flash_minsort for larger datasets (more memory efficient than adaptive_sort) err = flash_minsort(iteratorState, 
tuple_buffer, sortedFile, buffer, buffer_max_pages * es.page_size, &es, &result_file_ptr, &metrics, data->compareFn); } #ifdef DEBUG - printf("DEBUG: Arduino sort completed with error code: %d\n", err); + debug_log("DEBUG: Arduino sort completed with error code: %d\n", err); #endif #else // Use adaptive sort on desktop @@ -386,18 +409,18 @@ void prepareSort(embedDBOperator *op) { #endif #ifdef PRINT_METRIC - printf("\tComplete. Comparisons: %d Writes: %d Reads: %d Memcpys: %d\n", metrics.num_compar, metrics.num_writes, metrics.num_reads, metrics.num_memcpys); + debug_log("\tComplete. Comparisons: %d Writes: %d Reads: %d Memcpys: %d\n", metrics.num_compar, metrics.num_writes, metrics.num_reads, metrics.num_memcpys); #endif iteratorState->resultFile = result_file_ptr; #ifdef PRINT_ERRORS if (8 == err) { - printf("Out of memory!\n"); + debug_log("Out of memory!\n"); } else if (10 == err) { - printf("File Read Error!\n"); + debug_log("File Read Error!\n"); } else if (9 == err) { - printf("File Write Error!\n"); + debug_log("File Write Error!\n"); } #endif @@ -440,31 +463,76 @@ void prepareSort(embedDBOperator *op) { // Read next page if current buffer is empty if (iteratorState->currentRecord % recordPerPage == 0 || iteratorState->recordsRead == 0) { - uint32_t pageOffset = (iteratorState->currentRecord / recordPerPage) * PAGE_SIZE; - iteratorState->fileInterface->seek(pageOffset, iteratorState->file); + uint32_t seekOffset = iteratorState->resultFile + (iteratorState->currentRecord / recordPerPage) * PAGE_SIZE; + memset(((sortData *)data)->readBuffer, 0, PAGE_SIZE); + + iteratorState->fileInterface->seek(seekOffset, iteratorState->file); iteratorState->fileInterface->readRel(((sortData *)data)->readBuffer, PAGE_SIZE, 1, iteratorState->file); +#ifdef DEBUG + if (iteratorState->recordsRead == 0 || iteratorState->recordsRead % 1000 == 0) { + debug_log("DEBUG readNextRecord: pageNum=%d, seekOffset=%d, recordsRead=%d\n", + iteratorState->currentRecord / recordPerPage, 
seekOffset, iteratorState->recordsRead); + } +#endif + if (((sortData *)data)->fileInterface->error(iteratorState->file)) { #ifdef PRINT_ERRORS - printf("ERROR: SORT: next record read failed"); + debug_log("ERROR: SORT: next record read failed"); #endif return 2; } } - // Copy result to ouput buffer - memcpy(buffer, ((sortData *)data)->readBuffer + BLOCK_HEADER_SIZE + iteratorState->recordSize * (iteratorState->currentRecord % recordPerPage), iteratorState->recordSize); - iteratorState->recordsRead++; - iteratorState->currentRecord++; + // Copy result to output buffer + uint16_t valuesInPage; + memcpy(&valuesInPage, ((sortData *)data)->readBuffer + sizeof(uint32_t), + sizeof(uint16_t)); + uint32_t recordIndexInPage = iteratorState->currentRecord % recordPerPage; +#ifdef DEBUG + uint32_t blockIdx; + memcpy(&blockIdx, ((sortData *)data)->readBuffer, sizeof(uint32_t)); + debug_log("READ PAGE hdr: blockIdx=%u values=%u\n", + blockIdx, valuesInPage); +#endif + + if (recordIndexInPage >= valuesInPage) { + return 1; + } + uint32_t copyOffset = BLOCK_HEADER_SIZE + iteratorState->recordSize * recordIndexInPage; + memcpy(buffer, ((sortData *)data)->readBuffer + copyOffset, iteratorState->recordSize); #ifdef DEBUG - printf("DEBUG: ROWDATA from file:\n"); - for (int i = 0; i < iteratorState->recordSize - SORT_KEY_SIZE; i++) { - printf("%2x ", ((uint8_t *)buffer)[i]); + if (iteratorState->recordsRead < 100 || iteratorState->recordsRead % 1000 == 0) { + int32_t *keyPtr = (int32_t *)(buffer + ((sortData *)data)->keyOffset); + debug_log("DEBUG readNextRecord: recordsRead=%d, currentRecord=%d, pageIdx=%d, recordInPage=%d, copyOffset=%d, key=%d\n", + iteratorState->recordsRead, iteratorState->currentRecord, iteratorState->currentRecord / recordPerPage, + recordIndexInPage, copyOffset, *keyPtr); } - printf("\n"); + memcpy(&valuesInPage, ((sortData *)data)->readBuffer + sizeof(uint32_t), sizeof(uint16_t)); + + debug_log("PAGE HEADER: page=%d values=%d\n", + 
iteratorState->currentRecord / recordPerPage, + valuesInPage); + + debug_log("RESULT FILE BASE OFFSET = %u\n", iteratorState->resultFile); + #endif + iteratorState->recordsRead++; + iteratorState->currentRecord++; + + // #ifdef DEBUG + // printf("DEBUG: ROWDATA from file:\n"); + // for (int i = 0; i < iteratorState->recordSize - SORT_KEY_SIZE; i++) { + // printf("%2x ", ((uint8_t *)buffer)[i]); + // } + // printf("\n"); + // #endif +#ifdef DEBUG + debug_log("DEBUG readNextRecord: recordIndexInPage=%d, valuesInPage=%d, returning=%d\n", + recordIndexInPage, valuesInPage, (recordIndexInPage >= valuesInPage) ? 1 : 0); +#endif return 0; } diff --git a/test/test_sort/test_sort_query_interface.cpp b/test/test_sort/test_sort_query_interface.cpp index 4af03e92..806f4c29 100644 --- a/test/test_sort/test_sort_query_interface.cpp +++ b/test/test_sort/test_sort_query_interface.cpp @@ -43,6 +43,8 @@ #include "unity.h" +#define DEBUG + embedDBState* state; embedDBSchema* baseSchema; @@ -59,7 +61,7 @@ void setUp() { state->eraseSizeInPages = 4; state->numDataPages = 20000; state->numIndexPages = 1000; - state->numSplinePoints = 30; + state->numSplinePoints = 120; /* Setup files */ char dataPath[] = DATA_FILE_PATH_UWA, indexPath[] = INDEX_FILE_PATH_UWA; state->fileInterface = getFileInterface(); @@ -84,10 +86,10 @@ void setUp() { state->rules = NULL; state->numRules = 0; - int8_t colSizes[] = {4, 12}; - int8_t colSignedness[] = {embedDB_COLUMN_UNSIGNED, embedDB_COLUMN_UNSIGNED}; - ColumnType colTypes[] = {embedDB_COLUMN_UINT32, embedDB_COLUMN_UINT32}; - baseSchema = embedDBCreateSchema(2, colSizes, colSignedness, colTypes); + int8_t colSizes[] = {4, 4, 4, 4}; + int8_t colSignedness[] = {embedDB_COLUMN_UNSIGNED, embedDB_COLUMN_SIGNED, embedDB_COLUMN_SIGNED, embedDB_COLUMN_SIGNED}; + ColumnType colTypes[] = {embedDB_COLUMN_UINT32, embedDB_COLUMN_INT32, embedDB_COLUMN_INT32, embedDB_COLUMN_INT32}; + baseSchema = embedDBCreateSchema(4, colSizes, colSignedness, colTypes); } void 
tearDown() { @@ -135,7 +137,7 @@ void insertNValues(embedDBState* state, int n, int mode) { embedDBPut(state, &key, &value); key++; } - for (int i = 0, data = 10; i <= n; i++) { + for (int i = 0, data = n; i <= n; i++) { key = i + 1; embedDBGet(state, (void*)&key, (void*)&value); TEST_ASSERT_MESSAGE(value == data, "value isn't equal to extracted data"); @@ -147,13 +149,15 @@ void insertNValues(embedDBState* state, int n, int mode) { } } -void debugBinData(embedDBOperator* scanTableOp, uint32_t numValues, uint32_t col) { - scanTableOp->init(scanTableOp); - int32_t* buffer = (int32_t*)scanTableOp->recordBuffer; - for (int i = 0; i < numValues; ++i) { - exec(scanTableOp); - printf("%u ", (uint32_t)buffer[col]); +void debugBinData(embedDBOperator* op, uint32_t numValues, uint8_t col) { + op->init(op); + int32_t* buffer = (int32_t*)op->recordBuffer; + printf("\n"); + for (int i = 0; i <= numValues; ++i) { + exec(op); + printf("%i ", (int32_t)buffer[col]); } + printf("\n"); fflush(stdout); } @@ -162,9 +166,9 @@ void runTestSequentialValues() { #ifdef ARDUINO insertNValues(state, 1, 0); #else - insertNValues(state, 10, 1); + insertNValues(state, 67, 1); #endif - + embedDBIterator it; it.minKey = NULL; it.maxKey = NULL; @@ -176,6 +180,7 @@ void runTestSequentialValues() { uint8_t projColsOB[] = {0, 1}; embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); embedDBOperator* orderByOp = createOrderByOperator(state, projColsOrderBy, 1, -1, int32Comparator); + //debugBinData(orderByOp, 67, 1); orderByOp->init(orderByOp); @@ -184,14 +189,11 @@ void runTestSequentialValues() { int recordCount = 0; while (exec(orderByOp)) { - TEST_ASSERT_GREATER_OR_EQUAL_UINT32_MESSAGE(previous, ((uint32_t)recordBuffer[1]), "Sort value is not greater than or equal to previous value."); - previous = ((uint32_t)recordBuffer[1]); + TEST_ASSERT_GREATER_OR_EQUAL_INT32_MESSAGE(previous, ((int32_t)recordBuffer[1]), "Sort value is not greater than or equal to previous 
value."); + previous = ((int32_t)recordBuffer[1]); recordCount++; - - // Safety break to prevent infinite loop - if (recordCount >= 10) { - break; - } + printf("%d ", previous); + fflush(stdout); } orderByOp->close(orderByOp); @@ -211,18 +213,20 @@ void runTestUsingSEA100k() { embedDBInitIterator(state, &it); embedDBOperator* scanOpOrderBy = createTableScanOperator(state, &it, baseSchema); - debugBinData(scanOpOrderBy, 20, 1); - uint8_t projColsOB[] = { 0, 1 }; + //debugBinData(scanOpOrderBy, 20, 1); + uint8_t projColsOB[] = {0, 1}; embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); + //debugBinData(projColsOrderBy, 100000, 1); embedDBOperator* orderByOp = createOrderByOperator(state, projColsOrderBy, 1, -1, int32Comparator); orderByOp->init(orderByOp); + //debugBinData(orderByOp, 100000, 1); int32_t* recordBuffer = (int32_t*)orderByOp->recordBuffer; uint32_t previous = 0; // Result of the sort - + uint32_t count = 1; while (exec(orderByOp)) { - TEST_ASSERT_GREATER_OR_EQUAL_UINT32_MESSAGE(previous, ((uint32_t)recordBuffer[1]) / 10.0, "Sort value is not greater than or equal to previous value previous values."); - previous = ((uint32_t)recordBuffer[1]) / 10.0; + TEST_ASSERT_GREATER_OR_EQUAL_INT32_MESSAGE(previous, ((int32_t)recordBuffer[1]) / 10.0, "Sort value is not greater than or equal to previous value previous values."); + previous = ((int32_t)recordBuffer[1]) / 10.0; } orderByOp->close(orderByOp); From b44b5acde4a0c4db86468244c1955c71e10f2adc Mon Sep 17 00:00:00 2001 From: Alex Argatoff Date: Tue, 13 Jan 2026 09:57:58 -0800 Subject: [PATCH 10/17] flash sort sublist fixed, moved debug print to lib, and various changes to tests and debug statements to try get flash minsort working --- .../sort => lib/Debug}/debug_print.c | 0 .../sort => lib/Debug}/debug_print.h | 0 src/query-interface/sort/adaptive_sort.c | 44 +++++++--- src/query-interface/sort/flash_minsort.c | 56 +++++++----- .../sort/flash_minsort_sublist.c | 61 
+++++++------ src/query-interface/sort/sortWrapper.c | 88 +++++++++++-------- test/test_sort/test_sort_query_interface.cpp | 15 ++-- 7 files changed, 160 insertions(+), 104 deletions(-) rename {src/query-interface/sort => lib/Debug}/debug_print.c (100%) rename {src/query-interface/sort => lib/Debug}/debug_print.h (100%) diff --git a/src/query-interface/sort/debug_print.c b/lib/Debug/debug_print.c similarity index 100% rename from src/query-interface/sort/debug_print.c rename to lib/Debug/debug_print.c diff --git a/src/query-interface/sort/debug_print.h b/lib/Debug/debug_print.h similarity index 100% rename from src/query-interface/sort/debug_print.h rename to lib/Debug/debug_print.h diff --git a/src/query-interface/sort/adaptive_sort.c b/src/query-interface/sort/adaptive_sort.c index 22bc6dcd..956943e5 100644 --- a/src/query-interface/sort/adaptive_sort.c +++ b/src/query-interface/sort/adaptive_sort.c @@ -52,11 +52,11 @@ #include "debug_print.h" - #define DEBUG 1 - #define DEBUG_OUTPUT 1 - #define DEBUG_READ 1 -#define DEBUG_HEAP 0 -#define ADAPTIVE_SORT_PRINT +// #define DEBUG 1 +// #define DEBUG_OUTPUT 1 +// #define DEBUG_READ 1 +// #define DEBUG_HEAP 0 +// #define ADAPTIVE_SORT_PRINT // #define ADAPTIVE_SORT_PRINT_FINISH @@ -195,7 +195,10 @@ int adaptive_sort( if (count == values_per_page) { *((int32_t*)outputBuffer) = blockIndex; /* Block index */ *((int16_t*)(outputBuffer + BLOCK_COUNT_OFFSET)) = count; /* Block record count */ - +#ifdef DEBUG + debug_log("Writing page adaptive sort: blockIndex=%d, count=%d, filePosition=%ld\n", + blockIndex, count, lastWritePos / PAGE_SIZE); +#endif // Write block to the ouput file if (0 == ((file_iterator_state_t*)iteratorState)->fileInterface->write(outputBuffer, blockIndex, es->page_size, outputFile)) { return 9; // Return error code if writing to the output file fails @@ -219,7 +222,10 @@ int adaptive_sort( if (count > 0) { *((int32_t*)outputBuffer) = blockIndex; /* Block index */ *((int16_t*)(outputBuffer + 
BLOCK_COUNT_OFFSET)) = count; /* Block record count */ - +#ifdef DEBUG + debug_log("Writing last page adaptive: blockIndex=%d, count=%d, filePosition=%ld\n", + blockIndex, count, lastWritePos / PAGE_SIZE); +#endif if (0 == ((file_iterator_state_t*)iteratorState)->fileInterface->write(outputBuffer, blockIndex, es->page_size, outputFile)) { return 9; // Return error code if writing to the output file fails } @@ -339,7 +345,7 @@ int adaptive_sort( // Restart building the sublist outputCount = 0; haveOutputKey = 0; - sublistSize = 0; + // sublistSize = 0; metric->num_runs++; } } @@ -467,14 +473,19 @@ int adaptive_sort( // Add Page Headers *((int32_t*)buffer) = sublistSize; - *((int16_t*)(buffer + BLOCK_COUNT_OFFSET)) = (int8_t)outputCount; + *((int16_t*)(buffer + BLOCK_COUNT_OFFSET)) = (int16_t)outputCount; memcpy(tupleBuffer, buffer + (outputCount - 1) * es->record_size + es->headerSize, es->key_size); memcpy(lastOutputKey, tupleBuffer, es->record_size); metric->num_memcpys += 2; // Store the last key output temporarily in tuple buffer as once write out then read new block it would be gone // Write the output block +#ifdef DEBUG + debug_log("Writing page adaptive writeRel: blockIndex=%d, count=%d, filePosition=%ld\n", + sublistSize, outputCount, lastWritePos / PAGE_SIZE); +#endif ((file_iterator_state_t*)iteratorState)->fileInterface->writeRel(buffer, PAGE_SIZE, 1, outputFile); + if (((file_iterator_state_t*)iteratorState)->fileInterface->error(outputFile)) { // File write error free(lastOutputKey); @@ -495,7 +506,6 @@ int adaptive_sort( sublistSize++; outputCount = 0; } /* while records left */ - // free(lastOutputKey); numSublist = metric->num_runs; #ifdef ADAPTIVE_SORT_PRINT @@ -560,15 +570,15 @@ int adaptive_sort( ((file_iterator_state_t*)iteratorState)->file = outputFile; *resultFilePtr = 0; flash_minsort_sublist(iteratorState, tupleBuffer, outputFile, buffer, bufferSizeBytes, es, resultFilePtr, metric, compareFn, numSublist); - *resultFilePtr = lastWritePos; + 
//*resultFilePtr = lastWritePos; } else { // Use normal version of minsort. Do not have enough space to index a value per sublist. Assumes data is not sorted in each region #ifdef ADAPTIVE_SORT_PRINT debug_log("Performing MinSort\n"); #endif ((file_iterator_state_t*)iteratorState)->file = outputFile; - flash_minsort(iteratorState, tupleBuffer, outputFile, buffer, bufferSizeBytes, es, resultFilePtr, metric, compareFn); *resultFilePtr = 0; + flash_minsort(iteratorState, tupleBuffer, outputFile, buffer, bufferSizeBytes, es, resultFilePtr, metric, compareFn); } } else { /* */ @@ -946,6 +956,10 @@ int adaptive_sort( *((int16_t*)(buffer + BLOCK_COUNT_OFFSET)) = (int16_t)tuplesPerPage; ((file_iterator_state_t*)iteratorState)->fileInterface->seek(lastWritePos, outputFile); +#ifdef DEBUG + debug_log("Writing page adaptive writeRel 2: blockIndex=%d, count=%d, filePosition=%ld\n", + currentBlockId, tuplesPerPage, lastWritePos / PAGE_SIZE); +#endif ((file_iterator_state_t*)iteratorState)->fileInterface->writeRel(buffer + OUTPUT_BLOCK_ID * es->page_size, PAGE_SIZE, 1, outputFile); if (((file_iterator_state_t*)iteratorState)->fileInterface->error(outputFile)) { // File read error @@ -1304,7 +1318,7 @@ int adaptive_sort( /* end of run */ } - if (record2[0] > 0) { /* Tuples in output block to write out */ + if (record2[0] != -1) { /* Tuples in output block to write out */ // fseek(outputFile, lastWritePos, SEEK_SET); // if (0 == fwrite(buffer + OUTPUT_BLOCK_ID * es->page_size, (size_t)es->page_size, 1, outputFile)) // { /* File write error - arduino prints 1st value nmemb times if nmemb != 1 */ @@ -1318,6 +1332,10 @@ int adaptive_sort( currentBlockId++; ((file_iterator_state_t*)iteratorState)->fileInterface->seek(lastWritePos, outputFile); +#ifdef DEBUG + debug_log("Writing page adaptive write rel 3: blockIndex=%d, count=%d, filePosition=%ld\n", + currentBlockId, (int16_t)(record2[0] - es->headerSize) / es->record_size + 1, lastWritePos / PAGE_SIZE); +#endif 
((file_iterator_state_t*)iteratorState)->fileInterface->writeRel(buffer + OUTPUT_BLOCK_ID * es->page_size, PAGE_SIZE, 1, outputFile); if (((file_iterator_state_t*)iteratorState)->fileInterface->error(outputFile)) { // File write error diff --git a/src/query-interface/sort/flash_minsort.c b/src/query-interface/sort/flash_minsort.c index 737c6e44..a6f6638d 100644 --- a/src/query-interface/sort/flash_minsort.c +++ b/src/query-interface/sort/flash_minsort.c @@ -49,11 +49,10 @@ This is no output sort with block headers and iterator input. Heap used when mov #include "in_memory_sort.h" -#include "debug_print.h" - // #define DEBUG 1 // #define DEBUG_OUTPUT 1 // #define DEBUG_READ 1 +// #include "debug_print.h" #ifndef INT_MAX #define INT_MAX 0xFFFFFFFF @@ -82,10 +81,10 @@ void readPageMinSort(MinSortState *ms, int pageNum, external_sort_t *es, metrics ms->lastBlockIdx = pageNum; #ifdef DEBUG_READ - printf("Reading block: %d\n", pageNum); + debug_log("Reading block: %d\n", pageNum); for (int k = 0; k < 31; k++) { test_record_t *buf = (void *)(ms->buffer + es->headerSize + k * es->record_size); - printf("%d: Record: %d\n", k, buf->key); + debug_log("%d: Record: %d\n", k, buf->key); } #endif } @@ -140,7 +139,7 @@ void init_MinSort(MinSortState *ms, external_sort_t *es, metrics_t *metric, int8 ms->records_per_block = (es->page_size - es->headerSize) / es->record_size; j = (ms->memoryAvailable - 2 * es->page_size - 2 * es->key_size - INT_SIZE) / (es->key_size + sizeof(uint8_t)); #ifdef FLASH_MINSORT_PRINT - printf("Memory overhead: %d Max regions: %d\r\n", 2 * es->key_size + INT_SIZE, j); + debug_log("Memory overhead: %d Max regions: %d\r\n", 2 * es->key_size + INT_SIZE, j); #endif ms->blocks_per_region = (uint32_t)ceil((double)ms->numBlocks / j); ms->numRegions = (uint32_t)ceil((double)ms->numBlocks / ms->blocks_per_region); @@ -152,8 +151,8 @@ void init_MinSort(MinSortState *ms, external_sort_t *es, metrics_t *metric, int8 #ifdef DEBUG // printf("Memory overhead: %d Max 
regions: %d\r\n", 2 * SORT_KEY_SIZE + INT_SIZE, j); - printf("Page size: %d, Memory size: %d Record size: %d, Number of records: %lu, Number of blocks: %d, Blocks per region: %d Regions: %d\r\n", - es->page_size, ms->memoryAvailable, ms->record_size, ms->num_records, ms->numBlocks, ms->blocks_per_region, ms->numRegions); + debug_log("Page size: %d, Memory size: %d Record size: %d, Number of records: %lu, Number of blocks: %d, Blocks per region: %d Regions: %d\r\n", + es->page_size, ms->memoryAvailable, ms->record_size, ms->num_records, ms->numBlocks, ms->blocks_per_region, ms->numRegions); #endif /* Initialize each region’s minimum value */ @@ -191,7 +190,7 @@ void init_MinSort(MinSortState *ms, external_sort_t *es, metrics_t *metric, int8 #ifdef DEBUG for (i = 0; i < ms->numRegions; i++) - printf("Region: %d Min: %d\r\n", i, ms->min[i]); + debug_log("Region: %d Min: %d\r\n", i, ms->min[i]); #endif /* Allocate memory for current and next keys */ @@ -277,9 +276,9 @@ char *next_MinSort(MinSortState *ms, external_sort_t *es, void *tupleBuffer, met memcpy(tupleBuffer, &(ms->buffer[ms->record_size * i + es->headerSize]), ms->record_size); metric->num_memcpys++; #ifdef DEBUG - test_record_t *buf = (test_record_t *)(ms->buffer + es->headerSize + i * es->record_size); - buf = (test_record_t *)tupleBuffer; - debug_log("Returning tuple: %d\n", buf->key); + test_record_t *buf = (test_record_t *)(ms->buffer + es->headerSize + i * es->record_size); + buf = (test_record_t *)tupleBuffer; + debug_log("Returning tuple: %d\n", buf->key); #endif i++; // Move to the next record ms->tuplesOut++; @@ -299,7 +298,7 @@ char *next_MinSort(MinSortState *ms, external_sort_t *es, void *tupleBuffer, met done: #ifdef DEBUG - debug_log("Updating minimum in region\r\n"); + debug_log("Updating minimum in region\r\n"); #endif // After processing the current block, scan the rest of the region to find a smaller record if possible @@ -331,7 +330,7 @@ char *next_MinSort(MinSortState *ms, external_sort_t 
*es, void *tupleBuffer, met if (compareFn(dataVal, ms->current) == 0) { ms->nextIdx = k * ms->records_per_block + i; #ifdef DEBUG - debug_log("Next tuple at: %d k: %d i: %d\r\n", ms->nextIdx, k, i); + debug_log("Next tuple at: %d k: %d i: %d\r\n", ms->nextIdx, k, i); #endif goto done2; } @@ -361,7 +360,7 @@ char *next_MinSort(MinSortState *ms, external_sort_t *es, void *tupleBuffer, met } #ifdef DEBUG - debug_log("Updated minimum in block to: %d\r\n", ms->min[ms->regionIdx]); + debug_log("Updated minimum in block to: %d\r\n", ms->min[ms->regionIdx]); #endif } @@ -418,7 +417,7 @@ int flash_minsort( metrics_t *metric, int8_t (*compareFn)(void *a, void *b)) { #ifdef DEBUG - printf("*Flash Minsort*\n"); + debug_log("*Flash Minsort*\n"); #endif clock_t start = clock(); @@ -433,6 +432,7 @@ int flash_minsort( int32_t blockIndex = 0; int16_t values_per_page = (es->page_size - es->headerSize) / es->record_size; uint8_t *outputBuffer = buffer + es->page_size; + unsigned long lastWritePos = *resultFilePtr; // test_record_t *buf; // Main sorting loop: fetches and writes sorted records in blocks @@ -447,28 +447,40 @@ int flash_minsort( count = 0; // Reset count for the next block // Write the block to the output file using the file interface's write method - if (0 == ((file_iterator_state_t *)iteratorState)->fileInterface->write(outputBuffer, blockIndex, es->page_size, outputFile)) { + ((file_iterator_state_t *)iteratorState)->fileInterface->seek(lastWritePos, outputFile); +#ifdef DEBUG + debug_log("Writing page flash minsort: blockIndex=%d, count=%d, filePosition=%ld\n", + blockIndex, count, count / PAGE_SIZE); +#endif + debug_log("Writing page flash minsort: blockIndex=%d, count=%d\n", + blockIndex, count); + if (0 == ((file_iterator_state_t *)iteratorState)->fileInterface->writeRel(outputBuffer, es->page_size, 1, outputFile)) { return 9; // Return error code if writing to the output file fails } #ifdef DEBUG - printf("Wrote output block. 
Block index: %d\n", blockIndex); + debug_log("Wrote output block. Block index: %d\n", blockIndex); for (int k = 0; k < values_per_page; k++) { test_record_t *buf = (void *)(outputBuffer + es->headerSize + k * es->record_size); - printf("%d: Output Record: %d\n", k, buf->key); + debug_log("%d: Output Record: %d\n", k, buf->key); } #endif metric->num_writes++; + lastWritePos += es->page_size; blockIndex++; } } // Write the last block if there are remaining records if (count > 0) { + ((file_iterator_state_t *)iteratorState)->fileInterface->seek(lastWritePos, outputFile); *((int32_t *)outputBuffer) = blockIndex; /* Block index */ *((int16_t *)(outputBuffer + BLOCK_COUNT_OFFSET)) = count; /* Block record count */ - - if (0 == ((file_iterator_state_t *)iteratorState)->fileInterface->write(outputBuffer, blockIndex, es->page_size, outputFile)) { +#ifdef DEBUG + debug_log("Writing page flash minsort: blockIndex=%d, count=%d, filePosition=%ld\n", + blockIndex, count, count / PAGE_SIZE); +#endif + if (0 == ((file_iterator_state_t *)iteratorState)->fileInterface->writeRel(outputBuffer, es->page_size, 1, outputFile)) { return 9; // Return error code if writing to the output file fails } metric->num_writes++; @@ -477,7 +489,7 @@ int flash_minsort( } #ifdef DEBUG - printf("Number of sorted records: %d", ms.num_records); + debug_log("Number of sorted records: %d", ms.num_records); #endif ((file_iterator_state_t *)iteratorState)->fileInterface->flush(outputFile); @@ -489,7 +501,7 @@ int flash_minsort( *resultFilePtr = 0; #ifdef DEBUG - printf("Complete. Comparisons: %d MemCopies: %d\n", metric->num_compar, metric->num_memcpys); + debug_log("Complete. 
Comparisons: %d MemCopies: %d\n", metric->num_compar, metric->num_memcpys); #endif return 0; // Successful completion diff --git a/src/query-interface/sort/flash_minsort_sublist.c b/src/query-interface/sort/flash_minsort_sublist.c index c7cf54e2..26b603f7 100644 --- a/src/query-interface/sort/flash_minsort_sublist.c +++ b/src/query-interface/sort/flash_minsort_sublist.c @@ -46,11 +46,10 @@ #include "in_memory_sort.h" -/* -#define DEBUG 1 -#define DEBUG_OUTPUT 1 -#define DEBUG_READ 1 -*/ +// #define DEBUG 1 +// #define DEBUG_OUTPUT 1 +// #define DEBUG_READ 1 +// #include "debug_print.h" void readPage_sublist(MinSortStateSublist *ms, int pageNum, external_sort_t *es, metrics_t *metric) { file_iterator_state_t *is = (file_iterator_state_t *)ms->iteratorState; @@ -59,7 +58,7 @@ void readPage_sublist(MinSortStateSublist *ms, int pageNum, external_sort_t *es, // Read page into the buffer if (0 == is->fileInterface->read(ms->buffer, pageNum, es->page_size, fp)) { #ifdef DEBUG - printf("MINSORT SUBLIST: Failed to read block.\n"); + debug_log("MINSORT SUBLIST: Failed to read block.\n"); #endif } @@ -68,10 +67,10 @@ void readPage_sublist(MinSortStateSublist *ms, int pageNum, external_sort_t *es, ms->lastBlockIdx = pageNum; #ifdef DEBUG_READ - printf("Reading block: %d Offset: %lu\n", pageNum, offset); + debug_log("Reading block: %d Offset: %lu\n", pageNum, es->key_offset); for (int k = 0; k < 31; k++) { test_record_t *buf = (void *)(ms->buffer + es->headerSize + k * es->record_size); - printf("%d: Record: %d\n", k, buf->key); + debug_log("%d: Record: %d\n", k, buf->key); } #endif } @@ -107,7 +106,7 @@ void init_MinSort_sublist(MinSortStateSublist *ms, external_sort_t *es, metrics_ // j = (ms->memoryAvailable - 2 * SORT_KEY_SIZE - INT_SIZE) / SORT_KEY_SIZE; j = (ms->memoryAvailable) / (SORT_KEY_SIZE + sizeof(uint8_t)); #ifdef FLASH_MINSORT_PRINT - printf("Memory overhead: %d Max regions: %d\r\n", 2 * SORT_KEY_SIZE + INT_SIZE, j); + debug_log("Memory overhead: %d Max regions: 
%d\r\n", 2 * SORT_KEY_SIZE + INT_SIZE, j); #endif // Memory allocation // Allocate minimum index in separate memory space (block 0 is input buffer, block 1 is output buffer) @@ -121,8 +120,8 @@ void init_MinSort_sublist(MinSortStateSublist *ms, external_sort_t *es, metrics_ ms->min_set = malloc(ms->numRegions * sizeof(uint8_t)); ms->offset = malloc(ms->numRegions * sizeof(long)); #ifdef FLASH_MINSORT_PRINT - printf("Page size: %d, Memory size: %d Record size: %d, Number of records: %lu, Number of blocks: %d, Regions: %d\r\n", - es->page_size, ms->memoryAvailable, ms->record_size, ms->num_records, ms->numBlocks, ms->numRegions); + debug_log("Page size: %d, Memory size: %d Record size: %d, Number of records: %lu, Number of blocks: %d, Regions: %d\r\n", + es->page_size, ms->memoryAvailable, ms->record_size, ms->num_records, ms->numBlocks, ms->numRegions); #endif for (i = 0; i < ms->numRegions; i++) @@ -139,12 +138,12 @@ void init_MinSort_sublist(MinSortStateSublist *ms, external_sort_t *es, metrics_ int numBlocksSublist = *(int32_t *)ms->buffer; /* Retrieve block id (indexed from 0) to compute count of blocks in sublist */ #if DEBUG - printf("Read block: %d", lastBlock); - printf(" Num: %d\n", numBlocksSublist); + debug_log("Read block: %d", lastBlock); + debug_log(" Num: %d\n", numBlocksSublist); for (int k = 0; k < 31; k++) { test_record_t *buf = (void *)(ms->buffer + es->headerSize + k * es->record_size); - printf("%d: Record: %d\n", k, buf->key); + debug_log("%d: Record: %d\n", k, buf->key); } #endif lastBlock = lastBlock - numBlocksSublist; @@ -157,20 +156,20 @@ void init_MinSort_sublist(MinSortStateSublist *ms, external_sort_t *es, metrics_ ms->min_set[regionIdx] = true; ms->offset[regionIdx] = lastBlock * es->page_size + es->headerSize + ms->fileOffset; #if DEBUG - printf("New min. Index: %d", regionIdx); - printf(" Min: %u", ms->min[regionIdx]); - printf(" Offset: %lu\n", ms->offset[regionIdx]); + debug_log("New min. 
Index: %d", regionIdx); + debug_log(" Min: %u", ms->min[regionIdx]); + debug_log(" Offset: %lu\n", ms->offset[regionIdx]); #endif regionIdx--; lastBlock--; } #ifdef DEBUG - printf("Region summary\n"); + debug_log("Region summary\n"); for (i = 0; i < ms->numRegions; i++) { - printf("Reg: %d", i); - printf(" Min: %u", ms->min[i]); - printf(" Offset: %lu\n", ms->offset[i]); + debug_log("Reg: %d", i); + debug_log(" Min: %u", ms->min[i]); + debug_log(" Offset: %lu\n", ms->offset[i]); } #endif @@ -234,7 +233,7 @@ char *next_MinSort_sublist(MinSortStateSublist *ms, external_sort_t *es, void *t #ifdef DEBUG test_record_t *buf = (test_record_t *)(ms->buffer + es->headerSize + i * es->record_size); buf = (test_record_t *)tupleBuffer; - printf("Returning tuple: %d\n", buf->key); + debug_log("Returning tuple: %d\n", buf->key); #endif // Advance to next tuple in block @@ -275,7 +274,7 @@ char *next_MinSort_sublist(MinSortStateSublist *ms, external_sort_t *es, void *t } #ifdef DEBUG - printf("Updated minimum in block to: %d\r\n", ms->min[ms->regionIdx]); + debug_log("Updated minimum in block to: %d\r\n", ms->min[ms->regionIdx]); #endif return tupleBuffer; @@ -341,7 +340,7 @@ int flash_minsort_sublist( int32_t blockIndex = 0; int16_t values_per_page = (es->page_size - es->headerSize) / es->record_size; char *outputBuffer = buffer + es->page_size; - unsigned long lastWritePos = ms.fileOffset + es->num_pages * es->page_size; + unsigned long lastWritePos = ms.fileOffset; // Write while (next_MinSort_sublist(&ms, es, (char *)(outputBuffer + count * es->record_size + es->headerSize), metric) != NULL) { @@ -356,6 +355,10 @@ int flash_minsort_sublist( // Force seek to end of file as outputFile is also inputFile and have been reading it ((file_iterator_state_t *)iteratorState)->fileInterface->seek(lastWritePos, outputFile); // Write the block to the output file using the file interface's write method +#ifdef DEBUG + debug_log("Writing page flash minsort sublist: blockIndex=%d, count=%d, 
filePosition=%ld\n", + blockIndex, count, lastWritePos / PAGE_SIZE); +#endif if (0 == ((file_iterator_state_t *)iteratorState)->fileInterface->writeRel(outputBuffer, es->page_size, 1, outputFile)) { return 9; // Return error code if writing to the output file fails } @@ -368,7 +371,7 @@ int flash_minsort_sublist( printf("Last write pos: %lu Block: %d\n", lastWritePos, blockIndex); */ #ifdef DEBUG_OUTPUT - printf("Wrote output block. Block index: %d\n", blockIndex); + debug_log("Wrote output block. Block index: %d\n", blockIndex); for (int k = 0; k < values_per_page; k++) { test_record_t *buf = (void *)(outputBuffer + es->headerSize + k * es->record_size); printf("%d: Output Record: %d\n", k, buf->key); @@ -382,11 +385,13 @@ int flash_minsort_sublist( if (count > 0) { // fseek(outputFile, lastWritePos, SEEK_SET); ((file_iterator_state_t *)iteratorState)->fileInterface->seek(lastWritePos, outputFile); - *((int32_t *)buffer) = blockIndex; /* Block index */ *((int16_t *)(buffer + BLOCK_COUNT_OFFSET)) = count; /* Block record count */ - - if (0 == ((file_iterator_state_t *)iteratorState)->fileInterface->write(outputBuffer, es->page_size, 1, outputFile)) { +#ifdef DEBUG + debug_log("Writing last page minsort sublist: blockIndex=%d, count=%d, filePosition=%ld\n", + blockIndex, count, lastWritePos / PAGE_SIZE); +#endif + if (0 == ((file_iterator_state_t *)iteratorState)->fileInterface->writeRel(outputBuffer, es->page_size, 1, outputFile)) { return 9; // Return error code if writing to the output file fails } diff --git a/src/query-interface/sort/sortWrapper.c b/src/query-interface/sort/sortWrapper.c index f9dc2326..a8e0ee46 100644 --- a/src/query-interface/sort/sortWrapper.c +++ b/src/query-interface/sort/sortWrapper.c @@ -1,13 +1,13 @@ #include "sortWrapper.h" -#include "debug_print.h" #include "query-interface/sort/in_memory_sort.h" #include "unistd.h" -#define PRINT_METRIC +// #define PRINT_METRIC #define DEBUG -#define PRINT_ERRORS +// #define PRINT_ERRORS +#include 
"debug_print.h" /** * @brief Pure in-memory sort that avoids file I/O completely for very small datasets @@ -124,9 +124,9 @@ file_iterator_state_t *startPureMemorySort(sortData *data, embedDBOperator *op) * @param file The file being written to * @return int8_t */ -int8_t writePageWithHeader(void *buffer, const uint32_t blockIndex, const uint32_t numberOfValues, const uint32_t pageSize, const embedDBFileInterface *fileInterface, void *file) { - memcpy(buffer, &blockIndex, sizeof(int32_t)); - memcpy(buffer + sizeof(uint32_t), &numberOfValues, sizeof(int16_t)); +int8_t writePageWithHeader(void *buffer, const uint32_t blockIndex, const uint16_t numberOfValues, const uint32_t pageSize, const embedDBFileInterface *fileInterface, void *file) { + memcpy(buffer, &blockIndex, sizeof(uint32_t)); + memcpy(buffer + sizeof(uint32_t), &numberOfValues, sizeof(uint16_t)); fileInterface->write(buffer, blockIndex, pageSize, file); @@ -154,15 +154,15 @@ int8_t writePageWithHeader(void *buffer, const uint32_t blockIndex, const uint32 */ uint32_t loadRowData(sortData *data, embedDBOperator *op, void *unsortedFile) { uint32_t count = 0; - int32_t blockIndex = 0; - int16_t valuesPerPage = (PAGE_SIZE - BLOCK_HEADER_SIZE) / data->recordSize; + uint32_t blockIndex = 0; + uint16_t valuesPerPage = (PAGE_SIZE - BLOCK_HEADER_SIZE) / data->recordSize; #ifdef DEBUG debug_log("DEBUG loadRowData: PAGE_SIZE=%d, BLOCK_HEADER_SIZE=%d, recordSize=%d, valuesPerPage=%d\n", - PAGE_SIZE, BLOCK_HEADER_SIZE, data->recordSize, valuesPerPage); + PAGE_SIZE, BLOCK_HEADER_SIZE, data->recordSize, valuesPerPage); #endif - void *buffer = malloc(PAGE_SIZE); + void* buffer = malloc(PAGE_SIZE); if (buffer == NULL) { #ifdef PRINT_ERRORS @@ -176,15 +176,13 @@ uint32_t loadRowData(sortData *data, embedDBOperator *op, void *unsortedFile) { // Write page to file when full if (count % valuesPerPage == 0 && count != 0) { + if (writePageWithHeader(buffer, blockIndex, valuesPerPage, PAGE_SIZE, data->fileInterface, 
unsortedFile)) { free(buffer); buffer = NULL; return 0; } - blockIndex++; - - memset(buffer, 0, PAGE_SIZE); } // Offset of the data in the page @@ -200,10 +198,24 @@ uint32_t loadRowData(sortData *data, embedDBOperator *op, void *unsortedFile) { } // Write data to buffer - memcpy((uint8_t *)buffer + rowOffset, op->input->recordBuffer, data->recordSize); - + memcpy((uint8_t*)buffer + rowOffset, op->input->recordBuffer, data->recordSize); #ifdef DEBUG - if (count < 100 || count % 1000 == 0) { + if (count < 10) { + debug_log("DEBUG loadRowData record %d: ", count); + for (int i = 0; i < data->recordSize; i++) { + debug_log("%02x ", ((uint8_t *)op->input->recordBuffer)[i]); + } + debug_log("\n"); + + // Also show what we wrote to the buffer + debug_log("DEBUG wrote to buffer at offset %d: ", rowOffset); + for (int i = 0; i < data->recordSize; i++) { + debug_log("%02x ", ((uint8_t *)buffer)[rowOffset + i]); + } + debug_log("\n"); + debug_log("DEBUG: recordBuffer address: %p\n", op->input->recordBuffer); + } + if (count < 10 || count % 1000 == 0) { int32_t *keyPtr = (int32_t *)(op->input->recordBuffer + data->keyOffset); debug_log("DEBUG loadRowData: count=%d, rowOffset=%d, key=%d\n", count, rowOffset, *keyPtr); } @@ -247,6 +259,14 @@ void prepareSort(embedDBOperator *op) { data->keyOffset = getColOffsetFromSchema(op->schema, data->colNum); data->recordSize = getRecordSizeFromSchema(op->schema); data->keySize = op->schema->columnSizes[data->colNum]; +#ifdef DEBUG + debug_log("DEBUG prepareSort: recordSize=%d, keySize=%d, keyOffset=%d, colNum=%d\n", + data->recordSize, data->keySize, data->keyOffset, data->colNum); + debug_log("DEBUG prepareSort: schema has %d columns\n", op->schema->numCols); + for (int i = 0; i < op->schema->numCols; i++) { + debug_log(" Column %d: size=%d\n", i, op->schema->columnSizes[i]); + } +#endif // A columns size will be negative if the column is signed // and positive if value is unsigned @@ -257,8 +277,11 @@ void prepareSort(embedDBOperator 
*op) { #ifdef ARDUINO data->fileIterator = startPureMemorySort(data, op); if (data->fileIterator == NULL) { +#ifdef DEBUG debug_log("ERROR: Pure memory sort failed\n"); +#endif return; + } #else if (data->fileInterface == NULL || data->fileInterface->setup == NULL) { @@ -300,7 +323,7 @@ void prepareSort(embedDBOperator *op) { file_iterator_state_t *iteratorState = startSort(data, unsortedFile, sortedFile); if (iteratorState == NULL) { #ifdef PRINT_ERRORS - debug_log("ERROR: Sort failed"); + debug_log("ERROR: Sort failed"); #endif return; } @@ -490,10 +513,7 @@ void prepareSort(embedDBOperator *op) { sizeof(uint16_t)); uint32_t recordIndexInPage = iteratorState->currentRecord % recordPerPage; #ifdef DEBUG - uint32_t blockIdx; - memcpy(&blockIdx, ((sortData *)data)->readBuffer, sizeof(uint32_t)); - debug_log("READ PAGE hdr: blockIdx=%u values=%u\n", - blockIdx, valuesInPage); + #endif if (recordIndexInPage >= valuesInPage) { @@ -503,20 +523,22 @@ void prepareSort(embedDBOperator *op) { memcpy(buffer, ((sortData *)data)->readBuffer + copyOffset, iteratorState->recordSize); #ifdef DEBUG - if (iteratorState->recordsRead < 100 || iteratorState->recordsRead % 1000 == 0) { + if (iteratorState->recordsRead < 10 || iteratorState->recordsRead % 1000 == 0) { int32_t *keyPtr = (int32_t *)(buffer + ((sortData *)data)->keyOffset); debug_log("DEBUG readNextRecord: recordsRead=%d, currentRecord=%d, pageIdx=%d, recordInPage=%d, copyOffset=%d, key=%d\n", iteratorState->recordsRead, iteratorState->currentRecord, iteratorState->currentRecord / recordPerPage, - recordIndexInPage, copyOffset, *keyPtr); + recordIndexInPage, copyOffset, *keyPtr); + uint32_t blockIdx; + memcpy(&blockIdx, ((sortData *)data)->readBuffer, sizeof(uint32_t)); + debug_log("READ PAGE hdr: blockIdx=%u values=%u\n", + blockIdx, valuesInPage); + debug_log("PAGE HEADER: page=%d values=%d\n", + iteratorState->currentRecord / recordPerPage, + valuesInPage); + int32_t *key0 = (int32_t *)(((sortData *)data)->readBuffer + 
BLOCK_HEADER_SIZE + ((sortData *)data)->keyOffset); + int32_t *keyLast = (int32_t *)(((sortData *)data)->readBuffer + BLOCK_HEADER_SIZE + (recordPerPage - 1) * iteratorState->recordSize + ((sortData *)data)->keyOffset); + debug_log(" First key on page: %d, Last key on page: %d\n", *key0, *keyLast); } - memcpy(&valuesInPage, ((sortData *)data)->readBuffer + sizeof(uint32_t), sizeof(uint16_t)); - - debug_log("PAGE HEADER: page=%d values=%d\n", - iteratorState->currentRecord / recordPerPage, - valuesInPage); - - debug_log("RESULT FILE BASE OFFSET = %u\n", iteratorState->resultFile); - #endif iteratorState->recordsRead++; @@ -529,10 +551,6 @@ void prepareSort(embedDBOperator *op) { // } // printf("\n"); // #endif -#ifdef DEBUG - debug_log("DEBUG readNextRecord: recordIndexInPage=%d, valuesInPage=%d, returning=%d\n", - recordIndexInPage, valuesInPage, (recordIndexInPage >= valuesInPage) ? 1 : 0); -#endif return 0; } diff --git a/test/test_sort/test_sort_query_interface.cpp b/test/test_sort/test_sort_query_interface.cpp index 806f4c29..40897f8a 100644 --- a/test/test_sort/test_sort_query_interface.cpp +++ b/test/test_sort/test_sort_query_interface.cpp @@ -166,7 +166,7 @@ void runTestSequentialValues() { #ifdef ARDUINO insertNValues(state, 1, 0); #else - insertNValues(state, 67, 1); + insertNValues(state, 130, 1); #endif embedDBIterator it; @@ -180,7 +180,7 @@ void runTestSequentialValues() { uint8_t projColsOB[] = {0, 1}; embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); embedDBOperator* orderByOp = createOrderByOperator(state, projColsOrderBy, 1, -1, int32Comparator); - //debugBinData(orderByOp, 67, 1); + //debugBinData(orderByOp, 66, 1); orderByOp->init(orderByOp); @@ -213,20 +213,23 @@ void runTestUsingSEA100k() { embedDBInitIterator(state, &it); embedDBOperator* scanOpOrderBy = createTableScanOperator(state, &it, baseSchema); - //debugBinData(scanOpOrderBy, 20, 1); + //debugBinData(scanOpOrderBy, 200, 0); uint8_t projColsOB[] 
= {0, 1}; embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); - //debugBinData(projColsOrderBy, 100000, 1); + //debugBinData(projColsOrderBy, 200, 0); embedDBOperator* orderByOp = createOrderByOperator(state, projColsOrderBy, 1, -1, int32Comparator); + debugBinData(orderByOp, 100000, 1); + orderByOp->init(orderByOp); - //debugBinData(orderByOp, 100000, 1); + int32_t* recordBuffer = (int32_t*)orderByOp->recordBuffer; uint32_t previous = 0; // Result of the sort - uint32_t count = 1; while (exec(orderByOp)) { TEST_ASSERT_GREATER_OR_EQUAL_INT32_MESSAGE(previous, ((int32_t)recordBuffer[1]) / 10.0, "Sort value is not greater than or equal to previous value previous values."); previous = ((int32_t)recordBuffer[1]) / 10.0; + printf("%d ", previous); + fflush(stdout); } orderByOp->close(orderByOp); From bfaf89ed1130b7bae20f7d2e4458ec2f4da6c2eb Mon Sep 17 00:00:00 2001 From: Alex Argatoff Date: Mon, 19 Jan 2026 17:04:02 -0800 Subject: [PATCH 11/17] added an extra check in the heap value, no luck though --- src/query-interface/sort/adaptive_sort.c | 103 ++++++++++++++++--- src/query-interface/sort/sortWrapper.c | 7 +- test/test_sort/test_sort_query_interface.cpp | 2 +- 3 files changed, 95 insertions(+), 17 deletions(-) diff --git a/src/query-interface/sort/adaptive_sort.c b/src/query-interface/sort/adaptive_sort.c index 956943e5..e02a7823 100644 --- a/src/query-interface/sort/adaptive_sort.c +++ b/src/query-interface/sort/adaptive_sort.c @@ -52,13 +52,13 @@ #include "debug_print.h" -// #define DEBUG 1 -// #define DEBUG_OUTPUT 1 -// #define DEBUG_READ 1 -// #define DEBUG_HEAP 0 -// #define ADAPTIVE_SORT_PRINT + #define DEBUG 1 + #define DEBUG_OUTPUT 1 + #define DEBUG_READ 1 +#define DEBUG_HEAP 0 +#define ADAPTIVE_SORT_PRINT -// #define ADAPTIVE_SORT_PRINT_FINISH +#define ADAPTIVE_SORT_PRINT_FINISH /** * Prints the contents of the heap. Used for debugging. 
@@ -146,7 +146,7 @@ int adaptive_sort( /* Note: Could be int8_t as larger than 255 is above cutoff for using MinSort. */ uint8_t numDistinctInRun = 0; /* Number of distinct values in current run */ - int optimistic = true; + int optimistic = false; if (optimistic) { // Do FLASH MinSort init first #ifdef DEBUG @@ -268,11 +268,14 @@ int adaptive_sort( int32_t sublistSize = 0; /* size in blocks */ int32_t outputCount = 0; /* number of values in output block */ int32_t recordsLeft = 0; /* number of records in buffer */ - void *heapVal, *inputVal; + void* heapVal, * inputVal; + + ((file_iterator_state_t*)iteratorState)->fileInterface->seek(0, outputFile); + lastWritePos = 0; // Fill all blocks other than the first with tuples addr = buffer + es->page_size; - for (i = 0; i < (bufferSizeInBlocks - 1) * tuplesPerPage; i++) { + for (i = 0; i < (bufferSizeInBlocks - 1) / 2 * tuplesPerPage; i++) { status = !iterator(sortData, addr); if (status == 0) break; @@ -345,7 +348,7 @@ int adaptive_sort( // Restart building the sublist outputCount = 0; haveOutputKey = 0; - // sublistSize = 0; + sublistSize = 0; metric->num_runs++; } } @@ -371,6 +374,81 @@ int adaptive_sort( heapify_rev(buffer + heapStartOffset, buffer + heapStartOffset - heapSize * es->record_size, heapSize, es, metric); continue; } + // Check if heap + list are too full (leave some margin) + int maxRecords = (bufferSizeInBlocks - 1) * tuplesPerPage; + int usedRecords = heapSize + listSize; + + if (usedRecords >= maxRecords - tuplesPerPage) { // Leave room for one page + // Buffer is getting full - force flush current run and start new one + + // First, output all remaining records from current output page + if (outputCount > 0) { + *((int32_t*)buffer) = sublistSize; + *((int16_t*)(buffer + BLOCK_COUNT_OFFSET)) = (int16_t)outputCount; + ((file_iterator_state_t*)iteratorState)->fileInterface->writeRel(buffer, PAGE_SIZE, 1, outputFile); + if (((file_iterator_state_t*)iteratorState)->fileInterface->error(outputFile)) { + 
free(lastOutputKey); + return 9; + } + metric->num_writes++; + sublistSize++; + outputCount = 0; + } + + // Dump entire heap to output + while (heapSize > 0) { + memcpy(buffer + es->headerSize + outputCount * es->record_size, + buffer + heapStartOffset, es->record_size); + outputCount++; + + if (outputCount >= tuplesPerPage) { + *((int32_t*)buffer) = sublistSize; + *((int16_t*)(buffer + BLOCK_COUNT_OFFSET)) = (int16_t)outputCount; + ((file_iterator_state_t*)iteratorState)->fileInterface->writeRel(buffer, PAGE_SIZE, 1, outputFile); + if (((file_iterator_state_t*)iteratorState)->fileInterface->error(outputFile)) { + free(lastOutputKey); + return 9; + } + metric->num_writes++; + sublistSize++; + outputCount = 0; + } + + heapSize--; + if (heapSize > 0) + heapify_rev(buffer + heapStartOffset, + buffer + heapStartOffset - heapSize * es->record_size, + heapSize, es, metric); + } + + // Write final partial page if any + if (outputCount > 0) { + *((int32_t*)buffer) = sublistSize; + *((int16_t*)(buffer + BLOCK_COUNT_OFFSET)) = (int16_t)outputCount; + ((file_iterator_state_t*)iteratorState)->fileInterface->writeRel(buffer, PAGE_SIZE, 1, outputFile); + if (((file_iterator_state_t*)iteratorState)->fileInterface->error(outputFile)) { + free(lastOutputKey); + return 9; + } + metric->num_writes++; + outputCount = 0; + } + + // Convert list to heap for new run + for (int j = 0; j < listSize; j++) { + shiftUp_rev(buffer + heapStartOffset, + buffer + es->page_size + j * es->record_size, + heapSize, es, metric); + heapSize++; + } + listSize = 0; + + // Start new sublist + numSublist++; + sublistSize = 0; + haveOutputKey = 0; + metric->num_runs++; + } heapVal = buffer + heapStartOffset; inputVal = buffer + es->headerSize + i * es->record_size; @@ -485,7 +563,7 @@ int adaptive_sort( sublistSize, outputCount, lastWritePos / PAGE_SIZE); #endif ((file_iterator_state_t*)iteratorState)->fileInterface->writeRel(buffer, PAGE_SIZE, 1, outputFile); - + if 
(((file_iterator_state_t*)iteratorState)->fileInterface->error(outputFile)) { // File write error free(lastOutputKey); @@ -503,6 +581,7 @@ int adaptive_sort( #endif metric->num_writes += 1; + lastWritePos += es->page_size; sublistSize++; outputCount = 0; } /* while records left */ @@ -534,7 +613,7 @@ int adaptive_sort( return 0; // lastWritePos = ftell(outputFile); - lastWritePos = ((file_iterator_state_t*)iteratorState)->fileInterface->tell(outputFile); + //lastWritePos = ((file_iterator_state_t*)iteratorState)->fileInterface->tell(outputFile); // if (avgDistinct/10 < nobSortCost) int bufferSizeBytes = (bufferSizeInBlocks - 1) * es->page_size; /* One of the buffers is used for a read buffer */ diff --git a/src/query-interface/sort/sortWrapper.c b/src/query-interface/sort/sortWrapper.c index a8e0ee46..229672c2 100644 --- a/src/query-interface/sort/sortWrapper.c +++ b/src/query-interface/sort/sortWrapper.c @@ -5,7 +5,7 @@ // #define PRINT_METRIC -#define DEBUG +// #define DEBUG // #define PRINT_ERRORS #include "debug_print.h" @@ -213,7 +213,6 @@ uint32_t loadRowData(sortData *data, embedDBOperator *op, void *unsortedFile) { debug_log("%02x ", ((uint8_t *)buffer)[rowOffset + i]); } debug_log("\n"); - debug_log("DEBUG: recordBuffer address: %p\n", op->input->recordBuffer); } if (count < 10 || count % 1000 == 0) { int32_t *keyPtr = (int32_t *)(op->input->recordBuffer + data->keyOffset); @@ -363,7 +362,7 @@ void prepareSort(embedDBOperator *op) { #ifdef ARDUINO const int buffer_max_pages = 1; // Reduced to minimum for Arduino #else - const int buffer_max_pages = 4; + const int buffer_max_pages = 3; #endif char *buffer = malloc(buffer_max_pages * es.page_size + es.record_size); @@ -428,6 +427,7 @@ void prepareSort(embedDBOperator *op) { // Use adaptive sort on desktop int8_t runGenOnly = false; // Run full sort operation int8_t writeReadRatio = 19; // 1.97 * 10 => 19 + // err = flash_minsort(iteratorState, tuple_buffer, sortedFile, buffer, buffer_max_pages * 
es.page_size, &es, &result_file_ptr, &metrics, data->compareFn); err = adaptive_sort(readNextRecord, iteratorState, tuple_buffer, sortedFile, buffer, buffer_max_pages, &es, &result_file_ptr, &metrics, data->compareFn, runGenOnly, writeReadRatio, data); #endif @@ -487,7 +487,6 @@ void prepareSort(embedDBOperator *op) { // Read next page if current buffer is empty if (iteratorState->currentRecord % recordPerPage == 0 || iteratorState->recordsRead == 0) { uint32_t seekOffset = iteratorState->resultFile + (iteratorState->currentRecord / recordPerPage) * PAGE_SIZE; - memset(((sortData *)data)->readBuffer, 0, PAGE_SIZE); iteratorState->fileInterface->seek(seekOffset, iteratorState->file); iteratorState->fileInterface->readRel(((sortData *)data)->readBuffer, PAGE_SIZE, 1, iteratorState->file); diff --git a/test/test_sort/test_sort_query_interface.cpp b/test/test_sort/test_sort_query_interface.cpp index 40897f8a..659076eb 100644 --- a/test/test_sort/test_sort_query_interface.cpp +++ b/test/test_sort/test_sort_query_interface.cpp @@ -166,7 +166,7 @@ void runTestSequentialValues() { #ifdef ARDUINO insertNValues(state, 1, 0); #else - insertNValues(state, 130, 1); + insertNValues(state, 400, 1); #endif embedDBIterator it; From 22e847af50d51d9517cb4f246a05e758acee6f71 Mon Sep 17 00:00:00 2001 From: Alex Argatoff Date: Sat, 24 Jan 2026 15:33:18 -0800 Subject: [PATCH 12/17] reverted some changes for testing purposes --- src/query-interface/sort/adaptive_sort.c | 81 +------------------- src/query-interface/sort/sortWrapper.c | 2 +- test/test_sort/test_sort_query_interface.cpp | 10 +-- 3 files changed, 9 insertions(+), 84 deletions(-) diff --git a/src/query-interface/sort/adaptive_sort.c b/src/query-interface/sort/adaptive_sort.c index e02a7823..d3269ff6 100644 --- a/src/query-interface/sort/adaptive_sort.c +++ b/src/query-interface/sort/adaptive_sort.c @@ -72,7 +72,7 @@ void print_heap(char* buffer, int32_t heap_start_offset, int heap_size, int list addr = buffer + 
heap_start_offset; debug_log("heap: "); for (j = 0; j < heap_size; j++) - debug_log(" %d", *(int32_t*)(addr - j * es->record_size)); + debug_log(" %d", *(int32_t*)(addr - j * es->record_size + es->key_offset)); debug_log("| "); } debug_log(" "); @@ -275,7 +275,7 @@ int adaptive_sort( // Fill all blocks other than the first with tuples addr = buffer + es->page_size; - for (i = 0; i < (bufferSizeInBlocks - 1) / 2 * tuplesPerPage; i++) { + for (i = 0; i < (bufferSizeInBlocks - 1) * tuplesPerPage; i++) { status = !iterator(sortData, addr); if (status == 0) break; @@ -374,81 +374,6 @@ int adaptive_sort( heapify_rev(buffer + heapStartOffset, buffer + heapStartOffset - heapSize * es->record_size, heapSize, es, metric); continue; } - // Check if heap + list are too full (leave some margin) - int maxRecords = (bufferSizeInBlocks - 1) * tuplesPerPage; - int usedRecords = heapSize + listSize; - - if (usedRecords >= maxRecords - tuplesPerPage) { // Leave room for one page - // Buffer is getting full - force flush current run and start new one - - // First, output all remaining records from current output page - if (outputCount > 0) { - *((int32_t*)buffer) = sublistSize; - *((int16_t*)(buffer + BLOCK_COUNT_OFFSET)) = (int16_t)outputCount; - ((file_iterator_state_t*)iteratorState)->fileInterface->writeRel(buffer, PAGE_SIZE, 1, outputFile); - if (((file_iterator_state_t*)iteratorState)->fileInterface->error(outputFile)) { - free(lastOutputKey); - return 9; - } - metric->num_writes++; - sublistSize++; - outputCount = 0; - } - - // Dump entire heap to output - while (heapSize > 0) { - memcpy(buffer + es->headerSize + outputCount * es->record_size, - buffer + heapStartOffset, es->record_size); - outputCount++; - - if (outputCount >= tuplesPerPage) { - *((int32_t*)buffer) = sublistSize; - *((int16_t*)(buffer + BLOCK_COUNT_OFFSET)) = (int16_t)outputCount; - ((file_iterator_state_t*)iteratorState)->fileInterface->writeRel(buffer, PAGE_SIZE, 1, outputFile); - if 
(((file_iterator_state_t*)iteratorState)->fileInterface->error(outputFile)) { - free(lastOutputKey); - return 9; - } - metric->num_writes++; - sublistSize++; - outputCount = 0; - } - - heapSize--; - if (heapSize > 0) - heapify_rev(buffer + heapStartOffset, - buffer + heapStartOffset - heapSize * es->record_size, - heapSize, es, metric); - } - - // Write final partial page if any - if (outputCount > 0) { - *((int32_t*)buffer) = sublistSize; - *((int16_t*)(buffer + BLOCK_COUNT_OFFSET)) = (int16_t)outputCount; - ((file_iterator_state_t*)iteratorState)->fileInterface->writeRel(buffer, PAGE_SIZE, 1, outputFile); - if (((file_iterator_state_t*)iteratorState)->fileInterface->error(outputFile)) { - free(lastOutputKey); - return 9; - } - metric->num_writes++; - outputCount = 0; - } - - // Convert list to heap for new run - for (int j = 0; j < listSize; j++) { - shiftUp_rev(buffer + heapStartOffset, - buffer + es->page_size + j * es->record_size, - heapSize, es, metric); - heapSize++; - } - listSize = 0; - - // Start new sublist - numSublist++; - sublistSize = 0; - haveOutputKey = 0; - metric->num_runs++; - } heapVal = buffer + heapStartOffset; inputVal = buffer + es->headerSize + i * es->record_size; @@ -613,7 +538,7 @@ int adaptive_sort( return 0; // lastWritePos = ftell(outputFile); - //lastWritePos = ((file_iterator_state_t*)iteratorState)->fileInterface->tell(outputFile); + lastWritePos = ((file_iterator_state_t*)iteratorState)->fileInterface->tell(outputFile); // if (avgDistinct/10 < nobSortCost) int bufferSizeBytes = (bufferSizeInBlocks - 1) * es->page_size; /* One of the buffers is used for a read buffer */ diff --git a/src/query-interface/sort/sortWrapper.c b/src/query-interface/sort/sortWrapper.c index 229672c2..ca7c7a94 100644 --- a/src/query-interface/sort/sortWrapper.c +++ b/src/query-interface/sort/sortWrapper.c @@ -362,7 +362,7 @@ void prepareSort(embedDBOperator *op) { #ifdef ARDUINO const int buffer_max_pages = 1; // Reduced to minimum for Arduino #else - 
const int buffer_max_pages = 3; + const int buffer_max_pages = 4; #endif char *buffer = malloc(buffer_max_pages * es.page_size + es.record_size); diff --git a/test/test_sort/test_sort_query_interface.cpp b/test/test_sort/test_sort_query_interface.cpp index 659076eb..2ca53bb2 100644 --- a/test/test_sort/test_sort_query_interface.cpp +++ b/test/test_sort/test_sort_query_interface.cpp @@ -54,7 +54,7 @@ void setUp() { } state = (embedDBState*)malloc(sizeof(embedDBState)); state->keySize = 4; - state->dataSize = 12; + state->dataSize = 72; state->compareKey = int32Comparator; state->compareData = int32Comparator; state->pageSize = 512; @@ -72,7 +72,7 @@ void setUp() { state->fileInterface->tempFilePath #endif - state->bufferSizeInBlocks = 4; + state->bufferSizeInBlocks = 4; state->buffer = malloc(state->bufferSizeInBlocks * state->pageSize); state->parameters = EMBEDDB_USE_BMAP | EMBEDDB_USE_INDEX | EMBEDDB_RESET_DATA; state->bitmapSize = 2; @@ -86,7 +86,7 @@ void setUp() { state->rules = NULL; state->numRules = 0; - int8_t colSizes[] = {4, 4, 4, 4}; + int8_t colSizes[] = {4, 64, 4, 4}; int8_t colSignedness[] = {embedDB_COLUMN_UNSIGNED, embedDB_COLUMN_SIGNED, embedDB_COLUMN_SIGNED, embedDB_COLUMN_SIGNED}; ColumnType colTypes[] = {embedDB_COLUMN_UINT32, embedDB_COLUMN_INT32, embedDB_COLUMN_INT32, embedDB_COLUMN_INT32}; baseSchema = embedDBCreateSchema(4, colSizes, colSignedness, colTypes); @@ -166,7 +166,7 @@ void runTestSequentialValues() { #ifdef ARDUINO insertNValues(state, 1, 0); #else - insertNValues(state, 400, 1); + insertNValues(state, 22, 1); #endif embedDBIterator it; @@ -180,7 +180,7 @@ void runTestSequentialValues() { uint8_t projColsOB[] = {0, 1}; embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); embedDBOperator* orderByOp = createOrderByOperator(state, projColsOrderBy, 1, -1, int32Comparator); - //debugBinData(orderByOp, 66, 1); + debugBinData(orderByOp, 22, 1); orderByOp->init(orderByOp); From 
f456177f139e83f4d7b5dbe655ae6bd0d98d080c Mon Sep 17 00:00:00 2001 From: Alex Argatoff Date: Mon, 26 Jan 2026 21:42:33 -0800 Subject: [PATCH 13/17] temp fixes, heap seems to have less memory write issues, block writing during replacement selection still not working for 4 byte data --- src/query-interface/sort/adaptive_sort.c | 156 +++++++++++++++++-- test/test_sort/test_sort_query_interface.cpp | 8 +- 2 files changed, 146 insertions(+), 18 deletions(-) diff --git a/src/query-interface/sort/adaptive_sort.c b/src/query-interface/sort/adaptive_sort.c index d3269ff6..7f7a8d7b 100644 --- a/src/query-interface/sort/adaptive_sort.c +++ b/src/query-interface/sort/adaptive_sort.c @@ -83,7 +83,7 @@ void print_heap(char* buffer, int32_t heap_start_offset, int heap_size, int list addr = buffer + es->page_size; debug_log("list: "); for (j = 0; j < list_size; j++) - debug_log(" %d", *(int32_t*)(addr + j * es->record_size)); + debug_log(" %d", *(int32_t*)(addr + j * es->record_size + es->key_offset)); debug_log("| "); } debug_log("\n"); @@ -263,44 +263,112 @@ int adaptive_sort( int32_t heapStartOffset = bufferSizeInBlocks * es->page_size - es->record_size; int32_t listSize = 0; - void* lastOutputKey = malloc(es->record_size); /* Pointer to memory storing value of last key output */ + void* lastOutputKey = malloc(es->record_size); int8_t haveOutputKey = 0; - int32_t sublistSize = 0; /* size in blocks */ - int32_t outputCount = 0; /* number of values in output block */ - int32_t recordsLeft = 0; /* number of records in buffer */ - void* heapVal, * inputVal; + int32_t sublistSize = 0; + int32_t outputCount = 0; + int32_t recordsLeft = 0; + void *heapVal, *inputVal; + + // Calculate safe initial heap size + // Need space for: heap (grows down) + list (grows up) + input page (block 0) + // Available buffer space = blocks 1 to (bufferSizeInBlocks-1) + // Reserve last block's worth of space for the list to grow safely + int32_t maxRecordsInBuffer = (bufferSizeInBlocks - 1) * 
tuplesPerPage; + int32_t safeInitialHeapSize = (bufferSizeInBlocks - 2) * tuplesPerPage; // Reserve 1 block for list + +#ifdef DEBUG + debug_log("DEBUG: Buffer setup:\n"); + debug_log(" bufferSizeInBlocks=%d, tuplesPerPage=%d\n", bufferSizeInBlocks, tuplesPerPage); + debug_log(" maxRecordsInBuffer=%d, safeInitialHeapSize=%d\n", maxRecordsInBuffer, safeInitialHeapSize); + debug_log(" heapStartOffset=%d, page_size=%d, record_size=%d\n", + heapStartOffset, es->page_size, es->record_size); + debug_log(" Buffer layout: [I/O Page 0][Data Pages 1-%d][Heap grows down from top]\n", bufferSizeInBlocks - 1); +#endif ((file_iterator_state_t*)iteratorState)->fileInterface->seek(0, outputFile); lastWritePos = 0; - // Fill all blocks other than the first with tuples - addr = buffer + es->page_size; - for (i = 0; i < (bufferSizeInBlocks - 1) * tuplesPerPage; i++) { + // CRITICAL FIX: Only read enough to fill the safe initial heap size + // This leaves room for the list to grow and prevents reading all data before main loop + addr = buffer + es->page_size; // Start after I/O block + for (i = 0; i < safeInitialHeapSize; i++) { status = !iterator(sortData, addr); if (status == 0) - break; + break; // No more records available recordsRead++; addr += es->record_size; } +#ifdef DEBUG + debug_log("DEBUG: Initial load completed:\n"); + debug_log(" recordsRead=%d (requested %d)\n", recordsRead, safeInitialHeapSize); + debug_log(" Data spans from offset %d to %d\n", + (int)(es->page_size), (int)(es->page_size + recordsRead * es->record_size)); +#endif + recordsLeft = recordsRead; // Update metrics - metric->num_reads += bufferSizeInBlocks - 1; + // Note: num_reads tracks page reads, but we read data during initial fill via iterator + // The iterator handles its own page reads, so don't double-count here metric->num_runs++; - // Build heap from tuples in filled blocks + // Build heap from tuples in reverse order + // Start from the end of loaded data and work backwards + addr = buffer + 
es->page_size + recordsRead * es->record_size; for (i = 0; i < recordsRead; i++) { addr -= es->record_size; + memcpy(tupleBuffer, addr, es->record_size); metric->num_memcpys++; shiftUp_rev(buffer + heapStartOffset, tupleBuffer, heapSize, es, metric); heapSize++; } +#ifdef DEBUG + debug_log("DEBUG: Heap construction completed:\n"); + debug_log(" heapSize=%d, listSize=%d\n", heapSize, listSize); + + // Memory layout verification + int32_t heapBottom = heapStartOffset - (heapSize - 1) * es->record_size; + int32_t listTop = es->page_size + listSize * es->record_size; + int32_t gapSize = heapBottom - listTop; + + debug_log("DEBUG: Memory layout:\n"); + debug_log(" I/O block: offset 0 - %d\n", es->page_size); + debug_log(" List top: offset %d\n", listTop); + debug_log(" Gap: %d bytes (%d records)\n", gapSize, gapSize / es->record_size); + debug_log(" Heap bottom: offset %d\n", heapBottom); + debug_log(" Heap top: offset %d\n", heapStartOffset); + debug_log(" Buffer end: offset %d\n", bufferSizeInBlocks * es->page_size); + + if (heapBottom <= listTop) { + debug_log("ERROR: Heap and list overlap! 
This will cause corruption.\n"); + free(lastOutputKey); + return 9; + } +#endif + +#ifdef DEBUG_HEAP + print_heap(buffer, heapStartOffset, heapSize, listSize, es); +#endif + +#ifdef DEBUG + debug_log("DEBUG: About to enter main loop\n"); + debug_log(" heapSize=%d, listSize=%d, recordsLeft=%d\n", heapSize, listSize, recordsLeft); + debug_log(" Iterator position: recordsRead=%d, totalRecords=%d\n", + ((file_iterator_state_t*)iteratorState)->recordsRead, + ((file_iterator_state_t*)iteratorState)->totalRecords); +#endif + // Read each block and sort while (recordsLeft != 0) { recordsRead = 0; +#ifdef DEBUG + debug_log("\n=== Main loop iteration: sublistSize=%d, outputCount=%d, heapSize=%d, listSize=%d, recordsLeft=%d ===\n", + sublistSize, outputCount, heapSize, listSize, recordsLeft); +#endif // Read in page addr = buffer + es->headerSize; @@ -318,6 +386,16 @@ int adaptive_sort( #endif +#ifdef DEBUG + debug_log("DEBUG: Main loop iteration - read %d records\n", recordsRead); + if (recordsRead > 0) { + debug_log(" First record value: %d, Last record value: %d\n", + *(int32_t*)(buffer + es->headerSize + es->key_offset), + *(int32_t*)(buffer + es->headerSize + (recordsRead - 1) * es->record_size + es->key_offset)); + } + debug_log(" heapSize before processing: %d, listSize: %d\n", heapSize, listSize); +#endif + if (recordsRead > 1) { // Sort page using in memory quick sort metric->num_reads += 1; @@ -333,7 +411,7 @@ int adaptive_sort( // If first value in heap is smaller than lastOutputValue then start new sublist, otherwise continue with previous one. heapVal = buffer + heapStartOffset; - if (lastOutputKey == NULL || es->compare_fcn(heapVal, lastOutputKey) < 0) { + if (lastOutputKey == NULL || es->compare_fcn(heapVal + es->key_offset, lastOutputKey + es->key_offset) < 0) { // Start new sublist numSublist++; @@ -355,6 +433,12 @@ int adaptive_sort( // Swap output records into output buffer from heap if smaller than records currently there. 
(I/O block is id zero) for (i = 0; i < tuplesPerPage; i++) { +#ifdef DEBUG + if (i < 3 || i == tuplesPerPage - 1) { // Only log first 3 and last iteration + debug_log(" Inner loop i=%d: recordsRead=%d, outputCount=%d, recordsLeft=%d, heapSize=%d\n", + i, recordsRead, outputCount, recordsLeft, heapSize); + } +#endif // Check if we've read all records from the current page if (recordsRead == 0) { // Check if there are any records left @@ -475,7 +559,19 @@ int adaptive_sort( } // Add Page Headers - *((int32_t*)buffer) = sublistSize; +#ifdef DEBUG + debug_log("About to write block: sublistSize=%d, outputCount=%d, numSublist=%d\n", + sublistSize, outputCount, numSublist); + debug_log(" First 3 output values:"); + for (int dbg = 0; dbg < 3 && dbg < outputCount; dbg++) { + debug_log(" %d", *(int32_t*)(buffer + es->headerSize + dbg * es->record_size + es->key_offset)); + } + debug_log("\n"); +#endif + if (outputCount == 0) { + continue; // Skip to next iteration + } + * ((int32_t*)buffer) = sublistSize; *((int16_t*)(buffer + BLOCK_COUNT_OFFSET)) = (int16_t)outputCount; memcpy(tupleBuffer, buffer + (outputCount - 1) * es->record_size + es->headerSize, es->key_size); memcpy(lastOutputKey, tupleBuffer, es->record_size); @@ -509,6 +605,12 @@ int adaptive_sort( lastWritePos += es->page_size; sublistSize++; outputCount = 0; +#ifdef DEBUG + if (recordsLeft == 0) { + debug_log("DEBUG: Exiting main loop - heapSize=%d, listSize=%d, outputCount=%d, sublistSize=%d\n", + heapSize, listSize, outputCount, sublistSize); + } +#endif } /* while records left */ // free(lastOutputKey); numSublist = metric->num_runs; @@ -526,6 +628,32 @@ int adaptive_sort( numDistinctInRun = 0; } /* end pessmistic */ +#ifdef DEBUG + debug_log("\n=== REPLACEMENT SELECTION COMPLETE ===\n"); + debug_log("Number of sublists created: %d\n", numSublist); + debug_log("Output file size: %ld bytes (%ld blocks)\n", lastWritePos, lastWritePos / es->page_size); + debug_log("About to start merge phase...\n\n"); + + // Read 
and display what's in each block + for (int debugBlock = 0; debugBlock < lastWritePos / es->page_size; debugBlock++) { + ((file_iterator_state_t*)iteratorState)->fileInterface->seek(debugBlock * es->page_size, outputFile); + ((file_iterator_state_t*)iteratorState)->fileInterface->readRel(buffer, es->page_size, 1, outputFile); + + uint32_t blockIdx = *((uint32_t*)buffer); + uint16_t count = *((uint16_t*)(buffer + BLOCK_COUNT_OFFSET)); + + debug_log("Block %d: blockIdx=%u, count=%u, first 10 values:", debugBlock, blockIdx, count); + for (int v = 0; v < count && v < 10; v++) { + debug_log(" %d", *(int32_t*)(buffer + es->headerSize + v * es->record_size + es->key_offset)); + } + if (count > 10) debug_log(" ..."); + debug_log("\n"); + } + debug_log("=================================\n\n"); +#endif + + //((file_iterator_state_t*)iteratorState)->fileInterface->flush(outputFile); + // No merge phase necessary if (numSublist == 1) { ((file_iterator_state_t*)iteratorState)->fileInterface->flush(outputFile); diff --git a/test/test_sort/test_sort_query_interface.cpp b/test/test_sort/test_sort_query_interface.cpp index 2ca53bb2..aa4369c6 100644 --- a/test/test_sort/test_sort_query_interface.cpp +++ b/test/test_sort/test_sort_query_interface.cpp @@ -54,7 +54,7 @@ void setUp() { } state = (embedDBState*)malloc(sizeof(embedDBState)); state->keySize = 4; - state->dataSize = 72; + state->dataSize = 12; state->compareKey = int32Comparator; state->compareData = int32Comparator; state->pageSize = 512; @@ -86,7 +86,7 @@ void setUp() { state->rules = NULL; state->numRules = 0; - int8_t colSizes[] = {4, 64, 4, 4}; + int8_t colSizes[] = {4, 4, 4, 4}; int8_t colSignedness[] = {embedDB_COLUMN_UNSIGNED, embedDB_COLUMN_SIGNED, embedDB_COLUMN_SIGNED, embedDB_COLUMN_SIGNED}; ColumnType colTypes[] = {embedDB_COLUMN_UINT32, embedDB_COLUMN_INT32, embedDB_COLUMN_INT32, embedDB_COLUMN_INT32}; baseSchema = embedDBCreateSchema(4, colSizes, colSignedness, colTypes); @@ -166,7 +166,7 @@ void 
runTestSequentialValues() { #ifdef ARDUINO insertNValues(state, 1, 0); #else - insertNValues(state, 22, 1); + insertNValues(state, 190, 1); #endif embedDBIterator it; @@ -180,7 +180,7 @@ void runTestSequentialValues() { uint8_t projColsOB[] = {0, 1}; embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); embedDBOperator* orderByOp = createOrderByOperator(state, projColsOrderBy, 1, -1, int32Comparator); - debugBinData(orderByOp, 22, 1); + debugBinData(orderByOp, 190, 1); orderByOp->init(orderByOp); From 345941bd7544d04ec43bc08a00b87c27a2a97d13 Mon Sep 17 00:00:00 2001 From: xelArga Date: Tue, 27 Jan 2026 12:07:56 -0800 Subject: [PATCH 14/17] I think the heap logic is fixed, fingers crossed. I had originally messed with the flahs minsort logic, and I tried putting the result filte to be after last write, but there's some logic breaking in there --- src/query-interface/sort/adaptive_sort.c | 73 +++++++++++++++----- test/test_sort/test_sort_query_interface.cpp | 2 +- 2 files changed, 56 insertions(+), 19 deletions(-) diff --git a/src/query-interface/sort/adaptive_sort.c b/src/query-interface/sort/adaptive_sort.c index 7f7a8d7b..2f1bba99 100644 --- a/src/query-interface/sort/adaptive_sort.c +++ b/src/query-interface/sort/adaptive_sort.c @@ -52,13 +52,13 @@ #include "debug_print.h" - #define DEBUG 1 - #define DEBUG_OUTPUT 1 - #define DEBUG_READ 1 -#define DEBUG_HEAP 0 -#define ADAPTIVE_SORT_PRINT +// #define DEBUG 1 +// #define DEBUG_OUTPUT 1 +// #define DEBUG_READ 1 +// #define DEBUG_HEAP 0 +// #define ADAPTIVE_SORT_PRINT -#define ADAPTIVE_SORT_PRINT_FINISH +// #define ADAPTIVE_SORT_PRINT_FINISH /** * Prints the contents of the heap. Used for debugging. 
@@ -265,9 +265,9 @@ int adaptive_sort( void* lastOutputKey = malloc(es->record_size); int8_t haveOutputKey = 0; - int32_t sublistSize = 0; - int32_t outputCount = 0; - int32_t recordsLeft = 0; + int32_t sublistSize = 0; /* size in blocks */ + int32_t outputCount = 0; /* number of values in output block */ + int32_t recordsLeft = 0; /* number of records in buffer */ void *heapVal, *inputVal; // Calculate safe initial heap size @@ -275,12 +275,11 @@ int adaptive_sort( // Available buffer space = blocks 1 to (bufferSizeInBlocks-1) // Reserve last block's worth of space for the list to grow safely int32_t maxRecordsInBuffer = (bufferSizeInBlocks - 1) * tuplesPerPage; - int32_t safeInitialHeapSize = (bufferSizeInBlocks - 2) * tuplesPerPage; // Reserve 1 block for list #ifdef DEBUG debug_log("DEBUG: Buffer setup:\n"); debug_log(" bufferSizeInBlocks=%d, tuplesPerPage=%d\n", bufferSizeInBlocks, tuplesPerPage); - debug_log(" maxRecordsInBuffer=%d, safeInitialHeapSize=%d\n", maxRecordsInBuffer, safeInitialHeapSize); + debug_log(" maxRecordsInBuffer=%d", maxRecordsInBuffer); debug_log(" heapStartOffset=%d, page_size=%d, record_size=%d\n", heapStartOffset, es->page_size, es->record_size); debug_log(" Buffer layout: [I/O Page 0][Data Pages 1-%d][Heap grows down from top]\n", bufferSizeInBlocks - 1); @@ -289,10 +288,8 @@ int adaptive_sort( ((file_iterator_state_t*)iteratorState)->fileInterface->seek(0, outputFile); lastWritePos = 0; - // CRITICAL FIX: Only read enough to fill the safe initial heap size - // This leaves room for the list to grow and prevents reading all data before main loop addr = buffer + es->page_size; // Start after I/O block - for (i = 0; i < safeInitialHeapSize; i++) { + for (i = 0; i < maxRecordsInBuffer; i++) { status = !iterator(sortData, addr); if (status == 0) break; // No more records available @@ -302,7 +299,7 @@ int adaptive_sort( #ifdef DEBUG debug_log("DEBUG: Initial load completed:\n"); - debug_log(" recordsRead=%d (requested %d)\n", recordsRead, 
safeInitialHeapSize); + debug_log(" recordsRead=%d (requested %d)\n", recordsRead, maxRecordsInBuffer); debug_log(" Data spans from offset %d to %d\n", (int)(es->page_size), (int)(es->page_size + recordsRead * es->record_size)); #endif @@ -433,6 +430,46 @@ int adaptive_sort( // Swap output records into output buffer from heap if smaller than records currently there. (I/O block is id zero) for (i = 0; i < tuplesPerPage; i++) { + /* ========================================================== + * HEAP-EMPTY → START NEW RUN TRANSITION + * This MUST happen before producing output + * ========================================================== */ + if (heapSize == 0) { + if (listSize > 0) { + // Finish current run and start a new one + numSublist++; + metric->num_runs++; + + sublistSize = 0; + outputCount = 0; + haveOutputKey = 0; + +#ifdef DEBUG + debug_log("DEBUG: Heap empty → starting new run, promoting list (%d records)\n", + listSize); +#endif + + // Promote frozen list → heap + for (int32_t k = listSize - 1; k >= 0; k--) { + shiftUp_rev(buffer + heapStartOffset, + buffer + es->page_size + k * es->record_size, + heapSize, es, metric); + heapSize++; + } + listSize = 0; + + // Restart filling the output page for the new run + i = -1; + continue; + } else { + // No heap, no list → nothing left to output + break; + } + } + + /* ========================================================== + * EXISTING LOGIC CONTINUES HERE + * ========================================================== */ #ifdef DEBUG if (i < 3 || i == tuplesPerPage - 1) { // Only log first 3 and last iteration debug_log(" Inner loop i=%d: recordsRead=%d, outputCount=%d, recordsLeft=%d, heapSize=%d\n", @@ -440,7 +477,7 @@ int adaptive_sort( } #endif // Check if we've read all records from the current page - if (recordsRead == 0) { + if (recordsRead == 0 || i >= recordsRead) { // Check if there are any records left if (recordsLeft <= 0) { break; @@ -700,7 +737,7 @@ int adaptive_sort( debug_log("Performing 
MinSort with sorted sublists\n"); #endif ((file_iterator_state_t*)iteratorState)->file = outputFile; - *resultFilePtr = 0; + *resultFilePtr = lastWritePos; flash_minsort_sublist(iteratorState, tupleBuffer, outputFile, buffer, bufferSizeBytes, es, resultFilePtr, metric, compareFn, numSublist); //*resultFilePtr = lastWritePos; } else { @@ -709,7 +746,7 @@ int adaptive_sort( debug_log("Performing MinSort\n"); #endif ((file_iterator_state_t*)iteratorState)->file = outputFile; - *resultFilePtr = 0; + *resultFilePtr = lastWritePos; flash_minsort(iteratorState, tupleBuffer, outputFile, buffer, bufferSizeBytes, es, resultFilePtr, metric, compareFn); } } else { diff --git a/test/test_sort/test_sort_query_interface.cpp b/test/test_sort/test_sort_query_interface.cpp index aa4369c6..4e8d414b 100644 --- a/test/test_sort/test_sort_query_interface.cpp +++ b/test/test_sort/test_sort_query_interface.cpp @@ -180,7 +180,7 @@ void runTestSequentialValues() { uint8_t projColsOB[] = {0, 1}; embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); embedDBOperator* orderByOp = createOrderByOperator(state, projColsOrderBy, 1, -1, int32Comparator); - debugBinData(orderByOp, 190, 1); + // debugBinData(orderByOp, 190, 1); orderByOp->init(orderByOp); From b956d13b643745c11e1ed706975502dff8ead984 Mon Sep 17 00:00:00 2001 From: Alex Argatoff Date: Mon, 2 Feb 2026 20:49:02 -0800 Subject: [PATCH 15/17] Sorting works at the moment! 
Just now in the process of getting the file interface and adaptive sort working properly on the arduino --- lib/Debug/debug_print.c | 3 +- lib/Debug/debug_print.h | 2 +- .../desktopFileInterface.c | 6 +- lib/SD-File-Interface/SDFileInterface.c | 75 ++-- lib/SD-File-Interface/SDFileInterface.h | 2 +- lib/SD-Wrapper/sdcard_c_iface.cpp | 26 +- lib/SD-Wrapper/sdcard_c_iface.h | 12 +- makefile | 2 +- platformio.ini | 1 - src/query-interface/sort/adaptive_sort.c | 73 ++-- src/query-interface/sort/flash_minsort.c | 46 +- .../sort/flash_minsort_sublist.c | 9 +- src/query-interface/sort/sortWrapper.c | 403 ++++++++---------- test/test_sort/test_sort_query_interface.cpp | 87 ++-- 14 files changed, 364 insertions(+), 383 deletions(-) diff --git a/lib/Debug/debug_print.c b/lib/Debug/debug_print.c index 1b37383f..09032a1f 100644 --- a/lib/Debug/debug_print.c +++ b/lib/Debug/debug_print.c @@ -1,9 +1,10 @@ #include "debug_print.h" -#include #include +#include #include + #if defined(_WIN32) || defined(_WIN64) #include #define write _write diff --git a/lib/Debug/debug_print.h b/lib/Debug/debug_print.h index 5c8f739d..4aba361a 100644 --- a/lib/Debug/debug_print.h +++ b/lib/Debug/debug_print.h @@ -5,4 +5,4 @@ void debug_log(const char *format, ...); -#endif // DEBUG_PRINT_H_ +#endif // DEBUG_PRINT_H_ diff --git a/lib/Desktop-File-Interface/desktopFileInterface.c b/lib/Desktop-File-Interface/desktopFileInterface.c index 8347c9ae..8fb4a7cc 100644 --- a/lib/Desktop-File-Interface/desktopFileInterface.c +++ b/lib/Desktop-File-Interface/desktopFileInterface.c @@ -23,7 +23,7 @@ void tearDownFile(void *file) { } int8_t FILE_REMOVE(void *file) { - if (file == NULL) return 0; + if (file == NULL) return 1; FILE_INFO *fileInfo = (FILE_INFO *)file; if (fileInfo->file != NULL) { @@ -39,10 +39,6 @@ int8_t FILE_REMOVE(void *file) { perror("ERROR: Failed to remove temp file"); #endif } - free(fileInfo->filename); - fileInfo->filename = NULL; - - free(fileInfo); return result; } } diff --git 
a/lib/SD-File-Interface/SDFileInterface.c b/lib/SD-File-Interface/SDFileInterface.c index e2a9b749..d41d7503 100644 --- a/lib/SD-File-Interface/SDFileInterface.c +++ b/lib/SD-File-Interface/SDFileInterface.c @@ -40,7 +40,7 @@ typedef struct { SD_FILE *sdFile; } SD_FILE_INFO; -void *setupSDFile(char *filename) { +void *setupSDFile(const char *filename) { SD_FILE_INFO *fileInfo = malloc(sizeof(SD_FILE_INFO)); int nameLen = strlen(filename); fileInfo->filename = calloc(1, nameLen + 1); @@ -58,7 +58,7 @@ void tearDownSDFile(void *file) { } int8_t SD_FILE_REMOVE(void *file) { - if (file == NULL) return 0; + if (file == NULL) return 1; SD_FILE_INFO *fileInfo = (SD_FILE_INFO *)file; if (fileInfo->sdFile != NULL) { @@ -66,21 +66,11 @@ int8_t SD_FILE_REMOVE(void *file) { fileInfo->sdFile = NULL; } - int8_t result = 1; if (fileInfo->filename != NULL) { - /* Try to use C remove as fallback; replace with sd-specific remove if available */ - if (remove(fileInfo->filename) != 0) { - result = 0; -#ifdef PRINT_ERRORS - perror("ERROR: Failed to remove SD temp file"); -#endif - } - free(fileInfo->filename); - fileInfo->filename = NULL; + int result = sd_remove(fileInfo->filename); + return (result == 0); } - - free(fileInfo); - return result; + return 1; } int8_t FILE_READ(void *buffer, uint32_t pageNum, uint32_t pageSize, void *file) { @@ -91,31 +81,25 @@ int8_t FILE_READ(void *buffer, uint32_t pageNum, uint32_t pageSize, void *file) int8_t FILE_WRITE(void *buffer, uint32_t pageNum, uint32_t pageSize, void *file) { SD_FILE_INFO *fileInfo = (SD_FILE_INFO *)file; + if (fileInfo->sdFile == NULL) return -1; + size_t fileSize = sd_length(fileInfo->sdFile); - size_t requiredSize = pageNum * pageSize; - if (fileSize < pageNum * pageSize) { - int8_t seekSuccess = sd_fseek(fileInfo->sdFile, fileSize, SEEK_SET); - if (seekSuccess == -1) { - return -1; - } - size_t currentSize = fileSize; - uint32_t max = UINT32_MAX; - uint32_t writeSuccess = 0; - while (currentSize < requiredSize) { - 
writeSuccess = sd_fwrite(&max, sizeof(uint32_t), 1, fileInfo->sdFile); - if (writeSuccess == 0) - return -1; - currentSize += 4; + size_t requiredSize = (size_t)pageNum * pageSize; + + if (fileSize < requiredSize) { + sd_fseek(fileInfo->sdFile, 0, SEEK_END); + uint8_t zero = 0; + while (sd_length(fileInfo->sdFile) < requiredSize) { + if (sd_fwrite(&zero, 1, 1, fileInfo->sdFile) != 1) return -1; } } - int8_t seekSuccess = sd_fseek(fileInfo->sdFile, pageNum * pageSize, SEEK_SET); - if (seekSuccess == -1) { - return -1; + + if (sd_fseek(fileInfo->sdFile, requiredSize, SEEK_SET) != 0) return -1; + + if (sd_fwrite(buffer, pageSize, 1, fileInfo->sdFile) == 1) { + return 1; } - int8_t writeSuccess = sd_fwrite(buffer, pageSize, 1, fileInfo->sdFile) == pageSize; - if (seekSuccess == -1) - return 0; - return 1; + return 0; } int8_t FILE_ERASE(uint32_t startPage, uint32_t endPage, uint32_t pageSize, void *file) { @@ -152,6 +136,17 @@ int8_t FILE_OPEN(void *file, uint8_t mode) { } } +char *sdFat_tempFilePath(void) { + char tempPathBuffer[32]; + snprintf(tempPathBuffer, sizeof(tempPathBuffer), "TMP%lu.DAT", random()); + + char *out = malloc(strlen(tempPathBuffer) + 1); + if (out) { + strcpy(out, tempPathBuffer); + } + return out; +} + embedDBFileInterface *getSDInterface() { embedDBFileInterface *fileInterface = malloc(sizeof(embedDBFileInterface)); fileInterface->close = FILE_CLOSE; @@ -163,12 +158,6 @@ embedDBFileInterface *getSDInterface() { fileInterface->setup = setupSDFile; fileInterface->teardown = tearDownSDFile; fileInterface->removeFile = SD_FILE_REMOVE; - fileInterface->tempFilePath = sdfat_tempFilePath; + fileInterface->tempFilePath = sdFat_tempFilePath; return fileInterface; } - -char* sdfat_tempFilePath(void) { - static char tempPathBuffer[32]; - snprintf(tempPathBuffer, sizeof(tempPathBuffer), "TMP%lu.DAT", random()); - return tempPathBuffer; -} diff --git a/lib/SD-File-Interface/SDFileInterface.h b/lib/SD-File-Interface/SDFileInterface.h index 9267de42..56f467ec 
100644 --- a/lib/SD-File-Interface/SDFileInterface.h +++ b/lib/SD-File-Interface/SDFileInterface.h @@ -52,7 +52,7 @@ extern "C" { #include "sdcard_c_iface.h" embedDBFileInterface *getSDInterface(); -void *setupSDFile(char *filename); +void *setupSDFile(const char *filename); void tearDownSDFile(void *file); #ifdef __cplusplus diff --git a/lib/SD-Wrapper/sdcard_c_iface.cpp b/lib/SD-Wrapper/sdcard_c_iface.cpp index 2f72f1e7..73cb6f82 100644 --- a/lib/SD-Wrapper/sdcard_c_iface.cpp +++ b/lib/SD-Wrapper/sdcard_c_iface.cpp @@ -112,24 +112,32 @@ size_t sd_fread(void *ptr, size_t size, size_t nmemb, SD_FILE *stream) { } int sd_fseek(SD_FILE *stream, unsigned long int offset, int whence) { - if (NULL == stream) - return -1; + if (NULL == stream) return -1; - bool result = stream->f.seek(offset); - if (!result) - return -1; - return 0; + unsigned long absolute_pos = offset; + if (whence == SEEK_CUR) { + absolute_pos = stream->f.position() + offset; + } else if (whence == SEEK_END) { + absolute_pos = stream->f.size() - offset; + } + return stream->f.seek(absolute_pos) ? 0 : -1; } size_t sd_fwrite(void *ptr, size_t size, size_t nmemb, SD_FILE *stream) { size_t total_count = size * nmemb; size_t bytes_written = stream->f.write(ptr, total_count); - if (total_count != bytes_written) - return 0; - return total_count; + if (bytes_written == 0) return 0; + return bytes_written / size; } size_t sd_length(SD_FILE *stream) { return stream->f.size(); } + +int sd_remove(const char *filename) { + if (sdcard->remove(filename)) { + return 0; + } + return -1; +} diff --git a/lib/SD-Wrapper/sdcard_c_iface.h b/lib/SD-Wrapper/sdcard_c_iface.h index 2dc785ea..30c0be58 100644 --- a/lib/SD-Wrapper/sdcard_c_iface.h +++ b/lib/SD-Wrapper/sdcard_c_iface.h @@ -54,6 +54,7 @@ extern "C" { #define fflush(x) sd_fflush(x) #define fseek(x, y, z) sd_fseek(x, y, z) #define fread(w, x, y, z) sd_fread(w, x, y, z) +// #define remove(x) sd_remove(x) /** @brief Wrapper around Arduino File type (a C++ object). 
@@ -166,9 +167,16 @@ sd_fwrite( * @param stream A pointer to a C file struct type associated with an SD file object. * @return The size of the file in bytes */ -size_t sd_length(SD_FILE *stream); +size_t sd_length(SD_FILE* stream); -void init_sdcard(void *sd); +/** +@brief Remove (delete) a file from the SD card. +@param filename The name of the file to delete. +@returns 0 on success, -1 on failure. +*/ +int sd_remove(const char *filename); + +void init_sdcard(void* sd); #if defined(__cplusplus) } diff --git a/makefile b/makefile index 0840eae4..d8a3674c 100644 --- a/makefile +++ b/makefile @@ -54,7 +54,7 @@ DEV_TEST_OBJECTS = $(EMBEDDB_OBJECTS) $(QUERY_OBJECTS) $(EMBEDDB_FILE_INTERFACE) TEST_FLAGS = -I. -I$(PATHU) -I $(PATHS) -I$(PATH_UTILITY) -I$(PATH_FILE_INTERFACE) -D TEST -EXAMPLE_FLAGS = -I. -I$(PATHS) -I$(PATH_UTILITY) -I$(PATH_FILE_INTERFACE) -I$(PATH_DISTRIBUTION) -DPRINT_ERRORS +EXAMPLE_FLAGS = -I. -I$(PATHS) -I$(PATH_UTILITY) -I$(PATH_FILE_INTERFACE) -I$(PATH_DISTRIBUTION) TEST_DIST_FLAGS = -I. 
-I$(PATHS) -I$(PATHU) -I$(PATH_FILE_INTERFACE) -I$(PATH_DISTRIBUTION) -I$(PATH_UTILITY) -DDIST -D TEST override CFLAGS += $(if $(filter test-dist,$(MAKECMDGOALS)), $(TEST_DIST_FLAGS), $(if $(filter test,$(MAKECMDGOALS)),$(TEST_FLAGS),$(EXAMPLE_FLAGS)) ) diff --git a/platformio.ini b/platformio.ini index c810d597..44e7ffd7 100644 --- a/platformio.ini +++ b/platformio.ini @@ -20,7 +20,6 @@ build_src_filter = lib_ignore = Dataflash, Dataflash-File-Interface, Dataflash-Wrapper, Distribution, Due, Mega, Memboard, SD-File-Interface, SD-Test, SD-Wrapper, SdFat, Serial-Wrapper, Unity-Desktop build_flags = -lm - -DPRINT_ERRORS extra_scripts = pre:scripts/create_build_folder.py lib_deps = build_type = debug diff --git a/src/query-interface/sort/adaptive_sort.c b/src/query-interface/sort/adaptive_sort.c index 2f1bba99..5fe62c27 100644 --- a/src/query-interface/sort/adaptive_sort.c +++ b/src/query-interface/sort/adaptive_sort.c @@ -50,15 +50,19 @@ #include "in_memory_sort.h" #include "no_output_heap.h" -#include "debug_print.h" - // #define DEBUG 1 // #define DEBUG_OUTPUT 1 // #define DEBUG_READ 1 // #define DEBUG_HEAP 0 -// #define ADAPTIVE_SORT_PRINT - +// #define ADAPTIVE_SORT_PRINT // #define ADAPTIVE_SORT_PRINT_FINISH +#if defined(DEBUG) || defined(DEBUG_OUTPUT) || defined(DEBUG_READ) || defined(DEBUG_HEAP) || defined(ADAPTIVE_SORT_PRINT) || defined(ADAPTIVE_SORT_PRINT_FINISH) +#include "debug_print.h" +#else +#ifndef debug_log +#define debug_log(...) ((void)0) +#endif +#endif /** * Prints the contents of the heap. Used for debugging. 
@@ -76,7 +80,6 @@ void print_heap(char* buffer, int32_t heap_start_offset, int heap_size, int list debug_log("| "); } debug_log(" "); - // Prints the list for (aa = 0; aa < 1; aa++) { @@ -87,7 +90,6 @@ void print_heap(char* buffer, int32_t heap_start_offset, int heap_size, int list debug_log("| "); } debug_log("\n"); - } /** @@ -150,7 +152,7 @@ int adaptive_sort( if (optimistic) { // Do FLASH MinSort init first #ifdef DEBUG - debug_log("*Optimistic*\n"); + debug_log("*Optimistic*\n"); #endif MinSortState ms; @@ -166,11 +168,11 @@ int adaptive_sort( int32_t nobSortCost = numPasses * (10 + writeToReadRatio) / 10; #ifdef DEBUG - debug_log("Adaptive calculation.\n"); - debug_log("NOB sort cost. # runs: %d", numSublist); - debug_log(" # passes: %d cost: %d\n", numPasses, nobSortCost); - debug_log("MinSort cost. Num sublists: %d ", numSublist); - debug_log(" Avg. distinct/sublist: %d\n", avgDistinct / 10); + debug_log("Adaptive calculation.\n"); + debug_log("NOB sort cost. # runs: %d", numSublist); + debug_log(" # passes: %d cost: %d\n", numPasses, nobSortCost); + debug_log("MinSort cost. Num sublists: %d ", numSublist); + debug_log(" Avg. 
distinct/sublist: %d\n", avgDistinct / 10); #endif if (avgDistinct < nobSortCost) @@ -197,7 +199,7 @@ int adaptive_sort( *((int16_t*)(outputBuffer + BLOCK_COUNT_OFFSET)) = count; /* Block record count */ #ifdef DEBUG debug_log("Writing page adaptive sort: blockIndex=%d, count=%d, filePosition=%ld\n", - blockIndex, count, lastWritePos / PAGE_SIZE); + blockIndex, count, lastWritePos / PAGE_SIZE); #endif // Write block to the ouput file if (0 == ((file_iterator_state_t*)iteratorState)->fileInterface->write(outputBuffer, blockIndex, es->page_size, outputFile)) { @@ -213,7 +215,7 @@ int adaptive_sort( for (int k = 0; k < values_per_page; k++) { debug_log("%3d: 1 Output Record: %d\n", k, outputBuffer + es->headerSize + k * es->record_size + es->key_offset); } - + #endif } } @@ -224,7 +226,7 @@ int adaptive_sort( *((int16_t*)(outputBuffer + BLOCK_COUNT_OFFSET)) = count; /* Block record count */ #ifdef DEBUG debug_log("Writing last page adaptive: blockIndex=%d, count=%d, filePosition=%ld\n", - blockIndex, count, lastWritePos / PAGE_SIZE); + blockIndex, count, lastWritePos / PAGE_SIZE); #endif if (0 == ((file_iterator_state_t*)iteratorState)->fileInterface->write(outputBuffer, blockIndex, es->page_size, outputFile)) { return 9; // Return error code if writing to the output file fails @@ -239,7 +241,7 @@ int adaptive_sort( for (int k = 0; k < values_per_page; k++) { debug_log("%3d: 2 Output Record: %d\n", k, *(uint32_t*)(outputBuffer + es->headerSize + k * es->record_size + es->key_offset)); } - + #endif } @@ -380,7 +382,6 @@ int adaptive_sort( #ifdef DEBUG_HEAP print_heap(buffer, heapStartOffset, heapSize, listSize, es); - #endif #ifdef DEBUG @@ -416,7 +417,7 @@ int adaptive_sort( avgDistinct = avgDistinct + (numDistinctInRun - avgDistinct / 10) * 10 / numSublist; #ifdef DEBUG debug_log("Number of distinct values in sublist: %d Running average: %d\n", numDistinctInRun, avgDistinct / 10); - + #endif numDistinctInRun = 1; @@ -430,10 +431,9 @@ int adaptive_sort( // Swap output 
records into output buffer from heap if smaller than records currently there. (I/O block is id zero) for (i = 0; i < tuplesPerPage; i++) { - /* ========================================================== - * HEAP-EMPTY → START NEW RUN TRANSITION - * This MUST happen before producing output - * ========================================================== */ + /* + * HEAP-EMPTY START NEW RUN TRANSITION + */ if (heapSize == 0) { if (listSize > 0) { // Finish current run and start a new one @@ -449,7 +449,7 @@ int adaptive_sort( listSize); #endif - // Promote frozen list → heap + // Promote frozen list to heap for (int32_t k = listSize - 1; k >= 0; k--) { shiftUp_rev(buffer + heapStartOffset, buffer + es->page_size + k * es->record_size, @@ -466,10 +466,6 @@ int adaptive_sort( break; } } - - /* ========================================================== - * EXISTING LOGIC CONTINUES HERE - * ========================================================== */ #ifdef DEBUG if (i < 3 || i == tuplesPerPage - 1) { // Only log first 3 and last iteration debug_log(" Inner loop i=%d: recordsRead=%d, outputCount=%d, recordsLeft=%d, heapSize=%d\n", @@ -608,7 +604,7 @@ int adaptive_sort( if (outputCount == 0) { continue; // Skip to next iteration } - * ((int32_t*)buffer) = sublistSize; + *((int32_t*)buffer) = sublistSize; *((int16_t*)(buffer + BLOCK_COUNT_OFFSET)) = (int16_t)outputCount; memcpy(tupleBuffer, buffer + (outputCount - 1) * es->record_size + es->headerSize, es->key_size); memcpy(lastOutputKey, tupleBuffer, es->record_size); @@ -618,7 +614,7 @@ int adaptive_sort( // Write the output block #ifdef DEBUG debug_log("Writing page adaptive writeRel: blockIndex=%d, count=%d, filePosition=%ld\n", - sublistSize, outputCount, lastWritePos / PAGE_SIZE); + sublistSize, outputCount, lastWritePos / PAGE_SIZE); #endif ((file_iterator_state_t*)iteratorState)->fileInterface->writeRel(buffer, PAGE_SIZE, 1, outputFile); @@ -631,11 +627,11 @@ int adaptive_sort( #ifdef DEBUG_OUTPUT debug_log("Wrote 
block. Sublist: %d ", numSublist); debug_log(" Idx: %d\n", sublistSize); - //debug_log("Offset: %lu\n", ftell(outputFile) - es->page_size); + // debug_log("Offset: %lu\n", ftell(outputFile) - es->page_size); for (int k = 0; k < tuplesPerPage; k++) { debug_log("%3d: 3 Output Record: %d\n", k, *(uint32_t*)(buffer + es->headerSize + k * es->record_size + es->key_offset)); } - + #endif metric->num_writes += 1; @@ -689,8 +685,6 @@ int adaptive_sort( debug_log("=================================\n\n"); #endif - //((file_iterator_state_t*)iteratorState)->fileInterface->flush(outputFile); - // No merge phase necessary if (numSublist == 1) { ((file_iterator_state_t*)iteratorState)->fileInterface->flush(outputFile); @@ -781,7 +775,6 @@ int adaptive_sort( int16_t space = 0; int16_t outputCursor; int8_t destBlk; - int32_t other = 0; // Verify all memory has been allocated successfully if (record2 == NULL) { @@ -1023,7 +1016,7 @@ int adaptive_sort( } debug_log("HERE\n"); } - + #endif /* Add smallest tuple to output position in buffer (may already be in output buffer) */ @@ -1127,7 +1120,7 @@ int adaptive_sort( ((file_iterator_state_t*)iteratorState)->fileInterface->seek(lastWritePos, outputFile); #ifdef DEBUG debug_log("Writing page adaptive writeRel 2: blockIndex=%d, count=%d, filePosition=%ld\n", - currentBlockId, tuplesPerPage, lastWritePos / PAGE_SIZE); + currentBlockId, tuplesPerPage, lastWritePos / PAGE_SIZE); #endif ((file_iterator_state_t*)iteratorState)->fileInterface->writeRel(buffer + OUTPUT_BLOCK_ID * es->page_size, PAGE_SIZE, 1, outputFile); if (((file_iterator_state_t*)iteratorState)->fileInterface->error(outputFile)) { @@ -1148,7 +1141,7 @@ int adaptive_sort( void* buf = (void*)(buffer + es->headerSize + k * es->record_size); debug_log("%3d: 4 Output Record: %d Address: %p\n", k, *(uint32_t*)(buf + es->key_offset), buf); } - + #endif } @@ -1205,7 +1198,7 @@ int adaptive_sort( debug_log("%d: Record: %d Address: %p\n", k, buf + es->key_offset, buf); } } - + #endif } 
} @@ -1503,7 +1496,7 @@ int adaptive_sort( ((file_iterator_state_t*)iteratorState)->fileInterface->seek(lastWritePos, outputFile); #ifdef DEBUG debug_log("Writing page adaptive write rel 3: blockIndex=%d, count=%d, filePosition=%ld\n", - currentBlockId, (int16_t)(record2[0] - es->headerSize) / es->record_size + 1, lastWritePos / PAGE_SIZE); + currentBlockId, (int16_t)(record2[0] - es->headerSize) / es->record_size + 1, lastWritePos / PAGE_SIZE); #endif ((file_iterator_state_t*)iteratorState)->fileInterface->writeRel(buffer + OUTPUT_BLOCK_ID * es->page_size, PAGE_SIZE, 1, outputFile); if (((file_iterator_state_t*)iteratorState)->fileInterface->error(outputFile)) { @@ -1525,7 +1518,7 @@ int adaptive_sort( void* buf = (void*)(buffer + es->headerSize + k * es->record_size); debug_log("%3d: 5 Output Record: %d Address: %p\n", k, *(uint32_t*)(buf + es->key_offset), buf); // TODO: Update to no use test_record_t } - + #endif } diff --git a/src/query-interface/sort/flash_minsort.c b/src/query-interface/sort/flash_minsort.c index a6f6638d..d4d1980f 100644 --- a/src/query-interface/sort/flash_minsort.c +++ b/src/query-interface/sort/flash_minsort.c @@ -52,7 +52,13 @@ This is no output sort with block headers and iterator input. Heap used when mov // #define DEBUG 1 // #define DEBUG_OUTPUT 1 // #define DEBUG_READ 1 -// #include "debug_print.h" +#if defined(DEBUG) || defined(DEBUG_OUTPUT) || defined(DEBUG_READ) +#include "debug_print.h" +#else +#ifndef debug_log +#define debug_log(...) ((void)0) +#endif +#endif #ifndef INT_MAX #define INT_MAX 0xFFFFFFFF @@ -68,7 +74,9 @@ This is no output sort with block headers and iterator input. 
Heap used when mov void readPageMinSort(MinSortState *ms, int pageNum, external_sort_t *es, metrics_t *metric) { file_iterator_state_t *is = (file_iterator_state_t *)ms->iteratorState; void *fp = is->file; - +#ifdef DEBUG + debug_log("DEBUG: READ_PAGE %d (Offset %d)\n", pageNum, pageNum * es->page_size); +#endif // Read page into the buffer if (0 == is->fileInterface->read(ms->buffer, pageNum, es->page_size, fp)) { #ifdef DEBUG @@ -160,28 +168,21 @@ void init_MinSort(MinSortState *ms, external_sort_t *es, metrics_t *metric, int8 ms->min_initialized[i] = false; } - /* Populate each region’s minimum key by scanning blocks */ + /* Populate each region's minimum key by scanning blocks */ for (i = 0; i < ms->numBlocks; i++) { - readPageMinSort(ms, i, es, metric); // Load block i into buffer + readPageMinSort(ms, i, es, metric); regionIdx = i / ms->blocks_per_region; - // Set inital value to first read. - // ms->min[regionIdx] = getValuePtr(ms, 0, es); - memcpy(getMinRegionPtr(ms, regionIdx, es), getValuePtr(ms, 0, es), es->key_size); - metric->num_memcpys++; - ms->min_initialized[regionIdx] = true; - - /* Process remaining records in the block */ - for (j = 1; j < ms->records_per_block; j++) { + for (j = 0; j < ms->records_per_block; j++) { if (((i * ms->records_per_block) + j) < ms->num_records) { val = getValuePtr(ms, j, es); metric->num_compar++; - /* Update region’s minimum if current record is smaller */ - if (compareFn(val, getMinRegionPtr(ms, regionIdx, es)) == -1) { + // Only update if this is the first record for the region OR we found a new minimum + if (!ms->min_initialized[regionIdx] || compareFn(val, getMinRegionPtr(ms, regionIdx, es)) == -1) { memcpy(getMinRegionPtr(ms, regionIdx, es), val, es->key_size); - metric->num_memcpys++; ms->min_initialized[regionIdx] = true; + metric->num_memcpys++; } } else break; @@ -190,7 +191,7 @@ void init_MinSort(MinSortState *ms, external_sort_t *es, metrics_t *metric, int8 #ifdef DEBUG for (i = 0; i < ms->numRegions; i++) 
- debug_log("Region: %d Min: %d\r\n", i, ms->min[i]); + debug_log("Region: %d Min: %d\r\n", i, *(int *)getMinRegionPtr(ms, i, es)); #endif /* Allocate memory for current and next keys */ @@ -254,7 +255,7 @@ char *next_MinSort(MinSortState *ms, external_sort_t *es, void *tupleBuffer, met for (k = startIndex / ms->records_per_block; k < ms->blocks_per_region; k++) { curBlk = startBlk + k; - if (curBlk > ms->numBlocks) { + if (curBlk >= ms->numBlocks) { break; } @@ -419,7 +420,9 @@ int flash_minsort( #ifdef DEBUG debug_log("*Flash Minsort*\n"); #endif +#ifndef ARDUINO clock_t start = clock(); +#endif MinSortState ms; ms.buffer = buffer; @@ -444,7 +447,6 @@ int flash_minsort( if (count == values_per_page) { // Write block *((int32_t *)outputBuffer) = blockIndex; /* Block index */ *((int16_t *)(outputBuffer + BLOCK_COUNT_OFFSET)) = count; /* Block record count */ - count = 0; // Reset count for the next block // Write the block to the output file using the file interface's write method ((file_iterator_state_t *)iteratorState)->fileInterface->seek(lastWritePos, outputFile); @@ -452,8 +454,6 @@ int flash_minsort( debug_log("Writing page flash minsort: blockIndex=%d, count=%d, filePosition=%ld\n", blockIndex, count, count / PAGE_SIZE); #endif - debug_log("Writing page flash minsort: blockIndex=%d, count=%d\n", - blockIndex, count); if (0 == ((file_iterator_state_t *)iteratorState)->fileInterface->writeRel(outputBuffer, es->page_size, 1, outputFile)) { return 9; // Return error code if writing to the output file fails } @@ -468,6 +468,7 @@ int flash_minsort( metric->num_writes++; lastWritePos += es->page_size; blockIndex++; + count = 0; } } @@ -495,10 +496,9 @@ int flash_minsort( ((file_iterator_state_t *)iteratorState)->fileInterface->flush(outputFile); close_MinSort(&ms, es); - +#ifndef ARDUINO clock_t end = clock(); - - *resultFilePtr = 0; +#endif #ifdef DEBUG debug_log("Complete. 
Comparisons: %d MemCopies: %d\n", metric->num_compar, metric->num_memcpys); diff --git a/src/query-interface/sort/flash_minsort_sublist.c b/src/query-interface/sort/flash_minsort_sublist.c index 26b603f7..210ac5d0 100644 --- a/src/query-interface/sort/flash_minsort_sublist.c +++ b/src/query-interface/sort/flash_minsort_sublist.c @@ -49,7 +49,13 @@ // #define DEBUG 1 // #define DEBUG_OUTPUT 1 // #define DEBUG_READ 1 -// #include "debug_print.h" +#if defined(DEBUG) || defined(DEBUG_OUTPUT) || defined(DEBUG_READ) +#include "debug_print.h" +#else +#ifndef debug_log +#define debug_log(...) ((void)0) +#endif +#endif void readPage_sublist(MinSortStateSublist *ms, int pageNum, external_sort_t *es, metrics_t *metric) { file_iterator_state_t *is = (file_iterator_state_t *)ms->iteratorState; @@ -404,7 +410,6 @@ int flash_minsort_sublist( close_MinSort_sublist(&ms, es); - *resultFilePtr = 0; free(ms.min); free(ms.offset); free(ms.current); diff --git a/src/query-interface/sort/sortWrapper.c b/src/query-interface/sort/sortWrapper.c index ca7c7a94..c10d7932 100644 --- a/src/query-interface/sort/sortWrapper.c +++ b/src/query-interface/sort/sortWrapper.c @@ -1,13 +1,21 @@ #include "sortWrapper.h" #include "query-interface/sort/in_memory_sort.h" +#ifndef ARDUINO #include "unistd.h" +#endif - -// #define PRINT_METRIC -// #define DEBUG -// #define PRINT_ERRORS +#define PRINT_METRIC +#define DEBUG +#define PRINT_ERRORS +#if defined(DEBUG) || defined(PRINT_METRIC) || defined(PRINT_ERRORS) #include "debug_print.h" +#else +#ifndef debug_log +#define debug_log(...) 
((void)0) +#endif + +#endif /** * @brief Pure in-memory sort that avoids file I/O completely for very small datasets @@ -159,10 +167,10 @@ uint32_t loadRowData(sortData *data, embedDBOperator *op, void *unsortedFile) { #ifdef DEBUG debug_log("DEBUG loadRowData: PAGE_SIZE=%d, BLOCK_HEADER_SIZE=%d, recordSize=%d, valuesPerPage=%d\n", - PAGE_SIZE, BLOCK_HEADER_SIZE, data->recordSize, valuesPerPage); + PAGE_SIZE, BLOCK_HEADER_SIZE, data->recordSize, valuesPerPage); #endif - void* buffer = malloc(PAGE_SIZE); + void *buffer = malloc(PAGE_SIZE); if (buffer == NULL) { #ifdef PRINT_ERRORS @@ -173,10 +181,8 @@ uint32_t loadRowData(sortData *data, embedDBOperator *op, void *unsortedFile) { // Write row data to file while (exec(op->input)) { - // Write page to file when full if (count % valuesPerPage == 0 && count != 0) { - if (writePageWithHeader(buffer, blockIndex, valuesPerPage, PAGE_SIZE, data->fileInterface, unsortedFile)) { free(buffer); buffer = NULL; @@ -198,7 +204,7 @@ uint32_t loadRowData(sortData *data, embedDBOperator *op, void *unsortedFile) { } // Write data to buffer - memcpy((uint8_t*)buffer + rowOffset, op->input->recordBuffer, data->recordSize); + memcpy((uint8_t *)buffer + rowOffset, op->input->recordBuffer, data->recordSize); #ifdef DEBUG if (count < 10) { debug_log("DEBUG loadRowData record %d: ", count); @@ -272,17 +278,6 @@ void prepareSort(embedDBOperator *op) { if (data->keySize < 0) { data->keySize = -1 * data->keySize; } - -#ifdef ARDUINO - data->fileIterator = startPureMemorySort(data, op); - if (data->fileIterator == NULL) { -#ifdef DEBUG - debug_log("ERROR: Pure memory sort failed\n"); -#endif - return; - } -#else - if (data->fileInterface == NULL || data->fileInterface->setup == NULL) { #ifdef PRINT_ERRORS debug_log("ERROR: File interface or setup function not provided while initializing ORDER BY operator\n"); @@ -291,10 +286,10 @@ void prepareSort(embedDBOperator *op) { } char *tmp1 = data->fileInterface->tempFilePath(); - char *tmp2 = 
data->fileInterface->tempFilePath(); - - void *unsortedFile = data->fileInterface->setup(tmp1); - void *sortedFile = data->fileInterface->setup(tmp2); + char* tmp2 = data->fileInterface->tempFilePath(); + + void* unsortedFile = data->fileInterface->setup(tmp1); + void* sortedFile = data->fileInterface->setup(tmp2); free(tmp1); free(tmp2); @@ -304,7 +299,6 @@ void prepareSort(embedDBOperator *op) { #endif return; } - const uint8_t unsortedOpen = data->fileInterface->open(unsortedFile, EMBEDDB_FILE_MODE_W_PLUS_B); const uint8_t sortedOpen = data->fileInterface->open(sortedFile, EMBEDDB_FILE_MODE_W_PLUS_B); @@ -317,12 +311,12 @@ void prepareSort(embedDBOperator *op) { // Load row data data->count = loadRowData(data, op, unsortedFile); - + debug_log("finished load row data, starting sort\n"); // Start sorting file_iterator_state_t *iteratorState = startSort(data, unsortedFile, sortedFile); if (iteratorState == NULL) { #ifdef PRINT_ERRORS - debug_log("ERROR: Sort failed"); + debug_log("ERROR: Sort failed"); #endif return; } @@ -334,255 +328,208 @@ void prepareSort(embedDBOperator *op) { data->fileInterface->removeFile(unsortedFile); } data->fileIterator = iteratorState; -#endif - } +} - /** - * @brief The data given in the unsortedFile is sorted and stored in the sortedFile - * - * @param fileInterface The file interface - * @param unsortedFile The file that is loaded with row data - * @param sortedFile An empty file - * @param recordSize The size of the records - * @param count The total number of records stored in unsortedFile - * @return file_iterator_state_t* An iterator that is used to retrieve the sorted records - */ - file_iterator_state_t *startSort(sortData * data, void *unsortedFile, void *sortedFile) { - // Initialize external_sort_t structure - external_sort_t es; - es.key_size = data->keySize; - es.value_size = data->recordSize; - es.record_size = data->recordSize; - es.key_offset = data->keyOffset; - es.headerSize = BLOCK_HEADER_SIZE; - es.page_size = 
PAGE_SIZE; - es.num_pages = (uint32_t)ceil((float)data->count / ((es.page_size - es.headerSize) / es.record_size)); - -// Reduce buffer size for Arduino -#ifdef ARDUINO - const int buffer_max_pages = 1; // Reduced to minimum for Arduino -#else - const int buffer_max_pages = 4; -#endif +/** + * @brief The data given in the unsortedFile is sorted and stored in the sortedFile + * + * @param fileInterface The file interface + * @param unsortedFile The file that is loaded with row data + * @param sortedFile An empty file + * @param recordSize The size of the records + * @param count The total number of records stored in unsortedFile + * @return file_iterator_state_t* An iterator that is used to retrieve the sorted records + */ +file_iterator_state_t *startSort(sortData *data, void *unsortedFile, void *sortedFile) { + // Initialize external_sort_t structure + external_sort_t es; + es.key_size = data->keySize; + es.value_size = data->recordSize; + es.record_size = data->recordSize; + es.key_offset = data->keyOffset; + es.headerSize = BLOCK_HEADER_SIZE; + es.page_size = PAGE_SIZE; + es.num_pages = (uint32_t)ceil((float)data->count / ((es.page_size - es.headerSize) / es.record_size)); + + const int buffer_max_pages = 4; - char *buffer = malloc(buffer_max_pages * es.page_size + es.record_size); - char *tuple_buffer = buffer + es.page_size * buffer_max_pages; - if (buffer == NULL) { + char *buffer = malloc(buffer_max_pages * es.page_size + es.record_size); + char *tuple_buffer = buffer + es.page_size * buffer_max_pages; + + if (buffer == NULL) { #ifdef PRINT_ERRORS - debug_log("ERROR: SORT: buffer malloc failed m\n"); + debug_log("ERROR: SORT: buffer malloc failed m\n"); #endif - return NULL; - } + return NULL; + } - // Prepare the file iterator data for sorting - file_iterator_state_t *iteratorState = malloc(sizeof(file_iterator_state_t)); - if (iteratorState == NULL) { + // Prepare the file iterator data for sorting + file_iterator_state_t *iteratorState = 
malloc(sizeof(file_iterator_state_t)); + if (iteratorState == NULL) { #ifdef PRINT_ERRORS - debug_log("Error: SORT: iterator malloc failed\n"); + debug_log("Error: SORT: iterator malloc failed\n"); #endif - free(buffer); - buffer = NULL; - return NULL; - } + free(buffer); + buffer = NULL; + return NULL; + } - iteratorState->file = unsortedFile; - iteratorState->recordsRead = 0; - iteratorState->totalRecords = data->count; - iteratorState->recordSize = es.record_size; - iteratorState->fileInterface = data->fileInterface; - iteratorState->currentRecord = 0; - iteratorState->recordsLeftInBlock = 0; - iteratorState->resultFile = 0; + iteratorState->file = unsortedFile; + iteratorState->recordsRead = 0; + iteratorState->totalRecords = data->count; + iteratorState->recordSize = es.record_size; + iteratorState->fileInterface = data->fileInterface; + iteratorState->currentRecord = 0; + iteratorState->recordsLeftInBlock = 0; + iteratorState->resultFile = 0; - data->fileIterator = iteratorState; + data->fileIterator = iteratorState; - // Metrics - metrics_t metrics = initMetric(); + // Metrics + metrics_t metrics = initMetric(); - long result_file_ptr = 0; + long result_file_ptr = 0; + + int err; - int err; -// Use simpler sort for Arduino with small datasets -#ifdef ARDUINO -#ifdef DEBUG - debug_log("DEBUG: Starting Arduino sort with %d records\n", data->count); -#endif - if (data->count <= 100) { // Use flash_minsort for all datasets on Arduino (more memory efficient) -#ifdef DEBUG - debug_log("DEBUG: Using flash_minsort for small dataset\n"); -#endif - err = flash_minsort(iteratorState, tuple_buffer, sortedFile, buffer, buffer_max_pages * es.page_size, &es, &result_file_ptr, &metrics, data->compareFn); - } else { -#ifdef DEBUG - debug_log("DEBUG: Using flash_minsort for large dataset\n"); -#endif - // Use flash_minsort for larger datasets (more memory efficient than adaptive_sort) - err = flash_minsort(iteratorState, tuple_buffer, sortedFile, buffer, buffer_max_pages * 
es.page_size, &es, &result_file_ptr, &metrics, data->compareFn); - } -#ifdef DEBUG - debug_log("DEBUG: Arduino sort completed with error code: %d\n", err); -#endif -#else // Use adaptive sort on desktop int8_t runGenOnly = false; // Run full sort operation int8_t writeReadRatio = 19; // 1.97 * 10 => 19 - // err = flash_minsort(iteratorState, tuple_buffer, sortedFile, buffer, buffer_max_pages * es.page_size, &es, &result_file_ptr, &metrics, data->compareFn); err = adaptive_sort(readNextRecord, iteratorState, tuple_buffer, sortedFile, buffer, buffer_max_pages, &es, &result_file_ptr, &metrics, data->compareFn, runGenOnly, writeReadRatio, data); -#endif #ifdef PRINT_METRIC - debug_log("\tComplete. Comparisons: %d Writes: %d Reads: %d Memcpys: %d\n", metrics.num_compar, metrics.num_writes, metrics.num_reads, metrics.num_memcpys); + debug_log("\tComplete. Comparisons: %d Writes: %d Reads: %d Memcpys: %d\n", metrics.num_compar, metrics.num_writes, metrics.num_reads, metrics.num_memcpys); #endif - iteratorState->resultFile = result_file_ptr; + iteratorState->resultFile = result_file_ptr; #ifdef PRINT_ERRORS - if (8 == err) { - debug_log("Out of memory!\n"); - } else if (10 == err) { - debug_log("File Read Error!\n"); - } else if (9 == err) { - debug_log("File Write Error!\n"); - } + if (8 == err) { + debug_log("Out of memory!\n"); + } else if (10 == err) { + debug_log("File Read Error!\n"); + } else if (9 == err) { + debug_log("File Write Error!\n"); + } #endif - // Reset file iterator - iteratorState->recordsRead = 0; - iteratorState->currentRecord = 0; + // Reset file iterator + iteratorState->recordsRead = 0; + iteratorState->currentRecord = 0; - // Clean up - free(buffer); - buffer = NULL; - return iteratorState; - } + // Clean up + free(buffer); + buffer = NULL; + return iteratorState; +} - /** - * @brief Reads the next record from the sorted file - * - * @param data The ORDER BY operator data - * @param buffer A buffer that is the size of one record - * @return 
uint8_t 0: if read was successful. other wise none zero - */ - uint8_t readNextRecord(void *data, void *buffer) { - file_iterator_state_t *iteratorState = ((sortData *)data)->fileIterator; - - if (iteratorState->recordsRead >= iteratorState->totalRecords) { - return 1; // No more records left to read - } +/** + * @brief Reads the next record from the sorted file + * + * @param data The ORDER BY operator data + * @param buffer A buffer that is the size of one record + * @return uint8_t 0: if read was successful. other wise none zero + */ +uint8_t readNextRecord(void *data, void *buffer) { + file_iterator_state_t *iteratorState = ((sortData *)data)->fileIterator; -#ifdef ARDUINO - // For pure memory sort on Arduino, read directly from memory buffer - if (iteratorState->file != NULL && iteratorState->resultFile == 0) { - memcpy(buffer, (char *)iteratorState->file + iteratorState->recordsRead * iteratorState->recordSize, - iteratorState->recordSize); - iteratorState->recordsRead++; - iteratorState->currentRecord++; - return 0; - } -#endif + if (iteratorState->recordsRead >= iteratorState->totalRecords) { + return 1; // No more records left to read + } - uint32_t recordPerPage = (PAGE_SIZE - BLOCK_HEADER_SIZE) / iteratorState->recordSize; + uint32_t recordPerPage = (PAGE_SIZE - BLOCK_HEADER_SIZE) / iteratorState->recordSize; - // Read next page if current buffer is empty - if (iteratorState->currentRecord % recordPerPage == 0 || iteratorState->recordsRead == 0) { - uint32_t seekOffset = iteratorState->resultFile + (iteratorState->currentRecord / recordPerPage) * PAGE_SIZE; + // Read next page if current buffer is empty + if (iteratorState->currentRecord % recordPerPage == 0 || iteratorState->recordsRead == 0) { + uint32_t seekOffset = iteratorState->resultFile + (iteratorState->currentRecord / recordPerPage) * PAGE_SIZE; - iteratorState->fileInterface->seek(seekOffset, iteratorState->file); - iteratorState->fileInterface->readRel(((sortData *)data)->readBuffer, 
PAGE_SIZE, 1, iteratorState->file); + iteratorState->fileInterface->seek(seekOffset, iteratorState->file); + iteratorState->fileInterface->readRel(((sortData *)data)->readBuffer, PAGE_SIZE, 1, iteratorState->file); #ifdef DEBUG - if (iteratorState->recordsRead == 0 || iteratorState->recordsRead % 1000 == 0) { - debug_log("DEBUG readNextRecord: pageNum=%d, seekOffset=%d, recordsRead=%d\n", - iteratorState->currentRecord / recordPerPage, seekOffset, iteratorState->recordsRead); - } + if (iteratorState->recordsRead == 0 || iteratorState->recordsRead % 1000 == 0) { + debug_log("DEBUG readNextRecord: pageNum=%d, seekOffset=%d, recordsRead=%d\n", + iteratorState->currentRecord / recordPerPage, seekOffset, iteratorState->recordsRead); + } #endif - if (((sortData *)data)->fileInterface->error(iteratorState->file)) { + if (((sortData *)data)->fileInterface->error(iteratorState->file)) { #ifdef PRINT_ERRORS - debug_log("ERROR: SORT: next record read failed"); + debug_log("ERROR: SORT: next record read failed"); #endif - return 2; - } + return 2; } + } - // Copy result to output buffer - uint16_t valuesInPage; - memcpy(&valuesInPage, ((sortData *)data)->readBuffer + sizeof(uint32_t), - sizeof(uint16_t)); - uint32_t recordIndexInPage = iteratorState->currentRecord % recordPerPage; + // Copy result to output buffer + uint16_t valuesInPage; + memcpy(&valuesInPage, ((sortData *)data)->readBuffer + sizeof(uint32_t), + sizeof(uint16_t)); + uint32_t recordIndexInPage = iteratorState->currentRecord % recordPerPage; #ifdef DEBUG - + #endif - if (recordIndexInPage >= valuesInPage) { - return 1; - } - uint32_t copyOffset = BLOCK_HEADER_SIZE + iteratorState->recordSize * recordIndexInPage; - memcpy(buffer, ((sortData *)data)->readBuffer + copyOffset, iteratorState->recordSize); + if (recordIndexInPage >= valuesInPage) { + return 1; + } + uint32_t copyOffset = BLOCK_HEADER_SIZE + iteratorState->recordSize * recordIndexInPage; + memcpy(buffer, ((sortData *)data)->readBuffer + copyOffset, 
iteratorState->recordSize); #ifdef DEBUG - if (iteratorState->recordsRead < 10 || iteratorState->recordsRead % 1000 == 0) { - int32_t *keyPtr = (int32_t *)(buffer + ((sortData *)data)->keyOffset); - debug_log("DEBUG readNextRecord: recordsRead=%d, currentRecord=%d, pageIdx=%d, recordInPage=%d, copyOffset=%d, key=%d\n", - iteratorState->recordsRead, iteratorState->currentRecord, iteratorState->currentRecord / recordPerPage, - recordIndexInPage, copyOffset, *keyPtr); - uint32_t blockIdx; - memcpy(&blockIdx, ((sortData *)data)->readBuffer, sizeof(uint32_t)); - debug_log("READ PAGE hdr: blockIdx=%u values=%u\n", - blockIdx, valuesInPage); - debug_log("PAGE HEADER: page=%d values=%d\n", - iteratorState->currentRecord / recordPerPage, - valuesInPage); - int32_t *key0 = (int32_t *)(((sortData *)data)->readBuffer + BLOCK_HEADER_SIZE + ((sortData *)data)->keyOffset); - int32_t *keyLast = (int32_t *)(((sortData *)data)->readBuffer + BLOCK_HEADER_SIZE + (recordPerPage - 1) * iteratorState->recordSize + ((sortData *)data)->keyOffset); - debug_log(" First key on page: %d, Last key on page: %d\n", *key0, *keyLast); - } + if (iteratorState->recordsRead < 10 || iteratorState->recordsRead % 1000 == 0) { + int32_t *keyPtr = (int32_t *)(buffer + ((sortData *)data)->keyOffset); + debug_log("DEBUG readNextRecord: recordsRead=%d, currentRecord=%d, pageIdx=%d, recordInPage=%d, copyOffset=%d, key=%d\n", + iteratorState->recordsRead, iteratorState->currentRecord, iteratorState->currentRecord / recordPerPage, + recordIndexInPage, copyOffset, *keyPtr); + uint32_t blockIdx; + memcpy(&blockIdx, ((sortData *)data)->readBuffer, sizeof(uint32_t)); + debug_log("READ PAGE hdr: blockIdx=%u values=%u\n", + blockIdx, valuesInPage); + debug_log("PAGE HEADER: page=%d values=%d\n", + iteratorState->currentRecord / recordPerPage, + valuesInPage); + int32_t *key0 = (int32_t *)(((sortData *)data)->readBuffer + BLOCK_HEADER_SIZE + ((sortData *)data)->keyOffset); + int32_t *keyLast = (int32_t *)(((sortData 
*)data)->readBuffer + BLOCK_HEADER_SIZE + (recordPerPage - 1) * iteratorState->recordSize + ((sortData *)data)->keyOffset); + debug_log(" First key on page: %d, Last key on page: %d\n", *key0, *keyLast); + } #endif - iteratorState->recordsRead++; - iteratorState->currentRecord++; - - // #ifdef DEBUG - // printf("DEBUG: ROWDATA from file:\n"); - // for (int i = 0; i < iteratorState->recordSize - SORT_KEY_SIZE; i++) { - // printf("%2x ", ((uint8_t *)buffer)[i]); - // } - // printf("\n"); - // #endif - return 0; - } + iteratorState->recordsRead++; + iteratorState->currentRecord++; - void closeSort(file_iterator_state_t * iteratorState) { -#ifdef ARDUINO - // For pure memory sort, we need to free the memory buffer - if (iteratorState->file != NULL && iteratorState->resultFile == 0) { - free(iteratorState->file); - iteratorState->file = NULL; - return; - } -#endif + // #ifdef DEBUG + // printf("DEBUG: ROWDATA from file:\n"); + // for (int i = 0; i < iteratorState->recordSize - SORT_KEY_SIZE; i++) { + // printf("%2x ", ((uint8_t *)buffer)[i]); + // } + // printf("\n"); + // #endif + return 0; +} - if (iteratorState->file != NULL) { - iteratorState->fileInterface->close(iteratorState->file); - if (iteratorState->fileInterface->removeFile) { - iteratorState->fileInterface->removeFile(iteratorState->file); - } - iteratorState->file = NULL; +void closeSort(file_iterator_state_t *iteratorState) { + if (iteratorState->file != NULL) { + iteratorState->fileInterface->close(iteratorState->file); + if (iteratorState->fileInterface->removeFile) { + iteratorState->fileInterface->removeFile(iteratorState->file); } + iteratorState->file = NULL; } +} - /** - * @brief Initializes default metric values - * - * @return metrics_t - */ - metrics_t initMetric() { - metrics_t metrics; - metrics.num_reads = 0; - metrics.num_compar = 0; - metrics.num_memcpys = 0; - metrics.num_runs = 0; - metrics.num_writes = 0; - return metrics; - } +/** + * @brief Initializes default metric values + * + * 
@return metrics_t + */ +metrics_t initMetric() { + metrics_t metrics; + metrics.num_reads = 0; + metrics.num_compar = 0; + metrics.num_memcpys = 0; + metrics.num_runs = 0; + metrics.num_writes = 0; + return metrics; +} diff --git a/test/test_sort/test_sort_query_interface.cpp b/test/test_sort/test_sort_query_interface.cpp index 4e8d414b..1f5de7e7 100644 --- a/test/test_sort/test_sort_query_interface.cpp +++ b/test/test_sort/test_sort_query_interface.cpp @@ -22,6 +22,7 @@ #endif #ifdef ARDUINO +#define FILE_TYPE SD_FILE #include "SDFileInterface.h" #define getFileInterface getSDInterface #define setupFile setupSDFile @@ -43,8 +44,6 @@ #include "unity.h" -#define DEBUG - embedDBState* state; embedDBSchema* baseSchema; @@ -68,9 +67,6 @@ void setUp() { state->dataFile = state->fileInterface->setup(dataPath); state->indexFile = state->fileInterface->setup(indexPath); -#ifdef ARDUINO - state->fileInterface->tempFilePath -#endif state->bufferSizeInBlocks = 4; state->buffer = malloc(state->bufferSizeInBlocks * state->pageSize); @@ -153,47 +149,74 @@ void debugBinData(embedDBOperator* op, uint32_t numValues, uint8_t col) { op->init(op); int32_t* buffer = (int32_t*)op->recordBuffer; printf("\n"); - for (int i = 0; i <= numValues; ++i) { + for (uint32_t i = 0; i <= numValues; ++i) { exec(op); printf("%i ", (int32_t)buffer[col]); } printf("\n"); - fflush(stdout); + //fflush(stdout); } void runTestSequentialValues() { // Insert test data #ifdef ARDUINO - insertNValues(state, 1, 0); + Serial.println("About to insert values\n"); + insertNValues(state, 100, 1); #else - insertNValues(state, 190, 1); + insertNValues(state, 300, 1); #endif - + embedDBIterator it; it.minKey = NULL; it.maxKey = NULL; it.minData = NULL; it.maxData = NULL; embedDBInitIterator(state, &it); - +#ifdef ARDUINO + Serial.println("\n=================================="); + Serial.println("Creating table scan"); + Serial.println("=================================="); + Serial.flush(); +#endif embedDBOperator* 
scanOpOrderBy = createTableScanOperator(state, &it, baseSchema); - uint8_t projColsOB[] = {0, 1}; + uint8_t projColsOB[] = { 0, 1 }; +#ifdef ARDUINO + Serial.println("\n=================================="); + Serial.println("Creating projection"); + Serial.println("=================================="); + Serial.flush(); +#endif embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); +#ifdef ARDUINO + Serial.println("\n=================================="); + Serial.println("Creating order by"); + Serial.println("=================================="); + Serial.flush(); +#endif embedDBOperator* orderByOp = createOrderByOperator(state, projColsOrderBy, 1, -1, int32Comparator); - // debugBinData(orderByOp, 190, 1); - + // debugBinData(orderByOp, 300, 1); +#ifdef ARDUINO + Serial.println("\n=================================="); + Serial.println("Init order by"); + Serial.println("=================================="); + Serial.flush(); +#endif orderByOp->init(orderByOp); - +#ifdef ARDUINO + Serial.println("\n=================================="); + Serial.println("exec order by"); + Serial.println("=================================="); + Serial.flush(); +#endif int32_t* recordBuffer = (int32_t*)orderByOp->recordBuffer; - uint32_t previous = 0; + exec(orderByOp); + int32_t previous = ((int32_t)recordBuffer[1]); int recordCount = 0; while (exec(orderByOp)) { TEST_ASSERT_GREATER_OR_EQUAL_INT32_MESSAGE(previous, ((int32_t)recordBuffer[1]), "Sort value is not greater than or equal to previous value."); previous = ((int32_t)recordBuffer[1]); recordCount++; - printf("%d ", previous); - fflush(stdout); } orderByOp->close(orderByOp); @@ -213,23 +236,22 @@ void runTestUsingSEA100k() { embedDBInitIterator(state, &it); embedDBOperator* scanOpOrderBy = createTableScanOperator(state, &it, baseSchema); - //debugBinData(scanOpOrderBy, 200, 0); - uint8_t projColsOB[] = {0, 1}; + // debugBinData(scanOpOrderBy, 200, 0); + uint8_t projColsOB[] = { 0, 1 }; 
embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); - //debugBinData(projColsOrderBy, 200, 0); + // debugBinData(projColsOrderBy, 300, 1); embedDBOperator* orderByOp = createOrderByOperator(state, projColsOrderBy, 1, -1, int32Comparator); - debugBinData(orderByOp, 100000, 1); + // debugBinData(orderByOp, 100000, 1); orderByOp->init(orderByOp); int32_t* recordBuffer = (int32_t*)orderByOp->recordBuffer; - uint32_t previous = 0; + exec(orderByOp); + int32_t previous = ((int32_t)recordBuffer[1]) / 10.0; // Result of the sort while (exec(orderByOp)) { TEST_ASSERT_GREATER_OR_EQUAL_INT32_MESSAGE(previous, ((int32_t)recordBuffer[1]) / 10.0, "Sort value is not greater than or equal to previous value previous values."); previous = ((int32_t)recordBuffer[1]) / 10.0; - printf("%d ", previous); - fflush(stdout); } orderByOp->close(orderByOp); @@ -238,8 +260,21 @@ void runTestUsingSEA100k() { int runUnityTests() { UNITY_BEGIN(); +#ifdef ARDUINO + Serial.println("\n\n=================================="); + Serial.println("Starting Test Suite"); + Serial.println("=================================="); + Serial.flush(); + delay(1000); // Give time to start serial monitor +#endif RUN_TEST(runTestSequentialValues); - RUN_TEST(runTestUsingSEA100k); +#ifdef ARDUINO + Serial.println("\n=================================="); + Serial.println("Test 1 Complete, starting Test 2"); + Serial.println("=================================="); + Serial.flush(); +#endif + //RUN_TEST(runTestUsingSEA100k); return UNITY_END(); } From bc245b9da7a9bd1af36cc87036b9ffec7f0a5ac1 Mon Sep 17 00:00:00 2001 From: xelArga Date: Tue, 3 Feb 2026 12:09:23 -0800 Subject: [PATCH 16/17] started adding sd interface methods and debugging the io operations --- lib/SD-File-Interface/SDFileInterface.c | 29 ++++++++++++++++---- lib/SD-Wrapper/sdcard_c_iface.cpp | 5 ++++ lib/SD-Wrapper/sdcard_c_iface.h | 5 ++-- src/query-interface/sort/adaptive_sort.c | 9 +++--- 
src/query-interface/sort/sortWrapper.c | 21 ++++++-------- test/test_sort/test_sort_query_interface.cpp | 2 +- 6 files changed, 45 insertions(+), 26 deletions(-) diff --git a/lib/SD-File-Interface/SDFileInterface.c b/lib/SD-File-Interface/SDFileInterface.c index d41d7503..3b9e442e 100644 --- a/lib/SD-File-Interface/SDFileInterface.c +++ b/lib/SD-File-Interface/SDFileInterface.c @@ -57,7 +57,7 @@ void tearDownSDFile(void *file) { free(file); } -int8_t SD_FILE_REMOVE(void *file) { +int8_t FILE_REMOVE(void *file) { if (file == NULL) return 1; SD_FILE_INFO *fileInfo = (SD_FILE_INFO *)file; @@ -81,7 +81,7 @@ int8_t FILE_READ(void *buffer, uint32_t pageNum, uint32_t pageSize, void *file) int8_t FILE_WRITE(void *buffer, uint32_t pageNum, uint32_t pageSize, void *file) { SD_FILE_INFO *fileInfo = (SD_FILE_INFO *)file; - if (fileInfo->sdFile == NULL) return -1; + if (fileInfo->sdFile == NULL) return 0; size_t fileSize = sd_length(fileInfo->sdFile); size_t requiredSize = (size_t)pageNum * pageSize; @@ -90,14 +90,14 @@ int8_t FILE_WRITE(void *buffer, uint32_t pageNum, uint32_t pageSize, void *file) sd_fseek(fileInfo->sdFile, 0, SEEK_END); uint8_t zero = 0; while (sd_length(fileInfo->sdFile) < requiredSize) { - if (sd_fwrite(&zero, 1, 1, fileInfo->sdFile) != 1) return -1; + if (sd_fwrite(&zero, 1, 1, fileInfo->sdFile) != 1) return 0; } } - if (sd_fseek(fileInfo->sdFile, requiredSize, SEEK_SET) != 0) return -1; + if (sd_fseek(fileInfo->sdFile, requiredSize, SEEK_SET) != 0) return 0; if (sd_fwrite(buffer, pageSize, 1, fileInfo->sdFile) == 1) { - return 1; + return 1; } return 0; } @@ -147,6 +147,21 @@ char *sdFat_tempFilePath(void) { return out; } +int8_t FILE_SEEK(uint32_t n, void *file) { + SD_FILE_INFO *fileInfo = (SD_FILE_INFO *)file; + return sd_fseek(fileInfo->sdFile, n, SEEK_SET); +} + +int8_t FILE_ERROR(void *file) { + if (file == NULL) return 1; + SD_FILE_INFO *fileInfo = (SD_FILE_INFO *)file; + + if (sd_ferror(fileInfo->sdFile)) { + return 1; + } + return 0; +} + 
embedDBFileInterface *getSDInterface() { embedDBFileInterface *fileInterface = malloc(sizeof(embedDBFileInterface)); fileInterface->close = FILE_CLOSE; @@ -154,10 +169,12 @@ embedDBFileInterface *getSDInterface() { fileInterface->write = FILE_WRITE; fileInterface->erase = FILE_ERASE; fileInterface->open = FILE_OPEN; + fileInterface->seek = FILE_SEEK; fileInterface->flush = FILE_FLUSH; + fileInterface->error = FILE_ERROR; fileInterface->setup = setupSDFile; fileInterface->teardown = tearDownSDFile; - fileInterface->removeFile = SD_FILE_REMOVE; + fileInterface->removeFile = FILE_REMOVE; fileInterface->tempFilePath = sdFat_tempFilePath; return fileInterface; } diff --git a/lib/SD-Wrapper/sdcard_c_iface.cpp b/lib/SD-Wrapper/sdcard_c_iface.cpp index 73cb6f82..f4a5e36c 100644 --- a/lib/SD-Wrapper/sdcard_c_iface.cpp +++ b/lib/SD-Wrapper/sdcard_c_iface.cpp @@ -141,3 +141,8 @@ int sd_remove(const char *filename) { } return -1; } + +int sd_ferror(SD_FILE *stream) { + if (stream == NULL) return 1; + return stream->f.getWriteError() ? 1 : 0; +} diff --git a/lib/SD-Wrapper/sdcard_c_iface.h b/lib/SD-Wrapper/sdcard_c_iface.h index 30c0be58..5d6fc8c3 100644 --- a/lib/SD-Wrapper/sdcard_c_iface.h +++ b/lib/SD-Wrapper/sdcard_c_iface.h @@ -54,7 +54,6 @@ extern "C" { #define fflush(x) sd_fflush(x) #define fseek(x, y, z) sd_fseek(x, y, z) #define fread(w, x, y, z) sd_fread(w, x, y, z) -// #define remove(x) sd_remove(x) /** @brief Wrapper around Arduino File type (a C++ object). @@ -174,7 +173,9 @@ size_t sd_length(SD_FILE* stream); @param filename The name of the file to delete. @returns 0 on success, -1 on failure. 
*/ -int sd_remove(const char *filename); +int sd_remove(const char* filename); + +int sd_ferror(SD_FILE *stream); void init_sdcard(void* sd); diff --git a/src/query-interface/sort/adaptive_sort.c b/src/query-interface/sort/adaptive_sort.c index 5fe62c27..07f9e036 100644 --- a/src/query-interface/sort/adaptive_sort.c +++ b/src/query-interface/sort/adaptive_sort.c @@ -50,9 +50,9 @@ #include "in_memory_sort.h" #include "no_output_heap.h" -// #define DEBUG 1 -// #define DEBUG_OUTPUT 1 -// #define DEBUG_READ 1 +// #define DEBUG 1 +// #define DEBUG_OUTPUT 1 +// #define DEBUG_READ 1 // #define DEBUG_HEAP 0 // #define ADAPTIVE_SORT_PRINT // #define ADAPTIVE_SORT_PRINT_FINISH @@ -289,7 +289,6 @@ int adaptive_sort( ((file_iterator_state_t*)iteratorState)->fileInterface->seek(0, outputFile); lastWritePos = 0; - addr = buffer + es->page_size; // Start after I/O block for (i = 0; i < maxRecordsInBuffer; i++) { status = !iterator(sortData, addr); @@ -808,7 +807,7 @@ int adaptive_sort( lastWritePos = 0; } #ifdef ADAPTIVE_SORT_PRINT - debug_log("Pass number: %u Comparisons: %lu MemCopies: %lu TransferIn: %lu TransferOut: %lu TransferOther: %lu Other: %lu\n", passNumber, metric->num_compar, metric->num_memcpys, numShiftIntoOutput, numShiftOutOutput, numShiftOtherBlock, other); + debug_log("Pass number: %u Comparisons: %lu MemCopies: %lu TransferIn: %lu TransferOut: %lu TransferOther: %lu\n", passNumber, metric->num_compar, metric->num_memcpys, numShiftIntoOutput, numShiftOutOutput, numShiftOtherBlock); #endif diff --git a/src/query-interface/sort/sortWrapper.c b/src/query-interface/sort/sortWrapper.c index c10d7932..6a6f995e 100644 --- a/src/query-interface/sort/sortWrapper.c +++ b/src/query-interface/sort/sortWrapper.c @@ -5,9 +5,9 @@ #include "unistd.h" #endif -#define PRINT_METRIC -#define DEBUG -#define PRINT_ERRORS +// #define PRINT_METRIC +// #define DEBUG +// #define PRINT_ERRORS #if defined(DEBUG) || defined(PRINT_METRIC) || defined(PRINT_ERRORS) #include "debug_print.h" 
#else @@ -137,7 +137,6 @@ int8_t writePageWithHeader(void *buffer, const uint32_t blockIndex, const uint16 memcpy(buffer + sizeof(uint32_t), &numberOfValues, sizeof(uint16_t)); fileInterface->write(buffer, blockIndex, pageSize, file); - if (fileInterface->error(file)) { #ifdef PRINT_ERRORS debug_log("ERROR: SORT: Failed to write unsorted data"); @@ -206,7 +205,7 @@ uint32_t loadRowData(sortData *data, embedDBOperator *op, void *unsortedFile) { // Write data to buffer memcpy((uint8_t *)buffer + rowOffset, op->input->recordBuffer, data->recordSize); #ifdef DEBUG - if (count < 10) { + if (count < 100) { debug_log("DEBUG loadRowData record %d: ", count); for (int i = 0; i < data->recordSize; i++) { debug_log("%02x ", ((uint8_t *)op->input->recordBuffer)[i]); @@ -220,7 +219,7 @@ uint32_t loadRowData(sortData *data, embedDBOperator *op, void *unsortedFile) { } debug_log("\n"); } - if (count < 10 || count % 1000 == 0) { + if (count < 100 || count % 1000 == 0) { int32_t *keyPtr = (int32_t *)(op->input->recordBuffer + data->keyOffset); debug_log("DEBUG loadRowData: count=%d, rowOffset=%d, key=%d\n", count, rowOffset, *keyPtr); } @@ -240,7 +239,6 @@ uint32_t loadRowData(sortData *data, embedDBOperator *op, void *unsortedFile) { buffer = NULL; return 0; } - data->fileInterface->flush(unsortedFile); #ifdef DEBUG @@ -286,10 +284,10 @@ void prepareSort(embedDBOperator *op) { } char *tmp1 = data->fileInterface->tempFilePath(); - char* tmp2 = data->fileInterface->tempFilePath(); - - void* unsortedFile = data->fileInterface->setup(tmp1); - void* sortedFile = data->fileInterface->setup(tmp2); + char *tmp2 = data->fileInterface->tempFilePath(); + + void *unsortedFile = data->fileInterface->setup(tmp1); + void *sortedFile = data->fileInterface->setup(tmp2); free(tmp1); free(tmp2); @@ -353,7 +351,6 @@ file_iterator_state_t *startSort(sortData *data, void *unsortedFile, void *sorte const int buffer_max_pages = 4; - char *buffer = malloc(buffer_max_pages * es.page_size + es.record_size); 
char *tuple_buffer = buffer + es.page_size * buffer_max_pages; diff --git a/test/test_sort/test_sort_query_interface.cpp b/test/test_sort/test_sort_query_interface.cpp index 1f5de7e7..284c3e52 100644 --- a/test/test_sort/test_sort_query_interface.cpp +++ b/test/test_sort/test_sort_query_interface.cpp @@ -161,7 +161,7 @@ void runTestSequentialValues() { // Insert test data #ifdef ARDUINO Serial.println("About to insert values\n"); - insertNValues(state, 100, 1); + insertNValues(state, 60, 1); #else insertNValues(state, 300, 1); #endif From 47ba04588b236837de8e47d2069a4db2c7c5dced Mon Sep 17 00:00:00 2001 From: Alex Argatoff Date: Mon, 9 Feb 2026 20:42:19 -0800 Subject: [PATCH 17/17] added some checks for the flash sublist to work, and added more missing sd file interface methods --- lib/SD-File-Interface/SDFileInterface.c | 21 +++++++ lib/SD-Wrapper/sdcard_c_iface.cpp | 9 ++- lib/SD-Wrapper/sdcard_c_iface.h | 13 +++- src/query-interface/sort/flash_minsort.c | 4 +- .../sort/flash_minsort_sublist.c | 38 ++++++++---- src/query-interface/sort/sortWrapper.c | 6 +- test/test_sort/test_sort_query_interface.cpp | 60 +++---------------- 7 files changed, 77 insertions(+), 74 deletions(-) diff --git a/lib/SD-File-Interface/SDFileInterface.c b/lib/SD-File-Interface/SDFileInterface.c index 3b9e442e..578965cb 100644 --- a/lib/SD-File-Interface/SDFileInterface.c +++ b/lib/SD-File-Interface/SDFileInterface.c @@ -113,6 +113,16 @@ int8_t FILE_CLOSE(void *file) { return 1; } +int8_t FILE_READ_REL(void *buffer, uint32_t size, uint32_t n, void *file) { + SD_FILE_INFO *fileInfo = (SD_FILE_INFO *)file; + return sd_fread(buffer, size, n, fileInfo->sdFile); +} + +int8_t FILE_WRITE_REL(void *buffer, uint32_t size, uint32_t n, void *file) { + SD_FILE_INFO *fileInfo = (SD_FILE_INFO *)file; + return sd_fwrite(buffer, size, n, fileInfo->sdFile); +} + int8_t FILE_FLUSH(void *file) { SD_FILE_INFO *fileInfo = (SD_FILE_INFO *)file; return sd_fflush(fileInfo->sdFile) == 0; @@ -136,6 +146,14 @@ int8_t 
FILE_OPEN(void *file, uint8_t mode) { } } +int32_t FILE_TELL(void *file) { + SD_FILE_INFO *fileInfo = (SD_FILE_INFO *)file; + if (fileInfo == NULL || fileInfo->sdFile == NULL) { + return -1; + } + return (int32_t)sd_ftell(fileInfo->sdFile); +} + char *sdFat_tempFilePath(void) { char tempPathBuffer[32]; snprintf(tempPathBuffer, sizeof(tempPathBuffer), "TMP%lu.DAT", random()); @@ -172,6 +190,9 @@ embedDBFileInterface *getSDInterface() { fileInterface->seek = FILE_SEEK; fileInterface->flush = FILE_FLUSH; fileInterface->error = FILE_ERROR; + fileInterface->readRel = FILE_READ_REL; + fileInterface->writeRel = FILE_WRITE_REL; + fileInterface->tell = FILE_TELL; fileInterface->setup = setupSDFile; fileInterface->teardown = tearDownSDFile; fileInterface->removeFile = FILE_REMOVE; diff --git a/lib/SD-Wrapper/sdcard_c_iface.cpp b/lib/SD-Wrapper/sdcard_c_iface.cpp index f4a5e36c..7217d20b 100644 --- a/lib/SD-Wrapper/sdcard_c_iface.cpp +++ b/lib/SD-Wrapper/sdcard_c_iface.cpp @@ -104,7 +104,7 @@ SD_FILE *sd_fopen(const char *filename, const char *mode) { size_t sd_fread(void *ptr, size_t size, size_t nmemb, SD_FILE *stream) { /* read is the size of bytes * num of size-bytes */ - int16_t num_bytes = stream->f.read((char *)ptr, size * nmemb); + int32_t num_bytes = stream->f.read((char *)ptr, size * nmemb); if (num_bytes < 0) return 0; @@ -142,6 +142,13 @@ int sd_remove(const char *filename) { return -1; } +long sd_ftell(SD_FILE *stream) { + if (stream == NULL) { + return -1; + } + return (long)stream->f.position(); +} + int sd_ferror(SD_FILE *stream) { if (stream == NULL) return 1; return stream->f.getWriteError() ? 1 : 0; diff --git a/lib/SD-Wrapper/sdcard_c_iface.h b/lib/SD-Wrapper/sdcard_c_iface.h index 5d6fc8c3..80059533 100644 --- a/lib/SD-Wrapper/sdcard_c_iface.h +++ b/lib/SD-Wrapper/sdcard_c_iface.h @@ -166,18 +166,25 @@ sd_fwrite( * @param stream A pointer to a C file struct type associated with an SD file object. 
* @return The size of the file in bytes */ -size_t sd_length(SD_FILE* stream); +size_t sd_length(SD_FILE *stream); /** @brief Remove (delete) a file from the SD card. @param filename The name of the file to delete. @returns 0 on success, -1 on failure. */ -int sd_remove(const char* filename); +int sd_remove(const char *filename); + +/** +@brief Report the current position within the file. +@param stream A pointer to a C file struct type associated with an SD file object. +@returns The current file position, or -1 on failure (NULL stream). +*/ +long sd_ftell(SD_FILE *stream); int sd_ferror(SD_FILE *stream); -void init_sdcard(void* sd); +void init_sdcard(void *sd); #if defined(__cplusplus) } diff --git a/src/query-interface/sort/flash_minsort.c b/src/query-interface/sort/flash_minsort.c index d4d1980f..a9835618 100644 --- a/src/query-interface/sort/flash_minsort.c +++ b/src/query-interface/sort/flash_minsort.c @@ -71,7 +71,7 @@ This is no output sort with block headers and iterator input. Heap used when mov * @param es Sorting configuration, including page and record sizes. * @param metric Metrics tracking structure for performance analysis. 
*/ -void readPageMinSort(MinSortState *ms, int pageNum, external_sort_t *es, metrics_t *metric) { +int8_t readPageMinSort(MinSortState *ms, int pageNum, external_sort_t *es, metrics_t *metric) { file_iterator_state_t *is = (file_iterator_state_t *)ms->iteratorState; void *fp = is->file; #ifdef DEBUG @@ -82,6 +82,7 @@ void readPageMinSort(MinSortState *ms, int pageNum, external_sort_t *es, metrics #ifdef DEBUG debug_log("MINSORT: Failed to read block.\n"); #endif + return 0; } metric->num_reads++; @@ -95,6 +96,7 @@ void readPageMinSort(MinSortState *ms, int pageNum, external_sort_t *es, metrics debug_log("%d: Record: %d\n", k, buf->key); } #endif + return 1; } /** diff --git a/src/query-interface/sort/flash_minsort_sublist.c b/src/query-interface/sort/flash_minsort_sublist.c index 210ac5d0..daa6d778 100644 --- a/src/query-interface/sort/flash_minsort_sublist.c +++ b/src/query-interface/sort/flash_minsort_sublist.c @@ -57,7 +57,7 @@ #endif #endif -void readPage_sublist(MinSortStateSublist *ms, int pageNum, external_sort_t *es, metrics_t *metric) { +int8_t readPage_sublist(MinSortStateSublist *ms, int pageNum, external_sort_t *es, metrics_t *metric) { file_iterator_state_t *is = (file_iterator_state_t *)ms->iteratorState; void *fp = is->file; @@ -66,6 +66,7 @@ void readPage_sublist(MinSortStateSublist *ms, int pageNum, external_sort_t *es, #ifdef DEBUG debug_log("MINSORT SUBLIST: Failed to read block.\n"); #endif + return 0; } metric->num_reads++; @@ -79,6 +80,7 @@ void readPage_sublist(MinSortStateSublist *ms, int pageNum, external_sort_t *es, debug_log("%d: Record: %d\n", k, buf->key); } #endif + return 1; } int32_t getBlockId(MinSortStateSublist *ms) { @@ -160,7 +162,7 @@ void init_MinSort_sublist(MinSortStateSublist *ms, external_sort_t *es, metrics_ memcpy(ms->min + es->record_size * regionIdx, getTuple_sublist(ms, 0, es), es->value_size); metric->num_memcpys++; ms->min_set[regionIdx] = true; - ms->offset[regionIdx] = lastBlock * es->page_size + es->headerSize + 
ms->fileOffset; + ms->offset[regionIdx] = lastBlock * es->page_size + es->headerSize; #if DEBUG debug_log("New min. Index: %d", regionIdx); debug_log(" Min: %u", ms->min[regionIdx]); @@ -226,10 +228,17 @@ char *next_MinSort_sublist(MinSortStateSublist *ms, external_sort_t *es, void *t curBlk = startIndex / es->page_size; // Smallest value is at current index - if (curBlk != ms->lastBlockIdx) { // Read block into buffer - readPage_sublist(ms, curBlk, es, metric); + if (curBlk != ms->lastBlockIdx) { + /* Checking for read failure here */ + if (0 == readPage_sublist(ms, curBlk, es, metric)) { + // If we can't read the block, this region is exhausted. + ms->offset[ms->regionIdx] = -1; + ms->min_set[ms->regionIdx] = false; + // Recursive call to try again with this region disabled + return next_MinSort_sublist(ms, es, tupleBuffer, metric); + } } - } else { // Use next record in current block + } else { i = ms->nextIdx; } @@ -251,10 +260,15 @@ char *next_MinSort_sublist(MinSortStateSublist *ms, external_sort_t *es, void *t i = 0; int32_t currentBlockId = getBlockId(ms); curBlk++; - readPage_sublist(ms, curBlk, es, metric); - if (currentBlockId >= getBlockId(ms)) { - // Transitioned to a block in a new sublist - // ms->min[ms->regionIdx] = INT_MAX; + + if (0 == readPage_sublist(ms, curBlk, es, metric)) { + // Read failed (EOF). Mark region as finished. 
+ ms->offset[ms->regionIdx] = -1; + ms->min_set[ms->regionIdx] = false; + } + /* Only process the new block if read was successful */ + else if (currentBlockId >= getBlockId(ms)) { + // Transitioned to a block in a new sublist (ID check) ms->offset[ms->regionIdx] = -1; ms->min_set[ms->regionIdx] = false; } else { @@ -346,7 +360,7 @@ int flash_minsort_sublist( int32_t blockIndex = 0; int16_t values_per_page = (es->page_size - es->headerSize) / es->record_size; char *outputBuffer = buffer + es->page_size; - unsigned long lastWritePos = ms.fileOffset; + unsigned long lastWritePos = *resultFilePtr; // Write while (next_MinSort_sublist(&ms, es, (char *)(outputBuffer + count * es->record_size + es->headerSize), metric) != NULL) { @@ -391,8 +405,8 @@ int flash_minsort_sublist( if (count > 0) { // fseek(outputFile, lastWritePos, SEEK_SET); ((file_iterator_state_t *)iteratorState)->fileInterface->seek(lastWritePos, outputFile); - *((int32_t *)buffer) = blockIndex; /* Block index */ - *((int16_t *)(buffer + BLOCK_COUNT_OFFSET)) = count; /* Block record count */ + *((int32_t *)outputBuffer) = blockIndex; /* Block index */ + *((int16_t *)(outputBuffer + BLOCK_COUNT_OFFSET)) = count; /* Block record count */ #ifdef DEBUG debug_log("Writing last page minsort sublist: blockIndex=%d, count=%d, filePosition=%ld\n", blockIndex, count, lastWritePos / PAGE_SIZE); diff --git a/src/query-interface/sort/sortWrapper.c b/src/query-interface/sort/sortWrapper.c index 6a6f995e..c07d0e17 100644 --- a/src/query-interface/sort/sortWrapper.c +++ b/src/query-interface/sort/sortWrapper.c @@ -205,7 +205,7 @@ uint32_t loadRowData(sortData *data, embedDBOperator *op, void *unsortedFile) { // Write data to buffer memcpy((uint8_t *)buffer + rowOffset, op->input->recordBuffer, data->recordSize); #ifdef DEBUG - if (count < 100) { + if (count < 10) { debug_log("DEBUG loadRowData record %d: ", count); for (int i = 0; i < data->recordSize; i++) { debug_log("%02x ", ((uint8_t *)op->input->recordBuffer)[i]); 
@@ -219,7 +219,7 @@ uint32_t loadRowData(sortData *data, embedDBOperator *op, void *unsortedFile) { } debug_log("\n"); } - if (count < 100 || count % 1000 == 0) { + if (count < 10 || count % 1000 == 0) { int32_t *keyPtr = (int32_t *)(op->input->recordBuffer + data->keyOffset); debug_log("DEBUG loadRowData: count=%d, rowOffset=%d, key=%d\n", count, rowOffset, *keyPtr); } @@ -309,7 +309,6 @@ void prepareSort(embedDBOperator *op) { // Load row data data->count = loadRowData(data, op, unsortedFile); - debug_log("finished load row data, starting sort\n"); // Start sorting file_iterator_state_t *iteratorState = startSort(data, unsortedFile, sortedFile); if (iteratorState == NULL) { @@ -390,7 +389,6 @@ file_iterator_state_t *startSort(sortData *data, void *unsortedFile, void *sorte int err; - // Use adaptive sort on desktop int8_t runGenOnly = false; // Run full sort operation int8_t writeReadRatio = 19; // 1.97 * 10 => 19 err = adaptive_sort(readNextRecord, iteratorState, tuple_buffer, sortedFile, buffer, buffer_max_pages, &es, &result_file_ptr, &metrics, data->compareFn, runGenOnly, writeReadRatio, data); diff --git a/test/test_sort/test_sort_query_interface.cpp b/test/test_sort/test_sort_query_interface.cpp index 284c3e52..3f43f220 100644 --- a/test/test_sort/test_sort_query_interface.cpp +++ b/test/test_sort/test_sort_query_interface.cpp @@ -154,17 +154,12 @@ void debugBinData(embedDBOperator* op, uint32_t numValues, uint8_t col) { printf("%i ", (int32_t)buffer[col]); } printf("\n"); - //fflush(stdout); + // fflush(stdout); } void runTestSequentialValues() { // Insert test data -#ifdef ARDUINO - Serial.println("About to insert values\n"); - insertNValues(state, 60, 1); -#else insertNValues(state, 300, 1); -#endif embedDBIterator it; it.minKey = NULL; @@ -172,42 +167,14 @@ void runTestSequentialValues() { it.minData = NULL; it.maxData = NULL; embedDBInitIterator(state, &it); -#ifdef ARDUINO - Serial.println("\n=================================="); - 
Serial.println("Creating table scan"); - Serial.println("=================================="); - Serial.flush(); -#endif embedDBOperator* scanOpOrderBy = createTableScanOperator(state, &it, baseSchema); - uint8_t projColsOB[] = { 0, 1 }; -#ifdef ARDUINO - Serial.println("\n=================================="); - Serial.println("Creating projection"); - Serial.println("=================================="); - Serial.flush(); -#endif + uint8_t projColsOB[] = {0, 1}; embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); -#ifdef ARDUINO - Serial.println("\n=================================="); - Serial.println("Creating order by"); - Serial.println("=================================="); - Serial.flush(); -#endif embedDBOperator* orderByOp = createOrderByOperator(state, projColsOrderBy, 1, -1, int32Comparator); - // debugBinData(orderByOp, 300, 1); -#ifdef ARDUINO - Serial.println("\n=================================="); - Serial.println("Init order by"); - Serial.println("=================================="); - Serial.flush(); -#endif + // debugBinData(orderByOp, 70, 1); + orderByOp->init(orderByOp); -#ifdef ARDUINO - Serial.println("\n=================================="); - Serial.println("exec order by"); - Serial.println("=================================="); - Serial.flush(); -#endif + int32_t* recordBuffer = (int32_t*)orderByOp->recordBuffer; exec(orderByOp); int32_t previous = ((int32_t)recordBuffer[1]); @@ -237,7 +204,7 @@ void runTestUsingSEA100k() { embedDBOperator* scanOpOrderBy = createTableScanOperator(state, &it, baseSchema); // debugBinData(scanOpOrderBy, 200, 0); - uint8_t projColsOB[] = { 0, 1 }; + uint8_t projColsOB[] = {0, 1}; embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); // debugBinData(projColsOrderBy, 300, 1); embedDBOperator* orderByOp = createOrderByOperator(state, projColsOrderBy, 1, -1, int32Comparator); @@ -260,21 +227,8 @@ void runTestUsingSEA100k() { int 
runUnityTests() { UNITY_BEGIN(); -#ifdef ARDUINO - Serial.println("\n\n=================================="); - Serial.println("Starting Test Suite"); - Serial.println("=================================="); - Serial.flush(); - delay(1000); // Give time to start serial monitor -#endif RUN_TEST(runTestSequentialValues); -#ifdef ARDUINO - Serial.println("\n=================================="); - Serial.println("Test 1 Complete, starting Test 2"); - Serial.println("=================================="); - Serial.flush(); -#endif - //RUN_TEST(runTestUsingSEA100k); + RUN_TEST(runTestUsingSEA100k); return UNITY_END(); }