diff --git a/ft/cachetable/cachetable-internal.h b/ft/cachetable/cachetable-internal.h index 05fb771de..eb7f08ecc 100644 --- a/ft/cachetable/cachetable-internal.h +++ b/ft/cachetable/cachetable-internal.h @@ -141,6 +141,7 @@ struct cachefile { // If set then fclose will not be logged in recovery log. bool skip_log_recover_on_close; int fd; /* Bug: If a file is opened read-only, then it is stuck in read-only. If it is opened read-write, then subsequent writers can write to it too. */ + unsigned int blocksize; /* Filesystem block size for O_DIRECT operations */ CACHETABLE cachetable; struct fileid fileid; // the filenum is used as an identifer of the cachefile diff --git a/ft/cachetable/cachetable.cc b/ft/cachetable/cachetable.cc index d97d87622..4a42ad923 100644 --- a/ft/cachetable/cachetable.cc +++ b/ft/cachetable/cachetable.cc @@ -369,6 +369,11 @@ toku_cachetable_reserve_filenum(CACHETABLE ct) { return ct->cf_list.reserve_filenum(); } +static unsigned int fd_blocksize(int fd) { + toku_struct_stat st; + return (unsigned int) toku_os_fstat(fd, &st) ? 512 : st.st_blksize; +} + static void create_new_cachefile( CACHETABLE ct, FILENUM filenum, @@ -387,6 +392,7 @@ static void create_new_cachefile( newcf->filenum = filenum; newcf->fd = fd; + newcf->blocksize = fd_blocksize(fd); newcf->fname_in_env = toku_xstrdup(fname_in_env); bjm_init(&newcf->bjm); *cfptr = newcf; @@ -427,6 +433,7 @@ int toku_cachetable_openfd_with_filenum (CACHEFILE *cfptr, CACHETABLE ct, int fd // fix up the fields in the cachefile existing_cf->filenum = filenum; existing_cf->fd = fd; + existing_cf->blocksize = fd_blocksize(fd); existing_cf->fname_in_env = toku_xstrdup(fname_in_env); bjm_init(&existing_cf->bjm); @@ -498,6 +505,11 @@ toku_cachefile_get_fd (CACHEFILE cf) { return cf->fd; } +unsigned int +toku_cachefile_get_blocksize (CACHEFILE cf) { + return cf->blocksize; +} + static void cachefile_destroy(CACHEFILE cf) { if (cf->free_userdata) { cf->free_userdata(cf, cf->userdata); @@ -535,6 +547,7 @@ void toku_cachefile_close(CACHEFILE *cfp, bool oplsn_valid, LSN oplsn) { int r = close(cf->fd); assert(r == 0); cf->fd = -1; + cf->blocksize = 512; // destroy the parts of the cachefile // that do not persist across opens/closes diff --git a/ft/cachetable/cachetable.h b/ft/cachetable/cachetable.h index c5c21b49f..979da76c4 100644 --- a/ft/cachetable/cachetable.h +++ b/ft/cachetable/cachetable.h @@ -501,6 +501,9 @@ void toku_cachefile_close (CACHEFILE*, bool oplsn_valid, LSN oplsn); // Grabs a read lock protecting the fd int toku_cachefile_get_fd (CACHEFILE); +// Get the blocksize associated with the cachefile +unsigned int toku_cachefile_get_blocksize (CACHEFILE); + // Get the iname (within the environment) associated with the cachefile // Return the filename char * toku_cachefile_fname_in_env (CACHEFILE cf); diff --git a/ft/ft-internal.h b/ft/ft-internal.h index eec591d17..b1aca9a1c 100644 --- a/ft/ft-internal.h +++ b/ft/ft-internal.h @@ -454,8 +454,8 @@ void toku_ft_get_status(FT_STATUS); void toku_flusher_thread_set_callback(void (*callback_f)(int, void*), void* extra); // For upgrade -int toku_upgrade_subtree_estimates_to_stat64info(int fd, FT ft) __attribute__((nonnull)); -int toku_upgrade_msn_from_root_to_header(int fd, FT ft) __attribute__((nonnull)); +int toku_upgrade_subtree_estimates_to_stat64info(int fd, unsigned int block_size, FT ft) __attribute__((nonnull)); +int toku_upgrade_msn_from_root_to_header(int fd, unsigned int block_size, FT ft) __attribute__((nonnull)); // A callback function is invoked with the key, and 
the data. // The pointers (to the bytevecs) must not be modified. The data must be copied out before the callback function returns. diff --git a/ft/ft-ops.cc b/ft/ft-ops.cc index d036366dd..9e07da2fa 100644 --- a/ft/ft-ops.cc +++ b/ft/ft-ops.cc @@ -796,7 +796,7 @@ toku_ft_status_update_pivot_fetch_reason(ftnode_fetch_extra *bfe) } } -int toku_ftnode_fetch_callback(CACHEFILE UU(cachefile), +int toku_ftnode_fetch_callback(CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM blocknum, @@ -815,7 +815,7 @@ int toku_ftnode_fetch_callback(CACHEFILE UU(cachefile), // evaluate what piece of the the node is necessary until we get it at // least partially into memory int r = - toku_deserialize_ftnode_from(fd, blocknum, fullhash, node, ndd, bfe); + toku_deserialize_ftnode_from(fd, toku_cachefile_get_blocksize(cachefile), blocknum, fullhash, node, ndd, bfe); if (r != 0) { if (r == TOKUDB_BAD_CHECKSUM) { fprintf( diff --git a/ft/ft-ops.h b/ft/ft-ops.h index df8ffe287..7a01c353a 100644 --- a/ft/ft-ops.h +++ b/ft/ft-ops.h @@ -266,7 +266,7 @@ void toku_ft_serialize_layer_destroy(void); void toku_maybe_truncate_file (int fd, uint64_t size_used, uint64_t expected_size, uint64_t *new_size); // Effect: truncate file if overallocated by at least 32MiB -void toku_maybe_preallocate_in_file (int fd, int64_t size, int64_t expected_size, int64_t *new_size); +void toku_maybe_preallocate_in_file (int fd, unsigned int block_size, int64_t size, int64_t expected_size, int64_t *new_size); // Effect: make the file bigger by either doubling it or growing by 16MiB whichever is less, until it is at least size // Return 0 on success, otherwise an error number. diff --git a/ft/ft.cc b/ft/ft.cc index 454bf1179..b06057bcd 100644 --- a/ft/ft.cc +++ b/ft/ft.cc @@ -410,7 +410,7 @@ void toku_ft_create(FT *ftp, FT_OPTIONS options, CACHEFILE cf, TOKUTXN txn) { toku_ft_init_reflock(ft); // Assign blocknum for root block, also dirty the header - ft->blocktable.create(); + ft->blocktable.create(toku_cachefile_get_blocksize(cf)); ft->blocktable.allocate_blocknum(&ft->h->root_blocknum, ft); ft_init(ft, options, cf); @@ -431,9 +431,8 @@ int toku_read_ft_and_store_in_cachefile (FT_HANDLE ft_handle, CACHEFILE cf, LSN return 0; } - int fd = toku_cachefile_get_fd(cf); const char *fn = toku_cachefile_fname_in_env(cf); - int r = toku_deserialize_ft_from(fd, fn, max_acceptable_lsn, &ft); + int r = toku_deserialize_ft_from(toku_cachefile_get_fd(cf), toku_cachefile_get_blocksize(cf), fn, max_acceptable_lsn, &ft); if (r == TOKUDB_BAD_CHECKSUM) { fprintf(stderr, "Checksum failure while reading header in file %s.\n", toku_cachefile_fname_in_env(cf)); assert(false); // make absolutely sure we crash before doing anything else @@ -868,9 +867,10 @@ toku_ft_update_descriptor_with_fd(FT ft, DESCRIPTOR desc, int fd) { // the checksum is four bytes, so that's where the magic number comes from // make space for the new descriptor and write it out to disk DISKOFF offset, size; + assert(fd == toku_cachefile_get_fd(ft->cf)); size = toku_serialize_descriptor_size(desc) + 4; ft->blocktable.realloc_descriptor_on_disk(size, &offset, ft, fd); - toku_serialize_descriptor_contents_to_fd(fd, desc, offset); + toku_serialize_descriptor_contents_to_fd(fd, toku_cachefile_get_blocksize(ft->cf), desc, offset); // cleanup the old descriptor and set the in-memory descriptor to the new one toku_destroy_dbt(&ft->descriptor.dbt); @@ -1036,7 +1036,7 @@ garbage_helper(BLOCKNUM blocknum, int64_t UU(size), int64_t UU(address), void *e ftnode_fetch_extra bfe; bfe.create_for_full_read(info->ft); 
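// Illustrative sketch, not one of the patch hunks: the call pattern the change
// above establishes for reading a node outside the cachetable. The fd and the
// filesystem blocksize both come from the same CACHEFILE, so the read is
// aligned for the file it actually targets. The helper name read_node_from_cf
// is hypothetical; the toku_* functions are the ones this patch declares.
static int read_node_from_cf(CACHEFILE cf, BLOCKNUM blocknum,
                             ftnode_fetch_extra *bfe,
                             FTNODE *node, FTNODE_DISK_DATA *ndd) {
    int fd = toku_cachefile_get_fd(cf);
    unsigned int blocksize = toku_cachefile_get_blocksize(cf);
    // A fullhash of 0 is fine for a direct read that bypasses the cachetable.
    return toku_deserialize_ftnode_from(fd, blocksize, blocknum, 0, node, ndd, bfe);
}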
int fd = toku_cachefile_get_fd(info->ft->cf); - int r = toku_deserialize_ftnode_from(fd, blocknum, 0, &node, &ndd, &bfe); + int r = toku_deserialize_ftnode_from(fd, toku_cachefile_get_blocksize(info->ft->cf), blocknum, 0, &node, &ndd, &bfe); if (r != 0) { goto no_node; } diff --git a/ft/serialize/block_allocator.cc b/ft/serialize/block_allocator.cc index e64139f0e..5cff56ff3 100644 --- a/ft/serialize/block_allocator.cc +++ b/ft/serialize/block_allocator.cc @@ -56,10 +56,10 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #endif void BlockAllocator::CreateInternal(uint64_t reserve_at_beginning, - uint64_t alignment) { - // the alignment must be at least 512 and aligned with 512 to work with + uint64_t alignment, unsigned int blocksize) { + // the alignment must be at least blocksize and aligned with blocksize to work with // direct I/O - invariant(alignment >= 512 && (alignment % 512) == 0); + invariant(alignment >= blocksize && (alignment % blocksize) == 0); _reserve_at_beginning = reserve_at_beginning; _alignment = alignment; @@ -68,8 +68,9 @@ void BlockAllocator::CreateInternal(uint64_t reserve_at_beginning, _tree = new MhsRbTree::Tree(alignment); } -void BlockAllocator::Create(uint64_t reserve_at_beginning, uint64_t alignment) { - CreateInternal(reserve_at_beginning, alignment); +void BlockAllocator::Create(uint64_t reserve_at_beginning, uint64_t alignment, + unsigned int blocksize) { + CreateInternal(reserve_at_beginning, alignment, blocksize); _tree->Insert({reserve_at_beginning, MAX_BYTE}); VALIDATE(); } @@ -80,9 +81,10 @@ void BlockAllocator::Destroy() { void BlockAllocator::CreateFromBlockPairs(uint64_t reserve_at_beginning, uint64_t alignment, + unsigned int blocksize, struct BlockPair *translation_pairs, uint64_t n_blocks) { - CreateInternal(reserve_at_beginning, alignment); + CreateInternal(reserve_at_beginning, alignment, blocksize); _n_blocks = n_blocks; struct BlockPair *XMALLOC_N(n_blocks, pairs); @@ -124,7 +126,7 @@ static inline uint64_t Align(uint64_t value, uint64_t ba_alignment) { // Effect: Allocate a block. The resulting block must be aligned on the // ba->alignment (which to make direct_io happy must be a positive multiple of -// 512). +// blocksize). void BlockAllocator::AllocBlock(uint64_t size, uint64_t *offset) { // Allocator does not support size 0 blocks. See block_allocator_free_block. diff --git a/ft/serialize/block_allocator.h b/ft/serialize/block_allocator.h index 648ea9a9e..01feda5aa 100644 --- a/ft/serialize/block_allocator.h +++ b/ft/serialize/block_allocator.h @@ -96,7 +96,8 @@ class BlockAllocator { // reserve_at_beginning (IN) Size of reserved block at beginning. // This size does not have to be aligned. // alignment (IN) Block alignment. - void Create(uint64_t reserve_at_beginning, uint64_t alignment); + // blocksize (IN) Blocksize. + void Create(uint64_t reserve_at_beginning, uint64_t alignment, unsigned int blocksize); // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING // bytes are not put into a block. @@ -110,8 +111,10 @@ class BlockAllocator { // reserve_at_beginning (IN) Size of reserved block at beginning. // This size does not have to be aligned. // alignment (IN) Block alignment. + // blocksize (IN) Block size. 
void CreateFromBlockPairs(uint64_t reserve_at_beginning, uint64_t alignment, + unsigned int blocksize, struct BlockPair *pairs, uint64_t n_blocks); @@ -173,7 +176,7 @@ class BlockAllocator { virtual ~BlockAllocator(){}; private: - void CreateInternal(uint64_t reserve_at_beginning, uint64_t alignment); + void CreateInternal(uint64_t reserve_at_beginning, uint64_t alignment, unsigned int blocksize); // How much to reserve at the beginning uint64_t _reserve_at_beginning; diff --git a/ft/serialize/block_table.cc b/ft/serialize/block_table.cc index 56d51f569..2e1ac6197 100644 --- a/ft/serialize/block_table.cc +++ b/ft/serialize/block_table.cc @@ -136,8 +136,13 @@ int block_table::create_from_buffer( // Determine the file size int64_t file_size = 0; - r = toku_os_get_file_size(fd, &file_size); - lazy_assert_zero(r); + int blocksize; + toku_struct_stat st; + + r = toku_os_fstat(fd, &st); + lazy_assert_zero(r); + blocksize = r ? 512 : st.st_blksize; + file_size = r ? 0 : st.st_size; invariant(file_size >= 0); _safe_file_size = file_size; @@ -159,13 +164,14 @@ int block_table::create_from_buffer( _bt_block_allocator->CreateFromBlockPairs( BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT, + blocksize, pairs, n_pairs); return 0; } -void block_table::create() { +void block_table::create(unsigned int blocksize) { // Does not initialize the block allocator _create_internal(); @@ -187,7 +193,7 @@ // Create an empty block allocator. _bt_block_allocator->Create( BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, - BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT); + BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT, blocksize); } // TODO: Refactor with FT-303 @@ -484,6 +490,7 @@ void block_table::_realloc_on_disk_internal(BLOCKNUM b, } void block_table::_ensure_safe_write_unlocked(int fd, + unsigned int disk_block_size, DISKOFF block_size, DISKOFF block_offset) { // Requires: holding _mutex @@ -496,7 +503,7 @@ void block_table::_ensure_safe_write_unlocked(int fd, int64_t size_after; toku_maybe_preallocate_in_file( - fd, size_needed, _safe_file_size, &size_after); + fd, disk_block_size, size_needed, _safe_file_size, &size_after); _mutex_lock(); _safe_file_size = size_after; @@ -516,7 +523,7 @@ void block_table::realloc_on_disk(BLOCKNUM b, _verify_valid_freeable_blocknum(t, b); _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint); - _ensure_safe_write_unlocked(fd, size, *offset); + _ensure_safe_write_unlocked(fd, toku_cachefile_get_blocksize(ft->cf), size, *offset); _mutex_unlock(); } @@ -550,14 +557,16 @@ void block_table::_alloc_inprogress_translation_on_disk_unlocked() { // Effect: Serializes the blocktable to a wbuf (which starts uninitialized) // A clean shutdown runs checkpoint start so that current and inprogress are // copies. -// The resulting wbuf buffer is guaranteed to be be 512-byte aligned and the -// total length is a multiple of 512 (so we pad with zeros at the end if +// The resulting wbuf buffer is guaranteed to be blocksize-byte aligned and the +// total length is a multiple of blocksize (so we pad with zeros at the end if // needd) -// The address is guaranteed to be 512-byte aligned, but the size is not +// The address is guaranteed to be blocksize-byte aligned, but the size is not // guaranteed.
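// Minimal sketch of the intended fstat probe, assuming toku_os_fstat follows
// the usual 0-on-success convention (fd_geometry is a hypothetical name): on
// failure, fall back to the traditional 512-byte sector and treat the file as
// empty instead of reading an uninitialized stat buffer.
static void fd_geometry(int fd, unsigned int *blocksize, int64_t *file_size) {
    toku_struct_stat st;
    int r = toku_os_fstat(fd, &st);
    *blocksize = r ? 512 : (unsigned int) st.st_blksize;
    *file_size = r ? 0 : (int64_t) st.st_size;
}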
-// It *is* guaranteed that we can read up to the next 512-byte boundary, +// It *is* guaranteed that we can read up to the next blocksize-byte boundary, // however +// blocksize equates to the blocksize of the filesystem cf is on. void block_table::serialize_translation_to_wbuf(int fd, + unsigned int blocksize, struct wbuf *w, int64_t *address, int64_t *size) { @@ -566,11 +575,11 @@ void block_table::serialize_translation_to_wbuf(int fd, BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION); _alloc_inprogress_translation_on_disk_unlocked(); // The allocated block - // must be 512-byte + // must be blocksize-byte // aligned to make // O_DIRECT happy. uint64_t size_translation = _calculate_size_on_disk(t); - uint64_t size_aligned = roundup_to_multiple(512, size_translation); + uint64_t size_aligned = roundup_to_multiple(blocksize, size_translation); invariant((int64_t)size_translation == t->block_translation[b.b].size); { // Init wbuf @@ -582,7 +591,7 @@ void block_table::serialize_translation_to_wbuf(int fd, __LINE__, size_translation, t->block_translation[b.b].u.diskoff); - char *XMALLOC_N_ALIGNED(512, size_aligned, buf); + char *XMALLOC_N_ALIGNED(blocksize, size_aligned, buf); for (uint64_t i = size_translation; i < size_aligned; i++) buf[i] = 0; // fill in the end of the buffer with zeros. wbuf_init(w, buf, size_aligned); @@ -604,9 +613,9 @@ void block_table::serialize_translation_to_wbuf(int fd, wbuf_int(w, checksum); *address = t->block_translation[b.b].u.diskoff; *size = size_translation; - invariant((*address) % 512 == 0); + invariant((*address) % blocksize == 0); - _ensure_safe_write_unlocked(fd, size_aligned, *address); + _ensure_safe_write_unlocked(fd, blocksize, size_aligned, *address); _mutex_unlock(); } @@ -1028,7 +1037,7 @@ void block_table::realloc_descriptor_on_disk(DISKOFF size, int fd) { _mutex_lock(); _realloc_descriptor_on_disk_unlocked(size, offset, ft); - _ensure_safe_write_unlocked(fd, size, *offset); + _ensure_safe_write_unlocked(fd, toku_cachefile_get_blocksize(ft->cf), size, *offset); _mutex_unlock(); } diff --git a/ft/serialize/block_table.h b/ft/serialize/block_table.h index dd732d4f3..be136d45c 100644 --- a/ft/serialize/block_table.h +++ b/ft/serialize/block_table.h @@ -105,7 +105,7 @@ class block_table { TRANSLATION_DEBUG }; - void create(); + void create(unsigned int blocksize); int create_from_buffer(int fd, DISKOFF location_on_disk, @@ -146,6 +146,7 @@ class block_table { // Serialization void serialize_translation_to_wbuf(int fd, + unsigned int blocksize, struct wbuf *w, int64_t *address, int64_t *size); @@ -258,6 +259,7 @@ class block_table { // File management void _maybe_truncate_file(int fd, uint64_t size_needed_before); void _ensure_safe_write_unlocked(int fd, + unsigned int disk_block_size, DISKOFF block_size, DISKOFF block_offset); diff --git a/ft/serialize/ft-serialize.cc b/ft/serialize/ft-serialize.cc index b24d72a5d..7d5ca47a0 100644 --- a/ft/serialize/ft-serialize.cc +++ b/ft/serialize/ft-serialize.cc @@ -71,12 +71,12 @@ void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, DESCRIPTOR desc //descriptor. //Descriptors are NOT written during the header checkpoint process. 
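// With O_DIRECT, the file offset, the transfer length, and the buffer address
// must all be multiples of the filesystem's logical block size, which is why
// every write path above rounds with roundup_to_multiple and allocates with
// XMALLOC_N_ALIGNED. A condensed sketch of that write pattern (write_padded is
// a hypothetical name; the payload is zero-padded so the trailing bytes of the
// aligned region are always initialized):
static void write_padded(int fd, unsigned int blocksize,
                         const char *payload, uint64_t payload_size,
                         int64_t offset) {
    invariant(offset % blocksize == 0);
    uint64_t write_size = roundup_to_multiple(blocksize, payload_size);
    char *XMALLOC_N_ALIGNED(blocksize, write_size, buf);
    memcpy(buf, payload, payload_size);
    memset(buf + payload_size, 0, write_size - payload_size);
    toku_os_full_pwrite(fd, buf, write_size, offset);
    toku_free(buf);
}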
void -toku_serialize_descriptor_contents_to_fd(int fd, DESCRIPTOR desc, DISKOFF offset) { +toku_serialize_descriptor_contents_to_fd(int fd, unsigned int blocksize, DESCRIPTOR desc, DISKOFF offset) { // make the checksum int64_t size = toku_serialize_descriptor_size(desc)+4; //4 for checksum - int64_t size_aligned = roundup_to_multiple(512, size); + int64_t size_aligned = roundup_to_multiple(blocksize, size); struct wbuf w; - char *XMALLOC_N_ALIGNED(512, size_aligned, aligned_buf); + char *XMALLOC_N_ALIGNED(blocksize, size_aligned, aligned_buf); for (int64_t i=size; i 0) { lazy_assert(size>=4); //4 for checksum { - ssize_t size_to_malloc = roundup_to_multiple(512, size); - XMALLOC_N_ALIGNED(512, size_to_malloc, dbuf); + ssize_t size_to_malloc = roundup_to_multiple(blocksize, size); + XMALLOC_N_ALIGNED(blocksize, size_to_malloc, dbuf); { ssize_t sz_read = toku_os_pread(fd, dbuf, size_to_malloc, offset); @@ -148,7 +148,7 @@ deserialize_descriptor_from(int fd, block_table *bt, DESCRIPTOR desc, int layout return r; } -int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) +int deserialize_ft_versioned(int fd, unsigned int block_size, struct rbuf *rb, FT *ftp, uint32_t version) // Effect: Deserialize the ft header. // We deserialize ft_header only once and then share everything with all the FTs. { @@ -209,8 +209,8 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) //Load translation table { - size_t size_to_read = roundup_to_multiple(512, translation_size_on_disk); - unsigned char *XMALLOC_N_ALIGNED(512, size_to_read, tbuf); + size_t size_to_read = roundup_to_multiple(block_size, translation_size_on_disk); + unsigned char *XMALLOC_N_ALIGNED(block_size, size_to_read, tbuf); { // This cast is messed up in 32-bits if the block translation // table is ever more than 4GB. But in that case, the @@ -373,20 +373,20 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) if (ft->layout_version_read_from_disk < FT_LAYOUT_VERSION_18) { // This needs ft->h to be non-null, so we have to do it after we // read everything else. 
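// The read paths mirror the same contract: round the requested byte count up
// to the blocksize, read into a blocksize-aligned buffer, and hand back only
// the bytes the caller asked for. A condensed sketch (read_padded is a
// hypothetical name, and returning EIO on a short read is an assumption for
// the sketch; the real callers each have their own error handling):
static int read_padded(int fd, unsigned int blocksize,
                       int64_t offset, uint32_t size, unsigned char **out) {
    invariant(offset % blocksize == 0);
    uint32_t size_to_read = roundup_to_multiple(blocksize, size);
    unsigned char *XMALLOC_N_ALIGNED(blocksize, size_to_read, buf);
    ssize_t n = toku_os_pread(fd, buf, size_to_read, offset);
    if (n != (ssize_t) size_to_read) {
        toku_free(buf);
        return EIO;
    }
    *out = buf;
    return 0;
}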
- r = toku_upgrade_subtree_estimates_to_stat64info(fd, ft); + r = toku_upgrade_subtree_estimates_to_stat64info(fd, block_size, ft); if (r != 0) { goto exit; } } if (ft->layout_version_read_from_disk < FT_LAYOUT_VERSION_21) { - r = toku_upgrade_msn_from_root_to_header(fd, ft); + r = toku_upgrade_msn_from_root_to_header(fd, block_size, ft); if (r != 0) { goto exit; } } invariant((uint32_t) ft->layout_version_read_from_disk == version); - r = deserialize_descriptor_from(fd, &ft->blocktable, &ft->descriptor, version); + r = deserialize_descriptor_from(fd, block_size, &ft->blocktable, &ft->descriptor, version); if (r != 0) { goto exit; } @@ -507,6 +507,7 @@ static size_t serialize_ft_min_size(uint32_t version) { } int deserialize_ft_from_fd_into_rbuf(int fd, + unsigned int block_size, toku_off_t offset_of_header, struct rbuf *rb, uint64_t *checkpoint_count, @@ -525,8 +526,8 @@ int deserialize_ft_from_fd_into_rbuf(int fd, 4 + // version 4 + // build_id 4; // size - const int64_t read_size = roundup_to_multiple(512, prefix_size); - unsigned char *XMALLOC_N_ALIGNED(512, read_size, prefix); + const int64_t read_size = roundup_to_multiple(block_size, prefix_size); + unsigned char *XMALLOC_N_ALIGNED(block_size, read_size, prefix); rb->buf = NULL; int64_t n = toku_os_pread(fd, prefix, read_size, offset_of_header); if (n != read_size) { @@ -589,10 +590,10 @@ int deserialize_ft_from_fd_into_rbuf(int fd, rb->size = size; { toku_free(rb->buf); - uint32_t size_to_read = roundup_to_multiple(512, size); - XMALLOC_N_ALIGNED(512, size_to_read, rb->buf); + uint32_t size_to_read = roundup_to_multiple(block_size, size); + XMALLOC_N_ALIGNED(block_size, size_to_read, rb->buf); - invariant(offset_of_header % 512 == 0); + invariant(offset_of_header % block_size == 0); n = toku_os_pread(fd, rb->buf, size_to_read, offset_of_header); if (n != size_to_read) { if (n < 0) { @@ -672,6 +673,7 @@ int deserialize_ft_from_fd_into_rbuf(int fd, checkpoint_count_1); int toku_deserialize_ft_from(int fd, + unsigned int block_size, const char *fn, LSN max_acceptable_lsn, FT *ft) { @@ -689,6 +691,7 @@ int toku_deserialize_ft_from(int fd, toku_off_t header_0_off = 0; r0 = deserialize_ft_from_fd_into_rbuf(fd, + block_size, header_0_off, &rb_0, &checkpoint_count_0, @@ -700,6 +703,7 @@ int toku_deserialize_ft_from(int fd, toku_off_t header_1_off = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE; r1 = deserialize_ft_from_fd_into_rbuf(fd, + block_size, header_1_off, &rb_1, &checkpoint_count_1, @@ -791,7 +795,7 @@ int toku_deserialize_ft_from(int fd, dump_state_of_toku_deserialize_ft_from(); } paranoid_invariant(rb); - r = deserialize_ft_versioned(fd, rb, ft, version); + r = deserialize_ft_versioned(fd, block_size, rb, ft, version); exit: if (rb_0.buf) { @@ -858,22 +862,25 @@ void toku_serialize_ft_to(int fd, FT_HEADER h, block_table *bt, CACHEFILE cf) { struct wbuf w_translation; int64_t size_translation; int64_t address_translation; + unsigned int blocksize = toku_cachefile_get_blocksize(cf); + + assert(fd == toku_cachefile_get_fd(cf)); // Must serialize translation first, to get address,size for header. bt->serialize_translation_to_wbuf( - fd, &w_translation, &address_translation, &size_translation); + fd, blocksize, &w_translation, &address_translation, &size_translation); invariant(size_translation == w_translation.ndone); - // the number of bytes available in the buffer is 0 mod 512, and those last + // the number of bytes available in the buffer is 0 mod blocksize, and those last // bytes are all initialized. 
- invariant(w_translation.size % 512 == 0); + invariant(w_translation.size % blocksize == 0); struct wbuf w_main; size_t size_main = toku_serialize_ft_size(h); - size_t size_main_aligned = roundup_to_multiple(512, size_main); - invariant(size_main_aligned < + size_t size_main_aligned = roundup_to_multiple(blocksize, size_main); + invariant(size_main_aligned <= BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE); - char *XMALLOC_N_ALIGNED(512, size_main_aligned, mainbuf); + char *XMALLOC_N_ALIGNED(blocksize, size_main_aligned, mainbuf); for (size_t i = size_main; i < size_main_aligned; i++) mainbuf[i] = 0; // initialize the end of the buffer with zeros wbuf_init(&w_main, mainbuf, size_main); @@ -884,10 +891,10 @@ void toku_serialize_ft_to(int fd, FT_HEADER h, block_table *bt, CACHEFILE cf) { // Actually write translation table // This write is guaranteed to read good data at the end of the buffer, // since the - // w_translation.buf is padded with zeros to a 512-byte boundary. + // w_translation.buf is padded with zeros to a blocksize-byte boundary. toku_os_full_pwrite(fd, w_translation.buf, - roundup_to_multiple(512, size_translation), + roundup_to_multiple(blocksize, size_translation), address_translation); // Everything but the header MUST be on disk before header starts. diff --git a/ft/serialize/ft-serialize.h b/ft/serialize/ft-serialize.h index 144e18856..042082295 100644 --- a/ft/serialize/ft-serialize.h +++ b/ft/serialize/ft-serialize.h @@ -51,17 +51,20 @@ void toku_serialize_ft_to_wbuf(struct wbuf *wbuf, DISKOFF translation_location_on_disk, DISKOFF translation_size_on_disk); void toku_serialize_descriptor_contents_to_fd(int fd, + unsigned int blocksize, DESCRIPTOR desc, DISKOFF offset); void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, DESCRIPTOR desc); int toku_deserialize_ft_from(int fd, + unsigned int blocksize, const char *fn, LSN max_acceptable_lsn, FT *ft); // TODO rename int deserialize_ft_from_fd_into_rbuf(int fd, + unsigned int block_size, toku_off_t offset_of_header, struct rbuf *rb, uint64_t *checkpoint_count, @@ -70,4 +73,4 @@ int deserialize_ft_from_fd_into_rbuf(int fd, // used by verify // TODO rename -int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ft, uint32_t version); +int deserialize_ft_versioned(int fd, unsigned int block_size, struct rbuf *rb, FT *ft, uint32_t version); diff --git a/ft/serialize/ft_node-serialize.cc b/ft/serialize/ft_node-serialize.cc index 55899905b..93e40de4d 100644 --- a/ft/serialize/ft_node-serialize.cc +++ b/ft/serialize/ft_node-serialize.cc @@ -149,7 +149,7 @@ min64(int64_t a, int64_t b) { } void -toku_maybe_preallocate_in_file (int fd, int64_t size, int64_t expected_size, int64_t *new_size) +toku_maybe_preallocate_in_file (int fd, unsigned int block_size, int64_t size, int64_t expected_size, int64_t *new_size) // Effect: make the file bigger by either doubling it or growing by 16MiB whichever is less, until it is at least size // Return 0 on success, otherwise an error number. { @@ -165,7 +165,6 @@ toku_maybe_preallocate_in_file (int fd, int64_t size, int64_t expected_size, int lazy_assert_zero(r); } invariant(file_size >= 0); - invariant(expected_size == file_size); // We want to double the size of the file, or add 16MiB, whichever is less. // We emulate calling this function repeatedly until it satisfies the request. 
int64_t to_write = 0; @@ -177,8 +176,8 @@ toku_maybe_preallocate_in_file (int fd, int64_t size, int64_t expected_size, int to_write += alignup64(min64(file_size + to_write, FILE_CHANGE_INCREMENT), stripe_width); } if (to_write > 0) { - assert(to_write%512==0); - toku::scoped_malloc_aligned wbuf_aligned(to_write, 512); + assert(to_write%block_size==0); + toku::scoped_malloc_aligned wbuf_aligned(to_write, block_size); char *wbuf = reinterpret_cast(wbuf_aligned.get()); memset(wbuf, 0, to_write); toku_off_t start_write = alignup64(file_size, stripe_width); @@ -1060,6 +1059,7 @@ void destroy_nonleaf_childinfo (NONLEAF_CHILDINFO nl) void read_block_from_fd_into_rbuf( int fd, + unsigned int block_size, BLOCKNUM blocknum, FT ft, struct rbuf *rb @@ -1068,8 +1068,8 @@ void read_block_from_fd_into_rbuf( // get the file offset and block size for the block DISKOFF offset, size; ft->blocktable.translate_blocknum_to_offset_size(blocknum, &offset, &size); - DISKOFF size_aligned = roundup_to_multiple(512, size); - uint8_t *XMALLOC_N_ALIGNED(512, size_aligned, raw_block); + DISKOFF size_aligned = roundup_to_multiple(block_size, size); + uint8_t *XMALLOC_N_ALIGNED(block_size, size_aligned, raw_block); rbuf_init(rb, raw_block, size); // read the block ssize_t rlen = toku_os_pread(fd, raw_block, size_aligned, offset); @@ -2668,6 +2668,7 @@ int toku_deserialize_bp_from_compressed(FTNODE node, } static int deserialize_ftnode_from_fd(int fd, + unsigned int block_size, BLOCKNUM blocknum, uint32_t fullhash, FTNODE *ftnode, @@ -2677,7 +2678,7 @@ static int deserialize_ftnode_from_fd(int fd, struct rbuf rb = RBUF_INITIALIZER; tokutime_t t0 = toku_time_now(); - read_block_from_fd_into_rbuf(fd, blocknum, bfe->ft, &rb); + read_block_from_fd_into_rbuf(fd, block_size, blocknum, bfe->ft, &rb); tokutime_t t1 = toku_time_now(); // Decompress and deserialize the ftnode. Time statistics @@ -2708,6 +2709,7 @@ static int deserialize_ftnode_from_fd(int fd, // Effect: Read a node in. If possible, read just the header. // Perform version upgrade if necessary. int toku_deserialize_ftnode_from(int fd, + unsigned int block_size, BLOCKNUM blocknum, uint32_t fullhash, FTNODE *ftnode, @@ -2731,7 +2733,7 @@ int toku_deserialize_ftnode_from(int fd, if (r != 0) { // Something went wrong, go back to doing it the old way. 
r = deserialize_ftnode_from_fd( - fd, blocknum, fullhash, ftnode, ndd, bfe, nullptr); + fd, block_size, blocknum, fullhash, ftnode, ndd, bfe, nullptr); } toku_free(rb.buf); @@ -3210,7 +3212,7 @@ int toku_deserialize_rollback_log_from(int fd, BLOCKNUM blocknum, ROLLBACK_LOG_N } int -toku_upgrade_subtree_estimates_to_stat64info(int fd, FT ft) +toku_upgrade_subtree_estimates_to_stat64info(int fd, unsigned int block_size, FT ft) { int r = 0; // 15 was the last version with subtree estimates @@ -3220,7 +3222,7 @@ toku_upgrade_subtree_estimates_to_stat64info(int fd, FT ft) FTNODE_DISK_DATA unused_ndd = NULL; ftnode_fetch_extra bfe; bfe.create_for_min_read(ft); - r = deserialize_ftnode_from_fd(fd, ft->h->root_blocknum, 0, &unused_node, &unused_ndd, + r = deserialize_ftnode_from_fd(fd, block_size, ft->h->root_blocknum, 0, &unused_node, &unused_ndd, &bfe, &ft->h->on_disk_stats); ft->in_memory_stats = ft->h->on_disk_stats; @@ -3234,7 +3236,7 @@ toku_upgrade_subtree_estimates_to_stat64info(int fd, FT ft) } int -toku_upgrade_msn_from_root_to_header(int fd, FT ft) +toku_upgrade_msn_from_root_to_header(int fd, unsigned int block_size, FT ft) { int r; // 21 was the first version with max_msn_in_ft in the header @@ -3244,7 +3246,7 @@ toku_upgrade_msn_from_root_to_header(int fd, FT ft) FTNODE_DISK_DATA ndd; ftnode_fetch_extra bfe; bfe.create_for_min_read(ft); - r = deserialize_ftnode_from_fd(fd, ft->h->root_blocknum, 0, &node, &ndd, &bfe, nullptr); + r = deserialize_ftnode_from_fd(fd, block_size, ft->h->root_blocknum, 0, &node, &ndd, &bfe, nullptr); if (r != 0) { goto exit; } diff --git a/ft/serialize/ft_node-serialize.h b/ft/serialize/ft_node-serialize.h index 678139655..95323917e 100644 --- a/ft/serialize/ft_node-serialize.h +++ b/ft/serialize/ft_node-serialize.h @@ -86,6 +86,7 @@ int toku_deserialize_bp_from_compressed(FTNODE node, int childnum, ftnode_fetch_extra *bfe); int toku_deserialize_ftnode_from(int fd, + unsigned int blocksize, BLOCKNUM off, uint32_t fullhash, FTNODE *node, @@ -102,8 +103,9 @@ void toku_create_compressed_partition_from_available(FTNODE node, int childnum, int decompress_from_raw_block_into_rbuf(uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum); // used by verify -int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ft, uint32_t version); +int deserialize_ft_versioned(int fd, unsigned int block_size, struct rbuf *rb, FT *ft, uint32_t version); void read_block_from_fd_into_rbuf(int fd, + unsigned int block_size, BLOCKNUM blocknum, FT ft, struct rbuf *rb); diff --git a/ft/tests/ft-bfe-query.cc b/ft/tests/ft-bfe-query.cc index 7abd2267a..2aa39f9c0 100644 --- a/ft/tests/ft-bfe-query.cc +++ b/ft/tests/ft-bfe-query.cc @@ -54,6 +54,7 @@ static void test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { FT_CURSOR XMALLOC(cursor); FTNODE dn = NULL; PAIR_ATTR attr; + unsigned int block_size = toku_cachefile_get_blocksize(ft_h->cf); // first test that prefetching everything should work memset(&cursor->range_lock_left_key, 0, sizeof(DBT)); @@ -70,7 +71,7 @@ static void test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { bfe.create_for_prefetch(ft_h, cursor); FTNODE_DISK_DATA ndd = NULL; r = toku_deserialize_ftnode_from( - fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + fd, block_size, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); invariant(r == 0); invariant(dn->n_children == 3); invariant(BP_STATE(dn, 0) == PT_ON_DISK); @@ -89,7 +90,7 @@ static void test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { 
bfe.create_for_prefetch(ft_h, cursor); r = toku_deserialize_ftnode_from( - fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + fd, block_size, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); invariant(r == 0); invariant(dn->n_children == 3); invariant(BP_STATE(dn, 0) == PT_AVAIL); @@ -113,7 +114,7 @@ static void test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { cursor->left_is_neg_infty = false; bfe.create_for_prefetch(ft_h, cursor); r = toku_deserialize_ftnode_from( - fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + fd, block_size, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); invariant(r == 0); invariant(dn->n_children == 3); invariant(BP_STATE(dn, 0) == PT_ON_DISK); @@ -137,7 +138,7 @@ static void test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { cursor->right_is_pos_infty = false; bfe.create_for_prefetch(ft_h, cursor); r = toku_deserialize_ftnode_from( - fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + fd, block_size, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); invariant(r == 0); invariant(dn->n_children == 3); invariant(BP_STATE(dn, 0) == PT_ON_DISK); @@ -160,7 +161,7 @@ static void test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { right_key = 100000; bfe.create_for_prefetch(ft_h, cursor); r = toku_deserialize_ftnode_from( - fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + fd, block_size, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); invariant(r == 0); invariant(dn->n_children == 3); invariant(BP_STATE(dn, 0) == PT_ON_DISK); @@ -183,7 +184,7 @@ static void test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { right_key = 100; bfe.create_for_prefetch(ft_h, cursor); r = toku_deserialize_ftnode_from( - fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + fd, block_size, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); invariant(r == 0); invariant(dn->n_children == 3); invariant(BP_STATE(dn, 0) == PT_AVAIL); @@ -233,7 +234,7 @@ static void test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) { bfe.child_to_read = 2; bfe.disable_prefetching = true; r = toku_deserialize_ftnode_from( - fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + fd, block_size, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); invariant(r == 0); invariant(dn->n_children == 3); invariant(BP_STATE(dn, 0) == PT_ON_DISK); @@ -262,7 +263,7 @@ static void test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) { bfe.child_to_read = 2; bfe.disable_prefetching = false; r = toku_deserialize_ftnode_from( - fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + fd, block_size, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); invariant(r == 0); invariant(dn->n_children == 3); invariant(BP_STATE(dn, 0) == PT_ON_DISK); @@ -290,7 +291,7 @@ static void test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) { // fake the childnum to read bfe.child_to_read = 0; r = toku_deserialize_ftnode_from( - fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + fd, block_size, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); invariant(r == 0); invariant(dn->n_children == 3); invariant(BP_STATE(dn, 0) == PT_AVAIL); diff --git a/ft/tests/ft-serialize-benchmark.cc b/ft/tests/ft-serialize-benchmark.cc index d50488ae1..29689d44e 100644 --- a/ft/tests/ft-serialize-benchmark.cc +++ b/ft/tests/ft-serialize-benchmark.cc @@ -78,12 +78,16 @@ static void test_serialize_leaf(int valsize, 
int deser_runs) { // struct ft_handle source_ft; struct ftnode *sn, *dn; + unsigned int block_size; + toku_struct_stat st; int fd = open(TOKU_TEST_FILENAME, O_RDWR | O_CREAT | O_BINARY, S_IRWXU | S_IRWXG | S_IRWXO); invariant(fd >= 0); + block_size = toku_os_fstat(fd, &st) ? 512 : st.st_blksize; + int r; XCALLOC(sn); @@ -202,7 +206,7 @@ static void test_serialize_leaf(int valsize, gettimeofday(&t[0], NULL); FTNODE_DISK_DATA ndd2 = NULL; r = toku_deserialize_ftnode_from( - fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd2, &bfe); + fd, block_size, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd2, &bfe); invariant(r == 0); gettimeofday(&t[1], NULL); @@ -375,7 +379,7 @@ static void test_serialize_nonleaf(int valsize, gettimeofday(&t[0], NULL); FTNODE_DISK_DATA ndd2 = NULL; r = toku_deserialize_ftnode_from( - fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd2, &bfe); + fd, block_size, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd2, &bfe); invariant(r == 0); gettimeofday(&t[1], NULL); dt = (t[1].tv_sec - t[0].tv_sec) + diff --git a/ft/tests/ft-serialize-test.cc b/ft/tests/ft-serialize-test.cc index 0cddaf196..fed7ffaf1 100644 --- a/ft/tests/ft-serialize-test.cc +++ b/ft/tests/ft-serialize-test.cc @@ -106,17 +106,18 @@ static void setup_dn(enum ftnode_verify_type bft, FTNODE *dn, FTNODE_DISK_DATA *ndd) { int r; + unsigned int block_size = toku_cachefile_get_blocksize(ft_h->cf); if (bft == read_all) { ftnode_fetch_extra bfe; bfe.create_for_full_read(ft_h); r = toku_deserialize_ftnode_from( - fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, ndd, &bfe); + fd, block_size, make_blocknum(20), 0 /*pass zero for hash*/, dn, ndd, &bfe); invariant(r == 0); } else if (bft == read_compressed || bft == read_none) { ftnode_fetch_extra bfe; bfe.create_for_min_read(ft_h); r = toku_deserialize_ftnode_from( - fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, ndd, &bfe); + fd, block_size, make_blocknum(20), 0 /*pass zero for hash*/, dn, ndd, &bfe); invariant(r == 0); // invariant all bp's are compressed or on disk. for (int i = 0; i < (*dn)->n_children; i++) { diff --git a/tools/ftverify.cc b/tools/ftverify.cc index ee40b991e..b10a3e053 100644 --- a/tools/ftverify.cc +++ b/tools/ftverify.cc @@ -132,17 +132,21 @@ deserialize_headers(int fd, struct ft **h1p, struct ft **h2p) bool h1_acceptable = false; int r0, r1; int r; + unsigned int block_size; + toku_struct_stat st; + + block_size = toku_os_fstat(fd, &st) ? 
512 : st.st_blksize; { toku_off_t header_0_off = 0; r0 = deserialize_ft_from_fd_into_rbuf( fd, + block_size, header_0_off, &rb_0, &checkpoint_count_0, &checkpoint_lsn_0, - &version_0 - ); + &version_0); if ((r0==0) && (checkpoint_lsn_0.lsn <= MAX_LSN.lsn)) { h0_acceptable = true; } @@ -151,12 +155,12 @@ deserialize_headers(int fd, struct ft **h1p, struct ft **h2p) toku_off_t header_1_off = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE; r1 = deserialize_ft_from_fd_into_rbuf( fd, + block_size, header_1_off, &rb_1, &checkpoint_count_1, &checkpoint_lsn_1, - &version_1 - ); + &version_1); if ((r1==0) && (checkpoint_lsn_1.lsn <= MAX_LSN.lsn)) { h1_acceptable = true; } @@ -169,7 +173,7 @@ deserialize_headers(int fd, struct ft **h1p, struct ft **h2p) } if (h0_acceptable) { printf("Found dictionary header 1 with LSN %" PRIu64 "\n", checkpoint_lsn_0.lsn); - r = deserialize_ft_versioned(fd, &rb_0, h1p, version_0); + r = deserialize_ft_versioned(fd, block_size, &rb_0, h1p, version_0); if (r != 0) { printf("---Header Error----\n"); @@ -180,7 +184,7 @@ deserialize_headers(int fd, struct ft **h1p, struct ft **h2p) } if (h1_acceptable) { printf("Found dictionary header 2 with LSN %" PRIu64 "\n", checkpoint_lsn_1.lsn); - r = deserialize_ft_versioned(fd, &rb_1, h2p, version_1); + r = deserialize_ft_versioned(fd, block_size, &rb_1, h2p, version_1); if (r != 0) { printf("---Header Error----\n"); } @@ -221,7 +225,7 @@ check_old_node(FTNODE node, struct rbuf *rb, int version) // Read, decompress, and check the given block. static int -check_block(BLOCKNUM blocknum, int64_t UU(blocksize), int64_t UU(address), void *extra) +check_block(BLOCKNUM blocknum, int64_t blocksize, int64_t UU(address), void *extra) { int r = 0; int failure = 0; @@ -235,7 +239,7 @@ check_block(BLOCKNUM blocknum, int64_t UU(blocksize), int64_t UU(address), void // Let's read the block off of disk and fill a buffer with that // block. struct rbuf rb = RBUF_INITIALIZER; - read_block_from_fd_into_rbuf(fd, blocknum, ft, &rb); + read_block_from_fd_into_rbuf(fd, blocksize, blocknum, ft, &rb); // Allocate the node. 
FTNODE XMALLOC(node); diff --git a/tools/tokuftdump.cc b/tools/tokuftdump.cc index 2838ae518..93540f7af 100644 --- a/tools/tokuftdump.cc +++ b/tools/tokuftdump.cc @@ -159,7 +159,7 @@ static void open_header(int fd, FT *header, CACHEFILE cf) { FT ft = NULL; int r; const char *fn = toku_cachefile_fname_in_env(cf); - r = toku_deserialize_ft_from (fd, fn, MAX_LSN, &ft); + r = toku_deserialize_ft_from (fd, toku_cachefile_get_blocksize(cf), fn, MAX_LSN, &ft); if (r != 0) { fprintf(stderr, "%s: can not deserialize from %s error %d\n", arg0, fname, r); exit(1); } @@ -212,8 +212,9 @@ static int getHeight(int fd, BLOCKNUM blocknum, FT ft){ FTNODE n; FTNODE_DISK_DATA ndd = nullptr; ftnode_fetch_extra bfe; + unsigned int block_size = toku_cachefile_get_blocksize(ft->cf); bfe.create_for_full_read(ft); - int r = toku_deserialize_ftnode_from (fd, blocknum, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); + int r = toku_deserialize_ftnode_from (fd, block_size, blocknum, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); assert_zero(r); assert(n!=0); return n->height; @@ -223,8 +224,9 @@ static FTNODE getNode(int fd, BLOCKNUM blocknum, FT ft) { FTNODE n; FTNODE_DISK_DATA ndd = nullptr; ftnode_fetch_extra bfe; + unsigned int block_size = toku_cachefile_get_blocksize(ft->cf); bfe.create_for_full_read(ft); - int r = toku_deserialize_ftnode_from (fd, blocknum, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); + int r = toku_deserialize_ftnode_from (fd, block_size, blocknum, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); assert_zero(r);; return n; } @@ -398,8 +400,9 @@ static void dump_node(int fd, BLOCKNUM blocknum, FT ft) { FTNODE n; FTNODE_DISK_DATA ndd = nullptr; ftnode_fetch_extra bfe; + unsigned int block_size = toku_cachefile_get_blocksize(ft->cf); bfe.create_for_full_read(ft); - int r = toku_deserialize_ftnode_from (fd, blocknum, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); + int r = toku_deserialize_ftnode_from (fd, block_size, blocknum, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); assert_zero(r); assert(n!=0); printf("ftnode\n"); @@ -547,7 +550,7 @@ static int nodesizes_helper(BLOCKNUM b, int64_t size, int64_t UU(address), void FTNODE_DISK_DATA ndd = NULL; ftnode_fetch_extra bfe; bfe.create_for_full_read(info->ft); - int r = toku_deserialize_ftnode_from(info->fd, b, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); + int r = toku_deserialize_ftnode_from(info->fd, toku_cachefile_get_blocksize(info->ft->cf), b, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); if (r==0) { info->blocksizes += size; if (n->height == 0) { @@ -600,7 +603,7 @@ static int summary_helper(BLOCKNUM b, int64_t size, int64_t UU(address), void *e ftnode_fetch_extra bfe; bfe.create_for_full_read(info->ft); - int r = toku_deserialize_ftnode_from(info->fd, b, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); + int r = toku_deserialize_ftnode_from(info->fd, toku_cachefile_get_blocksize(info->ft->cf), b, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); if (r==0) { info->blocksizes += size; @@ -988,6 +991,7 @@ static void writeTree(NMC *msgs[],int height){ static void FT_to_JSON(int fd, FT ft, CACHEFILE cf, const char * JsonFile){ toku_ft_free(ft); + assert(fd == toku_cachefile_get_fd(cf)); open_header(fd, &ft, cf); int root=getRootNode(ft); BLOCKNUM off = make_blocknum(root);
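// Taken together, a read-only tool follows the same sequence tokuftdump and
// ftverify use above: open the file, let the cachefile capture the filesystem
// blocksize, then thread that value through every deserialize call. A
// condensed sketch, assuming a CACHEFILE has already been opened on fd
// (open_dictionary is a hypothetical name; MAX_LSN and assert_zero are the
// helpers the tools above already use):
static FT open_dictionary(int fd, CACHEFILE cf) {
    FT ft = NULL;
    const char *fname = toku_cachefile_fname_in_env(cf);
    unsigned int blocksize = toku_cachefile_get_blocksize(cf);
    int r = toku_deserialize_ft_from(fd, blocksize, fname, MAX_LSN, &ft);
    assert_zero(r);
    return ft;
}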