FlatBuffers 64 for C++ (#7935)

* First working hack of adding 64-bit. Don't judge :) * Made vector_downward work on 64 bit types * vector_downward uses size_t, added offset64 to reflection * cleaned up adding offset64 in parser * Add C++ testing skeleton for 64-bit * working test for CreateVector64 * working >2 GiB buffers * support for large strings * simplified CreateString<> to just provide the offset type * generalize CreateVector template * update test_64.afb due to upstream format change * Added Vector64 type, which is just an alias for vector ATM * Switch to Offset64 for Vector64 * Update for reflection bfbs output change * Starting to add support for vector64 type in C++ * made a generic CreateVector that can handle different offsets and vector types * Support for 32-vector with 64-addressing * Vector64 basic builder + tests working * basic support for json vector64 support * renamed fields in test_64bit.fbs to better reflect their use * working C++ vector64 builder * Apply --annotate-sparse-vector to 64-bit tests * Enable Vector64 for --annotate-sparse-vectors * Merged from upstream * Add `near_string` field for testing 32-bit offsets alongside * keep track of where the 32-bit and 64-bit regions are for flatbufferbuilder * move template<> outside class body for GCC * update run.sh to build and run tests * basic assertion for adding 64-bit offset at the wrong time * started to separate `FlatBufferBuilder` into two classes, 1 64-bit aware, the other not * add test for nested flatbuffer vector64, fix bug in alignment of big vectors * fixed CreateDirect method by iterating by Offset64 first * internal refactoring of flatbufferbuilder * block not supported languages in the parser from using 64-bit * evolution tests for adding a vector64 field * conformity tests for adding/removing offset64 attributes * ensure test is for a big buffer * add parser error tests for `offset64` and `vector64` attributes * add missing static that GCC only complains about * remove stdint-uintn.h header that gets 
automatically added * move 64-bit CalculateOffset internal * fixed return size of EndVector * various fixes on windows * add SizeT to vector_downward * minimize range of size changes in vector and builder * reworked tracking of whether 64-bit offsets are added * Add ReturnT to EndVector * small cleanups * remove need for second Array definition * combine IndirectHelpers into one definition * started support for vector of struct * Support for 32/64-vectors of structs + Offset64 * small cleanups * add verification for vector64 * add sized prefix for 64-bit buffers * add fuzzer for 64-bit * add example of adding many vectors using a wrapper table * run the new --bfbs-gen-embed logic on the 64-bit tests * remove run.sh and fix cmakelist issue * fixed bazel rules * fixed some PR comments * add 64-bit tests to cmakelist
google · May 9, 2023 · 63b7b25 · 63b7b25
1 parent 13fc75c
commit 63b7b25
Show file tree

Hide file tree

Showing 49 changed files with 3,270 additions and 525 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -234,6 +234,8 @@ set(FlatBuffers_Tests_SRCS
   tests/native_type_test_impl.cpp
   tests/alignment_test.h
   tests/alignment_test.cpp
+  tests/64bit/offset64_test.h
+  tests/64bit/offset64_test.cpp
   include/flatbuffers/code_generators.h
   src/code_generators.cpp
 )
@@ -527,6 +529,9 @@ if(FLATBUFFERS_BUILD_TESTS)
   compile_schema_for_test(tests/native_inline_table_test.fbs "${FLATC_OPT_COMP}")
   compile_schema_for_test(tests/native_type_test.fbs "${FLATC_OPT}")
   compile_schema_for_test(tests/key_field/key_field_sample.fbs "${FLATC_OPT_COMP}")
+  compile_schema_for_test(tests/64bit/test_64bit.fbs "${FLATC_OPT_COMP};--bfbs-gen-embed")
+  compile_schema_for_test(tests/64bit/evolution/v1.fbs "${FLATC_OPT_COMP}")
+  compile_schema_for_test(tests/64bit/evolution/v2.fbs "${FLATC_OPT_COMP}")
 
   if(FLATBUFFERS_CODE_SANITIZE)
     add_fsanitize_to_target(flattests ${FLATBUFFERS_CODE_SANITIZE})

diff --git a/include/flatbuffers/array.h b/include/flatbuffers/array.h
@@ -17,6 +17,7 @@
 #ifndef FLATBUFFERS_ARRAY_H_
 #define FLATBUFFERS_ARRAY_H_
 
+#include <cstdint>
 #include <memory>
 
 #include "flatbuffers/base.h"
@@ -37,7 +38,7 @@ template<typename T, uint16_t length> class Array {
  public:
   typedef uint16_t size_type;
   typedef typename IndirectHelper<IndirectHelperType>::return_type return_type;
-  typedef VectorConstIterator<T, return_type> const_iterator;
+  typedef VectorConstIterator<T, return_type, uoffset_t> const_iterator;
   typedef VectorReverseIterator<const_iterator> const_reverse_iterator;
 
   // If T is a LE-scalar or a struct (!scalar_tag::value).
@@ -158,11 +159,13 @@ template<typename T, uint16_t length> class Array {
 
 // Specialization for Array[struct] with access using Offset<void> pointer.
 // This specialization used by idl_gen_text.cpp.
-template<typename T, uint16_t length> class Array<Offset<T>, length> {
+template<typename T, uint16_t length, template<typename> class OffsetT>
+class Array<OffsetT<T>, length> {
   static_assert(flatbuffers::is_same<T, void>::value, "unexpected type T");
 
  public:
   typedef const void *return_type;
+  typedef uint16_t size_type;
 
   const uint8_t *Data() const { return data_; }
 

diff --git a/include/flatbuffers/base.h b/include/flatbuffers/base.h
@@ -43,6 +43,7 @@
 #include <vector>
 #include <set>
 #include <algorithm>
+#include <limits>
 #include <iterator>
 #include <memory>
 
@@ -323,9 +324,11 @@ namespace flatbuffers {
 // Also, using a consistent offset type maintains compatibility of serialized
 // offset values between 32bit and 64bit systems.
 typedef uint32_t uoffset_t;
+typedef uint64_t uoffset64_t;
 
 // Signed offsets for references that can go in both directions.
 typedef int32_t soffset_t;
+typedef int64_t soffset64_t;
 
 // Offset/index used in v-tables, can be changed to uint8_t in
 // format forks to save a bit of space if desired.
@@ -334,7 +337,8 @@ typedef uint16_t voffset_t;
 typedef uintmax_t largest_scalar_t;
 
 // In 32bits, this evaluates to 2GB - 1
-#define FLATBUFFERS_MAX_BUFFER_SIZE ((1ULL << (sizeof(::flatbuffers::soffset_t) * 8 - 1)) - 1)
+#define FLATBUFFERS_MAX_BUFFER_SIZE std::numeric_limits<::flatbuffers::soffset_t>::max()
+#define FLATBUFFERS_MAX_64_BUFFER_SIZE std::numeric_limits<::flatbuffers::soffset64_t>::max()
 
 // The minimum size buffer that can be a valid flatbuffer.
 // Includes the offset to the root table (uoffset_t), the offset to the vtable

diff --git a/include/flatbuffers/buffer.h b/include/flatbuffers/buffer.h
@@ -25,14 +25,33 @@ namespace flatbuffers {
 
 // Wrapper for uoffset_t to allow safe template specialization.
 // Value is allowed to be 0 to indicate a null object (see e.g. AddOffset).
-template<typename T> struct Offset {
-  uoffset_t o;
+template<typename T = void> struct Offset {
+  // The type of offset to use.
+  typedef uoffset_t offset_type;
+
+  offset_type o;
   Offset() : o(0) {}
-  Offset(uoffset_t _o) : o(_o) {}
-  Offset<void> Union() const { return Offset<void>(o); }
+  Offset(const offset_type _o) : o(_o) {}
+  Offset<> Union() const { return o; }
+  bool IsNull() const { return !o; }
+};
+
+// Wrapper for uoffset64_t Offsets.
+template<typename T = void> struct Offset64 {
+  // The type of offset to use.
+  typedef uoffset64_t offset_type;
+
+  offset_type o;
+  Offset64() : o(0) {}
+  Offset64(const offset_type offset) : o(offset) {}
+  Offset64<> Union() const { return o; }
   bool IsNull() const { return !o; }
 };
 
+// Litmus check for ensuring the Offsets are the expected size.
+static_assert(sizeof(Offset<>) == 4, "Offset has wrong size");
+static_assert(sizeof(Offset64<>) == 8, "Offset64 has wrong size");
+
 inline void EndianCheck() {
   int endiantest = 1;
   // If this fails, see FLATBUFFERS_LITTLEENDIAN above.
@@ -75,35 +94,59 @@ template<typename T> struct IndirectHelper {
   typedef T return_type;
   typedef T mutable_return_type;
   static const size_t element_stride = sizeof(T);
-  static return_type Read(const uint8_t *p, uoffset_t i) {
+
+  static return_type Read(const uint8_t *p, const size_t i) {
     return EndianScalar((reinterpret_cast<const T *>(p))[i]);
   }
-  static return_type Read(uint8_t *p, uoffset_t i) {
-    return Read(const_cast<const uint8_t *>(p), i);
+  static mutable_return_type Read(uint8_t *p, const size_t i) {
+    return reinterpret_cast<mutable_return_type>(
+        Read(const_cast<const uint8_t *>(p), i));
   }
 };
-template<typename T> struct IndirectHelper<Offset<T>> {
+
+// For vector of Offsets.
+template<typename T, template<typename> class OffsetT>
+struct IndirectHelper<OffsetT<T>> {
   typedef const T *return_type;
   typedef T *mutable_return_type;
-  static const size_t element_stride = sizeof(uoffset_t);
-  static return_type Read(const uint8_t *p, uoffset_t i) {
-    p += i * sizeof(uoffset_t);
-    return reinterpret_cast<return_type>(p + ReadScalar<uoffset_t>(p));
+  typedef typename OffsetT<T>::offset_type offset_type;
+  static const offset_type element_stride = sizeof(offset_type);
+
+  static return_type Read(const uint8_t *const p, const offset_type i) {
+    // Offsets are relative to themselves, so first update the pointer to
+    // point to the offset location.
+    const uint8_t *const offset_location = p + i * element_stride;
+
+    // Then read the scalar value of the offset (which may be 32 or 64-bits) and
+    // then determine the relative location from the offset location.
+    return reinterpret_cast<return_type>(
+        offset_location + ReadScalar<offset_type>(offset_location));
   }
-  static mutable_return_type Read(uint8_t *p, uoffset_t i) {
-    p += i * sizeof(uoffset_t);
-    return reinterpret_cast<mutable_return_type>(p + ReadScalar<uoffset_t>(p));
+  static mutable_return_type Read(uint8_t *const p, const offset_type i) {
+    // Offsets are relative to themselves, so first update the pointer to
+    // point to the offset location.
+    uint8_t *const offset_location = p + i * element_stride;
+
+    // Then read the scalar value of the offset (which may be 32 or 64-bits) and
+    // then determine the relative location from the offset location.
+    return reinterpret_cast<mutable_return_type>(
+        offset_location + ReadScalar<offset_type>(offset_location));
   }
 };
+
+// For vector of structs.
 template<typename T> struct IndirectHelper<const T *> {
   typedef const T *return_type;
   typedef T *mutable_return_type;
   static const size_t element_stride = sizeof(T);
-  static return_type Read(const uint8_t *p, uoffset_t i) {
-    return reinterpret_cast<return_type>(p + i * sizeof(T));
+
+  static return_type Read(const uint8_t *const p, const size_t i) {
+    // Structs are stored inline, relative to the first struct pointer.
+    return reinterpret_cast<return_type>(p + i * element_stride);
   }
-  static mutable_return_type Read(uint8_t *p, uoffset_t i) {
-    return reinterpret_cast<mutable_return_type>(p + i * sizeof(T));
+  static mutable_return_type Read(uint8_t *const p, const size_t i) {
+    // Structs are stored inline, relative to the first struct pointer.
+    return reinterpret_cast<mutable_return_type>(p + i * element_stride);
   }
 };
 
@@ -130,23 +173,25 @@ inline bool BufferHasIdentifier(const void *buf, const char *identifier,
 /// @cond FLATBUFFERS_INTERNAL
 // Helpers to get a typed pointer to the root object contained in the buffer.
 template<typename T> T *GetMutableRoot(void *buf) {
+  if (!buf) return nullptr;
   EndianCheck();
   return reinterpret_cast<T *>(
       reinterpret_cast<uint8_t *>(buf) +
       EndianScalar(*reinterpret_cast<uoffset_t *>(buf)));
 }
 
-template<typename T> T *GetMutableSizePrefixedRoot(void *buf) {
-  return GetMutableRoot<T>(reinterpret_cast<uint8_t *>(buf) +
-                           sizeof(uoffset_t));
+template<typename T, typename SizeT = uoffset_t>
+T *GetMutableSizePrefixedRoot(void *buf) {
+  return GetMutableRoot<T>(reinterpret_cast<uint8_t *>(buf) + sizeof(SizeT));
 }
 
 template<typename T> const T *GetRoot(const void *buf) {
   return GetMutableRoot<T>(const_cast<void *>(buf));
 }
 
-template<typename T> const T *GetSizePrefixedRoot(const void *buf) {
-  return GetRoot<T>(reinterpret_cast<const uint8_t *>(buf) + sizeof(uoffset_t));
+template<typename T, typename SizeT = uoffset_t>
+const T *GetSizePrefixedRoot(const void *buf) {
+  return GetRoot<T>(reinterpret_cast<const uint8_t *>(buf) + sizeof(SizeT));
 }
 
 }  // namespace flatbuffers