Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
140d075
[file_packager] split data files when files exceeds configured limit
arsnyder16 Jul 29, 2025
505d26a
comment
arsnyder16 Jul 29, 2025
11ec397
whitespace
arsnyder16 Jul 29, 2025
d36ab45
static limit
arsnyder16 Jul 31, 2025
c2d0ba7
Merge remote-tracking branch 'origin/main' into asnyder/split_large_p…
arsnyder16 Jul 31, 2025
a7ac251
only if exclusively preload
arsnyder16 Jul 31, 2025
dfe6838
cleanup
arsnyder16 Jul 31, 2025
f73f613
ruff
arsnyder16 Jul 31, 2025
5ad1003
Merge branch 'emscripten-core:main' into asnyder/split_large_packages
arsnyder16 Aug 4, 2025
15d50ee
add tests
arsnyder16 Aug 10, 2025
30421d5
Merge branch 'main' into asnyder/split_large_packages
arsnyder16 Aug 10, 2025
95b5f33
rev
arsnyder16 Aug 10, 2025
f495602
ruff
arsnyder16 Aug 10, 2025
1daf7d4
no needed
arsnyder16 Aug 10, 2025
bffa305
remove debugging
arsnyder16 Aug 10, 2025
e6217c0
Merge branch 'main' into asnyder/split_large_packages
arsnyder16 Aug 18, 2025
c5d44a2
test perf
arsnyder16 Aug 18, 2025
6d4fe7c
ruff
arsnyder16 Aug 18, 2025
b04d3ed
2046 limit
arsnyder16 Aug 19, 2025
9ccb2ed
Merge branch 'main' into asnyder/split_large_packages
arsnyder16 Aug 19, 2025
f1a88f1
Merge branch 'main' into asnyder/split_large_packages
arsnyder16 Aug 19, 2025
e7be18a
Merge remote-tracking branch 'origin/main' into asnyder/split_large_p…
arsnyder16 Aug 22, 2025
9411cb6
Merge branch 'asnyder/split_large_packages' of https://github.com/ars…
arsnyder16 Aug 22, 2025
4edef4d
remove
arsnyder16 Aug 22, 2025
0e645d5
Merge remote-tracking branch 'origin/main' into asnyder/split_large_p…
arsnyder16 Sep 17, 2025
04f04ad
fix
arsnyder16 Sep 17, 2025
7737645
try to reduce memory usage
arsnyder16 Sep 17, 2025
759062c
PR feedback
arsnyder16 Sep 17, 2025
0ad75b8
ruff
arsnyder16 Sep 17, 2025
36c2a2b
test loading
arsnyder16 Sep 18, 2025
f268944
ruff
arsnyder16 Sep 18, 2025
2a72cd0
Merge remote-tracking branch 'origin/main' into asnyder/split_large_p…
arsnyder16 Feb 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 157 additions & 1 deletion test/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,12 @@ def run_on_pty(self, cmd):
os.close(master)
os.close(slave)

def create_huge_file(self, name, length):
  """Create a file of `length` bytes without holding the data in memory.

  Seeking just past the end and writing a single NUL byte produces a sparse
  file on filesystems that support it, so multi-GB test fixtures are cheap
  to create. A zero-length request creates an empty file.
  """
  with open(name, 'wb') as f:
    if length:
      # Position at the last byte and write it; everything before is a hole.
      f.seek(length - 1)
      f.write(b'\0')

# Test that running `emcc -v` always works even in the presence of `EMCC_CFLAGS`.
# This needs to work because many tools run `emcc -v` internally and it should
# always work even if the user has `EMCC_CFLAGS` set.
Expand Down Expand Up @@ -6381,14 +6387,164 @@ def test_underscore_exit(self):

def test_file_packager_huge(self):
  # The "very large bundle" warning should appear only once the package
  # crosses the size threshold, not for small packages.
  warning = ('warning: file packager is creating an asset bundle of 257 MB. '
             'this is very large, and browsers might have trouble loading it')
  self.create_huge_file('huge.dat', 1024 * 1024 * 257)
  create_file('tiny.dat', 'a')

  def package(dat_file):
    # Run the packager on a single input and capture its stderr.
    return self.run_process([FILE_PACKAGER, 'test.data', '--preload', dat_file], stdout=PIPE, stderr=PIPE).stderr

  self.assertNotContained(warning, package('tiny.dat'))
  self.assertContained(warning, package('huge.dat'))
  self.clear()

def test_file_packager_huge_no_split(self):
  # Verify that when packaging up to 2046MB of data (the browser-safe
  # limit), file packager does not split up the generated data file.
  chunk_size = 1024 * 1024 * 256
  last_size = 1024 * 1024 * 254  # 7 * 256MB + 254MB == 2046MB exactly
  for i in range(7):
    self.create_huge_file(f'huge{i}.dat', chunk_size)
  self.create_huge_file('huge7.dat', last_size)
  result = self.run_process([FILE_PACKAGER, 'test.data', '--preload', 'huge7.dat'] + [f'huge{i}.dat' for i in range(7)], stdout=PIPE, stderr=PIPE)
  self.assertContained('warning: file packager is creating an asset bundle of 2046 MB. this is very large, and browsers might have trouble loading it', result.stderr)
  self.assertExists('test.data')
  self.assertNotExists('test_1.data')
  self.assertEqual(os.path.getsize('test.data'), 2046 * 1024 * 1024)
  create_file('load.js', result.stdout)
  # Generate one stat/size check per packaged file rather than hand-writing
  # eight near-identical C blocks.
  sizes = [chunk_size] * 7 + [last_size]
  checks = '\n'.join('  assert(stat("huge%d.dat", &buf) == 0);\n  assert(buf.st_size == %d);' % (i, size)
                     for i, size in enumerate(sizes))
  create_file('src.c', r'''
#include <assert.h>
#include <sys/stat.h>
#include <stdio.h>

int main() {
  struct stat buf;
%s
  printf("done\n");
  return 0;
}
''' % checks)
  self.do_runf('src.c', cflags=['--pre-js=load.js', '-sFORCE_FILESYSTEM'])
  self.clear()

def test_file_packager_huge_split(self):
  # Verify that when size exceeds 2046MB (by a single byte here), file
  # packager splits the generated data file into two: the first seven files
  # fit in the first chunk, and the overflowing file goes into the second.
  chunk_size = 1024 * 1024 * 256
  last_size = (1024 * 1024 * 254) + 1  # pushes the total 1 byte over 2046MB
  for i in range(7):
    self.create_huge_file(f'huge{i}.dat', chunk_size)
  self.create_huge_file('huge7.dat', last_size)
  result = self.run_process([FILE_PACKAGER, 'test.data', '--preload', 'huge7.dat'] + [f'huge{i}.dat' for i in range(7)], stdout=PIPE, stderr=PIPE)
  self.assertContained('warning: file packager is creating an asset bundle of 1792 MB. this is very large, and browsers might have trouble loading it', result.stderr)
  self.assertContained('warning: file packager is splitting bundle into 2 chunks', result.stderr)
  self.assertExists('test.data')
  self.assertExists('test_1.data')
  self.assertEqual(os.path.getsize('test.data'), chunk_size * 7)
  self.assertEqual(os.path.getsize('test_1.data'), last_size)
  create_file('load.js', result.stdout)
  # Generate one stat/size check per packaged file rather than hand-writing
  # eight near-identical C blocks.
  sizes = [chunk_size] * 7 + [last_size]
  checks = '\n'.join('  assert(stat("huge%d.dat", &buf) == 0);\n  assert(buf.st_size == %d);' % (i, size)
                     for i, size in enumerate(sizes))
  create_file('src.c', r'''
#include <assert.h>
#include <sys/stat.h>
#include <stdio.h>

int main() {
  struct stat buf;
%s
  printf("done\n");
  return 0;
}
''' % checks)
  self.do_runf('src.c', cflags=['--pre-js=load.js', '-sFORCE_FILESYSTEM'])
  self.clear()

def test_file_packager_huge_split_metadata(self):
  # Verify that when size exceeds 2046MB and --separate-metadata is used,
  # file packager splits both the generated data files and the metadata
  # files, and that each chunk's metadata describes only its own files.
  large_size = 1024 * 1024 * 256
  final_size = (1024 * 1024 * 254) + 1  # pushes the total 1 byte over 2046MB
  for i in range(7):
    self.create_huge_file(f'huge{i}.dat', large_size)
  self.create_huge_file('huge7.dat', final_size)
  err = self.run_process([FILE_PACKAGER, 'test.data', '--separate-metadata', '--js-output=immutable.js', '--preload', 'huge7.dat'] + [f'huge{i}.dat' for i in range(7)], stdout=PIPE, stderr=PIPE).stderr
  self.assertContained('warning: file packager is creating an asset bundle of 1792 MB. this is very large, and browsers might have trouble loading it', err)
  self.assertContained('warning: file packager is splitting bundle into 2 chunks', err)
  for output in ('test.data', 'test_1.data', 'immutable.js', 'immutable_1.js'):
    self.assertExists(output)
  self.assertEqual(os.path.getsize('test.data'), large_size * 7)
  self.assertEqual(os.path.getsize('test_1.data'), final_size)

  # First chunk's metadata: seven contiguous 256MB entries.
  self.assertExists('immutable.js.metadata')
  metadata = json.loads(read_file('immutable.js.metadata'))
  self.assertEqual(len(metadata['files']), 7)
  for i, entry in enumerate(metadata['files']):
    self.assertEqual(entry['start'], i * large_size)
    self.assertEqual(entry['end'], (i + 1) * large_size)
    self.assertEqual(entry['filename'], f'/huge{i}.dat')
  self.assertEqual(metadata['remote_package_size'], 7 * large_size)

  # Second chunk's metadata: offsets restart at zero for the single file.
  self.assertExists('immutable_1.js.metadata')
  metadata = json.loads(read_file('immutable_1.js.metadata'))
  self.assertEqual(len(metadata['files']), 1)
  self.assertEqual(metadata['files'][0]['start'], 0)
  self.assertEqual(metadata['files'][0]['end'], final_size)
  self.assertEqual(metadata['files'][0]['filename'], '/huge7.dat')
  self.assertEqual(metadata['remote_package_size'], final_size)
  # Generate one stat/size check per packaged file rather than hand-writing
  # eight near-identical C blocks.
  sizes = [large_size] * 7 + [final_size]
  checks = '\n'.join('  assert(stat("huge%d.dat", &buf) == 0);\n  assert(buf.st_size == %d);' % (i, size)
                     for i, size in enumerate(sizes))
  create_file('src.c', r'''
#include <assert.h>
#include <sys/stat.h>
#include <stdio.h>

int main() {
  struct stat buf;
%s
  printf("done\n");
  return 0;
}
''' % checks)
  self.do_runf('src.c', cflags=['--pre-js=immutable.js', '--pre-js=immutable_1.js', '-sFORCE_FILESYSTEM'])
  self.clear()

def test_file_packager_huge_split_too_large(self):
  # A single file bigger than the per-chunk limit cannot be split, so the
  # packager must reject it with a non-zero exit code and a clear error.
  over_limit = (2046 * 1024 * 1024) + 1
  self.create_huge_file('huge.dat', over_limit)
  proc = self.run_process([FILE_PACKAGER, 'test.data', '--preload', 'huge.dat'],
                          check=False, stdout=PIPE, stderr=PIPE)
  self.assertEqual(proc.returncode, 1)
  self.assertContained('error: cannot package file huge.dat, which is larger than maximum individual file size limit 2046 MB', proc.stderr)
  self.clear()
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like the above tests verify that file packager does split up files, though there is no functional test to verify that e.g. the bytes were split appropriately, and that the final end result loads up properly? Would that be important to test?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure how critical it is, looks like all the file_packager logic is part of these unit tests, seems like out of scope for this PR to add tests that verify the loading of the file_packager results

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are many tests that verify load of file packager results. Basically any test that uses emcc with --preload-file is verifying this.

There are also tests that call FILE_PACKGER directly and then execute the result. See test_file_packager_separate_metadata for example.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sbc100 Thanks for pointing out these docs; test_file_packager_separate_metadata was a good example to follow to test this logic

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sbc100 @juj I updated to verify the package(s) load but on CircleCI they seem to be crashing immediately. I suspect OOMKilled. They run fine for me locally

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@juj @sbc100 Any guidance here?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@juj @sbc100 Could use some guidance here to get this across the finish line. Now that I addressed the feedback to actually load the results in these tests, they cause the CircleCI processes to run out of memory.


@parameterized({
'': (True,),
'wasm2js': (False,),
Expand Down
84 changes: 51 additions & 33 deletions tools/file_packager.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@

DEBUG = os.environ.get('EMCC_DEBUG')

# chrome limit is 2MB under 2Gi
PRELOAD_DATA_FILE_LIMIT = 2046 * 1024 * 1024

excluded_patterns: list[str] = []
new_data_files = []
walked = []
Expand Down Expand Up @@ -537,13 +540,53 @@ def was_seen(name):
data_files = sorted(data_files, key=lambda file_: file_.dstpath)
data_files = [file_ for file_ in data_files if not was_seen(file_.dstpath)]

targets = []
if options.obj_output:
if not options.has_embedded:
diagnostics.error('--obj-output is only applicable when embedding files')
targets.append(options.obj_output)
generate_object_file(data_files)
else:
file_chunks = [data_files]
file_chunks = [[]]
current_size = 0
for file_ in data_files:
fsize = os.path.getsize(file_.srcpath)
if current_size + fsize <= PRELOAD_DATA_FILE_LIMIT:
file_chunks[-1].append(file_)
current_size += fsize
elif fsize > PRELOAD_DATA_FILE_LIMIT:
diagnostics.error('cannot package file %s, which is larger than maximum individual file size limit %d MB.' % (file_.srcpath, (PRELOAD_DATA_FILE_LIMIT / (1024 * 1024))))
return 1
else:
current_size = fsize
file_chunks.append([file_])

if len(file_chunks) > 1:
diagnostics.warn('warning: file packager is splitting bundle into %d chunks' % len(file_chunks))

for counter, data_files in enumerate(file_chunks):
metadata = {'files': []}

def construct_data_file_name(base,ext):
return f"{base}{f'_{counter}' if counter else ''}.{ext}"
data_file = construct_data_file_name(*data_target.rsplit('.', 1))
js_file = None if options.jsoutput is None else construct_data_file_name(*options.jsoutput.rsplit('.', 1))
targets.append(data_file)
ret = generate_preload_js(data_file, data_files, metadata, js_file)
if options.force or len(data_files):
if options.jsoutput is None:
print(ret)
else:
# Overwrite the old jsoutput file (if exists) only when its content
# differs from the current generated one, otherwise leave the file
# untouched preserving its old timestamp
targets.append(js_file)
if ret != (utils.read_file(js_file) if os.path.isfile(js_file) else ''):
utils.write_file(js_file, ret)
if options.separate_metadata:
utils.write_file(js_file + '.metadata', json.dumps(metadata, separators=(',', ':')))
if options.depfile:
targets = []
if options.obj_output:
targets.append(options.obj_output)
if options.jsoutput:
targets.append(data_target)
targets.append(options.jsoutput)
with open(options.depfile, 'w') as f:
for target in targets:
if target:
Expand All @@ -554,31 +597,6 @@ def was_seen(name):
f.write(escape_for_makefile(dependency))
f.write(' \\\n')

if options.obj_output:
if not options.has_embedded:
diagnostics.error('--obj-output is only applicable when embedding files')
generate_object_file(data_files)
else:
metadata = {'files': []}

ret = generate_preload_js(data_target, data_files, metadata)

if options.force or data_files:
if options.jsoutput is None:
print(ret)
else:
# Overwrite the old jsoutput file (if exists) only when its content
# differs from the current generated one, otherwise leave the file
# untouched preserving its old timestamp
if os.path.isfile(options.jsoutput):
old = utils.read_file(options.jsoutput)
if old != ret:
utils.write_file(options.jsoutput, ret)
else:
utils.write_file(options.jsoutput, ret)
if options.separate_metadata:
utils.write_file(options.jsoutput + '.metadata', json.dumps(metadata, separators=(',', ':')))

return 0


Expand All @@ -590,7 +608,7 @@ def escape_for_makefile(fpath):
return fpath.replace('$', '$$').replace('#', '\\#').replace(' ', '\\ ')


def generate_preload_js(data_target, data_files, metadata):
def generate_preload_js(data_target, data_files, metadata, js_file):
# emcc will add this to the output itself, so it is only needed for
# standalone calls
if options.from_emcc:
Expand Down Expand Up @@ -1077,7 +1095,7 @@ def generate_preload_js(data_target, data_files, metadata):
} else {
if (!Module['preRun']) Module['preRun'] = [];
Module['preRun'].push(runMetaWithFS);
}\n''' % {'node_support_code': node_support_code, 'metadata_file': os.path.basename(options.jsoutput + '.metadata')}
}\n''' % {'node_support_code': node_support_code, 'metadata_file': os.path.basename(js_file + '.metadata')}
else:
ret += '''
}
Expand Down