apache · sungwy · Sep 24, 2024 · Sep 20, 2024 · Sep 20, 2024 · Sep 20, 2024
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py
@@ -28,9 +28,12 @@
     List,
     Literal,
     Optional,
+    Tuple,
     Type,
 )
 
+from cachetools import LRUCache, cached
+from cachetools.keys import hashkey
 from pydantic_core import to_json
 
 from pyiceberg.avro.file import AvroFile, AvroOutputFile
@@ -620,6 +623,13 @@ def fetch_manifest_entry(self, io: FileIO, discard_deleted: bool = True) -> List
             ]
 
 
+@cached(cache=LRUCache(maxsize=128), key=lambda io, manifest_list: hashkey(manifest_list))
+def _manifests(io: FileIO, manifest_list: str) -> Tuple[ManifestFile, ...]:
+    """Read the manifests in `manifest_list` into a tuple, LRU-cached (128 entries) by `manifest_list` alone; `io` is not in the key."""
+    file = io.new_input(manifest_list)
+    return tuple(read_manifest_list(file))
+
+
 def read_manifest_list(input_file: InputFile) -> Iterator[ManifestFile]:
     """
     Read the manifests from the manifest list.

diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py
@@ -19,13 +19,12 @@
 import time
 from collections import defaultdict
 from enum import Enum
-from functools import lru_cache
 from typing import TYPE_CHECKING, Any, DefaultDict, Dict, Iterable, List, Mapping, Optional
 
 from pydantic import Field, PrivateAttr, model_serializer
 
 from pyiceberg.io import FileIO
-from pyiceberg.manifest import DataFile, DataFileContent, ManifestFile, read_manifest_list
+from pyiceberg.manifest import DataFile, DataFileContent, ManifestFile, _manifests
 from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
 from pyiceberg.schema import Schema
 
@@ -231,13 +230,6 @@ def __eq__(self, other: Any) -> bool:
         )
 
 
-@lru_cache
-def _manifests(io: FileIO, manifest_list: str) -> List[ManifestFile]:
-    """Return the manifests from the manifest list."""
-    file = io.new_input(manifest_list)
-    return list(read_manifest_list(file))
-
-
 class Snapshot(IcebergBaseModel):
     snapshot_id: int = Field(alias="snapshot-id")
     parent_snapshot_id: Optional[int] = Field(alias="parent-snapshot-id", default=None)
@@ -260,7 +252,7 @@ def __str__(self) -> str:
     def manifests(self, io: FileIO) -> List[ManifestFile]:
         """Return the manifests for the given snapshot."""
         if self.manifest_list:
-            return _manifests(io, self.manifest_list)
+            return list(_manifests(io, self.manifest_list))
         return []