Skip to content

Commit

Permalink
move _manifests to manifest.py
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinjqliu committed Sep 20, 2024
1 parent 7d82252 commit 13fe7d6
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 14 deletions.
10 changes: 10 additions & 0 deletions pyiceberg/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,12 @@
List,
Literal,
Optional,
Tuple,
Type,
)

from cachetools import LRUCache, cached
from cachetools.keys import hashkey
from pydantic_core import to_json

from pyiceberg.avro.file import AvroFile, AvroOutputFile
Expand Down Expand Up @@ -620,6 +623,13 @@ def fetch_manifest_entry(self, io: FileIO, discard_deleted: bool = True) -> List
]


@cached(cache=LRUCache(maxsize=128), key=lambda io, manifest_list: hashkey(manifest_list))
def _manifests(io: FileIO, manifest_list: str) -> Tuple[ManifestFile, ...]:
"""Read and cache manifests from the given manifest list, returning a tuple to prevent modification."""
file = io.new_input(manifest_list)
return tuple(read_manifest_list(file))


def read_manifest_list(input_file: InputFile) -> Iterator[ManifestFile]:
"""
Read the manifests from the manifest list.
Expand Down
13 changes: 2 additions & 11 deletions pyiceberg/table/snapshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,12 @@
import time
from collections import defaultdict
from enum import Enum
from typing import TYPE_CHECKING, Any, DefaultDict, Dict, Iterable, List, Mapping, Optional, Tuple
from typing import TYPE_CHECKING, Any, DefaultDict, Dict, Iterable, List, Mapping, Optional

from cachetools import LRUCache, cached
from cachetools.keys import hashkey
from pydantic import Field, PrivateAttr, model_serializer

from pyiceberg.io import FileIO
from pyiceberg.manifest import DataFile, DataFileContent, ManifestFile, read_manifest_list
from pyiceberg.manifest import DataFile, DataFileContent, ManifestFile, _manifests
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
from pyiceberg.schema import Schema

Expand Down Expand Up @@ -232,13 +230,6 @@ def __eq__(self, other: Any) -> bool:
)


@cached(cache=LRUCache(maxsize=128), key=lambda io, manifest_list: hashkey(manifest_list))
def _manifests(io: FileIO, manifest_list: str) -> Tuple[ManifestFile, ...]:
"""Read and cache manifests from the given manifest list, returning a tuple to prevent modification."""
file = io.new_input(manifest_list)
return tuple(read_manifest_list(file))


class Snapshot(IcebergBaseModel):
snapshot_id: int = Field(alias="snapshot-id")
parent_snapshot_id: Optional[int] = Field(alias="parent-snapshot-id", default=None)
Expand Down
6 changes: 3 additions & 3 deletions tests/utils/test_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,14 @@
ManifestEntryStatus,
ManifestFile,
PartitionFieldSummary,
_manifests,
read_manifest_list,
write_manifest,
write_manifest_list,
)
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionField, PartitionSpec
from pyiceberg.schema import Schema
from pyiceberg.table.snapshots import Operation, Snapshot, Summary, _manifests
from pyiceberg.table.snapshots import Operation, Snapshot, Summary
from pyiceberg.transforms import IdentityTransform
from pyiceberg.typedef import Record, TableVersion
from pyiceberg.types import IntegerType, NestedField
Expand Down Expand Up @@ -314,8 +315,7 @@ def test_read_manifest_v2(generated_manifest_file_file_v2: str) -> None:


def test_read_manifest_cache(generated_manifest_file_file_v2: str) -> None:
# Mock the read_manifest_list function relative to the module path
with patch("pyiceberg.table.snapshots.read_manifest_list") as mocked_read_manifest_list:
with patch("pyiceberg.manifest.read_manifest_list") as mocked_read_manifest_list:
io = load_file_io()

snapshot = Snapshot(
Expand Down

0 comments on commit 13fe7d6

Please sign in to comment.