Skip to content

Commit

Permalink
fix: revert to old-style dataset sharding
Browse files: browse the repository at this point in the history
  • Loading branch information
AshishKumar4 committed Sep 11, 2024
1 parent 9f4eee0 commit c3381df
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 9 deletions.
16 changes: 8 additions & 8 deletions flaxdiff/data/online_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,19 +86,17 @@ def default_feature_extractor(sample):


def map_sample(
sample,
url,
caption,
image_shape=(256, 256),
min_image_shape=(128, 128),
timeout=15,
retries=3,
upscale_interpolation=cv2.INTER_CUBIC,
downscale_interpolation=cv2.INTER_AREA,
image_processor=default_image_processor,
feature_extractor=default_feature_extractor,
):
try:
features = feature_extractor(sample)
url, caption = features["url"], features["caption"]
# Assuming fetch_single_image is defined elsewhere
image = fetch_single_image(url, timeout=timeout, retries=retries)
if image is None:
Expand Down Expand Up @@ -147,8 +145,10 @@ def map_batch(
downscale_interpolation=downscale_interpolation,
feature_extractor=feature_extractor
)
features = feature_extractor(batch)
url, caption = features["url"], features["caption"]
with ThreadPoolExecutor(max_workers=num_threads) as executor:
executor.map(map_sample_fn, batch)
executor.map(map_sample_fn, url, caption)
except Exception as e:
print(f"Error maping batch", e)
traceback.print_exc()
Expand Down Expand Up @@ -214,9 +214,9 @@ def parallel_image_loader(
iteration = 0
while True:
# Repeat forever
# shards = [dataset[i*shard_len:(i+1)*shard_len]
# for i in range(num_workers)]
shards = [dataset.shard(num_shards=num_workers, index=i) for i in range(num_workers)]
shards = [dataset[i*shard_len:(i+1)*shard_len]
for i in range(num_workers)]
# shards = [dataset.shard(num_shards=num_workers, index=i) for i in range(num_workers)]
print(f"mapping {len(shards)} shards")
pool.map(map_batch_fn, shards)
iteration += 1
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
setup(
name='flaxdiff',
packages=find_packages(),
version='0.1.35',
version='0.1.35.1',
description='A versatile and easy to understand Diffusion library',
long_description=open('README.md').read(),
long_description_content_type='text/markdown',
Expand Down

0 comments on commit c3381df

Please sign in to comment.