Added DatasetVariationIndex
valsdav committed Feb 21, 2024
1 parent 3d7fedb commit 2e6a38e
Showing 4 changed files with 54 additions and 47 deletions.
44 changes: 29 additions & 15 deletions order/adapters/das.py
@@ -1,7 +1,8 @@
# coding: utf-8

from __future__ import annotations

import requests
from order.settings import Settings

__all__ = ["DASDatasetAdapter"]

@@ -13,17 +14,30 @@ class DASDatasetAdapter(Adapter):

name = "das_dataset"

def retrieve_data(self, *, keys: list[str]) -> Materialized:
if keys[0].startswith("/SCALE"):
return Materialized(n_events=1, n_files=1)
return Materialized(n_events=5_000_000, n_files=12)


class DASLFNsAdapter(Adapter):

name = "das_lfns"

def retrieve_data(self, *, keys: list[str]) -> Materialized:
if keys[0].startswith("/SCALE"):
return Materialized(lfns=["/SCALE/b/NANOAODSIM"])
return Materialized(lfns=["/a/b/NANOAODSIM"])
def retrieve_data(self, *, keys: list[str], dbs_instance: str = "prod/global") -> Materialized:
# Support list of keys since we may have datasets with extensions in stat
results = {}
for key in keys:
resource = f"https://cmsweb.cern.ch:8443/dbs/{dbs_instance}/DBSReader/files?dataset={key}&detail=True" # noqa
r = requests.get(
resource,
cert=Settings.instance().user_proxy,
verify=False,
)
results[key] = r.json()

out = {"n_files": 0,
"n_events": 0,
"lfns": [],
"file_size": 0}

for res in results.values():
for file in res:
out["n_files"] += 1
out["n_events"] += file["event_count"]
out["lfns"].append(file["logical_file_name"])
out["file_size"] += file["file_size"]

return Materialized(**out)
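For illustration, here is a minimal, self-contained sketch of the aggregation the new DASLFNsAdapter.retrieve_data performs over DBS file records. The dataset keys and file records below are made-up placeholders, not part of the commit:

# DBS returns one record per file; counts, event totals, LFNs and sizes
# are summed across all queried dataset keys (made-up sample data).
fake_dbs_files = {
    "/A/B/NANOAODSIM": [
        {"event_count": 1000, "logical_file_name": "/store/a/file1.root", "file_size": 2048},
        {"event_count": 2000, "logical_file_name": "/store/a/file2.root", "file_size": 4096},
    ],
    "/A/B_ext1/NANOAODSIM": [
        {"event_count": 500, "logical_file_name": "/store/a/file3.root", "file_size": 1024},
    ],
}

out = {"n_files": 0, "n_events": 0, "lfns": [], "file_size": 0}
for res in fake_dbs_files.values():
    for file in res:
        out["n_files"] += 1
        out["n_events"] += file["event_count"]
        out["lfns"].append(file["logical_file_name"])
        out["file_size"] += file["file_size"]

assert out["n_files"] == 3
assert out["n_events"] == 3500
assert out["file_size"] == 7168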


26 changes: 0 additions & 26 deletions order/adapters/dbs.py

This file was deleted.

27 changes: 22 additions & 5 deletions order/models/dataset.py
@@ -3,15 +3,16 @@
from __future__ import annotations


__all__ = ["DatasetIndex", "Dataset", "LazyDataset", "DatasetVariation", "GenOrder"]
__all__ = ["DatasetIndex", "Dataset", "LazyDataset", "DatasetVariation","DatasetVariationIndex", "GenOrder"]


import enum

from pydantic import Field, field_validator

from order.types import (
    Union, List, Dict, NonEmptyStrictStr, PositiveStrictInt, Lazy, ClassVar, Any,
    Union, List, Dict, NonEmptyStrictStr, PositiveStrictInt,
    PositiveStrictFloat, Lazy, ClassVar, Any,
)
# from order.util import validated
from order.models.base import Model, AdapterModel
@@ -23,7 +24,13 @@ class DatasetIndex(UniqueObjectIndex):
class_name: NonEmptyStrictStr = Field(default="Dataset", frozen=True)
objects: Lazy[List[Union["LazyDataset", "Dataset"]]] = Field(default_factory=list, repr=False)

class DatasetVariationIndex(UniqueObjectIndex):

class_name: NonEmptyStrictStr = Field(default="DatasetVariation", frozen=True)
# This may become Lazy if we read the dataset variations from somewhere else
objects: Lazy[List[Union["LazyDataset", "DatasetVariation"]]] = Field(default_factory=list, repr=False)


class LazyDataset(LazyUniqueObject):

class_name: NonEmptyStrictStr = Field(default="Dataset", frozen=True)
@@ -65,14 +72,17 @@ def __str__(self) -> str:
        return self.value


class DatasetVariation(Model):

class DatasetVariation(UniqueObject):
    keys: List[NonEmptyStrictStr] = Field(frozen=True)
    gen_order: NonEmptyStrictStr = Field(default=str(GenOrder.unknown))
    n_files: Lazy[PositiveStrictInt]
    n_events: Lazy[PositiveStrictInt]
    file_size: Lazy[PositiveStrictInt]
    lfns: Lazy[List[NonEmptyStrictStr]]

    lazy_cls: ClassVar[UniqueObjectBase] = LazyDataset

    @field_validator("gen_order", mode="after")
    @classmethod
    def validate_gen_order(cls, gen_order: str) -> str:
@@ -85,7 +95,7 @@ def validate_gen_order(cls, gen_order: str) -> str:
class Dataset(UniqueObject):

    campaign: Lazy["Campaign"]
    variations: Dict[str, DatasetVariation] = Field(frozen=True)
    variations: DatasetVariationIndex = Field(default_factory=DatasetVariationIndex, frozen=True)

    lazy_cls: ClassVar[UniqueObjectBase] = LazyDataset

@@ -127,10 +137,17 @@ def n_events(self) -> int:
    def lfns(self) -> list[NonEmptyStrictStr]:
        return self.variations["nominal"].lfns

    @property
    def file_size(self) -> int:
        return self.variations["nominal"].file_size



# trailing imports
from order.models.campaign import Campaign

# rebuild models that contained forward type declarations
DatasetIndex.model_rebuild()
DatasetVariation.model_rebuild()
DatasetVariationIndex.model_rebuild()
Dataset.model_rebuild()
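As a rough illustration of the delegation pattern this commit sets up (Dataset's n_files, n_events, lfns and file_size all forward to the "nominal" entry of its variation index), here is a toy version built from plain dataclasses rather than the real order models; all class names below are invented:

# Toy stand-ins; the real classes are pydantic models backed by a
# DatasetVariationIndex, so this only mirrors the lookup pattern.
from dataclasses import dataclass, field


@dataclass
class ToyVariation:
    keys: list[str]
    n_files: int
    n_events: int
    file_size: int
    lfns: list[str]


@dataclass
class ToyDataset:
    variations: dict[str, ToyVariation] = field(default_factory=dict)

    @property
    def n_events(self) -> int:
        # mirrors Dataset.n_events, which reads the "nominal" variation
        return self.variations["nominal"].n_events

    @property
    def file_size(self) -> int:
        # mirrors the new Dataset.file_size property
        return self.variations["nominal"].file_size


nominal = ToyVariation(keys=["/A/B/NANOAODSIM"], n_files=3, n_events=3500, file_size=7168, lfns=[])
ds = ToyDataset(variations={"nominal": nominal})
assert ds.n_events == 3500
assert ds.file_size == 7168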
4 changes: 3 additions & 1 deletion order/types.py
@@ -35,8 +35,10 @@
from pydantic.fields import FieldInfo # noqa


#: Strict positive integer.
# Strict positive integer.
PositiveStrictInt = Annotated[StrictInt, Ge(0)]
# Strict positive float.
PositiveStrictFloat = Annotated[StrictFloat, Ge(0)]

#: Strict non-empty string.
NonEmptyStrictStr = Annotated[StrictStr, Len(min_length=1)]
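A small sketch of how the new PositiveStrictFloat annotation is expected to behave under pydantic v2 validation; the model and field names below are made up for illustration:

# Sketch, assuming pydantic v2 semantics for Annotated[StrictFloat, Ge(0)]:
# Ge(0) rejects negative values and strict mode rejects string input.
from typing import Annotated

from annotated_types import Ge
from pydantic import BaseModel, StrictFloat, ValidationError

PositiveStrictFloat = Annotated[StrictFloat, Ge(0)]


class FileSizeModel(BaseModel):  # hypothetical model, for illustration only
    size: PositiveStrictFloat


FileSizeModel(size=1.5)  # ok

try:
    FileSizeModel(size=-0.1)  # rejected: violates Ge(0)
except ValidationError as err:
    print(err)

try:
    FileSizeModel(size="1.5")  # rejected: strict float does not coerce strings
except ValidationError as err:
    print(err)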
