Revert "Update users/berger"
This reverts commit bbe56ae.
Simon Berger committed Mar 18, 2024
1 parent a6df1f5 commit b4b6ec3
Showing 19 changed files with 130 additions and 1,294 deletions.
11 changes: 4 additions & 7 deletions common/baselines/librispeech/default_tools.py
@@ -8,17 +8,14 @@
version listed here. Nevertheless, the most recent "head" should be safe to be used as well
"""
from i6_experiments.common.tools.rasr import compile_rasr_binaries_apptainer
from sisyphus import tk
from i6_experiments.common.tools.audio import compile_ffmpeg_binary
from i6_experiments.common.tools.rasr import compile_rasr_binaries_i6mode
from i6_experiments.common.tools.sctk import compile_sctk

# RASR_BINARY_PATH = None
# RASR_BINARY_PATH = compile_rasr_binaries_i6mode(commit="907eec4f4e36c11153f6ab6b5dd7675116f909f6") # use tested RASR
# RASR_BINARY_PATH = compile_rasr_binaries_i6mode() # use most recent RASR
RASR_BINARY_PATH = compile_rasr_binaries_apptainer(
"2023-05-08_tensorflow-2.8_v1", commit="a1218e196557aa6d02570bbb38767e987b7a77a2"
)
# , branch="apptainer_tf_2_8", commit="9dcef411b27a4b302698c83c0af81789ef4de2c2"
# )
RASR_BINARY_PATH = compile_rasr_binaries_i6mode() # use most recent RASR
assert RASR_BINARY_PATH, "Please set a specific RASR_BINARY_PATH before running the pipeline"
RASR_BINARY_PATH.hash_overwrite = "LIBRISPEECH_DEFAULT_RASR_BINARY_PATH"

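Side note on the `hash_overwrite` line that survives the revert: pinning the hash means downstream sisyphus jobs keep their hashes even when the way the RASR binaries are built changes (i6mode build vs. apptainer build). A minimal sketch, assuming a hypothetical prebuilt binary directory:

    from sisyphus import tk

    # Hypothetical prebuilt RASR binaries; the path would normally hash
    # differently per build, so the overwrite keeps downstream job hashes stable.
    RASR_BINARY_PATH = tk.Path("/opt/rasr/arch/linux-x86_64-standard")
    RASR_BINARY_PATH.hash_overwrite = "LIBRISPEECH_DEFAULT_RASR_BINARY_PATH"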
2 changes: 1 addition & 1 deletion common/baselines/tedlium2/lm/ngram_config.py
@@ -54,7 +54,7 @@ def run_tedlium2_ngram_lm(add_unknown_phoneme_and_mapping: bool = False, alias_p
srilm_path=SRILM_PATH,
ngram_rqmt=None,
perplexity_rqmt=None,
mail_address=gs.MAIL_ADDRESS if hasattr(gs, "MAIL_ADDRESS") else None,
mail_address=gs.MAIL_ADDRESS,
)
ngram_system.run_training()

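Aside: the guard removed here can be written more compactly with `getattr`. A one-line sketch of the equivalent, not part of the commit:

    mail_address = getattr(gs, "MAIL_ADDRESS", None)  # None when the global setting is absent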
6 changes: 1 addition & 5 deletions common/setups/lm/srilm_system.py
@@ -318,11 +318,7 @@ def _format_report_perplexities(self, ppl_dict: Dict[str, Union[str, tk.Variable
for order in self.ngram_order:
out_str = str(order).ljust(len(order_header))
for eval_name in self.eval_data.keys():
var = ppl_dict[f"{train_name}_{order}gram_{eval_name}"]
if not var.is_set():
out_str += "None".ljust(max_size)
else:
out_str += f"{var.get():.2f}".ljust(max_size)
out_str += f'{ppl_dict[f"{train_name}_{order}gram_{eval_name}"].get():.2f}'.ljust(max_size)
out_str += " "
out.append(out_str)

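For reference, the deleted guard handled sisyphus variables whose jobs have not run yet; the same logic, condensed to one line:

    var = ppl_dict[f"{train_name}_{order}gram_{eval_name}"]
    out_str += ("None" if not var.is_set() else f"{var.get():.2f}").ljust(max_size)

The restored single-line version assumes every perplexity variable is already set when the report is formatted.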
12 changes: 4 additions & 8 deletions common/setups/returnn_common/serialization.py
@@ -43,7 +43,6 @@
"""

from __future__ import annotations
import copy
from typing import Any, List, Union, Optional, Dict, Set
from dataclasses import dataclass, asdict
import os
@@ -175,11 +174,8 @@ def get(self) -> str:
assert False, "invalid type for packages"
target_package_path = os.path.join(out_dir, package_path)
pathlib.Path(os.path.dirname(target_package_path)).mkdir(parents=True, exist_ok=True)
try:
shutil.copytree(os.path.join(self.root_path, package_path), target_package_path)
except FileExistsError:
pass
content.append(f"sys.path.insert(0, os.path.dirname(__file__))\n")
shutil.copytree(os.path.join(self.root_path, package_path), target_package_path)
content.append(f"sys.path.insert(0, os.path.dirname(__file__))\n")
else:
content.append(f"sys.path.insert(0, {self.root_path!r})\n")

@@ -327,14 +323,14 @@ def __init__(
"""
super().__init__()
self.net_func_name = net_func_name
self.net_kwargs = copy.deepcopy(net_kwargs)
self.net_kwargs = net_kwargs
self.net_kwargs.update({k: CodeWrapper(v) for k, v in net_func_map.items()})

def get(self):
"""get"""
return string.Template(self.TEMPLATE).substitute(
{
"NETWORK_KWARGS": str(instanciate_delayed(self.net_kwargs)),
"NETWORK_KWARGS": str(self.net_kwargs),
"FUNCTION_NAME": self.net_func_name,
}
)
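Note on the two reverted lines in this file: without `copy.deepcopy`, the `self.net_kwargs.update(...)` call mutates the dict the caller passed in, and without `instanciate_delayed`, delayed sisyphus values would land in the config unresolved. A toy sketch of the mutation issue, independent of the i6 classes:

    import copy

    def make_net(net_kwargs: dict) -> dict:
        kwargs = copy.deepcopy(net_kwargs)  # defensive copy, as in the reverted code
        kwargs.update({"func": "wrapped"})  # safe: the caller's dict is untouched
        return kwargs

    caller_kwargs = {"dim": 512}
    make_net(caller_kwargs)
    assert "func" not in caller_kwargs      # holds only because of the copy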
96 changes: 39 additions & 57 deletions common/setups/returnn_pytorch/serialization.py
@@ -13,7 +13,7 @@
import torch
from i6_core.util import instanciate_delayed
from sisyphus import gs, tk
from sisyphus.delayed_ops import DelayedBase, DelayedFormat
from sisyphus.delayed_ops import DelayedBase
from sisyphus.hash import sis_hash_helper

if TYPE_CHECKING:
@@ -163,108 +163,90 @@ def build_config_constructor_serializers(
"""
from i6_models.config import ModelConfiguration, ModuleFactoryV1

def serialize_value(value: Any) -> Tuple[Union[str, DelayedBase], List[Import]]:
# Switch over serialization logic for different subtypes
# Import the class of <cfg>
imports = [
Import(
code_object_path=f"{type(cfg).__module__}.{type(cfg).__name__}", unhashed_package_root=unhashed_package_root
)
]

call_kwargs = []

# Iterate over all dataclass fields
for key in fields(type(cfg)):
# Value corresponding to dataclass field name
value = getattr(cfg, key.name)

# Switch over serialization logic for different subtypes
if isinstance(value, ModelConfiguration):
# Example:
# ConformerBlockConfig(mhsa_config=ConformerMHSAConfig(...))
# -> Sub-Constructor-Call and imports for ConformerMHSAConfig
return build_config_constructor_serializers(value, unhashed_package_root=unhashed_package_root)
subcall, subimports = build_config_constructor_serializers(value)
imports += subimports
call_kwargs.append((key.name, subcall))
elif isinstance(value, ModuleFactoryV1):
# Example:
# ConformerEncoderConfig(
# frontend=ModuleFactoryV1(module_class=VGGFrontend, cfg=VGGFrontendConfig(...)))
# -> Import classes ModuleFactoryV1, VGGFrontend and VGGFrontendConfig
# -> Sub-Constructor-Call for VGGFrontendConfig
subcall, subimports = build_config_constructor_serializers(value.cfg, unhashed_package_root=unhashed_package_root)
subimports.append(
subcall, subimports = build_config_constructor_serializers(value.cfg)
imports += subimports
imports.append(
Import(
code_object_path=f"{value.module_class.__module__}.{value.module_class.__name__}",
unhashed_package_root=unhashed_package_root,
)
)
subimports.append(
imports.append(
Import(
code_object_path=f"{ModuleFactoryV1.__module__}.{ModuleFactoryV1.__name__}",
unhashed_package_root=unhashed_package_root,
)
)
return Call(
callable_name=ModuleFactoryV1.__name__,
kwargs=[("module_class", value.module_class.__name__), ("cfg", subcall)],
), subimports
call_kwargs.append(
(
key.name,
Call(
callable_name=ModuleFactoryV1.__name__,
kwargs=[("module_class", value.module_class.__name__), ("cfg", subcall)],
),
)
)
elif isinstance(value, torch.nn.Module):
# Example:
# ConformerConvolutionConfig(norm=BatchNorm1d(...))
# -> Import class BatchNorm1d
# -> Sub-serialization of BatchNorm1d object.
# The __str__ function of torch.nn.Module already does this in the way we want.
return str(value), [
imports.append(
Import(
code_object_path=f"{value.__module__}.{type(value).__name__}",
unhashed_package_root=unhashed_package_root,
)
]
)
call_kwargs.append((key.name, str(value)))
elif isfunction(value):
# Example:
# ConformerConvolutionConfig(activation=torch.nn.functional.silu)
# -> Import function silu
# Builtins (e.g. 'sum') do not need to be imported
if value.__module__ != "builtins":
subimports = [
imports.append(
Import(
code_object_path=f"{value.__module__}.{value.__name__}",
unhashed_package_root=unhashed_package_root,
)
]
else:
subimports = []
return value.__name__, subimports
elif isinstance(value, list):
# -> Serialize list values individually, collect subimports
list_items = []
list_imports = []
for item in value:
item_serialized, item_imports = serialize_value(item)
list_items.append(item_serialized)
list_imports += item_imports
return DelayedFormat(f"[{', '.join(['{}'] * len(list_items))}]", *list_items), list_imports
elif isinstance(value, dict):
# -> Serialize dict values individually, collect subimports
dict_items = [] # Will alternatingly contain key and value of all dict items
dict_imports = []
for key, val in value.items():
val_serialized, item_imports = serialize_value(val)
dict_items += [key, val_serialized]
dict_imports += item_imports
return DelayedFormat(f"{{{', '.join(['{}: {}'] * len(dict_items))}}}", *dict_items), dict_imports
)
call_kwargs.append((key.name, value.__name__))
elif isinstance(value, DelayedBase):
# sisyphus variables are just given as-is and will be instanciated only when calling "get".
return value, []
call_kwargs.append((key.name, value))
else:
# No special case (usually python primitives)
# -> Just get string representation
return str(value), []


# Import the class of <cfg>
imports = [
Import(
code_object_path=f"{type(cfg).__module__}.{type(cfg).__name__}", unhashed_package_root=unhashed_package_root
)
]

call_kwargs = []

# Iterate over all dataclass fields
for key in fields(type(cfg)):
# Value corresponding to dataclass field name
value = getattr(cfg, key.name)

serialized_value, value_imports = serialize_value(value)
call_kwargs.append((key.name, serialized_value))
imports += value_imports
call_kwargs.append((key.name, str(value)))

imports = list(OrderedDict.fromkeys(imports)) # remove duplications

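The revert replaces a recursive `serialize_value` helper, which also covered lists and dicts via `DelayedFormat`, with an inline if/elif chain over the dataclass fields. A toy sketch of the recursive shape the removed helper had; illustrative only, not the i6_experiments implementation (it returns plain strings instead of delayed objects):

    from inspect import isfunction
    from typing import Any, List, Tuple

    def serialize_value(value: Any) -> Tuple[str, List[str]]:
        """Return (code_string, required_imports) for a config value."""
        if isinstance(value, list):
            parts = [serialize_value(v) for v in value]
            items = ", ".join(p for p, _ in parts)
            return f"[{items}]", [i for _, imps in parts for i in imps]
        if isinstance(value, dict):
            parts = {k: serialize_value(v) for k, v in value.items()}
            items = ", ".join(f"{k!r}: {p}" for k, (p, _) in parts.items())
            return f"{{{items}}}", [i for _, (_, imps) in parts.items() for i in imps]
        if isfunction(value) and value.__module__ != "builtins":
            return value.__name__, [f"{value.__module__}.{value.__name__}"]
        return repr(value), []

The recursion is what lets container types reuse the scalar cases for their elements; the restored inline chain handles each field's value exactly once and has no list or dict branch.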
19 changes: 0 additions & 19 deletions common/tools/rasr.py
@@ -35,22 +35,3 @@ def compile_rasr_binaries_i6mode(
)
make_job.rqmt["mem"] = 8
return make_job.out_links["binaries"]


def compile_rasr_binaries_apptainer(
apptainer_image_version: str, # Most recent: 2023-05-08_tensorflow-2.8_v1
branch: Optional[str] = None,
commit: Optional[str] = None,
rasr_git_repository: str = "https://github.com/rwth-i6/rasr",
rasr_arch: str = "linux-x86_64-standard",
) -> tk.Path:
rasr_repo = CloneGitRepositoryJob(rasr_git_repository, branch=branch, commit=commit).out_repository
make_job = MakeJob(
folder=rasr_repo,
make_sequence=["build", "install"],
configure_opts=[f"--apptainer-setup={apptainer_image_version}"],
num_processes=8,
link_outputs={"binaries": f"arch/{rasr_arch}/"},
)
make_job.rqmt["mem"] = 8
return make_job.out_links["binaries"]
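For context, before this revert the removed helper was invoked from default_tools.py roughly like this (arguments as they appear in the first file of this diff):

    RASR_BINARY_PATH = compile_rasr_binaries_apptainer(
        "2023-05-08_tensorflow-2.8_v1",
        commit="a1218e196557aa6d02570bbb38767e987b7a77a2",
    )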
7 changes: 2 additions & 5 deletions pyproject.toml
@@ -1,7 +1,4 @@
[tool.black]
line-length = 120
target-version = ["py38"]
# exclude = 'users'

[tool.ruff]
line-length = 120
target-version = ["py37"]
exclude = 'users'
@@ -17,15 +17,12 @@

# from .config_02e_transducer_rasr_features_tinaconf_old import py as py_02e_old
from .config_02e_transducer_rasr_features_tinaconf import py as py_02e
from .config_02e_transducer_rasr_features_tinaconf_rtf import py as py_02e_rtf

# from .config_02c_transducer_wei import py as py_02c
# from .config_02d_transducer_rasr_features_dc import py as py_02d
from .config_03a_transducer_fullsum_raw_samples import py as py_03a
from .config_03b_transducer_fullsum_rasr_features import py as py_03b

from .config_04b_transducer_fullsum_from_scratch_rasr_features import py as py_04b

# from .config_03c_transducer_fullsum_wei import py as py_03c

# from .config_test_1 import py as py_test_1
@@ -54,18 +51,16 @@ def main() -> SummaryReport:
sub_reports.append(copy.deepcopy(py_02b()[0]))
# sub_reports.append(copy.deepcopy(py_02e_old()))
sub_reports.append(copy.deepcopy(py_02e()))
sub_reports.append(copy.deepcopy(py_02e_rtf()))
sub_reports.append(copy.deepcopy(py_03a()))
sub_reports.append(copy.deepcopy(py_03b()))
sub_reports.append(copy.deepcopy(py_04b()))

for report in sub_reports:
report.collapse(
[SummaryKey.CORPUS.value], best_selector_key=SummaryKey.ERR.value
) # Keep one row for each recognition corpus
summary_report.merge_report(report, update_structure=True)

summary_report.set_col_sort_key([SummaryKey.ERR.value, SummaryKey.CORPUS.value])
summary_report.set_col_sort_key([SummaryKey.ERR.value, SummaryKey.WER.value, SummaryKey.CORPUS.value])

tk.register_report("summary.report", summary_report)
