Merge pull request #15 from Eve-ning/clean-repo

Clean up repository and briefly prepare for remote training
Eve-ning · Feb 14, 2023 · f060758 · f060758
2 parents 98d3d5e + aabac6f
commit f060758
Show file tree

Hide file tree

Showing 11 changed files with 124 additions and 212 deletions.
diff --git a/.gitmodules b/.gitmodules
diff --git a/opal/conf/conf.py b/opal/conf/conf.py
@@ -4,5 +4,4 @@
 DATA_DIR = ROOT_DIR / "data/"
 OSU_DIR = DATA_DIR / "osu/"
 MODEL_DIR = ROOT_DIR / "models/"
-REPLAYS_DIR = OSU_DIR / "replays"
 SCORES_DIR = OSU_DIR / "scores"
diff --git a/opal/data_ppy_sh_to_csv b/opal/data_ppy_sh_to_csv
diff --git a/opal/replay/__init__.py b/opal/replay/__init__.py
diff --git a/opal/score/datamodule/ignore_mapping.yaml b/opal/score/datamodule/ignore_mapping.yaml
@@ -0,0 +1,92 @@
+# This is the default ignore mapping.
+# Every column listed here that is NOT commented out will be IGNORED during conversion.
+# Ignore any columns that are not necessary for your use case:
+# doing so heavily reduces the storage needed and increases processing speed.
+# Once you're done, pass this file to osu-data-csv with the -i option:
+# osu-data-csv -i path/to/ignore_mapping.yaml
+
+osu_beatmap_difficulty.sql:
+#  - beatmap_id
+#  - mode
+#  - mods
+#  - diff_unified
+  - last_update
+osu_beatmaps.sql:
+#  - beatmap_id
+  - beatmapset_id
+  - user_id
+#  - filename
+  - checksum
+#  - version
+  - total_length
+  - hit_length
+#  - countTotal
+#  - countNormal
+#  - countSlider
+#  - countSpinner
+#  - diff_drain
+#  - diff_size
+#  - diff_overall
+#  - diff_approach
+#  - playmode
+#  - approved
+  - last_update
+#  - difficultyrating
+#  - playcount
+#  - passcount
+#  - youtube_preview
+#  - score_version
+#  - deleted_at
+  - bpm
+osu_scores{mode}_high.sql:
+#  - score_id
+#  - beatmap_id
+#  - user_id
+#  - score
+  - maxcombo
+  - rank
+#  - count50
+#  - count100
+#  - count300
+#  - countmiss
+#  - countgeki
+#  - countkatu
+  - perfect
+#  - enabled_mods
+#  - date
+#  - pp
+#  - replay
+  - hidden
+  - country_acronym
+osu_user_stats{mode}.sql:
+#  - user_id
+  - count300
+  - count100
+  - count50
+  - countMiss
+  - accuracy_total
+  - accuracy_count
+  - accuracy
+  - playcount
+  - ranked_score
+  - total_score
+  - x_rank_count
+  - xh_rank_count
+  - s_rank_count
+  - sh_rank_count
+  - a_rank_count
+  - rank
+  - level
+  - replay_popularity
+  - fail_count
+  - exit_count
+  - max_combo
+  - country_acronym
+#  - rank_score
+#  - rank_score_index
+  - rank_score_exp
+  - rank_score_index_exp
+  - accuracy_new
+  - last_update
+  - last_played
+  - total_seconds_played
diff --git a/opal/score/datamodule/score_datamodule.py b/opal/score/datamodule/score_datamodule.py
@@ -1,18 +1,18 @@
 import logging
-from dataclasses import dataclass, field
-from typing import Sequence, Tuple
-
 import numpy as np
 import pandas as pd
 import pytorch_lightning as pl
 import torch
+from dataclasses import dataclass, field
+from pathlib import Path
 from sklearn.base import TransformerMixin
 from sklearn.preprocessing import LabelEncoder, QuantileTransformer
 from torch.utils.data import DataLoader, TensorDataset, random_split
+from typing import Sequence, Tuple
 
 from opal.conf.conf import DATA_DIR
 from opal.conf.mods import OsuMod
-from opal.data_ppy_sh_to_csv.main import get_dataset, default_sql_names
+from osu_data_csv.main import get_dataset
 
 
 @dataclass
@@ -50,18 +50,16 @@ def __post_init__(self):
     def prepare_data(self) -> None:
         """ Downloads data via the osu_data_csv package (formerly the data_ppy_sh_to_csv submodule) """
         get_dataset(
-            self.ds_yyyy_mm,  # year_month=
-            self.ds_mode,  # mode=
-            self.ds_set,  # set=
-            DATA_DIR,  # dl_dir=
-            'Y',  # bypass_confirm=
-            ",".join(default_sql_names[:4]),  # sql_names=
-            'N',  # cleanup=
-            'N'  # zip_csv_files=
+            year_month=self.ds_yyyy_mm,
+            mode=self.ds_mode,
+            set=self.ds_set,
+            dl_dir=DATA_DIR,
+            bypass_confirm='Y',
+            cleanup='N',
+            ignore_path=(Path(__file__).parent / "ignore_mapping.yaml").as_posix()
         )
 
     def setup(self, stage: str = "") -> None:
-        self.prepare_data()
         ds_str = f"{self.ds_yyyy_mm}_01_performance_{self.ds_mode}_top_{self.ds_set}"
 
         csv_dir = DATA_DIR / ds_str / "csv"
@@ -175,7 +173,7 @@ def prep_map(df: pd.DataFrame,
             (df['playmode'] == 3) &
             (df['diff_size'].isin(diff_sizes)) &
             (df['difficultyrating'].between(*sr_bounds)),
-            ['difficultyrating', 'diff_overall', 'diff_size', 'version', 'beatmap_id', 'filename']
+            ['difficultyrating', 'diff_overall', 'diff_size', 'beatmap_id', 'filename']
         ]
         return df
 

diff --git a/opal/utils/__init__.py b/opal/utils/__init__.py
diff --git a/opal/utils/ecdf.py b/opal/utils/ecdf.py
diff --git a/opal/utils/load_map.py b/opal/utils/load_map.py
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,3 +1,23 @@
+[tool.poetry]
+name = "opal"
+version = "0.1.0"
+description = "osu!mania score estimation through Collaborative Filtering"
+authors = ["Eve-ning <dev_evening@hotmail.com>"]
+license = "MIT"
+readme = "README.md"
+
+[tool.poetry.dependencies]
+python = "^3.9"
+scikit-learn = "^1.2.1"
+pandas = "^1.5.3"
+pytorch-lightning = "^1.9.1"
+
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+
+
 [tool.pytest.ini_options]
 log_cli = true
 log_cli_level = "INFO"

diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,3 @@
-reamber==0.1.6
 scikit-learn
-junit_xml
 pandas
 pytorch-lightning