Skip to content

Commit

Permalink
Merge pull request #9 from mappingtools/ML-extractor
Browse files Browse the repository at this point in the history
ML dataset extractor
  • Loading branch information
OliBomby authored Nov 2, 2023
2 parents 18b4926 + 8a94f29 commit ea1da2b
Show file tree
Hide file tree
Showing 15 changed files with 648 additions and 14 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -360,4 +360,5 @@ MigrationBackup/
.ionide/

# Fody - auto-generated XML schema
FodyWeavers.xsd
FodyWeavers.xsd
.idea/.idea.Mapperator/.idea/misc.xml
53 changes: 44 additions & 9 deletions Mapperator.ConsoleApp/DbManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
using Mapping_Tools_Core.BeatmapHelper;
using Mapping_Tools_Core.BeatmapHelper.HitObjects.Objects;
using Mapping_Tools_Core.BeatmapHelper.IO.Editor;
using OsuParsers.Database;
using OsuParsers.Enums;

namespace Mapperator.ConsoleApp {
Expand All @@ -30,10 +31,17 @@ public static IEnumerable<DbBeatmap> GetCollection(string collectionName) {
return collection.MD5Hashes.SelectMany(o => beatmaps.Where(b => b.MD5Hash == o));
}

/// <summary>
/// Decodes the osu! client database and returns it in full.
/// </summary>
/// <remarks>
/// The file read is "osu!.db" inside <c>ConfigManager.Config.OsuPath</c>. Nothing is cached
/// here: every call decodes the file again, so callers that need several lookups should
/// keep the returned instance.
/// </remarks>
/// <returns>The decoded <see cref="OsuDatabase"/>.</returns>
public static OsuDatabase GetOsuDatabase() {
    var osuDbPath = Path.Join(ConfigManager.Config.OsuPath, "osu!.db");
    return DatabaseDecoder.DecodeOsu(osuDbPath);
}

/// <summary>
/// Returns the beatmap list of a freshly decoded osu! database.
/// </summary>
public static List<DbBeatmap> GetAll() => GetOsuDatabase().Beatmaps;

/// <summary>
/// Lazily selects the beatmaps of <paramref name="db"/> that belong to one beatmap set.
/// </summary>
/// <param name="db">An already decoded osu! database.</param>
/// <param name="setId">The beatmap set id to match against <c>BeatmapSetId</c>.</param>
/// <returns>A deferred sequence of the matching entries.</returns>
public static IEnumerable<DbBeatmap> GetMapSet(OsuDatabase db, int setId) {
    return from beatmap in db.Beatmaps
           where beatmap.BeatmapSetId == setId
           select beatmap;
}

public static IEnumerable<DbBeatmap> GetFiltered(IHasFilter opts) {
Expand All @@ -46,28 +54,32 @@ public static bool DbBeatmapFilter(DbBeatmap o, IHasFilter opts) {
var regex = new Regex(@$"(?!\s?(de\s)?(it|that|{string.Join('|', opts.Mapper!.Select(Regex.Escape))}))(((^|[^\S\r\n])(\S)*([sz]'|'s))|((^|[^\S\r\n])de\s(\S)*))", RegexOptions.IgnoreCase);

return (!opts.MinId.HasValue || o.BeatmapSetId >= opts.MinId)
&& (!opts.MaxId.HasValue || o.BeatmapSetId <= opts.MaxId)
&& (!opts.RankedStatus!.Any() || opts.RankedStatus!.Contains(o.RankedStatus))
&& o.Ruleset == opts.Ruleset
&& (!opts.MinStarRating.HasValue || GetDefaultStarRating(o) >= opts.MinStarRating)
&& (!opts.MaxStarRating.HasValue || GetDefaultStarRating(o) <= opts.MaxStarRating)
&& (!opts.Mapper!.Any() || (opts.Mapper!.Any(x => x == o.Creator || o.Difficulty.Contains(x))
&& !o.Difficulty.Contains("Hitsounds", StringComparison.OrdinalIgnoreCase)
&& !o.Difficulty.Contains("Collab", StringComparison.OrdinalIgnoreCase)
&& !regex.IsMatch(o.Difficulty)));
}

/// <summary>
/// Returns the no-mod star rating of a beatmap for the beatmap's own ruleset.
/// </summary>
/// <param name="beatmap">The database entry to inspect.</param>
/// <returns>
/// The value stored under <c>Mods.None</c> in the star-rating table of the beatmap's ruleset,
/// or <see cref="double.NaN"/> when that table has no such entry.
/// </returns>
/// <remarks>
/// NaN is returned instead of throwing so a missing rating does not abort a whole scan.
/// Every ordered comparison against NaN is false, so a caller testing
/// <c>rating &gt;= min</c> or <c>rating &lt;= max</c> rejects such beatmaps.
/// </remarks>
public static double GetDefaultStarRating(DbBeatmap beatmap) {
    // Pick the per-mod rating table that belongs to the ruleset; anything that is not
    // Taiko, Mania or Fruits uses the standard table.
    var ratingsByMods = beatmap.Ruleset switch {
        Ruleset.Taiko => beatmap.TaikoStarRating,
        Ruleset.Mania => beatmap.ManiaStarRating,
        Ruleset.Fruits => beatmap.CatchStarRating,
        _ => beatmap.StandardStarRating
    };

    return ratingsByMods.TryGetValue(Mods.None, out double rating) ? rating : double.NaN;
}

public static IEnumerable<IBeatmap> GetFilteredAndRead(IHasFilter opts) {
return GetFiltered(opts)
.Select(o => Path.Combine(ConfigManager.Config.SongsPath, o.FolderName.Trim(), o.FileName.Trim()))
.Where(o => {
.Where((o) => {
if (File.Exists(o)) {
Console.Write('.');
return true;
Expand All @@ -87,6 +99,29 @@ public static IEnumerable<IBeatmap> GetFilteredAndRead(IHasFilter opts) {
}).Where(ValidBeatmap)!;
}

/// <summary>
/// Reads every beatmap that passes the filter from disk and pairs it with its database entry.
/// </summary>
/// <param name="opts">The filter handed to <c>GetFiltered</c>.</param>
/// <returns>
/// A deferred sequence of (parsed beatmap, database entry) pairs. Entries whose file is
/// missing, fails to parse, or is rejected by <c>ValidBeatmap</c> are left out.
/// </returns>
public static IEnumerable<(IBeatmap, DbBeatmap)> GetFilteredAndRead2(IHasFilter opts) {
    // Writes a progress dot for a file that exists, or reports the missing path.
    static bool ExistsOnDisk((string FilePath, DbBeatmap Entry) candidate) {
        if (!File.Exists(candidate.FilePath)) {
            Console.WriteLine(Strings.CouldNotFindFile, candidate.FilePath);
            return false;
        }

        Console.Write('.');
        return true;
    }

    // Parses the file; a failure is reported and yields a null beatmap so the final
    // validity filter drops it instead of the whole enumeration failing.
    static (IBeatmap?, DbBeatmap) Parse((string FilePath, DbBeatmap Entry) candidate) {
        try {
            return (new BeatmapEditor(candidate.FilePath).ReadFile(), candidate.Entry);
        }
        catch (Exception e) {
            Console.WriteLine(Strings.ErrorReadingFile, candidate.FilePath, e);
            return (null, candidate.Entry);
        }
    }

    var candidates = GetFiltered(opts)
        .Select(entry => (Path.Combine(ConfigManager.Config.SongsPath, entry.FolderName.Trim(), entry.FileName.Trim()), entry));

    return candidates
        .Where(candidate => ExistsOnDisk(candidate))
        .Select<(string, DbBeatmap), (IBeatmap?, DbBeatmap)>(candidate => Parse(candidate))
        .Where(pair => ValidBeatmap(pair.Item1))!;
}

public static bool ValidBeatmap(IBeatmap? beatmap) {
if (beatmap == null)
return false;
Expand Down
2 changes: 2 additions & 0 deletions Mapperator.ConsoleApp/IHasFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@ namespace Mapperator.ConsoleApp;
/// <summary>
/// Options that select which beatmaps of the osu! database a verb operates on.
/// </summary>
/// <remarks>
/// The member notes describe how <c>DbManager.DbBeatmapFilter</c> applies each value.
/// <see cref="RankedStatus"/> and <see cref="Mapper"/> are declared nullable, but that
/// filter dereferences them without a null check, so implementations should expose an
/// empty sequence rather than <c>null</c> for "no restriction".
/// </remarks>
public interface IHasFilter {
    /// <summary>Name of a collection.</summary>
    // NOTE(review): presumably limits the search to that osu! collection; the consumer
    // is not visible here - confirm against DbManager.GetFiltered.
    string? CollectionName { get; }

    /// <summary>Inclusive lower bound on the beatmap set id; <c>null</c> disables the bound.</summary>
    int? MinId { get; }

    /// <summary>Inclusive upper bound on the beatmap set id; <c>null</c> disables the bound.</summary>
    int? MaxId { get; }

    /// <summary>Accepted ranked statuses; an empty sequence accepts every status.</summary>
    IEnumerable<RankedStatus>? RankedStatus { get; }

    /// <summary>The ruleset a beatmap must have. This check is always applied.</summary>
    Ruleset Ruleset { get; }

    /// <summary>Inclusive lower bound on the no-mod star rating; <c>null</c> disables the bound.</summary>
    double? MinStarRating { get; }

    /// <summary>Inclusive upper bound on the no-mod star rating; <c>null</c> disables the bound.</summary>
    double? MaxStarRating { get; }

    /// <summary>
    /// Mapper names to match. A beatmap matches when its creator equals one of the names or
    /// its difficulty name contains one; an empty sequence disables the check.
    /// </summary>
    IEnumerable<string>? Mapper { get; }
}
4 changes: 3 additions & 1 deletion Mapperator.ConsoleApp/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,16 @@ public static class Program {
/// <summary>
/// Program entry point: loads the configuration, parses the command line into one of the
/// verb option classes and runs the matching verb.
/// </summary>
/// <param name="args">The raw command-line arguments.</param>
/// <returns>The value returned by the executed verb, or 1 when the arguments could not be parsed.</returns>
private static int Main(string[] args) {
    ConfigManager.LoadConfig();

    return Parser.Default.ParseArguments<Count.CountOptions, Extract.ExtractOptions, Build.BuildOptions, Convert.ConvertOptions, Search.SearchOptions, Analyze.AnalyzeOptions, Extract2.Extract2Options, Dataset.DatasetOptions, ConvertML.ConvertMLOptions>(args)
        .MapResult(
            (Count.CountOptions opts) => Count.DoDataCount(opts),
            (Extract.ExtractOptions opts) => Extract.DoDataExtraction(opts),
            (Build.BuildOptions opts) => Build.DoBuildGraph(opts),
            (Convert.ConvertOptions opts) => Convert.DoMapConvert(opts),
            (Search.SearchOptions opts) => Search.DoPatternSearch(opts),
            (Analyze.AnalyzeOptions opts) => Analyze.DoVisualSpacingExtract(opts),
            (Extract2.Extract2Options opts) => Extract2.DoDataExtraction(opts),
            (Dataset.DatasetOptions opts) => Dataset.DoDataExtraction(opts),
            (ConvertML.ConvertMLOptions opts) => ConvertML.DoMapConvert(opts),
            // Parse errors (including an unknown verb) end up here.
            _ => 1);
}
Expand Down
10 changes: 9 additions & 1 deletion Mapperator.ConsoleApp/Properties/launchSettings.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
{
"profiles": {
"CreateDataSet": {
"commandName": "Project",
"commandLineArgs": "dataset -m Standard -s Ranked -i 20000 -x 25000 -r 4 -o \"D:\\Osu! Dingen\\Beatmap ML Datasets\\old_test\""
},
"ConvertMapML": {
"commandName": "Project",
"commandLineArgs": "convert-ml -m \"Resources\\sdf_osu_5_fine_2.h5\" -i \"Resources\\input\" -o output"
},
"CountData": {
"commandName": "Project",
"commandLineArgs": "count -s Ranked -m Standard -a Sotarks -v"
"commandLineArgs": "count -m Standard -s Ranked -i 20000 -x 25000 -r 4 -f"
},
"VisualSpacingTest": {
"commandName": "Project",
Expand All @@ -20,6 +24,10 @@
"commandName": "Project",
"commandLineArgs": "extract -s Ranked -m Standard -a Sotarks -o SotarksData"
},
"ExtractAllData122123": {
"commandName": "Project",
"commandLineArgs": "extract2 -s Ranked -m Standard -o pp_data_v2"
},
"ConvertMap": {
"commandName": "Project",
"commandLineArgs": "convert -d test -i \"Resources\\input\" -o output"
Expand Down
54 changes: 54 additions & 0 deletions Mapperator.ConsoleApp/Resources/Strings.Designer.cs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions Mapperator.ConsoleApp/Resources/Strings.resx
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,23 @@ Basic usage:
<data name="Program_DoMapConvert_Converting_spacing_to_reference_beatmap___" xml:space="preserve">
<value>Converting spacing to reference beatmap...</value>
</data>
<data name="Count_DoDataCount_Total_file_size___0__MB" xml:space="preserve">
<value>Total file size: {0} MB</value>
</data>
<data name="Count_DoDataCount_Total_duration___0_" xml:space="preserve">
<value>Total duration: {0}</value>
</data>
<data name="Dataset_DoDataExtraction_Finding_beatmap_sets___" xml:space="preserve">
<value>Finding beatmap sets...</value>
</data>
<data name="Dataset_DoDataExtraction_Count_Update" xml:space="preserve">
<value>{0} sets found</value>
</data>
<data name="Dataset_DoDataExtraction_Writing_dataset___" xml:space="preserve">
<value>Writing dataset...</value>
</data>
<data name="Dataset_DoDataExtraction_Copy_Update" xml:space="preserve">
<value>Copied beatmap set {0}/{1}</value>
</data>
<data name="ConvertML_DoMapConvert_Loading_ML_model___" xml:space="preserve">
<value>Loading ML model...</value>
</data>
Expand Down
39 changes: 38 additions & 1 deletion Mapperator.ConsoleApp/Verbs/Count.cs
Original file line number Diff line number Diff line change
@@ -1,24 +1,61 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using CommandLine;
using JetBrains.Annotations;
using Mapperator.ConsoleApp.Resources;
using Mapping_Tools_Core.Audio;

namespace Mapperator.ConsoleApp.Verbs;

/// <summary>
/// The "count" verb: counts the beatmaps that match a filter and can optionally restrict the
/// count to unique mp3 songs and total their file size and duration.
/// </summary>
public static class Count {
    /// <summary>Command-line options of the "count" verb, on top of the shared filter options.</summary>
    [Verb("count", HelpText = "Count the amount of beatmaps available matching the specified filter.")]
    public class CountOptions : FilterBase {
        /// <summary>Count one beatmap per distinct "artist - title" song instead of every beatmap.</summary>
        [Option('u', "uniqueSong", HelpText = "Count each unique song file", Default = false)]
        public bool UniqueSong { get; [UsedImplicitly] set; }

        /// <summary>
        /// Total the size and duration of the counted songs. The totals are only accumulated
        /// while <see cref="UniqueSong"/> is set; otherwise they stay at zero.
        /// </summary>
        [Option('f', "fileSize", HelpText = "Aggregate the filesize of the songs", Default = false)]
        public bool FileSize { get; [UsedImplicitly] set; }

        /// <summary>Print every beatmap that is inspected and every song that is counted.</summary>
        [Option('v', "verbose", HelpText = "Print the name of each counted beatmap", Default = false)]
        public bool Verbose { get; [UsedImplicitly] set; }
    }

    /// <summary>
    /// Prints the number of beatmaps matching <paramref name="opts"/> and, when requested,
    /// the total audio file size and duration.
    /// </summary>
    /// <param name="opts">The parsed options of the verb.</param>
    /// <returns>Always 0.</returns>
    public static int DoDataCount(CountOptions opts) {
        var songNames = new HashSet<string>();
        long totalSize = 0;
        var totalTime = TimeSpan.Zero;

        // The predicate has side effects (console output, totals), so it must be evaluated
        // exactly once per beatmap; Count() guarantees a single pass.
        Console.WriteLine(DbManager.GetFiltered(opts)
            .Count(o => {
                if (opts.Verbose) Console.WriteLine(Strings.FullBeatmapName, o.Artist, o.Title, o.Creator, o.Difficulty);
                if (!opts.UniqueSong) return true;

                string songFile = Path.Combine(ConfigManager.Config.SongsPath, o.FolderName.Trim(), o.AudioFileName.Trim());
                string songName = $"{o.Artist} - {Dataset.RemovePartsBetweenParentheses(o.Title)}";

                // Only mp3 audio is counted.
                if (!string.Equals(Path.GetExtension(songFile), ".mp3", StringComparison.OrdinalIgnoreCase)) return false;

                // Add returns false when the name was already seen. The name is recorded even
                // if the file later turns out to be missing or unreadable.
                if (!songNames.Add(songName)) return false;

                var info = new FileInfo(songFile);
                if (!info.Exists) return false;

                if (opts.FileSize) {
                    try {
                        // Dispose the reader right away so no file handle stays open per song.
                        using var reader = new Mp3FileReader(songFile);
                        totalTime += reader.TotalTime;
                    } catch (InvalidOperationException e) {
                        Console.WriteLine(e);
                        return false;
                    }

                    // Added only after the duration was read, so both totals cover exactly
                    // the songs that are counted.
                    totalSize += info.Length;
                }

                if (opts.Verbose) Console.WriteLine(songName);
                return true;
            }));

        if (opts.FileSize) {
            Console.WriteLine(Strings.Count_DoDataCount_Total_file_size___0__MB, totalSize / 1024 / 1024);
            Console.WriteLine(Strings.Count_DoDataCount_Total_duration___0_, totalTime);
        }

        return 0;
    }
}
Loading

0 comments on commit ea1da2b

Please sign in to comment.