diff --git a/src/OneBestFstLoader.cpp b/src/OneBestFstLoader.cpp index 0e6cc75..6b70a00 100644 --- a/src/OneBestFstLoader.cpp +++ b/src/OneBestFstLoader.cpp @@ -92,7 +92,7 @@ std::vector OneBestFstLoader::convertToIntVector(fst::SymbolTable &symbol) FstAlignOption options; for (TokenType::const_iterator i = mToken.begin(); i != mToken.end(); ++i) { - std::string token = *i; + std::string token = UnicodeLowercase(*i); int token_sym = symbol.Find(token); if (token_sym == -1) { token_sym = symbol.Find(options.symUnk); diff --git a/src/version.h b/src/version.h index 7ae5632..53471fa 100644 --- a/src/version.h +++ b/src/version.h @@ -1,5 +1,5 @@ #pragma once #define FSTALIGNER_VERSION_MAJOR 1 -#define FSTALIGNER_VERSION_MINOR 5 -#define FSTALIGNER_VERSION_PATCH 0 +#define FSTALIGNER_VERSION_MINOR 6 +#define FSTALIGNER_VERSION_PATCH 1 diff --git a/test/data/short.aligned.nlp b/test/data/short.aligned.nlp index 840a2f7..313068d 100644 --- a/test/data/short.aligned.nlp +++ b/test/data/short.aligned.nlp @@ -1,7 +1,7 @@ token|speaker|ts|endTs|punctuation|prepunctuation|case|tags|wer_tags|oldTs|oldEndTs|ali_comment |2|0.0000|0.0000|||LC|[]|[]||| -Yeah|1|0.0000|0.0000|,||UC|[]|[]||| -yeah|1|||,||LC|[]|[]|||del +Yeah|1|||,||UC|[]|[]|||del +yeah|1|0.0000|0.0000|,||LC|[]|[]||| right|1|0.0000|0.0000|.||LC|[]|[]||| Yeah|1|||,||UC|[]|[]|||del alright|1|0.0000|0.0000|,||LC|[]|[]|||sub(i'll),split_worst diff --git a/test/fstalign_Test.cc b/test/fstalign_Test.cc index f2f713d..1077560 100644 --- a/test/fstalign_Test.cc +++ b/test/fstalign_Test.cc @@ -256,24 +256,14 @@ TEST_CASE_METHOD(UniqueTestsFixture, "main-standard-composition()") { REQUIRE_THAT(result, Contains("WER: INS:0 DEL:2 SUB:2")); } - SECTION("wer (nlp output)") { - const auto result = exec(command("wer", approach, "short.ref.nlp", "short.hyp.nlp", sbs_output, nlp_output, - TEST_SYNONYMS, nullptr, false, -1, "--disable-approx-alignment")); - const auto testFile = std::string{TEST_DATA} + "short.aligned.nlp"; - - REQUIRE(compareFiles(nlp_output.c_str(), testFile.c_str())); - REQUIRE_THAT(result, Contains("WER: 5/31 = 0.1613")); - REQUIRE_THAT(result, Contains("WER: INS:0 DEL:2 SUB:2")); - } - SECTION("Case Metrics") { const auto result = exec(command("wer", approach, "short.ref.nlp", "short.hyp.txt", sbs_output, nlp_output, TEST_SYNONYMS, nullptr, false, -1, "--record-case-stats")); const auto testFile = std::string{TEST_DATA} + "short.aligned.nlp"; REQUIRE(compareFiles(nlp_output.c_str(), testFile.c_str())); - REQUIRE_THAT(result, Contains("case WER, (matching words only): Precision:1.0")); - REQUIRE_THAT(result, Contains("case WER, (all including substitutions): Precision:0.77")); + REQUIRE_THAT(result, Contains("case WER, (matching words only): Precision:0.857143")); + REQUIRE_THAT(result, Contains("case WER, (all including substitutions): Precision:0.666667")); } // alignment tests