diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cbc636f1a..a2886d56e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -50,4 +50,4 @@ repos: - id: black name: Format code args: [--skip-string-normalization, --line-length=119] - additional_dependencies: ['click==8.0.2'] + additional_dependencies: ['click>=8.0.2'] diff --git a/Jenkinsfile b/Jenkinsfile index f1bab1c59..55aa0d038 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,8 +1,8 @@ pipeline { agent { docker { - image 'tnitn_ci:py310' - args '--user 0:128 -v /home/jenkinsci:/home/jenkinsci -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""' + image 'tnitn_ci_py310:24.07' + args '-v /mnt/jenkins/jenkinsci:/home/jenkins -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""' } } options { @@ -16,7 +16,7 @@ pipeline { EN_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/09-04-24-0' ES_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/09-25-24-0' ES_EN_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/08-30-24-0' - FR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/04-07-25-0' + FR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-04-24-0' HU_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/07-16-24-0' PT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0' RU_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0' @@ -32,12 +32,6 @@ pipeline { } stages { - stage('Add git safe directory'){ - steps{ - sh 'git config --global --add safe.directory /var/lib/jenkins/workspace/NTP_$GIT_BRANCH' - sh 'git config --global --add safe.directory /home/jenkinsci/workspace/NTP_$GIT_BRANCH' - } - } stage('PyTorch version') { steps { @@ -46,14 +40,6 @@ pipeline { } } - stage('Install test requirements') { - steps { - sh 'apt-get update && apt-get install -y bc' - } - } - - - stage('NeMo Installation') { steps { sh './reinstall.sh release' @@ -65,7 +51,10 @@ pipeline { when { anyOf { branch 'main' + branch 
'staging/**' + branch 'staging_*' changeRequest target: 'main' + } } failFast true @@ -97,6 +86,8 @@ pipeline { when { anyOf { branch 'main' + branch 'staging/**' + branch 'staging_*' changeRequest target: 'main' } } @@ -120,6 +111,8 @@ pipeline { when { anyOf { branch 'main' + branch 'staging/**' + branch 'staging_*' changeRequest target: 'main' } } @@ -156,7 +149,9 @@ pipeline { stage('L0: Create AR TN/ITN Grammars') { when { anyOf { - branch 'main' + branch 'main' + branch 'staging/**' + branch 'staging_*' changeRequest target: 'main' } } @@ -179,7 +174,9 @@ pipeline { stage('L0: Create FR TN/ITN & VI ITN & HU TN & IT TN') { when { anyOf { - branch 'main' + branch 'main' + branch 'staging/**' + branch 'staging_*' changeRequest target: 'main' } } @@ -216,7 +213,9 @@ pipeline { stage('L0: Create RU TN/ITN Grammars & SV & PT') { when { anyOf { - branch 'main' + branch 'main' + branch 'staging/**' + branch 'staging_*' changeRequest target: 'main' } } @@ -258,7 +257,9 @@ pipeline { stage('L0: Create HY TN/ITN Grammars & MR') { when { anyOf { - branch 'main' + branch 'main' + branch 'staging/**' + branch 'staging_*' changeRequest target: 'main' } } @@ -284,7 +285,9 @@ pipeline { stage('L0: Create ZH TN/ITN Grammar') { when { anyOf { - branch 'main' + branch 'main' + branch 'staging/**' + branch 'staging_*' changeRequest target: 'main' } } @@ -305,7 +308,9 @@ pipeline { stage('L0: Create JA ITN Grammars') { when { anyOf { - branch 'main' + branch 'main' + branch 'staging/**' + branch 'staging_*' changeRequest target: 'main' } } @@ -325,7 +330,9 @@ pipeline { stage('L1: TN/ITN Tests CPU') { when { anyOf { - branch 'main' + branch 'main' + branch 'staging/**' + branch 'staging_*' changeRequest target: 'main' } } @@ -409,10 +416,12 @@ pipeline { } } - stage('L2: Sparrowhawk Tests') { + stage('L2: EN Sparrowhawk Tests') { when { anyOf { - branch 'main' + branch 'main' + branch 'staging/**' + branch 'staging_*' changeRequest target: 'main' } } @@ -441,11 +450,13 @@ pipeline 
{ } } - + stage('L2: NeMo text processing') { when { anyOf { - branch 'main' + branch 'main' + branch 'staging/**' + branch 'staging_*' changeRequest target: 'main' } } @@ -453,23 +464,23 @@ pipeline { parallel { stage('L2: Eng TN') { steps { - sh 'TIME=`date +"%Y-%m-%d-%T"` && NORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_norm/output_${TIME} && \ + sh 'TIME=`date +"%Y-%m-%d-%T"` && NORM_OUTPUT_DIR=/home/jenkins/TestData/text_norm/output_${TIME} && \ cd tools/text_processing_deployment && python pynini_export.py --output=$NORM_OUTPUT_DIR --grammars=tn_grammars --cache_dir ${EN_TN_CACHE} --language=en && ls -R $NORM_OUTPUT_DIR && echo ".far files created "|| exit 1' - sh 'TIME=`date +"%Y-%m-%d-%T"` && NORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_norm/output_${TIME} && mkdir $NORM_OUTPUT_DIR && \ - cd nemo_text_processing/text_normalization/ && python normalize.py --input_file=/home/jenkinsci/TestData/text_norm/ci/test.txt --input_case="lower_cased" --language=en --output_file=$NORM_OUTPUT_DIR/test.pynini.txt --verbose && \ + sh 'TIME=`date +"%Y-%m-%d-%T"` && NORM_OUTPUT_DIR=/home/jenkins/TestData/text_norm/output_${TIME} && mkdir $NORM_OUTPUT_DIR && \ + cd nemo_text_processing/text_normalization/ && python normalize.py --input_file=/home/jenkins/TestData/text_norm/ci/test.txt --input_case="lower_cased" --language=en --output_file=$NORM_OUTPUT_DIR/test.pynini.txt --verbose && \ cat $NORM_OUTPUT_DIR/test.pynini.txt && \ - cmp --silent $NORM_OUTPUT_DIR/test.pynini.txt /home/jenkinsci/TestData/text_norm/ci/test_goal_py.txt || exit 1 && \ + cmp --silent $NORM_OUTPUT_DIR/test.pynini.txt /home/jenkins/TestData/text_norm/ci/test_goal_py.txt || exit 1 && \ rm -rf $NORM_OUTPUT_DIR' } } stage('L2: Eng ITN export') { steps { - sh 'TIME=`date +"%Y-%m-%d-%T"` && DENORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_denorm/output_${TIME} && \ + sh 'TIME=`date +"%Y-%m-%d-%T"` && DENORM_OUTPUT_DIR=/home/jenkins/TestData/text_denorm/output_${TIME} && \ cd tools/text_processing_deployment 
&& python pynini_export.py --output=$DENORM_OUTPUT_DIR --grammars=itn_grammars --cache_dir ${EN_TN_CACHE} --language=en && ls -R $DENORM_OUTPUT_DIR && echo ".far files created "|| exit 1' - sh 'TIME=`date +"%Y-%m-%d-%T"` && DENORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_denorm/output_${TIME} && mkdir $DENORM_OUTPUT_DIR && \ - cd nemo_text_processing/inverse_text_normalization/ && python inverse_normalize.py --input_file=/home/jenkinsci/TestData/text_denorm/ci/test.txt --language=en --output_file=$DENORM_OUTPUT_DIR/test.pynini.txt --verbose && \ - cmp --silent $DENORM_OUTPUT_DIR/test.pynini.txt /home/jenkinsci/TestData/text_denorm/ci/test_goal_py.txt || exit 1 && \ + sh 'TIME=`date +"%Y-%m-%d-%T"` && DENORM_OUTPUT_DIR=/home/jenkins/TestData/text_denorm/output_${TIME} && mkdir $DENORM_OUTPUT_DIR && \ + cd nemo_text_processing/inverse_text_normalization/ && python inverse_normalize.py --input_file=/home/jenkins/TestData/text_denorm/ci/test.txt --language=en --output_file=$DENORM_OUTPUT_DIR/test.pynini.txt --verbose && \ + cmp --silent $DENORM_OUTPUT_DIR/test.pynini.txt /home/jenkins/TestData/text_denorm/ci/test_goal_py.txt || exit 1 && \ rm -rf $DENORM_OUTPUT_DIR' } } @@ -477,18 +488,18 @@ pipeline { stage('L2: Eng alignment TN') { steps { - sh 'TIME=`date +"%Y-%m-%d-%T"` && NORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_norm/output_${TIME} && mkdir $NORM_OUTPUT_DIR && \ + sh 'TIME=`date +"%Y-%m-%d-%T"` && NORM_OUTPUT_DIR=/home/jenkins/TestData/text_norm/output_${TIME} && mkdir $NORM_OUTPUT_DIR && \ cd nemo_text_processing/fst_alignment && python alignment.py --text="2615 Forest Av, 90501 CA, Santa Clara. 
10kg, 12/16/2018" --grammar=tn --rule=tokenize_and_classify --fst=${EN_TN_CACHE}/en_tn_True_deterministic_cased__tokenize.far 2>&1 | tee $NORM_OUTPUT_DIR/pred.txt && \ - cmp --silent $NORM_OUTPUT_DIR/pred.txt /home/jenkinsci/TestData/text_norm/ci/alignment_gold.txt || exit 1 && \ + cmp --silent $NORM_OUTPUT_DIR/pred.txt /home/jenkins/TestData/text_norm/ci/alignment_gold.txt || exit 1 && \ rm -rf $NORM_OUTPUT_DIR' } } stage('L2: Eng alignment ITN') { steps { - sh 'TIME=`date +"%Y-%m-%d-%T"` && DENORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_denorm/output_${TIME} && mkdir $DENORM_OUTPUT_DIR && \ + sh 'TIME=`date +"%Y-%m-%d-%T"` && DENORM_OUTPUT_DIR=/home/jenkins/TestData/text_denorm/output_${TIME} && mkdir $DENORM_OUTPUT_DIR && \ cd nemo_text_processing/fst_alignment && python alignment.py --text="one million twenty three thousand two hundred eleven ten kilograms one hundred twenty three dollars and twenty five cents" --grammar=itn --rule=tokenize_and_classify --fst=${EN_TN_CACHE}/en_itn_lower_cased.far 2>&1 | tee $DENORM_OUTPUT_DIR/pred.txt && \ - cmp --silent $DENORM_OUTPUT_DIR/pred.txt /home/jenkinsci/TestData/text_denorm/ci/alignment_gold.txt || exit 1 && \ + cmp --silent $DENORM_OUTPUT_DIR/pred.txt /home/jenkins/TestData/text_denorm/ci/alignment_gold.txt || exit 1 && \ rm -rf $DENORM_OUTPUT_DIR' } } diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py index 27e6eec32..9362d3d69 100644 --- a/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py +++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py @@ -87,6 +87,7 @@ def __init__(self, cardinal: GraphFst, ordinal: GraphFst): graph_year_range = self.year_range graph_year_range_century = self.year_range + delete_space + self.century + graph_ordinal_century = self.ordinal_century + 
self.morpho_graph + delete_extra_space + self.century graph_date_exceptions = self.month + delete_space + pynutil.delete("की") + delete_space + self.day graph_date_exceptions += pynutil.insert("preserve_order: true") diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py index 386f1353d..f3f0c0434 100644 --- a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py +++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py @@ -23,7 +23,6 @@ class TelephoneFst(GraphFst): """ Finite state transducer for classifying telephone numbers, e.g. e.g. प्लस इक्यानवे नौ आठ सात छह पांच चार तीन दो एक शून्य => tokens { name: "+९१ ९८७६५ ४३२१०" } - Args: Cardinal: CardinalFst """ diff --git a/requirements/requirements_test.txt b/requirements/requirements_test.txt index a3e90e5dc..aacfde319 100644 --- a/requirements/requirements_test.txt +++ b/requirements/requirements_test.txt @@ -1,6 +1,6 @@ -black==19.10b0 -click==8.0.2 -isort[requirements]>5.1.0,<6.0.0 +black==25.1.0 +click>=8.0.2 +isort[requirements]>5.1.0,<=6.0.1 parameterized pynini==2.1.6.post1 pytest