Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,4 @@ repos:
- id: black
name: Format code
args: [--skip-string-normalization, --line-length=119]
additional_dependencies: ['click==8.0.2']
additional_dependencies: ['click>=8.0.2']
91 changes: 51 additions & 40 deletions Jenkinsfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
pipeline {
agent {
docker {
image 'tnitn_ci:py310'
args '--user 0:128 -v /home/jenkinsci:/home/jenkinsci -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""'
image 'tnitn_ci_py310:24.07'
args '-v /mnt/jenkins/jenkinsci:/home/jenkins -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""'
}
}
options {
Expand All @@ -16,7 +16,7 @@ pipeline {
EN_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/09-04-24-0'
ES_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/09-25-24-0'
ES_EN_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/08-30-24-0'
FR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/04-07-25-0'
FR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-04-24-0'
HU_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/07-16-24-0'
PT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
RU_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
Expand All @@ -32,12 +32,6 @@ pipeline {
}
stages {

stage('Add git safe directory'){
steps{
sh 'git config --global --add safe.directory /var/lib/jenkins/workspace/NTP_$GIT_BRANCH'
sh 'git config --global --add safe.directory /home/jenkinsci/workspace/NTP_$GIT_BRANCH'
}
}

stage('PyTorch version') {
steps {
Expand All @@ -46,14 +40,6 @@ pipeline {
}
}

stage('Install test requirements') {
steps {
sh 'apt-get update && apt-get install -y bc'
}
}



stage('NeMo Installation') {
steps {
sh './reinstall.sh release'
Expand All @@ -65,7 +51,10 @@ pipeline {
when {
anyOf {
branch 'main'
branch 'staging/**'
branch 'staging_*'
changeRequest target: 'main'

}
}
failFast true
Expand Down Expand Up @@ -97,6 +86,8 @@ pipeline {
when {
anyOf {
branch 'main'
branch 'staging/**'
branch 'staging_*'
changeRequest target: 'main'
}
}
Expand All @@ -120,6 +111,8 @@ pipeline {
when {
anyOf {
branch 'main'
branch 'staging/**'
branch 'staging_*'
changeRequest target: 'main'
}
}
Expand Down Expand Up @@ -156,7 +149,9 @@ pipeline {
stage('L0: Create AR TN/ITN Grammars') {
when {
anyOf {
branch 'main'
branch 'main'
branch 'staging/**'
branch 'staging_*'
changeRequest target: 'main'
}
}
Expand All @@ -179,7 +174,9 @@ pipeline {
stage('L0: Create FR TN/ITN & VI ITN & HU TN & IT TN') {
when {
anyOf {
branch 'main'
branch 'main'
branch 'staging/**'
branch 'staging_*'
changeRequest target: 'main'
}
}
Expand Down Expand Up @@ -216,7 +213,9 @@ pipeline {
stage('L0: Create RU TN/ITN Grammars & SV & PT') {
when {
anyOf {
branch 'main'
branch 'main'
branch 'staging/**'
branch 'staging_*'
changeRequest target: 'main'
}
}
Expand Down Expand Up @@ -258,7 +257,9 @@ pipeline {
stage('L0: Create HY TN/ITN Grammars & MR') {
when {
anyOf {
branch 'main'
branch 'main'
branch 'staging/**'
branch 'staging_*'
changeRequest target: 'main'
}
}
Expand All @@ -284,7 +285,9 @@ pipeline {
stage('L0: Create ZH TN/ITN Grammar') {
when {
anyOf {
branch 'main'
branch 'main'
branch 'staging/**'
branch 'staging_*'
changeRequest target: 'main'
}
}
Expand All @@ -305,7 +308,9 @@ pipeline {
stage('L0: Create JA ITN Grammars') {
when {
anyOf {
branch 'main'
branch 'main'
branch 'staging/**'
branch 'staging_*'
changeRequest target: 'main'
}
}
Expand All @@ -325,7 +330,9 @@ pipeline {
stage('L1: TN/ITN Tests CPU') {
when {
anyOf {
branch 'main'
branch 'main'
branch 'staging/**'
branch 'staging_*'
changeRequest target: 'main'
}
}
Expand Down Expand Up @@ -409,10 +416,12 @@ pipeline {
}
}

stage('L2: Sparrowhawk Tests') {
stage('L2: EN Sparrowhawk Tests') {
when {
anyOf {
branch 'main'
branch 'main'
branch 'staging/**'
branch 'staging_*'
changeRequest target: 'main'
}
}
Expand Down Expand Up @@ -441,54 +450,56 @@ pipeline {

}
}

stage('L2: NeMo text processing') {
when {
anyOf {
branch 'main'
branch 'main'
branch 'staging/**'
branch 'staging_*'
changeRequest target: 'main'
}
}
failFast true
parallel {
stage('L2: Eng TN') {
steps {
sh 'TIME=`date +"%Y-%m-%d-%T"` && NORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_norm/output_${TIME} && \
sh 'TIME=`date +"%Y-%m-%d-%T"` && NORM_OUTPUT_DIR=/home/jenkins/TestData/text_norm/output_${TIME} && \
cd tools/text_processing_deployment && python pynini_export.py --output=$NORM_OUTPUT_DIR --grammars=tn_grammars --cache_dir ${EN_TN_CACHE} --language=en && ls -R $NORM_OUTPUT_DIR && echo ".far files created "|| exit 1'
sh 'TIME=`date +"%Y-%m-%d-%T"` && NORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_norm/output_${TIME} && mkdir $NORM_OUTPUT_DIR && \
cd nemo_text_processing/text_normalization/ && python normalize.py --input_file=/home/jenkinsci/TestData/text_norm/ci/test.txt --input_case="lower_cased" --language=en --output_file=$NORM_OUTPUT_DIR/test.pynini.txt --verbose && \
sh 'TIME=`date +"%Y-%m-%d-%T"` && NORM_OUTPUT_DIR=/home/jenkins/TestData/text_norm/output_${TIME} && mkdir $NORM_OUTPUT_DIR && \
cd nemo_text_processing/text_normalization/ && python normalize.py --input_file=/home/jenkins/TestData/text_norm/ci/test.txt --input_case="lower_cased" --language=en --output_file=$NORM_OUTPUT_DIR/test.pynini.txt --verbose && \
cat $NORM_OUTPUT_DIR/test.pynini.txt && \
cmp --silent $NORM_OUTPUT_DIR/test.pynini.txt /home/jenkinsci/TestData/text_norm/ci/test_goal_py.txt || exit 1 && \
cmp --silent $NORM_OUTPUT_DIR/test.pynini.txt /home/jenkins/TestData/text_norm/ci/test_goal_py.txt || exit 1 && \
rm -rf $NORM_OUTPUT_DIR'
}
}

stage('L2: Eng ITN export') {
steps {
sh 'TIME=`date +"%Y-%m-%d-%T"` && DENORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_denorm/output_${TIME} && \
sh 'TIME=`date +"%Y-%m-%d-%T"` && DENORM_OUTPUT_DIR=/home/jenkins/TestData/text_denorm/output_${TIME} && \
cd tools/text_processing_deployment && python pynini_export.py --output=$DENORM_OUTPUT_DIR --grammars=itn_grammars --cache_dir ${EN_TN_CACHE} --language=en && ls -R $DENORM_OUTPUT_DIR && echo ".far files created "|| exit 1'
sh 'TIME=`date +"%Y-%m-%d-%T"` && DENORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_denorm/output_${TIME} && mkdir $DENORM_OUTPUT_DIR && \
cd nemo_text_processing/inverse_text_normalization/ && python inverse_normalize.py --input_file=/home/jenkinsci/TestData/text_denorm/ci/test.txt --language=en --output_file=$DENORM_OUTPUT_DIR/test.pynini.txt --verbose && \
cmp --silent $DENORM_OUTPUT_DIR/test.pynini.txt /home/jenkinsci/TestData/text_denorm/ci/test_goal_py.txt || exit 1 && \
sh 'TIME=`date +"%Y-%m-%d-%T"` && DENORM_OUTPUT_DIR=/home/jenkins/TestData/text_denorm/output_${TIME} && mkdir $DENORM_OUTPUT_DIR && \
cd nemo_text_processing/inverse_text_normalization/ && python inverse_normalize.py --input_file=/home/jenkins/TestData/text_denorm/ci/test.txt --language=en --output_file=$DENORM_OUTPUT_DIR/test.pynini.txt --verbose && \
cmp --silent $DENORM_OUTPUT_DIR/test.pynini.txt /home/jenkins/TestData/text_denorm/ci/test_goal_py.txt || exit 1 && \
rm -rf $DENORM_OUTPUT_DIR'
}
}


stage('L2: Eng alignment TN') {
steps {
sh 'TIME=`date +"%Y-%m-%d-%T"` && NORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_norm/output_${TIME} && mkdir $NORM_OUTPUT_DIR && \
sh 'TIME=`date +"%Y-%m-%d-%T"` && NORM_OUTPUT_DIR=/home/jenkins/TestData/text_norm/output_${TIME} && mkdir $NORM_OUTPUT_DIR && \
cd nemo_text_processing/fst_alignment && python alignment.py --text="2615 Forest Av, 90501 CA, Santa Clara. 10kg, 12/16/2018" --grammar=tn --rule=tokenize_and_classify --fst=${EN_TN_CACHE}/en_tn_True_deterministic_cased__tokenize.far 2>&1 | tee $NORM_OUTPUT_DIR/pred.txt && \
cmp --silent $NORM_OUTPUT_DIR/pred.txt /home/jenkinsci/TestData/text_norm/ci/alignment_gold.txt || exit 1 && \
cmp --silent $NORM_OUTPUT_DIR/pred.txt /home/jenkins/TestData/text_norm/ci/alignment_gold.txt || exit 1 && \
rm -rf $NORM_OUTPUT_DIR'
}
}

stage('L2: Eng alignment ITN') {
steps {
sh 'TIME=`date +"%Y-%m-%d-%T"` && DENORM_OUTPUT_DIR=/home/jenkinsci/TestData/text_denorm/output_${TIME} && mkdir $DENORM_OUTPUT_DIR && \
sh 'TIME=`date +"%Y-%m-%d-%T"` && DENORM_OUTPUT_DIR=/home/jenkins/TestData/text_denorm/output_${TIME} && mkdir $DENORM_OUTPUT_DIR && \
cd nemo_text_processing/fst_alignment && python alignment.py --text="one million twenty three thousand two hundred eleven ten kilograms one hundred twenty three dollars and twenty five cents" --grammar=itn --rule=tokenize_and_classify --fst=${EN_TN_CACHE}/en_itn_lower_cased.far 2>&1 | tee $DENORM_OUTPUT_DIR/pred.txt && \
cmp --silent $DENORM_OUTPUT_DIR/pred.txt /home/jenkinsci/TestData/text_denorm/ci/alignment_gold.txt || exit 1 && \
cmp --silent $DENORM_OUTPUT_DIR/pred.txt /home/jenkins/TestData/text_denorm/ci/alignment_gold.txt || exit 1 && \
rm -rf $DENORM_OUTPUT_DIR'
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ def __init__(self, cardinal: GraphFst, ordinal: GraphFst):
graph_year_range = self.year_range
graph_year_range_century = self.year_range + delete_space + self.century

graph_ordinal_century = self.ordinal_century + self.morpho_graph + delete_extra_space + self.century

graph_ordinal_century = self.ordinal_century + self.morpho_graph + delete_extra_space + self.century
graph_date_exceptions = self.month + delete_space + pynutil.delete("की") + delete_space + self.day
graph_date_exceptions += pynutil.insert("preserve_order: true")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ class TelephoneFst(GraphFst):
"""
Finite state transducer for classifying telephone numbers, e.g.
प्लस इक्यानवे नौ आठ सात छह पांच चार तीन दो एक शून्य => tokens { name: "+९१ ९८७६५ ४३२१०" }

Args:
Cardinal: CardinalFst
"""
Expand Down
6 changes: 3 additions & 3 deletions requirements/requirements_test.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
black==19.10b0
click==8.0.2
isort[requirements]>5.1.0,<6.0.0
black==25.1.0
click>=8.0.2
isort[requirements]>5.1.0,<=6.0.1
parameterized
pynini==2.1.6.post1
pytest
Expand Down