Skip to content

Commit f260f10

Browse files
committed
Updated run_pipeline script to use the latest pipeline (p_53, replacing p_48)
1 parent cf55653 commit f260f10

File tree

1 file changed

+53
-38
lines changed

1 file changed

+53
-38
lines changed

run_pipeline.py

Lines changed: 53 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,9 @@
1818
import map_sra_to_ontology
1919
from map_sra_to_ontology import ontology_graph
2020
from map_sra_to_ontology import load_ontology
21-
from map_sra_to_ontology import predict_sample_type
2221
from map_sra_to_ontology import config
23-
from map_sra_to_ontology import predict_sample_type
24-
from map_sra_to_ontology import run_sample_type_predictor
22+
import predict_sample_type
23+
from predict_sample_type import run_sample_type_predictor
2524
from predict_sample_type.learn_classifier import *
2625
from map_sra_to_ontology import pipeline_components as pc
2726

@@ -44,7 +43,7 @@ def main():
4443
"EFO":"16",
4544
"CVCL":"4"}
4645
ont_id_to_og = {x:load_ontology.load(x)[0] for x in ont_name_to_ont_id.values()}
47-
pipeline = p_48()
46+
pipeline = p_53()
4847

4948
all_mappings = []
5049
for tag_to_val in tag_to_vals:
@@ -114,36 +113,50 @@ def run_pipeline_on_key_vals(tag_to_val, ont_id_to_og, mapping_data):
114113
# }
115114
# return mappings
116115

117-
def p_48():
118-
spec_lex = pc.SpecialistLexicon(config.specialist_lex_location())
119-
inflec_var = pc.SPECIALISTLexInflectionalVariants(spec_lex)
120-
spell_var = pc.SPECIALISTSpellingVariants(spec_lex)
121-
key_val_filt = pc.KeyValueFilter_Stage()
122-
init_tokens_stage = pc.InitKeyValueTokens_Stage()
123-
ngram = pc.NGram_Stage()
124-
lower_stage = pc.Lowercase_Stage()
125-
man_at_syn = pc.ManuallyAnnotatedSynonyms_Stage()
126-
infer_cell_line = pc.InferCellLineTerms_Stage()
127-
prop_spec_syn = pc.PropertySpecificSynonym_Stage()
128-
infer_dev_stage = pc.ImpliedDevelopmentalStageFromAge_Stage()
129-
linked_super = pc.LinkedTermsOfSuperterms_Stage()
130-
cell_culture = pc.ConsequentCulturedCell_Stage()
131-
filt_match_priority = pc.FilterOntologyMatchesByPriority_Stage()
132-
real_val = pc.ExtractRealValue_Stage()
133-
match_cust_targs = pc.ExactMatchCustomTargets_Stage()
134-
cust_conseq = pc.CustomConsequentTerms_Stage()
135-
delimit_plus = pc.Delimit_Stage('+')
136-
delimit_underscore = pc.Delimit_Stage('_')
137-
delimit_dash = pc.Delimit_Stage('-')
138-
delimit_slash = pc.Delimit_Stage('/')
139-
block_cell_line_key = pc.BlockCellLineNonCellLineKey_Stage()
140-
subphrase_linked = pc.RemoveSubIntervalOfMatchedBlockAncestralLink_Stage()
141-
cellline_to_implied_disease = pc.CellLineToImpliedDisease_Stage()
142-
acr_to_expan = pc.AcronymToExpansion_Stage()
143-
exact_match = pc.ExactStringMatching_Stage(["1", "2", "4", "5", "7", "8", "9"], query_len_thresh=3)
144-
fuzzy_match = pc.FuzzyStringMatching_Stage(0.1, query_len_thresh=3)
145-
two_char_match = pc.TwoCharMappings_Stage()
146-
time_unit = pc.ParseTimeWithUnit_Stage()
116+
117+
def p_53():
118+
spec_lex = SpecialistLexicon(config.specialist_lex_location())
119+
inflec_var = SPECIALISTLexInflectionalVariants(spec_lex)
120+
spell_var = SPECIALISTSpellingVariants(spec_lex)
121+
key_val_filt = KeyValueFilter_Stage()
122+
init_tokens_stage = InitKeyValueTokens_Stage()
123+
ngram = NGram_Stage()
124+
lower_stage = Lowercase_Stage()
125+
man_at_syn = ManuallyAnnotatedSynonyms_Stage()
126+
infer_cell_line = InferCellLineTerms_Stage()
127+
prop_spec_syn = PropertySpecificSynonym_Stage()
128+
infer_dev_stage = ImpliedDevelopmentalStageFromAge_Stage()
129+
linked_super = LinkedTermsOfSuperterms_Stage()
130+
cell_culture = ConsequentCulturedCell_Stage()
131+
filt_match_priority = FilterOntologyMatchesByPriority_Stage()
132+
real_val = ExtractRealValue_Stage()
133+
match_cust_targs = ExactMatchCustomTargets_Stage()
134+
cust_conseq = CustomConsequentTerms_Stage()
135+
delimit_plus = Delimit_Stage('+')
136+
delimit_underscore = Delimit_Stage('_')
137+
delimit_dash = Delimit_Stage('-')
138+
delimit_slash = Delimit_Stage('/')
139+
block_cell_line_key = BlockCellLineNonCellLineKey_Stage()
140+
subphrase_linked = RemoveSubIntervalOfMatchedBlockAncestralLink_Stage()
141+
cellline_to_implied_disease = CellLineToImpliedDisease_Stage()
142+
acr_to_expan = AcronymToExpansion_Stage()
143+
exact_match = ExactStringMatching_Stage(
144+
[
145+
"1",
146+
"2",
147+
"5",
148+
"7",
149+
"8",
150+
"9",
151+
"18" # Cellosaurus restricted to human cell lines
152+
],
153+
query_len_thresh=3
154+
)
155+
fuzzy_match = FuzzyStringMatching_Stage(0.1, query_len_thresh=3)
156+
two_char_match = TwoCharMappings_Stage()
157+
time_unit = ParseTimeWithUnit_Stage()
158+
prioritize_exact = PrioritizeExactMatchOverFuzzyMatch()
159+
artifact_term_combo = TermArtifactCombinations_Stage()
147160

148161
stages = [
149162
key_val_filt,
@@ -158,8 +171,8 @@ def p_48():
158171
spell_var,
159172
man_at_syn,
160173
acr_to_expan,
161-
time_unit,
162174
exact_match,
175+
time_unit,
163176
two_char_match,
164177
prop_spec_syn,
165178
fuzzy_match,
@@ -169,13 +182,15 @@ def p_48():
169182
cellline_to_implied_disease,
170183
subphrase_linked,
171184
cust_conseq,
185+
artifact_term_combo,
172186
real_val,
173187
filt_match_priority,
174188
infer_cell_line,
175189
infer_dev_stage,
176-
cell_culture]
177-
return pc.Pipeline(stages, defaultdict(lambda: 1.0))
178-
190+
cell_culture,
191+
prioritize_exact
192+
]
193+
return Pipeline(stages, defaultdict(lambda: 1.0))
179194

180195
if __name__ == "__main__":
181196
main()

0 commit comments

Comments
 (0)