18
18
import map_sra_to_ontology
19
19
from map_sra_to_ontology import ontology_graph
20
20
from map_sra_to_ontology import load_ontology
21
- from map_sra_to_ontology import predict_sample_type
22
21
from map_sra_to_ontology import config
23
- from map_sra_to_ontology import predict_sample_type
24
- from map_sra_to_ontology import run_sample_type_predictor
22
+ import predict_sample_type
23
+ from predict_sample_type import run_sample_type_predictor
25
24
from predict_sample_type .learn_classifier import *
26
25
from map_sra_to_ontology import pipeline_components as pc
27
26
@@ -44,7 +43,7 @@ def main():
44
43
"EFO" :"16" ,
45
44
"CVCL" :"4" }
46
45
ont_id_to_og = {x :load_ontology .load (x )[0 ] for x in ont_name_to_ont_id .values ()}
47
- pipeline = p_48 ()
46
+ pipeline = p_53 ()
48
47
49
48
all_mappings = []
50
49
for tag_to_val in tag_to_vals :
@@ -114,36 +113,50 @@ def run_pipeline_on_key_vals(tag_to_val, ont_id_to_og, mapping_data):
114
113
# }
115
114
# return mappings
116
115
117
- def p_48 ():
118
- spec_lex = pc .SpecialistLexicon (config .specialist_lex_location ())
119
- inflec_var = pc .SPECIALISTLexInflectionalVariants (spec_lex )
120
- spell_var = pc .SPECIALISTSpellingVariants (spec_lex )
121
- key_val_filt = pc .KeyValueFilter_Stage ()
122
- init_tokens_stage = pc .InitKeyValueTokens_Stage ()
123
- ngram = pc .NGram_Stage ()
124
- lower_stage = pc .Lowercase_Stage ()
125
- man_at_syn = pc .ManuallyAnnotatedSynonyms_Stage ()
126
- infer_cell_line = pc .InferCellLineTerms_Stage ()
127
- prop_spec_syn = pc .PropertySpecificSynonym_Stage ()
128
- infer_dev_stage = pc .ImpliedDevelopmentalStageFromAge_Stage ()
129
- linked_super = pc .LinkedTermsOfSuperterms_Stage ()
130
- cell_culture = pc .ConsequentCulturedCell_Stage ()
131
- filt_match_priority = pc .FilterOntologyMatchesByPriority_Stage ()
132
- real_val = pc .ExtractRealValue_Stage ()
133
- match_cust_targs = pc .ExactMatchCustomTargets_Stage ()
134
- cust_conseq = pc .CustomConsequentTerms_Stage ()
135
- delimit_plus = pc .Delimit_Stage ('+' )
136
- delimit_underscore = pc .Delimit_Stage ('_' )
137
- delimit_dash = pc .Delimit_Stage ('-' )
138
- delimit_slash = pc .Delimit_Stage ('/' )
139
- block_cell_line_key = pc .BlockCellLineNonCellLineKey_Stage ()
140
- subphrase_linked = pc .RemoveSubIntervalOfMatchedBlockAncestralLink_Stage ()
141
- cellline_to_implied_disease = pc .CellLineToImpliedDisease_Stage ()
142
- acr_to_expan = pc .AcronymToExpansion_Stage ()
143
- exact_match = pc .ExactStringMatching_Stage (["1" , "2" , "4" , "5" , "7" , "8" , "9" ], query_len_thresh = 3 )
144
- fuzzy_match = pc .FuzzyStringMatching_Stage (0.1 , query_len_thresh = 3 )
145
- two_char_match = pc .TwoCharMappings_Stage ()
146
- time_unit = pc .ParseTimeWithUnit_Stage ()
116
+
117
+ def p_53 ():
118
+ spec_lex = SpecialistLexicon (config .specialist_lex_location ())
119
+ inflec_var = SPECIALISTLexInflectionalVariants (spec_lex )
120
+ spell_var = SPECIALISTSpellingVariants (spec_lex )
121
+ key_val_filt = KeyValueFilter_Stage ()
122
+ init_tokens_stage = InitKeyValueTokens_Stage ()
123
+ ngram = NGram_Stage ()
124
+ lower_stage = Lowercase_Stage ()
125
+ man_at_syn = ManuallyAnnotatedSynonyms_Stage ()
126
+ infer_cell_line = InferCellLineTerms_Stage ()
127
+ prop_spec_syn = PropertySpecificSynonym_Stage ()
128
+ infer_dev_stage = ImpliedDevelopmentalStageFromAge_Stage ()
129
+ linked_super = LinkedTermsOfSuperterms_Stage ()
130
+ cell_culture = ConsequentCulturedCell_Stage ()
131
+ filt_match_priority = FilterOntologyMatchesByPriority_Stage ()
132
+ real_val = ExtractRealValue_Stage ()
133
+ match_cust_targs = ExactMatchCustomTargets_Stage ()
134
+ cust_conseq = CustomConsequentTerms_Stage ()
135
+ delimit_plus = Delimit_Stage ('+' )
136
+ delimit_underscore = Delimit_Stage ('_' )
137
+ delimit_dash = Delimit_Stage ('-' )
138
+ delimit_slash = Delimit_Stage ('/' )
139
+ block_cell_line_key = BlockCellLineNonCellLineKey_Stage ()
140
+ subphrase_linked = RemoveSubIntervalOfMatchedBlockAncestralLink_Stage ()
141
+ cellline_to_implied_disease = CellLineToImpliedDisease_Stage ()
142
+ acr_to_expan = AcronymToExpansion_Stage ()
143
+ exact_match = ExactStringMatching_Stage (
144
+ [
145
+ "1" ,
146
+ "2" ,
147
+ "5" ,
148
+ "7" ,
149
+ "8" ,
150
+ "9" ,
151
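+ "18" # Cellosaurus restricted to human cell lines (takes the place of ID "4" from the previous list); NOTE(review): main() still shows "CVCL" mapped to "4" and loads ontologies from those values - confirm ontology "18" is actually loaded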
152
+ ],
153
+ query_len_thresh = 3
154
+ )
155
+ fuzzy_match = FuzzyStringMatching_Stage (0.1 , query_len_thresh = 3 )
156
+ two_char_match = TwoCharMappings_Stage ()
157
+ time_unit = ParseTimeWithUnit_Stage ()
158
+ prioritize_exact = PrioritizeExactMatchOverFuzzyMatch ()
159
+ artifact_term_combo = TermArtifactCombinations_Stage ()
147
160
148
161
stages = [
149
162
key_val_filt ,
@@ -158,8 +171,8 @@ def p_48():
158
171
spell_var ,
159
172
man_at_syn ,
160
173
acr_to_expan ,
161
- time_unit ,
162
174
exact_match ,
175
+ time_unit ,
163
176
two_char_match ,
164
177
prop_spec_syn ,
165
178
fuzzy_match ,
@@ -169,13 +182,15 @@ def p_48():
169
182
cellline_to_implied_disease ,
170
183
subphrase_linked ,
171
184
cust_conseq ,
185
+ artifact_term_combo ,
172
186
real_val ,
173
187
filt_match_priority ,
174
188
infer_cell_line ,
175
189
infer_dev_stage ,
176
- cell_culture ]
177
- return pc .Pipeline (stages , defaultdict (lambda : 1.0 ))
178
-
190
+ cell_culture ,
191
+ prioritize_exact
192
+ ]
193
+ return Pipeline (stages , defaultdict (lambda : 1.0 ))
179
194
180
195
if __name__ == "__main__" :
181
196
main ()
0 commit comments