-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnlp_diagram.py
77 lines (59 loc) · 2.2 KB
/
nlp_diagram.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from diagrams import Cluster, Diagram, Edge
from diagrams.aws.compute import ECS
from diagrams.aws.database import RDS
from diagrams.aws.network import Route53
# Build the "Natural Language Processing Overview" diagram: raw data flows
# through preprocessing, feature engineering, model selection and training,
# and ends in a set of example applications.
#
# direction='TB' lays the graph out top-to-bottom; show=False keeps the
# rendered file from being opened automatically once the block exits.
#
# NOTE(review): 'Clean Data', 'Select Model' and 'Final Model' are placed
# outside the clusters here -- confirm that matches the intended grouping.
with Diagram("Natural Language Processing Overview", show=False, direction='TB') as d:
    data = ECS('Data')

    # --- Preprocessing: linear clean-up, then two alternative normalizers ---
    with Cluster('Preprocessing'):
        remove_punct = ECS('Remove punctuation')
        tolken = ECS('Tokenization')
        stopwords = ECS('Remove Stopwords')
        remove_punct >> tolken >> stopwords

        stem = ECS('Stemming')
        lemma = ECS('Lemmatizing')
        stopwords >> stem
        stopwords >> lemma

    # Both normalization branches converge on the cleaned data set.
    clean_data = ECS('Clean Data')
    stem >> clean_data
    lemma >> clean_data

    # --- Feature engineering ---
    with Cluster('Feature Engineering'):
        with Cluster('Vectorize'):
            vectorize = [
                ECS('Count Vectorization'),
                ECS('N-Grams'),
                ECS('TF-IDF'),
            ]
        transformation = ECS('Box-Cox Power Transformation')
        standardizing_data = ECS('Standardizing data')
        feat_creation = ECS('Feature Creation')

    # Connecting a node to a list fans the edge out to every node in the list.
    clean_data >> vectorize
    clean_data >> transformation
    clean_data >> standardizing_data
    clean_data >> feat_creation

    # Every feature-engineering step feeds model selection. The three
    # non-vectorize edges used to start at clean_data, which drew the same
    # edge three times and left these steps without any outgoing edge.
    select_model = ECS('Select Model')
    vectorize >> select_model
    transformation >> select_model
    standardizing_data >> select_model
    feat_creation >> select_model

    # --- Modelling ---
    with Cluster('Models'):
        ml = [
            ECS('Random Forest'),
            ECS('Gradient Boosting'),
            ECS('RNNs'),
        ]

    with Cluster('Metrics'):
        model_evaluation = [
            ECS('Accuracy'),
            ECS('Recall'),
            ECS('Precision'),
        ]

    with Cluster('Machine Learning'):
        ml_model = ECS('ML Model')
        cross_validation = ECS('Cross Validation')
        metrics = ECS('Metrics')

    select_model >> ml_model
    ml_model >> cross_validation
    cross_validation >> metrics
    # Deliberate cycle: the metrics node points back at the model node.
    metrics >> ml_model
    ml_model >> ml
    metrics >> model_evaluation

    final_model = ECS('Final Model')
    ml_model >> final_model

    # --- Applications of the final model ---
    with Cluster('Applications'):
        applications = [
            ECS('Sentiment Analysis'),
            ECS('Topic Modeling'),
            ECS('Text Classification'),
            ECS('Sentence Segmentation'),
        ]
    final_model >> applications

    # Entry edge: raw data enters the preprocessing pipeline.
    data >> remove_punct

# Bare expression: a no-op when run as a script. Presumably kept so that a
# notebook cell ending with this line displays the rendered diagram.
d