# ----------------------------------
# INSTALL & TEST
# ----------------------------------
install_requirements:
	@pip install -r requirements.txt

check_code:
	@flake8 scripts/* NLPmoviereviews/*.py

black:
	@black scripts/* NLPmoviereviews/*.py

test:
	@coverage run -m pytest tests/*.py
	@coverage report -m --omit="${VIRTUAL_ENV}/lib/python*"
# placeholder target: functional tests are not implemented yet
ftest:
	@echo "ftest: functional tests not implemented yet"
clean:
	@rm -f */version.txt
	@rm -f .coverage
	@rm -fr */__pycache__ */*.pyc __pycache__
	@rm -fr build dist
	@rm -fr NLPmoviereviews-*.dist-info
	@rm -fr NLPmoviereviews.egg-info

install:
	@pip install . -U

all: clean install test black check_code
# print per-file line counts plus a total, for the package, scripts, and tests
count_lines:
	@find ./ -name '*.py' -exec wc -l {} \; | sort -n | awk \
		'{printf "%4s %s\n", $$1, $$2}{s+=$$1}END{print s}'
	@echo ''
	@find ./scripts -name '*-*' -exec wc -l {} \; | sort -n | awk \
		'{printf "%4s %s\n", $$1, $$2}{s+=$$1}END{print s}'
	@echo ''
	@find ./tests -name '*.py' -exec wc -l {} \; | sort -n | awk \
		'{printf "%4s %s\n", $$1, $$2}{s+=$$1}END{print s}'
	@echo ''
# ----------------------------------
# GOOGLE CLOUD
# ----------------------------------
# project id - replace with your GCP project id
PROJECT_ID=
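# hypothetical example, for illustration only:
# PROJECT_ID=nlp-movie-reviews-123456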
# bucket name - replace with your GCP bucket name
BUCKET_NAME=
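# hypothetical example (bucket names must be globally unique):
# BUCKET_NAME=wagon-ml-movie-reviews-123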
# choose your region from https://cloud.google.com/storage/docs/locations#available_locations
REGION=eu
set_project:
	@gcloud config set project ${PROJECT_ID}

create_bucket:
	@gsutil mb -l ${REGION} -p ${PROJECT_ID} gs://${BUCKET_NAME}
# replace with your local path to the `train_1k.csv` and make sure to put the path between quotes
LOCAL_PATH=" "
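# hypothetical example:
# LOCAL_PATH="/Users/<user>/code/data/train_1k.csv"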
# bucket directory in which to store the uploaded file (`data` is an arbitrary name that we choose to use)
BUCKET_FOLDER=datasets
# name for the uploaded file inside of the bucket (we choose not to rename the file that we upload)
BUCKET_FILE_NAME=$(shell basename ${LOCAL_PATH})
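# with the hypothetical LOCAL_PATH above, `basename` strips the directories,
# so BUCKET_FILE_NAME evaluates to train_1k.csv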
upload_data:
# example: gsutil cp train_1k.csv gs://wagon-ml-my-bucket-name/data/train_1k.csv
	@gsutil cp ${LOCAL_PATH} gs://${BUCKET_NAME}/${BUCKET_FOLDER}/${BUCKET_FILE_NAME}
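# usage, once the variables above are filled in: make upload_data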
##### Training - - - - - - - - - - - - - - - - - - - - - -
# bucket directory that will store the packages uploaded to GCP for the training
BUCKET_TRAINING_FOLDER=trainings
##### Model - - - - - - - - - - - - - - - - - - - - - - - -
# not required here
### GCP AI Platform - - - - - - - - - - - - - - - - - - - -
##### Machine configuration - - - - - - - - - - - - - - - -
# AI Platform jobs run in a specific compute region such as europe-west1;
# the `eu` multi-region used for the bucket is not accepted here
# (note: this assignment overrides REGION for every target in this Makefile)
REGION=europe-west1
# runtime 1.15 ships Python 3.7; Python 3.8 is not available for this runtime
PYTHON_VERSION=3.7
FRAMEWORK=scikit-learn
RUNTIME_VERSION=1.15
##### Package params - - - - - - - - - - - - - - - - - - -
PACKAGE_NAME=NLPmoviereviews
FILENAME=main
##### Job - - - - - - - - - - - - - - - - - - - - - - - - -
JOB_NAME=nlpreview_training_$(shell date +'%Y%m%d_%H%M%S')
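# expands to something like nlpreview_training_20240101_120000; the timestamp
# keeps each submitted job name unique, as AI Platform requires
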
run_locally:
	@python -m ${PACKAGE_NAME}.${FILENAME}
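# with the package params above this is equivalent to: python -m NLPmoviereviews.main
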
gcp_submit_training:
	gcloud ai-platform jobs submit training ${JOB_NAME} \
		--job-dir gs://${BUCKET_NAME}/${BUCKET_TRAINING_FOLDER} \
		--package-path ${PACKAGE_NAME} \
		--module-name ${PACKAGE_NAME}.${FILENAME} \
		--python-version=${PYTHON_VERSION} \
		--runtime-version=${RUNTIME_VERSION} \
		--region ${REGION} \
		--stream-logs
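# the submitted job can also be inspected from another shell, e.g.:
#   gcloud ai-platform jobs describe <job name printed at submission>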

streamlit:
	@streamlit run app.py