include base_images.variable
SHELL := /bin/bash
CONDA_ENV_NAME=gnn
PYTHON_VERSION=3.9
DATE:=$(shell /bin/date "+%Y%m%d-%H%M")
PROJECT:=external-snap-ci-github-gigl
DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME:=gcr.io/${PROJECT}/gbml_dataflow_runtime
DOCKER_IMAGE_MAIN_CUDA_NAME:=gcr.io/${PROJECT}/gbml_cuda
DOCKER_IMAGE_MAIN_CPU_NAME:=gcr.io/${PROJECT}/gbml_cpu
DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME_WITH_TAG:=${DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME}:${DATE}
DOCKER_IMAGE_MAIN_CUDA_NAME_WITH_TAG:=${DOCKER_IMAGE_MAIN_CUDA_NAME}:${DATE}
DOCKER_IMAGE_MAIN_CPU_NAME_WITH_TAG:=${DOCKER_IMAGE_MAIN_CPU_NAME}:${DATE}
PY_TEST_FILES?="*_test.py"
get_ver_hash:
# Fetches the git commit hash and stores it in `$(GIT_COMMIT)`
git diff --quiet || { echo "Branch is dirty; please commit changes and ensure the branch is clean."; exit 1; }
$(eval GIT_COMMIT=$(shell git log -1 --pretty=format:"%H"))
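# Usage sketch (hypothetical target, not defined in this Makefile): depend on
# get_ver_hash, then reference $(GIT_COMMIT) in your own recipe, e.g.:
#   print_release_commit: get_ver_hash
#       @echo "Releasing at commit $(GIT_COMMIT)"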
initialize_environment:
conda create -y -c conda-forge --name ${CONDA_ENV_NAME} python=${PYTHON_VERSION} pip-tools
@echo "If the conda environment was installed successfully, be sure to activate it and run \`make install_dev_deps\` or \`make install_deps\` to complete setup"
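# Example first-time setup flow (a sketch; `gnn` is the value of CONDA_ENV_NAME above):
#   make initialize_environment
#   conda activate gnn
#   make install_dev_deps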
clean_environment:
if [ "${CONDA_DEFAULT_ENV}" == "${CONDA_ENV_NAME}" ]; then \
pip uninstall -y -r <(pip freeze); \
else \
echo "Switch your local conda env to ${CONDA_ENV_NAME} first."; \
fi
reset_environment: generate_cpu_hashed_requirements clean_environment install_deps
rebuild_dev_environment:
# Each recipe line runs in its own shell, so `conda activate`/`conda deactivate` would not
# persist between lines; run this target from outside the ${CONDA_ENV_NAME} env, and we use
# `conda run` to install the dev deps inside the recreated env.
conda remove --name ${CONDA_ENV_NAME} --all -y
make initialize_environment
conda run -n ${CONDA_ENV_NAME} make install_dev_deps
check_if_valid_env:
@command -v docker >/dev/null 2>&1 || { echo >&2 "docker is required but it's not installed. Aborting."; exit 1; }
@command -v gsutil >/dev/null 2>&1 || { echo >&2 "gsutil is required but it's not installed. Aborting."; exit 1; }
@python --version | grep -q "Python ${PYTHON_VERSION}" || (echo "Python version is not ${PYTHON_VERSION}" && exit 1)
# If developing, use this target to install the dev deps instead of `make install_deps`
install_dev_deps: check_if_valid_env
@# Install the docker driver that will allow us to build multi-arch images
bash ./requirements/install_py_deps.sh --dev
bash ./requirements/install_scala_deps.sh
pip install -e ./python/
pre-commit install --hook-type pre-commit --hook-type pre-push
# For production environments; if you are developing, use `make install_dev_deps` instead
install_deps:
bash ./requirements/install_py_deps.sh
bash ./requirements/install_scala_deps.sh
pip install -e ./python/
# Can only be run on an arm64 Mac; otherwise the generated hashed requirements file will be wrong
generate_mac_arm64_cpu_hashed_requirements:
pip-compile -v --allow-unsafe --generate-hashes --no-emit-index-url --resolver=backtracking \
--output-file=requirements/darwin_arm64_requirements_unified.txt \
--extra torch23-cpu --extra transform \
./python/pyproject.toml
# Can only be run on an arm64 Mac; otherwise the generated hashed requirements file will be wrong
generate_dev_mac_arm64_cpu_hashed_requirements:
pip-compile -v --allow-unsafe --generate-hashes --no-emit-index-url --resolver=backtracking \
--output-file=requirements/dev_darwin_arm64_requirements_unified.txt \
--extra torch23-cpu --extra transform --extra dev \
./python/pyproject.toml
# Can only be run on Linux; otherwise the generated hashed requirements file will be wrong
generate_linux_cpu_hashed_requirements:
pip-compile -v --allow-unsafe --generate-hashes --no-emit-index-url --resolver=backtracking \
--output-file=requirements/linux_cpu_requirements_unified.txt \
--extra torch23-cpu --extra transform \
./python/pyproject.toml
# Can only be run on Linux; otherwise the generated hashed requirements file will be wrong
generate_dev_linux_cpu_hashed_requirements:
pip-compile -v --allow-unsafe --generate-hashes --no-emit-index-url --resolver=backtracking \
--output-file=requirements/dev_linux_cpu_requirements_unified.txt \
--extra torch23-cpu --extra transform --extra dev \
./python/pyproject.toml
# Can only be run on Linux; otherwise the generated hashed requirements file will be wrong
generate_linux_cuda_hashed_requirements:
pip-compile -v --allow-unsafe --generate-hashes --no-emit-index-url --resolver=backtracking \
--output-file=requirements/linux_cuda_requirements_unified.txt \
--extra torch23-cuda-121 --extra transform \
./python/pyproject.toml
# Can only be run on Linux; otherwise the generated hashed requirements file will be wrong
generate_dev_linux_cuda_hashed_requirements:
pip-compile -v --allow-unsafe --generate-hashes --no-emit-index-url --resolver=backtracking \
--output-file=requirements/dev_linux_cuda_requirements_unified.txt \
--extra torch23-cuda-121 --extra transform --extra dev \
./python/pyproject.toml
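# Example (a sketch): regenerating the dev hashed requirements on a Linux CUDA machine,
# then committing the regenerated file:
#   make generate_dev_linux_cuda_hashed_requirements
#   git add requirements/dev_linux_cuda_requirements_unified.txt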
# These are a collection of tests that are run before anything is installed, using tools available on the host.
# May include tests that check the sanity of the repo state, i.e. ones that may even cause the
# installation scripts to fail
precondition_tests:
python shared/tests/requirements_check.py
assert_yaml_configs_parse:
python scripts/assert_yaml_configs_parse.py -d .
# TODO: (Open Source) Integration and unit tests currently run with project specific information. Before open sourcing we should swap out this resource config to point to all the public assets (gigl-public).
# Set PY_TEST_FILES=<TEST_FILE_NAME_GLOB> to test a specific file.
# Ex. `make unit_test_py PY_TEST_FILES="eval_metrics_test.py"`
# By default, runs all tests under python/tests/unit.
# See the help text for "--test_file_pattern" in python/tests/test_args.py for more details.
unit_test_py: clean_build_files_py type_check
( cd python ; \
python -m tests.unit.main \
--env=test \
--resource_config_uri deployment/configs/unittest_resource_config.yaml \
--test_file_pattern=$(PY_TEST_FILES) \
)
unit_test_scala: clean_build_files_scala
( cd scala; sbt test )
( cd scala_spark35 ; sbt test )
# Runs unit tests for Python and Scala
# Asserts Python and Scala files are formatted correctly.
# Asserts YAML configs can be parsed.
# TODO(kmonte): We shouldn't be making assertions about format in unit_test, but we do so that
# we don't need to setup the dev environment twice in jenkins.
# Eventually, we should look into splitting these up.
# We run `make check_format` separately instead of as a dependent make rule so that it always runs after the actual testing.
# We don't want to fail the tests due to non-conformant formatting during development.
unit_test: unit_test_py unit_test_scala assert_yaml_configs_parse
make check_format
check_format_py:
autoflake --check --config python/pyproject.toml python scripts examples
isort --check-only --settings-path=python/pyproject.toml python scripts examples
black --check --config=python/pyproject.toml python scripts examples
check_format_scala:
( cd scala; sbt "scalafmtCheckAll; scalafixAll --check"; )
( cd scala_spark35; sbt "scalafmtCheckAll; scalafixAll --check"; )
check_format: check_format_py check_format_scala
# Set PY_TEST_FILES=<TEST_FILE_NAME_GLOB> to test a specific file.
# Ex. `make integration_test PY_TEST_FILES="dataflow_test.py"`
# By default, runs all tests under python/tests/integration.
# See the help text for "--test_file_pattern" in python/tests/test_args.py for more details.
integration_test:
( \
cd python ;\
python -m tests.integration.main \
--env=test \
--resource_config_uri deployment/configs/unittest_resource_config.yaml \
--test_file_pattern=$(PY_TEST_FILES) \
)
mock_assets:
( cd python ; python -m gigl.src.mocking.dataset_asset_mocking_suite --resource_config_uri="../do_not_open_source/resource_configs/dev_default_resource_config.yaml" --env test)
format_py:
autoflake --config python/pyproject.toml python scripts
isort --settings-path=python/pyproject.toml python scripts
black --config=python/pyproject.toml python scripts
format_scala:
# We run "clean" before the formatting because otherwise some "scalafix.sbt.ScalafixFailed: NoFilesError" may get thrown after switching branches...
# TODO(kmonte): Once open sourced, follow up with scalafix people on this.
( cd scala; sbt clean scalafixAll scalafmtAll )
( cd scala_spark35; sbt clean scalafixAll scalafmtAll )
format: format_py format_scala
type_check:
mypy python scripts examples --check-untyped-defs
# compiles current working state of scala projects to local jars
compile_jars:
@echo "Compiling jars..."
@python -m scripts.scala_packager
# Removes local jar files from python/deps directory
remove_jars:
@echo "Removing jars..."
rm -rf python/deps/scala/subgraph_sampler/jars/*
push_cpu_docker_image:
@python -m scripts.build_and_push_docker_image --predefined_type cpu --image_name ${DOCKER_IMAGE_MAIN_CPU_NAME_WITH_TAG}
push_cuda_docker_image:
@python -m scripts.build_and_push_docker_image --predefined_type cuda --image_name ${DOCKER_IMAGE_MAIN_CUDA_NAME_WITH_TAG}
push_dataflow_docker_image:
@python -m scripts.build_and_push_docker_image --predefined_type dataflow --image_name ${DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME_WITH_TAG}
# Dockerize the src code and push it to gcr.
# You will need to update the base image tag whenever the requirements are updated by:
# 1) running `make push_new_docker_base_image`
# 2) replacing the git hash `DOCKER_LATEST_BASE_IMAGE_TAG` that tags the base image with the newly generated tag
# Note: don't forget to run `make generate_cpu_hashed_requirements` and `make generate_cuda_hashed_requirements`
# before running this if you've updated requirements.in
# You may also be able to use the `/make_cuda_hashed_req` comment command on your PR to help build the cuda hashed requirements.
# See ci.yaml or type `/help` in your PR for more info.
push_new_docker_images: push_cuda_docker_image push_cpu_docker_image push_dataflow_docker_image
@echo "All Docker images compiled and pushed"
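# Example requirements-update workflow (a sketch based on the notes above):
#   make generate_linux_cpu_hashed_requirements
#   make generate_linux_cuda_hashed_requirements
#   make push_new_docker_base_image   # then update DOCKER_LATEST_BASE_IMAGE_TAG in base_images.variable
#   make push_new_docker_images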
# MARKED FOR REFACTOR - OPEN SOURCE
# Compile and run an instance of the dev GNN kubeflow pipeline
# Example:
# make \
job_name="{alias}_run_dev_gnn_kubeflow_pipeline" \
start_at="inferencer" \
task_config_uri="gs://TEMP DEV GBML PLACEHOLDER/e2e_DIST_cora_anchor_link_pred_1/config_populator/frozen_gbml_config.yaml" \
resource_config_uri="internal/resource_configs/platform/dev_mega_resource_config.yaml" \
run_dev_gnn_kubeflow_pipeline
run_dev_gnn_kubeflow_pipeline: compile_jars push_new_docker_images
python -m do_not_open_source.deployment.gnn \
--container_image_cuda=${DOCKER_IMAGE_MAIN_CUDA_NAME_WITH_TAG} \
--container_image_cpu=${DOCKER_IMAGE_MAIN_CPU_NAME_WITH_TAG} \
--container_image_dataflow=${DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME_WITH_TAG} \
--kfp_env=dev \
--action=run \
--job_name=$(job_name) \
--start_at=$(start_at) \
$(if $(stop_after),--stop_after=$(stop_after)) \
--task_config_uri=$(task_config_uri) \
--resource_config_uri=$(resource_config_uri)
# MARKED FOR DEPRECATION - OPEN SOURCE
# Generic make target to run e2e tests. Used by other make targets to run e2e tests.
# See usage w/ run_cora_nalp_e2e_kfp_test, run_cora_snc_e2e_kfp_test, run_cora_udl_e2e_kfp_test
# and run_all_e2e_tests
_run_e2e_kfp_test: compile_jars push_new_docker_images
$(eval BRANCH:=$(shell git rev-parse --abbrev-ref HEAD))
$(eval TRIMMED_BRANCH:=$(shell echo "${BRANCH}" | tr '/' '_' | cut -c 1-20 | tr '[:upper:]' '[:lower:]'))
$(eval TRIMMED_TIME:=$(shell date +%s | tail -c 6))
# Note: should_wait_for_job_to_finish defaults to empty, which the $(filter ...) check below
# treats as false; run_all_e2e_tests sets it to true.
@( \
set -e; \
read -a task_config_uris <<< "$(task_config_uris_str)"; \
read -a resource_config_uris <<< "$(resource_config_uris_str)"; \
read -a job_name_prefixes_str <<< "$(job_name_prefixes_str)"; \
if [ $${#task_config_uris[@]} -ne $${#resource_config_uris[@]} ] || [ $${#task_config_uris[@]} -ne $${#job_name_prefixes_str[@]} ]; then \
echo "Error: Arrays are not of the same length"; \
echo " task_config_uris = $${task_config_uris[@]}"; \
echo " resource_config_uris = $${resource_config_uris[@]}";\
echo " job_name_prefixes_str = $${job_name_prefixes_str[@]}"; \
exit 1; \
fi; \
for i in $${!task_config_uris[@]}; do \
job_name="$${job_name_prefixes_str[$$i]}_${TRIMMED_BRANCH}_${TRIMMED_TIME}"; \
CMD="python -m do_not_open_source.deployment.gnn \
--container_image_cuda=${DOCKER_IMAGE_MAIN_CUDA_NAME_WITH_TAG} \
--container_image_cpu=${DOCKER_IMAGE_MAIN_CPU_NAME_WITH_TAG} \
--container_image_dataflow=${DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME_WITH_TAG} \
--action=run \
--kfp_env=dev \
$(if $(filter ${should_wait_for_job_to_finish},true),--wait,) \
--job_name='$${job_name}' \
--start_at='config_populator' \
--task_config_uri='$${task_config_uris[$$i]}' \
--resource_config_uri='$${resource_config_uris[$$i]}'"; \
echo "Running: $$CMD"; \
if [ "$(should_send_job_to_background)" == true ]; then \
echo "Will run CMD in background..."; \
eval "$${CMD} &"; \
pids+=($$!); \
else \
eval "$${CMD}"; \
fi; \
done; \
if [ "$(should_send_job_to_background)" == true ]; then \
echo "Waiting for background jobs to finish..."; \
for pid in "$${pids[@]}"; do \
wait "$$pid"; \
done; \
echo "All background jobs finished"; \
fi; \
)
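# Example (a sketch): invoking the generic target directly with a single, hypothetical test;
# the run_*_e2e_kfp_test targets below set these variables for you:
#   make _run_e2e_kfp_test \
#     job_name_prefixes_str="my_test_on" \
#     task_config_uris_str="gigl/src/mocking/configs/some_template_gbml_config.yaml" \
#     resource_config_uris_str="deployment/configs/e2e_cicd_resource_config.yaml"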
run_cora_nalp_e2e_kfp_test: job_name_prefixes_str:="cora_nalp_test_on"
run_cora_nalp_e2e_kfp_test: task_config_uris_str:="gigl/src/mocking/configs/e2e_node_anchor_based_link_prediction_template_gbml_config.yaml"
run_cora_nalp_e2e_kfp_test: resource_config_uris_str:="deployment/configs/e2e_cicd_resource_config.yaml"
run_cora_nalp_e2e_kfp_test: _run_e2e_kfp_test
run_cora_snc_e2e_kfp_test: job_name_prefixes_str:="cora_snc_test_on"
run_cora_snc_e2e_kfp_test: task_config_uris_str:="gigl/src/mocking/configs/e2e_supervised_node_classification_template_gbml_config.yaml"
run_cora_snc_e2e_kfp_test: resource_config_uris_str:="deployment/configs/e2e_cicd_resource_config.yaml"
run_cora_snc_e2e_kfp_test: _run_e2e_kfp_test
# Note: the UDL dataset can hit a transient issue due to the UDL Split Strategy,
# where in some cases the root node doesn't properly get added back to
# the returned subgraph, meaning the trainer will fail.
run_cora_udl_e2e_kfp_test: job_name_prefixes_str:="cora_udl_test_on"
run_cora_udl_e2e_kfp_test: task_config_uris_str:="gigl/src/mocking/configs/e2e_udl_node_anchor_based_link_prediction_template_gbml_config.yaml"
run_cora_udl_e2e_kfp_test: resource_config_uris_str:="deployment/configs/e2e_cicd_resource_config.yaml"
run_cora_udl_e2e_kfp_test: _run_e2e_kfp_test
run_dblp_nalp_e2e_kfp_test: job_name_prefixes_str:="dblp_nalp_test_on"
run_dblp_nalp_e2e_kfp_test: task_config_uris_str:="gigl/src/mocking/configs/dblp_node_anchor_based_link_prediction_template_gbml_config.yaml"
run_dblp_nalp_e2e_kfp_test: resource_config_uris_str:="deployment/configs/e2e_cicd_resource_config.yaml"
run_dblp_nalp_e2e_kfp_test: _run_e2e_kfp_test
# Spawns a background job for each e2e test defined by job_name_prefixes_str, task_config_uris_str, and resource_config_uris_str.
# Each run passes --wait (should_wait_for_job_to_finish:=true) so it blocks until its KFP job finishes,
# and the recipe waits on all background jobs before returning.
run_all_e2e_tests: should_send_job_to_background:=true
run_all_e2e_tests: should_wait_for_job_to_finish:=true
run_all_e2e_tests: job_name_prefixes_str:=\
"cora_nalp_test_on" \
"cora_snc_test_on" \
"dblp_nalp_test_on"
# Removed UDL due to transient issue:
# "gigl/src/mocking/configs/e2e_udl_node_anchor_based_link_prediction_template_gbml_config.yaml"
run_all_e2e_tests: task_config_uris_str:=\
"gigl/src/mocking/configs/e2e_node_anchor_based_link_prediction_template_gbml_config.yaml" \
"gigl/src/mocking/configs/e2e_supervised_node_classification_template_gbml_config.yaml" \
"gigl/src/mocking/configs/dblp_node_anchor_based_link_prediction_template_gbml_config.yaml"
run_all_e2e_tests: resource_config_uris_str:=\
"deployment/configs/e2e_cicd_resource_config.yaml"\
"deployment/configs/e2e_cicd_resource_config.yaml"\
"deployment/configs/e2e_cicd_resource_config.yaml"
run_all_e2e_tests: _run_e2e_kfp_test
# MARKED FOR REFACTOR - OPEN SOURCE
# Compile instance of kfp pipeline
compile_gigl_kubeflow_pipeline: compile_jars push_new_docker_images
python -m do_not_open_source.deployment.gnn \
--action=compile \
--container_image_cuda=${DOCKER_IMAGE_MAIN_CUDA_NAME_WITH_TAG} \
--container_image_cpu=${DOCKER_IMAGE_MAIN_CPU_NAME_WITH_TAG} \
--container_image_dataflow=${DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME_WITH_TAG}
clean_build_files_py:
find . -name "*.pyc" -exec rm -f {} \;
clean_build_files_scala:
( cd scala; sbt clean; find . -type d -name "target" -prune -exec rm -rf {} \; )
( cd scala_spark35; sbt clean; find . -type d -name "target" -prune -exec rm -rf {} \; )
clean_build_files: clean_build_files_py clean_build_files_scala
# Call this to generate new proto definitions if any of the .proto files have been changed.
# We intentionally rebuild *all* protos with one command, as they should all be in sync.
# Run `make install_dev_deps` to setup the correct protoc versions.
compile_protos:
tools/python_protoc/bin/protoc \
--proto_path=proto \
--python_out=./python \
--mypy_out=./python \
proto/snapchat/research/gbml/*.proto
tools/scalapbc/scalapbc-0.11.11/bin/scalapbc \
--proto_path=proto \
--scala_out=scala/common/src/main/scala \
proto/snapchat/research/gbml/*.proto
tools/scalapbc/scalapbc-0.11.14/bin/scalapbc \
--proto_path=proto \
--scala_out=scala_spark35/common/src/main/scala \
proto/snapchat/research/gbml/*.proto
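# Example (a sketch): after editing any .proto file under proto/snapchat/research/gbml/,
# regenerate the Python and Scala bindings in one go:
#   make compile_protos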
spark_run_local_test:
tools/scala/spark-3.1.3-bin-hadoop3.2/bin/spark-submit \
--class org.apache.spark.examples.SparkPi \
--master local[8] \
tools/scala/spark-3.1.3-bin-hadoop3.2/examples/jars/spark-examples_2.12-3.1.3.jar \
100
stop_toaster:
# Stop all running docker containers; if there are no containers to stop, continue
docker stop $(shell docker ps -a -q) || true
# Deletes everything associated with all stopped containers including dangling resources
docker system prune -a --volumes
docker buildx prune