diff --git a/.gitignore b/.gitignore
index 99dbb2e..0a451c2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -113,3 +113,4 @@ venv.bak/
 
 # SQL files
 .sql
+__pycache__/
diff --git a/README.md b/README.md
index 3351be9..4ef4171 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,52 @@ Data backup and recovery service for the CALYPR systems 🔄
 
 
+## Deployment
+
+This service is deployed using [Helm charts](https://github.com/ohsu-comp-bio/helm-charts/tree/main/charts/backups). The helm chart provides:
+
+- Automated backup scheduling via CronJob
+- Secret management integration with existing PostgreSQL secrets
+- Configurable S3 storage backends
+- Simple deployment with `helm install`
+
+For manual job execution:
+```bash
+# Create a backup job
+kubectl create job backup-job --from=cronjob/backup-service-cronjob --namespace backups
+
+# Create a restore job (set OPERATION=restore environment variable)
+kubectl create job restore-job --from=cronjob/backup-service-cronjob --namespace backups
+# Note: You'll need to patch the job to set OPERATION=restore environment variable
+```
+
+## Configuration
+
+The service can be configured through environment variables:
+
+- **OPERATION**: `backup` (default) or `restore` - determines the operation mode
+- **RESTORE_DIR**: Directory path for restore operations (defaults to timestamped directory)
+- **PGPASSWORD**: Can be sourced from existing `local-postgresql` secret (base64 encoded)
+- **GRIP_GRAPH**: Graph name (should be configurable via helm global config)
+- **GRIP_LIMIT**: Query limit (should be removed for production use)
+
+The helm chart automatically handles secret management and configuration from global helm values.
+
+## Best Practices
+
+### Namespace Configuration
+While the helm chart defaults to a separate `backups` namespace, consider deploying in the same namespace as your databases to simplify network access and secret sharing, as the backup service needs direct access to database resources.
+
+### Secret Management
+- **No separate secrets needed**: PGPASSWORD can be extracted from existing `local-postgresql` secret (base64 encoded)
+- **Helm integration**: All configuration should be managed through helm values files
+- **S3 credentials**: Configure S3 bucket and credentials through helm secrets file
+
+### Performance and Storage
+- **Remove query limits**: Production deployments should remove GRIP_LIMIT for complete backups
+- **Backup retention**: Implement a retention policy (e.g., keep daily backups for 30 days, weekly for 3+ months)
+- **Global configuration**: Use helm global config for shared values like graph names instead of hardcoding
+
 
 # 2. Quick Start ⚡
 
 ```sh
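The README note above leaves "patch the job to set OPERATION=restore" unspecified. Because a Job's pod template is immutable once the Job exists, one hedged way to do it is to render the Job from the CronJob and inject the variable before applying. This is a sketch only: the Job and CronJob names come from the README snippet, and `yq` (v4) is an assumed extra tool, not something the chart provides.

```bash
# Sketch: create a restore Job from the backup CronJob with OPERATION=restore set.
# Job/CronJob names are taken from the README above; yq v4 is an assumed dependency.
kubectl create job restore-job \
  --from=cronjob/backup-service-cronjob \
  --namespace backups \
  --dry-run=client -o yaml \
  | yq '.spec.template.spec.containers[0].env += [{"name": "OPERATION", "value": "restore"}]' \
  | kubectl apply --namespace backups -f -
```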
diff --git a/entrypoint.sh b/entrypoint.sh
old mode 100644
new mode 100755
index bf53b42..8687037
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -3,28 +3,69 @@ set -e
 
 TIMESTAMP=$(date +"%Y-%m-%dT%H:%M:%S")
 
+# Default operation is backup, but can be overridden with OPERATION env var
+OPERATION=${OPERATION:-backup}
+
 export DIR="${DIR}/${TIMESTAMP}"
 
-# Postgres Dump
-bak --debug pg dump \
-    --dir "${DIR}" \
-    --host "${PGHOST}" \
-    --user "${PGUSER}" \
-    --password "${PGPASSWORD}"
-
-# GRIP Backup
-bak --debug grip backup \
-    --dir "${DIR}" \
-    --host "${GRIP_HOST}" \
-    --graph "${GRIP_GRAPH}" \
-    --limit "${GRIP_LIMIT}" \
-    --vertex \
-    --edge
-
-# S3 Upload
-bak --debug s3 upload \
-    --dir "${DIR}" \
-    --endpoint "${ENDPOINT}" \
-    --bucket "${BUCKET}" \
-    --key "${KEY}" \
-    --secret "${SECRET}"
+if [ "$OPERATION" = "backup" ]; then
+    echo "Starting backup operation..."
+
+    # Postgres Dump
+    bak --debug pg dump \
+        --dir "${DIR}" \
+        --host "${PGHOST}" \
+        --user "${PGUSER}" \
+        --password "${PGPASSWORD}"
+
+    # GRIP Backup
+    bak --debug grip backup \
+        --dir "${DIR}" \
+        --host "${GRIP_HOST}" \
+        --graph "${GRIP_GRAPH}" \
+        --limit "${GRIP_LIMIT}" \
+        --vertex \
+        --edge
+
+    # S3 Upload
+    bak --debug s3 upload \
+        --dir "${DIR}" \
+        --endpoint "${ENDPOINT}" \
+        --bucket "${BUCKET}" \
+        --key "${KEY}" \
+        --secret "${SECRET}"
+
+    echo "Backup operation completed successfully"
+
+elif [ "$OPERATION" = "restore" ]; then
+    echo "Starting restore operation..."
+
+    # S3 Download - restore from specified backup directory or latest
+    RESTORE_DIR=${RESTORE_DIR:-"${DIR}"}
+
+    bak --debug s3 download \
+        --dir "${RESTORE_DIR}" \
+        --endpoint "${ENDPOINT}" \
+        --bucket "${BUCKET}" \
+        --key "${KEY}" \
+        --secret "${SECRET}"
+
+    # Postgres Restore
+    bak --debug pg restore \
+        --dir "${RESTORE_DIR}" \
+        --host "${PGHOST}" \
+        --user "${PGUSER}" \
+        --password "${PGPASSWORD}"
+
+    # GRIP Restore
+    bak --debug grip restore \
+        --dir "${RESTORE_DIR}" \
+        --host "${GRIP_HOST}" \
+        --graph "${GRIP_GRAPH}"
+
+    echo "Restore operation completed successfully"
+
+else
+    echo "Error: Unknown operation '${OPERATION}'. Valid operations are 'backup' or 'restore'"
+    exit 1
+fi
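To exercise the new restore branch outside the cluster, only the environment variables read by entrypoint.sh above need to be set. A minimal sketch, assuming placeholder hosts, bucket names, and a `postgres-password` key in the `local-postgresql` secret (the key name is an assumption, not confirmed by the chart):

```bash
# Sketch of a local restore run of entrypoint.sh; every value here is a placeholder.
export OPERATION=restore
export RESTORE_DIR=/backups/2025-01-01T00:00:00   # existing backup prefix to download
export DIR=/backups                               # the script appends its own timestamp
export PGHOST=local-postgresql PGUSER=postgres
# Assumed secret key name; adjust to whatever the local-postgresql secret actually uses.
export PGPASSWORD="$(kubectl get secret local-postgresql -o jsonpath='{.data.postgres-password}' | base64 -d)"
export GRIP_HOST=grip GRIP_GRAPH=calypr
export ENDPOINT=https://s3.example.org BUCKET=calypr-backups KEY=... SECRET=...
./entrypoint.sh
```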
f"{graph}.edges" + + # Load vertices if file exists + if vertex_file.exists(): + bulkV = G.bulkAdd() + with open(vertex_file, "rb") as f: + count = 0 + for i in f: + data = orjson.loads(i) + _id = data["_id"] + _label = data["_label"] + del data["_id"], data["_label"] + bulkV.addVertex(_id, _label, data) + count += 1 + if count % 10000 == 0: + print("loaded %d vertices" % count) + err = bulkV.execute() + print("Vertices load res: ", str(err)) + + # Load edges if file exists + if edge_file.exists(): + bulkE = G.bulkAdd() + with open(edge_file, "rb") as f: + count = 0 + for i in f: + data = orjson.loads(i) + _id = data["_id"] + _label = data["_label"] + _to = data["_to"] + _from = data["_from"] + del data["_id"], data["_label"], data["_to"], data["_from"] + bulkE.addEdge(_to, _from, _label, data=data, gid=_id) + count += 1 + if count % 10000 == 0: + print("loaded %d edges" % count) + err = bulkE.execute() + print("Edges load res: ", str(err)) diff --git a/src/backup/postgres/__pycache__/__init__.cpython-312.pyc b/src/backup/postgres/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000..2395a5d Binary files /dev/null and b/src/backup/postgres/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/backup/s3/__pycache__/__init__.cpython-312.pyc b/src/backup/s3/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000..34eef7c Binary files /dev/null and b/src/backup/s3/__pycache__/__init__.cpython-312.pyc differ