Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,4 @@ venv.bak/

# SQL files
.sql
__pycache__/
46 changes: 46 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,52 @@

Data backup and recovery service for the CALYPR systems πŸ”„

## Deployment

This service is deployed using [Helm charts](https://github.com/ohsu-comp-bio/helm-charts/tree/main/charts/backups). The helm chart provides:

- Automated backup scheduling via CronJob
- Secret management integration with existing PostgreSQL secrets
- Configurable S3 storage backends
- Simple deployment with `helm install`

For manual job execution:
```bash
# Create a backup job
kubectl create job backup-job --from=cronjob/backup-service-cronjob --namespace backups

# Create a restore job (set OPERATION=restore environment variable)
kubectl create job restore-job --from=cronjob/backup-service-cronjob --namespace backups
# Note: You'll need to patch the created job to set the OPERATION=restore environment variable
```

## Configuration

The service can be configured through environment variables:

- **OPERATION**: `backup` (default) or `restore` - determines the operation mode
- **RESTORE_DIR**: Directory path for restore operations (defaults to timestamped directory)
- **PGPASSWORD**: Can be sourced from the existing `local-postgresql` secret (base64 encoded)
- **GRIP_GRAPH**: Graph name (should be configurable via helm global config)
- **GRIP_LIMIT**: Query limit (should be removed for production use)

The helm chart automatically handles secret management and configuration from global helm values.

## Best Practices

### Namespace Configuration
While the helm chart defaults to a separate `backups` namespace, consider deploying in the same namespace as your databases. Because the backup service needs direct access to database resources, co-locating it simplifies both network access and secret sharing.

### Secret Management
- **No separate secrets needed**: PGPASSWORD can be extracted from the existing `local-postgresql` secret (base64 encoded)
- **Helm integration**: All configuration should be managed through helm values files
- **S3 credentials**: Configure S3 bucket and credentials through helm secrets file

### Performance and Storage
- **Remove query limits**: Production deployments should remove GRIP_LIMIT for complete backups
- **Backup retention**: Implement a retention policy (e.g., keep daily backups for 30 days, weekly for 3+ months)
- **Global configuration**: Use helm global config for shared values like graph names instead of hardcoding

# 2. Quick Start ⚑

```sh
Expand Down
87 changes: 64 additions & 23 deletions entrypoint.sh
100644 β†’ 100755
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,69 @@ set -e

TIMESTAMP=$(date +"%Y-%m-%dT%H:%M:%S")

# Default operation is backup, but can be overridden with OPERATION env var
OPERATION=${OPERATION:-backup}

export DIR="${DIR}/${TIMESTAMP}"

# Postgres Dump
bak --debug pg dump \
--dir "${DIR}" \
--host "${PGHOST}" \
--user "${PGUSER}" \
--password "${PGPASSWORD}"

# GRIP Backup
bak --debug grip backup \
--dir "${DIR}" \
--host "${GRIP_HOST}" \
--graph "${GRIP_GRAPH}" \
--limit "${GRIP_LIMIT}" \
--vertex \
--edge

# S3 Upload
bak --debug s3 upload \
--dir "${DIR}" \
--endpoint "${ENDPOINT}" \
--bucket "${BUCKET}" \
--key "${KEY}" \
--secret "${SECRET}"
if [ "$OPERATION" = "backup" ]; then
echo "Starting backup operation..."

# Postgres Dump
bak --debug pg dump \
--dir "${DIR}" \
--host "${PGHOST}" \
--user "${PGUSER}" \
--password "${PGPASSWORD}"

# GRIP Backup
bak --debug grip backup \
--dir "${DIR}" \
--host "${GRIP_HOST}" \
--graph "${GRIP_GRAPH}" \
--limit "${GRIP_LIMIT}" \
--vertex \
--edge

# S3 Upload
bak --debug s3 upload \
--dir "${DIR}" \
--endpoint "${ENDPOINT}" \
--bucket "${BUCKET}" \
--key "${KEY}" \
--secret "${SECRET}"

echo "Backup operation completed successfully"

elif [ "$OPERATION" = "restore" ]; then
echo "Starting restore operation..."

# S3 Download - restore from specified backup directory or latest
RESTORE_DIR=${RESTORE_DIR:-"${DIR}"}

bak --debug s3 download \
--dir "${RESTORE_DIR}" \
--endpoint "${ENDPOINT}" \
--bucket "${BUCKET}" \
--key "${KEY}" \
--secret "${SECRET}"

# Postgres Restore
bak --debug pg restore \
--dir "${RESTORE_DIR}" \
--host "${PGHOST}" \
--user "${PGUSER}" \
--password "${PGPASSWORD}"

# GRIP Restore
bak --debug grip restore \
--dir "${RESTORE_DIR}" \
--host "${GRIP_HOST}" \
--graph "${GRIP_GRAPH}"

echo "Restore operation completed successfully"

else
echo "Error: Unknown operation '${OPERATION}'. Valid operations are 'backup' or 'restore'"
exit 1
fi
77 changes: 42 additions & 35 deletions src/backup/grip/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def _getEdges(grip: GripConfig, graph: str, limit: int) -> list[str]:

G = c.graph(graph)

for i in G.query().E().limit(limit):
for i in G.E().limit(limit):
edges.append(i)

return edges
Expand All @@ -62,7 +62,7 @@ def _getVertices(grip: GripConfig, graph: str, limit: int) -> list[str]:

G = c.graph(graph)

for i in G.query().V().limit(limit):
for i in G.V().limit(limit):
vertices.append(i)

return vertices
Expand Down Expand Up @@ -91,12 +91,12 @@ def _dump(grip: GripConfig, graph: str, limit: int, vertex: bool, edge: bool, ou
# write vertex and edge objects from grip DB to file
if vertex:
with open(out / f"{graph}.vertices", "wb") as f:
for i in G.query().V().limit(limit):
for i in G.V().limit(limit):
f.write(orjson.dumps(i, option=orjson.OPT_APPEND_NEWLINE))

if edge:
with open(out / f"{graph}.edges", "wb") as f:
for i in G.query().E().limit(limit):
for i in G.E().limit(limit):
f.write(orjson.dumps(i, option=orjson.OPT_APPEND_NEWLINE))

# TODO: At this point you will need to reconnect to the new grip instance to load the data that was dumped
Expand All @@ -107,34 +107,41 @@ def _restore(grip: GripConfig, graph: str, dir: Path):
conn = _connect(grip)
G = conn.graph(graph)

bulkV = G.bulkAdd()
with open("grip.vertices", "rb") as f:
count = 0
for i in f:
data = orjson.loads(i)
_id = data["_id"]
_label = data["_label"]
del data["_id"], data["_label"]
bulkV.addVertex(_id, _label, data)
count += 1
if count % 10000 == 0:
print("loaded %d vertices" % count)
err = bulkV.execute()
print("Vertices load res: ", str(err))

bulkE = G.bulkAdd()
with open("grip.edges", "rb") as f:
count = 0
for i in f:
data = orjson.loads(i)
_id = data["_id"]
_label = data["_label"]
_to = data["_to"]
_from = data["_from"]
del data["_id"], data["_label"], data["_to"], data["_from"]
bulkE.addEdge(_to, _from, _label, data=data, gid=_id)
count += 1
if count % 10000 == 0:
print("loaded %d edges" % count)
err = bulkE.execute()
print("Edges load res: ", str(err))
vertex_file = dir / f"{graph}.vertices"
edge_file = dir / f"{graph}.edges"

# Load vertices if file exists
if vertex_file.exists():
bulkV = G.bulkAdd()
with open(vertex_file, "rb") as f:
count = 0
for i in f:
data = orjson.loads(i)
_id = data["_id"]
_label = data["_label"]
del data["_id"], data["_label"]
bulkV.addVertex(_id, _label, data)
count += 1
if count % 10000 == 0:
print("loaded %d vertices" % count)
err = bulkV.execute()
print("Vertices load res: ", str(err))

# Load edges if file exists
if edge_file.exists():
bulkE = G.bulkAdd()
with open(edge_file, "rb") as f:
count = 0
for i in f:
data = orjson.loads(i)
_id = data["_id"]
_label = data["_label"]
_to = data["_to"]
_from = data["_from"]
del data["_id"], data["_label"], data["_to"], data["_from"]
bulkE.addEdge(_to, _from, _label, data=data, gid=_id)
count += 1
if count % 10000 == 0:
print("loaded %d edges" % count)
err = bulkE.execute()
print("Edges load res: ", str(err))
Binary file not shown.
Binary file not shown.