From 139c419268d97beaa112f1c751733393772f82ac Mon Sep 17 00:00:00 2001 From: makeworld Date: Mon, 22 Jul 2024 17:33:49 +0200 Subject: [PATCH] browsertrix: store crawl workflow metadata --- docs/attributes.md | 1 + webhook/browsertrix.go | 3 +++ 2 files changed, 4 insertions(+) diff --git a/docs/attributes.md b/docs/attributes.md index d86328c..220cd40 100644 --- a/docs/attributes.md +++ b/docs/attributes.md @@ -35,6 +35,7 @@ The majority of these attributes are set automatically upon ingestion, but all c - `project_id`: the name for the project this asset was ingested under - `project_path`: the path for the project within the sync folder - `asset_origin_sig_key_name`: may exist if the ingestion process involved verifiying a known, named public key +- Browsertrix crawl info: `crawl_workflow_name`, `crawl_workflow_description`, `crawl_workflow_tags` Encrypted files have the `encryption_type` attribute, currently always set to `secretstream`. See [encryption.md](./encryption.md) for more info. diff --git a/webhook/browsertrix.go b/webhook/browsertrix.go index 5c9ecb4..e8e6894 100644 --- a/webhook/browsertrix.go +++ b/webhook/browsertrix.go @@ -260,6 +260,9 @@ func handleBrowsertrixEvent(w http.ResponseWriter, r *http.Request) { metadataMap["asset_origin_type"] = []string{"wacz"} metadataMap["project_id"] = projectId metadataMap["file_name"] = e.Resources[0].Name + metadataMap["crawl_workflow_name"] = crawlInfo.Name + metadataMap["crawl_workflow_description"] = crawlInfo.Description + metadataMap["crawl_workflow_tags"] = crawlInfo.Tags err = util.MoveFile(tempFilePath, filepath.Join(outputDirectory, cid)) if err != nil {