Skip to content

Commit

Permalink
add importer
Browse files Browse the repository at this point in the history
  • Loading branch information
makew0rld committed Aug 27, 2024
1 parent 77ea189 commit 4289765
Show file tree
Hide file tree
Showing 4 changed files with 302 additions and 1 deletion.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
importers
importers/*
!importers/*.go
NOTES.md
/integrity-v2
/starling
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ require (
github.com/ipfs/go-cid v0.4.1
github.com/ipfs/go-datastore v0.6.0
github.com/jackc/pgx/v5 v5.5.5
github.com/joho/godotenv v1.5.1
github.com/lestrrat-go/jwx/v2 v2.0.21
github.com/multiformats/go-multicodec v0.9.0
github.com/openziti/secretstream v0.1.20
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,8 @@ github.com/jbenet/goprocess v0.1.4/go.mod h1:5yspPrukOVuOLORacaBi858NqyClJPQxYZl
github.com/jellevandenhooff/dkim v0.0.0-20150330215556-f50fe3d243e1/go.mod h1:E0B/fFc00Y+Rasa88328GlI/XbtyysCtTHZS8h7IrBU=
github.com/jessevdk/go-flags v0.0.0-20141203071132-1679536dcc89/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
github.com/jrick/logrotate v1.0.0/go.mod h1:LNinyqDIJnpAur+b8yyulnQw/wDuN1+BYKlTRt3OuAQ=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
Expand Down
297 changes: 297 additions & 0 deletions importers/chris.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,297 @@
package main

import (
"encoding/csv"
"fmt"
"io"
"os"
"slices"
"strings"

"github.com/starlinglab/integrity-v2/aa"

_ "github.com/joho/godotenv/autoload"
)

// Run configuration, read from the environment when the package is
// initialized. godotenv/autoload is imported for its side effect, so these
// values may also be supplied through a .env file.
var (
	// metaCsv is the path of the metadata spreadsheet export.
	metaCsv = os.Getenv("METADATA_CSV")
	// cidsCsv is the path of the CSV that maps asset names to CIDs.
	cidsCsv = os.Getenv("CIDS_CSV")
	// assetOriginId selects which asset_origin_id rows get imported.
	assetOriginId = os.Getenv("ASSET_ORIGIN_ID")
)

// knownHeader is the exact header row the metadata CSV must have, in column
// order. Column lookups are derived from the positions in this list.
var knownHeader = []string{
	"asset_description",
	"Ingest?",
	"asset_origin_id",
	"asset_collection",
	"asset_event",
	"asset_subcollection",
	"asset_act",
	"asset_subject",
	"asset_sequence",
	"SIGNER:asset_*",
	"relationship:type",
	"relationship:asset",
	"sequence_relationship",
	"SIGNER:relationships",
	"produced_by:type",
	"produced_by:name",
	"produced_by:url",
	"SIGNER:produced_by",
	"original_url",
	"SIGNER:original_url",
	"asset_medium",
	"SIGNER:asset_medium",
	"capture_location",
	"SIGNER:capture_location",
	"caption",
	"SIGNER:caption",
	"capture_date",
	"SIGNER:capture_date",
	"camera",
	"SIGNER:camera",
	"lens",
	"SIGNER:lens",
	"camera_settings",
	"SIGNER:camera_settings",
}

// AuthAttr instances, one per signer. Both are configured with the token
// from the JWT environment variable.
var (
	chrisAA = &aa.AuthAttrInstance{
		Url:  "https://chris.aa.prod.starlinglab.org",
		Jwt:  os.Getenv("JWT"),
		Mock: false,
	}
	kiraAA = &aa.AuthAttrInstance{
		Url:  "https://kira.aa.prod.starlinglab.org",
		Jwt:  os.Getenv("JWT"),
		Mock: false,
	}
)

// author is the structured value stored under the "produced_by" attestation
// key. Its JSON field names follow https://schema.org/author
type author struct {
// Type is serialized as "@type" and is filled from the produced_by:type column.
Type string `json:"@type"`
// Name is filled from the produced_by:name column.
Name string `json:"name"`
// URL is filled from the produced_by:url column.
URL string `json:"url"`
}

// main imports spreadsheet metadata into the kira and chris AuthAttr
// instances.
//
// It loads the asset-name -> CID mapping from cidsCsv, then walks metaCsv.
// For every row whose "Ingest?" cell is TRUE and whose asset_origin_id equals
// assetOriginId, it collects the row's values as attestations grouped by the
// signer named in the matching SIGNER:* column, sends them, and finally
// records the row's "parents" relationships. Every row is fully validated
// before anything is written, so a malformed row is not left half-imported.
// Malformed input or a failed request aborts the run with a panic.
func main() {
	cidMap := loadCIDMap(cidsCsv)

	// Signer name, as written in the SIGNER:* columns, -> signing instance.
	instances := map[string]*aa.AuthAttrInstance{
		"kira":  kiraAA,
		"chris": chrisAA,
	}

	headerCols := make(map[string]int, len(knownHeader))
	for i, name := range knownHeader {
		headerCols[name] = i
	}

	// getCell returns the cell of row under the named column. Indexing is safe
	// because the header is checked against knownHeader below and csv.Reader
	// rejects later records whose field count differs from the first record.
	getCell := func(columnName string, row []string) string {
		i, ok := headerCols[columnName]
		if !ok {
			panic("unknown column " + columnName)
		}
		return row[i]
	}

	f, err := os.Open(metaCsv)
	if err != nil {
		panic(err)
	}
	defer f.Close()

	r := csv.NewReader(f)

	header, err := r.Read()
	if err != nil {
		panic(err)
	}
	if !slices.Equal(header, knownHeader) {
		fmt.Printf("%#v\n", header)
		panic("header different than expected")
	}

	for {
		record, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			panic(err)
		}
		if getCell("Ingest?", record) != "TRUE" {
			continue
		}

		aoi := getCell("asset_origin_id", record)
		if aoi != assetOriginId {
			continue
		}

		fmt.Println(aoi)

		cid := cidMap[aoi]
		if cid == "" {
			panic("no CID found in CIDs CSV")
		}

		// Confirm file has already been ingested
		if !chrisAA.Mock {
			if _, err := chrisAA.GetAttestation(cid, "file_name", aa.GetAttOpts{}); err != nil {
				panic(err)
			}
		}

		// Key-value pairs collected per signer. The slices start empty but
		// non-nil so that a signer with nothing to sign is still passed an
		// empty list rather than a nil one.
		pending := map[string][]aa.PostKV{
			"kira":  make([]aa.PostKV, 0),
			"chris": make([]aa.PostKV, 0),
		}
		add := func(signer string, kv aa.PostKV) {
			pending[signer] = append(pending[signer], kv)
		}
		// signerOf reads a SIGNER:* column and checks it names a known signer.
		signerOf := func(signerColumn string) string {
			name := getCell(signerColumn, record)
			if _, ok := instances[name]; !ok {
				panic(fmt.Sprintf("unknown signer %q in column %q", name, signerColumn))
			}
			return name
		}

		// Start with asset_* fields, all one signer
		assetSigner := signerOf("SIGNER:asset_*")

		// Unindexed columns
		for _, col := range []string{
			"asset_description",
		} {
			if v := getCell(col, record); strings.TrimSpace(v) != "" {
				add(assetSigner, aa.PostKV{Key: col, Value: v})
			}
		}

		// Indexed columns
		for _, col := range []string{
			"asset_collection", "asset_subcollection", "asset_subject",
			"asset_sequence", "asset_act", "asset_event", "asset_origin_id",
		} {
			if v := getCell(col, record); strings.TrimSpace(v) != "" {
				add(assetSigner, aa.PostKV{Key: col, Value: v, Type: "str"})
			}
		}

		// Sequence relationship (indexed)
		relSigner := signerOf("SIGNER:relationships")
		seqRel := getCell("sequence_relationship", record)
		if strings.Contains(seqRel, ",") {
			panic(fmt.Sprintf("sequence_relationship must not contain a comma: %q", seqRel))
		}
		if strings.TrimSpace(seqRel) != "" {
			add(relSigner, aa.PostKV{
				Key:   "sequence_relationship",
				Value: seqRel,
				Type:  "str",
			})
		}

		// produced_by author
		producedBySigner := signerOf("SIGNER:produced_by")
		add(producedBySigner, aa.PostKV{
			Key: "produced_by",
			Value: author{
				Type: getCell("produced_by:type", record),
				Name: getCell("produced_by:name", record),
				URL:  getCell("produced_by:url", record),
			},
		})

		// Simple text fields, each with their own signer. The signer column is
		// only consulted when the field itself is non-empty.
		for _, col := range []string{
			"original_url", "asset_medium", "capture_location",
			"caption", "camera", "lens", "camera_settings",
		} {
			v := getCell(col, record)
			if strings.TrimSpace(v) == "" {
				continue
			}
			add(signerOf("SIGNER:"+col), aa.PostKV{Key: col, Value: v})
		}

		// Dates
		dateSigner := signerOf("SIGNER:capture_date")
		add(dateSigner, aa.PostKV{
			Key:   "capture_date",
			Value: formatCaptureDate(getCell("capture_date", record)),
		})

		// Resolve and validate relationships before any write, so that a bad
		// row does not leave attestations posted without their relationships.
		relType := getCell("relationship:type", record)
		var relCids []string
		if relAssets := getCell("relationship:asset", record); strings.TrimSpace(relAssets) != "" {
			if strings.TrimSpace(relType) == "" {
				panic("empty relationship type")
			}
			for _, asset := range strings.Split(relAssets, ",") {
				name := strings.TrimSpace(asset)
				relCid := cidMap[name]
				if relCid == "" {
					panic(fmt.Sprintf("relationship: no CID found in CIDs CSV for %q", name))
				}
				relCids = append(relCids, relCid)
			}
		}

		// Send all the atts
		if err := kiraAA.SetAttestations(cid, true, pending["kira"]); err != nil {
			panic(err)
		}
		if err := chrisAA.SetAttestations(cid, true, pending["chris"]); err != nil {
			panic(err)
		}

		// Relationships
		for _, relCid := range relCids {
			err := instances[relSigner].AddRelationship(cid, "parents", relType, relCid)
			if err != nil {
				panic(fmt.Errorf("adding relationship: %w", err))
			}
		}
	}
}

// loadCIDMap reads the CSV at path and returns a map from each row's first
// column (the asset name) to its second column (the CID). It panics if the
// file cannot be read or a row has fewer than two columns.
func loadCIDMap(path string) map[string]string {
	f, err := os.Open(path)
	if err != nil {
		panic(err)
	}
	defer f.Close()

	cidMap := make(map[string]string)
	r := csv.NewReader(f)
	for {
		record, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			panic(err)
		}
		if len(record) < 2 {
			panic(fmt.Sprintf("CIDs CSV: expected at least 2 columns, got %d: %q", len(record), record))
		}
		cidMap[record[0]] = record[1]
	}
	return cidMap
}

// formatCaptureDate converts an MM/DD/YYYY date (Google Sheets) into
// YYYY-MM-DD (RFC 3339). It panics when the cell does not start with three
// slash-separated integers or when the month or day is out of range; the
// range check also catches DD/MM/YYYY input whenever the day is above 12.
func formatCaptureDate(cell string) string {
	var m, d, y int
	if _, err := fmt.Sscanf(cell, "%d/%d/%d", &m, &d, &y); err != nil {
		panic(fmt.Errorf("parsing capture_date %q: %w", cell, err))
	}
	if m < 1 || m > 12 || d < 1 || d > 31 {
		panic(fmt.Sprintf("capture_date %q is not a valid MM/DD/YYYY date", cell))
	}
	return fmt.Sprintf("%04d-%02d-%02d", y, m, d)
}

0 comments on commit 4289765

Please sign in to comment.