forked from duke-libraries/archivesspace-duke-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
duke_update_archival_object_args_multiplefileversions.py
154 lines (119 loc) · 7.74 KB
/
duke_update_archival_object_args_multiplefileversions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import requests
import json
import csv
import os, sys, argparse #sys and argparse to allow for the passing of arguments in the command invocation
#This Script Works!! - Noah
#I added arguments and it still works!! - Farrell
#It will now look for two file versions!! - Farrell
# This script will create a new digital object and link it as an instance to an existing archival object
# This was written under the assumption that you might have a csv (or similar), exported from ASpace or
# compiled from an ASpace exported EAD, with an existing archival object's ref_id. Using only the ref_id,
# this will use the ASpace API to search for the existing archival object, retrieve its URI, store the archival
# object's JSON, create a new digital object using the title from the archival object and an identifier (also from the CSV),
# grab the URI for the newly created digital object, add the link as an instance to the archival object JSON,
# and repost the archival object to ASpace using the update archival object endpoint
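# The original version of this script assumed a hypothetical 5 column csv that might look something like this:
# [Container number], [Component title], [ASpace ref_id], [an identifier], [uri to the digital object]
# This version instead reads a tab-separated file with a header row and uses these columns (first column is 0):
# 3 = identifier, 5 = ASpace ref_id, 6 and 8 = file URIs of the two file versions, 7 and 9 = their use statements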
# The archival_object_csv will be your starting csv with the ASpace ref_ids of the archival objects to be updated,
# the identifier to be used in the newly created digital object (could be a barcode, a random string, etc) and the uri
# to the digital object that will be added as a file_uri in the ArchivesSpace digital object record
#archival_object_csv = os.path.normpath("g:/ead_work/hoskins/hoskinssarah-steady_digguide_part2.txt")
# The updated_archival_object_csv will be an updated csv that will be created at the end of this script, containing all of the same
# information as the starting csv, plus the ArchivesSpace uris for the archival and digital objects
#updated_archival_object_csv = os.path.normpath("g:/ead_work/hoskinshoskinssarah-steady_digguide_part2_with_uris.csv")
# Modify your ArchivesSpace backend url, username, and password as necessary
#aspace_url = 'BACKENDURL' #Backend URL for ASpace -- commented out here b/c trying argparse
#username= 'USERNAME' -- commented out here b/c trying argparse
#password = 'PASSWORD' -- commented out here b/c trying argparse
# Command-line interface.
# Every option is required: the rest of the script reads the five module-level
# names assigned below unconditionally, so a missing option used to surface
# much later as a NameError instead of a usage message.
parser = argparse.ArgumentParser(
    description="Create digital objects (two file versions each) and link "
                "them as instances to existing archival objects.")
parser.add_argument("-a", "--aspaceurl", required=True, help="The backend URL for ASpace")
parser.add_argument("-u", "--user", required=True, help="Your aspace username")
parser.add_argument("-p", "--password", required=True, help="Your aspace password")
parser.add_argument("-i", "--input", required=True, help="Path to DigGuide created by XSLT")
parser.add_argument("-o", "--output", required=True, help="Path to and filename for the updated CSV created to verify everything went well")
args = parser.parse_args()

# Module-level settings read by the rest of the script. (No `global`
# statement is needed: this code already runs at module scope.)
# Endpoint paths are appended to the backend URL with a leading '/', so a
# trailing '/' is dropped here to avoid producing '//' in request URLs.
aspace_backend = args.aspaceurl.rstrip('/')
aspace_user = args.user
aspace_pword = args.password
digguide = os.path.normpath(args.input)
csv_out = os.path.normpath(args.output)
# Log in to the ArchivesSpace backend and build the header that carries the
# session token on every later request.
# The password is handed to `params` so that requests percent-encodes it;
# concatenated raw into the URL, a password containing '&', '#', '+' or '%'
# would be truncated or altered before it reached the server.
auth = requests.post(aspace_backend + '/users/' + aspace_user + '/login',
                     params={'password': aspace_pword}).json()
if 'session' not in auth:
    # Without a session token nothing below can work; stop with the server's
    # reply instead of a bare KeyError. (Presumably an error payload such as a
    # rejected login -- the reply is echoed so the cause is visible.)
    sys.exit('ArchivesSpace login failed: %s' % json.dumps(auth))
session = auth["session"]
headers = {'X-ArchivesSpace-Session': session}
#Modified for TSV input, which is default output of aspace_dig_guide_creator.xsl
# Repository that is searched and that receives the new digital objects.
REPOSITORY_URI = '/repositories/2'  # change repository number as needed

# A usable row needs columns 0-9; index 9 is the highest one read below.
MIN_COLUMNS = 10


def _open_for_csv(path, mode):
    """Open *path* the way the csv module expects on the running Python.

    Python 2's csv module needs binary-mode files; Python 3's needs text-mode
    files opened with newline=''. *mode* is 'r' or 'w'.
    """
    if sys.version_info[0] < 3:
        return open(path, mode + 'b')
    return open(path, mode, newline='')


def _warn(message):
    """Write a skipped-row notice to stderr, leaving stdout for progress output."""
    sys.stderr.write(message + '\n')


# For every data row of the tab-separated input: find the archival object by
# ref_id, create a digital object with two file versions, link it to the
# archival object as an instance, and write the row plus both URIs to csv_out.
# The output file is opened once and written through a single csv writer; it
# used to be opened for writing here and re-opened for appending on every row
# while the first handle was still open.
with _open_for_csv(digguide, 'r') as tsv_file, _open_for_csv(csv_out, 'w') as csv_file:
    tsvin = csv.reader(tsv_file, delimiter='\t')
    next(tsvin, None)  # ignore header row
    csvout = csv.writer(csv_file)

    # Set whether the DO should be published or not
    publish_yesorno = True

    for row in tsvin:
        if not row:
            continue  # blank line, e.g. at the end of the file
        if len(row) < MIN_COLUMNS:
            _warn('Skipping line %d: expected at least %d columns, found %d'
                  % (tsvin.line_num, MIN_COLUMNS, len(row)))
            continue

        # Column positions in the TSV; the first column is column 0.
        # If you don't have specific identifiers and just want a random string,
        # you could import uuid up top and do something like 'identifier = uuid.uuid4()'
        identifier = row[3]
        ref_id = row[5]
        file_uri = row[6]
        file_version_use_statement = row[7]   # image-service, audio-streaming, etc.
        file_uri2 = row[8]
        file_version_use_statement2 = row[9]
        print(ref_id)

        # Search ASpace for the ref_id. `params` percent-encodes the query value.
        search = requests.get(aspace_backend + REPOSITORY_URI + '/search',
                              params={'page': 1, 'q': ref_id},
                              headers=headers).json()
        results = search.get('results') or []
        if not results:
            _warn('Skipping %s: search returned no results' % ref_id)
            continue

        # Grab the archival object uri from the first search result
        archival_object_uri = results[0]['uri']
        print(archival_object_uri)

        # Submit a get request for the archival object and store the JSON
        archival_object_json = requests.get(aspace_backend + archival_object_uri,
                                            headers=headers).json()

        # Continue only if the search-returned record's ref_id matches our
        # starting ref_id, just to be safe. `.get` is used because the first
        # hit is not guaranteed to be a record that has a ref_id at all.
        if archival_object_json.get('ref_id') != ref_id:
            _warn('Skipping %s: first search result %s has a different ref_id'
                  % (ref_id, archival_object_uri))
            continue

        # Reuse the display string from the archival object as the digital object title
        # Note: a more sophisticated way of doing this would be to add the title and dates from the
        # archival object separately into the appropriate title and date records in the digital object
        # This also does not copy over any notes from the archival object
        display_string = archival_object_json['display_string']

        # Form the digital object JSON using the display string from the
        # archival object and the identifier and file URIs from the TSV
        dig_obj = {
            'title': display_string,
            'digital_object_id': identifier,
            'publish': publish_yesorno,
            'file_versions': [
                {'file_uri': file_uri, 'use_statement': file_version_use_statement},
                {'file_uri': file_uri2, 'use_statement': file_version_use_statement2},
            ],
        }
        dig_obj_data = json.dumps(dig_obj)

        # Post the digital object
        dig_obj_post = requests.post(aspace_backend + REPOSITORY_URI + '/digital_objects',
                                     headers=headers, data=dig_obj_data).json()
        print(dig_obj_post)
        if 'uri' not in dig_obj_post:
            # The reply (printed above) carries no URI to link to, so there is
            # nothing to attach; move on instead of aborting the whole batch.
            _warn('Skipping %s: digital object was not created' % ref_id)
            continue
        print('Digital Object Status: %s' % dig_obj_post.get('status'))

        # Grab the digital object uri
        dig_obj_uri = dig_obj_post['uri']
        print('Digital Object URI: %s' % dig_obj_uri)

        # Build a new instance linking to the digital object and append it to
        # the existing archival object record's instances
        dig_obj_instance = {'instance_type': 'digital_object',
                            'digital_object': {'ref': dig_obj_uri}}
        archival_object_json.setdefault('instances', []).append(dig_obj_instance)
        archival_object_data = json.dumps(archival_object_json)
        print(archival_object_data)

        # Repost the archival object
        archival_object_update = requests.post(aspace_backend + archival_object_uri,
                                               headers=headers,
                                               data=archival_object_data).json()
        print(archival_object_update)

        # Write the original row + the ArchivesSpace uris for the archival and
        # digital objects, and flush so finished rows survive a later crash.
        # NOTE(review): indentation was lost in the copy this was restored
        # from; only rows that reach this point are assumed to be written --
        # confirm against the upstream script.
        row.append(archival_object_uri)
        row.append(dig_obj_uri)
        csvout.writerow(row)
        csv_file.flush()