forked from dandpb/aws-glacier-multipart-upload
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathglacierupload.sh
executable file
·136 lines (104 loc) · 4.13 KB
/
glacierupload.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/bin/bash
# Required tools: jq, GNU parallel and the AWS CLI, for example:
#   sudo apt install jq
#   sudo apt install parallel
#   sudo apt install awscli

# The file to upload and the target vault are mandatory; print the usage
# line and stop when fewer than two arguments were given.
case $# in
  0 | 1)
    echo "USAGE: $0 filename vaultName resultFile glacierDb chunkSize"
    exit 1
    ;;
esac
# Print the absolute form of a path, resolving relative paths against the
# current working directory.
# Arguments: $1 - path, absolute or relative
# Outputs:   the absolute path on stdout
glacier_abs_path() {
  case $1 in
    /*) printf '%s\n' "$1" ;;
    *) printf '%s\n' "$PWD/$1" ;;
  esac
}

# Quote $1 so that a file name containing spaces resolves to a single path.
filename=$(readlink -f "$1")
# The file name as typed by the user doubles as the archive description.
description=$1
vaultName=$2
DEFAULT_STORAGE_PATH=~/.glacierupload/
mkdir -p "$DEFAULT_STORAGE_PATH"
# Optional arguments fall back to defaults when missing or empty.
# chunkSize is the part size in MiB.
chunkSize=${5:-1024}
resultFile=${3:-$DEFAULT_STORAGE_PATH/glacier.last.out}
glacierDbFile=${4:-$DEFAULT_STORAGE_PATH/glacier-db}
# The script later changes into its own directory, so anchor both output
# files to the directory it was invoked from while $PWD still points there.
resultFile=$(glacier_abs_path "$resultFile")
glacierDbFile=$(glacier_abs_path "$glacierDbFile")
# Glacier only accepts part sizes of 1 MiB times a power of two, from 1 MiB up
# to 4 GiB, so reject anything else before the file is split.
if ! [[ $chunkSize =~ ^[0-9]{1,4}$ ]]; then
  echo "chunkSize must be a whole number of MiB, got: $chunkSize" >&2
  exit 1
fi
# Force base 10 so that a value with leading zeros is not read as octal.
chunkSize=$((10#$chunkSize))
if ((chunkSize < 1 || chunkSize > 4096 || (chunkSize & (chunkSize - 1)) != 0)); then
  echo "chunkSize must be a power of two between 1 and 4096 MiB, got: $chunkSize" >&2
  exit 1
fi
# Part size in bytes, as passed to split and to the AWS CLI.
byteSize=$((chunkSize * 1024 * 1024))
# Name prefix of the temporary part files produced by split.
prefix="__glacier_upload"

# TreeHashExample and the temporary part files live next to this script.
cd "$(dirname "$(readlink -f "$0")")" || {
  echo "Cannot change to the script directory" >&2
  exit 1
}
# Compile the tree hash helper on first use.
if [ ! -f "TreeHashExample.class" ]; then
  javac TreeHashExample.java || {
    echo "Cannot compile TreeHashExample.java" >&2
    exit 1
  }
fi
if [ ! -f "$filename" ]; then
  echo "No such target file: $filename" >&2
  # Report the failure to the caller; a bare exit would return status 0 here.
  exit 1
fi
# Remove part files left over from an interrupted run so that they are not
# uploaded as part of this archive. ${prefix:?} aborts if prefix is empty.
rm -f -- "${prefix:?}"*

# Part file out
if [[ $OSTYPE == linux* ]]; then
  split --bytes="$byteSize" --verbose "$filename" "$prefix"
  splitStatus=$?
elif [[ $OSTYPE == darwin* ]]; then
  split -b "${chunkSize}m" "$filename" "$prefix" # Mac OSX
  splitStatus=$?
else
  # Without a split command there would be nothing to upload.
  echo "Unsupported OS type: $OSTYPE" >&2
  exit 1
fi
if ((splitStatus != 0)); then
  echo "Failed to split $filename" >&2
  rm -f -- "${prefix:?}"*
  exit 1
fi

# Collect the part files that begin with "$prefix" by globbing rather than
# parsing ls output. Edit this if you chose a different prefix in the split
# command.
parts=()
for part in "$prefix"*; do
  # An unmatched glob stays literal; skip it so the count can be zero.
  if [[ -e $part ]]; then
    parts+=("$part")
  fi
done
fileCount=${#parts[@]}
echo "📦 Glacier Upload with $AWS_PROFILE"
echo "Total parts to upload: " "$fileCount"
# Newline-separated list of the part files to upload, in glob order.
files=$(printf '%s\n' "${parts[@]}")
# Pass --profile only when AWS_PROFILE is set; an empty value would otherwise
# leave a dangling --profile option on the command line.
profileArgs=()
if [[ -n ${AWS_PROFILE:-} ]]; then
  profileArgs=(--profile "$AWS_PROFILE")
fi

# initiate multipart upload connection to glacier
initArgs=(
  glacier initiate-multipart-upload "${profileArgs[@]}"
  --account-id - --part-size "$byteSize"
  --vault-name "$vaultName" --archive-description "$description"
)
echo aws "${initArgs[@]}"
init=$(aws "${initArgs[@]}")
echo "---------------------------------------"

# jq -r prints the uploadId without quotes; "// empty" prints nothing when
# the field is missing, instead of the literal string "null" that would slip
# past the emptiness check below.
uploadId=$(jq -r '.uploadId // empty' <<<"$init")
if [ -z "$uploadId" ]; then
  echo "Cannot get upload id: $init" >&2
  # Drop the part files so that a later run does not pick them up.
  rm -f -- "${prefix:?}"*
  exit 1
fi
echo "🚀 uploadId: $uploadId"
#######################################
# Write one "aws glacier upload-multipart-part" command line per part file.
# The command file is truncated first, so lines left over from an earlier
# run are never executed again. Byte ranges are consecutive, starting at 0,
# in the order the part files are given.
# Globals:   vaultName, uploadId, AWS_PROFILE (all read)
# Arguments: $1 - command file to (re)create
#            $2... - part files, in upload order
# Returns:   0 on success, 1 if a file cannot be written or measured
#######################################
glacier_write_upload_commands() {
  local commandFile=$1
  shift
  local partFile partSize rangeEnd
  local rangeStart=0
  : > "$commandFile" || return 1
  for partFile in "$@"; do
    partSize=$(wc -c < "$partFile") || return 1
    rangeEnd=$((rangeStart + partSize - 1))
    {
      # %q shell-escapes the values, because each line is later run by a shell.
      printf 'aws glacier upload-multipart-part --body %q' "$partFile"
      printf " --range 'bytes %d-%d/*'" "$rangeStart" "$rangeEnd"
      # Only name a profile when one is configured.
      if [[ -n ${AWS_PROFILE:-} ]]; then
        printf ' --profile %q' "$AWS_PROFILE"
      fi
      printf ' --account-id - --vault-name %q --upload-id %q\n' \
        "$vaultName" "$uploadId"
    } >> "$commandFile" || return 1
    rangeStart=$((rangeEnd + 1))
  done
  return 0
}

#get total size in bytes of the archive
archivesize=$(wc -c < "$filename")
# create upload commands to be run in parallel and store in commands.txt
# $files is a whitespace-separated list of part names, so it is left unquoted
# on purpose to split it into one argument per part file.
# shellcheck disable=SC2086
if ! glacier_write_upload_commands commands.txt $files; then
  echo "Cannot create the upload command list" >&2
  rm -f -- "${prefix:?}"* commands.txt
  exit 1
fi
echo "🏃 Will run commands:"
cat commands.txt
# run upload commands in parallel
# --load 100% only starts new jobs while the system load is below 100%
# -a commands.txt runs every line of that file in parallel, in potentially random order
# --no-notice suppresses the citation notice on the console
# --bar provides a command line progress bar
# parallel exits non-zero when any job failed; completing the upload with
# missing parts cannot succeed, so stop here.
if ! parallel --load 100% -a commands.txt --no-notice --bar; then
  echo "At least one part upload failed; upload $uploadId was not completed" >&2
  rm -f -- "${prefix:?}"* commands.txt
  exit 1
fi

# Pass --profile only when AWS_PROFILE is set; an empty value would otherwise
# leave a dangling --profile option on the command line.
profileArgs=()
if [[ -n ${AWS_PROFILE:-} ]]; then
  profileArgs=(--profile "$AWS_PROFILE")
fi

echo "List Active Multipart Uploads:"
echo "Verify that a connection is open:"
aws glacier list-multipart-uploads "${profileArgs[@]}" --account-id - --vault-name "$vaultName"

#compute the tree hash
# The hash is taken from the fifth space-separated field of the helper output.
checksum=$(java TreeHashExample "$filename" | cut -d ' ' -f 5)
if [[ -z $checksum ]]; then
  echo "Cannot compute the tree hash of $filename" >&2
  rm -f -- "${prefix:?}"* commands.txt
  exit 1
fi

# end the multipart upload
# wc output may carry leading blanks on some systems; strip them so the size
# is passed as a plain number.
result=$(aws glacier complete-multipart-upload "${profileArgs[@]}" --account-id - \
  --vault-name "$vaultName" --upload-id "$uploadId" \
  --archive-size "${archivesize//[[:space:]]/}" --checksum "$checksum")
#store the json response from amazon for record keeping
DATE=$(date +"%Y%m%d_%Hh%Mm%Ss%Z")
# Collapse the response onto a single line so each upload is one record.
resultLine=$(printf '%s' "$result" | tr -s '[:space:]' ' ')
printf '%s %s %s\n' "$DATE" "$filename" "$resultLine" >> "$resultFile"

# jq -r prints the archiveId without quotes; "// empty" prints nothing when
# the field is missing, instead of the literal string "null".
archiveId=$(jq -r '.archiveId // empty' <<<"$result")
if [[ -z $archiveId ]]; then
  # Without an archiveId the upload did not complete: keep the source file
  # under its own name and do not record it as archived.
  echo "Upload was not completed, no archiveId in response: $result" >&2
  rm -f -- "${prefix:?}"* commands.txt
  exit 1
fi
printf '%s %s %s\n' "$DATE" "$filename" "$archiveId" >> "$glacierDbFile"
printf '%s %s %s\n' "$DATE" "$filename" "$archiveId"

# Pass --profile only when AWS_PROFILE is set; an empty value would otherwise
# leave a dangling --profile option on the command line.
profileArgs=()
if [[ -n ${AWS_PROFILE:-} ]]; then
  profileArgs=(--profile "$AWS_PROFILE")
fi

# list open multipart connections
echo "------------------------------"
echo "List Active Multipart Uploads:"
echo "Verify that the connection is closed:"
aws glacier list-multipart-uploads "${profileArgs[@]}" --account-id - --vault-name "$vaultName"
echo "--------------"
# Mark the source file as archived by renaming it.
echo "Rename file: ${filename}.glacierd"
mv -- "${filename}" "${filename}.glacierd"
echo "Deleting temporary commands.txt"
rm -f -- "${prefix:?}"* commands.txt