Skip to content

Commit 3738f77

Browse files
authored
fix: send checksum on s3 upload (#1926)
Add the `x-amz-checksum-sha256` header on S3 object put requests. This is nice as we reuse our existing sha256 hash of the CAR as an integrity check. AWS will calculate the sha256 of the body it receives and send us a 400 BadDigest error if they don't match, to avoid storing CARs that had bits flipped in transit. See: web3-storage/web3.storage#1068. License: (Apache-2.0 AND MIT) Signed-off-by: Oli Evans <oli@tableflip.io>
1 parent a6c6fa4 commit 3738f77

File tree

1 file changed

+39
-12
lines changed

1 file changed

+39
-12
lines changed

packages/api/src/utils/s3-backup-client.js

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,22 @@ export class S3BackupClient {
5151
}
5252

5353
/**
54-
* Gets a base32 encoded sha256 hash of the passed data.
55-
* @param {Uint8Array} data
54+
* Gets a sha256 multihash digest of a blob.
55+
* @param {Blob} blob
5656
*/
57-
async _getDataHash(data) {
58-
const hash = await sha256.digest(new Uint8Array(data))
59-
return uint8ArrayToString(hash.bytes, 'base32')
57+
async _getMultihash(blob) {
58+
const buf = await blob.arrayBuffer()
59+
return sha256.digest(new Uint8Array(buf))
60+
}
61+
62+
/**
63+
* Gets base64pad-encoded string from a multihash
64+
* @param {import('multiformats').digest.MultihashDigest} multihash
65+
*/
66+
_getAwsChecksum(multihash) {
67+
// strip the multihash varint prefix to get the raw sha256 digest for aws upload integrity check
68+
const rawSha256 = multihash.bytes.subarray(2)
69+
return uint8ArrayToString(rawSha256, 'base64pad')
6070
}
6171

6272
/**
@@ -67,19 +77,36 @@ export class S3BackupClient {
6777
* @param {import('../bindings').DagStructure} [structure]
6878
*/
6979
async backupCar(userId, rootCid, car, structure = 'Unknown') {
70-
const buf = await car.arrayBuffer()
71-
const dataHash = await this._getDataHash(new Uint8Array(buf))
72-
const key = `raw/${rootCid}/${this._appName}-${userId}/${dataHash}.car`
73-
const bucket = this._bucketName
80+
const multihash = await this._getMultihash(car)
81+
const hashStr = uint8ArrayToString(multihash.bytes, 'base32')
82+
const key = `raw/${rootCid}/${this._appName}-${userId}/${hashStr}.car`
7483
const cmdParams = {
75-
Bucket: bucket,
84+
Bucket: this._bucketName,
7685
Key: key,
7786
Body: car,
7887
Metadata: { structure },
88+
// ChecksumSHA256 specifies the base64-encoded, 256-bit SHA-256 digest of the object, used as a data integrity check to verify that the data received is the same data that was originally sent.
89+
// see: https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html#AmazonS3-PutObject-request-header-ChecksumSHA256
90+
ChecksumSHA256: this._getAwsChecksum(multihash),
7991
}
8092
/** @type {import('@aws-sdk/client-s3').PutObjectCommand} */
81-
const cmd = new PutObjectCommand(cmdParams)
82-
await this._s3.send(cmd)
93+
94+
try {
95+
await this._s3.send(new PutObjectCommand(cmdParams))
96+
} catch (/** @type {any} */ err) {
97+
if (err.name === 'BadDigest') {
98+
// s3 returns a 400 Bad Request `BadDigest` error if the hash does not match their calculation.
99+
// see: https://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html#RESTErrorResponses
100+
// see: https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-s3/index.html#troubleshooting
101+
console.log(
102+
'BadDigest: sha256 of data recieved did not match what we sent. Maybe bits flipped in transit. Retrying once.'
103+
)
104+
await this._s3.send(new PutObjectCommand(cmdParams))
105+
} else {
106+
throw err
107+
}
108+
}
109+
83110
return new URL(key, this._baseUrl.toString())
84111
}
85112
}

0 commit comments

Comments
 (0)