Skip to content

Commit dfe0269

Browse files
authored
blobs: prevent NULL contentType in db (#1355)
Set `blob."contentType"` values to `application/octet-stream` if no mime type is supplied on upload. Related: #1351
1 parent 37b85b5 commit dfe0269

File tree

8 files changed

+158
-18
lines changed

8 files changed

+158
-18
lines changed

docs/api.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3892,7 +3892,7 @@ paths:
38923892
- Individual Form
38933893
summary: Downloading a Form Attachment
38943894
description: |-
3895-
To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time) will be given.
3895+
To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time, or `application/octet-stream` if none was supplied) will be given.
38963896

38973897
This endpoint supports `ETag`, which can be used to avoid downloading the same content more than once. When an API consumer calls this endpoint, it returns a value in `ETag` header, you can pass this value in the header `If-None-Match` of subsequent requests. If the file has not been changed since the previous request, you will receive `304 Not Modified` response otherwise you'll get the latest file.
38983898
operationId: Downloading a Form Attachment
@@ -4422,7 +4422,7 @@ paths:
44224422
summary: Downloading a Draft Form Attachment
44234423
description: To download a single file, use this endpoint. The appropriate `Content-Disposition`
44244424
(attachment with a filename or Dataset name) and `Content-Type` (based on
4425-
the type supplied at upload time or `text/csv` in the case of a linked Dataset)
4425+
the type supplied at upload time, or `text/csv` in the case of a linked Dataset, or `application/octet-stream` otherwise)
44264426
will be given.
44274427

44284428
This endpoint supports `ETag`, which can be used to avoid downloading the same content more than once. When an API consumer calls this endpoint, it returns a value in `ETag` header, you can pass this value in the header `If-None-Match` of subsequent requests. If the file has not been changed since the previous request, you will receive `304 Not Modified` response otherwise you'll get the latest file.
@@ -5123,7 +5123,7 @@ paths:
51235123
- Published Form Versions
51245124
summary: Downloading a Form Version Attachment
51255125
description: |-
5126-
To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time) will be given.
5126+
To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time, or `application/octet-stream` if none was supplied) will be given.
51275127

51285128
This endpoint supports `ETag` header, which can be used to avoid downloading the same content more than once. When an API consumer calls this endpoint, the endpoint returns a value in `ETag` header. If you pass that value in the `If-None-Match` header of a subsequent request, then if the file has not been changed since the previous request, you will receive `304 Not Modified` response; otherwise you'll get the latest file.
51295129
operationId: Downloading a Form Version Attachment
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// Copyright 2025 ODK Central Developers
2+
// See the NOTICE file at the top-level directory of this distribution and at
3+
// https://github.com/getodk/central-backend/blob/master/NOTICE.
4+
// This file is part of ODK Central. It is subject to the license terms in
5+
// the LICENSE file found in the top-level directory of this distribution and at
6+
// https://www.apache.org/licenses/LICENSE-2.0. No part of ODK Central,
7+
// including this file, may be copied, modified, propagated, or distributed
8+
// except according to the terms contained in the LICENSE file.
9+
10+
const up = (db) => db.raw(`
11+
UPDATE blobs SET "contentType"='application/octet-stream' WHERE "contentType" IS NULL;
12+
ALTER TABLE blobs
13+
ALTER COLUMN "contentType" SET DEFAULT 'application/octet-stream',
14+
ALTER COLUMN "contentType" SET NOT NULL
15+
`);
16+
17+
const down = (db) => db.raw(`
18+
ALTER TABLE blobs
19+
ALTER COLUMN "contentType" DROP NOT NULL,
20+
ALTER COLUMN "contentType" DROP DEFAULT
21+
`);
22+
23+
module.exports = { up, down };

lib/model/query/blobs.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ const { construct } = require('../../util/util');
2222
const ensure = (blob) => ({ oneFirst }) => oneFirst(sql`
2323
with ensured as
2424
(insert into blobs (sha, md5, content, "contentType") values
25-
(${blob.sha}, ${blob.md5}, ${sql.binary(blob.content)}, ${blob.contentType || null})
25+
(${blob.sha}, ${blob.md5}, ${sql.binary(blob.content)}, ${blob.contentType || sql`DEFAULT`})
2626
on conflict (sha) do update set sha = ${blob.sha}
2727
returning id)
2828
select id from ensured`);
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
const assert = require('node:assert/strict');
2+
const { hash, randomBytes } = require('node:crypto');
3+
4+
const { // eslint-disable-line object-curly-newline
5+
assertTableContents,
6+
describeMigration,
7+
rowsExistFor,
8+
} = require('./utils'); // eslint-disable-line object-curly-newline
9+
10+
describeMigration('20250113-01-disable-nullable-blob-content-types', ({ runMigrationBeingTested }) => {
11+
const aBlobWith = props => {
12+
const randomContent = randomBytes(100);
13+
const md5 = hash('md5', randomContent); // eslint-disable-line no-multi-spaces
14+
const sha = hash('sha1', randomContent);
15+
return { md5, sha, ...props };
16+
};
17+
const aBlob = () => aBlobWith({});
18+
19+
const blob1 = aBlobWith({ contentType: null });
20+
const blob2 = aBlobWith({ contentType: 'text/plain' });
21+
22+
before(async () => {
23+
await rowsExistFor('blobs', blob1, blob2);
24+
25+
await runMigrationBeingTested();
26+
});
27+
28+
it('should change existing NULL contentType values to application/octet-stream, and preserve non-NULL values', async () => {
29+
await assertTableContents('blobs',
30+
{ ...blob1, contentType: 'application/octet-stream' },
31+
{ ...blob2, contentType: 'text/plain' },
32+
);
33+
});
34+
35+
it(`should create new blobs with contentType 'application/octet-stream' (contentType not supplied)`, async () => {
36+
const { md5, sha } = aBlob();
37+
38+
const created = await db.oneFirst(sql`
39+
INSERT INTO blobs (md5, sha)
40+
VALUES(${md5}, ${sha})
41+
RETURNING "contentType"
42+
`);
43+
44+
assert.equal(created, 'application/octet-stream');
45+
});
46+
47+
it(`should create new blobs with contentType 'application/octet-stream' (supplied DEFAULT contentType)`, async () => {
48+
const { md5, sha } = aBlob();
49+
50+
const created = await db.oneFirst(sql`
51+
INSERT INTO blobs (md5, sha, "contentType")
52+
VALUES(${md5}, ${sha}, DEFAULT)
53+
RETURNING "contentType"
54+
`);
55+
56+
assert.equal(created, 'application/octet-stream');
57+
});
58+
});

test/db-migrations/utils.js

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,72 @@ function assertIncludes(actual, expected) {
115115
}
116116
}
117117

118+
async function rowsExistFor(tableName, ...rows) {
119+
if (!rows.length) throw new Error(`Attempted to insert 0 rows into table ${tableName}`);
120+
121+
assertAllHaveSameProps(rows); // eslint-disable-line no-use-before-define
122+
const colNames = Object.keys(rows[0]);
123+
if (!colNames.length) throw new Error(`Attempted to insert data with 0 defined columns`);
124+
125+
const table = sql.identifier([tableName]);
126+
const cols = sql.join(colNames.map(k => sql.identifier([k])), sql`,`);
127+
128+
return db.query(
129+
sql`
130+
INSERT INTO ${table} (${cols})
131+
SELECT ${cols}
132+
FROM JSON_POPULATE_RECORDSET(NULL::${table}, ${JSON.stringify(rows)})
133+
`,
134+
);
135+
}
136+
137+
async function assertTableContents(tableName, ...expected) {
138+
const { rows: actual } = await db.query(sql`SELECT * FROM ${sql.identifier([tableName])}`);
139+
140+
assert.equal(
141+
actual.length,
142+
expected.length,
143+
`Unexpected number of rows in table '${tableName}'. ` +
144+
`Expected ${expected.length} but got ${actual.length}. ` +
145+
`DB returned: ${JSON.stringify(actual, null, 2)}`,
146+
);
147+
148+
const remainingRows = [ ...actual ];
149+
for (let i=0; i<expected.length; ++i) { // eslint-disable-line no-plusplus
150+
const x = expected[i];
151+
let found = false;
152+
for (let j=0; j<remainingRows.length; ++j) { // eslint-disable-line no-plusplus
153+
const rr = remainingRows[j];
154+
try {
155+
assertIncludes(rr, x);
156+
remainingRows.splice(j, 1);
157+
found = true;
158+
break;
159+
} catch (err) { /* keep searching */ }
160+
}
161+
if (!found) {
162+
const filteredRemainingRows = remainingRows.map(r => _.pick(r, Object.keys(x)));
163+
assert.fail(`Expected row ${i} not found in table '${tableName}':\n json=${JSON.stringify({ remainingRows, filteredRemainingRows, expectedRow: x })}`);
164+
}
165+
}
166+
}
167+
168+
function assertAllHaveSameProps(list) {
169+
if (list.length < 2) return;
170+
const [ first, ...rest ] = list.map(Object.keys);
171+
172+
rest.forEach((v, i) => {
173+
assert.deepEqual(v, first, `Row #${i+1} has different props to row #0. All supplied rows must have the same props.`);
174+
});
175+
}
176+
118177
module.exports = {
119178
assertIndexExists,
179+
assertTableContents,
120180
assertTableDoesNotExist,
121181
assertTableSchema,
182+
122183
describeMigration,
184+
185+
rowsExistFor,
123186
};

test/e2e/s3/test.js

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -379,9 +379,7 @@ describe('s3 support', () => {
379379

380380
const filepath = `${attDir}/${name}`;
381381

382-
// "null" is a questionable content-type, but matches current central behaviour
383-
// See: https://github.com/getodk/central-backend/pull/1352
384-
const expectedContentType = mimetypeFor(name) ?? 'null';
382+
const expectedContentType = mimetypeFor(name) ?? 'application/octet-stream';
385383

386384
const actualContentType = res.headers.get('content-type');
387385
should.equal(actualContentType, expectedContentType);

test/integration/api/submissions.js

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4377,8 +4377,7 @@ one,h,/data/h,2000-01-01T00:06,2000-01-01T00:07,-5,-6,,ee,ff
43774377
body.toString().should.equal('testvideo');
43784378
})))))));
43794379

4380-
// Ref https://github.com/getodk/central-backend/issues/1351
4381-
it('should attach a given file with empty Content-Type', testService((service) =>
4380+
it('should attach a given file with empty Content-Type and serve it with default mime type', testService((service) =>
43824381
service.login('alice', (asAlice) =>
43834382
asAlice.post('/v1/projects/1/forms?publish=true')
43844383
.set('Content-Type', 'application/xml')
@@ -4394,13 +4393,12 @@ one,h,/data/h,2000-01-01T00:06,2000-01-01T00:07,-5,-6,,ee,ff
43944393
.expect(200)
43954394
.then(() => asAlice.get('/v1/projects/1/forms/binaryType/submissions/both/attachments/my_file1.mp4')
43964395
.expect(200)
4397-
.then(({ headers, text }) => {
4398-
headers['content-type'].should.equal('null');
4399-
text.toString().should.equal('testvideo'); // use 'text' instead of 'body' to avoid supertest response parsing
4396+
.then(({ headers, body }) => {
4397+
headers['content-type'].should.equal('application/octet-stream');
4398+
body.toString().should.equal('testvideo');
44004399
})))))));
44014400

4402-
// Ref https://github.com/getodk/central-backend/issues/1351
4403-
it('should attach a given file with missing Content-Type', testService((service) =>
4401+
it('should attach a given file with missing Content-Type and serve it with default mime type', testService((service) =>
44044402
service.login('alice', (asAlice) =>
44054403
asAlice.post('/v1/projects/1/forms?publish=true')
44064404
.set('Content-Type', 'application/xml')
@@ -4416,9 +4414,9 @@ one,h,/data/h,2000-01-01T00:06,2000-01-01T00:07,-5,-6,,ee,ff
44164414
.expect(200)
44174415
.then(() => asAlice.get('/v1/projects/1/forms/binaryType/submissions/both/attachments/my_file1.mp4')
44184416
.expect(200)
4419-
.then(({ headers, text }) => {
4420-
headers['content-type'].should.equal('null');
4421-
text.toString().should.equal('testvideo'); // use 'text' instead of 'body' to avoid supertest response parsing
4417+
.then(({ headers, body }) => {
4418+
headers['content-type'].should.equal('application/octet-stream');
4419+
body.toString().should.equal('testvideo');
44224420
})))))));
44234421

44244422
it('should log an audit entry about initial attachment', testService((service, { Audits, Forms, Submissions, SubmissionAttachments }) =>

test/integration/task/s3.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ const aBlobExistsWith = async (container, { status }) => {
1111
const blob = await Blob.fromBuffer(crypto.randomBytes(100));
1212
await container.run(sql`
1313
INSERT INTO BLOBS (sha, md5, content, "contentType", s3_status)
14-
VALUES (${blob.sha}, ${blob.md5}, ${sql.binary(blob.content)}, ${blob.contentType || null}, ${status})
14+
VALUES (${blob.sha}, ${blob.md5}, ${sql.binary(blob.content)}, ${blob.contentType || sql`DEFAULT`}, ${status})
1515
`);
1616
};
1717

0 commit comments

Comments
 (0)