Skip to content

Only show world_readable or public rooms in the archive. Only index world_readable #66

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,18 @@ async function renderHydrogenVmRenderScriptToPageHtml(
const serializableSpans = getSerializableSpans();
const serializedSpans = JSON.stringify(serializableSpans);

// We shouldn't let some pages be indexed by search engines
let maybeNoIndexHtml = '';
if (pageOptions.noIndex) {
maybeNoIndexHtml = `<meta name="robots" content="noindex, nofollow" />`;
}

const pageHtml = `
<!doctype html>
<html lang="en">
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
${maybeNoIndexHtml}
${sanitizeHtml(`<title>${pageOptions.title}</title>`)}
${pageOptions.styles
.map((styleUrl) => `<link href="${styleUrl}" rel="stylesheet">`)
Expand Down
17 changes: 9 additions & 8 deletions server/lib/matrix-utils/ensure-room-joined.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ const urlJoin = require('url-join');
const { fetchEndpointAsJson } = require('../fetch-endpoint');

const config = require('../config');
const StatusError = require('../status-error');
const matrixServerUrl = config.get('matrixServerUrl');
assert(matrixServerUrl);

Expand All @@ -15,18 +16,18 @@ async function ensureRoomJoined(accessToken, roomId, viaServers = []) {
qs.append('server_name', viaServer);
});

// TODO: Only join world_readable rooms. Perhaps we want to serve public rooms
// where we have been invited. GET
// /_matrix/client/v3/directory/list/room/{roomId} (Gets the visibility of a
// given room on the server’s public room directory.)
const joinEndpoint = urlJoin(
matrixServerUrl,
`_matrix/client/r0/join/${roomId}?${qs.toString()}`
);
await fetchEndpointAsJson(joinEndpoint, {
method: 'POST',
accessToken,
});
try {
await fetchEndpointAsJson(joinEndpoint, {
method: 'POST',
accessToken,
});
} catch (err) {
throw new StatusError(403, `Archiver is unable to join room: ${err.message}`);
}
}

module.exports = ensureRoomJoined;
33 changes: 32 additions & 1 deletion server/lib/matrix-utils/fetch-room-data.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,33 @@ async function fetchRoomData(accessToken, roomId) {
matrixServerUrl,
`_matrix/client/r0/rooms/${roomId}/state/m.room.avatar`
);
const stateHistoryVisibilityEndpoint = urlJoin(
matrixServerUrl,
`_matrix/client/r0/rooms/${roomId}/state/m.room.history_visibility`
);
const stateJoinRulesEndpoint = urlJoin(
matrixServerUrl,
`_matrix/client/r0/rooms/${roomId}/state/m.room.join_rules`
);

const [stateNameResDataOutcome, stateAvatarResDataOutcome] = await Promise.allSettled([
const [
stateNameResDataOutcome,
stateAvatarResDataOutcome,
stateHistoryVisibilityResDataOutcome,
stateJoinRulesResDataOutcome,
] = await Promise.allSettled([
fetchEndpointAsJson(stateNameEndpoint, {
accessToken,
}),
fetchEndpointAsJson(stateAvatarEndpoint, {
accessToken,
}),
fetchEndpointAsJson(stateHistoryVisibilityEndpoint, {
accessToken,
}),
fetchEndpointAsJson(stateJoinRulesEndpoint, {
accessToken,
}),
]);

let name;
Expand All @@ -42,10 +61,22 @@ async function fetchRoomData(accessToken, roomId) {
avatarUrl = stateAvatarResDataOutcome.value.url;
}

let historyVisibility;
if (stateHistoryVisibilityResDataOutcome.reason === undefined) {
historyVisibility = stateHistoryVisibilityResDataOutcome.value.history_visibility;
}

let joinRule;
if (stateJoinRulesResDataOutcome.reason === undefined) {
joinRule = stateJoinRulesResDataOutcome.value.join_rule;
}

return {
id: roomId,
name,
avatarUrl,
historyVisibility,
joinRule,
};
}

Expand Down
2 changes: 1 addition & 1 deletion server/lib/status-error.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ function StatusError(status, inputMessage) {
message = http.STATUS_CODES[status] || http.STATUS_CODES['500'];
}

this.message = message;
this.message = `${status} - ${message}`;
this.status = status;
this.name = 'StatusError';
Error.captureStackTrace(this, StatusError);
Expand Down
14 changes: 14 additions & 0 deletions server/routes/room-routes.js
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,18 @@ router.get(
),
]);

// Only `world_readable` or `shared` rooms that are `public` are viewable in the archive
const allowedToViewRoom =
roomData?.historyVisibility === 'world_readable' ||
(roomData?.historyVisibility === 'shared' && roomData?.joinRule === 'public');

if (!allowedToViewRoom) {
throw new StatusError(
403,
`Only \`world_readable\` or \`shared\` rooms that are \`public\` can be viewed in the archive. ${roomData.id} has m.room.history_visiblity=${roomData?.historyVisibility} m.room.join_rules=${roomData?.joinRule}`
);
}

if (events.length >= archiveMessageLimit) {
throw new Error('TODO: Redirect user to smaller hour range');
}
Expand All @@ -200,6 +212,8 @@ router.get(
title: `${roomData.name} - Matrix Public Archive`,
styles: [hydrogenStylesUrl, stylesUrl],
scripts: [jsBundleUrl],
// We only allow search engines to index `world_readable` rooms
noIndex: roomData?.historyVisibility !== `world_readable`,
}
);

Expand Down
3 changes: 2 additions & 1 deletion test/client-utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ async function getTestClientForHs(testMatrixServerUrl) {
}

// Create a public room to test in
async function createTestRoom(client) {
async function createTestRoom(client, overrideCreateOptions) {
let qs = new URLSearchParams();
if (client.applicationServiceUserIdOverride) {
qs.append('user_id', client.applicationServiceUserIdOverride);
Expand All @@ -95,6 +95,7 @@ async function createTestRoom(client) {
},
},
],
...overrideCreateOptions,
},
accessToken: client.accessToken,
}
Expand Down
56 changes: 56 additions & 0 deletions test/e2e-tests.js
Original file line number Diff line number Diff line change
Expand Up @@ -506,5 +506,61 @@ describe('matrix-public-archive', () => {
it(
`will render a room with a sparse amount of messages (a few per day) with no contamination between days`
);

// Tests covering which rooms the archive is willing to serve and which of
// those pages search engines are allowed to index.
describe('access controls', () => {
  it('not allowed to view private room even when the archiver user is in the room', async () => {
    const client = await getTestClientForHs(testMatrixServerUrl1);
    const roomId = await createTestRoom(client, {
      preset: 'private_chat',
      initial_state: [],
    });

    archiveUrl = matrixPublicArchiveURLCreator.archiveUrlForRoom(roomId);

    // Capture the rejection and assert on it *outside* of the `try/catch`.
    // Calling `assert.fail(...)` inside the `try` would have its
    // `AssertionError` swallowed by the `catch`, which would then blow up
    // reading `err.response.status` and hide the real failure message.
    let fetchError;
    try {
      await fetchEndpointAsText(archiveUrl);
    } catch (err) {
      fetchError = err;
    }

    assert.ok(
      fetchError,
      'We expect the request to fail with a 403 since the archive should not be able to view a private room'
    );
    // Anything that is not an HTTP error response (network failure,
    // programming error, etc) is unexpected, so surface it untouched with its
    // own message and stack trace.
    if (!fetchError.response) {
      throw fetchError;
    }
    assert.strictEqual(fetchError.response.status, 403);
  });

  it('search engines allowed to index `world_readable` room', async () => {
    const client = await getTestClientForHs(testMatrixServerUrl1);
    // No overrides: this relies on the default test room options (presumably
    // the ones that set up `world_readable` history visibility -- see
    // `createTestRoom`).
    const roomId = await createTestRoom(client);

    archiveUrl = matrixPublicArchiveURLCreator.archiveUrlForRoom(roomId);
    const archivePageHtml = await fetchEndpointAsText(archiveUrl);

    const dom = parseHTML(archivePageHtml);

    // Make sure the `<meta name="robots" ...>` tag does NOT exist on the
    // page telling search engines not to index it
    assert.strictEqual(dom.document.querySelector(`meta[name="robots"]`), null);
  });

  it('search engines not allowed to index `public` room', async () => {
    const client = await getTestClientForHs(testMatrixServerUrl1);
    const roomId = await createTestRoom(client, {
      // The default options for the test rooms adds a
      // `m.room.history_visibility` state event so we override that here so
      // it's only a public room.
      initial_state: [],
    });

    archiveUrl = matrixPublicArchiveURLCreator.archiveUrlForRoom(roomId);
    const archivePageHtml = await fetchEndpointAsText(archiveUrl);

    const dom = parseHTML(archivePageHtml);

    // Make sure the `<meta name="robots" ...>` tag exists on the page
    // telling search engines not to index it
    assert.strictEqual(
      dom.document.querySelector(`meta[name="robots"]`)?.getAttribute('content'),
      'noindex, nofollow'
    );
  });
});
});
});