A bit more to prevent bots from crawling /wiki/ (#346)
* A bit more to prevent bots from crawling /wiki/

Now the proxy requires a specific header and anything else gets a 404.

* lint
goto-bus-stop authored Dec 28, 2024
1 parent f279e89 commit 64112a2
Showing 2 changed files with 14 additions and 2 deletions.
8 changes: 7 additions & 1 deletion src/app.js
@@ -81,7 +81,7 @@ app.get('/recent', t(async (req, res) => {
 app.get('/robots.txt', (req, res) => {
   // Search engines should not serve the proxied pages as if they are wikipedia
   res.send(`User-agent: *
-Disallow: /wiki/*
+Disallow: /wiki/
 `)
 })
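An aside on the robots.txt tweak: Disallow rules are prefix matches, so `Disallow: /wiki/` already covers every proxied page and the trailing `*` is unnecessary. A quick manual check of the served file, as a sketch only (assuming the proxy is running on a hypothetical local port, in an ES module context):

// Not part of this commit: a quick look at the robots.txt the proxy now serves.
// The base URL is an assumption for local testing.
const robots = await fetch('http://localhost:3000/robots.txt')
console.log(await robots.text())
// Expected output:
// User-agent: *
// Disallow: /wiki/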

@@ -92,6 +92,12 @@ app.use(serveStatic(fileURLToPath(new URL('../public', import.meta.url))))
  */

 app.get('/wiki/:page', t(async (req, res) => {
+  res.header('X-Robots-Tag', 'noindex,nofollow')
+  if (req.headers.authorization !== 'wikibattle.me client') {
+    res.status(404).end()
+    return
+  }
+
   const body = await wiki.get(req.params.page)
   res.end(body.content)
 }))
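To illustrate the behaviour described in the commit message (requests without the expected header get a 404), here is a minimal sketch, not part of the commit, assuming the proxy is running on a hypothetical local base URL and that a page named 'JavaScript' exists:

// Exercising the new header check (ES module context, top-level await).
const base = 'http://localhost:3000' // assumption for local testing

// Without the authorization header the proxy now answers 404.
const blocked = await fetch(`${base}/wiki/JavaScript`)
console.log(blocked.status) // 404

// With the header the Wikipedia page body is proxied as before.
const allowed = await fetch(`${base}/wiki/JavaScript`, {
  headers: { authorization: 'wikibattle.me client' }
})
console.log(allowed.status) // 200
console.log((await allowed.text()).length)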
8 changes: 7 additions & 1 deletion src/client/load-page.js
@@ -1,6 +1,12 @@
 const cache = {}

+const fetchOpts = {
+  headers: {
+    authorization: 'wikibattle.me client'
+  }
+}
+
 export default function load (page, cb) {
-  cache[page] ??= fetch(`./wiki/${page}`).then((response) => response.text())
+  cache[page] ??= fetch(`./wiki/${page}`, fetchOpts).then((response) => response.text())
   cache[page].then((result) => cb(null, result), cb)
 }
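For reference, a hypothetical caller of the updated client helper (not part of the commit): load() caches the fetch promise per page and reports the result through a Node-style callback, so repeat requests for the same page reuse the first fetch.

// Hypothetical usage of the client-side helper; the import path is an assumption.
import load from './load-page.js'

load('JavaScript', (err, html) => {
  if (err) {
    console.error('failed to load page', err)
    return
  }
  // Subsequent calls for 'JavaScript' resolve from the cached promise.
  console.log('page loaded,', html.length, 'characters')
})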
