diff --git a/apis/allorigins/app/get-page.js b/apis/allorigins/app/get-page.js
new file mode 100644
index 0000000..3fa3e59
--- /dev/null
+++ b/apis/allorigins/app/get-page.js
@@ -0,0 +1,175 @@
+const { got } = require('./http-client')
+const iconv = require('iconv-lite')
+
+module.exports = getPage
+
+/**
+ * Fetches a remote page and shapes the result according to `format`.
+ *
+ * @param {object} params
+ * @param {string} params.url - Address of the page to fetch.
+ * @param {string} params.format - `info` and `raw` select dedicated
+ *   shapes; any other value yields the contents + status envelope.
+ * @param {string} params.requestMethod - HTTP method for the upstream
+ *   request; `HEAD` always yields the info shape.
+ * @param {string} [params.charset] - Encoding used to decode the body.
+ * @returns {Promise<object>} Result of the matching fetch helper.
+ */
+function getPage({ url, format, requestMethod, charset }) {
+  if (format === 'info' || requestMethod === 'HEAD') {
+    return getPageInfo(url)
+  } else if (format === 'raw') {
+    return getRawPage(url, requestMethod, charset)
+  }
+
+  return getPageContents(url, requestMethod, charset)
+}
+
+/**
+ * Issues a HEAD request and reports response metadata only.
+ *
+ * @param {string} url - Address of the page to inspect.
+ * @returns {Promise<object>} `url`, `content_type`, `content_length`
+ *   (-1 when the header is missing, zero or not numeric) and `http_code`;
+ *   the `processError` result when the request fails.
+ */
+async function getPageInfo(url) {
+  const { response, error } = await request(url, 'HEAD')
+  if (error) return processError(error)
+
+  return {
+    url: url,
+    content_type: response.headers['content-type'],
+    content_length: +response.headers['content-length'] || -1,
+    http_code: response.statusCode,
+  }
+}
+
+/**
+ * Fetches a page with decompression disabled and returns its body.
+ *
+ * @param {string} url - Address of the page to fetch.
+ * @param {string} requestMethod - HTTP method for the upstream request.
+ * @param {string} [charset] - Encoding used to decode the body.
+ * @returns {Promise<object>} `content`, `contentType` and `contentLength`
+ *   (bytes); the `processError` result when the request fails.
+ */
+async function getRawPage(url, requestMethod, charset) {
+  const { content, response, error } = await request(
+    url,
+    requestMethod,
+    true,
+    charset
+  )
+  if (error) return processError(error)
+
+  const contentLength = Buffer.byteLength(content)
+  return {
+    content,
+    contentType: response.headers['content-type'],
+    contentLength,
+  }
+}
+
+/**
+ * Fetches a page and wraps its body in the contents + status envelope.
+ *
+ * @param {string} url - Address of the page to fetch.
+ * @param {string} requestMethod - HTTP method for the upstream request.
+ * @param {string} [charset] - Encoding used to decode the body.
+ * @returns {Promise<object>} `contents` (string) and `status` metadata;
+ *   the `processError` result when the request fails.
+ */
+async function getPageContents(url, requestMethod, charset) {
+  const { content, response, error } = await request(
+    url,
+    requestMethod,
+    false,
+    charset
+  )
+  if (error) return processError(error)
+
+  const contentLength = Buffer.byteLength(content)
+  return {
+    contents: content.toString(),
+    status: {
+      url: url,
+      content_type: response.headers['content-type'],
+      content_length: contentLength,
+      http_code: response.statusCode,
+    },
+  }
+}
+
+/**
+ * Performs the upstream request, reporting failures as `{ error }`
+ * instead of throwing.
+ *
+ * @param {string} url - Address to request.
+ * @param {string} requestMethod - HTTP method to use.
+ * @param {boolean} [raw=false] - When true, decompression is disabled.
+ * @param {?string} [charset=null] - Encoding used to decode the body.
+ * @returns {Promise<object>} `{ response }` for HEAD,
+ *   `{ response, content }` otherwise, or `{ error }` on failure.
+ */
+async function request(url, requestMethod, raw = false, charset = null) {
+  try {
+    const options = {
+      method: requestMethod,
+      decompress: !raw,
+    }
+
+    const response = await got(url, options)
+    if (options.method === 'HEAD') return { response }
+
+    // Awaited so a decoding failure lands in the catch below instead of
+    // rejecting the promise returned to the caller
+    return await processContent(response, charset)
+  } catch (error) {
+    return { error }
+  }
+}
+
+/**
+ * Pairs a response with its body, decoded when `charset` names an
+ * encoding known to iconv-lite.
+ *
+ * @param {object} response - Upstream response; `body` is read from it.
+ * @param {?string} charset - Encoding name, or a falsy value to skip
+ *   decoding.
+ * @returns {Promise<object>} `{ response, content }`.
+ */
+async function processContent(response, charset) {
+  const res = { response: response, content: response.body }
+  if (charset && iconv.encodingExists(charset)) {
+    res.content = iconv.decode(res.content, charset)
+  }
+  return res
+}
+
+/**
+ * Converts a failed request into the contents + status envelope.
+ *
+ * @param {Error} e - Error reported by `request`; may carry `response`.
+ * @returns {Promise<object>} `{ contents, status }`; `status.error` holds
+ *   the error itself when no upstream response is attached.
+ */
+async function processError(e) {
+  const { response } = e
+  if (!response) return { contents: null, status: { error: e } }
+
+  const { url, statusCode: http_code, headers, body } = response
+  // Buffer.byteLength throws on null/undefined, so a missing body is
+  // reported as empty
+  const payload = body ?? ''
+
+  return {
+    contents: payload.toString(),
+    status: {
+      url,
+      http_code,
+      content_type: headers['content-type'],
+      content_length: Buffer.byteLength(payload),
+    },
+  }
+}
diff --git a/apis/allorigins/app/http-client.js b/apis/allorigins/app/http-client.js
new file mode 100644
index 0000000..6ea831f
--- /dev/null
+++ b/apis/allorigins/app/http-client.js
@@ -0,0 +1,54 @@
+const HttpAgent = require('agentkeepalive')
+const QuickLRU = require('../vendor/quick-lru')
+const got = require('got')
+
+// User agent sent upstream unless the USER_AGENT env var overrides it
+const DEFAULT_USER_AGENT = `Mozilla/5.0 (compatible; allOrigins/${global.AO_VERSION}; +http://allorigins.win/)`
+
+/**
+ * Builds the shared `got` instance used for upstream requests. Runs
+ * immediately, when this module is evaluated.
+ *
+ * @returns {object} `{ got }`, the configured instance.
+ */
+module.exports = (function defaultGot() {
+  const gotOptions = {
+    agent: {
+      http: new HttpAgent({
+        keepAlive: false,
+      }),
+      https: new HttpAgent.HttpsAgent({
+        keepAlive: false,
+      }),
+    },
+    responseType: 'buffer',
+    dnsCache: true,
+    headers: { 'user-agent': process.env.USER_AGENT || DEFAULT_USER_AGENT },
+  }
+
+  // NOTE(review): `|| true` makes this condition always pass, so the
+  // ENABLE_REDIS check has no effect - confirm whether that is intended
+  if (process.env.ENABLE_REDIS === '1' || true) {
+    gotOptions.cacheOptions = {
+      shared: true,
+      cacheHeuristic: 0.1,
+      immutableMinTimeToLive: 24 * 3600 * 1000, // 24h
+      ignoreCargoCult: true,
+    }
+  }
+
+  const storageAdapter = new QuickLRU({ maxSize: 1000 })
+
+  gotOptions.handlers = [
+    (options, next) => {
+      // NOTE(review): this assigns to the outer `gotOptions`, not to the
+      // per-request `options` - confirm the cache store is actually used
+      gotOptions.cache = storageAdapter
+      return next(options)
+    },
+  ]
+
+  const gotInstance = got.extend(gotOptions)
+
+  return { got: gotInstance }
+})()
diff --git a/apis/allorigins/app/logger.js b/apis/allorigins/app/logger.js
new file mode 100644
index 0000000..e19e62e
--- /dev/null
+++ b/apis/allorigins/app/logger.js
@@ -0,0 +1,95 @@
+/**
+ * Creates the request logger facade.
+ *
+ * @param {boolean} [debug=false] - When falsy, logging is disabled and
+ *   `requestProcessed` returns false without doing anything.
+ * @returns {object} `logger` (the sink, or false) and `requestProcessed`.
+ */
+module.exports = function (debug = false) {
+  const logger = debug ? defaultLogger() : false
+
+  return {
+    logger,
+    /**
+     * Logs one processed request with its target and origin hostnames.
+     *
+     * @param {object} data - Entry to log; `status.url` and `headers`
+     *   are read from it.
+     * @returns {*} The sink's result, false when logging is disabled, or
+     *   the caught error when logging fails.
+     */
+    requestProcessed(data) {
+      if (!this.logger) return false
+      try {
+        const [to, from] = parseURLs(data)
+
+        // Drop `host` from a copy: `data.headers` belongs to the caller
+        // and must not be modified here
+        const { host, ...headers } = data.headers
+
+        return this.logger.log(
+          { ...data, headers },
+          {
+            meta: {
+              to: to?.hostname,
+              from: from?.hostname || 'browser',
+            },
+          }
+        )
+      } catch (e) {
+        return e
+      }
+    },
+  }
+}
+
+/**
+ * Parses the target and origin of a log entry.
+ *
+ * @param {object} data - Entry with `status.url` and `headers.origin`.
+ * @returns {Array} `[to, from]` as URL objects; a missing value is passed
+ *   through as-is, and both raw values are returned if parsing throws.
+ */
+function parseURLs(data) {
+  try {
+    const to = data.status.url && new URL(data.status.url)
+    const from = data.headers['origin'] && new URL(data.headers['origin'])
+
+    return [to, from]
+  } catch (_) {
+    return [data.status.url, data.headers['origin']]
+  }
+}
+
+/**
+ * Builds a console-backed logger and registers SIGTERM/SIGINT handlers
+ * that log a warning and exit the process with code 0.
+ *
+ * @returns {object} Logger exposing `log` and `warn`; both return true.
+ */
+function defaultLogger() {
+  const logger = {
+    log: (...args) => {
+      console.debug(...args)
+      return true
+    },
+    warn: (...args) => {
+      console.warn(...args)
+      return true
+    },
+  }
+
+  function onSignal(signal) {
+    logger.warn(`received signal ${signal}, shutting down`)
+    shutdown()
+  }
+
+  async function shutdown() {
+    process.exit(0)
+  }
+
+  process.on('SIGTERM', onSignal)
+  process.on('SIGINT', onSignal)
+
+  return logger
+}
diff --git a/apis/allorigins/app/process-request.js b/apis/allorigins/app/process-request.js
new file mode 100644
index 0000000..871b0a9
--- /dev/null
+++ b/apis/allorigins/app/process-request.js
@@ -0,0 +1,140 @@
+const getPage = require('./get-page')
+const getLogger = require('./logger')
+
+// Logging is on when DEBUG is set to anything other than '0'
+const logger = getLogger(process.env.DEBUG && process.env.DEBUG !== '0')
+
+const DEFAULT_CACHE_TIME = 60 * 60 // 60 minutes
+const MIN_CACHE_TIME = 5 * 60 // 5 minutes
+
+module.exports = processRequest
+
+/**
+ * Handles one proxy request: fetches the target page, writes the
+ * response and logs the outcome.
+ *
+ * @param {object} req - Incoming request; `method`, `query`, `params`
+ *   and `headers` are read.
+ * @param {object} res - Outgoing response.
+ * @returns {Promise<*>} Settles once the response has been written; for
+ *   OPTIONS it resolves to the result of `res.end()`.
+ */
+async function processRequest(req, res) {
+  const startTime = new Date()
+  const params = parseParams(req)
+
+  if (params.requestMethod === 'OPTIONS') {
+    return res.end()
+  }
+
+  const page = await getPage(params)
+
+  return createResponse(page, params, res, startTime).then((resPage) => {
+    logger.requestProcessed({
+      format: params.format,
+      headers: req.headers,
+      status: {
+        // `createResponse` may resolve to `res` instead of the page, in
+        // which case there is no status object to copy
+        ...(typeof resPage.status !== 'object'
+          ? {
+              response_time: new Date() - startTime,
+            }
+          : resPage.status),
+        url: params.url,
+      },
+    })
+  })
+}
+
+/**
+ * Merges the request method, query string and route parameters into one
+ * params object; later sources override earlier ones.
+ *
+ * @param {object} req - Incoming request.
+ * @returns {object} Params with a normalized `requestMethod` and a
+ *   lower-cased `format` (`json` when absent).
+ */
+function parseParams(req) {
+  const params = {
+    requestMethod: req.method,
+    ...req.query,
+    ...req.params,
+  }
+  params.requestMethod = parseRequestMethod(params.requestMethod)
+  // Coerce first: a query value is not guaranteed to be a string
+  params.format = String(params.format || 'json').toLowerCase()
+  return params
+}
+
+/**
+ * Normalizes an HTTP method name.
+ *
+ * @param {*} [method] - Method name in any letter case; non-string
+ *   values are coerced to a string first.
+ * @returns {string} The upper-cased method when it is one of HEAD, POST,
+ *   PUT, DELETE, PATCH or OPTIONS; `GET` otherwise.
+ */
+function parseRequestMethod(method) {
+  const normalized = String(method || '').toUpperCase()
+  const allowed = ['HEAD', 'POST', 'PUT', 'DELETE', 'PATCH', 'OPTIONS']
+
+  return allowed.includes(normalized) ? normalized : 'GET'
+}
+
+/**
+ * Writes the fetched page to the response in the requested format.
+ *
+ * @param {object} page - Result of `getPage`.
+ * @param {object} params - Parsed request params.
+ * @param {object} res - Outgoing response.
+ * @param {Date} startTime - When handling of the request started.
+ * @returns {Promise<object>} Whatever `res.send`/`res.jsonp` returned for
+ *   raw and JSONP output; the page itself for plain JSON.
+ */
+async function createResponse(page, params, res, startTime) {
+  if (['GET', 'HEAD'].includes(params.requestMethod)) {
+    const maxAge = params.disableCache
+      ? 0
+      : Math.max(
+          MIN_CACHE_TIME,
+          Number(params.cacheMaxAge) || DEFAULT_CACHE_TIME
+        )
+
+    res.set('Cache-control', `public, max-age=${maxAge}, stale-if-error=600`)
+  }
+
+  // Only a successful raw fetch carries `content`. Info results and
+  // upstream failures have none, so they are sent as JSON below instead
+  // of an undefined body with an undefined Content-Length
+  const isRawPage =
+    params.format === 'raw' &&
+    page.content !== undefined &&
+    !(page.status || {}).error
+
+  if (isRawPage) {
+    res.set({
+      'Content-Length': page.contentLength,
+      'Content-Type': page.contentType,
+    })
+    return res.send(page.content)
+  }
+
+  res.set(
+    'Content-Type',
+    `application/json; charset=${params.charset || 'utf-8'}`
+  )
+
+  if (page.status) {
+    page.status.response_time = new Date() - startTime
+  } else {
+    page.response_time = new Date() - startTime
+  }
+
+  if (params.callback) {
+    return res.jsonp(page)
+  }
+
+  res.send(Buffer.from(JSON.stringify(page)))
+  return page
+}