diff --git a/.github/actions/pull-ghcr/action.yml b/.github/actions/pull-ghcr/action.yml index 1b1f6183f..1eea0f6ef 100644 --- a/.github/actions/pull-ghcr/action.yml +++ b/.github/actions/pull-ghcr/action.yml @@ -15,7 +15,6 @@ runs: "elasticsearch" "wdqs" "wdqs-frontend" - "wdqs-proxy" "quickstatements" ) diff --git a/.github/workflows/_build_test.yml b/.github/workflows/_build_test.yml index f34ee2d40..985bf3b0f 100644 --- a/.github/workflows/_build_test.yml +++ b/.github/workflows/_build_test.yml @@ -28,7 +28,6 @@ jobs: - elasticsearch - wdqs - wdqs-frontend - - wdqs-proxy - quickstatements steps: diff --git a/.github/workflows/build_publish_image_release.yml b/.github/workflows/build_publish_image_release.yml index 1fcadd82f..b606ef6f7 100644 --- a/.github/workflows/build_publish_image_release.yml +++ b/.github/workflows/build_publish_image_release.yml @@ -7,7 +7,6 @@ on: - 'quickstatements@*' - 'wdqs@*' - 'wdqs-frontend@*' - - 'wdqs-proxy@*' - 'wikibase@*' - 'wikibase-lts@*' diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index c4b211c7c..a83149699 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -15,7 +15,6 @@ on: - quickstatements - wdqs - wdqs-frontend - - wdqs-proxy - wikibase - wikibase-lts - All projects with unreleased changes diff --git a/README.md b/README.md index 018c45862..2d175c10b 100644 --- a/README.md +++ b/README.md @@ -121,9 +121,7 @@ In order to test your own instances of the services, make sure to change the fol WIKIBASE_URL=http://wikibase WIKIBASE_CLIENT_URL=http://wikibase-client QUICKSTATEMENTS_URL=http://quickstatements -WDQS_FRONTEND_URL=http://wdqs-frontend -WDQS_URL=http://wdqs:9999 -WDQS_PROXY_URL=http://wdqs-proxy +WDQS_URL=http://query MW_ADMIN_NAME= MW_ADMIN_PASS= MW_SCRIPT_PATH=/w diff --git a/build/wdqs-frontend/README.md b/build/wdqs-frontend/README.md index cd933d0da..fe6443b7f 100644 --- a/build/wdqs-frontend/README.md +++ 
b/build/wdqs-frontend/README.md @@ -41,14 +41,13 @@ If QuickStatements and Wikibase are running on the same IP address, a reverse pr ### Environment variables -| Variable | Default | Description | -| --------------- | ---------------------------- | ---------------------------------------- | -| `LANGUAGE` | "en" | Language to use in the UI | -| `BRAND_TITLE` | "DockerWikibaseQueryService" | Name to display on the UI | -| `WIKIBASE_HOST` | "wikibase" | Hostname of the Wikibase host (required) | -| `WDQS_HOST` | "wdqs" | Hostname of the WDQS host | -| `WDQS_PORT` | "9999" | Port of the WDQS host | -| `COPYRIGHT_URL` | "undefined" | URL for the copyright notice | +Variables in **bold** are required. + +| Variable | Default | Description | +| ------------------------- | ---------------------------- | -------------------------------| +| `LANGUAGE` | "en" | Language to use in the UI | +| **`WDQS_PUBLIC_URL`** | | Public URL of the WDQS SPARQL endpoint | +| **`WIKIBASE_PUBLIC_URL`** | | Public URL of the Wikibase API (`api.php`) | ## Example @@ -125,6 +124,7 @@ services: hard: 32768 volumes: - wdqs-data:/wdqs/data + # TODO: make it available healthcheck: test: curl --silent --fail localhost:9999/bigdata/namespace/wdq/sparql interval: 10s @@ -144,18 +144,17 @@ services: wdqs-frontend: image: wikibase/wdqs-frontend - depends_on: - - wdqs-proxy restart: unless-stopped ports: - 8834:80 labels: - "traefik.enable=true" - - "traefik.http.routers.wdqs-frontend.rule=Host(`query.example`)" + - "traefik.http.routers.wdqs-frontend.rule=Host(`query.wikibase.example`)" - "traefik.http.routers.wdqs-frontend.entrypoints=websecure" - "traefik.http.routers.wdqs-frontend.tls.certresolver=letsencrypt" environment: - WDQS_HOST: wdqs-proxy + WDQS_PUBLIC_URL: https://query.wikibase.example/sparql + WIKIBASE_PUBLIC_URL: https://wikibase.example/w/api.php healthcheck: test: curl --silent --fail localhost interval: 10s diff --git a/build/wdqs-frontend/custom-config.json b/build/wdqs-frontend/custom-config.json index 
bff91daf3..8b1bf4a04 100644 --- a/build/wdqs-frontend/custom-config.json +++ b/build/wdqs-frontend/custom-config.json @@ -1,10 +1,10 @@ { "api": { "sparql": { - "uri": "/proxy/wdqs/bigdata/namespace/wdq/sparql" + "uri": "$WDQS_PUBLIC_URL" }, "wikibase": { - "uri": "/proxy/wikibase/w/api.php" + "uri": "$WIKIBASE_PUBLIC_URL" }, "examples": { "server": "https://www.wikidata.org/", diff --git a/build/wdqs-frontend/default.conf b/build/wdqs-frontend/default.conf index 6b275cbec..5b696c92d 100644 --- a/build/wdqs-frontend/default.conf +++ b/build/wdqs-frontend/default.conf @@ -3,16 +3,6 @@ server { listen 80; server_name localhost; - location /proxy/wikibase { - rewrite /proxy/wikibase/(.*) /${DOLLAR}1 break; - proxy_pass http://$WIKIBASE_HOST:80; - } - - location /proxy/wdqs { - rewrite /proxy/wdqs/(.*) /${DOLLAR}1 break; - proxy_pass http://$WDQS_HOST:80; - } - location / { root /usr/share/nginx/html; index index.html index.htm; diff --git a/build/wdqs-frontend/entrypoint.sh b/build/wdqs-frontend/entrypoint.sh index ee5445f14..a9f16687a 100755 --- a/build/wdqs-frontend/entrypoint.sh +++ b/build/wdqs-frontend/entrypoint.sh @@ -3,13 +3,13 @@ # Test if required environment variables have been set if [ -z "$WIKIBASE_HOST" ]; then -echo "WIKIBASE_HOST is required but isn't set. You should pass it to docker. See: https://docs.docker.com/engine/reference/commandline/run/#set-environment-variables--e---env---env-file"; -exit 1; + echo "WIKIBASE_HOST is required but isn't set. You should pass it to docker. See: https://docs.docker.com/engine/reference/commandline/run/#set-environment-variables--e---env---env-file"; + exit 1; fi if [ -z "$WDQS_HOST" ]; then -echo "WDQS_HOST is required but isn't set. You should pass it to docker. See: https://docs.docker.com/engine/reference/commandline/run/#set-environment-variables--e---env---env-file"; -exit 1; + echo "WDQS_HOST is required but isn't set. You should pass it to docker. 
See: https://docs.docker.com/engine/reference/commandline/run/#set-environment-variables--e---env---env-file"; + exit 1; fi set -eu diff --git a/build/wdqs-proxy/CHANGELOG.md b/build/wdqs-proxy/CHANGELOG.md deleted file mode 100644 index b2feb6e26..000000000 --- a/build/wdqs-proxy/CHANGELOG.md +++ /dev/null @@ -1,5 +0,0 @@ -## **wdqs-proxy@1.0.1** (2024-10-09) - -### 📖 Documentation - -- Switch from `.example.com` to `.example` diff --git a/build/wdqs-proxy/Dockerfile b/build/wdqs-proxy/Dockerfile deleted file mode 100644 index d4602c0f2..000000000 --- a/build/wdqs-proxy/Dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -ARG NGINX_IMAGE_URL - -# ########################################################################### -# hadolint ignore=DL3006 -FROM ${NGINX_IMAGE_URL} -LABEL org.opencontainers.image.source="https://github.com/wmde/wikibase-release-pipeline" - -COPY entrypoint.sh /entrypoint.sh -COPY wdqs.template /etc/nginx/conf.d/wdqs.template - -ENV PROXY_MAX_QUERY_MILLIS=60000\ - PROXY_PASS_HOST=wdqs:9999 - -ENTRYPOINT "/entrypoint.sh" diff --git a/build/wdqs-proxy/README.md b/build/wdqs-proxy/README.md deleted file mode 100644 index 6f8b628f9..000000000 --- a/build/wdqs-proxy/README.md +++ /dev/null @@ -1,175 +0,0 @@ -# Wikibase Suite Wikidata Query Service Proxy (wdqs-proxy) image - -This is a proxy to put in front of the WBS WDQS image that enforces READONLY requests, implements query timeouts and limits access to Blazegraph SPARQL endpoints. - -> 💡 This image is part of Wikibase Suite (WBS). [WBS Deploy](https://github.com/wmde/wikibase-release-pipeline/deploy/README.md) provides everything you need to self-host a Wikibase instance out of the box. 
- -## Requirements - -In order to run WDQS Proxy, you need: - -- at least 2 GB RAM to start WDQS -- MediaWiki/Wikibase instance -- WDQS as server -- WDQS as updater -- Configuration via environment variables - -### MediaWiki/Wikibase instance - -We suggest using the [WBS Wikibase image](https://hub.docker.com/r/wikibase/wikibase) because this is the image we run all our tests against. Follow the setup instructions over there to get it up and running. - -### WDQS as server - -We suggest using the [WBS Wikibase image](https://hub.docker.com/r/wikibase/wdqs). - -### WDQS as updater - -We suggest using the [WBS Wikibase image](https://hub.docker.com/r/wikibase/wdqs), the same as used for WDQS server. Check out the [documentation](https://wikitech.wikimedia.org/wiki/Wikidata_Query_Service) to learn how to run it in updater mode. - -## Environment variables - -| Variable | Default | Description | -| ------------------------ | ----------- | ---------------------------- | -| `PROXY_PASS_HOST` | "wdqs:9999" | Where to forward requests to | -| `PROXY_MAX_QUERY_MILLIS` | 60000 | Timeout in milliseconds | - -## Example - -Here's an example of how to run this image together with the [WBS Wikibase image](https://hub.docker.com/r/wikibase/wikibase) and [WBS WDQS image](https://hub.docker.com/r/wikibase/wdqs) using Docker Compose. 
- -```yml -services: - wikibase: - image: wikibase/wikibase - depends_on: - mysql: - condition: service_healthy - restart: unless-stopped - ports: - - 8880:80 - labels: - - "traefik.enable=true" - - "traefik.http.routers.wikibase.rule=Host(`wikibase.example`)" - - "traefik.http.routers.wikibase.entrypoints=websecure" - - "traefik.http.routers.wikibase.tls.certresolver=letsencrypt" - volumes: - - ./config:/config - - wikibase-image-data:/var/www/html/images - environment: - MW_ADMIN_NAME: "admin" - MW_ADMIN_PASS: "change-this-password" - MW_ADMIN_EMAIL: "admin@wikibase.example" - MW_WG_SERVER: https://wikibase.example - DB_SERVER: mysql:3306 - DB_NAME: "my_wiki" - DB_USER: "mariadb-user" - DB_PASS: "change-this-password" - healthcheck: - test: curl --silent --fail localhost/wiki/Main_Page - interval: 10s - start_period: 5m - - wikibase-jobrunner: - image: wikibase/wikibase - command: /jobrunner-entrypoint.sh - depends_on: - wikibase: - condition: service_healthy - restart: always - volumes_from: - - wikibase - - mysql: - image: mariadb:10.11 - restart: unless-stopped - volumes: - - mysql-data:/var/lib/mysql - environment: - MYSQL_DATABASE: "my_wiki" - MYSQL_USER: "mariadb-user" - MYSQL_PASSWORD: "change-this-password" - MYSQL_RANDOM_ROOT_PASSWORD: yes - healthcheck: - test: healthcheck.sh --connect --innodb_initialized - start_period: 1m - interval: 20s - timeout: 5s - - wdqs: - image: wikibase/wdqs - command: /runBlazegraph.sh - depends_on: - wikibase: - condition: service_healthy - restart: unless-stopped - ulimits: - nofile: - soft: 32768 - hard: 32768 - volumes: - - wdqs-data:/wdqs/data - healthcheck: - test: curl --silent --fail localhost:9999/bigdata/namespace/wdq/sparql - interval: 10s - start_period: 2m - - wdqs-updater: - image: wikibase/wdqs - command: /runUpdate.sh - depends_on: - wdqs: - condition: service_healthy - restart: unless-stopped - ulimits: - nofile: - soft: 32768 - hard: 32768 - - wdqs-proxy: - image: wikibase/wdqs-proxy - depends_on: - wdqs: 
- condition: service_healthy - restart: unless-stopped - -volumes: - wikibase-image-data: - mysql-data: - wdqs-data: -``` - -## Releases - -Official releases of this image can be found on [Docker Hub wikibase/wdqs-frontend](https://hub.docker.com/r/wikibase/wdqs-frontend). - -## Tags and Versioning - -This WDQS Frontend image is using [semantic versioning](https://semver.org/spec/v2.0.0.html). - -We provide several tags that relate to the versioning semantics. - -| Tag | Example | Description | -| ----------------------------------------------- | ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| _MAJOR_ | 3 | Tags the latest image with this major version. Gets overwritten whenever a new version is released with this major version. This will include new builds triggered by base image changes, patch version updates and minor version updates. | -| _MAJOR_._MINOR_ | 3.1 | Tags the latest image with this major and minor version. Gets overwritten whenever a new version is released with this major and minor version. This will include new builds triggered by base image changes and patch version updates. | -| _MAJOR_._MINOR_._PATCH_ | 3.1.7 | Tags the latest image with this major, minor and patch version. Gets overwritten whenever a new version is released with this major, minor and patch version. This only happens for new builds triggered by base image changes. | -| _MAJOR_._MINOR_._PATCH_\_build*BUILD-TIMESTAMP* | 3.1.7_build20240530103941 | Tag that never gets overwritten. Every image will have this tag with a unique build timestamp. Can be used to reference images explicitly for reproducibility. | - -## Internal filesystem layout - -Hooking into the internal filesystem can extend the functionality of this image. 
- -| File | Description | -| --------------------------------- | --------------------------------------------------------------------------------------------------------- | -| `/etc/nginx/conf.d/wdqs.template` | Template for the nginx config (substituted to `/etc/nginx/conf.d/default.conf` at runtime) | -| `/etc/nginx/conf.d/default.conf` | nginx config. To override this you must also use a custom entrypoint to avoid the file being overwritten. | - -## Source - -This image is built from this [Dockerfile](https://github.com/wmde/wikibase-release-pipeline/blob/main/build/wdqs-proxy/Dockerfile). - -## Authors & Contact - -This image is maintained by the Wikibase Suite Team at [Wikimedia Germany (WMDE)](https://wikimedia.de). - -If you have questions not listed above or need help, use this [bug report form](https://phabricator.wikimedia.org/maniphest/task/edit/form/129/) to start a conversation with the engineering team. diff --git a/build/wdqs-proxy/build.env b/build/wdqs-proxy/build.env deleted file mode 100644 index 677fb68aa..000000000 --- a/build/wdqs-proxy/build.env +++ /dev/null @@ -1,2 +0,0 @@ -# https://hub.docker.com/_/nginx -NGINX_IMAGE_URL=nginx:1.27.0-bookworm diff --git a/build/wdqs-proxy/dockerhub.md b/build/wdqs-proxy/dockerhub.md deleted file mode 100644 index 40ab1a362..000000000 --- a/build/wdqs-proxy/dockerhub.md +++ /dev/null @@ -1,21 +0,0 @@ -# Wikibase Suite Wikidata Query Service Proxy (wdqs-proxy) Image - -Wikibase Suite (WBS) eases self-hosting [Wikibase](https://wikiba.se) in production, allowing you to maintain a knowledge graph similar to [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page). - -If you want to host your own WBS instance, head over to the [WBS Deploy documentation](https://github.com/wmde/wikibase-release-pipeline/blob/main/deploy/README.md). - -# Documentation - -Version specific documentation for this image is hosted in our [git repository](https://github.com/wmde/wikibase-release-pipeline/). 
- -## Currently supported versions - -- July 2024 [3.x.x](https://github.com/wmde/wikibase-release-pipeline/blob/deploy-3/build/wdqs-proxy/README.md) -- July 2024 [2.x.x](https://github.com/wmde/wikibase-release-pipeline/blob/deploy-2/build/wdqs-proxy/README.md) -- July 2024 [1.x.x](https://github.com/wmde/wikibase-release-pipeline/blob/deploy-1/build/wdqs-proxy/README.md) - -## Legacy versions - -- 17 April 2024 [wmde.20](https://github.com/wmde/wikibase-release-pipeline/blob/wmde.20/build/wdqs-proxy/README.md) -- 17 April 2024 [wmde.19](https://github.com/wmde/wikibase-release-pipeline/blob/wmde.19/build/wdqs-proxy/README.md) -- 29 April 2024 [wmde.18](https://github.com/wmde/wikibase-release-pipeline/blob/wmde.18/build/wdqs-proxy/README.md) diff --git a/build/wdqs-proxy/entrypoint.sh b/build/wdqs-proxy/entrypoint.sh deleted file mode 100755 index 907fce0f3..000000000 --- a/build/wdqs-proxy/entrypoint.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh -# This file is provided by the wikibase/wdqs-proxy docker image. - -# Test if required environment variables have been set -if [ -z "$PROXY_PASS_HOST" ]; then - echo "PROXY_PASS_HOST is required but isn't set. You should pass it to docker. 
See: https://docs.docker.com/engine/reference/commandline/run/#set-environment-variables--e---env---env-file"; - exit 1; -fi - -set -eu - -envsubst < /etc/nginx/conf.d/wdqs.template > /etc/nginx/conf.d/default.conf - -nginx -g 'daemon off;' diff --git a/build/wdqs-proxy/package.json b/build/wdqs-proxy/package.json deleted file mode 100644 index 9e8bf48f4..000000000 --- a/build/wdqs-proxy/package.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "name": "wdqs-proxy", - "version": "1.0.1", - "nx": { - "targets": { - "lint": {}, - "build": {}, - "nx-release-publish": {} - } - } -} diff --git a/build/wdqs-proxy/wdqs.template b/build/wdqs-proxy/wdqs.template deleted file mode 100644 index 44cebc8b3..000000000 --- a/build/wdqs-proxy/wdqs.template +++ /dev/null @@ -1,34 +0,0 @@ -# This file is provided by the wikibase/wdqs-proxy docker image. -server { - listen 80 default_server; - listen [::]:80 default_server; - - location ~ ^/bigdata/(namespace/[a-zA-Z0-9_]+/sparql|ldf|assets) { - # use IP address for the backend and not "localhost" to ensure - # connection is done over IPv4 (backend is configured as IPv4 only) - proxy_pass http://${PROXY_PASS_HOST}; - - proxy_set_header X-BIGDATA-MAX-QUERY-MILLIS ${PROXY_MAX_QUERY_MILLIS}; - proxy_set_header X-BIGDATA-READ-ONLY "yes"; - - add_header Access-Control-Allow-Origin * always; - #add_header Cache-Control "public, max-age=300"; - add_header Vary Accept; - - client_max_body_size 1m; - client_body_buffer_size 1m; - proxy_intercept_errors off; - proxy_buffering on; - proxy_buffer_size 16k; - proxy_buffers 256 16k; - proxy_busy_buffers_size 256k; - proxy_temp_file_write_size 16k; - proxy_max_temp_file_size 10m; - proxy_read_timeout 300; - - limit_except GET OPTIONS POST { - deny all; - } - - } -} diff --git a/build/wdqs/Dockerfile b/build/wdqs/Dockerfile index b21fbfb0c..ee1797e21 100644 --- a/build/wdqs/Dockerfile +++ b/build/wdqs/Dockerfile @@ -69,6 +69,8 @@ ENV MEMORY=""\ BLAZEGRAPH_EXTRA_OPTS=""\ LOG_CONFIG="/wdqs/logback.xml" +EXPOSE 
9999 + WORKDIR /wdqs COPY --chown=blazegraph:blazegraph wait-for-it.sh entrypoint.sh runBlazegraph.sh runUpdate.sh / diff --git a/build/wdqs/README.md b/build/wdqs/README.md index da0434216..18b784995 100644 --- a/build/wdqs/README.md +++ b/build/wdqs/README.md @@ -30,11 +30,9 @@ You can send `GET` requests with your SPARQL query to the WDQS endpoint (followi You'll need one instance of the image to execute the updater started using `/runUpdate.sh`. This polls changes from Wikibase. -### WDQS Proxy for public facing setups +### Proxy for public facing setups -By default, WDQS exposes some endpoints and methods that reveal internal details or functionality that might allow for abuse of the system. Wikibase Suite offers the [WDQS-proxy](../WDQS-proxy/README.md) which filters out all long-running or unwanted requests. - -When running WDQS in a setup without WDQS-proxy, **please consider disabling these endpoints in some other way**. +By default, WDQS exposes some endpoints and methods that reveal internal details or functionality that might not be intended in every setup, especially when running as a public service. The example below includes a traefik proxy configuration limiting the functionality WDQS exposes. 
### Environment variables @@ -132,6 +130,33 @@ services: test: curl --silent --fail localhost:9999/bigdata/namespace/wdq/sparql interval: 10s start_period: 2m + labels: + - "traefik.enable=true" + # Define router rules for WDQS service, including limits to HTTP methods + - "traefik.http.routers.wdqs-router.rule=Host(`query.wikibase.example`) && PathPrefix(`/sparql`) && (Method(`GET`) || Method(`OPTIONS`) || Method(`POST`))" + # Add prefix to path before forwarding to upstream service + - "traefik.http.middlewares.wdqs-prefix.addprefix.prefix=/bigdata/namespace/wdq/" + # Announce limited HTTP methods in preflight requests + # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Access-Control-Allow-Methods + - "traefik.http.middlewares.wdqs-headers.headers.accesscontrolallowmethods=GET,OPTIONS,POST" + # Announce Accept header can lead to varying responses + # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Vary + - "traefik.http.middlewares.wdqs-headers.headers.customresponseheaders.VARY=Accept" + # WDQS/blazegraph config headers + # https://github.com/blazegraph/database/commit/fcecfd46d616735b651ccebf44116d6702f2b545 + - "traefik.http.middlewares.wdqs-headers.headers.customrequestheaders.X-BIGDATA-READ-ONLY=yes" + # https://github.com/blazegraph/database/wiki/REST_API#query + - "traefik.http.middlewares.wdqs-headers.headers.customrequestheaders.X-BIGDATA-MAX-QUERY-MILLIS=300000" + # CORS https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Access-Control-Allow-Origin#sect + # Allow unauthenticated requests from any origin + - "traefik.http.middlewares.wdqs-cors-headers.headers.accesscontrolallowheaders=*" + # Define middleware for rate limiting + # https://doc.traefik.io/traefik-hub/api-gateway/reference/routing/http/middlewares/ref-rate-limit#rate-and-burst + - "traefik.http.middlewares.wdqs-rate-limit.ratelimit.burst=30" + - "traefik.http.middlewares.wdqs-rate-limit.ratelimit.average=60" + - "traefik.http.middlewares.wdqs-rate-limit.ratelimit.period=1m" + # Apply 
middlewares to the wdqs router + - "traefik.http.routers.wdqs-router.middlewares=wdqs-prefix,wdqs-headers,wdqs-cors-headers,wdqs-rate-limit" wdqs-updater: image: wikibase/wdqs @@ -147,17 +172,40 @@ services: environment: WIKIBASE_CONCEPT_URI: https://wikibase.example - wdqs-proxy: - image: wikibase/wdqs-proxy - depends_on: - wdqs: - condition: service_healthy + traefik: + image: traefik:3.1 + command: + # Basic setup + - "--providers.docker=true" + - "--providers.docker.exposedbydefault=false" + # http endpoint + - "--entrypoints.web.address=:80" + # https endpoint + - "--entrypoints.websecure.address=:443" + - "--entrypoints.websecure.asdefault" + - "--entrypoints.websecure.http.tls.certresolver=letsencrypt" + # http to https redirect + - "--entrypoints.web.http.redirections.entryPoint.to=websecure" + - "--entrypoints.web.http.redirections.entryPoint.scheme=https" + - "--entrypoints.web.http.redirections.entrypoint.permanent=true" + # ACME SSL certificate generation + - "--certificatesresolvers.letsencrypt.acme.httpchallenge=true" + - "--certificatesresolvers.letsencrypt.acme.httpchallenge.entrypoint=web" + - "--certificatesresolvers.letsencrypt.acme.email=admin@wikibase.example" + - "--certificatesresolvers.letsencrypt.acme.storage=/letsencrypt/acme.json" restart: unless-stopped + ports: + - 80:80 + - 443:443 + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + - traefik-letsencrypt-data:/letsencrypt volumes: wikibase-image-data: mysql-data: wdqs-data: + traefik-letsencrypt-data: ``` ## Releases diff --git a/deploy-lts/.env b/deploy-lts/.env index 216749c26..cecdc66a5 100644 --- a/deploy-lts/.env +++ b/deploy-lts/.env @@ -14,7 +14,7 @@ # deployed to. Note that you need three distinct names, e.g. three different # subdomains. Otherwise the reverse proxy cannot route properly. 
WIKIBASE_PUBLIC_HOST=wikibase.example -WDQS_FRONTEND_PUBLIC_HOST=wdqs-frontend.example +WDQS_PUBLIC_HOST=query.wikibase.example QUICKSTATEMENTS_PUBLIC_HOST=quickstatements.example # MediaWiki / Wikibase user configuration. diff --git a/deploy-lts/README.md b/deploy-lts/README.md index 7aaab8c26..43c4ae42a 100644 --- a/deploy-lts/README.md +++ b/deploy-lts/README.md @@ -45,7 +45,7 @@ WBS Deploy consists of the following services: You need three DNS records that resolve to your machine's IP address, one for each user-facing service: - Wikibase, e.g., "wikibase.example" -- QueryService, e.g., "query.example" +- QueryService, e.g., "query.wikibase.example" - QuickStatements, e.g., "quickstatements.example" ### Initial setup @@ -304,10 +304,10 @@ Removing the `traefik-letsencrypt-data` volume will request a new certificate fr ## WDQS Frontend -To interact with the WDQS frontend, navigate to the URL defined as `WDQS_FRONTEND_PUBLIC_HOST` in the `.env` file. By default, this is set to `wdqs-frontend.example`. +To interact with the WDQS frontend, navigate to the URL defined as `WDQS_PUBLIC_HOST` in the `.env` file. By default, this is set to `query.wikibase.example`. 
Alternatively, send `GET` requests with your SPARQL query to the WDQS frontend endpoint: -`https://wdqs-frontend.example.com/proxy/wdqs/bigdata/namespace/wdq/sparql?query={SPARQL}` +`https://query.wikibase.example/sparql?query={SPARQL}` ## FAQ diff --git a/deploy-lts/docker-compose.yml b/deploy-lts/docker-compose.yml index c9be32430..823259520 100644 --- a/deploy-lts/docker-compose.yml +++ b/deploy-lts/docker-compose.yml @@ -102,6 +102,33 @@ services: test: curl --silent --fail localhost:9999/bigdata/namespace/wdq/sparql interval: 10s start_period: 2m + labels: + - "traefik.enable=true" + # Define router rules for WDQS service, including limits to HTTP methods + - "traefik.http.routers.wdqs-router.rule=Host(`${WDQS_PUBLIC_HOST}`) && PathPrefix(`/sparql`) && (Method(`GET`) || Method(`OPTIONS`) || Method(`POST`))" + # Add prefix to path before forwarding to upstream service + - "traefik.http.middlewares.wdqs-prefix.addprefix.prefix=/bigdata/namespace/wdq/" + # Announce limited HTTP methods in preflight requests + # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Access-Control-Allow-Methods + - "traefik.http.middlewares.wdqs-headers.headers.accesscontrolallowmethods=GET,OPTIONS,POST" + # Announce Accept header can lead to varying responses + # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Vary + - "traefik.http.middlewares.wdqs-headers.headers.customresponseheaders.VARY=Accept" + # WDQS/blazegraph config headers + # https://github.com/blazegraph/database/commit/fcecfd46d616735b651ccebf44116d6702f2b545 + - "traefik.http.middlewares.wdqs-headers.headers.customrequestheaders.X-BIGDATA-READ-ONLY=yes" + # https://github.com/blazegraph/database/wiki/REST_API#query + - "traefik.http.middlewares.wdqs-headers.headers.customrequestheaders.X-BIGDATA-MAX-QUERY-MILLIS=300000" + # CORS https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Access-Control-Allow-Origin#sect + # Allow unauthenticated requests from any origin + - 
"traefik.http.middlewares.wdqs-cors-headers.headers.accesscontrolallowheaders=*" + # Define middleware for rate limiting + # https://doc.traefik.io/traefik-hub/api-gateway/reference/routing/http/middlewares/ref-rate-limit#rate-and-burst + - "traefik.http.middlewares.wdqs-rate-limit.ratelimit.burst=30" + - "traefik.http.middlewares.wdqs-rate-limit.ratelimit.average=60" + - "traefik.http.middlewares.wdqs-rate-limit.ratelimit.period=1m" + # Apply middlewares to the wdqs router + - "traefik.http.routers.wdqs-router.middlewares=wdqs-prefix,wdqs-headers,wdqs-cors-headers,wdqs-rate-limit" wdqs-updater: image: wikibase/wdqs:2 @@ -120,23 +147,15 @@ services: environment: WIKIBASE_CONCEPT_URI: https://${WIKIBASE_PUBLIC_HOST} - wdqs-proxy: - image: wikibase/wdqs-proxy:1 - depends_on: - wdqs: - condition: service_healthy - restart: unless-stopped - wdqs-frontend: image: wikibase/wdqs-frontend:1 - depends_on: - - wdqs-proxy restart: unless-stopped labels: - "traefik.enable=true" - - "traefik.http.routers.wdqs-frontend.rule=Host(`${WDQS_FRONTEND_PUBLIC_HOST}`)" + - "traefik.http.routers.wdqs-frontend.rule=Host(`${WDQS_PUBLIC_HOST}`) && !PathPrefix(`/sparql`)" environment: - WDQS_HOST: wdqs-proxy + WDQS_PUBLIC_URL: https://${WDQS_PUBLIC_HOST}/sparql + WIKIBASE_PUBLIC_URL: https://${WIKIBASE_PUBLIC_HOST}/w/api.php healthcheck: test: curl --silent --fail localhost interval: 10s @@ -186,8 +205,10 @@ services: - "--certificatesresolvers.letsencrypt.acme.httpchallenge.entrypoint=web" - "--certificatesresolvers.letsencrypt.acme.email=${MW_ADMIN_EMAIL}" - "--certificatesresolvers.letsencrypt.acme.storage=/letsencrypt/acme.json" + # Uncomment this line to only test ssl generation first, makes sure you don't run into letsencrypt rate limits # - "--certificatesresolvers.letsencrypt.acme.caserver=https://acme-staging-v02.api.letsencrypt.org/directory" + restart: unless-stopped ports: - 80:80 - 443:443 diff --git a/deploy-lts/template.env b/deploy-lts/template.env index 216749c26..cecdc66a5 
100644 --- a/deploy-lts/template.env +++ b/deploy-lts/template.env @@ -14,7 +14,7 @@ # deployed to. Note that you need three distinct names, e.g. three different # subdomains. Otherwise the reverse proxy cannot route properly. WIKIBASE_PUBLIC_HOST=wikibase.example -WDQS_FRONTEND_PUBLIC_HOST=wdqs-frontend.example +WDQS_PUBLIC_HOST=query.wikibase.example QUICKSTATEMENTS_PUBLIC_HOST=quickstatements.example # MediaWiki / Wikibase user configuration. diff --git a/deploy/README.md b/deploy/README.md index 7aaab8c26..8b9ee6351 100644 --- a/deploy/README.md +++ b/deploy/README.md @@ -16,7 +16,6 @@ WBS Deploy consists of the following services: - **[Elasticsearch](https://hub.docker.com/r/wikibase/elasticsearch)** Search service used by MediaWiki. - **[WDQS](https://hub.docker.com/r/wikibase/wdqs)** Wikidata Query Service to process SPARQL queries. - **[WDQS Frontend](https://hub.docker.com/r/wikibase/wdqs-frontend)** Web front end for SPARQL queries. -- **[WDQS Proxy](https://hub.docker.com/r/wikibase/wdqs-proxy)** A middle layer for WDQS which serves to filter requests and make the service more secure. - **[WDQS Updater](https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#runUpdate.sh)** Keeps the WDQS data in sync with Wikibase. - **[Quickstatements](https://hub.docker.com/r/wikibase/quickstatements)** A web-based tool to import and manipulate large amounts of data. - **[Traefik](https://hub.docker.com/_/traefik)** A reverse proxy that handles TLS termination and SSL certificate renewal through ACME. 
@@ -45,7 +44,7 @@ WBS Deploy consists of the following services: You need three DNS records that resolve to your machine's IP address, one for each user-facing service: - Wikibase, e.g., "wikibase.example" -- QueryService, e.g., "query.example" +- QueryService, e.g., "query.wikibase.example" - QuickStatements, e.g., "quickstatements.example" ### Initial setup @@ -304,10 +303,10 @@ Removing the `traefik-letsencrypt-data` volume will request a new certificate fr ## WDQS Frontend -To interact with the WDQS frontend, navigate to the URL defined as `WDQS_FRONTEND_PUBLIC_HOST` in the `.env` file. By default, this is set to `wdqs-frontend.example`. +To interact with the WDQS frontend, navigate to the URL defined as `WDQS_PUBLIC_HOST` in the `.env` file. By default, this is set to `query.wikibase.example`. Alternatively, send `GET` requests with your SPARQL query to the WDQS frontend endpoint: -`https://wdqs-frontend.example.com/proxy/wdqs/bigdata/namespace/wdq/sparql?query={SPARQL}` +`https://query.wikibase.example/sparql?query={SPARQL}` ## FAQ diff --git a/deploy/docker-compose.yml b/deploy/docker-compose.yml index 2eff50cce..eb9c22b45 100644 --- a/deploy/docker-compose.yml +++ b/deploy/docker-compose.yml @@ -102,6 +102,33 @@ services: test: curl --silent --fail localhost:9999/bigdata/namespace/wdq/sparql interval: 10s start_period: 2m + labels: + - "traefik.enable=true" + # Define router rules for WDQS service, including limits to HTTP methods + - "traefik.http.routers.wdqs-router.rule=Host(`${WDQS_PUBLIC_HOST}`) && PathPrefix(`/sparql`) && (Method(`GET`) || Method(`OPTIONS`) || Method(`POST`))" + # Add prefix to path before forwarding to upstream service + - "traefik.http.middlewares.wdqs-prefix.addprefix.prefix=/bigdata/namespace/wdq/" + # Announce limited HTTP methods in preflight requests + # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Access-Control-Allow-Methods + - 
"traefik.http.middlewares.wdqs-headers.headers.accesscontrolallowmethods=GET,OPTIONS,POST" + # Announce Accept header can lead to varying responses + # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Vary + - "traefik.http.middlewares.wdqs-headers.headers.customresponseheaders.VARY=Accept" + # WDQS/blazegraph config headers + # https://github.com/blazegraph/database/commit/fcecfd46d616735b651ccebf44116d6702f2b545 + - "traefik.http.middlewares.wdqs-headers.headers.customrequestheaders.X-BIGDATA-READ-ONLY=yes" + # https://github.com/blazegraph/database/wiki/REST_API#query + - "traefik.http.middlewares.wdqs-headers.headers.customrequestheaders.X-BIGDATA-MAX-QUERY-MILLIS=300000" + # CORS https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Access-Control-Allow-Origin#sect + # Allow unauthenticated requests from any origin + - "traefik.http.middlewares.wdqs-cors-headers.headers.accesscontrolallowheaders=*" + # Define middleware for rate limiting + # https://doc.traefik.io/traefik-hub/api-gateway/reference/routing/http/middlewares/ref-rate-limit#rate-and-burst + - "traefik.http.middlewares.wdqs-rate-limit.ratelimit.burst=30" + - "traefik.http.middlewares.wdqs-rate-limit.ratelimit.average=60" + - "traefik.http.middlewares.wdqs-rate-limit.ratelimit.period=1m" + # Apply middlewares to the wdqs router + - "traefik.http.routers.wdqs-router.middlewares=wdqs-prefix,wdqs-headers,wdqs-cors-headers,wdqs-rate-limit" wdqs-updater: image: wikibase/wdqs:2 @@ -120,23 +147,15 @@ services: environment: WIKIBASE_CONCEPT_URI: https://${WIKIBASE_PUBLIC_HOST} - wdqs-proxy: - image: wikibase/wdqs-proxy:1 - depends_on: - wdqs: - condition: service_healthy - restart: unless-stopped - wdqs-frontend: - image: wikibase/wdqs-frontend:1 - depends_on: - - wdqs-proxy + image: wikibase/wdqs-frontend restart: unless-stopped labels: - "traefik.enable=true" - - "traefik.http.routers.wdqs-frontend.rule=Host(`${WDQS_FRONTEND_PUBLIC_HOST}`)" + - 
"traefik.http.routers.wdqs-frontend.rule=Host(`${WDQS_PUBLIC_HOST}`) && !PathPrefix(`/sparql`)" environment: - WDQS_HOST: wdqs-proxy + WDQS_PUBLIC_URL: https://${WDQS_PUBLIC_HOST}/sparql + WIKIBASE_PUBLIC_URL: https://${WIKIBASE_PUBLIC_HOST}/w/api.php healthcheck: test: curl --silent --fail localhost interval: 10s @@ -186,6 +205,7 @@ services: - "--certificatesresolvers.letsencrypt.acme.httpchallenge.entrypoint=web" - "--certificatesresolvers.letsencrypt.acme.email=${MW_ADMIN_EMAIL}" - "--certificatesresolvers.letsencrypt.acme.storage=/letsencrypt/acme.json" + # Uncomment this line to only test ssl generation first, makes sure you don't run into letsencrypt rate limits # - "--certificatesresolvers.letsencrypt.acme.caserver=https://acme-staging-v02.api.letsencrypt.org/directory" restart: unless-stopped diff --git a/deploy/template.env b/deploy/template.env index 216749c26..cecdc66a5 100644 --- a/deploy/template.env +++ b/deploy/template.env @@ -14,7 +14,7 @@ # deployed to. Note that you need three distinct names, e.g. three different # subdomains. Otherwise the reverse proxy cannot route properly. WIKIBASE_PUBLIC_HOST=wikibase.example -WDQS_FRONTEND_PUBLIC_HOST=wdqs-frontend.example +WDQS_PUBLIC_HOST=query.wikibase.example QUICKSTATEMENTS_PUBLIC_HOST=quickstatements.example # MediaWiki / Wikibase user configuration. 
diff --git a/test/helpers/pages/queryservice-ui/queryservice-ui.page.ts b/test/helpers/pages/queryservice-ui/queryservice-ui.page.ts index 62bc567f7..f4952e230 100644 --- a/test/helpers/pages/queryservice-ui/queryservice-ui.page.ts +++ b/test/helpers/pages/queryservice-ui/queryservice-ui.page.ts @@ -10,18 +10,18 @@ class QueryServiceUIPage extends SubmittablePage { } /** - * `${testEnv.vars.WDQS_FRONTEND_URL}/#${prefixes, query}` + * `${testEnv.vars.WDQS_URL}/#${prefixes, query}` * * @param {string} query * @param {string[]} prefixes - Optional * @return {void} */ public async open( query: string, prefixes: string[] = [] ): Promise { - await browser.url( testEnv.vars.WDQS_FRONTEND_URL ); + await browser.url( testEnv.vars.WDQS_URL ); if ( prefixes ) { query = [ ...prefixes, query ].join( '\n' ); } - browser.url( `${ testEnv.vars.WDQS_FRONTEND_URL }/#${ encodeURI( query ) }` ); + browser.url( `${ testEnv.vars.WDQS_URL }/#${ encodeURI( query ) }` ); } public async resultIncludes( prop: string, value?: string ): Promise { diff --git a/test/specs/repo/queryservice.ts b/test/specs/repo/queryservice.ts index 938c170d0..373a2b88c 100644 --- a/test/specs/repo/queryservice.ts +++ b/test/specs/repo/queryservice.ts @@ -7,41 +7,33 @@ import SpecialNewItemPage from '../../helpers/pages/special/new-item.page.js'; import { wikibasePropertyString } from '../../helpers/wikibase-property-types.js'; describe( 'QueryService', function () { - it( 'Should not be able to post to sparql endpoint', async function () { - const result = await browser.makeRequest( - `${ testEnv.vars.WDQS_PROXY_URL }/bigdata/namespace/wdq/sparql`, - { validateStatus: false }, - {} - ); - expect( result.status ).toEqual( 405 ); - } ); - it( 'Should be able to get sparql endpoint', async function () { const result = await browser.makeRequest( - `${ testEnv.vars.WDQS_PROXY_URL }/bigdata/namespace/wdq/sparql` + `${ testEnv.vars.WDQS_URL }/sparql` ); expect( result.status ).toEqual( 200 ); } ); - it( 'Should not be 
possible to reach blazegraph ldf api that is not enabled', async function () { + it( 'Should not be able to post to sparql endpoint', async function () { const result = await browser.makeRequest( - `${ testEnv.vars.WDQS_PROXY_URL }/bigdata/namespace/wdq/ldf`, - { validateStatus: false } + `${ testEnv.vars.WDQS_URL }/sparql`, + { validateStatus: false }, + {} ); - expect( result.status ).toEqual( 404 ); + expect( result.status ).toEqual( 405 ); } ); - it( 'Should not be possible to reach blazegraph ldf assets thats not enabled', async function () { + it( 'Should not be possible to reach blazegraph ldf api that is not enabled', async function () { const result = await browser.makeRequest( - `${ testEnv.vars.WDQS_PROXY_URL }/bigdata/namespace/wdq/assets`, + `${ testEnv.vars.WDQS_URL }/ldf`, { validateStatus: false } ); expect( result.status ).toEqual( 404 ); } ); - it( 'Should not be possible to reach blazegraph workbench', async function () { + it( 'Should not be possible to reach blazegraph ldf assets thats not enabled', async function () { const result = await browser.makeRequest( - `${ testEnv.vars.WDQS_PROXY_URL }/bigdata/#query`, + `${ testEnv.vars.WDQS_URL }/assets`, { validateStatus: false } ); expect( result.status ).toEqual( 404 ); diff --git a/test/suites/docker-compose.override.yml b/test/suites/docker-compose.override.yml index 510c5d273..66dcd559c 100644 --- a/test/suites/docker-compose.override.yml +++ b/test/suites/docker-compose.override.yml @@ -30,11 +30,11 @@ services: environment: WIKIBASE_CONCEPT_URI: http://${WIKIBASE_PUBLIC_HOST} - wdqs-proxy: - image: wikibase/wdqs-proxy - wdqs-frontend: image: wikibase/wdqs-frontend + environment: + WDQS_PUBLIC_URL: http://${WDQS_PUBLIC_HOST}/sparql + WIKIBASE_PUBLIC_URL: http://${WIKIBASE_PUBLIC_HOST}/w/api.php quickstatements: image: wikibase/quickstatements @@ -68,9 +68,9 @@ services: networks: default: aliases: - - wikibase.example - - wdqs-frontend.example - - quickstatements.example + - 
${WIKIBASE_PUBLIC_HOST} + - ${WDQS_PUBLIC_HOST} + - ${QUICKSTATEMENTS_PUBLIC_HOST} volumes: wikibase-config: diff --git a/test/test-services.env b/test/test-services.env index 30adab2e6..b67a598cc 100644 --- a/test/test-services.env +++ b/test/test-services.env @@ -1,7 +1,7 @@ # Any of these can be overridden locally in ../local.env WIKIBASE_PUBLIC_HOST=wikibase.example -WDQS_FRONTEND_PUBLIC_HOST=wdqs-frontend.example +WDQS_PUBLIC_HOST=query.wikibase.example QUICKSTATEMENTS_PUBLIC_HOST=quickstatements.example MW_ADMIN_NAME=admin @@ -11,13 +11,11 @@ MW_ADMIN_PASS=change-this-password # URLs used by tests to access services from within the # docker network through traefik proxy WIKIBASE_URL=http://wikibase.example -WDQS_FRONTEND_URL=http://wdqs-frontend.example +WDQS_URL=http://query.wikibase.example QUICKSTATEMENTS_URL=http://quickstatements.example # URLs used by tests to access services from within the docker network directly WIKIBASE_CLIENT_URL=http://wikibase-client -WDQS_URL=http://wdqs:9999 -WDQS_PROXY_URL=http://wdqs-proxy # CONFIG # wikibase / wikibase-client / wikibase-jobrunner