diff --git a/src/Downloader.ts b/src/Downloader.ts
index de3f7faf..49c140f7 100644
--- a/src/Downloader.ts
+++ b/src/Downloader.ts
@@ -31,8 +31,6 @@ import { Dump } from './Dump.js'
 import * as logger from './Logger.js'
 import MediaWiki from './MediaWiki.js'
 import ApiURLDirector from './util/builders/url/api.director.js'
-import DesktopURLDirector from './util/builders/url/desktop.director.js'
-import VisualEditorURLDirector from './util/builders/url/visual-editor.director.js'
 import basicURLDirector from './util/builders/url/basic.director.js'
 
 const imageminOptions = new Map()
@@ -73,13 +71,6 @@ interface BackoffOptions {
   backoffHandler: (number: number, delay: number, error?: any) => void
 }
 
-export interface MWCapabilities {
-  apiAvailable: boolean
-  veApiAvailable: boolean
-  coordinatesAvailable: boolean
-  desktopRestApiAvailable: boolean
-}
-
 export const defaultStreamRequestOptions: AxiosRequestConfig = {
   headers: {
     accept: 'application/octet-stream',
@@ -114,8 +105,7 @@ class Downloader {
   private readonly urlPartCache: KVS<string> = {}
   private readonly backoffOptions: BackoffOptions
   private readonly optimisationCacheUrl: string
-  private s3: S3
-  private mwCapabilities: MWCapabilities // todo move to MW
+  private s3: S3 // todo move to MW
   private apiUrlDirector: ApiURLDirector
 
   constructor({ mw, uaString, speed, reqTimeout, optimisationCacheUrl, s3, webp, backoffOptions }: DownloaderOpts) {
@@ -128,12 +118,6 @@ class Downloader {
     this.optimisationCacheUrl = optimisationCacheUrl
     this.webp = webp
     this.s3 = s3
-    this.mwCapabilities = {
-      apiAvailable: false,
-      veApiAvailable: false,
-      coordinatesAvailable: true,
-      desktopRestApiAvailable: false,
-    }
     this.apiUrlDirector = new ApiURLDirector(mw.apiUrl.href)
 
     this.backoffOptions = {
@@ -222,14 +206,14 @@ class Downloader {
   public async setBaseUrls() {
     //* Objects order in array matters!
     this.baseUrl = basicURLDirector.buildDownloaderBaseUrl([
-      { condition: this.mwCapabilities.desktopRestApiAvailable, value: this.mw.desktopRestApiUrl.href },
-      { condition: this.mwCapabilities.veApiAvailable, value: this.mw.veApiUrl.href },
+      { condition: await this.mw.hasDesktopRestApi(), value: this.mw.desktopRestApiUrl.href },
+      { condition: await this.mw.hasVeApi(), value: this.mw.veapiUrl.href },
     ])
 
     //* Objects order in array matters!
     this.baseUrlForMainPage = basicURLDirector.buildDownloaderBaseUrl([
-      { condition: this.mwCapabilities.desktopRestApiAvailable, value: this.mw.desktopRestApiUrl.href },
-      { condition: this.mwCapabilities.veApiAvailable, value: this.mw.veApiUrl.href },
+      { condition: await this.mw.hasDesktopRestApi(), value: this.mw.desktopRestApiUrl.href },
+      { condition: await this.mw.hasVeApi(), value: this.mw.veapiUrl.href },
     ])
 
     logger.log('Base Url: ', this.baseUrl)
@@ -248,29 +232,7 @@ class Downloader {
     }
   }
 
-  public async checkCapabilities(testArticleId = 'MediaWiki:Sidebar'): Promise<void> {
-    const desktopUrlDirector = new DesktopURLDirector(this.mw.desktopRestApiUrl.href)
-    const visualEditorURLDirector = new VisualEditorURLDirector(this.mw.veApiUrl.href)
-
-    // By default check all API's responses and set the capabilities
-    // accordingly. We need to set a default page (always there because
-    // installed per default) to request the REST API, otherwise it would
-    // fail the check.
-    this.mwCapabilities.desktopRestApiAvailable = await this.checkApiAvailabilty(desktopUrlDirector.buildArticleURL(testArticleId))
-    this.mwCapabilities.veApiAvailable = await this.checkApiAvailabilty(visualEditorURLDirector.buildArticleURL(testArticleId))
-    this.mwCapabilities.apiAvailable = await this.checkApiAvailabilty(this.mw.apiUrl.href)
-
-    // Coordinate fetching
-    const reqOpts = this.getArticleQueryOpts()
-
-    const resp = await this.getJSON(this.apiUrlDirector.buildQueryURL(reqOpts))
-
-    const isCoordinateWarning = resp.warnings && resp.warnings.query && (resp.warnings.query['*'] || '').includes('coordinates')
-    if (isCoordinateWarning) {
-      logger.info('Coordinates not available on this wiki')
-      this.mwCapabilities.coordinatesAvailable = false
-    }
-  }
+  // TODO: update callers now that checkCapabilities has moved to MediaWiki
 
   public removeEtagWeakPrefix(etag: string): string {
     return etag && etag.replace(WEAK_ETAG_REGEX, '')
@@ -288,7 +250,7 @@ class Downloader {
     const queryOpts: KVS<any> = {
       ...this.getArticleQueryOpts(shouldGetThumbnail, true),
       titles: articleIds.join('|'),
-      ...(this.mwCapabilities.coordinatesAvailable ? { colimit: 'max' } : {}),
+      ...((await this.mw.hasCoordinatesApi(this)) ? { colimit: 'max' } : {}),
       ...(this.mw.getCategories
         ? {
             cllimit: 'max',
@@ -328,7 +290,7 @@ class Downloader {
     while (true) {
       const queryOpts: KVS<any> = {
         ...this.getArticleQueryOpts(),
-        ...(this.mwCapabilities.coordinatesAvailable ? { colimit: 'max' } : {}),
+        ...((await this.mw.hasCoordinatesApi(this)) ? { colimit: 'max' } : {}),
         ...(this.mw.getCategories
           ? {
               cllimit: 'max',
@@ -387,11 +349,13 @@ class Downloader {
 
     logger.info(`Getting article [${articleId}] from ${articleApiUrl}`)
 
+    // Note: not only JSON can come back here; the page/html endpoint returns HTML
     const json = await this.getJSON<any>(articleApiUrl)
     if (json.error) {
       throw json.error
     }
-    return renderArticle(json, articleId, dump, articleDetailXId, this.mwCapabilities, articleDetail)
+    // TODO: this.mwCapabilities should be refactored
+    return articleRenderer.renderArticle(json, articleId, dump, articleDetailXId, this.mwCapabilities, articleDetail)
   }
 
   public async getJSON<T>(_url: string): Promise<T> {
@@ -467,6 +431,7 @@ class Downloader {
     return {
       action: 'query',
       format: 'json',
+      // TODO: this.mwCapabilities should be refactored
      prop: `redirects|revisions${includePageimages ? '|pageimages' : ''}${this.mwCapabilities.coordinatesAvailable ? '|coordinates' : ''}${
        this.mw.getCategories ? '|categories' : ''
      }`,
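A note on the new `setBaseUrls` conditions: `buildDownloaderBaseUrl` receives an ordered list of `{ condition, value }` pairs and the first satisfied entry wins, which is why the desktop REST API candidate is listed before the VisualEditor one ("Objects order in array matters!"). A minimal sketch of that first-match selection, assuming only the shape visible at the call site (the real implementation lives in `util/builders/url/basic.director.ts`; the URLs below are illustrative):

```ts
interface BaseUrlCandidate {
  condition: boolean
  value: string
}

// First truthy condition wins, hence the ordering constraint.
function pickBaseUrl(candidates: BaseUrlCandidate[]): string | undefined {
  return candidates.find(({ condition }) => condition)?.value
}

// With both APIs available, the desktop REST URL is preferred:
pickBaseUrl([
  { condition: true, value: 'https://en.wikipedia.org/api/rest_v1/page/html/' },
  { condition: true, value: 'https://en.wikipedia.org/w/api.php?action=visualeditor' },
]) // -> 'https://en.wikipedia.org/api/rest_v1/page/html/'
```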
diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts
index f9f582eb..7f4b30c6 100644
--- a/src/MediaWiki.ts
+++ b/src/MediaWiki.ts
@@ -11,26 +11,126 @@ import basicURLDirector from './util/builders/url/basic.director.js'
 import BaseURLDirector from './util/builders/url/base.director.js'
 import ApiURLDirector from './util/builders/url/api.director.js'
 
+interface MWCapabilities {
+  apiAvailable: boolean
+  veApiAvailable: boolean
+  coordinatesAvailable: boolean
+  desktopRestApiAvailable: boolean
+}
+
 class MediaWiki {
   public metaData: MWMetaData
   public readonly baseUrl: URL
-  public readonly modulePath: string
-  public readonly webUrl: URL
-  public readonly apiUrl: URL
-  public readonly veApiUrl: URL
-  public readonly restApiUrl: URL
-  public readonly mobileRestApiUrl: URL
-  public readonly desktopRestApiUrl: URL
+  public readonly modulePathConfig: string
   public readonly getCategories: boolean
   public readonly namespaces: MWNamespaces = {}
   public readonly namespacesToMirror: string[] = []
 
   private readonly wikiPath: string
+  private readonly restApiPath: string
   private readonly username: string
   private readonly password: string
   private readonly apiPath: string
   private readonly domain: string
   private apiUrlDirector: ApiURLDirector
+  private baseUrlDirector: BaseURLDirector
+
+  private _veapiUrl: URL
+  private _restApiUrl: URL
+  private _apiUrl: URL
+  private _modulePath: string
+  private _webUrl: URL
+  private _desktopRestApiUrl: URL
+  // TODO: the mobile URL builder was removed since there is no /mobile-sections endpoint
+
+  // Default MW capabilities
+  private readonly mwCapabilities: MWCapabilities
+
+  /**
+   * veApiUrl is built on top of ApiURLDirector
+   */
+  public get veapiUrl(): URL {
+    if (!this._veapiUrl) {
+      // TODO: this depends on baseUrlDirector.buildURL(this.apiPath) and looks like a weak solution
+      this._veapiUrl = this.apiUrlDirector.buildVisualEditorURL()
+    }
+    return this._veapiUrl
+  }
+
+  /**
+   * restApiUrl, apiUrl, modulePath, webUrl and desktopRestApiUrl are built on top of BaseURLDirector
+   */
+  public get restApiUrl(): URL {
+    if (!this._restApiUrl) {
+      this._restApiUrl = this.baseUrlDirector.buildRestApiURL(this.restApiPath)
+    }
+    // TODO: define the intended usage of this property
+    return this._restApiUrl
+  }
+
+  public get apiUrl(): URL {
+    if (!this._apiUrl) {
+      this._apiUrl = this.baseUrlDirector.buildURL(this.apiPath)
+    }
+    return this._apiUrl
+  }
+
+  public get modulePath(): string {
+    if (!this._modulePath) {
+      this._modulePath = this.baseUrlDirector.buildModuleURL(this.modulePathConfig)
+    }
+    return this._modulePath
+  }
+
+  public get webUrl(): URL {
+    if (!this._webUrl) {
+      this._webUrl = this.baseUrlDirector.buildURL(this.wikiPath)
+    }
+    return this._webUrl
+  }
+
+  public get desktopRestApiUrl(): URL {
+    if (!this._desktopRestApiUrl) {
+      this._desktopRestApiUrl = this.baseUrlDirector.buildDesktopRestApiURL(this.restApiPath)
+    }
+    return this._desktopRestApiUrl
+  }
+
+  public hasDesktopRestApi = async function (loginCookie?: string, testArticleId?: string): Promise<boolean> {
+    const desktopRestApiAvailable = await this.checkApiAvailabilty(this.getDesktopRestApiArticleUrl(testArticleId), loginCookie)
+    this.hasDesktopRestApi = async function (): Promise<boolean> {
+      return desktopRestApiAvailable
+    }
+    return desktopRestApiAvailable
+  }
+
+  public hasVeApi = async function (loginCookie?: string, testArticleId?: string): Promise<boolean> {
+    const veRestApiAvailable = await this.checkApiAvailabilty(this.getVeApiArticleUrl(testArticleId), loginCookie)
+    this.hasVeApi = async function (): Promise<boolean> {
+      return veRestApiAvailable
+    }
+    return veRestApiAvailable
+  }
+
+  public hasCoordinatesApi = async function (downloader?: Downloader): Promise<boolean> {
+    const validNamespaceIds = this.namespacesToMirror.map((ns) => this.namespaces[ns].num)
+    const reqOpts = {
+      action: 'query',
+      format: 'json',
+      // TODO: do we still need this.mwCapabilities.coordinatesAvailable here?
+      prop: `redirects|revisions${this.mwCapabilities.coordinatesAvailable ? '|coordinates' : ''}${this.getCategories ? '|categories' : ''}`,
+      rdlimit: 'max',
+      rdnamespace: validNamespaceIds.join('|'),
+    }
+
+    // TODO: replace|rename|refactor getJSON later
+    if (downloader) {
+      const resp = await downloader.getJSON(this.apiUrlDirector.buildQueryURL(reqOpts))
+      const isCoordinateWarning = resp.warnings && resp.warnings.query && (resp.warnings.query['*'] || '').includes('coordinates')
+      if (isCoordinateWarning) {
+        logger.info('Coordinates not available on this wiki')
+        return false
+      }
+    }
+    return true
+  }
 
   constructor(config: MWConfig) {
     this.domain = config.domain || ''
@@ -39,23 +139,23 @@ class MediaWiki {
     this.getCategories = config.getCategories
 
     this.baseUrl = basicURLDirector.buildMediawikiBaseURL(config.base)
-
     this.apiPath = config.apiPath ?? 'w/api.php'
     this.wikiPath = config.wikiPath ?? DEFAULT_WIKI_PATH
+    this.restApiPath = config.restApiPath
+    this.modulePathConfig = config.modulePath
 
-    const baseUrlDirector = new BaseURLDirector(this.baseUrl.href)
-
-    this.webUrl = baseUrlDirector.buildURL(this.wikiPath)
-    this.apiUrl = baseUrlDirector.buildURL(this.apiPath)
-
+    // Instantiate URL directors
+    this.baseUrlDirector = new BaseURLDirector(this.baseUrl.href)
     this.apiUrlDirector = new ApiURLDirector(this.apiUrl.href)
 
-    this.veApiUrl = this.apiUrlDirector.buildVisualEditorURL()
-
-    this.restApiUrl = baseUrlDirector.buildRestApiURL(config.restApiPath)
-    this.desktopRestApiUrl = baseUrlDirector.buildDesktopRestApiURL(config.restApiPath)
-
-    this.modulePath = baseUrlDirector.buildModuleURL(config.modulePath)
+    // Default capabilities
+    // TODO: check whether to remove this object
+    this.mwCapabilities = {
+      apiAvailable: false,
+      veApiAvailable: false,
+      coordinatesAvailable: true,
+      desktopRestApiAvailable: false,
+    }
   }
 
   public async login(downloader: Downloader) {
@@ -85,12 +185,17 @@ class MediaWiki {
         },
         method: 'POST',
       })
-      .then((resp) => {
+      .then(async (resp) => {
         if (resp.data.login.result !== 'Success') {
           throw new Error('Login Failed')
         }
 
+        /*
+          TODO: The cookie is shared between Downloader and MediaWiki, which is probably an antipattern; keep this as an interim solution for now.
+          Also, double-check the possible race condition: cookies must be set before capabilities are checked.
+        */
         downloader.loginCookie = resp.headers['set-cookie'].join(';')
+        await this.checkCapabilities(resp.headers['set-cookie'].join(';'))
       })
       .catch((err) => {
        throw err
@@ -296,6 +401,12 @@ class MediaWiki {
 
     return mwMetaData
   }
+
+  private async checkCapabilities(loginCookie?: string, testArticleId = 'MediaWiki:Sidebar'): Promise<void> {
+    await this.hasDesktopRestApi(loginCookie, testArticleId)
+    await this.hasVeApi(loginCookie, testArticleId)
+    await this.hasCoordinatesApi()
+  }
 }
 
 export default MediaWiki
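The `hasDesktopRestApi`/`hasVeApi` members above rely on self-replacing memoization: the first call performs the network probe and then overwrites the method with one that returns the cached result, so `checkCapabilities` (run during login) pays the cost once and later callers such as `setBaseUrls` get the cached answer. A standalone sketch of the pattern, where `probeApi` is a stand-in for the real `checkApiAvailabilty` helper and the URL is illustrative:

```ts
async function probeApi(url: string): Promise<boolean> {
  try {
    const resp = await fetch(url, { redirect: 'manual' })
    return resp.status === 200
  } catch {
    return false
  }
}

class CapabilityChecker {
  public hasApi = async function (this: CapabilityChecker, url: string): Promise<boolean> {
    const available = await probeApi(url) // network hit happens exactly once
    this.hasApi = async () => available // later calls return the cached flag
    return available
  }
}

// First call probes, second call is instant:
const checker = new CapabilityChecker()
await checker.hasApi('https://en.wikipedia.org/api/rest_v1/page/html/MediaWiki%3ASidebar')
await checker.hasApi('ignored') // cached result, no request
```

One trade-off of this pattern is that the method is silently swapped out at runtime, which is why keeping explicit `Promise<boolean>` annotations on both versions matters for readability.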
diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts
index 77387b42..5061df89 100644
--- a/src/mwoffliner.lib.ts
+++ b/src/mwoffliner.lib.ts
@@ -209,7 +209,9 @@ async function execute(argv: any) {
     }
   }
 
-  await downloader.checkCapabilities(mwMetaData.mainPage)
+  // TODO: checkCapabilities is now in MediaWiki, do we need it here?
+  // await downloader.checkCapabilities(mwMetaData.mainPage)
+
   await downloader.setBaseUrls()
 
   const redisStore = new RedisStore(argv.redis || config.defaults.redisPath)
diff --git a/src/util/mw-api.ts b/src/util/mw-api.ts
index 28d32d5d..a77f5fe7 100644
--- a/src/util/mw-api.ts
+++ b/src/util/mw-api.ts
@@ -3,6 +3,7 @@ import deepmerge from 'deepmerge'
 import * as logger from '../Logger.js'
 import Downloader from '../Downloader.js'
 import Timer from './Timer.js'
+import axios from 'axios'
 
 export async function getArticlesByIds(articleIds: string[], downloader: Downloader, redisStore: RS, log = true): Promise<void> {
   let from = 0
@@ -253,3 +254,12 @@ export function mwRetToArticleDetail(obj: QueryMwRet): KVS<ArticleDetail> {
   }
   return ret
 }
+
+export async function checkApiAvailabilty(url: string, loginCookie = ''): Promise<boolean> {
+  try {
+    const resp = await axios.get(url, { maxRedirects: 0, headers: { cookie: loginCookie } })
+    return resp.status === 200 && !resp.headers['mediawiki-api-error']
+  } catch (err) {
+    return false
+  }
+}
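For context, `checkApiAvailabilty` reports a URL as available only on a plain 200 response without a `mediawiki-api-error` header; because `maxRedirects` is 0, axios rejects on a redirect, so redirected endpoints are also treated as unavailable. A hedged usage sketch (the URL and the import path are illustrative):

```ts
import { checkApiAvailabilty } from './src/util/mw-api.js' // adjust path to the caller's location

const url = 'https://en.wikipedia.org/api/rest_v1/page/html/MediaWiki%3ASidebar'
if (!(await checkApiAvailabilty(url))) {
  // Downloader.setBaseUrls would fall back to the next candidate URL here
  console.warn(`REST endpoint not available: ${url}`)
}
```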
diff --git a/src/util/renderers/article.renderer.ts b/src/util/renderers/article.renderer.ts
new file mode 100644
index 00000000..736d7b2c
--- /dev/null
+++ b/src/util/renderers/article.renderer.ts
@@ -0,0 +1,104 @@
+import domino from 'domino'
+import { MWCapabilities } from '../../Downloader.js'
+import { Dump } from '../../Dump.js'
+import { DELETED_ARTICLE_ERROR } from '../const.js'
+import * as logger from '../../Logger.js'
+import { getStrippedTitleFromHtml } from '../misc.js'
+
+export class ArticleRenderer {
+  async renderArticle(
+    json: any,
+    articleId: string,
+    dump: Dump,
+    articleDetailXId: RKVS<ArticleDetail>,
+    capabilities: MWCapabilities,
+    articleDetailIn?: ArticleDetail,
+  ): Promise<any> {
+    const articleDetail = articleDetailIn || (await articleDetailXId.get(articleId))
+    const isMainPage = dump.isMainPage(articleId)
+
+    // TODO: connect capabilities
+    if (isMainPage || (capabilities.veApiAvailable && !capabilities.desktopRestApiAvailable)) {
+      const html = this.renderDesktopArticle(json, articleId, articleDetail, isMainPage)
+      const strippedTitle = getStrippedTitleFromHtml(html)
+      return [
+        {
+          articleId,
+          displayTitle: strippedTitle || articleId.replace('_', ' '),
+          html,
+        },
+      ]
+    }
+
+    const result = []
+    const html = json
+    // Paginate when there are more than 200 subCategories
+    const numberOfPagesToSplitInto = Math.max(Math.ceil((articleDetail.subCategories || []).length / 200), 1)
+    for (let i = 0; i < numberOfPagesToSplitInto; i++) {
+      const pageId = i === 0 ? '' : `__${i}`
+      const _articleId = articleId + pageId
+      const _articleDetail = Object.assign({}, articleDetail, {
+        subCategories: (articleDetail.subCategories || []).slice(i * 200, (i + 1) * 200),
+        nextArticleId: numberOfPagesToSplitInto > i + 1 ? `${articleId}__${i + 1}` : null,
+        prevArticleId: i - 1 > 0 ? `${articleId}__${i - 1}` : i - 1 === 0 ? articleId : null,
+      })
+
+      if ((articleDetail.subCategories || []).length > 200) {
+        await articleDetailXId.set(_articleId, _articleDetail)
+      }
+
+      let strippedTitle = getStrippedTitleFromHtml(html)
+      if (!strippedTitle) {
+        const title = (json.lead || { displaytitle: articleId }).displaytitle
+        const doc = domino.createDocument(`<span class="mw-title">${title}</span>`)
+        strippedTitle = doc.getElementsByClassName('mw-title')[0].textContent
+      }
+
+      result.push({
+        articleId: _articleId,
+        displayTitle: (strippedTitle || articleId.replace(/_/g, ' ')) + (i === 0 ? '' : `/${i}`),
+        html,
+      })
+    }
+
+    return result
+  }
+
+  renderDesktopArticle(json: any, articleId: string, articleDetail: ArticleDetail, isMainPage = false): string {
+    if (!json) {
+      throw new Error(`Cannot render [${json}] into an article`)
+    }
+    if (json.visualeditor) {
+      // Testing if article has been deleted between fetching list and downloading content.
+      if (json.visualeditor.oldid === 0) {
+        logger.error(DELETED_ARTICLE_ERROR)
+        throw new Error(DELETED_ARTICLE_ERROR)
+      }
+      return isMainPage ? json.visualeditor.content : this.injectHeader(json.visualeditor.content, articleDetail)
+    } else if (json.contentmodel === 'wikitext' || (json.html && json.html.body)) {
+      return json.html.body
+    } else if (json.error) {
+      logger.error(`Error in retrieved article [${articleId}]:`, json.error)
+      return ''
+    }
+    return json // This is probably HTML (hard to know for sure at this stage; buggy architecture)
+  }
+
+  private injectHeader(content: string, articleDetail: ArticleDetail): string {
+    const doc = domino.createDocument(content)
+    const header = doc.createElement('h1')
+
+    header.appendChild(doc.createTextNode(articleDetail.title))
+    header.classList.add('article-header')
+
+    const target = doc.querySelector('body.mw-body-content')
+
+    target.insertAdjacentElement('afterbegin', header)
+
+    return doc.documentElement.outerHTML
+  }
+}
+
+const articleRenderer = new ArticleRenderer()
+
+export default articleRenderer
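A worked example of the category pagination in `renderArticle` above: with 450 subcategories and a chunk size of 200, three pages are produced and linked through `prevArticleId`/`nextArticleId` (the article ID and counts are illustrative; the expressions are copied from the renderer):

```ts
const articleId = 'Category:Foo'
const subCategoriesCount = 450
const numberOfPages = Math.max(Math.ceil(subCategoriesCount / 200), 1) // -> 3

for (let i = 0; i < numberOfPages; i++) {
  const id = i === 0 ? articleId : `${articleId}__${i}`
  const next = numberOfPages > i + 1 ? `${articleId}__${i + 1}` : null
  const prev = i - 1 > 0 ? `${articleId}__${i - 1}` : i - 1 === 0 ? articleId : null
  console.log({ id, prev, next })
}
// { id: 'Category:Foo',    prev: null,              next: 'Category:Foo__1' }
// { id: 'Category:Foo__1', prev: 'Category:Foo',    next: 'Category:Foo__2' }
// { id: 'Category:Foo__2', prev: 'Category:Foo__1', next: null }
```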
diff --git a/test/unit/downloader.test.ts b/test/unit/downloader.test.ts
index 8328f95d..511f661e 100644
--- a/test/unit/downloader.test.ts
+++ b/test/unit/downloader.test.ts
@@ -31,7 +31,8 @@ describe('Downloader class', () => {
     downloader = new Downloader({ mw, uaString: `${config.userAgent} (contact@kiwix.org)`, speed: 1, reqTimeout: 1000 * 60, webp: true, optimisationCacheUrl: '' })
 
     await mw.getMwMetaData(downloader)
-    await downloader.checkCapabilities()
+    // TODO: checkCapabilities is now in MediaWiki, do we need it here?
+    // await downloader.checkCapabilities()
     await downloader.setBaseUrls()
   })
diff --git a/test/unit/mwApi.test.ts b/test/unit/mwApi.test.ts
index 594db4a2..6a508a5d 100644
--- a/test/unit/mwApi.test.ts
+++ b/test/unit/mwApi.test.ts
@@ -25,7 +25,8 @@ describe('mwApi', () => {
     downloader = new Downloader({ mw, uaString: `${config.userAgent} (contact@kiwix.org)`, speed: 1, reqTimeout: 1000 * 60, webp: false, optimisationCacheUrl: '' })
 
     await mw.getMwMetaData(downloader)
-    await downloader.checkCapabilities()
+    // TODO: checkCapabilities is now in MediaWiki, do we need it here?
+    // await downloader.checkCapabilities()
     await mw.getNamespaces([], downloader)
   })
diff --git a/test/unit/saveArticles.test.ts b/test/unit/saveArticles.test.ts
index 7168bcce..a078bb85 100644
--- a/test/unit/saveArticles.test.ts
+++ b/test/unit/saveArticles.test.ts
@@ -21,7 +21,8 @@ describe('saveArticles', () => {
 
   test('Article html processing', async () => {
     const { downloader, mw, dump } = await setupScrapeClasses() // en wikipedia
-    await downloader.checkCapabilities()
+    // TODO: checkCapabilities is now in MediaWiki, do we need it here?
+    // await downloader.checkCapabilities()
     await downloader.setBaseUrls()
     const _articlesDetail = await downloader.getArticleDetailsIds(['London'])
     const articlesDetail = mwRetToArticleDetail(_articlesDetail)
@@ -187,7 +188,8 @@ describe('saveArticles', () => {
 
   test('--customFlavour', async () => {
     const { downloader, mw, dump } = await setupScrapeClasses({ format: 'nopic' }) // en wikipedia
-    await downloader.checkCapabilities()
+    // TODO: checkCapabilities is now in MediaWiki, do we need it here?
+    // await downloader.checkCapabilities()
     await downloader.setBaseUrls()
     class CustomFlavour implements CustomProcessor {
       // eslint-disable-next-line @typescript-eslint/no-unused-vars
diff --git a/test/unit/treatments/article.treatment.test.ts b/test/unit/treatments/article.treatment.test.ts
new file mode 100644
index 00000000..37574256
--- /dev/null
+++ b/test/unit/treatments/article.treatment.test.ts
@@ -0,0 +1,57 @@
+import { ZimArticle } from '@openzim/libzim'
+import domino from 'domino'
+import { mwRetToArticleDetail } from '../../../src/util/mw-api.js'
+import { setupScrapeClasses } from '../../util.js'
+import { redisStore, startRedis, stopRedis } from '../bootstrap.js'
+import { saveArticles } from '../../../src/util/saveArticles.js'
+import { jest } from '@jest/globals'
+
+jest.setTimeout(10000)
+
+describe('ArticleTreatment', () => {
+  beforeAll(startRedis)
+  afterAll(stopRedis)
+
+  test('Article html processing', async () => {
+    const { downloader, mw, dump } = await setupScrapeClasses() // en wikipedia
+    // TODO: checkCapabilities is now in MediaWiki, do we need it here?
+    // await downloader.checkCapabilities()
+    await downloader.setBaseUrls()
+    const _articlesDetail = await downloader.getArticleDetailsIds(['London'])
+    const articlesDetail = mwRetToArticleDetail(_articlesDetail)
+    const { articleDetailXId } = redisStore
+    await articleDetailXId.flush()
+    await articleDetailXId.setMany(articlesDetail)
+
+    const addedArticles: (typeof ZimArticle)[] = []
+
+    // TODO: use proper spies (e.g. sinon.js)
+    await saveArticles(
+      {
+        addArticle(article: typeof ZimArticle) {
+          if (article.mimeType === 'text/html') {
+            addedArticles.push(article)
+          }
+          return Promise.resolve(null)
+        },
+      } as any,
+      downloader,
+      redisStore,
+      mw,
+      dump,
+    )
+
+    // Successfully scraped existing articles
+    expect(addedArticles).toHaveLength(1)
+    expect(addedArticles[0].aid).toEqual('A/London')
+
+    await expect(downloader.getArticle('non-existent-article', dump, articleDetailXId)).rejects.toThrowError('')
+
+    const articleDoc = domino.createDocument(addedArticles.shift().bufferData.toString())
+
+    // Geo position metadata is present
+    expect(articleDoc.querySelector('meta[name="geo.position"]')).toBeDefined()
+    // Geo position data is correct
+    expect(articleDoc.querySelector('meta[name="geo.position"]')?.getAttribute('content')).toEqual('51.50722222;-0.1275')
+  })
+})
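On the `// TODO: use proper spies` note in the new test: the hand-rolled `addArticle` collector could become a Jest mock, which keeps the filtering logic and adds call-count assertions for free. A sketch under the same reduced-ZimCreator assumption the test already makes:

```ts
import { jest } from '@jest/globals'

// Mock stands in for the ZimCreator, exposing only what saveArticles calls
const addArticle = jest.fn(async (article: any) => null)
const zimCreatorStub = { addArticle } as any

// await saveArticles(zimCreatorStub, downloader, redisStore, mw, dump)

const htmlArticles = addArticle.mock.calls
  .map(([article]) => article)
  .filter((article) => article.mimeType === 'text/html')
// expect(htmlArticles).toHaveLength(1)
// expect(addArticle).toHaveBeenCalled()
```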
diff --git a/test/unit/urlRewriting.test.ts b/test/unit/urlRewriting.test.ts
index c31e6160..18d12cf9 100644
--- a/test/unit/urlRewriting.test.ts
+++ b/test/unit/urlRewriting.test.ts
@@ -138,7 +138,8 @@ describe('Styles', () => {
     await articleDetailXId.flush()
     await redisStore.redirectsXId.flush()
     const { downloader, mw, dump } = await setupScrapeClasses() // en wikipedia
-    await downloader.checkCapabilities()
+    // TODO: checkCapabilities is now in MediaWiki, do we need it here?
+    // await downloader.checkCapabilities()
     await downloader.setBaseUrls()
 
     await getArticleIds(downloader, redisStore, mw, '', ['London', 'British_Museum', 'Natural_History_Museum,_London', 'Farnborough/Aldershot_built-up_area'])
diff --git a/test/util.ts b/test/util.ts
index 0a39a501..0a1cfb44 100644
--- a/test/util.ts
+++ b/test/util.ts
@@ -38,7 +38,8 @@ export async function setupScrapeClasses({ mwUrl = 'https://en.wikipedia.org', f
   const downloader = new Downloader({ mw, uaString: `${config.userAgent} (contact@kiwix.org)`, speed: 1, reqTimeout: 1000 * 60, webp: false, optimisationCacheUrl: '' })
 
   await mw.getMwMetaData(downloader)
-  await downloader.checkCapabilities()
+  // TODO: checkCapabilities is now in MediaWiki, do we need it here?
+  // await downloader.checkCapabilities()
 
   const dump = new Dump(format, {} as any, mw.metaData)