diff --git a/src/Downloader.ts b/src/Downloader.ts index d470d66b..9d09a93e 100644 --- a/src/Downloader.ts +++ b/src/Downloader.ts @@ -20,10 +20,14 @@ import { normalizeMwResponse, DB_ERROR, WEAK_ETAG_REGEX, stripHttpFromUrl, isBit import S3 from './S3.js' import * as logger from './Logger.js' import MediaWiki, { QueryOpts } from './MediaWiki.js' +import { Dump } from './Dump.js' import ApiURLDirector from './util/builders/url/api.director.js' -import basicURLDirector from './util/builders/url/basic.director.js' import urlHelper from './util/url.helper.js' +import WikimediaDesktopURLDirector from './util/builders/url/desktop.director.js' +import WikimediaMobileURLDirector from './util/builders/url/mobile.director.js' +import VisualEditorURLDirector from './util/builders/url/visual-editor.director.js' + const imageminOptions = new Map() imageminOptions.set('default', new Map()) imageminOptions.set('webp', new Map()) @@ -80,8 +84,6 @@ export const defaultStreamRequestOptions: AxiosRequestConfig = { class Downloader { public loginCookie = '' public readonly speed: number - public baseUrl: string - public baseUrlForMainPage: string public cssDependenceUrls: KVS = {} public readonly webp: boolean = false public readonly requestTimeout: number @@ -98,6 +100,8 @@ class Downloader { private readonly optimisationCacheUrl: string private s3: S3 private apiUrlDirector: ApiURLDirector + private articleUrlDirector: WikimediaDesktopURLDirector | WikimediaMobileURLDirector | VisualEditorURLDirector + private mainPageUrlDirector: WikimediaDesktopURLDirector | WikimediaMobileURLDirector | VisualEditorURLDirector constructor({ uaString, speed, reqTimeout, optimisationCacheUrl, s3, webp, backoffOptions }: DownloaderOpts) { this.uaString = uaString @@ -170,52 +174,32 @@ class Downloader { } } - public async setBaseUrls(forceRender = null) { - if (!forceRender) { - //* Objects order in array matters! - this.baseUrl = basicURLDirector.buildDownloaderBaseUrl([ - { condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.WikimediaMobileApiUrl.href }, - { condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.WikimediaDesktopApiUrl.href }, - { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href }, - ]) - - //* Objects order in array matters! - this.baseUrlForMainPage = basicURLDirector.buildDownloaderBaseUrl([ - { condition: await MediaWiki.hasWikimediaDesktopApi(), value: MediaWiki.WikimediaDesktopApiUrl.href }, - { condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href }, - { condition: await MediaWiki.hasWikimediaMobileApi(), value: MediaWiki.WikimediaMobileApiUrl.href }, - ]) - } else { - switch (forceRender) { - case 'WikimediaDesktop': - if (MediaWiki.hasWikimediaDesktopApi()) { - this.baseUrl = MediaWiki.WikimediaDesktopApiUrl.href - this.baseUrlForMainPage = MediaWiki.WikimediaDesktopApiUrl.href - break - } - break - case 'VisualEditor': - if (MediaWiki.hasVisualEditorApi()) { - this.baseUrl = MediaWiki.visualEditorApiUrl.href - this.baseUrlForMainPage = MediaWiki.visualEditorApiUrl.href - break - } - break - case 'WikimediaMobile': - if (MediaWiki.hasWikimediaMobileApi()) { - this.baseUrl = MediaWiki.WikimediaMobileApiUrl.href - this.baseUrlForMainPage = MediaWiki.WikimediaMobileApiUrl.href - break - } - break - default: - throw new Error('Unable to find specific API end-point to retrieve article HTML') - } + private getUrlDirector(renderer: object) { + switch (renderer.constructor.name) { + case 'WikimediaDesktopRenderer': + return new WikimediaDesktopURLDirector(MediaWiki.wikimediaDesktopApiUrl.href) + case 'VisualEditorRenderer': + return new VisualEditorURLDirector(MediaWiki.visualEditorApiUrl.href) + case 'WikimediaMobileRenderer': + return new WikimediaMobileURLDirector(MediaWiki.wikimediaMobileApiUrl.href) } - logger.log('Base Url: ', this.baseUrl) - logger.log('Base Url for Main Page: ', this.baseUrlForMainPage) + } + + public setUrlsDirectors(mainPageRenderer, articlesRenderer): void { + if (!this.articleUrlDirector) { + this.articleUrlDirector = this.getUrlDirector(articlesRenderer) + } + if (!this.mainPageUrlDirector) { + this.mainPageUrlDirector = this.getUrlDirector(mainPageRenderer) + } + } + + public getArticleUrl(articleId: string): string { + return this.articleUrlDirector.buildArticleURL(articleId) + } - if (!this.baseUrl || !this.baseUrlForMainPage) throw new Error('Unable to find appropriate API end-point to retrieve article HTML') + public getMainPageUrl(articleId: string): string { + return this.mainPageUrlDirector.buildArticleURL(articleId) } public removeEtagWeakPrefix(etag: string): string { @@ -334,7 +318,7 @@ class Downloader { articleDetailXId: RKVS, articleRenderer, articleUrl, - dump, + dump: Dump, articleDetail?: ArticleDetail, isMainPage?: boolean, ): Promise { @@ -383,7 +367,7 @@ class Downloader { await this.claimRequest() try { - return await new Promise((resolve, reject) => { + return new Promise((resolve, reject) => { const cb = (err: any, val: any) => { if (err) { reject(err) @@ -722,7 +706,7 @@ class Downloader { // Solution to handle aws js sdk v3 from https://github.com/aws/aws-sdk-js-v3/issues/1877 private async streamToBuffer(stream: Readable): Promise { - return await new Promise((resolve, reject) => { + return new Promise((resolve, reject) => { const chunks: Uint8Array[] = [] stream.on('data', (chunk) => chunks.push(chunk)) stream.on('error', reject) diff --git a/src/MediaWiki.ts b/src/MediaWiki.ts index 36684b5c..113b3ff3 100644 --- a/src/MediaWiki.ts +++ b/src/MediaWiki.ts @@ -42,6 +42,7 @@ class MediaWiki { public namespacesToMirror: string[] = [] public apiCheckArticleId: string public queryOpts: QueryOpts + public urlDirector: BaseURLDirector #wikiPath: string #actionApiPath: string @@ -50,20 +51,24 @@ class MediaWiki { #username: string #password: string #domain: string - private apiUrlDirector: ApiURLDirector - private baseUrlDirector: BaseURLDirector - private wikimediaDesktopUrlDirector: WikimediaDesktopURLDirector - private wikimediaMobileUrlDirector: WikimediaMobileURLDirector - private visualEditorURLDirector: VisualEditorURLDirector + + public wikimediaDesktopUrlDirector: WikimediaDesktopURLDirector + public wikimediaMobileUrlDirector: WikimediaMobileURLDirector + public visualEditorURLDirector: VisualEditorURLDirector public visualEditorApiUrl: URL public actionApiUrl: URL + public webUrl: URL + public wikimediaDesktopApiUrl: URL + public wikimediaMobileApiUrl: URL + public modulePath: string // only for reading public mobileModulePath: string - public webUrl: URL - public WikimediaDesktopApiUrl: URL - public WikimediaMobileApiUrl: URL + #apiUrlDirector: ApiURLDirector + #wikimediaDesktopUrlDirector: WikimediaDesktopURLDirector + #wikimediaMobileUrlDirector: WikimediaMobileURLDirector + #visualEditorURLDirector: VisualEditorURLDirector #hasWikimediaDesktopApi: boolean | null #hasWikimediaMobileApi: boolean | null #hasVisualEditorApi: boolean | null @@ -80,14 +85,16 @@ class MediaWiki { set actionApiPath(value: string) { if (value) { this.#actionApiPath = value - this.initApiURLDirector() + this.actionApiUrl = this.urlDirector.buildURL(this.#actionApiPath) + this.setVisualEditorURL() } } set restApiPath(value: string) { if (value) { this.#restApiPath = value - this.initApiURLDirector() + this.setWikimediaDesktopApiUrl() + this.setWikimediaMobileApiUrl() } } @@ -98,31 +105,33 @@ class MediaWiki { set wikiPath(value: string) { if (value) { this.#wikiPath = value - this.initApiURLDirector() + this.webUrl = this.urlDirector.buildURL(this.#wikiPath) } } set base(value: string) { if (value) { this.baseUrl = basicURLDirector.buildMediawikiBaseURL(value) - this.baseUrlDirector = new BaseURLDirector(this.baseUrl.href) - this.initMWApis() - this.initApiURLDirector() + this.urlDirector = new BaseURLDirector(this.baseUrl.href) + this.webUrl = this.urlDirector.buildURL(this.#wikiPath) + this.actionApiUrl = this.urlDirector.buildURL(this.#actionApiPath) + this.setWikimediaDesktopApiUrl() + this.setWikimediaMobileApiUrl() + this.setVisualEditorURL() + this.setModuleURL() + this.setMobileModuleUrl() } } set modulePathOpt(value: string) { - if (value) { + if (value !== undefined) { this.#modulePathOpt = value - if (this.baseUrlDirector) { - this.modulePath = this.baseUrlDirector.buildModuleURL(this.#modulePathOpt) - } else { - logger.error('Base url director should be specified first') - } - } else { - if (this.baseUrlDirector) { - this.modulePath = this.baseUrlDirector.buildModuleURL(this.#modulePathOpt) - } + } + + if (this.urlDirector) { + this.setModuleURL() + } else if (value) { + logger.error('Base url director should be specified first') } } @@ -163,7 +172,8 @@ class MediaWiki { public async hasWikimediaDesktopApi(): Promise { if (this.#hasWikimediaDesktopApi === null) { - this.#hasWikimediaDesktopApi = await checkApiAvailability(this.wikimediaDesktopUrlDirector.buildArticleURL(this.apiCheckArticleId)) + this.#wikimediaDesktopUrlDirector = new WikimediaDesktopURLDirector(this.wikimediaDesktopApiUrl.href) + this.#hasWikimediaDesktopApi = await checkApiAvailability(this.#wikimediaDesktopUrlDirector.buildArticleURL(this.apiCheckArticleId)) return this.#hasWikimediaDesktopApi } return this.#hasWikimediaDesktopApi @@ -171,7 +181,8 @@ class MediaWiki { public async hasWikimediaMobileApi(): Promise { if (this.#hasWikimediaMobileApi === null) { - this.#hasWikimediaMobileApi = await checkApiAvailability(this.wikimediaMobileUrlDirector.buildArticleURL(this.apiCheckArticleId)) + this.#wikimediaMobileUrlDirector = new WikimediaMobileURLDirector(this.wikimediaMobileApiUrl.href) + this.#hasWikimediaMobileApi = await checkApiAvailability(this.#wikimediaMobileUrlDirector.buildArticleURL(this.apiCheckArticleId)) return this.#hasWikimediaMobileApi } return this.#hasWikimediaMobileApi @@ -179,7 +190,8 @@ class MediaWiki { public async hasVisualEditorApi(): Promise { if (this.#hasVisualEditorApi === null) { - this.#hasVisualEditorApi = await checkApiAvailability(this.visualEditorURLDirector.buildArticleURL(this.apiCheckArticleId)) + this.#visualEditorURLDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href) + this.#hasVisualEditorApi = await checkApiAvailability(this.#visualEditorURLDirector.buildArticleURL(this.apiCheckArticleId)) return this.#hasVisualEditorApi } return this.#hasVisualEditorApi @@ -193,7 +205,7 @@ class MediaWiki { rdnamespace: validNamespaceIds, } - const resp = await downloader.getJSON(this.apiUrlDirector.buildQueryURL(reqOpts)) + const resp = await downloader.getJSON(this.#apiUrlDirector.buildQueryURL(reqOpts)) const isCoordinateWarning = JSON.stringify(resp?.warnings?.query ?? '').includes('coordinates') if (isCoordinateWarning) { logger.info('Coordinates not available on this wiki') @@ -204,20 +216,25 @@ class MediaWiki { return this.#hasCoordinates } - private initMWApis() { - this.WikimediaDesktopApiUrl = this.baseUrlDirector.buildWikimediaDesktopApiUrl(this.#restApiPath) - this.WikimediaMobileApiUrl = this.baseUrlDirector.buildWikimediaMobileApiUrl(this.#restApiPath) - this.mobileModulePath = this.baseUrlDirector.buildMobileModuleURL() - this.wikimediaDesktopUrlDirector = new WikimediaDesktopURLDirector(this.WikimediaDesktopApiUrl.href) - this.wikimediaMobileUrlDirector = new WikimediaMobileURLDirector(this.WikimediaMobileApiUrl.href) + private setWikimediaDesktopApiUrl() { + this.wikimediaDesktopApiUrl = this.urlDirector.buildWikimediaDesktopApiUrl(this.#restApiPath) + } + + private setWikimediaMobileApiUrl() { + this.wikimediaMobileApiUrl = this.urlDirector.buildWikimediaMobileApiUrl(this.#restApiPath) + } + + private setVisualEditorURL() { + this.#apiUrlDirector = new ApiURLDirector(this.actionApiUrl.href) + this.visualEditorApiUrl = this.#apiUrlDirector.buildVisualEditorURL() + } + + private setModuleURL() { + this.modulePath = this.urlDirector.buildModuleURL(this.#modulePathOpt) } - private initApiURLDirector() { - this.webUrl = this.baseUrlDirector.buildURL(this.#wikiPath) - this.actionApiUrl = this.baseUrlDirector.buildURL(this.#actionApiPath) - this.apiUrlDirector = new ApiURLDirector(this.actionApiUrl.href) - this.visualEditorApiUrl = this.apiUrlDirector.buildVisualEditorURL() - this.visualEditorURLDirector = new VisualEditorURLDirector(this.visualEditorApiUrl.href) + private setMobileModuleUrl() { + this.mobileModulePath = this.urlDirector.buildMobileModuleURL() } public async login(downloader: Downloader) { @@ -261,7 +278,7 @@ class MediaWiki { } public async getNamespaces(addNamespaces: number[], downloader: Downloader) { - const url = this.apiUrlDirector.buildNamespacesURL() + const url = this.#apiUrlDirector.buildNamespacesURL() const json: any = await downloader.getJSON(url) ;['namespaces', 'namespacealiases'].forEach((type) => { diff --git a/src/config.ts b/src/config.ts index 1bc3da66..1418c531 100644 --- a/src/config.ts +++ b/src/config.ts @@ -61,7 +61,7 @@ const config = { jsResources: ['../node_modules/details-element-polyfill/dist/details-element-polyfill'], wikimediaMobileCssResources: ['wm_mobile_override_style'], - mwMobileJsResources: ['wm_mobile_override_script'], + wikimediaMobileJsResources: ['wm_mobile_override_script'], // JS/CSS resources to be imported from MediaWiki mw: { diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index b0f8a9de..f85cef5b 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -214,7 +214,6 @@ async function execute(argv: any) { await MediaWiki.hasWikimediaDesktopApi() const hasWikimediaMobileApi = await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() - await downloader.setBaseUrls(forceRender) RedisStore.setOptions(argv.redis || config.defaults.redisPath) await RedisStore.connect() @@ -608,7 +607,7 @@ async function execute(argv: any) { } async function fetchArticleDetail(articleId: string) { - return await articleDetailXId.get(articleId) + return articleDetailXId.get(articleId) } async function updateArticleThumbnail(articleDetail: any, articleId: string) { diff --git a/src/renderers/abstractMobile.render.ts b/src/renderers/abstractMobile.render.ts index b771bd0b..c470b4d9 100644 --- a/src/renderers/abstractMobile.render.ts +++ b/src/renderers/abstractMobile.render.ts @@ -11,7 +11,7 @@ export abstract class MobileRenderer extends Renderer { public staticFilesListMobile: string[] = [] constructor() { super() - this.staticFilesListMobile = this.staticFilesListCommon.concat(getStaticFiles(config.output.mwMobileJsResources, config.output.wikimediaMobileCssResources)) + this.staticFilesListMobile = this.staticFilesListCommon.concat(getStaticFiles(config.output.wikimediaMobileJsResources, config.output.wikimediaMobileCssResources)) } public filterWikimediaMobileModules(_moduleDependencies) { @@ -45,7 +45,7 @@ export abstract class MobileRenderer extends Renderer { const htmlTemplateString = htmlWikimediaMobileTemplateCode() .replace('__ARTICLE_CANONICAL_LINK__', genCanonicalLink(config, MediaWiki.webUrl.href, articleId)) .replace('__ARTICLE_CONFIGVARS_LIST__', '') - .replace('__JS_SCRIPTS__', this.genWikimediaMobileOverrideScript(config.output.mwMobileJsResources[0])) + .replace('__JS_SCRIPTS__', this.genWikimediaMobileOverrideScript(config.output.wikimediaMobileJsResources[0])) .replace('__CSS_LINKS__', this.genWikimediaMobileOverrideCSSLink(config.output.wikimediaMobileCssResources[0])) .replace( '__ARTICLE_JS_LIST__', diff --git a/src/util/builders/url/api.director.ts b/src/util/builders/url/api.director.ts index 6651d7c6..b4343019 100644 --- a/src/util/builders/url/api.director.ts +++ b/src/util/builders/url/api.director.ts @@ -49,19 +49,13 @@ export default class ApiURLDirector { } buildVisualEditorURL() { - return urlBuilder - .setDomain(this.baseDomain) - .setQueryParams({ action: 'visualeditor', mobileformat: 'html', format: 'json', paction: 'parse', formatversion: '2', page: '' }) - .build(true) + return urlBuilder.setDomain(this.baseDomain).setQueryParams({ action: 'visualeditor', mobileformat: 'html', format: 'json', paction: 'parse', formatversion: '2' }).build(true) } buildArticleApiURL(articleId: string) { - const domain = this.buildBaseArticleURL() - - return urlBuilder.setDomain(domain).setQueryParams({ page: articleId }, '&').build() - } - - private buildBaseArticleURL() { - return urlBuilder.setDomain(this.baseDomain).setQueryParams({ action: 'parse', format: 'json', prop: 'modules|jsconfigvars|headhtml', formatversion: '2' }).build() + return urlBuilder + .setDomain(this.baseDomain) + .setQueryParams({ action: 'parse', format: 'json', prop: 'modules|jsconfigvars|headhtml', formatversion: '2', page: articleId }) + .build() } } diff --git a/src/util/builders/url/visual-editor.director.ts b/src/util/builders/url/visual-editor.director.ts index e818d1d3..660f1e1d 100644 --- a/src/util/builders/url/visual-editor.director.ts +++ b/src/util/builders/url/visual-editor.director.ts @@ -11,9 +11,6 @@ export default class VisualEditorURLDirector { } buildArticleURL(articleId: string) { - return urlBuilder - .setDomain(this.baseDomain) - .setQueryParams({ page: encodeURIComponent(articleId) }, '&') - .build() + return urlBuilder.setDomain(this.baseDomain).setQueryParams({ page: articleId }, '&').build() } } diff --git a/src/util/saveArticles.ts b/src/util/saveArticles.ts index 847ffae8..f32577ca 100644 --- a/src/util/saveArticles.ts +++ b/src/util/saveArticles.ts @@ -131,9 +131,9 @@ async function getAllArticlesToKeep(downloader: Downloader, articleDetailXId: RK const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title) let rets: any try { - const articleUrl = getArticleUrl(downloader, dump, articleId) const isMainPage = dump.isMainPage(articleId) const renderer = isMainPage ? mainPageRenderer : articlesRenderer + const articleUrl = isMainPage ? downloader.getMainPageUrl(articleId) : downloader.getArticleUrl(articleId) rets = await downloader.getArticle(downloader.webp, _moduleDependencies, articleId, articleDetailXId, renderer, articleUrl, dump, articleDetail, isMainPage) for (const { articleId, html } of rets) { @@ -224,10 +224,6 @@ async function saveArticle( } } -export function getArticleUrl(downloader: Downloader, dump: Dump, articleId: string): string { - return `${dump.isMainPage(articleId) ? downloader.baseUrlForMainPage : downloader.baseUrl}${encodeURIComponent(articleId)}` -} - /* * Fetch Articles */ @@ -258,6 +254,7 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade renderType: hasWikimediaMobileApi ? 'mobile' : 'auto', }) } + downloader.setUrlsDirectors(mainPageRenderer, articlesRenderer) if (dump.customProcessor?.shouldKeepArticle) { await getAllArticlesToKeep(downloader, articleDetailXId, dump, mainPageRenderer, articlesRenderer) @@ -293,9 +290,9 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade let rets: any try { - const articleUrl = getArticleUrl(downloader, dump, articleId) const isMainPage = dump.isMainPage(articleId) const renderer = isMainPage ? mainPageRenderer : articlesRenderer + const articleUrl = isMainPage ? downloader.getMainPageUrl(articleId) : downloader.getArticleUrl(articleId) rets = await downloader.getArticle(downloader.webp, _moduleDependencies, articleId, articleDetailXId, renderer, articleUrl, dump, articleDetail, isMainPage) diff --git a/test/e2e/forceRender.test.ts b/test/e2e/forceRender.test.ts index ef1d8860..e8a6512d 100644 --- a/test/e2e/forceRender.test.ts +++ b/test/e2e/forceRender.test.ts @@ -53,7 +53,7 @@ describe('forceRender', () => { try { await mwoffliner.execute({ ...parameters, forceRender }) } catch (err) { - expect(err.message).toEqual('Unable to find specific API end-point to retrieve article HTML') + expect(err.message).toEqual('Unknown renderName for specific mode: unknownRenderName') } }) }) diff --git a/test/unit/builders/url/api.director.test.ts b/test/unit/builders/url/api.director.test.ts index 993b9dfa..a45423be 100644 --- a/test/unit/builders/url/api.director.test.ts +++ b/test/unit/builders/url/api.director.test.ts @@ -57,7 +57,7 @@ describe('ApiURLDirector', () => { it('should return base visual editor URL object with default query params', () => { const url = apiUrlDirector.buildVisualEditorURL() - expect(url.href).toBe('https://en.wikipedia.org/w/api.php?action=visualeditor&mobileformat=html&format=json&paction=parse&formatversion=2&page=') + expect(url.href).toBe('https://en.wikipedia.org/w/api.php?action=visualeditor&mobileformat=html&format=json&paction=parse&formatversion=2') }) }) }) diff --git a/test/unit/downloader.test.ts b/test/unit/downloader.test.ts index ed1a64b9..a1806fd1 100644 --- a/test/unit/downloader.test.ts +++ b/test/unit/downloader.test.ts @@ -6,7 +6,6 @@ import Axios from 'axios' import { mwRetToArticleDetail, stripHttpFromUrl, isImageUrl } from '../../src/util/index.js' import S3 from '../../src/S3.js' import { Dump } from '../../src/Dump.js' -import { getArticleUrl } from '../../src/util/saveArticles.js' import { config } from '../../src/config.js' import 'dotenv/config.js' import * as FileType from 'file-type' @@ -15,8 +14,8 @@ import urlParser from 'url' import { setTimeout } from 'timers/promises' import domino from 'domino' import { WikimediaDesktopRenderer } from '../../src/renderers/wikimedia-desktop.renderer.js' -import { VisualEditorRenderer } from '../../src/renderers/visual-editor.renderer.js' import { WikimediaMobileRenderer } from '../../src/renderers/wikimedia-mobile.renderer.js' +import { VisualEditorRenderer } from '../../src/renderers/visual-editor.renderer.js' import { RENDERERS_LIST } from '../../src/util/const.js' jest.setTimeout(200000) @@ -30,7 +29,6 @@ describe('Downloader class', () => { beforeAll(async () => { MediaWiki.base = 'https://en.wikipedia.org' MediaWiki.getCategories = true - downloader = new Downloader({ uaString: `${config.userAgent} (contact@kiwix.org)`, speed: 1, reqTimeout: 1000 * 60, webp: true, optimisationCacheUrl: '' }) await MediaWiki.getMwMetaData(downloader) @@ -38,7 +36,6 @@ describe('Downloader class', () => { await MediaWiki.hasWikimediaDesktopApi() await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() - await downloader.setBaseUrls() }) test('Test Action API version 2 response in comparison with version 1', async () => { @@ -129,15 +126,17 @@ describe('Downloader class', () => { describe('getArticle method', () => { let dump: Dump - const wikimediaDesktopRenderer = new WikimediaDesktopRenderer() + const wikimediaMobileRenderer = new WikimediaMobileRenderer() + beforeAll(async () => { const mwMetadata = await MediaWiki.getMwMetaData(downloader) dump = new Dump('', {} as any, mwMetadata) }) - test('getArticle of "London" returns one article for WikimediaDesktop render', async () => { + test('getArticle of "London" returns one article for WikimediaMobileRenderer render', async () => { const articleId = 'London' - const articleUrl = getArticleUrl(downloader, dump, articleId) + downloader.setUrlsDirectors(wikimediaMobileRenderer, wikimediaMobileRenderer) + const articleUrl = downloader.getArticleUrl(articleId) const articleDetail = { title: articleId, thumbnail: { @@ -155,7 +154,7 @@ describe('Downloader class', () => { _moduleDependencies, articleId, RedisStore.articleDetailXId, - wikimediaDesktopRenderer, + wikimediaMobileRenderer, articleUrl, dump, articleDetail, @@ -167,13 +166,15 @@ describe('Downloader class', () => { test('Categories with many subCategories are paginated for WikimediaDesktop render', async () => { const articleId = 'Category:Container_categories' const _moduleDependencies = await downloader.getModuleDependencies(articleId) + const wikimediaDesktopRenderer = new WikimediaDesktopRenderer() const articleDetail = { title: articleId, ns: 14, revisionId: 1168361498, timestamp: '2023-08-02T09:57:11Z', } - const articleUrl = getArticleUrl(downloader, dump, articleDetail.title) + // Enforce desktop url here as this test desktop API-specific + const articleUrl = `https://en.wikipedia.org/api/rest_v1/page/html/${articleId}` const PaginatedArticle = await downloader.getArticle( downloader.webp, _moduleDependencies, @@ -190,7 +191,7 @@ describe('Downloader class', () => { test('getArticle response status for non-existent article id is 404 for WikimediaDesktop render', async () => { const articleId = 'NeverExistingArticle' - const articleUrl = getArticleUrl(downloader, dump, articleId) + const articleUrl = downloader.getArticleUrl(articleId) const articleDetail = { title: articleId, missing: '', @@ -202,7 +203,7 @@ describe('Downloader class', () => { _moduleDependencies, 'NeverExistingArticle', RedisStore.articleDetailXId, - wikimediaDesktopRenderer, + wikimediaMobileRenderer, articleUrl, dump, articleDetail, @@ -236,7 +237,7 @@ describe('Downloader class', () => { test(`getArticle response status for non-existent article id is 404 for ${renderer} render`, async () => { const articleId = 'NeverExistingArticle' - const articleUrl = getArticleUrl(downloader, dump, articleId) + const articleUrl = downloader.getArticleUrl(articleId) const articleDetail = { title: articleId, missing: '', diff --git a/test/unit/renderers/renderer.builder.test.ts b/test/unit/renderers/renderer.builder.test.ts index 25e6b7db..3bd46ded 100644 --- a/test/unit/renderers/renderer.builder.test.ts +++ b/test/unit/renderers/renderer.builder.test.ts @@ -84,7 +84,7 @@ describe('RendererBuilder', () => { await MediaWiki.hasWikimediaDesktopApi() await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() - await downloader.setBaseUrls() + await MediaWiki.hasVisualEditorApi() const rendererBuilderOptions = { MediaWiki, diff --git a/test/unit/saveArticles.test.ts b/test/unit/saveArticles.test.ts index 7ccbcb03..ae3baee8 100644 --- a/test/unit/saveArticles.test.ts +++ b/test/unit/saveArticles.test.ts @@ -7,7 +7,6 @@ import { saveArticles } from '../../src/util/saveArticles.js' import { ZimArticle } from '@openzim/libzim' import { mwRetToArticleDetail, DELETED_ARTICLE_ERROR } from '../../src/util/index.js' import { jest } from '@jest/globals' -import { getArticleUrl } from '../../src/util/saveArticles.js' import { WikimediaDesktopRenderer } from '../../src/renderers/wikimedia-desktop.renderer.js' import { VisualEditorRenderer } from '../../src/renderers/visual-editor.renderer.js' import { WikimediaMobileRenderer } from '../../src/renderers/wikimedia-mobile.renderer.js' @@ -41,7 +40,8 @@ describe('saveArticles', () => { await MediaWiki.hasWikimediaDesktopApi() await MediaWiki.hasWikimediaMobileApi() await MediaWiki.hasVisualEditorApi() - await downloader.setBaseUrls(renderer) + await MediaWiki.hasVisualEditorApi() + const _articlesDetail = await downloader.getArticleDetailsIds(['London']) const articlesDetail = mwRetToArticleDetail(_articlesDetail) const { articleDetailXId } = RedisStore @@ -71,7 +71,7 @@ describe('saveArticles', () => { expect(addedArticles[0].aid).toEqual('A/London') const articleId = 'non-existent-article' - const articleUrl = getArticleUrl(downloader, dump, articleId) + const articleUrl = downloader.getArticleUrl(articleId) const articleDetail = { title: 'Non-existent-article', missing: '' } const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title) @@ -91,9 +91,9 @@ describe('saveArticles', () => { test(`Check nodet article for en.wikipedia.org using ${renderer} renderer`, async () => { const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikipedia.org', format: 'nodet' }) // en wikipedia - await downloader.setBaseUrls(renderer) const articleId = 'Canada' - const articleUrl = getArticleUrl(downloader, dump, articleId) + downloader.setUrlsDirectors(rendererInstance, rendererInstance) + const articleUrl = downloader.getArticleUrl(articleId) const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId]) const articlesDetail = mwRetToArticleDetail(_articleDetailsRet) const { articleDetailXId } = RedisStore @@ -122,9 +122,9 @@ describe('saveArticles', () => { test(`Load main page and check that it is without header using ${renderer} renderer`, async () => { const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikivoyage.org' }) // en wikipedia - await downloader.setBaseUrls(renderer) + downloader.setUrlsDirectors(rendererInstance, rendererInstance) const articleId = 'Main_Page' - const articleUrl = getArticleUrl(downloader, dump, articleId) + const articleUrl = downloader.getArticleUrl(articleId) const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId]) const articlesDetail = mwRetToArticleDetail(_articleDetailsRet) const { articleDetailXId } = RedisStore @@ -147,12 +147,8 @@ describe('saveArticles', () => { }) test(`--customFlavour using ${renderer} renderer`, async () => { - const { MediaWiki, downloader, dump } = await setupScrapeClasses({ format: 'nopic' }) // en wikipedia - await MediaWiki.hasCoordinates(downloader) - await MediaWiki.hasWikimediaDesktopApi() - await MediaWiki.hasWikimediaMobileApi() - await MediaWiki.hasVisualEditorApi() - await downloader.setBaseUrls(renderer) + const { downloader, dump } = await setupScrapeClasses({ format: 'nopic' }) // en wikipedia + downloader.setUrlsDirectors(rendererInstance, rendererInstance) class CustomFlavour implements CustomProcessor { // eslint-disable-next-line @typescript-eslint/no-unused-vars public async shouldKeepArticle(articleId: string, doc: Document) { @@ -227,7 +223,7 @@ describe('saveArticles', () => { const downloader = classes.downloader await downloader.setCapabilities() - await downloader.setBaseUrls() + await downloader.setUrlsDirectors() const _articleDetailsRet = await downloader.getArticleDetailsIds(['Western_Greenland']) const articlesDetail = mwRetToArticleDetail(_articleDetailsRet) const { articleDetailXId } = redisStore diff --git a/test/unit/treatments/article.treatment.test.ts b/test/unit/treatments/article.treatment.test.ts index a26076dd..28387aba 100644 --- a/test/unit/treatments/article.treatment.test.ts +++ b/test/unit/treatments/article.treatment.test.ts @@ -6,7 +6,6 @@ import { setupScrapeClasses } from '../../util.js' import { startRedis, stopRedis } from '../bootstrap.js' import { saveArticles } from '../../../src/util/saveArticles.js' import { jest } from '@jest/globals' -import { getArticleUrl } from '../../../src/util/saveArticles.js' import { WikimediaDesktopRenderer } from '../../../src/renderers/wikimedia-desktop.renderer.js' import { WikimediaMobileRenderer } from '../../../src/renderers/wikimedia-mobile.renderer.js' import { VisualEditorRenderer } from '../../../src/renderers/visual-editor.renderer.js' @@ -36,7 +35,6 @@ describe('ArticleTreatment', () => { test(`Article html processing for ${renderer} render`, async () => { const { downloader, dump } = await setupScrapeClasses() // en wikipedia - await downloader.setBaseUrls() const title = 'London' const _articlesDetail = await downloader.getArticleDetailsIds([title]) const articlesDetail = mwRetToArticleDetail(_articlesDetail) @@ -47,7 +45,8 @@ describe('ArticleTreatment', () => { const addedArticles: (typeof ZimArticle)[] = [] const articleId = 'non-existent-article' - const articleUrl = getArticleUrl(downloader, dump, articleId) + downloader.setUrlsDirectors(rendererInstance, rendererInstance) + const articleUrl = downloader.getArticleUrl(articleId) const _moduleDependencies = await downloader.getModuleDependencies(title) const articleDetail = { @@ -75,6 +74,7 @@ describe('ArticleTreatment', () => { downloader, dump, true, + renderer, ) // Successfully scrapped existent articles diff --git a/test/unit/urlRewriting.test.ts b/test/unit/urlRewriting.test.ts index 60c384a6..1ba8eab2 100644 --- a/test/unit/urlRewriting.test.ts +++ b/test/unit/urlRewriting.test.ts @@ -140,7 +140,6 @@ describe('Styles', () => { await articleDetailXId.flush() await RedisStore.redirectsXId.flush() const { downloader, dump } = await setupScrapeClasses() // en wikipedia - await downloader.setBaseUrls() await getArticleIds(downloader, '', ['London', 'British_Museum', 'Natural_History_Museum,_London', 'Farnborough/Aldershot_built-up_area']) diff --git a/test/util.ts b/test/util.ts index 7625cb78..28a030fc 100644 --- a/test/util.ts +++ b/test/util.ts @@ -33,6 +33,10 @@ export function makeLink($doc: Document, href: string, rel: string, title: strin export async function setupScrapeClasses({ mwUrl = 'https://en.wikipedia.org', format = '' } = {}) { MediaWiki.base = mwUrl + const renderer = {} + + Object.defineProperty(renderer.constructor, 'name', { value: 'WikimediaDesktopRenderer' }) + const downloader = new Downloader({ uaString: `${config.userAgent} (contact@kiwix.org)`, speed: 1, reqTimeout: 1000 * 60, webp: false, optimisationCacheUrl: '' }) await MediaWiki.getMwMetaData(downloader) @@ -47,6 +51,7 @@ export async function setupScrapeClasses({ mwUrl = 'https://en.wikipedia.org', f MediaWiki, downloader, dump, + renderer, } }