Skip to content

Commit

Permalink
Replace mwCapabilities to Mediawiki class (partial impl)
Browse files Browse the repository at this point in the history
  • Loading branch information
VadimKovalenkoSNF committed Aug 10, 2023
1 parent 39200dd commit 132d561
Show file tree
Hide file tree
Showing 11 changed files with 93 additions and 82 deletions.
67 changes: 7 additions & 60 deletions src/Downloader.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import * as path from 'path'
import deepmerge from 'deepmerge'
import * as backoff from 'backoff'
import { config } from './config.js'
Expand All @@ -19,8 +18,6 @@ import S3 from './S3.js'
import * as logger from './Logger.js'
import MediaWiki from './MediaWiki.js'
import ApiURLDirector from './util/builders/url/api.director.js'
import DesktopURLDirector from './util/builders/url/desktop.director.js'
import VisualEditorURLDirector from './util/builders/url/visual-editor.director.js'
import basicURLDirector from './util/builders/url/basic.director.js'
import urlHelper from './util/url.helper.js'

Expand Down Expand Up @@ -62,13 +59,6 @@ interface BackoffOptions {
backoffHandler: (number: number, delay: number, error?: any) => void
}

/**
 * Flags describing which remote endpoints/features of the target wiki are usable.
 * They are filled in by `Downloader.checkCapabilities`.
 */
export interface MWCapabilities {
// Result of probing the wiki's action API URL (`mw.apiUrl`).
apiAvailable: boolean
// Result of probing the VisualEditor API with a test article.
veApiAvailable: boolean
// Set to false when a query response carries a warning mentioning 'coordinates'.
coordinatesAvailable: boolean
// Result of probing the desktop REST API with a test article.
desktopRestApiAvailable: boolean
}

export const defaultStreamRequestOptions: AxiosRequestConfig = {
headers: {
accept: 'application/octet-stream',
Expand Down Expand Up @@ -103,7 +93,6 @@ class Downloader {
private readonly backoffOptions: BackoffOptions
private readonly optimisationCacheUrl: string
private s3: S3
public mwCapabilities: MWCapabilities // TODO: move to MW, temporary open the property
private apiUrlDirector: ApiURLDirector

constructor({ mw, uaString, speed, reqTimeout, optimisationCacheUrl, s3, webp, backoffOptions }: DownloaderOpts) {
Expand All @@ -116,12 +105,6 @@ class Downloader {
this.optimisationCacheUrl = optimisationCacheUrl
this.webp = webp
this.s3 = s3
this.mwCapabilities = {
apiAvailable: false,
veApiAvailable: false,
coordinatesAvailable: true,
desktopRestApiAvailable: false,
}
this.apiUrlDirector = new ApiURLDirector(mw.apiUrl.href)

this.backoffOptions = {
Expand Down Expand Up @@ -187,14 +170,14 @@ class Downloader {
public async setBaseUrls() {
//* Objects order in array matters!
this.baseUrl = basicURLDirector.buildDownloaderBaseUrl([
{ condition: this.mwCapabilities.desktopRestApiAvailable, value: this.mw.desktopRestApiUrl.href },
{ condition: this.mwCapabilities.veApiAvailable, value: this.mw.veApiUrl.href },
{ condition: await this.mw.hasDesktopRestApi(), value: this.mw.desktopRestApiUrl.href },
{ condition: await this.mw.hasVeApi(), value: this.mw.veApiUrl.href },
])

//* Objects order in array matters!
this.baseUrlForMainPage = basicURLDirector.buildDownloaderBaseUrl([
{ condition: this.mwCapabilities.desktopRestApiAvailable, value: this.mw.desktopRestApiUrl.href },
{ condition: this.mwCapabilities.veApiAvailable, value: this.mw.veApiUrl.href },
{ condition: await this.mw.hasDesktopRestApi(), value: this.mw.desktopRestApiUrl.href },
{ condition: await this.mw.hasVeApi(), value: this.mw.veApiUrl.href },
])

logger.log('Base Url: ', this.baseUrl)
Expand All @@ -203,40 +186,6 @@ class Downloader {
if (!this.baseUrl || !this.baseUrlForMainPage) throw new Error('Unable to find appropriate API end-point to retrieve article HTML')
}

/**
 * Probes `url` with a GET request (sending the current login cookie) and reports
 * whether it looks like a working endpoint.
 *
 * @param url - Absolute URL to request.
 * @returns `true` only if the response status is 200, no `mediawiki-api-error`
 *   header is present, and the final response URL has the same directory part as
 *   the requested one; `false` otherwise, including when the request throws.
 */
public async checkApiAvailabilty(url: string): Promise<boolean> {
  try {
    const response = await axios.get(url, { headers: { cookie: this.loginCookie } })
    if (response.status !== 200) {
      return false
    }
    if (response.headers['mediawiki-api-error']) {
      return false
    }
    // Compare the directory part of the requested and final URLs to detect redirects.
    const requestedDir = path.dirname(url)
    const finalDir = path.dirname(response.request.res.responseUrl)
    return requestedDir === finalDir
  } catch (err) {
    return false
  }
}

/**
 * Probes the wiki's endpoints and records the results in `this.mwCapabilities`.
 *
 * @param testArticleId - Article used to build the REST / VisualEditor probe URLs.
 */
public async checkCapabilities(testArticleId = 'MediaWiki:Sidebar'): Promise<void> {
const desktopUrlDirector = new DesktopURLDirector(this.mw.desktopRestApiUrl.href)
const visualEditorURLDirector = new VisualEditorURLDirector(this.mw.veApiUrl.href)

// By default check all API's responses and set the capabilities
// accordingly. We need to set a default page (always there because
// installed per default) to request the REST API, otherwise it would
// fail the check.
this.mwCapabilities.desktopRestApiAvailable = await this.checkApiAvailabilty(desktopUrlDirector.buildArticleURL(testArticleId))
this.mwCapabilities.veApiAvailable = await this.checkApiAvailabilty(visualEditorURLDirector.buildArticleURL(testArticleId))
this.mwCapabilities.apiAvailable = await this.checkApiAvailabilty(this.mw.apiUrl.href)

// Coordinate fetching
const reqOpts = this.getArticleQueryOpts()

const resp = await this.getJSON<MwApiResponse>(this.apiUrlDirector.buildQueryURL(reqOpts))

// A query warning that mentions 'coordinates' is taken to mean the wiki does not support them.
const isCoordinateWarning = resp.warnings && resp.warnings.query && (resp.warnings.query['*'] || '').includes('coordinates')
if (isCoordinateWarning) {
logger.info('Coordinates not available on this wiki')
this.mwCapabilities.coordinatesAvailable = false
}
}

/**
 * Removes whatever `WEAK_ETAG_REGEX` matches from an ETag value.
 *
 * @param etag - ETag header value; falsy values are returned unchanged.
 * @returns The ETag with the matched part removed, or the original falsy input.
 */
public removeEtagWeakPrefix(etag: string): string {
  if (!etag) {
    return etag
  }
  return etag.replace(WEAK_ETAG_REGEX, '')
}
Expand All @@ -253,7 +202,7 @@ class Downloader {
const queryOpts: KVS<any> = {
...this.getArticleQueryOpts(shouldGetThumbnail, true),
titles: articleIds.join('|'),
...(this.mwCapabilities.coordinatesAvailable ? { colimit: 'max' } : {}),
...(this.mw.hasCoordinatesApi ? { colimit: 'max' } : {}),
...(this.mw.getCategories
? {
cllimit: 'max',
Expand Down Expand Up @@ -293,7 +242,7 @@ class Downloader {
while (true) {
const queryOpts: KVS<any> = {
...this.getArticleQueryOpts(),
...(this.mwCapabilities.coordinatesAvailable ? { colimit: 'max' } : {}),
...(this.mw.hasCoordinatesApi ? { colimit: 'max' } : {}),
...(this.mw.getCategories
? {
cllimit: 'max',
Expand Down Expand Up @@ -441,9 +390,7 @@ class Downloader {
return {
action: 'query',
format: 'json',
prop: `redirects|revisions${includePageimages ? '|pageimages' : ''}${this.mwCapabilities.coordinatesAvailable ? '|coordinates' : ''}${
this.mw.getCategories ? '|categories' : ''
}`,
prop: `redirects|revisions${includePageimages ? '|pageimages' : ''}${this.mw.hasCoordinatesApi() ? '|coordinates' : ''}${this.mw.getCategories ? '|categories' : ''}`,
rdlimit: 'max',
rdnamespace: validNamespaceIds.join('|'),
redirects: redirects ? true : undefined,
Expand Down
82 changes: 69 additions & 13 deletions src/MediaWiki.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,65 @@ import semver from 'semver'
import basicURLDirector from './util/builders/url/basic.director.js'
import BaseURLDirector from './util/builders/url/base.director.js'
import ApiURLDirector from './util/builders/url/api.director.js'
import { checkApiAvailabilty } from './util/mw-api.js'

class MediaWiki {
public metaData: MWMetaData
public readonly baseUrl: URL
public readonly modulePath: string
public readonly webUrl: URL
public readonly apiUrl: URL
public readonly veApiUrl: URL
public readonly restApiUrl: URL
public readonly mobileRestApiUrl: URL
public readonly desktopRestApiUrl: URL
public readonly getCategories: boolean
public readonly modulePathConfig: string
public readonly namespaces: MWNamespaces = {}
public readonly namespacesToMirror: string[] = []

private readonly wikiPath: string
private readonly restApiPath: string
private readonly username: string
private readonly password: string
private readonly apiPath: string
private readonly domain: string
private apiUrlDirector: ApiURLDirector
private baseUrlDirector: BaseURLDirector

public veApiUrl: URL
public restApiUrl: URL
public apiUrl: URL
public modulePath: string
public webUrl: URL
public desktopRestApiUrl: URL

/**
 * Reports whether the desktop REST API endpoint of this wiki is reachable.
 *
 * The first call probes `desktopRestApiUrl` through `checkApiAvailabilty` and then
 * replaces this property with a function that returns the cached result, so the
 * probe request is issued by the first call only.
 *
 * @param loginCookie - Cookie header value forwarded to the probe request, if any.
 * @param testArticleId - Currently unused (the linter reports it as such); kept so
 *   existing call sites that pass it keep working.
 * @returns `true` when the probe succeeded, `false` otherwise.
 */
public hasDesktopRestApi = async function (loginCookie?: string, testArticleId?: string): Promise<boolean> {
  // `checkApiAvailabilty` takes a string URL, so pass the href rather than the URL object.
  const desktopRestApiAvailable = await checkApiAvailabilty(this.desktopRestApiUrl.href, loginCookie)
  // Memoize: subsequent calls return the cached answer without a new request.
  this.hasDesktopRestApi = async function (): Promise<boolean> {
    return desktopRestApiAvailable
  }
  // Return the result on the first call too; previously the first caller got `undefined`.
  return desktopRestApiAvailable
}

/**
 * Reports whether the VisualEditor API endpoint of this wiki is reachable.
 *
 * The first call probes `veApiUrl` through `checkApiAvailabilty` and then replaces
 * this property with a function that returns the cached result, so the probe
 * request is issued by the first call only.
 *
 * @param loginCookie - Cookie header value forwarded to the probe request, if any.
 * @param testArticleId - Currently unused (the linter reports it as such); kept so
 *   existing call sites that pass it keep working.
 * @returns `true` when the probe succeeded, `false` otherwise.
 */
public hasVeApi = async function (loginCookie?: string, testArticleId?: string): Promise<boolean> {
  // `checkApiAvailabilty` takes a string URL, so pass the href rather than the URL object.
  const veRestApiAvailable = await checkApiAvailabilty(this.veApiUrl.href, loginCookie)
  // Memoize: subsequent calls return the cached answer without a new request.
  this.hasVeApi = async function (): Promise<boolean> {
    return veRestApiAvailable
  }
  // Return the result on the first call too; previously the first caller got `undefined`.
  return veRestApiAvailable
}

/**
 * Checks whether the wiki's query API accepts the `coordinates` prop.
 *
 * A query that explicitly asks for `coordinates` is sent through `downloader`;
 * if the response carries a query warning mentioning 'coordinates', the feature
 * is treated as unavailable.
 *
 * @param downloader - Used to perform the probe request. When omitted no request
 *   is made and the function optimistically returns `true`.
 * @returns `false` if the probe response warns about coordinates, `true` otherwise.
 */
public hasCoordinatesApi = async function (downloader?: Downloader): Promise<boolean> {
  // Nothing to probe with: keep the optimistic default.
  if (!downloader) {
    return true
  }

  const validNamespaceIds = this.namespacesToMirror.map((ns) => this.namespaces[ns].num)
  const reqOpts = {
    action: 'query',
    format: 'json',
    // Always request `coordinates` here: the probe exists to see whether the wiki
    // warns about it. The previous code awaited `this.hasCoordinatesApi()` at this
    // point, which re-entered this function with no base case and never resolved.
    prop: `redirects|revisions|coordinates${this.getCategories ? '|categories' : ''}`,
    rdlimit: 'max',
    rdnamespace: validNamespaceIds.join('|'),
  }

  const resp = await downloader.getJSON<MwApiResponse>(this.apiUrlDirector.buildQueryURL(reqOpts))
  const isCoordinateWarning = resp.warnings && resp.warnings.query && (resp.warnings.query['*'] || '').includes('coordinates')
  if (isCoordinateWarning) {
    logger.info('Coordinates not available on this wiki')
    return false
  }
  return true
}

constructor(config: MWConfig) {
this.domain = config.domain || ''
Expand All @@ -42,20 +80,30 @@ class MediaWiki {

this.apiPath = config.apiPath ?? 'w/api.php'
this.wikiPath = config.wikiPath ?? DEFAULT_WIKI_PATH
this.restApiPath = config.restApiPath
this.modulePathConfig = config.modulePath

const baseUrlDirector = new BaseURLDirector(this.baseUrl.href)

this.webUrl = baseUrlDirector.buildURL(this.wikiPath)
this.apiUrl = baseUrlDirector.buildURL(this.apiPath)

this.apiUrlDirector = new ApiURLDirector(this.apiUrl.href)

this.veApiUrl = this.apiUrlDirector.buildVisualEditorURL()

this.restApiUrl = baseUrlDirector.buildRestApiURL(config.restApiPath)
this.desktopRestApiUrl = baseUrlDirector.buildDesktopRestApiURL(config.restApiPath)
this.restApiUrl = baseUrlDirector.buildRestApiURL(this.restApiPath)
this.desktopRestApiUrl = baseUrlDirector.buildDesktopRestApiURL(this.restApiPath)

this.modulePath = baseUrlDirector.buildModuleURL(config.modulePath)
this.modulePath = baseUrlDirector.buildModuleURL(this.modulePathConfig)

/*
this.restApiUrl = this.baseUrlDirector.buildRestApiURL(this.restApiPath)
this.apiUrl = this.baseUrlDirector.buildURL(this.apiPath)
this.modulePath = this.baseUrlDirector.buildModuleURL(this.modulePathConfig)
this.webUrl = this.baseUrlDirector.buildURL(this.wikiPath)
this.desktopRestApiUrl = this.baseUrlDirector.buildDesktopRestApiURL(config.restApiPath)
this.veApiUrl = this.apiUrlDirector.buildVisualEditorURL()
*/
}

public async login(downloader: Downloader) {
Expand Down Expand Up @@ -85,12 +133,13 @@ class MediaWiki {
},
method: 'POST',
})
.then((resp) => {
.then(async (resp) => {
if (resp.data.login.result !== 'Success') {
throw new Error('Login Failed')
}

downloader.loginCookie = resp.headers['set-cookie'].join(';')
await this.checkCapabilities(resp.headers['set-cookie'].join(';'))
})
.catch((err) => {
throw err
Expand Down Expand Up @@ -296,6 +345,13 @@ class MediaWiki {

return mwMetaData
}

/**
 * Runs the capability probes one after another; used during `login`.
 *
 * @param loginCookie - Cookie header value forwarded to the REST / VisualEditor probes.
 * @param testArticleId - Forwarded to the REST / VisualEditor probes.
 */
private async checkCapabilities(loginCookie?: string, testArticleId = 'MediaWiki:Sidebar'): Promise<void> {
  // Order is preserved: desktop REST API, then VisualEditor API, then coordinates.
  const probes: Array<() => Promise<unknown>> = [
    () => this.hasDesktopRestApi(loginCookie, testArticleId),
    () => this.hasVeApi(loginCookie, testArticleId),
    // Called without a downloader argument.
    () => this.hasCoordinatesApi(),
  ]
  for (const runProbe of probes) {
    await runProbe()
  }
}
}

export default MediaWiki
4 changes: 2 additions & 2 deletions src/mwoffliner.lib.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import semver from 'semver'
import * as path from 'path'
import * as QueryStringParser from 'querystring'
import { ZimArticle, ZimCreator } from '@openzim/libzim'
import { checkApiAvailabilty } from './util/mw-api.js'

import {
MAX_CPU_CORES,
Expand Down Expand Up @@ -205,12 +206,11 @@ async function execute(argv: any) {
if (customMainPage) {
mainPage = customMainPage
const mainPageUrl = mw.webUrl + encodeURIComponent(mainPage)
if (!(await downloader.checkApiAvailabilty(mainPageUrl))) {
if (!(await checkApiAvailabilty(mainPageUrl))) {
throw new Error(`customMainPage doesn't return 200 status code for url ${mainPageUrl}`)
}
}

await downloader.checkCapabilities(mwMetaData.mainPage)
await downloader.setBaseUrls()

const redisStore = new RedisStore(argv.redis || config.defaults.redisPath)
Expand Down
2 changes: 2 additions & 0 deletions src/util/builders/url/basic.director.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ class BasicURLDirector {
buildDownloaderBaseUrl(conditions: DownloaderBaseUrlConditions): string | undefined {
let baseUrl: string

console.log('Args for buildDownloaderBaseUrl ', conditions)

for (const { condition, value } of conditions) {
if (condition) {
baseUrl = value
Expand Down
10 changes: 10 additions & 0 deletions src/util/mw-api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import deepmerge from 'deepmerge'
import * as logger from '../Logger.js'
import Downloader from '../Downloader.js'
import Timer from './Timer.js'
import axios from 'axios'

export async function getArticlesByIds(articleIds: string[], downloader: Downloader, redisStore: RS, log = true): Promise<void> {
let from = 0
Expand Down Expand Up @@ -253,3 +254,12 @@ export function mwRetToArticleDetail(obj: QueryMwRet): KVS<ArticleDetail> {
}
return ret
}

/**
 * Probes `url` with a single GET request (redirects are not followed) and reports
 * whether it answers like a working MediaWiki endpoint.
 *
 * @param url - URL to request.
 * @param loginCookie - Value sent in the `cookie` request header; defaults to an empty string.
 * @returns `true` when the response status is 200 and it carries no
 *   `mediawiki-api-error` header; `false` otherwise, including when the request throws.
 */
export async function checkApiAvailabilty(url: string, loginCookie = ''): Promise<boolean> {
  let available = false
  try {
    const response = await axios.get(url, { maxRedirects: 0, headers: { cookie: loginCookie } })
    const hasApiErrorHeader = Boolean(response.headers['mediawiki-api-error'])
    available = response.status === 200 && !hasApiErrorHeader
  } catch {
    // Any failure (network error, non-2xx status, redirect) counts as "not available".
    available = false
  }
  return available
}
1 change: 0 additions & 1 deletion test/unit/downloader.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ describe('Downloader class', () => {
downloader = new Downloader({ mw, uaString: `${config.userAgent} (contact@kiwix.org)`, speed: 1, reqTimeout: 1000 * 60, webp: true, optimisationCacheUrl: '' })

await mw.getMwMetaData(downloader)
await downloader.checkCapabilities()
await downloader.setBaseUrls()
})

Expand Down
2 changes: 1 addition & 1 deletion test/unit/mwApi.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ describe('mwApi', () => {
downloader = new Downloader({ mw, uaString: `${config.userAgent} (contact@kiwix.org)`, speed: 1, reqTimeout: 1000 * 60, webp: false, optimisationCacheUrl: '' })

await mw.getMwMetaData(downloader)
await downloader.checkCapabilities()
// await downloader.checkCapabilities()

await mw.getNamespaces([], downloader)
})
Expand Down
2 changes: 0 additions & 2 deletions test/unit/saveArticles.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ describe('saveArticles', () => {

test('Article html processing', async () => {
const { downloader, mw, dump } = await setupScrapeClasses() // en wikipedia
await downloader.checkCapabilities()
await downloader.setBaseUrls()
const _articlesDetail = await downloader.getArticleDetailsIds(['London'])
const articlesDetail = mwRetToArticleDetail(_articlesDetail)
Expand Down Expand Up @@ -130,7 +129,6 @@ describe('saveArticles', () => {

test('--customFlavour', async () => {
const { downloader, mw, dump } = await setupScrapeClasses({ format: 'nopic' }) // en wikipedia
await downloader.checkCapabilities()
await downloader.setBaseUrls()
class CustomFlavour implements CustomProcessor {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
Expand Down
1 change: 0 additions & 1 deletion test/unit/treatments/article.treatment.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ describe('ArticleTreatment', () => {

test('Article html processing', async () => {
const { downloader, mw, dump } = await setupScrapeClasses() // en wikipedia
await downloader.checkCapabilities()
await downloader.setBaseUrls()
const _articlesDetail = await downloader.getArticleDetailsIds(['London'])
const articlesDetail = mwRetToArticleDetail(_articlesDetail)
Expand Down
2 changes: 1 addition & 1 deletion test/unit/urlRewriting.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ describe('Styles', () => {
await articleDetailXId.flush()
await redisStore.redirectsXId.flush()
const { downloader, mw, dump } = await setupScrapeClasses() // en wikipedia
await downloader.checkCapabilities()
// await downloader.checkCapabilities()
await downloader.setBaseUrls()

await getArticleIds(downloader, redisStore, mw, '', ['London', 'British_Museum', 'Natural_History_Museum,_London', 'Farnborough/Aldershot_built-up_area'])
Expand Down
2 changes: 1 addition & 1 deletion test/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ export async function setupScrapeClasses({ mwUrl = 'https://en.wikipedia.org', f
const downloader = new Downloader({ mw, uaString: `${config.userAgent} (contact@kiwix.org)`, speed: 1, reqTimeout: 1000 * 60, webp: false, optimisationCacheUrl: '' })

await mw.getMwMetaData(downloader)
await downloader.checkCapabilities()
// await downloader.checkCapabilities()

const dump = new Dump(format, {} as any, mw.metaData)

Expand Down

0 comments on commit 132d561

Please sign in to comment.