Skip to content

Commit

Permalink
Introduce forceRender param
Browse files Browse the repository at this point in the history
  • Loading branch information
VadimKovalenkoSNF committed Sep 6, 2023
1 parent fad1254 commit 149fd77
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 24 deletions.
60 changes: 43 additions & 17 deletions src/Downloader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -164,23 +164,49 @@ class Downloader {
}
}

public async setBaseUrls() {
//* Objects order in array matters!
this.baseUrl = basicURLDirector.buildDownloaderBaseUrl([
{ condition: await MediaWiki.hasWikimediaDesktopRestApi(), value: MediaWiki.desktopRestApiUrl.href },
{ condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href },
])

//* Objects order in array matters!
this.baseUrlForMainPage = basicURLDirector.buildDownloaderBaseUrl([
{ condition: await MediaWiki.hasWikimediaDesktopRestApi(), value: MediaWiki.desktopRestApiUrl.href },
{ condition: await MediaWiki.hasVisualEditorApi(), value: MediaWiki.visualEditorApiUrl.href },
])

logger.log('Base Url: ', this.baseUrl)
logger.log('Base Url for Main Page: ', this.baseUrlForMainPage)

if (!this.baseUrl || !this.baseUrlForMainPage) throw new Error('Unable to find appropriate API end-point to retrieve article HTML')
/**
 * Resolve the base URLs used to download article HTML and main page HTML.
 *
 * Without `forceRender`, the first available end-point wins, in the order
 * Wikimedia desktop REST API, then VisualEditor API.
 * With `forceRender`, only the named end-point is considered; the
 * 'WikimediaDesktop' and 'VisualEditor' names are handled here, any other
 * name is rejected.
 *
 * @param forceRender Name of the render/end-point to force, or a falsy value
 *   to let the end-point be chosen automatically.
 * @throws Error when no end-point is available (automatic mode), when the
 *   forced end-point is not available, or when the forced name is not one
 *   handled by this method.
 */
public async setBaseUrls(forceRender = null) {
  if (!forceRender) {
    // Evaluate each availability check once and reuse it for both URL lists.
    const hasDesktopRestApi = await MediaWiki.hasWikimediaDesktopRestApi()
    const hasVisualEditorApi = await MediaWiki.hasVisualEditorApi()

    //* Objects order in array matters!
    this.baseUrl = basicURLDirector.buildDownloaderBaseUrl([
      { condition: hasDesktopRestApi, value: MediaWiki.desktopRestApiUrl.href },
      { condition: hasVisualEditorApi, value: MediaWiki.visualEditorApiUrl.href },
    ])

    //* Objects order in array matters!
    this.baseUrlForMainPage = basicURLDirector.buildDownloaderBaseUrl([
      { condition: hasDesktopRestApi, value: MediaWiki.desktopRestApiUrl.href },
      { condition: hasVisualEditorApi, value: MediaWiki.visualEditorApiUrl.href },
    ])

    logger.log('Base Url: ', this.baseUrl)
    logger.log('Base Url for Main Page: ', this.baseUrlForMainPage)

    if (!this.baseUrl || !this.baseUrlForMainPage) throw new Error('Unable to find appropriate API end-point to retrieve article HTML')
    return
  }

  let forcedBaseUrl: string | undefined
  switch (forceRender) {
    case 'WikimediaDesktop':
      // The availability checks are async: without `await` the returned
      // Promise is always truthy and the check would never fail.
      if (await MediaWiki.hasWikimediaDesktopRestApi()) {
        forcedBaseUrl = MediaWiki.desktopRestApiUrl.href
      }
      break
    case 'VisualEditor':
      if (await MediaWiki.hasVisualEditorApi()) {
        forcedBaseUrl = MediaWiki.visualEditorApiUrl.href
      }
      break
    default:
      throw new Error('Unable to find specific API end-point to retrieve article HTML')
  }

  // Fail early instead of leaving the base URLs unset when the forced
  // end-point is not offered by the wiki.
  if (!forcedBaseUrl) throw new Error('Unable to find specific API end-point to retrieve article HTML')

  this.baseUrl = forcedBaseUrl
  this.baseUrlForMainPage = forcedBaseUrl
  logger.log('Base Url: ', this.baseUrl)
  logger.log('Base Url for Main Page: ', this.baseUrlForMainPage)
}

public removeEtagWeakPrefix(etag: string): string {
Expand Down
6 changes: 3 additions & 3 deletions src/mwoffliner.lib.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ async function execute(argv: any) {
customZimFavicon,
optimisationCacheUrl,
customFlavour,
forceRender,
} = argv

let { articleList, articleListToIgnore } = argv
Expand Down Expand Up @@ -212,8 +213,7 @@ async function execute(argv: any) {
await MediaWiki.hasCoordinates(downloader)
await MediaWiki.hasWikimediaDesktopRestApi()
await MediaWiki.hasVisualEditorApi()

await downloader.setBaseUrls()
await downloader.setBaseUrls(forceRender)

const redisStore = new RedisStore(argv.redis || config.defaults.redisPath)
await redisStore.connect()
Expand Down Expand Up @@ -420,7 +420,7 @@ async function execute(argv: any) {

logger.log('Getting articles')
stime = Date.now()
const { jsModuleDependencies, cssModuleDependencies } = await saveArticles(zimCreator, downloader, redisStore, dump)
const { jsModuleDependencies, cssModuleDependencies } = await saveArticles(zimCreator, downloader, redisStore, dump, forceRender)
logger.log(`Fetching Articles finished in ${(Date.now() - stime) / 1000} seconds`)

logger.log(`Found [${jsModuleDependencies.size}] js module dependencies`)
Expand Down
2 changes: 2 additions & 0 deletions src/parameterList.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ export const parameterDescriptions = {
osTmpDir: 'Override default operating system temporary directory path environment variable',
customFlavour: 'A custom processor that can filter and process articles (see extensions/*.js)',
optimisationCacheUrl: 'S3 url, including credentials and bucket name',
forceRender:
'Force the usage of a specific API end-point/render, automatically chosen otherwise. Accepted values: [ VisualEditor, WikimediaDesktop. WikimediaMobile ]. More details at https://github.com/openzim/mwoffliner/wiki/API-end-points',
}

// TODO: Add an interface based on the object above
30 changes: 29 additions & 1 deletion src/sanitize-argument.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,19 @@ const parametersWithArrayType = ['format']

export async function sanitize_all(argv: any) {
// extracting all arguments
const { articleList, addNamespaces, speed: _speed, adminEmail, mwUrl, customZimFavicon, optimisationCacheUrl, verbose, customZimLongDescription, customZimDescription } = argv
const {
articleList,
addNamespaces,
speed: _speed,
adminEmail,
mwUrl,
customZimFavicon,
optimisationCacheUrl,
verbose,
customZimLongDescription,
customZimDescription,
forceRender,
} = argv

sanitizeDoubleUsedParameters(argv)

Expand Down Expand Up @@ -73,6 +85,11 @@ export async function sanitize_all(argv: any) {
// sanitizing adminEmail
sanitize_adminEmail(adminEmail)

// sanitizing renderer
if (forceRender) {
sanitize_forceRender(forceRender)
}

// Redis client sanitization
// created a redis client and then closed it.
sanitize_redis(argv)
Expand Down Expand Up @@ -173,3 +190,14 @@ export function sanitize_customFlavour(customFlavour: string): string {
}) || null
)
}

/**
 * Validate the value given for the forceRender option.
 *
 * @param renderName Render name supplied by the caller.
 * @returns The same render name when it is one of the accepted values.
 * @throws Error when the name is not 'VisualEditor', 'WikimediaDesktop' or 'WikimediaMobile'.
 */
export function sanitize_forceRender(renderName: string): string {
  const acceptedRenders: string[] = ['VisualEditor', 'WikimediaDesktop', 'WikimediaMobile']

  // Guard clause: reject anything that is not an exact, case-sensitive match.
  if (!acceptedRenders.includes(renderName)) {
    throw new Error(`Invalid render name: ${renderName}`)
  }

  return renderName
}
16 changes: 13 additions & 3 deletions src/util/saveArticles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ export function getArticleUrl(downloader: Downloader, dump: Dump, articleId: str
/*
* Fetch Articles
*/
export async function saveArticles(zimCreator: ZimCreator, downloader: Downloader, redisStore: RS, dump: Dump) {
export async function saveArticles(zimCreator: ZimCreator, downloader: Downloader, redisStore: RS, dump: Dump, forceRender = null) {
const jsModuleDependencies = new Set<string>()
const cssModuleDependencies = new Set<string>()
let jsConfigVars = ''
Expand All @@ -256,9 +256,19 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade
const articlesTotal = await articleDetailXId.len()

const rendererBuilder = new RendererBuilder()
const rendererBuilderOptions: RendererBuilderOptions = {
renderType: 'auto',

let rendererBuilderOptions: RendererBuilderOptions
if (forceRender) {
rendererBuilderOptions = {
renderType: 'specific',
renderName: forceRender,
}
} else {
rendererBuilderOptions = {
renderType: 'auto',
}
}

const mainPageRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions)
// TODO: article renderer will be switched to the mobile mode later
const articlesRenderer = await rendererBuilder.createRenderer(rendererBuilderOptions)
Expand Down
55 changes: 55 additions & 0 deletions test/e2e/forceRender.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import * as mwoffliner from '../../src/mwoffliner.lib.js'
import { execa } from 'execa'
import rimraf from 'rimraf'
import { jest } from '@jest/globals'
import { zimcheckAvailable, zimcheck } from '../util.js'

jest.setTimeout(200000)

// End-to-end checks for the forceRender option: one scrape that must succeed
// with a forced render, and two that must be rejected.
describe('forceRender', () => {
  const now = new Date()
  const testId = `mwo-test-${+now}`

  // Options shared by every scrape in this suite; each test adds its own forceRender value.
  const parameters = {
    mwUrl: 'https://bm.wikipedia.org',
    adminEmail: 'test@kiwix.org',
    outputDirectory: testId,
    redis: process.env.REDIS,
    format: ['nopic'],
    articleList: 'France',
  }

  beforeAll(async () => {
    await execa('redis-cli flushall', { shell: true })
  })

  test('Scrape article from bm.wikipedia.org using WikimediaDesktop render', async () => {
    const forceRender = 'WikimediaDesktop'
    const outFiles = await mwoffliner.execute({ ...parameters, forceRender })

    if (await zimcheckAvailable()) {
      await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError()
    } else {
      console.log('Zimcheck not installed, skipping test')
    }

    rimraf.sync(`./${testId}`)
    const redisScan = await execa('redis-cli --scan', { shell: true })
    // Redis has been cleared
    expect(redisScan.stdout).toEqual('')
  })

  test('Scrape article from bm.wikipedia.org should throw error when using VisualEditor render', async () => {
    const forceRender = 'VisualEditor'
    // The `.rejects` assertion is itself a promise: it must be awaited,
    // otherwise the test finishes before the assertion is evaluated.
    await expect(mwoffliner.execute({ ...parameters, forceRender })).rejects.toThrowError()
  })

  test('Scrape article from bm.wikipedia.org should throw error when using wrong render', async () => {
    const forceRender = 'unknownRenderName'
    // Awaited for the same reason as above.
    await expect(mwoffliner.execute({ ...parameters, forceRender })).rejects.toThrowError()
  })
})

0 comments on commit 149fd77

Please sign in to comment.