Skip to content

Commit

Permalink
Wrap renderers using Builder pattern, refactor Render instantiation
Browse files Browse the repository at this point in the history
  • Loading branch information
VadimKovalenkoSNF committed Aug 8, 2023
1 parent b8578b3 commit 34ae794
Show file tree
Hide file tree
Showing 9 changed files with 131 additions and 164 deletions.
32 changes: 25 additions & 7 deletions src/Downloader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import DesktopURLDirector from './util/builders/url/desktop.director.js'
import VisualEditorURLDirector from './util/builders/url/visual-editor.director.js'
import basicURLDirector from './util/builders/url/basic.director.js'
import urlHelper from './util/url.helper.js'
import articleRenderer from './util/renderers/article.renderer.js'
import { ArticleRenderer } from './util/builders/renderers/renderer.js'

const imageminOptions = new Map()
imageminOptions.set('default', new Map())
Expand Down Expand Up @@ -105,7 +105,7 @@ class Downloader {
private readonly backoffOptions: BackoffOptions
private readonly optimisationCacheUrl: string
private s3: S3
private mwCapabilities: MWCapabilities // todo move to MW
public mwCapabilities: MWCapabilities // TODO: move to MW; the property is temporarily public
private apiUrlDirector: ApiURLDirector

constructor({ mw, uaString, speed, reqTimeout, optimisationCacheUrl, s3, webp, backoffOptions }: DownloaderOpts) {
Expand Down Expand Up @@ -215,6 +215,7 @@ class Downloader {
}
}

// TODO: Why is this method public?
public async checkCapabilities(testArticleId = 'MediaWiki:Sidebar'): Promise<void> {
const desktopUrlDirector = new DesktopURLDirector(this.mw.desktopRestApiUrl.href)
const visualEditorURLDirector = new VisualEditorURLDirector(this.mw.veApiUrl.href)
Expand Down Expand Up @@ -348,17 +349,34 @@ class Downloader {
}
}

public async getArticle(articleId: string, dump: Dump, articleDetailXId: RKVS<ArticleDetail>, articleDetail?: ArticleDetail): Promise<RenderedArticle[]> {
public async getArticle(articleId: string, dump: Dump, articleDetailXId: RKVS<ArticleDetail>, articleRenderer: ArticleRenderer, articleDetail?: ArticleDetail): Promise<any> {
// TODO: Can this condition be true for other renderers?
const isMainPage = dump.isMainPage(articleId)
const articleApiUrl = this.getArticleUrl(articleId, isMainPage)

logger.info(`Getting article [${articleId}] from ${articleApiUrl}`)

const json = await this.getJSON<any>(articleApiUrl)
if (json.error) {
throw json.error
const data = await this.getJSON<any>(articleApiUrl)
if (data.error) {
throw data.error
}
return articleRenderer.renderArticle(json, articleId, this.mwCapabilities, articleDetailXId, dump, articleDetail)

const renderOpts = {
data,
articleId,
articleDetailXId,
articleDetail,
dump,
isMainPage,
}

// Render visual editor representation of the article
if (isMainPage || (this.mwCapabilities.veApiAvailable && !this.mwCapabilities.desktopRestApiAvailable)) {
// return this.mwRenderer(mwRendererArgs)
return articleRenderer.renderArticle(renderOpts, 'visual-editor')
}
// Render Parsoid page/html that comes from Wikimedia REST API
return articleRenderer.renderArticle(renderOpts, 'desktop')
}

public async getJSON<T>(_url: string): Promise<T> {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
export abstract class Renderer {
abstract render(): Promise<any>
abstract render(): void
}
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
import domino from 'domino'
import { Renderer } from './abstractRenderer.js'
import { getStrippedTitleFromHtml } from '../misc.js'
import { MWRendererArgs } from './article.renderer.js'
import { Renderer } from './abstract.js'
import { getStrippedTitleFromHtml } from '../../misc.js'

// Represent 'https://{wikimedia-wiki}/api/rest_v1/page/html/'
export class ParsoidHtmlRestApiRenderer extends Renderer {
export class DesktopRendererDirector extends Renderer {
private data
private articleId
private articleDetail
private articleDetailXId

constructor(mwRendererArgs: MWRendererArgs) {
constructor(renderOpts) {
super()
this.data = mwRendererArgs.data
this.articleId = mwRendererArgs.articleId
this.articleDetail = mwRendererArgs.articleDetail
this.articleDetailXId = mwRendererArgs.articleDetailXId
this.data = renderOpts.data
this.articleId = renderOpts.articleId
this.articleDetail = renderOpts.articleDetail
this.articleDetailXId = renderOpts.articleDetailXId
}

public async render(): Promise<any> {
Expand All @@ -32,13 +31,13 @@ export class ParsoidHtmlRestApiRenderer extends Renderer {
prevArticleId: i - 1 > 0 ? `${this.articleId}__${i - 1}` : i - 1 === 0 ? this.articleId : null,
})

if ((this.articleDetail.subCategories || []).length > 200) {
if (this.articleDetailXId && (this.articleDetail.subCategories || []).length > 200) {
await this.articleDetailXId.set(_articleId, _articleDetail)
}

let strippedTitle = getStrippedTitleFromHtml(this.data)
if (!strippedTitle) {
const title = (this.data.lead || { displaytitle: this.articleId }).displaytitle
const title = this.articleId
const doc = domino.createDocument(`<span class='mw-title'>${title}</span>`)
strippedTitle = doc.getElementsByClassName('mw-title')[0].textContent
}
Expand Down
24 changes: 24 additions & 0 deletions src/util/builders/renderers/renderer.builder.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import { Renderer } from './abstract.js'
import { DesktopRendererDirector } from './desktop.director.js'
import { VisualEditorRendererDirector } from './visual-editor.director.js'

/**
 * Fluent builder that selects which concrete article renderer to hand out.
 *
 * Call one of the `set*Renderer` methods first, then `createRenderer()` to
 * obtain the instance that was selected last.
 */
export class RendererBuilder {
  // Most recently selected renderer; stays null until a setter has been called.
  private renderer: Renderer | null = null

  /**
   * Selects a DesktopRendererDirector constructed from the given options.
   *
   * @param renderOpts - forwarded untouched to the DesktopRendererDirector constructor
   * @returns this builder, to allow chaining
   */
  setDesktopRenderer(renderOpts): RendererBuilder {
    return this.use(new DesktopRendererDirector(renderOpts))
  }

  /**
   * Selects a VisualEditorRendererDirector constructed from the given options.
   *
   * @param renderOpts - forwarded untouched to the VisualEditorRendererDirector constructor
   * @returns this builder, to allow chaining
   */
  setVisualEditorRenderer(renderOpts): RendererBuilder {
    return this.use(new VisualEditorRendererDirector(renderOpts))
  }

  /**
   * Returns the renderer chosen by the latest setter call.
   *
   * @throws Error when no renderer has been selected yet
   */
  createRenderer(): Renderer {
    const selected = this.renderer
    if (selected !== null) {
      return selected
    }
    throw new Error('Article renderer type is not set.')
  }

  // Stores the selected renderer and returns the builder for chaining.
  private use(renderer: Renderer): RendererBuilder {
    this.renderer = renderer
    return this
  }
}
21 changes: 21 additions & 0 deletions src/util/builders/renderers/renderer.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import { RendererBuilder } from './renderer.builder.js'

/**
 * Entry point for rendering an article: picks a renderer through
 * RendererBuilder according to the requested mode and runs it.
 */
export class ArticleRenderer {
  private builder: RendererBuilder = new RendererBuilder()

  /**
   * Renders an article with the renderer matching `renderMode`.
   *
   * @param renderOpts - options forwarded as-is to the selected renderer
   * @param renderMode - 'desktop' or 'visual-editor'
   * @returns whatever the selected renderer's `render()` returns
   * @throws Error when `renderMode` is neither of the supported modes
   */
  public renderArticle(renderOpts: any, renderMode: string): void {
    if (renderMode === 'desktop') {
      this.builder.setDesktopRenderer(renderOpts)
    } else if (renderMode === 'visual-editor') {
      this.builder.setVisualEditorRenderer(renderOpts)
    } else {
      // Reject unsupported modes before touching the builder.
      throw new Error(`Unknown mwType: ${renderMode}`)
    }

    return this.builder.createRenderer().render()
  }
}
44 changes: 44 additions & 0 deletions src/util/builders/renderers/visual-editor.director.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import domino from 'domino'
import { Renderer } from './abstract.js'

/*
  Represent 'https://{wikimedia-wiki}/w/api.php?action=visualeditor&mobileformat=html&format=json&paction=parse&page={title}'
  or
  'https://{3rd-party-wikimedia-wiki}/w/api.php?action=visualeditor&mobileformat=html&format=json&paction=parse&page={title}'
*/
/**
 * Renderer for articles fetched through the VisualEditor API.
 *
 * Reads `data`, `articleId`, `articleDetail` and `isMainPage` from the render
 * options it is constructed with. The HTML to render is taken from
 * `data.visualeditor.content`.
 */
export class VisualEditorRendererDirector extends Renderer {
  private data
  private articleId
  private articleDetail
  private isMainPage

  constructor(renderOpts) {
    super()
    this.data = renderOpts.data
    this.articleId = renderOpts.articleId
    this.articleDetail = renderOpts.articleDetail
    this.isMainPage = renderOpts.isMainPage
  }

  /**
   * Produces the article HTML from `data.visualeditor.content`.
   *
   * The main page is returned as-is; every other article gets an
   * `<h1 class="article-header">` with the article title injected.
   *
   * @returns the HTML string to render
   * @throws Error when there is no data, or the data carries no `visualeditor.content` string
   */
  public async render(): Promise<any> {
    if (!this.data) {
      throw new Error(`Cannot render [${this.data}] into an article`)
    }

    // Fail with a descriptive error instead of an opaque TypeError when the
    // payload lacks the `visualeditor.content` HTML.
    const content = this.data.visualeditor?.content
    if (typeof content !== 'string') {
      throw new Error(`Cannot render [${this.articleId}] into an article: response has no visualeditor content`)
    }

    return this.isMainPage ? content : this.injectHeader(content, this.articleDetail)
  }

  /**
   * Parses `content` and prepends an `<h1 class="article-header">` holding
   * `articleDetail.title` to the document body.
   *
   * @returns the serialized HTML of the whole document
   */
  private injectHeader(content: string, articleDetail: ArticleDetail): string {
    const doc = domino.createDocument(content)
    const header = doc.createElement('h1')

    header.appendChild(doc.createTextNode(articleDetail.title))
    header.classList.add('article-header')

    // querySelector yields null when <body> does not carry the
    // 'mw-body-content' class; fall back to the plain body rather than
    // dereferencing null.
    const target = doc.querySelector('body.mw-body-content') ?? doc.body

    target.insertAdjacentElement('afterbegin', header)

    return doc.documentElement.outerHTML
  }
}
79 changes: 0 additions & 79 deletions src/util/renderers/article.renderer.ts

This file was deleted.

61 changes: 0 additions & 61 deletions src/util/renderers/visualEditor.renderer.ts

This file was deleted.

11 changes: 6 additions & 5 deletions src/util/saveArticles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import { CONCURRENCY_LIMIT, DELETED_ARTICLE_ERROR, MAX_FILE_DOWNLOAD_RETRIES } f
import ApiURLDirector from './builders/url/api.director.js'
import articleTreatment from './treatments/article.treatment.js'
import urlHelper from './url.helper.js'
import { ArticleRenderer } from './builders/renderers/renderer.js'

const genericJsModules = config.output.mw.js
const genericCssModules = config.output.mw.css
Expand Down Expand Up @@ -129,11 +130,11 @@ async function downloadBulk(listOfArguments: any[], downloader: Downloader): Pro
}
}

async function getAllArticlesToKeep(downloader: Downloader, articleDetailXId: RKVS<ArticleDetail>, mw: MediaWiki, dump: Dump) {
async function getAllArticlesToKeep(downloader: Downloader, articleDetailXId: RKVS<ArticleDetail>, mw: MediaWiki, dump: Dump, articleRenderer: ArticleRenderer) {
await articleDetailXId.iterateItems(downloader.speed, async (articleKeyValuePairs) => {
for (const [articleId, articleDetail] of Object.entries(articleKeyValuePairs)) {
try {
const rets = await downloader.getArticle(articleId, dump, articleDetailXId, articleDetail)
const rets = await downloader.getArticle(articleId, dump, articleDetailXId, articleRenderer, articleDetail)
for (const { articleId, html: articleHtml } of rets) {
if (!articleHtml) {
continue
Expand Down Expand Up @@ -242,12 +243,12 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade
const cssModuleDependencies = new Set<string>()
let jsConfigVars = ''
let prevPercentProgress: string

const articleRenderer = new ArticleRenderer()
const { articleDetailXId } = redisStore
const articlesTotal = await articleDetailXId.len()

if (dump.customProcessor?.shouldKeepArticle) {
await getAllArticlesToKeep(downloader, articleDetailXId, mw, dump)
await getAllArticlesToKeep(downloader, articleDetailXId, mw, dump, articleRenderer)
}

const stages = ['Download Article', 'Get module dependencies', 'Parse and Save to ZIM', 'Await left-over promises']
Expand Down Expand Up @@ -277,7 +278,7 @@ export async function saveArticles(zimCreator: ZimCreator, downloader: Downloade
const promises: [string, Promise<Error>][] = []

try {
const rets = await downloader.getArticle(articleId, dump, articleDetailXId, articleDetail)
const rets = await downloader.getArticle(articleId, dump, articleDetailXId, articleRenderer, articleDetail)

for (const { articleId, displayTitle: articleTitle, html: articleHtml } of rets) {
const nonPaginatedArticleId = articleDetail.title
Expand Down

0 comments on commit 34ae794

Please sign in to comment.