Skip to content

Commit

Permalink
perf: ⚡️ add batch printing to pdf
Browse files — browse the repository at this point in the history
  • Loading branch information
lagleki committed Sep 1, 2024
1 parent 32a2331 commit d96faf3
Showing 1 changed file with 54 additions and 43 deletions.
97 changes: 54 additions & 43 deletions src/lib/printer/index.js
Original file line number | Diff line number | Diff line change
@@ -1,69 +1,80 @@
const puppeteer = require("playwright-core");
const playwright = require("playwright-core");
const fs = require("fs");
const { sluggify } = require("../html-prettifier/slugger");
const { languages } = require("../../config/locales.json");

// Keys of the `languages` map loaded from config/locales.json; printPDF loops over them.
const allLanguages = Object.keys(languages);
// Size of each batch in processBatch, i.e. how many generatePDF calls are started together.
const CONCURRENCY_LIMIT = 2;

(async function printPDF() {
let browser;
/**
 * Renders one page to PDF and writes it to /vreji/uencu/<shortLang>/<name>.pdf,
 * where <name> is the part of `url` after its last "/".
 *
 * Failures while navigating, printing or writing are logged and swallowed, so
 * one broken page does not reject the caller. The page is always closed.
 *
 * @param {object} browser - Object exposing `newPage()`; the returned page must
 *   provide `goto`, `pdf` and `close`.
 * @param {string} url - Address of the page to print.
 * @param {string} shortLang - Language code used as the output sub-directory.
 * @returns {Promise<void>}
 */
async function generatePDF(browser, url, shortLang) {
  const tab = await browser.newPage();
  try {
    console.log(`opening page: ${url}`);
    // NOTE(review): "networkidle0" is the Puppeteer spelling; Playwright documents
    // "networkidle" — confirm the installed playwright-core accepts this value.
    const navigationOptions = {
      waitUntil: "networkidle0",
      timeout: 0,
    };
    await tab.goto(url, navigationOptions);

    // NOTE(review): `quality` does not appear among the documented page.pdf
    // options — presumably ignored; verify before relying on it.
    const pdfOptions = {
      printBackground: true,
      preferCSSPageSize: true,
      quality: 100,
      format: "A4",
      margin: { top: "20px", right: "20px", bottom: "20px", left: "20px" },
      timeout: 0,
    };
    const pdfBytes = await tab.pdf(pdfOptions);

    // Last path segment of the URL becomes the file name.
    const outDir = `/vreji/uencu/${shortLang}`;
    const baseName = url.split("/").pop();
    const pdfFile = `${outDir}/${baseName}.pdf`;
    fs.mkdirSync(outDir, { recursive: true });
    fs.writeFileSync(pdfFile, pdfBytes);
    console.log(`pdf file saved: ${pdfFile}`);
  } catch (err) {
    console.error(`Error generating PDF for ${url}:`, err);
  } finally {
    // Release the page whether or not printing succeeded.
    await tab.close();
  }
}

/**
 * Runs generatePDF for every URL, CONCURRENCY_LIMIT URLs at a time.
 *
 * URLs are consumed in order in consecutive slices; all calls of one slice are
 * started together and the next slice begins only after every call of the
 * current one has settled successfully (Promise.all).
 *
 * @param {object} browser - Passed through to generatePDF unchanged.
 * @param {string[]} urls - Page addresses to print.
 * @param {string} shortLang - Passed through to generatePDF unchanged.
 * @returns {Promise<Array>} The generatePDF results, in the order of `urls`.
 */
async function processBatch(browser, urls, shortLang) {
  const collected = [];
  let offset = 0;
  while (offset < urls.length) {
    const chunk = urls.slice(offset, offset + CONCURRENCY_LIMIT);
    const pending = chunk.map((pageUrl) => generatePDF(browser, pageUrl, shortLang));
    const finished = await Promise.all(pending);
    collected.push(...finished);
    offset += CONCURRENCY_LIMIT;
  }
  return collected;
}

// NOTE(review): this span comes from a rendered commit diff whose +/- markers were
// lost. Statements of what looks like the earlier sequential implementation (one
// shared page, a `for (let url of urls)` loop, "-pre.pdf" output names, `continue`
// in the catch, a trailing `})();`) are interleaved with the batch-based one
// (processBatch inside try/finally around browser.close()). The span does not parse
// as written; confirm against the repository which lines are live.
/**
 * For each language in allLanguages: reads the .md file names under
 * /app/src/md_pages/<short>/books/, slugs them into book URLs on
 * http://127.0.0.1:3000, launches headless Chromium and renders the PDFs.
 */
async function printPDF() {
for (const lang of allLanguages) {
const shortLang = languages[lang].short;
try {
const urls = fs
.readdirSync(`/app/src/md_pages/${shortLang}/books/`)
.filter((i) => i.endsWith(".md"))
// The next two lines are the same call; the first (ending in `;`) appears to be the removed variant.
.map((i) => sluggify(i.replace(/.md$/, "")));
.map((i) => sluggify(i.replace(/.md$/, "")))
.map((url) => `http://127.0.0.1:3000/${shortLang}/books/${url}`);

console.log("generating PDF files for", urls);
// Two launch statements follow: one assigning an outer `browser` through the
// `puppeteer` alias, one declaring a block-scoped `browser` through `playwright`.
browser = await puppeteer.chromium.launch({
const browser = await playwright.chromium.launch({
headless: true,
args: [
"--disable-dev-shm-usage",
"--no-sandbox",
"--disable-setuid-sandbox",
],
});
// Sequential variant: a single page is reused for every URL of the language.
const page = await browser.newPage();
for (let url of urls) {
url = `http://127.0.0.1:3000/${shortLang}/books/` + url;
console.log(`opening page: ${url}`);
await page.goto(url, {
waitUntil: "networkidle0",
timeout: 0,
});
// await new Promise(resolve => setTimeout(resolve, 1500));
// await page.screenshot();
// await page.evaluateHandle('document.fonts.ready');

// let div_selector_to_remove = ".print:hidden";
// await page.evaluate((sel) => {
// const elements = document.querySelectorAll(sel);
// for (let i = 0; i < elements.length; i++) {
// elements[i].parentNode.removeChild(elements[i]);
// }
// }, div_selector_to_remove);
const pdf = await page.pdf({
printBackground: true,
preferCSSPageSize: true,
quality: 100,
format: "A4",
margin: { top: "20px", right: "20px", bottom: "20px", left: "20px" },
timeout: 0,
});
// Output name is the last URL segment with a "-pre.pdf" suffix.
const pdfFile =
`/vreji/uencu/${shortLang}/` +
url.split("/").slice("-1")[0] +
"-pre.pdf";
fs.mkdirSync(`/vreji/uencu/${shortLang}`, { recursive: true });
fs.writeFileSync(pdfFile, pdf);
console.log(`pdf file saved: ${pdfFile}`);
// Batch variant: the browser is closed in `finally` even when processBatch rejects.
try {
// Process URLs in batches with concurrency limit
await processBatch(browser, urls, shortLang);
} finally {
await browser.close();
}
} catch (err) {
// `continue` and the console.error line are alternatives from the two variants;
// as written, the log statement after `continue` would be unreachable.
continue;
console.error(`Error processing language ${lang}:`, err);
}
// Sequential variant: best-effort close that ignores any error from browser.close().
try {
await browser.close();
} catch (error) {}
}
})();
}

// Script entry point: start the export and log any rejection that escapes printPDF.
printPDF().catch(console.error);

0 comments on commit d96faf3

Please sign in to comment.