Skip to content
This repository has been archived by the owner on Jul 4, 2024. It is now read-only.

Commit

Permalink
Merge pull request #6 from eukarya-inc/fetch-icon
Browse files Browse the repository at this point in the history
chore: supports automatic og tag logo extraction
  • Loading branch information
tomoyane authored Apr 6, 2024
2 parents 9618379 + 856ef4f commit b47a5c3
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 68 deletions.
72 changes: 45 additions & 27 deletions README.md

Large diffs are not rendered by default.

9 changes: 7 additions & 2 deletions src/config/proxyConfig.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ class ProxyConfig {
this.notionPageId = process.env.NOTION_PAGE_ID || 'f1db0cfbe246475784c67f279289abea';
this.customScript = process.env.CUSTOM_SCRIPT || '';
this.contentCacheSec = process.env.CONTENT_CACHE_SEC || '300';
this.autoSetOgp = process.env.AUTO_SET_OGP || 'false';
this.autoSetOgp = this.autoSetOgp === 'true';
this.iconUrl = process.env.ICON_URL || '';
this.autoSetOgTag = process.env.AUTO_SET_OG_TAG || 'false';
this.autoSetOgTag = this.autoSetOgTag === 'true';
this.slugToPage = {
"": this.notionPageId
}
Expand Down Expand Up @@ -52,6 +53,10 @@ class ProxyConfig {
throw new Error("Invalid CONTENT_CACHE_SEC environment. Allow number");
}
}

/**
 * Overwrite the icon URL held by this config.
 * The stored value is initially taken from the ICON_URL environment variable
 * (empty string when unset); this setter replaces it with the given value as-is,
 * without validation.
 *
 * @param v New icon URL to store in `this.iconUrl`
 */
replaceIconUrl(v) {
this.iconUrl = v;
}
}

class TwitterTag {
Expand Down
21 changes: 19 additions & 2 deletions src/lib/autoOgpExtractor.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@ const {JSDOM} = require("jsdom");
* The image has Chrome installed if the notion proxy is running in a container. See Dockerfile.
*/
class AutoOgpExtractor {
constructor(notionId, domain, isTls) {
constructor(notionId, domain, isTls, proxyPort) {
this.notionId = notionId;
this.domain = domain;
this.isTls = isTls;
this.proxyPort = proxyPort;
}

async fetchHtmlAfterExecutedJs() {
Expand All @@ -24,7 +25,7 @@ class AutoOgpExtractor {
args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
});
const page = await browser.newPage();
await page.goto(`http://localhost:3456/${this.notionId}`);
await page.goto(`http://localhost:${this.proxyPort}/${this.notionId}`);
await page.waitForSelector('.notion-topbar');
const html = await page.content();
await browser.close();
Expand Down Expand Up @@ -66,6 +67,22 @@ class AutoOgpExtractor {
return `${protocol}://${this.domain}/${uri}`;
}

extractIcon(htmlStr) {
if (htmlStr === '' || htmlStr === null) {
return null;
}
const dom = new JSDOM(htmlStr);
const imgElements = dom.window.document.querySelectorAll('img[alt="Page icon"]');
const srcValues = Array.from(imgElements).map(img => img.getAttribute('src'));
if (!srcValues || srcValues.length === 0) {
return null;
}

const protocol = this.isTls ? 'https' : 'http';
let uri = srcValues[0].substring(1);;
return `${protocol}://${this.domain}/${uri}`;
}

extractOgDesc(htmlStr) {
if (htmlStr === '' || htmlStr === null) {
return null;
Expand Down
16 changes: 13 additions & 3 deletions src/lib/autoOgpExtractor.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ const testHtmlStr = `
</head>
<body>
<p>Hello</p>
<div>
<img alt="Page icon" src="/image/icon.png" referrerpolicy="same-origin">
</div>
<div class="layout-full">
<img src="/image/hello.png" referrerpolicy="same-origin" style="display: block; object-fit: cover; border-radius: 0px; width: 100%; height: 30vh; opacity: 1; object-position: center 50%;">
</div>
Expand All @@ -28,7 +31,7 @@ function getAutoOgpExtractor() {
const notionId = 'f1db0cfbe246475784c67f279289abea';
const domain = 'eukarya.io';
const isTls = true;
return new AutoOgpExtractor(notionId, domain, isTls);
return new AutoOgpExtractor(notionId, domain, isTls, '3456');
}

test('Extract og title', () => {
Expand All @@ -40,7 +43,14 @@ test('Extract og title', () => {

test('Extract og image', () => {
const extractor = getAutoOgpExtractor();
const imgage = extractor.extractOgImage(testHtmlStr);
const image = extractor.extractOgImage(testHtmlStr);

expect(image).toBe('https://eukarya.io/image/hello.png');
});

test('Extract icon', () => {
const extractor = getAutoOgpExtractor();
const icon = extractor.extractIcon(testHtmlStr);

expect(imgage).toBe('https://eukarya.io/image/hello.png');
expect(icon).toBe('https://eukarya.io/image/icon.png');
});
44 changes: 41 additions & 3 deletions src/lib/htmlParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,20 @@ class HtmlParser {
* @param pageUrl ProxyConfig.ogTag.url
* @param pageType ProxyConfig.ogTag.pageType
* @param twitterCard ProxyConfig.twitterTag.twitterCard
* @param iconUrl ProxyConfig.iconUrl
* @param googleFont ProxyConfig.googleFont
* @param domain ProxyConfig.domain
* @param customScript ProxyConfig.customScript
* @param isTls ProxyConfig.isTls
* @param stp slug to page record
*/
constructor(pageTitle, pageDesc, pageImage, pageUrl, pageType, twitterCard, googleFont, domain, customScript, isTls, stp) {
constructor(pageTitle, pageDesc, pageImage, pageUrl, pageType, twitterCard, iconUrl, googleFont, domain, customScript, isTls, stp) {
this.pageTitle = pageTitle;
this.pageDescription = pageDesc;
this.pageImage = pageImage;
this.pageUrl = pageUrl;
this.pageType = pageType;
this.iconUrl = iconUrl;
this.twitterCard = twitterCard;
this.googleFont = googleFont;
this.domain = domain;
Expand All @@ -42,7 +44,7 @@ class HtmlParser {
parseMeta(element) {
try {
if (this.pageTitle !== '') {
if (element.getAttribute('property') === 'og:title' || element.getAttribute('name') === 'twitter:title') {
if (element.getAttribute('property') === 'og:title' || element.getAttribute('name') === 'twitter:title' || element.getAttribute('property') === 'og:site_name') {
element.setAttribute('content', this.pageTitle);
}
}
Expand Down Expand Up @@ -79,6 +81,25 @@ class HtmlParser {
}
}

parseIcon(element, document) {
if (this.iconUrl !== '') {
element.setAttribute('href', this.iconUrl);

// og:logo
const headElement = document.querySelector('head');
const metaElement = document.createElement('meta');
metaElement.setAttribute('property', 'og:logo');
metaElement.setAttribute('content', this.iconUrl);
headElement.appendChild(metaElement);

// apple-touch-icon
const appleTouchIcon = document.querySelector('link[rel="apple-touch-icon"]');
if (appleTouchIcon) {
appleTouchIcon.setAttribute('href', this.iconUrl);
}
}
}

parseHead(element) {
if (this.googleFont !== '') {
element.innerHTML += `<link href="https://fonts.googleapis.com/css?family=
Expand Down Expand Up @@ -120,7 +141,19 @@ class HtmlParser {
history.replaceState(history.state, '', '/' + slug);
}
}
const observer = new MutationObserver(function() {
var linkElement = document.querySelector('link[rel="shortcut icon"]');
const observer = new MutationObserver(function(mutationsList) {
if ('${this.iconUrl}' !== '') {
for (var mutation of mutationsList) {
if (mutation.type === 'childList' && mutation.addedNodes.length > 0) {
for (var node of mutation.addedNodes) {
if (node.nodeType === 1 && node.classList.contains('notion-presence-container') && linkElement) {
linkElement.href = '${this.iconUrl}';
}
}
}
}
}
if (redirected) {
return;
}
Expand Down Expand Up @@ -189,6 +222,11 @@ class HtmlParser {
this.parseMeta(metas[m])
}

const shortcutIcon = document.querySelector('link[rel="shortcut icon"]');
if (shortcutIcon) {
this.parseIcon(shortcutIcon, document)
}

let head = document.querySelector('head')
if (head) {
this.parseHead(head)
Expand Down
27 changes: 22 additions & 5 deletions src/lib/htmlParser.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ function getParser() {
const desc = 'Test Desc';
const image = 'https://eukarya.io/img/logo.svg';
const url = 'https://eukarya.io';
const iconUrl = 'https://reearth.io/img/logo.svg';
const type = 'website';
const twitterCard = 'summary_large_image';
const googleFont = '';
Expand All @@ -21,6 +22,7 @@ function getParser() {
url,
type,
twitterCard,
iconUrl,
googleFont,
domain,
customScript,
Expand All @@ -32,8 +34,7 @@ test('Parse html for Notion', () => {
const parser = getParser();
const element = new JSDOM(
`
<html class="notion-html">
<head lang="en">
<html class="notion-html"><head lang="en">
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,height=device-height,initial-scale=1,maximum-scale=1,user-scalable=no,viewport-fit=cover">
<title>Notion – The all-in-one workspace for your notes, tasks, wikis, and databases.</title>
Expand All @@ -45,6 +46,8 @@ test('Parse html for Notion', () => {
<meta property="og:description" content="A new tool that blends your everyday work apps into one. It's the all-in-one workspace for you and your team">
<meta property="og:image" content="https://www.notion.so/images/meta/default.png">
<meta property="og:locale" content="en_US">
<link rel="shortcut icon" href="https://www.notion.so/images/meta/default.png">
<link rel="apple-touch-icon" href="https://www.notion.so/images/meta/default.png">
</head>
<body>
<p>Hello</p>
Expand All @@ -59,14 +62,16 @@ test('Parse html for Notion', () => {
<meta name="viewport" content="width=device-width,height=device-height,initial-scale=1,maximum-scale=1,user-scalable=no,viewport-fit=cover">
<title>Notion – The all-in-one workspace for your notes, tasks, wikis, and databases.</title>
<meta name="description" content="Test Desc">
<meta property="og:site_name" content="Notion">
<meta property="og:site_name" content="Test Title">
<meta property="og:type" content="website">
<meta property="og:url" content="https://eukarya.io">
<meta property="og:title" content="Test Title">
<meta property="og:description" content="Test Desc">
<meta property="og:image" content="https://eukarya.io/img/logo.svg">
<meta property="og:locale" content="en_US">
</head>
<link rel="shortcut icon" href="https://reearth.io/img/logo.svg">
<link rel="apple-touch-icon" href="https://reearth.io/img/logo.svg">
<meta property="og:logo" content="https://reearth.io/img/logo.svg"></head>
<body>
<p>Hello</p>
Expand Down Expand Up @@ -97,7 +102,19 @@ test('Parse html for Notion', () => {
history.replaceState(history.state, '', '/' + slug);
}
}
const observer = new MutationObserver(function() {
var linkElement = document.querySelector('link[rel="shortcut icon"]');
const observer = new MutationObserver(function(mutationsList) {
if ('https://reearth.io/img/logo.svg' !== '') {
for (var mutation of mutationsList) {
if (mutation.type === 'childList' && mutation.addedNodes.length > 0) {
for (var node of mutation.addedNodes) {
if (node.nodeType === 1 && node.classList.contains('notion-presence-container') && linkElement) {
linkElement.href = 'https://reearth.io/img/logo.svg';
}
}
}
}
}
if (redirected) {
return;
}
Expand Down
Loading

0 comments on commit b47a5c3

Please sign in to comment.