Skip to content

Commit e11ea71

Browse files
committed
feat(Extraction): Updated extraction function to be more robust and support a greater number of useful tags.
1 parent 85aad6a commit e11ea71

File tree

3 files changed

+8516
-64
lines changed

3 files changed

+8516
-64
lines changed

src/app/models/knowledge.source.model.ts

+1-1
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,6 @@ export interface KnowledgeSource {
2626
title: string;
2727
topics?: string[];
2828
accessLink: string;
29-
thumbnail: string;
29+
thumbnail?: any;
3030
metadata: WebsiteMetaTagsModel[];
3131
}

src/app/services/chrome-extension.service.ts

+29-63
Original file line number | Diff line number | Diff line change
@@ -31,15 +31,7 @@ export class ChromeExtensionService {
3131
return tab;
3232
}
3333

34-
35-
getMetaTitle = () => {
36-
37-
}
38-
39-
40-
4134
/**
42-
* TODO: finish this, get title, charset, og: and dc: tags
4335
* @param tab
4436
*/
4537
async getMetadata(tab: ChromeTab) {
@@ -73,67 +65,41 @@ export class ChromeExtensionService {
7365

7466
try {
7567
let result;
76-
console.log('Trying to get metatags...');
7768
[{result}] = await chrome.scripting.executeScript({
7869
target: {tabId: tab.id},
7970
function: () => {
80-
let meta = document.getElementsByTagName('meta');
81-
82-
console.log('meta tags: ', meta);
83-
84-
if (!meta || meta.length <= 0) {
85-
return [];
86-
}
87-
88-
let attr: (..._: any) => string | null = (i: number, j: number) => {
89-
return meta[i]?.attributes[j]?.textContent;
90-
}
91-
92-
let matches: (..._: any) => boolean = (target: string, i: number, j: number) => {
93-
return attr(i, j)?.startsWith(target) ?? false;
94-
}
95-
96-
let filterPermute: (..._: any) => string = (target: string, i: number) => {
97-
let val1 = attr(i, 1) ?? '';
98-
let val2 = attr(i, 2) ?? '';
99-
100-
if (val1.startsWith(target)) {
101-
if (val2.startsWith(target)) {
102-
return '';
103-
} else {
104-
return val2
105-
}
106-
} else {
107-
return val1;
108-
}
109-
}
110-
11171
let metatags: WebsiteMetaTagsModel[] = [];
112-
let targets: string[] = ['og:', 'dc:', 'keywords']
113-
114-
// TODO: remove
115-
console.log('meta tags: ', meta);
116-
console.log('Targets: ', targets);
72+
let meta = document.getElementsByTagName('meta');
11773

11874
for (let i = 0; i < meta.length; i++) {
119-
let name = meta[i]?.attributes[0]?.name;
120-
if (name && name === 'charset') { /* Charset tags */
121-
metatags.push({
122-
key: 'charset',
123-
value: attr(i, 0),
124-
property: ''
125-
});
126-
} else {
127-
for (let target of targets) {
128-
if (matches(target, i, 0)) {
129-
let val = filterPermute(target, i);
130-
if (val !== '' && !val.startsWith(target)) {
131-
metatags.push({
132-
key: attr(i, 0),
133-
value: val,
134-
property: ''
135-
})
136-
}
75+
console.log(`Meta[${i}] = `, meta[i]);
76+
77+
let names = [
78+
meta[i].name,
79+
meta[i].attributes.getNamedItem('property')?.textContent,
80+
].filter(n => n);
81+
82+
const isTarget = names.some(n => n && (
83+
n.startsWith('og:') || /* OpenGraph */
84+
n.startsWith('dc:') || /* Dublin Core */
85+
(n.startsWith('twitter:') && !n.startsWith('twitter:app:')) || /* Twitter (but not app) */
86+
n.startsWith('description') || /* Description */
87+
n.startsWith('article:') || /* Articles */
88+
n.startsWith('keywords') /* Keywords */
89+
));
90+
91+
if (isTarget) {
92+
let contents = [
93+
meta[i].content,
94+
].filter(c => c);
95+
96+
for (let name of names) {
97+
for (let content of contents) {
98+
metatags.push({
99+
key: name,
100+
value: content,
101+
property: ''
102+
});
137103
}
138104
}
139105
}

0 commit comments

Comments
 (0)