-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimageAgentScraper.ts
51 lines (40 loc) · 1.59 KB
/
imageAgentScraper.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import puppeteer from "puppeteer";
/**
 * Thin wrapper around a headless Chrome instance (driven by puppeteer) that
 * collects image URLs from an image-search results page.
 *
 * Usage: call {@link ImageAgentScraper.launch} once, then
 * {@link ImageAgentScraper.getImages} as often as needed.
 */
export default class ImageAgentScraper {
  /** The puppeteer browser; `undefined` until `launch()` has completed. */
  browser?: puppeteer.Browser = undefined;

  constructor() { }

  /**
   * Launch the headless browser used for scraping and store it on `browser`.
   *
   * Uses the Chrome binary at `/usr/bin/google-chrome` and starts it with the
   * sandbox disabled (presumably so it can run inside a container / as root —
   * confirm against the deployment environment).
   */
  async launch(): Promise<void> {
    this.browser = await puppeteer.launch({
      headless: true,
      executablePath: '/usr/bin/google-chrome',
      args: ['--no-sandbox', '--disable-setuid-sandbox']
    });
  }

  /**
   * Scrape the Yandex image search results page for image URLs.
   *
   * @param query the image search query (free text; it is fully URL-escaped)
   * @returns the non-empty `src` of every `<img>` found on the results page
   * @throws Error if `launch()` has not been called yet
   */
  async getImages(query: string): Promise<string[]> {
    // The browser must have been launched beforehand.
    if (!this.browser) throw new Error("Browser not launched");

    // encodeURIComponent (not encodeURI) so characters such as `&`, `#`, `+`
    // and `=` in the query are escaped instead of being interpreted as URL
    // syntax, which would truncate or corrupt the `text` parameter.
    // Other endpoints that were tried previously:
    //   https://www.qwant.com/?l=de&q=<query>&t=images
    //   https://images.search.yahoo.com/search/images?p=<query>
    //   https://www.google.com/search?q=<query>&tbm=isch
    const url = `https://yandex.com/images/search?text=${encodeURIComponent(query)}`;

    const page = await this.browser.newPage();
    try {
      // Only wait for the DOM to be parsed, not for every resource to load.
      await page.goto(url, { waitUntil: 'domcontentloaded' });

      // The callback runs inside the page, where `document` is the page's DOM.
      // The suppression is kept because the compiler may not have the DOM lib
      // configured, in which case `document` is unknown to it.
      // @ts-ignore
      const sources: string[] = await page.evaluate(() => Array.from(document.images, e => e.src));

      // Drop images without a source.
      return sources.filter(src => src !== "");
    } finally {
      // Always release the tab, even when navigation or evaluation fails,
      // and await it so a failing close is not an unhandled rejection.
      await page.close();
    }
  }
}