Merge pull request #42 from MhmdSalah/code-optimize

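Code optimizations: drop the cheerio dependency and the legacy HTML-format parser (parseOldFormat), try the scraper_data regex before the original ytInitialData regex, and bump the version to 0.1.3.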
HermanFassett · Nov 2, 2020 · d426ace
2 parents 15f5078 + 7f5620c
commit d426ace
Show file tree

Hide file tree

Showing 3 changed files with 6 additions and 185 deletions.
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -1,12 +1,11 @@
 {
     "name": "youtube-scrape",
-    "version": "0.1.2",
+    "version": "0.1.3",
     "description": "Scrape YouTube searches",
     "main": "server.js",
     "author": "Herman Fassett",
     "dependencies": {
         "express": "latest",
-        "request": "latest",
-        "cheerio": "latest"
+        "request": "latest"
     }
 }
diff --git a/scraper.js b/scraper.js
@@ -1,4 +1,3 @@
-const cheerio = require('cheerio');
 const request = require('request');
 
 async function youtube(query, page) {
@@ -10,29 +9,22 @@ async function youtube(query, page) {
         request(url, (error, response, html) => {
             // Check for errors
             if (!error && response.statusCode === 200) {
-                const $ = cheerio.load(html);
                 let json = { results: [], version: require('./package.json').version };
 
-                // First attempt to parse old youtube search result style
-                $(".yt-lockup-dismissable").each((index, vid) => {
-                    json["parser"] = "html_format";
-                    json.results.push(parseOldFormat($, vid));
-                });
-
                 // If that fails, we have to parse new format from json data in html script tag
                 if (!json.results.length) {
                     json["parser"] = "json_format";
 
                     // Get script json data from html to parse
                     let data, sectionLists = [];
                     try {
-                        let match = html.match(/ytInitialData"[^{]*(.*);\s*window\["ytInitialPlayerResponse"\]/s);
+                        let match = html.match(/ytInitialData[^{]*(.*);\s*\/\/ scraper_data_end/s);
                         if (match && match.length > 1) {
-                            json["parser"] += ".original";
+                            json["parser"] += ".scraper_data";
                         }
                         else {
-                            json["parser"] += ".scraper_data";
-                            match = html.match(/ytInitialData[^{]*(.*);\s*\/\/ scraper_data_end/s);
+                            json["parser"] += ".original";
+                            match = html.match(/ytInitialData"[^{]*(.*);\s*window\["ytInitialPlayerResponse"\]/s);
                         }
                         data = JSON.parse(match[1]);
                         json["estimatedResults"] = data.estimatedResults || "0";
@@ -81,39 +73,6 @@ async function youtube(query, page) {
     });
 }
 
-/**
- * Parse youtube search results from dom elements
- * @param {CheerioStatic} $ - The youtube search results loaded with cheerio
- * @param {CheerioElement} vid - The current video being parsed
- * @returns object with data to return for this video
- */
-function parseOldFormat($, vid) {
-    // Get video details
-    let $metainfo = $(vid).find(".yt-lockup-meta-info li");
-    let $thumbnail = $(vid).find(".yt-thumb img");
-    let video = {
-        "id": $(vid).parent().data("context-item-id"),
-        "title": $(vid).find(".yt-lockup-title").children().first().text(),
-        "url": `https://www.youtube.com${$(vid).find(".yt-lockup-title").children().first().attr("href")}`,
-        "duration": $(vid).find(".video-time").text().trim() || "Playlist",
-        "snippet": $(vid).find(".yt-lockup-description").text(),
-        "upload_date": $metainfo.first().text(),
-        "thumbnail_src": $thumbnail.data("thumb") || $thumbnail.attr("src"),
-        "views": $metainfo.last().text()
-    };
-
-    // Get user details
-    let $byline = $(vid).find(".yt-lockup-byline");
-    let uploader = {
-        "username": $byline.text(),
-        "url": `https://www.youtube.com${$byline.find("a").attr("href")}`,
-        "verified": !!$byline.find("[title=Verified]").length
-    };
-
-    // Return json
-    return { video: video, uploader: uploader };
-}
-
 /**
  * Parse a channelRenderer object from youtube search results
  * @param {object} renderer - The channel renderer