From 4d4dc75958131997320beb41cadef2cbbdaa996b Mon Sep 17 00:00:00 2001 From: Nikhil Rayaprolu Date: Tue, 8 Aug 2017 02:24:11 +0530 Subject: [PATCH] Addresses #58 implementation of image search for google --- app/scraper.py | 36 +++++++++++++++++++++++++++++------- app/server.py | 3 ++- app/templates/index.html | 37 +++++++++++++++++++++++++++++-------- 3 files changed, 60 insertions(+), 16 deletions(-) diff --git a/app/scraper.py b/app/scraper.py index f677a664..302e1fda 100644 --- a/app/scraper.py +++ b/app/scraper.py @@ -80,14 +80,17 @@ def get_google_page(query): payload = {'q': query} response = requests.get('https://www.google.com/search', headers=header, params=payload) return response - -def get_google_page(query,startIndex): + + +def get_google_page(query, startIndex, image=False): """ Fetch the google search results page Returns : Results Page """ header = { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36'} - payload = {'q': query,'start':startIndex} + payload = {'q': query, 'start': startIndex} + if image: + payload = {'q': query, 'start': startIndex, 'tbm': 'isch'} response = requests.get('https://www.google.com/search', headers=header, params=payload) return response @@ -98,8 +101,8 @@ def google_search(query): [[Tile1,url1], [Title2, url2],..] """ urls = [] - for count in range(0,10): - response = get_google_page(query,count*10) + for count in range(0, 10): + response = get_google_page(query, count * 10) soup = BeautifulSoup(response.text, 'html.parser') for h3 in soup.findAll('h3', {'class': 'r'}): links = h3.find('a') @@ -109,6 +112,23 @@ def google_search(query): return urls +def google_image_search(query): + """ Search google images for the query and return the image results + Returns: urls (list) + [{'title': Title1, 'link': url1}, {'title': Title2, 'link': url2}, ..] 
+ """ + urls = [] + for count in range(0, 10): + response = get_google_page(query, count * 10, image=True) + soup = BeautifulSoup(response.text, 'html.parser') + for image_data in soup.findAll('div', {'class': 'rg_meta'}): + j = json.loads(image_data.getText()) + urls.append({'title': j['pt'], + 'link': j['ou']}) + + return urls + + def get_yahoo_page(query): """ Fetch the yahoo search results Returns : Results Page @@ -155,8 +175,10 @@ def small_test(): assert type(google_search('fossasia')) is list -def feedgen(query, engine): - if engine == 'g': +def feedgen(query,type, engine): + if engine == 'g' and type == "images": + urls = google_image_search(query) + elif engine == 'g': urls = google_search(query) elif engine == 'd': urls = duckduckgo_search(query) diff --git a/app/server.py b/app/server.py index a36f9288..96d6684e 100644 --- a/app/server.py +++ b/app/server.py @@ -39,11 +39,12 @@ def search(search_engine): return bad_request(err) query = request.args.get('query') + stype = request.args.get('type') if not query: err = [400, 'Not Found - missing query', qformat] return bad_request(err) - result = feedgen(query,engine[0]) + result = feedgen(query,stype,engine[0]) if not result: err = [404, 'No response', qformat] return bad_request(err) diff --git a/app/templates/index.html b/app/templates/index.html index 08ac0391..9461da3f 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -48,6 +48,17 @@

query-server

+
+
+ + +
+
+
@@ -70,7 +81,7 @@

query-server

- +


@@ -84,7 +95,8 @@

query-server

var sengine = $(this).val(); var squery = $('#query').val(); var sformat = $(' #format label.active input').val(); - var urlloc = window.location.href.split(/\?|#/)[0] + "api/v1/search/" + sengine + "?query=" + squery + "&format=" + sformat; + var stype = $(' #type label.active input').val(); + var urlloc = window.location.href.split(/\?|#/)[0] + "api/v1/search/" + sengine + "?query=" + squery + "&format=" + sformat + "&type=" +stype; $.ajax({ url: urlloc, type: 'GET', @@ -113,16 +125,25 @@

query-server

if($(this).hasClass('active')){ $(this).removeClass('active') } else { - $(".active").removeClass("active"); + $("#format .active").removeClass("active"); + $(this).addClass('active') + } + }); + $('.typeButton').click(function(e){ + e.preventDefault(); + if($(this).hasClass('active')){ + $(this).removeClass('active') + } else { + $("#type .active").removeClass("active"); $(this).addClass('active') } }); $(window).keydown(function(event){ - if(event.keyCode == 13) { - event.preventDefault(); - return false; - } - }); + if(event.keyCode == 13) { + event.preventDefault(); + return false; + } + });