Skip to content

Commit

Permalink
Addresses fossasia#58 implementation of image search for google
Browse files Browse the repository at this point in the history
  • Loading branch information
nikhilrayaprolu committed Aug 7, 2017
1 parent 676dc4e commit 4d4dc75
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 16 deletions.
36 changes: 29 additions & 7 deletions app/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,17 @@ def get_google_page(query):
payload = {'q': query}
response = requests.get('https://www.google.com/search', headers=header, params=payload)
return response

def get_google_page(query, startIndex, image=False):
    """Fetch one page of Google search results.

    Args:
        query: Search terms, sent as the ``q`` query parameter.
        startIndex: Offset of the first result, sent as the ``start``
            query parameter.
        image: When true, ``tbm=isch`` is added to the request so the
            image search page is fetched instead of the web results page.

    Returns:
        The response object returned by ``requests.get`` for
        https://www.google.com/search.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36'}
    # Build the query string once; the image variant only adds one key.
    payload = {'q': query, 'start': startIndex}
    if image:
        payload['tbm'] = 'isch'
    response = requests.get('https://www.google.com/search', headers=header, params=payload)
    return response

Expand All @@ -98,8 +101,8 @@ def google_search(query):
[[Title1,url1], [Title2, url2],..]
"""
urls = []
for count in range(0,10):
response = get_google_page(query,count*10)
for count in range(0, 10):
response = get_google_page(query, count * 10)
soup = BeautifulSoup(response.text, 'html.parser')
for h3 in soup.findAll('h3', {'class': 'r'}):
links = h3.find('a')
Expand All @@ -109,6 +112,23 @@ def google_search(query):
return urls


def google_image_search(query):
    """ Search google images for the query and return the image results

    Requests ten result pages (start offsets 0, 10, ..., 90) with the image
    flag set and reads the JSON text of every ``div`` with class ``rg_meta``.

    Returns: urls (list)
        [{'title': Title1, 'link': url1}, {'title': Title2, 'link': url2},..]
    """
    urls = []
    for count in range(0, 10):
        response = get_google_page(query, count * 10, image=True)
        soup = BeautifulSoup(response.text, 'html.parser')
        for image_data in soup.findAll('div', {'class': 'rg_meta'}):
            try:
                meta = json.loads(image_data.getText())
                entry = {'title': meta['pt'],
                         'link': meta['ou']}
            except (ValueError, KeyError, TypeError):
                # One unparsable or incomplete metadata element should not
                # discard every result collected so far; skip just that one.
                continue
            urls.append(entry)

    return urls


def get_yahoo_page(query):
""" Fetch the yahoo search results
Returns : Results Page
Expand Down Expand Up @@ -155,8 +175,10 @@ def small_test():
assert type(google_search('fossasia')) is list


def feedgen(query, engine):
if engine == 'g':
def feedgen(query,type, engine):
if engine == 'g' and type == "images":
urls = google_image_search(query)
elif engine == 'g':
urls = google_search(query)
elif engine == 'd':
urls = duckduckgo_search(query)
Expand Down
3 changes: 2 additions & 1 deletion app/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,12 @@ def search(search_engine):
return bad_request(err)

query = request.args.get('query')
stype = request.args.get('type')
if not query:
err = [400, 'Not Found - missing query', qformat]
return bad_request(err)

result = feedgen(query,engine[0])
result = feedgen(query,stype,engine[0])
if not result:
err = [404, 'No response', qformat]
return bad_request(err)
Expand Down
37 changes: 29 additions & 8 deletions app/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,17 @@ <h1><code>query-server</code></h1>
</label>
</div>
</div>
<div class="col-sm-4" style="display: inline;">
<div class="btn-group" id="type" data-toggle="buttons">
<label class="btn btn-lg btn-primary active typeButton">
<input type="radio" value="text" autocomplete="off">Text
</label>
<label class="btn btn-lg btn-primary typeButton">
<input type="radio" value="images" autocomplete="off">Images
</label>
</div>
</div>

</div>
<br/>
<div class="btn-toolbar">
Expand All @@ -70,7 +81,7 @@ <h1><code>query-server</code></h1>
</footer>
</div>
<div class="pagination-centered" >
<img id="load" src="{{ url_for('static', filename='images/ripple.gif') }}">
<img id="load" src="{{ url_for('static', filename='images/ripple.gif') }}">
</div>
<pre id="feed" style="display:none;"><code class="language-xml"></code></pre>
<br/><br/>
Expand All @@ -84,7 +95,8 @@ <h1><code>query-server</code></h1>
var sengine = $(this).val();
var squery = $('#query').val();
var sformat = $(' #format label.active input').val();
var urlloc = window.location.href.split(/\?|#/)[0] + "api/v1/search/" + sengine + "?query=" + squery + "&format=" + sformat;
var stype = $(' #type label.active input').val();
var urlloc = window.location.href.split(/\?|#/)[0] + "api/v1/search/" + sengine + "?query=" + squery + "&format=" + sformat + "&type=" +stype;
$.ajax({
url: urlloc,
type: 'GET',
Expand Down Expand Up @@ -113,16 +125,25 @@ <h1><code>query-server</code></h1>
if($(this).hasClass('active')){
$(this).removeClass('active')
} else {
$(".active").removeClass("active");
$("#format .active").removeClass("active");
$(this).addClass('active')
}
});
$('.typeButton').click(function(e){
e.preventDefault();
if($(this).hasClass('active')){
$(this).removeClass('active')
} else {
$("#type .active").removeClass("active");
$(this).addClass('active')
}
});
$(window).keydown(function(event){
if(event.keyCode == 13) {
event.preventDefault();
return false;
}
});
if(event.keyCode == 13) {
event.preventDefault();
return false;
}
});
</script>
</body>
</html>

0 comments on commit 4d4dc75

Please sign in to comment.