Skip to content

Commit

Permalink
Addresses fossasia#320 Add video support for Google search (fossasia#426)
Browse files Browse the repository at this point in the history

* video-search : google

* solve build error
  • Loading branch information
umangahuja1 authored and bhaveshAn committed Jan 15, 2018
1 parent c3aa71d commit fac4abf
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 57 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@ app/static/bower_components/*
*.swp
Pipfile
Pipfile.lock
.vscode/*

6 changes: 3 additions & 3 deletions app/scrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@


def small_test():
assert isinstance(scrapers['google'].search('fossasia', 1), list)
assert isinstance(scrapers['google'].search('fossasia', 1), list)


def feed_gen(query, engine, count=10):
def feed_gen(query, engine, count=10, qtype=''):
engine = engine.lower()
# provide temporary backwards compatibility for old names
old_names = {'ubaidu': 'baidu',
Expand All @@ -43,5 +43,5 @@ def feed_gen(query, engine, count=10):
if engine in ('quora', 'youtube'):
urls = scrapers[engine].search_without_count(query)
else:
urls = scrapers[engine].search(query, count)
urls = scrapers[engine].search(query, count, qtype)
return urls
13 changes: 8 additions & 5 deletions app/scrapers/generalized.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class Scraper:
startKey = ''
queryKey = 'q'
defaultStart = 0
qtype = ''
headers = {
'User-Agent': (
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) '
Expand All @@ -20,12 +21,14 @@ class Scraper:
def __init__(self):
pass

def get_page(self, query, startIndex=0):
def get_page(self, query, startIndex=0, qtype=''):
""" Fetch the google search results page
Returns : Results Page
"""
payload = {self.queryKey: query, self.startKey: startIndex}
payload = {self.queryKey: query, self.startKey: startIndex,
self.qtype: qtype}
response = requests.get(self.url, headers=self.headers, params=payload)
print(response.url)
return response

def parse_response(self, soup):
Expand All @@ -34,16 +37,16 @@ def parse_response(self, soup):
def next_start(self, current_start, prev_results):
return current_start + len(prev_results)

def search(self, query, num_results):
def search(self, query, num_results, qtype=''):
"""
Search for the query and return set of urls
Returns: list
"""
urls = []
current_start = self.defaultStart

while(len(urls) < num_results):
response = self.get_page(query, current_start)
while (len(urls) < num_results):
response = self.get_page(query, current_start, qtype)
soup = BeautifulSoup(response.text, 'html.parser')
new_results = self.parse_response(soup)
if new_results is None:
Expand Down
1 change: 1 addition & 0 deletions app/scrapers/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ def __init__(self):
self.url = 'https://www.google.com/search'
self.defaultStart = 0
self.startKey = 'start'
self.qtype = 'tbm'

def next_start(self, current_start, prev_results):
return current_start + len(prev_results)
Expand Down
3 changes: 2 additions & 1 deletion app/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def search(search_engine):
try:
count = int(request.args.get('num', 10))
qformat = request.args.get('format', 'json').lower()
qtype = request.args.get('type', '')
if qformat not in ('json', 'xml', 'csv'):
abort(400, 'Not Found - undefined format')

Expand All @@ -68,7 +69,7 @@ def search(search_engine):
if result:
print("cache hit: {}".format(engine_and_query))
else:
result = feed_gen(query, engine, count)
result = feed_gen(query, engine, count, qtype)
if result:
# store the result in the cache to speed up future searches
store(engine_and_query, result)
Expand Down
125 changes: 77 additions & 48 deletions app/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -64,52 +64,70 @@ <h1><code>query-server</code></h1>
</div>
</div>
<br/>
<div class="col-sm-4 col-xs-6" style="padding:0; margin: 0 auto;">
<div class="dropdown">
<label>Engine:</label><br/>
<button class="btn btn-primary dropdown-toggle" type="button" data-toggle="dropdown">
<span id="drop_down_text">
<img src='static/images/google_icon.png' width='25px'>&nbsp;google
</span>
&nbsp;
<span class="caret"></span>
</button>
<input type="hidden" id="engine" value="google">
<ul class="dropdown-menu">
{% for engine in engines_list %}
<li onClick="update_button('{{ engine }}')" id="{{engine}}">
<a style="cursor:pointer">
{% if engine in ['ask', 'baidu', 'bing', 'yahoo'] %}
<img src="{{ url_for('static', filename='images/' + engine + '_icon.ico') }}" width="15px">
{% else %}
<img src="{{ url_for('static', filename='images/' + engine + '_icon.png') }}" width="15px">
{% endif %}
{{ engine }}
</a>
</li>
{% endfor %}
</ul>
</div>
</div>
<div class="col-sm-4 col-xs-4">
<div class="form-group" style="display:inline-block; margin: 0 auto; min-width:50%;">
<label for="resp">Max:</label>
<select class="form-control" id="resp">
<option>10</option>
<option>20</option>
<option>30</option>
<option>40</option>
<option>50</option>
<option>60</option>
<option>70</option>
<option>80</option>
<option>90</option>
<option>100</option>
</select>

<div class="row">
<div class="col-sm-3 col-xs-6" style="padding:0; margin: 0 auto;">
<div class="dropdown">
<label>Engine:</label><br/>
<button class="btn btn-primary dropdown-toggle" type="button" data-toggle="dropdown">
<span id="drop_down_text">
<img src='static/images/google_icon.png' width='25px'>&nbsp;google
</span>
&nbsp;
<span class="caret"></span>
</button>
<input type="hidden" id="engine" value="google">
<ul class="dropdown-menu">
{% for engine in engines_list %}
<li onClick="update_button('{{ engine }}')" id="{{engine}}">
<a style="cursor:pointer">
{% if engine in ['ask', 'baidu', 'bing', 'yahoo'] %}
<img src="{{ url_for('static', filename='images/' + engine + '_icon.ico') }}" width="15px">
{% else %}
<img src="{{ url_for('static', filename='images/' + engine + '_icon.png') }}" width="15px">
{% endif %}
{{ engine }}
</a>
</li>
{% endfor %}
</ul>
</div>
</div>
<div class="col-sm-5 col-xs-6">
<label>Type:</label><br/>
<div id="type" class="btn-group btn-group-vertical" style="display:inline-flex;padding:0; margin: 0 auto;" data-toggle="buttons">
<label class=" active typeButton" style="padding:10px;">General<br/>
<input type="radio" name = "stype" value="" autocomplete="off" checked>
</label>
<label class=" typeButton" style="padding:10px;">Images<br/>
<input type="radio" name = "stype" value="isch" autocomplete="off">
</label>
<label class=" typeButton" style="padding:10px;">
Video<br/>
<input type="radio" name = "stype" value="vid" autocomplete="off">
</label>
</div>
</div>
<div class="col-sm-2 col-xs-6">
<div class="form-group" style="display:inline-block; margin: 0 ; min-width:50%;">
<label for="resp">Max:</label>
<select class="form-control" id="resp">
<option>10</option>
<option>20</option>
<option>30</option>
<option>40</option>
<option>50</option>
<option>60</option>
<option>70</option>
<option>80</option>
<option>90</option>
<option>100</option>
</select>
</div>
</div>
<div class="col-sm-2 col-xs-6" style="margin-top:20px;">
<input type="submit" value="Submit" id="submitter" class="btn btn-primary"/>
</div>
</div>
<div class="col-sm-4 col-xs-s6" style="margin-top:20px;">
<input type="submit" value="Submit" id="submitter" class="btn btn-primary"/>
</div>
</div>
</form>
Expand Down Expand Up @@ -177,10 +195,10 @@ <h1><code>query-server</code></h1>
$('#feed').hide();
var sengine = $("#engine").val();
var squery = $('#query').val();
var stype = $("input[name=stype]:checked").val()
var sformat = $(' #format label.active input').val();
var count = $('#resp').val();
var urlloc = window.location.href.split(/\?|#/)[0] + "api/v1/search/" + sengine +
"?query=" + squery + "&format=" + sformat + "&num=" + count;
var urlloc = window.location.href.split(/\?|#/)[0] + "api/v1/search/" + sengine + "?query=" + squery + "&type=" + stype + "&format=" + sformat + "&num=" + count;
$.ajax({
url: urlloc,
type: 'GET',
Expand Down Expand Up @@ -209,10 +227,21 @@ <h1><code>query-server</code></h1>
$('.formatButton').click(function (e) {
e.preventDefault();
if (!$(this).hasClass('active')) {
$(".active").removeClass("active");
$(".formatButton .active").removeClass("active");
$(this).addClass('active')
}
});

/*
$('.typeButton').click(function (e) {
e.preventDefault();
if (!$(this).hasClass('active')) {
$(".typeButton .active").removeClass("active");
$(this).addClass('active')
}
});
*/


$(window).keydown(function (event) {
if (event.keyCode == 13) {
Expand Down

0 comments on commit fac4abf

Please sign in to comment.