
Commit 6d8b325

cclauss authored and ParthS007 committed

Add query_cache.py to speed repeated queries (fossasia#399)

1 parent 0bf6b9a · commit 6d8b325

5 files changed: +67 −13 lines

.travis.yml

Lines changed: 2 additions & 0 deletions

@@ -2,6 +2,8 @@ language: python
 python:
   - "2.7"
   - "3.6"
+services:
+  - mongodb
 install:
   - pip install codecov flake8 pytest
   - pip install -r requirements.txt

app/query_cache.py

Lines changed: 44 additions & 0 deletions

@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+
+"""
+query_cache.py -- Implements a caching system for query-server based on MongoDB
+
+Before sending a query to a remote search engine, use lookup() to see if
+results from that same search engine and query are already in the cache. If
+so, then print a cache-hit message and return the cached results. If not, use
+store() to write the search engine, query, query results, and a creation
+datetime into the cache. MongoDB will use that datetime to automatically
+delete outdated query results.
+
+Ideas for improvement:
+* Add a lookup_count to see how often the cache actually saves us time.
+"""
+
+import datetime as dt
+import os
+
+from pymongo import DESCENDING, MongoClient
+
+client = MongoClient(os.environ.get('MONGO_URI', 'mongodb://localhost:27017/'))
+db = client['query-server-v2']
+db = db['queries']  # the 'queries' collection acts as the cache
+db.create_index([('createdAt', DESCENDING)], expireAfterSeconds=60 * 60 * 24)  # auto-delete records older than one day
+
+
+def lookup(url):
+    """Return the cached links if the URL is in the db, or None on a cache miss."""
+    data = db.find_one({'url': url}) or {}
+    return data.get('links', None)
+
+
+def store(url, links):
+    """Write the URL, the links, and a UTC timestamp into the database."""
+    db.delete_many({'url': url})  # remove all existing records for this URL
+    db.insert_one({'url': url, 'links': links, 'createdAt': dt.datetime.utcnow()})
+
+
+if __name__ == '__main__':
+    url = 'test_url'
+    print(lookup(url))  # None on a cold cache (miss)
+    store(url, 'a b c d e'.split())
+    print(lookup(url))  # ['a', 'b', 'c', 'd', 'e'] (hit)
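Outside the diff, here is a minimal sketch of how this module is meant to be exercised, assuming a MongoDB server is reachable at MONGO_URI (defaulting to mongodb://localhost:27017/); the cache key follows the engine + ':' + query convention that server.py adopts below, and the concrete values are hypothetical:

# Illustrative only -- not part of this commit.
from query_cache import lookup, store

key = 'google:fossasia'                # hypothetical engine:query cache key
print(lookup(key))                     # None on a cold cache (miss)
store(key, ['https://fossasia.org'])   # writes the links plus a createdAt timestamp
print(lookup(key))                     # ['https://fossasia.org'] (hit)
# The TTL index on 'createdAt' removes entries after roughly one day.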

app/server.py

Lines changed: 14 additions & 11 deletions

@@ -5,15 +5,13 @@
 from dicttoxml import dicttoxml
 from flask import (Flask, Response, abort, jsonify, make_response,
                    render_template, request)
-from pymongo import MongoClient

+from query_cache import lookup, store
 from scrapers import feed_gen, scrapers

 app = Flask(__name__)
 err = ""

-client = MongoClient(os.environ.get('MONGO_URI', 'mongodb://localhost:27017/'))
-db = client['query-server-v2']
 errorObj = {
     'type': 'Internal Server Error',
     'status_code': 500,
@@ -59,14 +57,19 @@ def search(search_engine):
         error = [400, 'Not Found - missing query', qformat]
         return bad_request(error)

-    result = feed_gen(query, engine, count)
-    if not result:
-        error = [404, 'No response', qformat]
-        return bad_request(error)
-
-    if db['queries'].find({query: query}).limit(1) is False:
-        db['queries'].insert(
-            {"query": query, "engine": engine, "qformat": qformat})
+    # First, see if we can get the results from the cache.
+    engine_and_query = engine + ':' + query
+    result = lookup(engine_and_query)
+    if result:
+        print("cache hit: {}".format(engine_and_query))
+    else:
+        result = feed_gen(query, engine, count)
+        if result:
+            # Store the result in the cache to speed up future searches.
+            store(engine_and_query, result)
+        else:
+            error = [404, 'No response', engine_and_query]
+            return bad_request(error)

     try:
         unicode  # unicode is undefined in Python 3 so NameError is raised
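The new code path can be observed from the outside by issuing the same search twice against a running server. The session below is a hypothetical sketch: the endpoint shape comes from test_server.py, but the query-string parameter names and the engine/query values are assumptions, not part of this commit:

# Illustrative only -- assumes query-server is running on localhost:7001
# and that the search endpoint accepts 'query' and 'format' parameters.
import requests

url = 'http://localhost:7001/api/v1/search/google'
params = {'query': 'fossasia', 'format': 'json'}

first = requests.get(url, params=params)   # cache miss: feed_gen() runs, then store() writes
second = requests.get(url, params=params)  # cache hit: lookup() returns the cached links
                                           # and the server prints "cache hit: google:fossasia"
print(first.json() == second.json())       # expected True: both requests return the same links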

app/test_server.py

Lines changed: 5 additions & 0 deletions

@@ -3,6 +3,7 @@
 import pytest
 import requests

+from .scrapers import small_test

 REASON = 'Do you have query-server running on http://127.0.0.1:7001 ?'
 TRAVIS_CI = os.getenv('TRAVIS', False)  # Running in Travis CI?
@@ -12,6 +13,10 @@ def test_true():
     assert True, "We have a problem!"


+def test_small_test():
+    small_test()
+
+
 @pytest.mark.xfail(not TRAVIS_CI, reason=REASON)
 def test_invalid_url_api_call():
     response = requests.get('http://localhost:7001/api/v1/search/invalid_url')

requirements.txt

Lines changed: 2 additions & 2 deletions

@@ -8,9 +8,9 @@ futures>=3.0.5
 html5lib>=0.9999999
 Jinja2>=2.9.5
 lxml>=3.7.2
-pymongo>=3.4.0
+pymongo>=3.6.0
 pytest>=3.0.6
 pytest-cov>=2.4.0
 requests>=2.13.0
 webencodings>=0.5
-defusedxml>=0.5.0
+defusedxml>=0.5.0
