-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathquery.py
142 lines (122 loc) · 4.48 KB
/
query.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import re
import urllib.error
import urllib.parse
import urllib.request

import requests
from bs4 import BeautifulSoup

from private_info import OMDB_API_KEY
# Base endpoint of the OMDB API; the query string is appended by the caller.
OMDB_LINK = 'http://www.omdbapi.com/'

# Theater-listings URL template. It is filled in with ``str.format(mapping)``
# where the mapping supplies the ``zip`` and ``start_date`` keys.
# NOTE(review): the ``tid`` values are presumably a fixed set of theater
# ids -- confirm before changing them.
FANDANGO_LINK = ''.join((
    'https://www.fandango.com/theaterlistings-prn.aspx?',
    'location={0[zip]}&pn=1&sdate={0[start_date]}&',
    'tid=AAAPP,AAJMM,AAIJQ,AANJV,AAWPB,AAHIP,AANVP,AAHIF,AAHIJ,AAUHN',
))

# Message template for failed lookups: ``id`` names what was looked up and
# ``t`` is the title that was searched for.
NOT_FOUND_STR = '{id} for {t} not found.'
class Theater(object):
    """A theater name paired with a collection of showtime strings."""

    def __init__(self, name, showtimes):
        # ``showtimes`` must be an iterable of strings because ``__str__``
        # passes it straight to ``str.join``.
        self.name, self.showtimes = name, showtimes

    def __repr__(self):
        return self.name

    def __str__(self):
        """Return ``"<name>: <time>, <time>, ..."``."""
        times = ', '.join(self.showtimes)
        return '{}: {}'.format(self.name, times)
class Movie(object):
    """A film's descriptive fields plus the theaters collected for it."""

    def __init__(self, title, rating, duration):
        # Assignment order matters: ``__str__`` walks the instance
        # attributes in the order they were created.
        self.title = title
        self.rating = rating
        self.duration = duration
        self.theaters = []

    @property
    def showtimes(self):
        """One line per theater, using each theater's ``str()`` form."""
        return '\n'.join(map(str, self.theaters))

    def __repr__(self):
        return self.title

    def __str__(self):
        """Return the descriptive features joined by ``', '``.

        Empty values and list attributes (the theaters) are left out.
        """
        features = []
        for value in vars(self).values():
            if isinstance(value, list) or not value:
                continue
            features.append(value)
        return ', '.join(features)
def movie_data_query(**kwargs):
    """Query OMDB for information on the passed film.

    Args:
        kwargs (dict): Query parameters sent to OMDB (``t`` carries the
            title). The API key is added automatically.

    Returns:
        str: The movie data as a formatted message, or a "not found"
        message when OMDB answers with ``Movie not found!``.

    Raises:
        KeyError: If the OMDB payload lacks one of the fields used in the
            message (for example when OMDB reports some other error).
    """
    params = dict(kwargs, apikey=OMDB_API_KEY)
    link = '?'.join((OMDB_LINK, urllib.parse.urlencode(params)))
    # ``Response.json`` is a method; it has to be called to get the payload.
    data = requests.get(link).json()
    if data.get('Error', '') == 'Movie not found!':
        # ``t`` may be absent when the lookup used another parameter.
        return NOT_FOUND_STR.format(
            id='Movie info', t=str(kwargs.get('t', '')).title()
        )

    # ``Ratings`` is a list of {'Source': ..., 'Value': ...} entries. Pick
    # the Rotten Tomatoes entry by its source name instead of assuming it
    # is the first one; fall back to 'N/A' when it is missing.
    rotten_tomatoes = next(
        (
            entry.get('Value', 'N/A')
            for entry in data.get('Ratings') or []
            if entry.get('Source') == 'Rotten Tomatoes'
        ),
        'N/A',
    )
    return (
        "{0[Title]}\n{0[Rated]}, {0[Year]}, {0[Runtime]}\n{0[Genre]}\n"
        "Director: {0[Director]}\nCast: {0[Actors]}\n{0[Plot]}\n"
        "Metascore: {0[Metascore]}\nIMDB: {0[imdbRating]}\n"
        "Rotten Tomatoes: {1}\n\n"
        "----------\n"
        "To receive showtimes in your area for this film, please respond "
        "SHOWTIMES and the zipcode. E.g. SHOWTIMES 97211"
    ).format(data, rotten_tomatoes)
def format_movie_data(movies, title):
    """Find a film and create its string representation.

    Args:
        movies (dict): Maps a key (the title) to a movie object exposing
            ``title`` and ``showtimes`` attributes.
        title (str): Title, or part of the title, of the desired movie.

    Returns:
        str: The title-cased movie description followed by its showtimes,
        or a "couldn't find" message when no movie title contains
        ``title``.
    """
    # Match case-insensitively: titles produced by ``split_line`` are
    # lower-cased, so a query such as "Dune" would otherwise never match.
    wanted = title.lower()
    selection = next(
        (mov for mov in movies.values() if wanted in mov.title.lower()),
        None,
    )
    if selection is None:
        return "Couldn't find movie {} in showtimes".format(title)
    # Use the matched object directly; looking it up again by title would
    # fail whenever a dict key differs from the movie's title.
    return '\n'.join([str(selection).title(), selection.showtimes])
def split_line(row):
    """Split a listing row into the movie's title, rating and duration.

    Args:
        row: Table-row element (e.g. a BeautifulSoup tag) whose first
            ``td`` cell holds the condensed movie information as text.

    Returns:
        tuple or bool: ``(title, rating, duration)`` as lower-cased
        strings, or ``False`` when the row has no ``td`` cell, announces
        that showtimes are unavailable, or lacks the rating/duration
        markers.
    """
    cell = row.find('td')
    if cell is None:
        return False
    # Collapse all runs of whitespace so the text is a single clean line.
    movie_line = ' '.join(cell.text.split()).lower()
    if (
        movie_line.startswith('showtimes are currently not available')
        or not all(mark in movie_line for mark in ('hr', 'min', '(', ')'))
    ):
        return False

    # Raw string: '\W' in a plain literal is an invalid escape sequence.
    # Empty tokens (from leading/trailing punctuation) are dropped so they
    # cannot shift the fixed-position rating and duration fields.
    items = [tok for tok in re.split(r'\W+', movie_line) if tok]
    if len(items) < 5:
        # Not enough tokens for a rating plus a four-token duration.
        return False
    # Layout from the end: <title words...> <rating> <n> hr <n> min
    *title_words, rating = items[:-4]
    duration = ' '.join(items[-4:])
    return ' '.join(title_words), rating, duration
def _collect_movies(soup):
    """Group the showtimes found in the parsed listings page by movie title.

    Args:
        soup: Parsed listings page (a BeautifulSoup document).

    Returns:
        dict: Maps each movie title to its ``Movie``, whose ``theaters``
        list holds one ``Theater`` per listing row for that film.
    """
    movies = {}
    for theater_table in soup.find_all('table'):
        heading = theater_table.find('h4')
        if heading is None:
            # No theater name to attribute showtimes to; skip the table
            # instead of failing on ``None.text``.
            continue
        theater = heading.text.strip()
        # The first row is skipped -- presumably the header row; confirm
        # against the page markup.
        for row in theater_table.find_all('tr')[1:]:
            movie_header = split_line(row)
            if not movie_header:
                continue
            title = movie_header[0]
            # Create the Movie only the first time a title is seen.
            movie = movies.get(title)
            if movie is None:
                movie = movies[title] = Movie(*movie_header)
            showtimes = [st.text for st in row.find_all('span')]
            movie.theaters.append(Theater(theater, showtimes))
    return movies


def showtimes_query(**kwargs):
    """Scrape showtimes from the Fandango listings page for one film.

    Args:
        kwargs (dict): Arguments for the query. ``zip`` and ``start_date``
            fill in the listings URL; ``t`` is the title to report on.

    Returns:
        str: The film's description and showtimes, a "couldn't find"
        message when the film is not listed, or a "not found" message
        when the listings page cannot be fetched.
    """
    req = urllib.request.Request(
        FANDANGO_LINK.format(kwargs), headers={'User-Agent': 'Mozilla/5.0'}
    )
    try:
        # ``urlopen`` reports network and HTTP failures as URLError (and
        # its subclass HTTPError), so those must be caught for the
        # fallback message to ever be used. The ``with`` block closes the
        # connection once the page has been read.
        with urllib.request.urlopen(req) as response:
            page = response.read()
    except (urllib.error.URLError, ValueError):
        return NOT_FOUND_STR.format(id='Showtimes', t=kwargs['t'].title())

    soup = BeautifulSoup(page, 'html.parser')
    return format_movie_data(_collect_movies(soup), kwargs['t'])