Skip to content

Commit

Permalink
Merge pull request #1 from ppatrzyk/commentsfix
Browse files Browse the repository at this point in the history
Commentsfix
  • Loading branch information
ppatrzyk authored Oct 30, 2020
2 parents 1fd267f + f46c06b commit c5f2e72
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 21 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
build
dist
filmweb.egg-info
.vscode
16 changes: 10 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,14 @@ INFO:root:Fetching data...
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [00:06<00:00, 5.13it/s]
INFO:root:Parsing data...
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [00:06<00:00, 4.52it/s]
INFO:root:pieca_filmweb_20201003.csv written!
$ head -3 pieca_filmweb_20201003.csv
"duration_min","year","global_votes","global_rating","directors","countries","genres","timestamp","iso_date","user_vote","original_title","pl_title","link"
"105","2013-03-15","19006","7.108230113983154","['Sławomir Fabicki']","['Polska']","['Dramat obyczajowy']","1594412058","2020-07-10T22:14:18","9","","Miłość","https://www.filmweb.pl/film/Mi%C5%82o%C5%9B%C4%87-2012-631551"
"113","2017-09-22","5418","5.435029983520508","['Krzysztof Krauze']","['Polska']","['Dramat społeczny']","1594304693","2020-07-09T16:24:53","7","","Ptaki śpiewają w Kigali","https://www.filmweb.pl/film/Ptaki+%C5%9Bpiewaj%C4%85+w+Kigali-2017-595615"
INFO:root:pieca_filmweb_20201030.csv written!
$ head -6 pieca_filmweb_20201030.csv
"timestamp","iso_date","user_comment","user_vote","global_rating","global_votes","original_title","pl_title","directors","countries","genres","link","duration_min","year"
"1570914666","2019-10-12T23:11:06","","7","6.438159942626953","3590","Play","Gra","['Ruben Östlund']","['Dania', 'Francja', 'Szwecja']","['Dramat', 'Akcja']","https://www.filmweb.pl/film/Gra-2011-508918","118","2011-11-11"
"1570914495","2019-10-12T23:08:15","","4","7.019690036773682","14935","Kraftidioten","Obywatel roku","['Hans Petter Moland']","['Norwegia', 'Szwecja']","['Komedia kryminalna']","https://www.filmweb.pl/film/Obywatel+roku-2014-684846","116","2014-05-16"
"1588403409","2020-05-02T09:10:09","","8","6.9715399742126465","773","Slava","Sława","['Kristina Grozeva']","['Grecja', 'Bułgaria']","['Dramat']","https://www.filmweb.pl/film/S%C5%82awa-2016-769511","101","2017-08-25"
"1570477126","2019-10-07T21:38:46","","5","6.0","4","","Důvěrný nepřítel","[]","['Czechy', 'Słowacja']","['Thriller']","https://www.filmweb.pl/film/D%C5%AFv%C4%9Brn%C3%BD+nep%C5%99%C3%ADtel-2018-819208","","2018-08-16"
"1570272939","2019-10-05T12:55:39","","6","6.264530181884766","5557","","Attenberg","['Athina Rachel Tsangari']","['Grecja']","['Dramat']","https://www.filmweb.pl/film/Attenberg-2010-591326","95","2011-11-25"
```

lub ocen innego użytkownika (musi być znajomym logującego się):
Expand Down Expand Up @@ -72,11 +75,12 @@ Options:
- timestamp: _[czas oceny (unix)](https://pl.wikipedia.org/wiki/Czas_uniksowy)_
- iso_date: _[czas oceny (ISO)](https://pl.wikipedia.org/wiki/ISO_8601)_
- user_vote: _ocena użytkownika_
- user_comment: _komentarz użytkownika_
- original_title: _tytuł oryginalny_
- pl_title: _tytuł polski_
- link: _strona filmu_

## Znane ograniczenia:
## Znane problemy:

- Logowanie tylko kontem filmweb,
- Eksport tylko ocen filmów, inne (np. seriale) niedostępne,
6 changes: 3 additions & 3 deletions filmweb/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from math import ceil
from copy import deepcopy
import requests
from multiprocessing import Pool
import multiprocessing
import tqdm
from .utils import (
get_movie_ratings,
Expand All @@ -27,7 +27,7 @@
write_data,
)

PARALLEL_PROC = 4
PARALLEL_PROC = multiprocessing.cpu_count()
MOVIES_PER_PAGE = 25

def main():
Expand All @@ -42,7 +42,7 @@ def main():
else:
logging.basicConfig(level=logging.INFO)
session = requests.session()
pool = Pool(processes=PARALLEL_PROC)
pool = multiprocessing.Pool(processes=PARALLEL_PROC)
try:
login(session, user, password)
get_vote_count_kwargs = {
Expand Down
36 changes: 26 additions & 10 deletions filmweb/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,22 @@
'countries': 'filmPreview__info--countries',
'genres': 'filmPreview__info--genres',
}
# Fixed column order for the CSV export. Despite the name, these are the
# column (field) names: write_data passes this tuple as `fieldnames` to
# csv.DictWriter, so it determines both the header and the order of values
# in every written row.
CSV_ROWS = (
    # fields taken from the user's own rating entry
    'timestamp', 'iso_date', 'user_comment', 'user_vote',
    # aggregate rating figures for the movie
    'global_rating', 'global_votes',
    # titles
    'original_title', 'pl_title',
    # list-valued movie metadata
    'directors', 'countries', 'genres',
    # movie page URL and remaining details
    'link', 'duration_min', 'year',
)

def login(session, user, password):
"""
Expand Down Expand Up @@ -155,23 +171,24 @@ def get_movie_ratings(content):
data = tuple()
film_data[key] = data
try:
original_title = film_info_container.find(re.compile('.*'), attrs={'class': 'filmPreview__originalTitle'}).contents[0]
film_data['original_title'] = film_info_container.find(re.compile('.*'), attrs={'class': 'filmPreview__originalTitle'}).contents[0]
except:
original_title = None
pass
try:
pl_title = film_info_container.find(re.compile('.*'), attrs={'class': 'filmPreview__title'}).contents[0]
film_data['pl_title'] = film_info_container.find(re.compile('.*'), attrs={'class': 'filmPreview__title'}).contents[0]
except:
pl_title = None
link = 'https://www.filmweb.pl' + film_info_container.find(re.compile('.*'), attrs={'class': 'filmPreview__link'})['href']
pass
try:
film_data['link'] = 'https://www.filmweb.pl' + film_info_container.find(re.compile('.*'), attrs={'class': 'filmPreview__link'})['href']
except:
pass
timestamp = movie.get('t')
clean_movie = {
**film_data,
'timestamp': timestamp,
'iso_date': datetime.fromtimestamp(timestamp).isoformat(),
'user_vote': movie.get('r'),
'original_title': original_title,
'pl_title': pl_title,
'link': link,
'user_comment': movie.get('c'),
}
movies.append(clean_movie)
# necessary for multiprocessing pickle to work
Expand All @@ -191,9 +208,8 @@ def write_data(movies, user, data_format='json'):
out_file.write(json.dumps(movies_clean))
elif data_format == 'csv':
file_name = f'{user}_filmweb_{date}.csv'
field_names = tuple(movies_clean[0].keys())
with open(file_name, 'w') as out_file:
writer = csv.DictWriter(out_file, fieldnames=field_names, dialect='unix')
writer = csv.DictWriter(out_file, fieldnames=CSV_ROWS, dialect='unix')
writer.writeheader()
for movie in movies_clean:
writer.writerow(movie)
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
long_description = fh.read()

setup(name='filmweb',
version='0.2',
version='0.3',
license='MIT',
description='Export movie ratings from filmweb.pl',
long_description=long_description,
Expand All @@ -13,7 +13,7 @@
author='Piotr Patrzyk',
url='https://github.com/ppatrzyk/filmweb-export',
packages=['filmweb'],
python_requires='>=3.7',
python_requires='>=3.6',
install_requires=[
'beautifulsoup4>=4.9.1',
'docopt>=0.6.2',
Expand Down

0 comments on commit c5f2e72

Please sign in to comment.