From 0d739e5ee768b99f25d3dae4eaf5b0a5c7d11573 Mon Sep 17 00:00:00 2001 From: Parker Higgins Date: Sat, 30 Sep 2023 19:52:33 -0400 Subject: [PATCH] USAT: add alternate downloader (#133) --- xword_dl/downloader/amuniversaldownloader.py | 69 ++++++++++++++++++-- 1 file changed, 62 insertions(+), 7 deletions(-) diff --git a/xword_dl/downloader/amuniversaldownloader.py b/xword_dl/downloader/amuniversaldownloader.py index 35d0b19..69c6d0d 100644 --- a/xword_dl/downloader/amuniversaldownloader.py +++ b/xword_dl/downloader/amuniversaldownloader.py @@ -6,9 +6,12 @@ import puz import requests +import xmltodict + +from urllib.parse import unquote from .basedownloader import BaseDownloader -from ..util import XWordDLException +from ..util import XWordDLException, unidecode class AMUniversalDownloader(BaseDownloader): def __init__(self, **kwargs): @@ -92,8 +95,27 @@ def parse_xword(self, xword_data): return puzzle - -class USATodayDownloader(AMUniversalDownloader): +# As of Sept 2023, the JSON data for USA Today is not consistently populated. +# I'd rather use the JSON data if possible, but until that's sorted, we can +# use an alternative approach. As such, commenting out but not deleting the +# earlier version here. +# +#class USATodayDownloader(AMUniversalDownloader): +# command = 'usa' +# outlet = 'USA Today' +# outlet_prefix = 'USA Today' +# +# def __init__(self, **kwargs): +# super().__init__(**kwargs) +# +# self.url_blob = 'https://gamedata.services.amuniversal.com/c/uupuz/l/U2FsdGVkX18CR3EauHsCV8JgqcLh1ptpjBeQ%2Bnjkzhu8zNO00WYK6b%2BaiZHnKcAD%0A9vwtmWJp2uHE9XU1bRw2gA%3D%3D/g/usaon/d/' +# +# def process_clues(self, clue_list): +# """Remove the end marker found in USA Today puzzle JSON.""" +# +# return clue_list[:-1] + +class USATodayDownloader(BaseDownloader): command = 'usa' outlet = 'USA Today' outlet_prefix = 'USA Today' @@ -101,12 +123,45 @@ class USATodayDownloader(AMUniversalDownloader): def __init__(self, **kwargs): super().__init__(**kwargs) - self.url_blob = 'https://gamedata.services.amuniversal.com/c/uupuz/l/U2FsdGVkX18CR3EauHsCV8JgqcLh1ptpjBeQ%2Bnjkzhu8zNO00WYK6b%2BaiZHnKcAD%0A9vwtmWJp2uHE9XU1bRw2gA%3D%3D/g/usaon/d/' + def find_by_date(self, dt): + self.date = dt + + return f'http://picayune.uclick.com/comics/usaon/data/usaon{dt:%y%m%d}-data.xml' - def process_clues(self, clue_list): - """Remove the end marker found in USA Today puzzle JSON.""" + def find_latest(self): + return self.find_by_date(datetime.datetime.today()) + + def find_solver(self, url): + return url + + def fetch_data(self, solver_url): + res = requests.get(solver_url) - return clue_list[:-1] + xw_data = res.content.decode() + + return xw_data + + def parse_xword(self, xword_data): + xw = xmltodict.parse(xword_data).get('crossword') + + puzzle = puz.Puzzle() + + puzzle.title = unquote(xw.get('Title',[]).get('@v') or '') + puzzle.author = unquote(xw.get('Author',[]).get('@v') or '') + puzzle.copyright = unquote(xw.get('Copyright',[]).get('@v') or '') + + puzzle.width = int(xw.get('Width')['@v']) + puzzle.height = int(xw.get('Height')['@v']) + + puzzle.solution = xw.get('AllAnswer',[]).get('@v').replace('-', '.') + puzzle.fill = ''.join([c if c == '.' else '-' for c in puzzle.solution]) + + xw_clues = sorted(list(xw['across'].values()) + list(xw['down'].values()), + key=lambda c: int(c['@cn'])) + + puzzle.clues = [unidecode(unquote(c.get('@c') or '')) for c in xw_clues] + + return puzzle class UniversalDownloader(AMUniversalDownloader):