From 0d739e5ee768b99f25d3dae4eaf5b0a5c7d11573 Mon Sep 17 00:00:00 2001
From: Parker Higgins <parker@parkerhiggins.net>
Date: Sat, 30 Sep 2023 19:52:33 -0400
Subject: [PATCH] USAT: add alternate downloader (#133)

---
 xword_dl/downloader/amuniversaldownloader.py | 69 ++++++++++++++++++--
 1 file changed, 62 insertions(+), 7 deletions(-)

diff --git a/xword_dl/downloader/amuniversaldownloader.py b/xword_dl/downloader/amuniversaldownloader.py
index 35d0b19..69c6d0d 100644
--- a/xword_dl/downloader/amuniversaldownloader.py
+++ b/xword_dl/downloader/amuniversaldownloader.py
@@ -6,9 +6,12 @@
 
 import puz
 import requests
+import xmltodict
+
+from urllib.parse import unquote
 
 from .basedownloader import BaseDownloader
-from ..util import XWordDLException
+from ..util import XWordDLException, unidecode
 
 class AMUniversalDownloader(BaseDownloader):
     def __init__(self, **kwargs):
@@ -92,8 +95,27 @@ def parse_xword(self, xword_data):
 
         return puzzle
 
-
-class USATodayDownloader(AMUniversalDownloader):
+# As of Sept 2023, the JSON data for USA Today is not consistently populated.
+# I'd rather use the JSON data if possible, but until that's sorted, we can
+# use an alternative approach. As such, commenting out but not deleting the
+# earlier version here.
+#
+#class USATodayDownloader(AMUniversalDownloader):
+#    command = 'usa'
+#    outlet = 'USA Today'
+#    outlet_prefix = 'USA Today'
+#
+#    def __init__(self, **kwargs):
+#        super().__init__(**kwargs)
+#
+#        self.url_blob = 'https://gamedata.services.amuniversal.com/c/uupuz/l/U2FsdGVkX18CR3EauHsCV8JgqcLh1ptpjBeQ%2Bnjkzhu8zNO00WYK6b%2BaiZHnKcAD%0A9vwtmWJp2uHE9XU1bRw2gA%3D%3D/g/usaon/d/'
+#
+#    def process_clues(self, clue_list):
+#        """Remove the end marker found in USA Today puzzle JSON."""
+#
+#        return clue_list[:-1]
+
+class USATodayDownloader(BaseDownloader):
     command = 'usa'
     outlet = 'USA Today'
     outlet_prefix = 'USA Today'
@@ -101,12 +123,45 @@ class USATodayDownloader(AMUniversalDownloader):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
 
-        self.url_blob = 'https://gamedata.services.amuniversal.com/c/uupuz/l/U2FsdGVkX18CR3EauHsCV8JgqcLh1ptpjBeQ%2Bnjkzhu8zNO00WYK6b%2BaiZHnKcAD%0A9vwtmWJp2uHE9XU1bRw2gA%3D%3D/g/usaon/d/'
+    def find_by_date(self, dt):
+        """Store dt as self.date and return the usaon XML data URL for it."""
+        self.date = dt
+        return f'http://picayune.uclick.com/comics/usaon/data/usaon{dt:%y%m%d}-data.xml'
 
-    def process_clues(self, clue_list):
-        """Remove the end marker found in USA Today puzzle JSON."""
+    def find_latest(self):  # "latest" is today's date per the local clock
+        return self.find_by_date(datetime.datetime.today())
+
+    def find_solver(self, url):  # identity: url is returned unchanged
+        return url
+
+    def fetch_data(self, solver_url):
+        """GET solver_url and return the response body decoded to str."""
 
-        return clue_list[:-1]
+        res = requests.get(solver_url)
+        xw_data = res.content.decode()  # bytes.decode() defaults to UTF-8
+        return xw_data
+
+    def parse_xword(self, xword_data):
+        """Build a puz.Puzzle from the crossword XML text in xword_data."""
+        xw = xmltodict.parse(xword_data).get('crossword')
+        puzzle = puz.Puzzle()
+
+        # Percent-decode metadata; a missing element or '@v' yields ''.
+        puzzle.title = unquote(xw.get('Title', {}).get('@v') or '')
+        puzzle.author = unquote(xw.get('Author', {}).get('@v') or '')
+        puzzle.copyright = unquote(xw.get('Copyright', {}).get('@v') or '')
+
+        puzzle.width = int(xw.get('Width')['@v'])
+        puzzle.height = int(xw.get('Height')['@v'])
+
+        # '-' in the answer string becomes '.'; fill keeps '.' and blanks the rest.
+        puzzle.solution = xw.get('AllAnswer', {}).get('@v').replace('-', '.')
+        puzzle.fill = ''.join(c if c == '.' else '-' for c in puzzle.solution)
+
+        xw_clues = sorted(list(xw['across'].values()) + list(xw['down'].values()),
+                          key=lambda c: int(c['@cn']))
+        puzzle.clues = [unidecode(unquote(c.get('@c') or '')) for c in xw_clues]
+        return puzzle
 
 
 class UniversalDownloader(AMUniversalDownloader):