Skip to content

Commit

Permalink
AmuseLabs: add new key-finding technique (#140)
Browse files Browse the repository at this point in the history
  • Loading branch information
thisisparker authored Oct 27, 2023
1 parent a346e85 commit 885a502
Showing 1 changed file with 19 additions and 1 deletion.
20 changes: 19 additions & 1 deletion xword_dl/downloader/amuselabsdownloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def fetch_data(self, solver_url):

## In some cases we need to pull the underlying JavaScript ##
# Find the JavaScript URL
amuseKey = None
m1 = re.search(r'"([^"]+c-min.js[^"]+)"', res.text)
js_url_fragment = m1.groups()[0]
js_url = urllib.parse.urljoin(solver_url, js_url_fragment)
Expand All @@ -98,6 +99,20 @@ def fetch_data(self, solver_url):
amuseKey = [int(x) for x in
re.findall(r'=\[\]\).push\(([0-9]{1,2})\)', res2.text)]

# The key found above is not always the right one, so also extract a
# second candidate key from the script to try.
# (current as of 10/26/2023)
key_2_order_regex = r'i=(\d+);i<t.length;i\+='
key_2_digit_regex = r't.length\?(\d+)'

key_digits = [int(x) for x in
re.findall(key_2_digit_regex, res2.text)]
key_orders = [int(x) for x in
re.findall(key_2_order_regex, res2.text)]

amuseKey2 = [x for x, _ in sorted(zip(key_digits, key_orders), key=lambda pair: pair[1])]


# Helper function to decode rawc,
# which is occasionally obfuscated.
def load_rawc(rawc, amuseKey=None):
Expand Down Expand Up @@ -153,7 +168,10 @@ def amuse_b64(e, amuseKey=None):
amuse_b64(rawc, amuseKey)
).decode("utf-8"))

xword_data = load_rawc(rawc, amuseKey=amuseKey)
try:
xword_data = load_rawc(rawc, amuseKey=amuseKey)
except UnicodeDecodeError:
xword_data = load_rawc(rawc, amuseKey=amuseKey2)

return xword_data

Expand Down

0 comments on commit 885a502

Please sign in to comment.