Skip to content

Commit

Permalink
add url spoofing and support 4399
Browse files Browse the repository at this point in the history
  • Loading branch information
mathgeniuszach committed Sep 3, 2024
1 parent c489695 commit 488a2a1
Show file tree
Hide file tree
Showing 4 changed files with 176 additions and 10 deletions.
32 changes: 25 additions & 7 deletions fpcurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def toggle_console():
<li><b>Keep URLVars</b> - When checked, the downloader will append url vars present on links being downloaded to the end of the html file. This is only necessary when you have two links to the same webpage that generate different html due to the url vars.</li>
<li><b>Clear Done URLs</b> - When checked, the downloader will clear any urls in the list when they are downloaded. Errored urls will remain in the list.</li>
<li><b>Notify When Done</b> - When checked, the downloader will show a message box when it is done downloading.</li>
<li><b>Spoof Referrer</b> - When checked, the downloader will spoof the referrer of the urls to be the url itself.</li>
</ul>
Here are some basic usage steps:
<ol>
Expand Down Expand Up @@ -225,8 +226,8 @@ def toggle_console():
# This uuid uniquely defines fpcurator. (there is a 0 on the end after the text)
UUID = '51be8a01-3307-4103-8913-c2f70e64d83'

TITLE = "fpcurator v1.7.0"
ABOUT = "Created by Zach K - v1.7.0"
TITLE = "fpcurator v1.7.1"
ABOUT = "Created by Zach K - v1.7.1"
VER = 7

SITES_FOLDER = "sites"
Expand Down Expand Up @@ -454,6 +455,7 @@ def save(self):
downloader["keep_vars"] = self.downloader.keep_vars.get()
downloader["clear"] = self.downloader.clear.get()
downloader["show_done"] = self.downloader.show_done.get()
downloader["spoof"] = self.downloader.spoof.get()

downloader["urls"] = self.downloader.stxt.txt.get("0.0", "end").strip()

Expand Down Expand Up @@ -519,6 +521,7 @@ def load(self):
self.downloader.keep_vars.set(downloader["keep_vars"])
self.downloader.clear.set(downloader["clear"])
self.downloader.show_done.set(downloader["show_done"])
self.downloader.spoof.set(downloader["spoof"])

txt = self.downloader.stxt.txt
txt.delete("0.0", "end")
Expand Down Expand Up @@ -826,6 +829,8 @@ def __init__(self, parent):
self.original.set(True)
self.replace_https = tk.BooleanVar()
self.replace_https.set(True)
self.spoof = tk.BooleanVar()
self.spoof.set(True)

original = tk.Checkbutton(cframe, bg="white", text='Rename "web.archive.org"', var=self.original) # pyright: ignore [reportCallIssue] # tkinter does have "var"
original.pack(side="left")
Expand All @@ -835,17 +840,25 @@ def __init__(self, parent):
clear.pack(side="left")
show_done = tk.Checkbutton(cframe, bg="white", text='Notify When Done', var=self.show_done) # pyright: ignore [reportCallIssue] # tkinter does have "var"
show_done.pack(side="left", padx=5)
spoof = tk.Checkbutton(cframe, bg="white", text='Spoof Referrer', var=self.spoof) # pyright: ignore [reportCallIssue] # tkinter does have "var"
spoof.pack(side="left")

Tooltip(original, text="When checked, the downloader will put all urls downloaded from the web archive back into their original domains.")
Tooltip(keep_vars, text="When checked, the downloader will append url vars present on links being downloaded to the end of the html file. This is only necessary when you have two links to the same webpage that generate different html due to the url vars.")
Tooltip(clear, text="When checked, the downloader will clear any urls in the list when they are downloaded. Errored urls will remain in the list.")
Tooltip(show_done, text="When checked, the downloader will show a message box when it is done downloading.")
Tooltip(spoof, text="When checked, the downloader will spoof the referrer of the urls to be the url itself.")

# Create panel for inputting urls to download
lbl = tk.Label(self, bg="white", text=" Put URLs to download in this box:")
# Panels
lbl = tk.Label(self, bg="white", text="Put URLs to download at the top and headers at the bottom.")
lbl.pack(fill="x")
self.stxt = ScrolledText(self, width=10, height=10, wrap="none")
self.stxt.pack(expand=True, fill="both", padx=5, pady=5)
txts = tk.Frame(self, bg="white")
txts.pack(expand=True, fill="both", padx=5, pady=(0, 5))

self.stxt = ScrolledText(txts, width=10, height=10, wrap="none")
self.stxt.pack(side="top", expand=True, fill="both")
self.stxt_headers = ScrolledText(txts, width=10, height=10, wrap="none")
self.stxt_headers.pack(side="top", expand=False, fill="both")

def folder(self):
# For changing the output directory
Expand All @@ -856,10 +869,15 @@ def folder(self):

def i_download(self):
txt = self.stxt.txt
htxt = self.stxt_headers.txt
try:
headers = {}
for key, value in [i.strip().split("=", 1) for i in htxt.get("0.0", "end").replace("\r\n", "\n").replace("\r", "\n").split("\n") if i.strip()]:
headers[key.strip()] = value.strip()

links = [i.strip() for i in txt.get("0.0", "end").replace("\r\n", "\n").replace("\r", "\n").split("\n") if i.strip()]
if links:
errs = fpclib.download_all(links, self.output.get() or "output", not self.original.get(), self.keep_vars.get(), True)
errs = fpclib.download_all(links, self.output.get() or "output", not self.original.get(), self.keep_vars.get(), True, spoof=self.spoof.get(), headers=headers)
if self.show_done.get():
if errs:
if len(errs) == len(links):
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "fpcurator"
version = "1.7.0"
version = "1.7.1"
description = "fpcurator is a Python and fpclib powered tool for downloading urls, auto-generating curations, bulk searching for already curated games, and listing tags/platforms/games/animations for Flashpoint."
authors = ["mathgeniuszach <huntingmanzach@gmail.com>"]
readme = "README.md"
Expand Down
147 changes: 147 additions & 0 deletions sites/c4399.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import fpclib
import requests
import re
import bs4

# Site-definition metadata.
# NOTE(review): presumably read by fpcurator to match URLs to this definition
# and to check definition compatibility -- confirm against the loader.
regex = '4399.com'
ver = 7

# Matches the tail of a direct game-page URL, e.g. ".../217926_2.htm".
# The dot is escaped so that only a literal ".htm" suffix qualifies.
GAME_URL = re.compile(r"_\d+\.htm$")
# Captures the numeric value assigned to the page's `isHTML5` script variable.
IS_HTML5 = re.compile(r"var\s+isHTML5\s*=\s*(\d+)", re.IGNORECASE)
# Captures the quoted value of the `...GamePic` script variable (screenshot URL).
SCREENSHOT = re.compile(r'var\s+\w+GamePic\s*=\s*"(.*?)"', re.IGNORECASE)
# Captures the quoted value of the `...GamePath` script variable (game file path).
GAMEPATH = re.compile(r'var\s+\w+GamePath\s*=\s*"(.*?)"', re.IGNORECASE)
# Captures the `_w` and `_h` script variables (game width, then height).
DIMS = re.compile(r'var\s+_w\s*=\s*(\d+);?\s*var\s+_h\s*=\s*(\d+)')

# Wrapper page that centers an <iframe> on a dark background.
# Filled with %-formatting using (width, height, src); "%%" is an escaped
# literal percent sign so that "100%" survives the formatting step.
HTML_EMBED = """<body>
<style>
body { background-color: #16202c; height: 100%%; margin: 0; }
iframe { position: absolute; top: 0; bottom: 0; left: 0; right: 0; margin: auto; }
</style>
<iframe width="%s" height="%s" src="%s"></iframe>
</body>
"""
# Wrapper page that centers a Flash <object> on a dark background.
# Filled with %-formatting using (width, height, data URL); "%%" is an escaped
# literal percent sign.
FLASH_EMBED = """<body>
<style>
body { background-color: #16202c; height: 100%%; margin: 0; }
object { position: absolute; top: 0; bottom: 0; left: 0; right: 0; margin: auto; }
</style>
<object type="application/x-shockwave-flash" width="%s" height="%s" data="%s">
<param name="allowscriptaccess" value="always">
<param name="allowfullscreen" value="true">
<param name="allowfullscreeninteractive" value="true">
<param name="allownetworking" value="all">
<param name="wmode" value="direct">
</object>
</body>
"""

class c4399(fpclib.Curation):
    """Curation definition for games hosted on 4399.com.

    ``soupify`` fetches the game page, ``parse`` extracts metadata and launch
    settings from it, and ``get_files`` writes the embed page and downloads
    the game's file.
    """

    def soupify(self):
        """Fetch ``self.src`` and return the parsed game page.

        If ``self.src`` does not look like a direct game page (per
        ``GAME_URL``), the link found under ``.play > a`` is followed,
        ``self.src`` is updated to it, and that page is parsed instead.

        Returns:
            The ``bs4.BeautifulSoup`` document of the game page.
        """
        with requests.get(self.src) as resp:
            soup = bs4.BeautifulSoup(resp.content, "html.parser")

        # Correct URL if not on the actual game page
        if not GAME_URL.search(self.src):
            self.src = "https://www.4399.com" + soup.select_one(".play > a")["href"]
            with requests.get(self.src) as resp:
                soup = bs4.BeautifulSoup(resp.content, "html.parser")
        return soup

    @staticmethod
    def _key_label(span):
        """Translate one control ``<span>`` into description text.

        The span's first CSS class decides the text. Returns ``None`` when the
        span has no usable class or yields no text, so the caller can skip it.
        """
        # A missing class attribute comes back as None, not an empty list.
        classes = span.get("class") or []
        cs = classes[0] if classes else ""
        if not cs:
            return None
        if cs.startswith("player"):
            # "玩家" is Chinese for "player"; the class suffix is appended as-is.
            return "玩家" + cs[6:] + " "
        if cs == "ico_c_arrows":
            return "Arrow Keys "
        if cs == "ico_c_wasd":
            return "WASD "
        if cs.startswith("ico_c_"):
            # Remaining key icons: use the class suffix as the key name.
            return cs[6:].title() + " "
        if span.text:
            return span.text + " "
        return None

    def parse(self, soup):
        """Populate curation metadata and launch settings from the game page.

        Sets title, date, language, publisher, description, screenshot (when
        found), embed URL, CDN URL, dimensions, platform, application path and
        launch command.

        Args:
            soup: Parsed game page as returned by ``soupify``.
        """
        # Basic metadata
        self.title = soup.select_one(".game-des > .name > a").text.strip()
        # The first three characters of the date element are dropped
        # (presumably a label prefix -- confirm against a live page).
        self.date = soup.select_one(".game-des > .sorts.cf > em:last-of-type").text.strip()[3:]
        self.lang = 'zh'
        self.pub = "4399"

        # Description transformation
        box = soup.select_one("#playmethod > .box-l")
        has_ptex = bool(box.select_one("#p-tex"))
        desc = []
        for tag in box.children:
            # Skip random strings
            if isinstance(tag, bs4.element.NavigableString):
                continue
            # Grab header elements as is
            if tag.name == "b":
                desc.append(tag.text.strip() + "\n")
            # Grab content elements as is. Tags without a class attribute
            # return None from get(), so fall back to an empty list.
            if "content" in (tag.get("class") or []):
                desc.append(tag.text.strip() + "\n" + "\n")

            # Transform control information (but only if a direct description is not provided)
            if tag.get("id") == "GameKey" and not has_ptex:
                for ul in tag.children:
                    # Skip random strings
                    if isinstance(ul, bs4.element.NavigableString):
                        continue
                    # Loop over each list
                    for li in ul.children:
                        # Skip random strings between list items
                        if isinstance(li, bs4.element.NavigableString):
                            continue

                        for elem in li.children:
                            # Add text as is
                            if isinstance(elem, bs4.element.NavigableString):
                                desc.append(str(elem) + " ")
                                continue

                            # Only span elements have their class name translated to text
                            if elem.name != "span":
                                continue

                            label = self._key_label(elem)
                            if label:
                                desc.append(label)

                    # After a list ends add a newline for the next list.
                    desc.append("\n")

        self.desc = ''.join(desc)

        headtxt = str(soup.head)

        # Screenshot (optional: warn instead of failing when it is absent)
        screenshot = SCREENSHOT.search(headtxt)
        if screenshot:
            self.ss = "https:" + screenshot[1].strip()
        else:
            fpclib.debug("Screenshot not found", 1, pre="[WARN] ")

        # Platform detection (Flash, Unity, and HTML5)
        html5_flag = IS_HTML5.search(headtxt)
        is_html = bool(int(html5_flag[1])) if html5_flag else False

        self.embed = fpclib.normalize(self.src, False)
        self.cdn = "http://sda.4399.com/4399swf" + GAMEPATH.search(headtxt)[1]

        dims = DIMS.search(headtxt)
        self.dims = (dims[1], dims[2])

        if is_html:
            self.platform = "HTML5"
            self.app = fpclib.FPNAVIGATOR
            self.cmd = self.embed
        elif self.cdn.endswith(".swf"):
            self.platform = "Flash"
            self.app = fpclib.FLASH
            self.cmd = self.cdn
            self.add_app("Embedded Page", self.embed, fpclib.FPNAVIGATOR)
        else:
            self.platform = "Unity"
            self.app = fpclib.UNITY
            self.cmd = self.embed

    def get_files(self):
        """Write the embed page and download the game's embedded file."""
        # Create embed file
        if self.platform == "Flash":
            html = FLASH_EMBED % (self.dims[0], self.dims[1], self.cdn)
        else:
            html = HTML_EMBED % (self.dims[0], self.dims[1], self.cdn)
        # The embed URL minus its scheme is used as the output path.
        fpclib.write(self.embed[self.embed.index("://")+3:], html)

        # Download the game's true embedded file
        fpclib.download_all((self.cdn,), spoof=True)
5 changes: 3 additions & 2 deletions sites/defs.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
1725250543.8975272
1725346764.5213957
AddictingGames.py
Construct.py
CoolmathGames.py
Expand All @@ -15,4 +15,5 @@ Miniclip.py
Newgrounds.py
Therese.py
Unknown.py
Y8.py
Y8.py
c4399.py

0 comments on commit 488a2a1

Please sign in to comment.