Skip to content

Commit 0aedcc3

Browse files
author
Tero Karvinen
committed Jun 15, 2024
Add --maff-html-file-name to allow accessing embedded HTML inside MAFF
1 parent 5a4ce41 commit 0aedcc3

File tree

1 file changed

+5
-4
lines changed

1 file changed

+5
-4
lines changed
 

‎hoto.py

+5 -4
Original file line number | Diff line number | Diff line change
@@ -127,6 +127,7 @@ def parseArgs():
127127
#parser.add_argument("-n", "--count", type=int, default=0, help="Number of records to process. Use zero (0) for unlimited.")
128128
parser.add_argument("--suggest", "-s", default=False, action=argparse.BooleanOptionalAction, help='''Suggest tags and metadata for files, showing both selectors "{sel.h1}" and matches "Tero's homepage".''')
129129
parser.add_argument("--rename", default=False, action=argparse.BooleanOptionalAction, help='''Rename files to output format.''')
130+
parser.add_argument("--maff-html-file-name", "-m", default="index.html", help='''HTML file to analyze inside MAFF archive. If one HTML file embeds another, the main is often included with an alternate name, such as "index_1.html".''')
130131
parser.add_argument("--no-action", "-n", default=False, action=argparse.BooleanOptionalAction, help='''Does not actually modify any files, but shows what would happen.''')
131132
args = parser.parse_args()
132133

@@ -215,7 +216,7 @@ def __init__(self, rdfStr):
215216
__setattr__ = dict.__setitem__
216217
__delattr__ = dict.__delitem__
217218

218-
def readPath(path):
219+
def readPath(path, args):
219220
"Read pathlib.Path path to string, optionally extracting files from inside MAFF zip"
220221
info(f'Reading "{path}"...')
221222
# verify arguments
@@ -239,8 +240,8 @@ def readPath(path):
239240
with zf.open(zippedFile, "r") as f:
240241
b = f.read()
241242
rdfStr = b.decode("utf-8")
242-
if zippedFile.endswith("/index.html"):
243-
debug(f'''matched index.html: "{zippedFile}"''')
243+
if zippedFile.endswith("/"+args.maff_html_file_name):
244+
debug(f'''matched {args.maff_html_file_name}: "{zippedFile}"''')
244245
with zf.open(zippedFile, "r") as f:
245246
b = f.read()
246247
htmlStr = b.decode("utf-8")
@@ -263,7 +264,7 @@ def filenameClean(s, keepext=None):
263264

264265
def processFile(path, args):
265266
info(f'## Processing file "{path}"')
266-
htmlStr, rdfStr = readPath(path)
267+
htmlStr, rdfStr = readPath(path, args)
267268

268269
info(f'### Extracting Tags and Metadata from "{path}"')
269270
sel = Selector(htmlStr)

0 commit comments

Comments
 (0)