From 30fe1296807073d507922dffe982d9debcb6371d Mon Sep 17 00:00:00 2001 From: "Jonatan \"jaw\" Wallmander" Date: Thu, 12 Oct 2023 17:11:58 +0200 Subject: [PATCH] Ensure HTML is read as UTF-8 This has been tested on Windows where Python could decode HTML files using a Windows code page rather than UTF-8. As it seems Sphinx generates HTML in UTF-8, it should be safe to enforce it here. Signed-off-by: Jonatan "jaw" Wallmander --- NOTICE.md | 2 ++ doxysphinx/html_parser.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/NOTICE.md b/NOTICE.md index 8c8cdc4..5013ba3 100644 --- a/NOTICE.md +++ b/NOTICE.md @@ -42,3 +42,5 @@ Please keep the list sorted. * Stefan Schulz - * Stream HPC B.V. * Gergely Meszaros +* Klarälvdalens Datakonsult AB (KDAB) + * Jonatan Wallmander diff --git a/doxysphinx/html_parser.py b/doxysphinx/html_parser.py index 697f171..deaab02 100644 --- a/doxysphinx/html_parser.py +++ b/doxysphinx/html_parser.py @@ -484,7 +484,7 @@ def parse(self, file: Path) -> HtmlParseResult: :return: The result of the parsing :rtype: ParseResult """ - buffer = file.read_text() + buffer = file.read_text(encoding="utf-8") tree = etree.document_fromstring(buffer).getroottree() meta_title, project, title = self._read_project_and_title(buffer, file)