Skip to content

Commit

Permalink
fix(ofx): Use compliant UNICODE name for UTF-8 encoding
Browse files: browse the repository at this point in the history
  • Loading branch information
Holzhaus committed Jan 12, 2025
1 parent 6ad4256 commit febd0c3
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 8 deletions.
22 changes: 19 additions & 3 deletions src/ofxstatement/ofx.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import codecs
from typing import Optional, Union
from datetime import datetime, date
from decimal import Decimal
Expand All @@ -22,20 +23,35 @@ def __init__(self, statement: Statement) -> None:
self.default_float_precision = 2
self.invest_transactions_float_precision = 5

def toxml(self, pretty: bool = False) -> str:
def toxml(self, pretty: bool = False, encoding: str = "utf-8") -> str:
et = self.buildDocument()
xmlstring = etree.tostring(et.getroot(), "unicode")
if pretty:
dom = minidom.parseString(xmlstring)
xmlstring = dom.toprettyxml(indent=" ", newl="\r\n")
xmlstring = xmlstring.replace('<?xml version="1.0" ?>', "").lstrip()

codec = codecs.lookup(encoding)
if codec.name == "utf-8":
encoding_name = "UNICODE"
charset_name = "NONE"
elif codec.name.startswith("cp"):
encoding_name = "USASCII"
charset_name = codec.name[2:]
else:
# This is non-standard, because according to the OFX spec the
# CHARSET should be the codepage number. We handle this gracefully,
# since the only alternative is throwing an error here.
encoding_name = "USASCII"
charset_name = codec.name.upper()

header = (
"OFXHEADER:100\r\n"
"DATA:OFXSGML\r\n"
"VERSION:102\r\n"
"SECURITY:NONE\r\n"
"ENCODING:UTF-8\r\n"
"CHARSET:NONE\r\n"
f"ENCODING:{encoding_name}\r\n"
f"CHARSET:{charset_name}\r\n"
"COMPRESSION:NONE\r\n"
"OLDFILEUID:NONE\r\n"
"NEWFILEUID:NONE\r\n"
Expand Down
4 changes: 2 additions & 2 deletions src/ofxstatement/tests/test_ofx.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
DATA:OFXSGML
VERSION:102
SECURITY:NONE
ENCODING:UTF-8
ENCODING:UNICODE
CHARSET:NONE
COMPRESSION:NONE
OLDFILEUID:NONE
Expand Down Expand Up @@ -130,7 +130,7 @@ def test_ofxWriter_pretty(self) -> None:
"DATA:OFXSGML",
"VERSION:102",
"SECURITY:NONE",
"ENCODING:UTF-8",
"ENCODING:UNICODE",
"CHARSET:NONE",
"COMPRESSION:NONE",
"OLDFILEUID:NONE",
Expand Down
2 changes: 1 addition & 1 deletion src/ofxstatement/tests/test_ofx_invest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
DATA:OFXSGML
VERSION:102
SECURITY:NONE
ENCODING:UTF-8
ENCODING:UNICODE
CHARSET:NONE
COMPRESSION:NONE
OLDFILEUID:NONE
Expand Down
5 changes: 3 additions & 2 deletions src/ofxstatement/tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,9 +201,10 @@ def convert(args: argparse.Namespace) -> int:
log.error("Statement validation error: %s" % (e.message))
return 2 # Validation error

with smart_open(args.output, settings.get("encoding", None)) as out:
encoding = settings.get("encoding", "utf-8")
with smart_open(args.output, encoding) as out:
writer = ofx.OfxWriter(statement)
out.write(writer.toxml(pretty=args.pretty))
out.write(writer.toxml(pretty=args.pretty, encoding=encoding))

n_lines = len(statement.lines)
log.info(
Expand Down

0 comments on commit febd0c3

Please sign in to comment.