Skip to content

Commit ebfe884

Browse files
committed
fix(ofx): Use compliant UNICODE name for UTF-8 encoding
1 parent 6ad4256 commit ebfe884

File tree

4 files changed

+25
-8
lines changed

4 files changed

+25
-8
lines changed

src/ofxstatement/ofx.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import codecs
12
from typing import Optional, Union
23
from datetime import datetime, date
34
from decimal import Decimal
@@ -22,20 +23,35 @@ def __init__(self, statement: Statement) -> None:
2223
self.default_float_precision = 2
2324
self.invest_transactions_float_precision = 5
2425

25-
def toxml(self, pretty: bool = False) -> str:
26+
def toxml(self, pretty: bool = False, encoding: str = "utf-8") -> str:
2627
et = self.buildDocument()
2728
xmlstring = etree.tostring(et.getroot(), "unicode")
2829
if pretty:
2930
dom = minidom.parseString(xmlstring)
3031
xmlstring = dom.toprettyxml(indent=" ", newl="\r\n")
3132
xmlstring = xmlstring.replace('<?xml version="1.0" ?>', "").lstrip()
33+
34+
codec = codecs.lookup(encoding)
35+
if codec.name == "utf-8":
36+
encoding_name = "UNICODE"
37+
charset_name = "NONE"
38+
elif codec.name.startswith("cp"):
39+
encoding_name = "USASCII"
40+
charset_name = codec.name[2:]
41+
else:
42+
# This is non-standard, because according to the OFX spec the
43+
# CHARSET should be the codepage number. We handle this gracefully,
44+
# since the only alternative is throwing an error here.
45+
encoding_name = "USASCII"
46+
charset_name = codec.name.upper()
47+
3248
header = (
3349
"OFXHEADER:100\r\n"
3450
"DATA:OFXSGML\r\n"
3551
"VERSION:102\r\n"
3652
"SECURITY:NONE\r\n"
37-
"ENCODING:UTF-8\r\n"
38-
"CHARSET:NONE\r\n"
53+
f"ENCODING:{encoding_name}\r\n"
54+
f"CHARSET:{charset_name}\r\n"
3955
"COMPRESSION:NONE\r\n"
4056
"OLDFILEUID:NONE\r\n"
4157
"NEWFILEUID:NONE\r\n"

src/ofxstatement/tests/test_ofx.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
DATA:OFXSGML
1313
VERSION:102
1414
SECURITY:NONE
15-
ENCODING:UTF-8
15+
ENCODING:UNICODE
1616
CHARSET:NONE
1717
COMPRESSION:NONE
1818
OLDFILEUID:NONE
@@ -130,7 +130,7 @@ def test_ofxWriter_pretty(self) -> None:
130130
"DATA:OFXSGML",
131131
"VERSION:102",
132132
"SECURITY:NONE",
133-
"ENCODING:UTF-8",
133+
"ENCODING:UNICODE",
134134
"CHARSET:NONE",
135135
"COMPRESSION:NONE",
136136
"OLDFILEUID:NONE",

src/ofxstatement/tests/test_ofx_invest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
DATA:OFXSGML
1313
VERSION:102
1414
SECURITY:NONE
15-
ENCODING:UTF-8
15+
ENCODING:UNICODE
1616
CHARSET:NONE
1717
COMPRESSION:NONE
1818
OLDFILEUID:NONE

src/ofxstatement/tool.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -201,9 +201,10 @@ def convert(args: argparse.Namespace) -> int:
201201
log.error("Statement validation error: %s" % (e.message))
202202
return 2 # Validation error
203203

204-
with smart_open(args.output, settings.get("encoding", None)) as out:
204+
encoding = settings.get("encoding", "utf-8")
205+
with smart_open(args.output, encoding) as out:
205206
writer = ofx.OfxWriter(statement)
206-
out.write(writer.toxml(pretty=args.pretty))
207+
out.write(writer.toxml(pretty=args.pretty, encoding=encoding))
207208

208209
n_lines = len(statement.lines)
209210
log.info(

0 commit comments

Comments
 (0)