eclecticiq · orsinium · Oct 8, 2019 · Oct 9, 2019 · Oct 9, 2019 · Oct 9, 2019
diff --git a/README.rst b/README.rst
@@ -31,7 +31,7 @@ To parse a string containing a ``data:`` uri, use ``parse()``:
 
 .. code-block:: python
 
-  >>> parsed = datauri.parse('data:text/plain,A%20brief%20note')
+  >>> parsed = datauri.parse('data:text/plain;charset=UTF-8,A%20brief%20note')
 
 This returns a parse result:
 
@@ -43,6 +43,10 @@ This returns a parse result:
   b'A brief note'
   >>> parsed.uri
   'data:text/plain;charset=UTF-8,A%20brief%20note'
+  >>> parsed.charset
+  'UTF-8'
+  >>> parsed.text
+  'A brief note'
 
 This is a simple container class with a few attributes:
 
@@ -54,6 +58,12 @@ This is a simple container class with a few attributes:
 * The ``data`` attribute is a byte string (``bytes``) with the decoded
   data. URL encoding and base64 is handled transparently.
 
+* The ``charset`` attribute is the charset given in the URI's media type,
+  or ``None`` if no charset is specified.
+
+* The ``text`` attribute is ``data`` decoded using ``charset`` (``ascii`` if
+  no charset is specified). It is ``None`` when the data is not text.
+
 * For convenience, the ``uri`` attribute contains the input uri.
 
 Parsed URIs compare equal if their media type and data are the same.
@@ -116,6 +126,10 @@ Please use Github issues to report problems or propose improvements.
 Version history
 ===============
 
+* 1.0.1
+
+  Added ``charset`` and ``text`` properties.
+
 * 1.0.0
 
   Initial release.

diff --git a/datauri/datauri.py b/datauri/datauri.py
@@ -22,6 +22,21 @@ class DataURIError(ValueError):
     pass
 
 
+# https://github.com/bottlepy/bottle/commit/fa7733e075da0d790d809aa3d2f53071897e6f76
+class CachedProperty(object):
+    def __init__(self, func):
+        self.func = func
+
+    def __get__(self, obj, cls):
+        if obj is None:
+            return self
+        value = obj.__dict__[self.func.__name__] = self.func(obj)
+        return value
+
+
+cached_property = CachedProperty
+
+
 class ParsedDataURI:
     """
     Container for parsed data URIs.
@@ -33,6 +48,21 @@ def __init__(self, media_type, data, uri):
         self.data = data
         self.uri = uri
 
+    @cached_property
+    def charset(self):
+        prefix = 'charset='
+        chunks = self.media_type.split(';')
+        for chunk in chunks:
+            if chunk.startswith(prefix):
+                return chunk[len(prefix):]
+        return None
+
+    @cached_property
+    def text(self):
+        if not self.media_type.startswith('text/'):
+            return None
+        return self.data.decode(self.charset or 'ascii')
+
     def __repr__(self):
         raw = self.data
         if len(raw) > 20:

diff --git a/setup.py b/setup.py
@@ -8,7 +8,7 @@
     name='datauri',
     description="implementation of the data uri scheme defined in rfc2397",
     long_description=long_description,
-    version='1.0.0',
+    version='1.0.1',
     author="EclecticIQ",
     author_email="info@eclecticiq.com",
     packages=['datauri'],

diff --git a/tests/test_datauri.py b/tests/test_datauri.py
@@ -68,6 +68,17 @@ def test_discover():
     assert actual == expected
 
 
+@pytest.mark.parametrize('data, charset, text', [
+    ('data:text/plain;charset=UTF-8;base64,0L7Qu9C10LM=', 'UTF-8', 'олег'),
+    ('data:text/plain;base64,YW55IGNhcm5hbCBwbGVhc3Vy', None, 'any carnal pleasur'),
+    ('data:image/png;base64,YW55IGNhcm5hbCBwbGVhc3Vy', None, None),
+])
+def test_text_decoding(data, charset, text):
+    parsed = datauri.parse(data)
+    assert parsed.charset == charset
+    assert parsed.text == text
+
+
 def test_container_equality():
     a = datauri.parse(SAMPLE_URL_ENCODED)
     b = datauri.parse(SAMPLE_URL_ENCODED)

diff --git a/tox.ini b/tox.ini
@@ -6,3 +6,6 @@ deps=-rrequirements-test.txt
 commands=
     pytest --cov {envsitepackagesdir}/datauri {posargs} tests/
     flake8 datauri/
+
+[flake8]
+max-line-length = 90