From c3d6c57a88d91c239f706d4f2e3925439cb63075 Mon Sep 17 00:00:00 2001 From: Alex Rasmussen Date: Sun, 2 Dec 2018 18:07:31 -0800 Subject: [PATCH 1/2] Fix string decoding. Currently, strings will encode as utf-8 by default, but decode to bytes. This becomes particularly problematic when exporting to JSON, but it's generally a bit inconsistent. This diff should fix that. Since this changes how strings decode, this will be sufficient to bump bread to v3. --- bread/string.py | 6 ++-- docs/source/spec_language.rst | 2 +- test.py | 55 ++++++++++++++++++++--------------- 3 files changed, 35 insertions(+), 28 deletions(-) diff --git a/bread/string.py b/bread/string.py index d2b1caf..8b91cb2 100644 --- a/bread/string.py +++ b/bread/string.py @@ -2,18 +2,18 @@ from .field import BreadField -def string(length): +def string(length, encoding='utf-8'): def make_string_field(parent, **field_options): length_in_bits = length * 8 def encode_string(value): if type(value) != bytes: - value = value.encode('utf-8') + value = value.encode(encoding) return BitArray(bytes=value) def decode_string(encoded): - return encoded.bytes + return encoded.bytes.decode(encoding) return BreadField(length_in_bits, encode_string, decode_string, str_format=field_options.get('str_format', None)) diff --git a/docs/source/spec_language.rst b/docs/source/spec_language.rst index 141f1a3..7075ee0 100644 --- a/docs/source/spec_language.rst +++ b/docs/source/spec_language.rst @@ -47,7 +47,7 @@ For convenience and improved readability, the following shorthands are defined: Strings ~~~~~~~ -``string(length)`` - the next ``length`` bytes represent a string of the given length +``string(length, encoding)`` - the next ``length`` bytes represent a string of the given length. You can pick an encoding for the strings to encode and decode in; the default is ``utf-8``. Booleans ~~~~~~~~ diff --git a/test.py b/test.py index d46ff77..2a4d6c9 100755 --- a/test.py +++ b/test.py @@ -1,11 +1,14 @@ #!/usr/bin/env python -import pytest - -import struct, sys, pprint, unittest, itertools, tempfile, os, json -import bread as b +import itertools +import json +import os +import struct +import tempfile import bitstring +import bread as b +import pytest # Shared structs for bread struct test @@ -94,10 +97,10 @@ def test_simple_struct(): assert len(test) == 168 - assert test.flag_one == True - assert test.flag_two == False - assert test.flag_three == True - assert test.flag_four == False + assert test.flag_one + assert not test.flag_two + assert test.flag_three + assert not test.flag_four assert test.first == 0xfb assert test.blah == 0xdddd @@ -169,10 +172,10 @@ def test_updates_do_not_leak(): assert len(test2) == 168 - assert test2.flag_one == False - assert test2.flag_two == False - assert test2.flag_three == False - assert test2.flag_four == True + assert not test2.flag_one + assert not test2.flag_two + assert not test2.flag_three + assert test2.flag_four assert test2.first == 0xde assert test2.blah == 0xfafe @@ -182,10 +185,10 @@ def test_updates_do_not_leak(): # Updating test2 shouldn't impact test - assert test.flag_one == True - assert test.flag_two == False - assert test.flag_three == True - assert test.flag_four == False + assert test.flag_one + assert not test.flag_two + assert test.flag_three + assert not test.flag_four assert test.first == 0xfb assert test.blah == 0xdddd @@ -269,7 +272,7 @@ def test_nested_struct(): assert supernested_test.__offsets__.dummy == current_byte * 8 current_byte += 1 - assert supernested_test.dummy.ok == False + assert not supernested_test.dummy.ok assert b.write(supernested_test, deeply_nested_struct) == bytearray(list(range(34)) + [0b0]) @@ -343,7 +346,7 @@ def test_conditional(): true_test = b.parse(true_data, conditional_test) assert true_test._length == 13 - assert true_test.qux == True + assert true_test.qux assert hasattr(true_test, "frooz") assert not hasattr(true_test, "fooz") assert true_test.frooz == 0b1001 @@ -361,7 +364,7 @@ def test_conditional(): false_test = b.parse(false_data, conditional_test) assert false_test._length == 17 - assert false_test.qux == False + assert not false_test.qux assert hasattr(false_test, "fooz") assert not hasattr(false_test, "frooz") assert false_test.fooz == 0b10010001 @@ -500,6 +503,7 @@ def test_array_of_conditionals(): assert test_parsed.foos[2].baz == 0b11 assert test_parsed._length == 32 + def test_modifying_conditional_with_structs_that_have_different_lengths(): true_data = bitstring.BitArray(bytearray([0b11001010, 0b11101000])) true_data.append('0b0') @@ -510,6 +514,7 @@ def test_modifying_conditional_with_structs_that_have_different_lengths(): assert true_test._length == 17 assert true_test.fooz == 0b10010101 + def test_field_properties_in_array(): array_endian_test = [ ("little_arr", b.array(3, b.uint16), {"endianness": b.LITTLE_ENDIAN}), @@ -600,7 +605,7 @@ def test_parse_str(): test_parsed = b.parse(test_str, test_struct) - assert test_parsed.str.decode('utf-8') == "gabbagabbahey" + assert test_parsed.str == "gabbagabbahey" def test_str(): @@ -608,10 +613,12 @@ def test_str(): data = bytearray([0x68, 0x65, 0x6c, 0x6c, 0x6f]) result = b.parse(data, str_test) - assert result.msg.decode('utf-8') == "hello" + assert result.msg == "hello" assert b.write(result, str_test) == data + assert result.as_json() == json.dumps({'msg': 'hello'}) + def test_str_unicode(): str_test = [("msg", b.string(5))] @@ -619,7 +626,7 @@ def test_str_unicode(): data = bytearray([104, 101, 108, 108, 111]) result = b.parse(data, str_test) - assert result.msg.decode('utf-8') == "hello" + assert result.msg == "hello" assert b.write(result, str_test) == data result.msg = "abate" @@ -628,7 +635,7 @@ def test_str_unicode(): edited_result = b.parse(output_data, str_test) - assert result.msg == "abate" + assert edited_result.msg == "abate" def test_enum(): @@ -1068,7 +1075,7 @@ def test_new(): assert len(empty_struct) == 8 * 5 + 4 - assert empty_struct.greeting == b'\x00\x00\x00\x00\x00' + assert empty_struct.greeting == b'\x00\x00\x00\x00\x00'.decode('utf-8') assert empty_struct.age == 0 empty_struct.greeting = 'hello' From 7d028256d69771f0a1d9ecad8b5cffb61601e156 Mon Sep 17 00:00:00 2001 From: Alex Rasmussen Date: Sun, 2 Dec 2018 20:07:04 -0800 Subject: [PATCH 2/2] Bumping version to v3.0.0 --- bread/__init__.py | 2 +- docs/source/conf.py | 4 ++-- setup.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bread/__init__.py b/bread/__init__.py index 6d834e7..d544480 100644 --- a/bread/__init__.py +++ b/bread/__init__.py @@ -11,7 +11,7 @@ from .lifecycle import * __title__ = 'bread' -__version__ = '2.3.0' +__version__ = '3.0.0' __author__ = 'Alex Rasmussen' __license__ = 'MIT' __copyright__ = 'Copyright 2015 Alex Rasmussen' diff --git a/docs/source/conf.py b/docs/source/conf.py index 5a52566..b647448 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -48,9 +48,9 @@ # built documents. # # The short X.Y version. -version = '2.3.0' +version = '3.0.0' # The full version, including alpha/beta/rc tags. -release = '2.3.0' +release = '3.0.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/setup.py b/setup.py index 796bbf8..02fc720 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup setup(name='bread', - version='2.3.0', + version='3.0.0', description='Binary format parsing made easier', url='https://github.com/alexras/bread', author='Alex Rasmussen',