Skip to content

Commit

Permalink
Implement serialization of decimal logical type (#22)
Browse files Browse the repository at this point in the history
Thanks to @thiagorb for the contribution!
  • Loading branch information
thiagorb authored Apr 18, 2024
1 parent 340f8e2 commit 820c14b
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 6 deletions.
67 changes: 62 additions & 5 deletions lib/avro/datum.php
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ function write_data($writers_schema, $datum, $encoder)
case AvroSchema::STRING_TYPE:
return $encoder->write_string($datum);
case AvroSchema::BYTES_TYPE:
return $encoder->write_bytes($datum);
return $encoder->write_bytes($writers_schema, $datum);
case AvroSchema::ARRAY_SCHEMA:
return $this->write_array($writers_schema, $datum, $encoder);
case AvroSchema::MAP_SCHEMA:
Expand Down Expand Up @@ -386,13 +386,25 @@ public function write_double($datum)
* @param string $str
* @uses self::write_bytes()
*/
function write_string($str) { $this->write_bytes($str); }
function write_string($str)
{
    // Strings share the bytes wire format; no schema is passed, so no
    // logical-type conversion is applied to the payload.
    $this->write_bytes(null, $str);
}

/**
* @param AvroSchema|null $writers_schema
* @param string $bytes
* @throws AvroException
*/
function write_bytes($bytes)
function write_bytes($writers_schema, $bytes)
{
    // A schema tagged with the decimal logical type carries a numeric datum,
    // which is converted to its byte encoding before the regular write.
    $is_decimal = $writers_schema !== null
        && $writers_schema->logical_type() === 'decimal';

    if ($is_decimal) {
        // Scale is optional and defaults to 0; precision is mandatory.
        $scale = $writers_schema->extra_attributes()['scale'] ?? 0;
        $precision = $writers_schema->extra_attributes()['precision'] ?? null;
        if (null === $precision) {
            throw new AvroException('Decimal precision is required');
        }

        $bytes = self::decimal_to_bytes($bytes, $scale, $precision);
    }

    // Length prefix first, then the raw payload.
    $payload_length = strlen($bytes);
    $this->write_long($payload_length);
    $this->write($bytes);
}
Expand All @@ -401,6 +413,49 @@ function write_bytes($bytes)
* @param string $datum
*/
function write($datum)
{
    // Hand the already-encoded data straight to the underlying IO object.
    $this->io->write($datum);
}

/**
 * Encodes a decimal value as the shortest big-endian two's-complement byte
 * string of its unscaled integer representation.
 *
 * The unscaled value is $decimal * 10^$scale, rounded to the nearest integer.
 * Because the value is packed with pack('J'), only unscaled values that fit
 * in a native integer can be encoded.
 *
 * @param int|float|string $decimal numeric value to encode
 * @param int $scale power of ten applied to $decimal to obtain the unscaled value
 * @param int $precision maximum number of decimal digits of the unscaled value
 * @return string at least one byte; redundant leading sign bytes are removed
 * @throws AvroException if $decimal is not numeric, is not finite, or its
 *                       unscaled value exceeds the precision or integer range
 */
private static function decimal_to_bytes($decimal, int $scale, int $precision): string
{
    if (!is_numeric($decimal)) {
        throw new AvroException('Decimal must be a numeric value');
    }

    $value = $decimal * (10 ** $scale);
    if (!is_int($value)) {
        $rounded = round($value);
        // Casting INF, NAN or a float outside the integer range to int does
        // not fail: it silently produces 0 or an unrelated number that would
        // then pass the precision check below. Reject such values first.
        // (float) PHP_INT_MAX rounds up to 2^63, hence the strict upper bound.
        if (
            !is_finite($rounded)
            || $rounded < (float) PHP_INT_MIN
            || $rounded >= (float) PHP_INT_MAX
        ) {
            throw new AvroException('Decimal value is out of range');
        }
        $value = (int) $rounded;
    }
    if (abs($value) > (10 ** $precision - 1)) {
        throw new AvroException('Decimal value is out of range');
    }

    // Eight bytes, big-endian; negative values are in two's complement.
    $packed = pack('J', $value);
    $significantBit = self::getMostSignificantBitAt($packed, 0);
    $trimByte = $significantBit ? 0xff : 0x00;

    // Drop leading bytes that only repeat the sign, but stop as soon as
    // removing one more would flip the sign bit of the remaining bytes.
    // The last byte is always kept.
    $offset = 0;
    $packedLength = strlen($packed);
    while ($offset < $packedLength - 1) {
        if (ord($packed[$offset]) !== $trimByte) {
            break;
        }

        if (self::getMostSignificantBitAt($packed, $offset + 1) !== $significantBit) {
            break;
        }

        $offset++;
    }

    return substr($packed, $offset);
}

/**
 * Isolates the top bit of one byte of a binary string.
 *
 * @param string $bytes binary string to inspect
 * @param int $offset zero-based index of the byte to look at
 * @return int 0x80 when the highest bit of that byte is set, 0 otherwise
 */
private static function getMostSignificantBitAt($bytes, $offset): int
{
    $byteValue = ord($bytes[$offset]);

    return $byteValue & 0b10000000;
}
}

/**
Expand Down Expand Up @@ -925,8 +980,10 @@ static public function long_bits_to_double($bits)
*/
static public function bytes_to_decimal($bytes, $scale = 0)
{
    // Decodes a big-endian two's-complement byte string into its unscaled
    // integer and, when $scale is positive, divides by 10^$scale.
    $length = strlen($bytes);
    if ($length > 8) {
        // unpack('J') reads exactly eight bytes; anything longer would be
        // decoded from a truncated prefix and yield a wrong value.
        throw new AvroException('Decimal value does not fit in 8 bytes');
    }

    // An empty payload decodes to zero; reading $bytes[0] on it would warn.
    $negative = $length > 0 && (ord($bytes[0]) & 0x80) !== 0;

    // Sign-extend to eight bytes so the sign of shorter encodings survives.
    $padded = str_pad($bytes, 8, $negative ? "\xff" : "\x00", STR_PAD_LEFT);

    // 'J' is an unsigned 64-bit format, but PHP keeps the bit pattern in a
    // signed integer, so a set top bit comes back as a negative number.
    $int = unpack('J', $padded)[1];

    return $scale > 0 ? ($int / (10 ** $scale)) : $int;
}

/**
Expand Down
4 changes: 4 additions & 0 deletions lib/avro/schema.php
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,11 @@ public static function is_valid_datum($expected_schema, $datum)
case self::BOOLEAN_TYPE:
return is_bool($datum);
case self::STRING_TYPE:
return is_string($datum);
case self::BYTES_TYPE:
if ($expected_schema->logical_type() === 'decimal') {
return is_numeric($datum);
}
return is_string($datum);
case self::INT_TYPE:
return (is_int($datum)
Expand Down
29 changes: 28 additions & 1 deletion test/DatumIOTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,34 @@ function data_provider()
'B', "\x02"),
array('{"name":"rec","type":"record","fields":[{"name":"a","type":"int"},{"name":"b","type":"boolean"}]}',
array('a' => 1, 'b' => false),
"\x02\x00")
"\x02\x00"),
array('{"type":"bytes","logicalType": "decimal","precision": 4,"scale": 1}',
'1',
"\x02\x0a"),
array('{"type":"bytes","logicalType": "decimal","precision": 4,"scale": 1}',
'-0.1',
"\x02\xff"),
array('{"type":"bytes","logicalType": "decimal","precision": 4,"scale": 1}',
-0.1,
"\x02\xff"),
array('{"type":"bytes","logicalType": "decimal","precision": 4,"scale": 1}',
3.1,
"\x02\x1f"),
array('{"type":"bytes","logicalType": "decimal","precision": 4,"scale": 2}',
2.55,
"\x04\x00\xff"),
array('{"type":"bytes","logicalType": "decimal","precision": 4,"scale": 0}',
-256,
"\x04\xff\x00"),
array('{"type":"bytes","logicalType": "decimal","precision": 4,"scale": 3}',
0.127,
"\x02\x7f"),
array('{"type":"bytes","logicalType": "decimal","precision": 19,"scale": 0}',
PHP_INT_MAX,
"\x10\x7f\xff\xff\xff\xff\xff\xff\xff"),
array('{"type":"bytes","logicalType": "decimal","precision": 19,"scale": 0}',
PHP_INT_MIN,
"\x10\x80\x00\x00\x00\x00\x00\x00\x00")
);
}

Expand Down

0 comments on commit 820c14b

Please sign in to comment.