Skip to content

Commit

Permalink
Merge pull request #1 from elecena/initial-version
Browse files Browse the repository at this point in the history
Initial version
  • Loading branch information
macbre authored Nov 27, 2023
2 parents c35f3c3 + fc29256 commit 2560900
Show file tree
Hide file tree
Showing 5 changed files with 266 additions and 1 deletion.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
.idea/
/.idea/
/vendor/
/.php-cs-fixer.cache
/.phpunit.cache/
/.phpunit.result.cache
27 changes: 27 additions & 0 deletions phpunit.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
PHPUnit configuration (validated against the PHPUnit 10.3 schema).
Classes are loaded through vendor/autoload.php; risky tests and warnings fail the run,
and details are displayed for tests triggering deprecations, errors, notices and warnings.
-->
<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/10.3/phpunit.xsd"
bootstrap="vendor/autoload.php"
cacheDirectory=".phpunit.cache"
executionOrder="depends,defects"
beStrictAboutCoverageMetadata="true"
beStrictAboutOutputDuringTests="true"
failOnRisky="true"
failOnWarning="true"
displayDetailsOnTestsThatTriggerDeprecations="true"
displayDetailsOnTestsThatTriggerErrors="true"
displayDetailsOnTestsThatTriggerNotices="true"
displayDetailsOnTestsThatTriggerWarnings="true"
>
<!-- A single test suite, "default", that picks up the tests from the tests directory -->
<testsuites>
<testsuite name="default">
<directory>tests</directory>
</testsuite>
</testsuites>

<!-- The project's own code lives in the src directory -->
<source>
<include>
<directory>src</directory>
</include>
</source>
</phpunit>
106 changes: 106 additions & 0 deletions src/JsonlParser.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
<?php

namespace Elecena\JsonlParser;

/**
 * A LIFO queue of JSON-encoded items stored one per line (JSON Lines) in a stream.
 *
 * Items are appended to the end of the stream with push() and taken back,
 * last one first, with pop(). The stream is truncated as items are popped.
 */
class JsonlParser implements \Countable
{
    public const LINES_SEPARATOR = "\n";

    /**
     * How many bytes are read at once when the stream is scanned backwards.
     */
    private const READ_CHUNK_SIZE = 8192;

    /**
     * Whitespace that may surround a JSON document in a line; a line made only of it is blank.
     */
    private const LINE_PADDING = " \t\r\n";

    /**
     * @param resource $stream a seekable stream; it is read, written and truncated by this class
     */
    public function __construct(protected $stream)
    {
    }

    /**
     * Appends the item, JSON-encoded, as a new line at the end of the stream.
     *
     * The item is always written at the end, no matter where the stream pointer
     * currently is. When the stream content does not end with a line separator
     * (pop() leaves it in such a state) the separator is added first, so that
     * the new item never gets glued to the previous line.
     *
     * @throws \JsonException when the item can not be JSON-encoded; nothing is written then
     */
    public function push(array|string $item): void
    {
        $encoded = json_encode($item, JSON_THROW_ON_ERROR);

        fseek($this->stream, 0, SEEK_END);
        $size = ftell($this->stream);

        if ($size > 0 && stream_get_contents($this->stream, 1, $size - 1) !== self::LINES_SEPARATOR) {
            $encoded = self::LINES_SEPARATOR . $encoded;
        }

        // always reposition between reading and writing
        fseek($this->stream, 0, SEEK_END);
        fwrite($this->stream, $encoded . self::LINES_SEPARATOR);
    }

    /**
     * Appends every item from the provided list, in the iteration order.
     *
     * @param iterable<array|string> $items
     *
     * @throws \JsonException when one of the items can not be JSON-encoded
     */
    public function pushItems(iterable $items): void
    {
        foreach ($items as $item) {
            $this->push($item);
        }
    }

    /**
     * This method returns the last item from the stream and removes it.
     *
     * Blank lines at the end of the stream are skipped (and removed as well).
     * The stream is truncated just after the line that precedes the returned one
     * and the stream pointer is left at the (new) end of the stream.
     *
     * @return array|string|null the decoded item; null when there are no more items
     *                           or when the removed line was not a valid JSON
     *
     * @throws \RuntimeException when the stream can not be read
     */
    public function pop(): null|array|string
    {
        fseek($this->stream, 0, SEEK_END);
        $end = ftell($this->stream);

        // this stream is now empty
        if ($end === 0) {
            return null;
        }

        $separatorLength = strlen(self::LINES_SEPARATOR);
        $line = '';

        // walk backwards, line by line, until there is one with some content
        while ($end > 0 && $line === '') {
            [$line, $start] = $this->readLineEndingAt($end);

            $line = trim($line, self::LINE_PADDING);

            // the next line to consider (and the stream itself) ends before the separator
            $end = max(0, $start - $separatorLength);
        }

        // ftruncate() does not move the stream pointer, do not leave it beyond the end
        ftruncate($this->stream, $end);
        fseek($this->stream, 0, SEEK_END);

        return $line === '' ? null : json_decode($line, associative: true);
    }

    /**
     * Pops the items one by one (the last one first) until the stream has no more of them.
     *
     * @return \Generator<int, array|string>
     */
    public function iterate(): \Generator
    {
        while (($item = $this->pop()) !== null) {
            yield $item;
        }
    }

    /**
     * This method returns how many JSON-encoded lines are in the stream. Blank lines are not counted.
     *
     * This can be heavy on large files as this method rewinds and then reads the entire stream content.
     * The stream pointer is left at the end of the stream.
     *
     * @return int
     */
    public function count(): int
    {
        $count = 0;
        rewind($this->stream);

        // fgets() returns entire lines, no matter how long they are
        while (($line = fgets($this->stream)) !== false) {
            if (trim($line, self::LINE_PADDING) !== '') {
                $count++;
            }
        }

        return $count;
    }

    /**
     * Reads the line that ends at the given stream offset by scanning the stream backwards in chunks.
     *
     * @param int $end the offset just after the last byte of the line
     *
     * @return array{0: string, 1: int} the line (without any separator) and the offset it starts at
     *
     * @throws \RuntimeException when the stream can not be read
     */
    private function readLineEndingAt(int $end): array
    {
        $line = '';
        $cursor = $end;

        while ($cursor > 0) {
            $length = min(self::READ_CHUNK_SIZE, $cursor);
            $cursor -= $length;

            $chunk = stream_get_contents($this->stream, $length, $cursor);

            if ($chunk === false) {
                throw new \RuntimeException('Unable to read from the stream');
            }

            $separatorAt = strrpos($chunk, self::LINES_SEPARATOR);

            if ($separatorAt !== false) {
                // the line starts right after the closest separator
                $lineStart = $separatorAt + strlen(self::LINES_SEPARATOR);

                return [substr($chunk, $lineStart) . $line, $cursor + $lineStart];
            }

            $line = $chunk . $line;
        }

        // no separator found, the line starts where the stream does
        return [$line, 0];
    }
}
20 changes: 20 additions & 0 deletions tests/BaseTestCase.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?php

use PHPUnit\Framework\TestCase;

/**
 * Common helpers shared by the test cases.
 */
abstract class BaseTestCase extends TestCase
{
    /**
     * Creates an in-memory stream, opened for reading and writing, that holds the given string.
     * The stream pointer is placed at the beginning of the content.
     *
     * @return resource
     */
    protected static function streamFromString(string $string)
    {
        $memory = fopen('php://memory', 'r+');

        // fill the stream and go back to its start
        fwrite($memory, $string);
        rewind($memory);

        return $memory;
    }
}
109 changes: 109 additions & 0 deletions tests/JsonParserTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
<?php

use \Elecena\JsonlParser\JsonlParser;

/**
 * Tests of the JsonlParser class run against in-memory streams.
 */
class JsonParserTest extends BaseTestCase
{
    private const ITEM = ['foo' => 'bar', 'ok' => true];
    private const ITEM_ONE = self::ITEM;
    private const ITEM_TWO = ['foo' => 'test', 'ok' => true];

    public function testOpensAnEmptyString(): void
    {
        $stream = self::streamFromString('');
        $parser = new JsonlParser($stream);

        $this->assertNull($parser->pop());
        $this->assertCount(0, $parser);
    }

    public function testOpensASingleLine(): void
    {
        $stream = self::streamFromString(json_encode(self::ITEM));
        $parser = new JsonlParser($stream);

        $this->assertCount(1, $parser);
        $this->assertSame(self::ITEM, $parser->pop());
        $this->assertCount(0, $parser);
    }

    public function testOpensASingleLineWithTrailingNewLine(): void
    {
        $stream = self::streamFromString(json_encode(self::ITEM) . JsonlParser::LINES_SEPARATOR);
        $parser = new JsonlParser($stream);

        $this->assertCount(1, $parser);
        $this->assertSame(self::ITEM, $parser->pop());
        $this->assertCount(0, $parser);
    }

    public function testOpensTwoLines(): void
    {
        $stream = self::streamFromString(
            json_encode(self::ITEM_ONE) . JsonlParser::LINES_SEPARATOR . json_encode(self::ITEM_TWO)
        );
        $parser = new JsonlParser($stream);

        // items are popped in the reversed order
        $this->assertCount(2, $parser);
        $this->assertSame(self::ITEM_TWO, $parser->pop());
        $this->assertCount(1, $parser);
        $this->assertSame(self::ITEM_ONE, $parser->pop());
        $this->assertCount(0, $parser);
    }

    public function testOpensAnEmptyStringAndAddsAnItem(): void
    {
        $stream = self::streamFromString('');
        $parser = new JsonlParser($stream);

        $this->assertCount(0, $parser);
        $parser->push(self::ITEM);
        $this->assertCount(1, $parser);
        $this->assertSame(self::ITEM, $parser->pop());
        $this->assertCount(0, $parser);
    }

    public function testHandlesStrings(): void
    {
        $item = 'https://foo.bar.net';

        $stream = self::streamFromString('');
        $parser = new JsonlParser($stream);

        $this->assertCount(0, $parser);
        $parser->push($item);
        $this->assertCount(1, $parser);
        $this->assertSame($item, $parser->pop());
        $this->assertCount(0, $parser);
        $this->assertNull($parser->pop());
    }

    public function testIterator(): void
    {
        $stream = self::streamFromString('');
        $parser = new JsonlParser($stream);

        $parser->push('one');
        $parser->push('two');
        $parser->push('three');
        $this->assertCount(3, $parser);

        $list = iterator_to_array($parser->iterate());

        $this->assertCount(0, $parser);
        $this->assertCount(3, $list);
        $this->assertSame(['three', 'two', 'one'], $list);
    }

    public function testPushItems(): void
    {
        $stream = self::streamFromString('');
        $parser = new JsonlParser($stream);

        // a closure is used here as a named function declared inside a method becomes
        // a global one and can not be declared again when this test is run once more
        $items = (static function (): Generator {
            yield 'one';
            yield 'two';
            yield 'three';
        })();

        $parser->pushItems(items: $items);
        $this->assertCount(3, $parser);

        $list = iterator_to_array($parser->iterate());

        $this->assertCount(0, $parser);
        $this->assertCount(3, $list);
        $this->assertSame(['three', 'two', 'one'], $list);
    }
}

0 comments on commit 2560900

Please sign in to comment.