From 35b2e5ff8eb910dd49410c8e6d1e8f4e4238acad Mon Sep 17 00:00:00 2001 From: Mikhail Tavarez Date: Sat, 21 Sep 2024 10:19:26 -0500 Subject: [PATCH] update changelog --- CHANGELOG.md | 4 ++++ benchmarks/string_builder.mojo | 39 ++++++++++++++++++---------------- mojoproject.toml | 2 +- src/gojo/bytes/buffer.mojo | 14 ++++++------ src/gojo/strings/builder.mojo | 15 +++++++------ src/recipe.yaml | 2 +- 6 files changed, 43 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b0cbd0..fb47a20 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] - yyyy-mm-dd +## [0.1.10] - 2024-09-21 + +- Add `consume()` to `StringBuilder` and `Buffer` to take ownership of the internal buffer instead of copying it. + ## [0.1.9] - 2024-09-13 - Fix usage of abort instead of panic. diff --git a/benchmarks/string_builder.mojo b/benchmarks/string_builder.mojo index 423db1f..d98fa05 100644 --- a/benchmarks/string_builder.mojo +++ b/benchmarks/string_builder.mojo @@ -1,6 +1,7 @@ import benchmark +import pathlib +import time from gojo.strings import StringBuilder -from gojo.bytes.buffer import Buffer alias SAMPLE_TEXT = """Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.""" @@ -23,14 +24,25 @@ fn benchmark_string_builder[batches: Int](): _ = str(new_builder) -fn benchmark_bytes_buffer[batches: Int](): - var buffer = Buffer(capacity=batches * len(SAMPLE_TEXT)) - for _ in range(batches): - _ = buffer.write_string(SAMPLE_TEXT) - _ = str(buffer) +fn benchmark_consume_and_str() raises: + var builder = StringBuilder() + var path = str(pathlib._dir_of_current_file()) + "/data/test_big_file.txt" + with open(path, "r") as file: + var data = file.read() + for _ in range(10): + _ = builder.write_string(data) + + var start = time.perf_counter_ns() + var result = str(builder) + print("Stringify buffer: ", time.perf_counter_ns() - start) + start = time.perf_counter_ns() + result = builder.consume() + print("Consume buffer: ", time.perf_counter_ns() - start) + _ = result -fn main(): + +fn main() raises: # There's a performance penalty for benchmark concat bc it also includes # the building of the list of strings it concatenates. Trying to build it at comptime takes a loooong time. 
print("Running benchmark_concat - 100 batches") @@ -41,10 +53,6 @@ fn main(): report = benchmark.run[benchmark_string_builder[100]](max_iters=20) report.print(benchmark.Unit.ms) - print("Running benchmark_bytes_buffer - 100 batches") - report = benchmark.run[benchmark_bytes_buffer[100]](max_iters=20) - report.print(benchmark.Unit.ms) - print("Running benchmark_concat - 1000 batches") report = benchmark.run[benchmark_concat[1000]](max_iters=20) report.print(benchmark.Unit.ms) @@ -53,10 +61,6 @@ fn main(): report = benchmark.run[benchmark_string_builder[1000]](max_iters=20) report.print(benchmark.Unit.ms) - print("Running benchmark_bytes_buffer - 1000 batches") - report = benchmark.run[benchmark_bytes_buffer[1000]](max_iters=20) - report.print(benchmark.Unit.ms) - print("Running benchmark_concat - 10000 batches") report = benchmark.run[benchmark_concat[10000]](max_iters=2) report.print(benchmark.Unit.ms) @@ -65,6 +69,5 @@ fn main(): report = benchmark.run[benchmark_string_builder[10000]](max_iters=20) report.print(benchmark.Unit.ms) - print("Running benchmark_bytes_buffer - 10000 batches") - report = benchmark.run[benchmark_bytes_buffer[10000]](max_iters=20) - report.print(benchmark.Unit.ms) + print("Running benchmark_consume_and_str") + benchmark_consume_and_str() diff --git a/mojoproject.toml b/mojoproject.toml index 548279d..e78216d 100644 --- a/mojoproject.toml +++ b/mojoproject.toml @@ -4,7 +4,7 @@ channels = ["conda-forge", "https://conda.modular.com/max"] description = "Experiments in porting over Golang stdlib into Mojo." name = "gojo" platforms = ["osx-arm64", "linux-64"] -version = "0.1.9" +version = "0.1.10" [tasks] tests = "bash scripts/tests.sh" diff --git a/src/gojo/bytes/buffer.mojo b/src/gojo/bytes/buffer.mojo index 49fa276..2a237a0 100644 --- a/src/gojo/bytes/buffer.mojo +++ b/src/gojo/bytes/buffer.mojo @@ -208,15 +208,17 @@ struct Buffer( """ return self.as_string_slice() - @deprecated("Buffer.render() has been deprecated. 
Use Buffer.as_string_slice() or call str() instead.") - fn render(self) -> String: + fn consume(inout self) -> String: """ - Return a StringSlice view of the data owned by the builder. + Transfers the buffer's data to a string and resets the buffer. Effectively consuming the Buffer. Returns: - The string representation of the string builder. Returns an empty string if the string builder is empty. + The string representation of the buffer. Returns an empty string if the buffer is empty. """ - return self.as_string_slice() + var result = String(self._data, self._size) + self._data = UnsafePointer[UInt8]() + self._size = 0 + return result fn write(inout self, src: Span[UInt8]) -> (Int, Error): """ @@ -280,7 +282,7 @@ struct Buffer( self.last_read = OP_INVALID fn _read(inout self, inout dest: UnsafePointer[UInt8], capacity: Int) -> (Int, Error): - """Reads the next len(dest) bytes from the buffer or until the buffer + """Reads the next `len(dest)` bytes from the buffer or until the buffer is drained. The return value `bytes_read` is the number of bytes read. If the buffer has no data to return, err is `io.EOF` (unless `len(dest)` is zero); diff --git a/src/gojo/strings/builder.mojo b/src/gojo/strings/builder.mojo index 879c01a..7837f16 100644 --- a/src/gojo/strings/builder.mojo +++ b/src/gojo/strings/builder.mojo @@ -90,16 +90,17 @@ struct StringBuilder[growth_factor: Float32 = 2]( """ return self.as_string_slice() - @deprecated( - "StringBuilder.render() has been deprecated. Use StringBuilder.as_string_slice() or call str() instead." - ) - fn render(ref [_]self) -> String: - """Return a StringSlice view of the data owned by the builder. + fn consume(inout self) -> String: + """ + Transfers the string builder's data to a string and resets the string builder. Effectively consuming the string builder. Returns: - The string representation of the string builder. Returns an empty string if the string builder is empty. + The string representation of the string builder. 
Returns an empty string if the buffer is empty. """ - return self.as_string_slice() + var result = String(self._data, self._size) + self._data = UnsafePointer[UInt8]() + self._size = 0 + return result fn _resize(inout self, capacity: Int) -> None: """Resizes the string builder buffer. diff --git a/src/recipe.yaml b/src/recipe.yaml index b8ee568..f201e12 100644 --- a/src/recipe.yaml +++ b/src/recipe.yaml @@ -5,7 +5,7 @@ context: package: name: "gojo" - version: 0.1.9 + version: 0.1.10 source: - path: .