diff --git a/gojo/bytes/buffer.mojo b/gojo/bytes/buffer.mojo
index e0d0fb8..b5e8601 100644
--- a/gojo/bytes/buffer.mojo
+++ b/gojo/bytes/buffer.mojo
@@ -80,10 +80,17 @@ struct Buffer(
         return self.size - self.offset
 
     @always_inline
-    fn bytes(self) -> DTypePointer[DType.uint8]:
+    fn bytes_ptr(self) -> DTypePointer[DType.uint8]:
         """Returns a pointer holding the unread portion of the buffer."""
         return self.data.offset(self.offset)
 
+    @always_inline
+    fn bytes(self) -> List[UInt8]:
+        """Returns a list holding a copy of the unread portion (self.size - self.offset bytes) of the buffer."""
+        var copy = UnsafePointer[UInt8]().alloc(self.size - self.offset)
+        memcpy(copy, self.data.offset(self.offset), self.size - self.offset)
+        return List[UInt8](unsafe_pointer=copy, size=self.size - self.offset, capacity=self.size - self.offset)
+
     @always_inline
     fn _resize(inout self, capacity: Int) -> None:
         """
@@ -308,7 +315,7 @@ struct Buffer(
             A List[Byte] struct containing the data up to and including the delimiter.
         """
         var at_eof = False
-        var i = index_byte(bytes=self.bytes(), size=self.size, delim=delim)
+        var i = index_byte(bytes=self.bytes_ptr(), size=self.size - self.offset, delim=delim)
         var end = self.offset + i + 1
 
         if i < 0:
@@ -375,6 +382,50 @@ struct Buffer(
 
         return line
 
+    fn write_to[W: io.Writer](inout self, inout writer: W) -> (Int64, Error):
+        """Writes data to writer until the buffer is drained or an error occurs.
+        The return value n is the number of bytes written; it always fits into an
+        Int, but it is Int64 to match the io.WriterTo trait. Any error
+        encountered during the write is also returned.
+
+        Args:
+            writer: The writer to write to.
+
+        Returns:
+            The number of bytes written to the writer, and any error encountered.
+        """
+        self.last_read = OP_INVALID
+        var bytes_to_write = len(self)
+        var total_bytes_written: Int64 = 0
+
+        if bytes_to_write > 0:
+            # TODO: Replace this intermediate copy with normal slicing once slice references work.
+            # len(self) already excludes self.offset, so copy exactly bytes_to_write bytes.
+            var bytes_written: Int
+            var err: Error
+            var copy = UnsafePointer[UInt8]().alloc(bytes_to_write)
+            memcpy(copy, self.data.offset(self.offset), bytes_to_write)
+            var line = List[Byte](unsafe_pointer=copy, size=bytes_to_write, capacity=bytes_to_write)
+
+            bytes_written, err = writer.write(line)
+            if bytes_written > bytes_to_write:
+                panic("bytes.Buffer.write_to: invalid write count")
+
+            self.offset += bytes_written
+            total_bytes_written = Int64(bytes_written)
+
+            var err_message = str(err)
+            if err_message != "":
+                return total_bytes_written, err
+
+            # all bytes should have been written, by definition of write method in io.Writer
+            if bytes_written != bytes_to_write:
+                return total_bytes_written, Error(ERR_SHORT_WRITE)
+
+        # Buffer is now empty; reset.
+        self.reset()
+        return total_bytes_written, Error()
+
 
 @value
 struct LegacyBuffer(
diff --git a/gojo/unicode/__init__.mojo b/gojo/unicode/__init__.mojo
index bd4cba6..10ee57e 100644
--- a/gojo/unicode/__init__.mojo
+++ b/gojo/unicode/__init__.mojo
@@ -1 +1 @@
-from .utf8 import string_iterator, rune_count_in_string
+from .utf8 import rune_count_in_string
diff --git a/gojo/unicode/utf8/__init__.mojo b/gojo/unicode/utf8/__init__.mojo
index b8732ec..201f380 100644
--- a/gojo/unicode/utf8/__init__.mojo
+++ b/gojo/unicode/utf8/__init__.mojo
@@ -1,4 +1,4 @@
 """Almost all of the actual implementation in this module was written by @mzaks (https://github.com/mzaks)!
 This would not be possible without his help.
 """
-from .runes import string_iterator, rune_count_in_string
+from .runes import rune_count_in_string
diff --git a/gojo/unicode/utf8/runes.mojo b/gojo/unicode/utf8/runes.mojo
index 175aeeb..5171282 100644
--- a/gojo/unicode/utf8/runes.mojo
+++ b/gojo/unicode/utf8/runes.mojo
@@ -8,7 +8,8 @@ from sys.info import simdwidthof
 from bit import countl_zero
 
 
-alias simd_width_u8 = simdwidthof[DType.uint8]()
+# alias simd_width_u8 = simdwidthof[DType.uint8]()
+alias simd_width_u8 = 1
 
 
 fn rune_count_in_string(s: String) -> Int:
diff --git a/tests/test_performance.mojo b/tests/test_performance.mojo
index e41ae0f..00cd485 100644
--- a/tests/test_performance.mojo
+++ b/tests/test_performance.mojo
@@ -10,7 +10,7 @@ fn test_string_builder() raises:
     # Create a string from the buffer
     var new_builder_write_start_time = now()
     var new_builder = StringBuilder()
-    for _ in range(10000):
+    for _ in range(100):
         _ = new_builder.write_string(
             "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod"
             " tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim"
@@ -36,7 +36,7 @@ fn test_string_builder() raises:
     # Create a string using the + operator
     print("Testing string concatenation performance")
     var vec = List[String]()
-    for i in range(10000):
+    for i in range(100):
         vec.append(
             "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod"
             " tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim"
@@ -57,8 +57,8 @@ fn test_string_builder() raises:
     print("Testing new buffer performance")
     # Create a string from the buffer
     var new_buffer_write_start_time = now()
-    var new_buffer = BufferNew()
-    for _ in range(10000):
+    var new_buffer = Buffer()
+    for _ in range(100):
         _ = new_buffer.write_string(
             "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod"
             " tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim"
@@ -104,14 +104,14 @@ fn test_string_builder() raises:
 # fn test_std_writer_speed() raises:
 #     """STDWriter is roughly 6-7x faster currently."""
 #     var print_start_time = now()
-#     for i in range(1, 10000):
+#     for i in range(1, 100):
 #         print(i)
 #     var print_execution_time = now() - print_start_time
 
 #     # Create stdout writer
 #     var writer = STDWriter(1)
 #     var writer_start_time = now()
-#     for i in range(1, 10000):
+#     for i in range(1, 100):
 #         _ = writer.write_string(str(i))
 #     var writer_execution_time = now() - writer_start_time
 
@@ -128,7 +128,7 @@ fn main() raises:
     # # Create a string from the buffer
     # var new_builder_write_start_time = now()
     # var new_builder = VectorizedStringBuilder()
-    # for _ in range(10000):
+    # for _ in range(100):
     #     _ = new_builder.write_string(
     #         "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod"
     #         " tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim"