diff --git a/gojo/bufio/bufio.mojo b/gojo/bufio/bufio.mojo index 92fc6b9..332cfec 100644 --- a/gojo/bufio/bufio.mojo +++ b/gojo/bufio/bufio.mojo @@ -1,4 +1,3 @@ -from math import max from ..io import traits as io from ..builtins import copy, panic from ..builtins.bytes import Byte, index_byte diff --git a/gojo/builtins/bytes.mojo b/gojo/builtins/bytes.mojo index 0504d16..d8ba406 100644 --- a/gojo/builtins/bytes.mojo +++ b/gojo/builtins/bytes.mojo @@ -1,7 +1,4 @@ -from .list import equals - - -alias Byte = Int8 +alias Byte = UInt8 fn has_prefix(bytes: List[Byte], prefix: List[Byte]) -> Bool: @@ -44,7 +41,6 @@ fn index_byte(bytes: List[Byte], delim: Byte) -> Int: Returns: The index of the first occurrence of the byte delim. """ - var i = 0 for i in range(len(bytes)): if bytes[i] == delim: return i diff --git a/gojo/builtins/list.mojo b/gojo/builtins/list.mojo deleted file mode 100644 index cb32504..0000000 --- a/gojo/builtins/list.mojo +++ /dev/null @@ -1,133 +0,0 @@ -fn equals(left: List[Int8], right: List[Int8]) -> Bool: - if len(left) != len(right): - return False - for i in range(len(left)): - if left[i] != right[i]: - return False - return True - - -fn equals(left: List[UInt8], right: List[UInt8]) -> Bool: - if len(left) != len(right): - return False - for i in range(len(left)): - if left[i] != right[i]: - return False - return True - - -fn equals(left: List[Int16], right: List[Int16]) -> Bool: - if len(left) != len(right): - return False - for i in range(len(left)): - if left[i] != right[i]: - return False - return True - - -fn equals(left: List[UInt16], right: List[UInt16]) -> Bool: - if len(left) != len(right): - return False - for i in range(len(left)): - if left[i] != right[i]: - return False - return True - - -fn equals(left: List[Int32], right: List[Int32]) -> Bool: - if len(left) != len(right): - return False - for i in range(len(left)): - if left[i] != right[i]: - return False - return True - - -fn equals(left: List[UInt32], right: List[UInt32]) -> 
Bool: - if len(left) != len(right): - return False - for i in range(len(left)): - if left[i] != right[i]: - return False - return True - - -fn equals(left: List[Int64], right: List[Int64]) -> Bool: - if len(left) != len(right): - return False - for i in range(len(left)): - if left[i] != right[i]: - return False - return True - - -fn equals(left: List[UInt64], right: List[UInt64]) -> Bool: - if len(left) != len(right): - return False - for i in range(len(left)): - if left[i] != right[i]: - return False - return True - - -fn equals(left: List[Int], right: List[Int]) -> Bool: - if len(left) != len(right): - return False - for i in range(len(left)): - if left[i] != right[i]: - return False - return True - - -fn equals(left: List[Float16], right: List[Float16]) -> Bool: - if len(left) != len(right): - return False - for i in range(len(left)): - if left[i] != right[i]: - return False - return True - - -fn equals(left: List[Float32], right: List[Float32]) -> Bool: - if len(left) != len(right): - return False - for i in range(len(left)): - if left[i] != right[i]: - return False - return True - - -fn equals(left: List[Float64], right: List[Float64]) -> Bool: - if len(left) != len(right): - return False - for i in range(len(left)): - if left[i] != right[i]: - return False - return True - - -fn equals(left: List[String], right: List[String]) -> Bool: - if len(left) != len(right): - return False - for i in range(len(left)): - if left[i] != right[i]: - return False - return True - - -fn equals(left: List[StringLiteral], right: List[StringLiteral]) -> Bool: - if len(left) != len(right): - return False - for i in range(len(left)): - if left[i] != right[i]: - return False - return True - - -fn equals(left: List[Bool], right: List[Bool]) -> Bool: - if len(left) != len(right): - return False - for i in range(len(left)): - if left[i] != right[i]: - return False - return True diff --git a/gojo/bytes/reader.mojo b/gojo/bytes/reader.mojo index 9539672..90588df 100644 --- 
a/gojo/bytes/reader.mojo +++ b/gojo/bytes/reader.mojo @@ -87,7 +87,7 @@ struct Reader( """Reads and returns a single byte from the internal buffer. Implements the [io.ByteReader] Interface.""" self.prev_rune = -1 if self.index >= len(self.buffer): - return Int8(0), Error(io.EOF) + return UInt8(0), Error(io.EOF) var byte = self.buffer[int(self.index)] self.index += 1 diff --git a/gojo/fmt/fmt.mojo b/gojo/fmt/fmt.mojo index a44cdf3..3b31275 100644 --- a/gojo/fmt/fmt.mojo +++ b/gojo/fmt/fmt.mojo @@ -162,15 +162,15 @@ fn sprintf(formatting: String, *args: Args) -> String: for i in range(len(args)): var argument = args[i] if argument.isa[String](): - text = format_string(text, argument.get[String]()[]) + text = format_string(text, argument[String]) elif argument.isa[List[Byte]](): - text = format_bytes(text, argument.get[List[Byte]]()[]) + text = format_bytes(text, argument[List[Byte]]) elif argument.isa[Int](): - text = format_integer(text, argument.get[Int]()[]) + text = format_integer(text, argument[Int]) elif argument.isa[Float64](): - text = format_float(text, argument.get[Float64]()[]) + text = format_float(text, argument[Float64]) elif argument.isa[Bool](): - text = format_boolean(text, argument.get[Bool]()[]) + text = format_boolean(text, argument[Bool]) return text @@ -204,15 +204,15 @@ fn printf(formatting: String, *args: Args) raises: for i in range(len(args)): var argument = args[i] if argument.isa[String](): - text = format_string(text, argument.get[String]()[]) + text = format_string(text, argument[String]) elif argument.isa[List[Byte]](): - text = format_bytes(text, argument.get[List[Byte]]()[]) + text = format_bytes(text, argument[List[Byte]]) elif argument.isa[Int](): - text = format_integer(text, argument.get[Int]()[]) + text = format_integer(text, argument[Int]) elif argument.isa[Float64](): - text = format_float(text, argument.get[Float64]()[]) + text = format_float(text, argument[Float64]) elif argument.isa[Bool](): - text = format_boolean(text, 
argument.get[Bool]()[]) + text = format_boolean(text, argument[Bool]) else: raise Error("Unknown for argument #" + String(i)) diff --git a/gojo/io/__init__.mojo b/gojo/io/__init__.mojo index 979bee7..74b8a52 100644 --- a/gojo/io/__init__.mojo +++ b/gojo/io/__init__.mojo @@ -32,3 +32,8 @@ from .traits import ( EOF, ) from .io import write_string, read_at_least, read_full, read_all, BUFFER_SIZE + + +alias i1 = __mlir_type.i1 +alias i1_1 = __mlir_attr.`1: i1` +alias i1_0 = __mlir_attr.`0: i1` diff --git a/gojo/io/io.mojo b/gojo/io/io.mojo index 338da66..c9fc8d1 100644 --- a/gojo/io/io.mojo +++ b/gojo/io/io.mojo @@ -1,8 +1,7 @@ from collections.optional import Optional -from ..builtins import cap, copy, Byte, Error, panic +from ..builtins import cap, copy, Byte, panic from .traits import ERR_UNEXPECTED_EOF - alias BUFFER_SIZE = 4096 @@ -419,7 +418,6 @@ fn read_all[R: Reader](inout reader: R) -> (List[Byte], Error): Returns: The data read.""" var dest = List[Byte](capacity=BUFFER_SIZE) - var index: Int = 0 var at_eof: Bool = False while True: diff --git a/gojo/io/traits.mojo b/gojo/io/traits.mojo index 0fa248c..97c3aa5 100644 --- a/gojo/io/traits.mojo +++ b/gojo/io/traits.mojo @@ -1,5 +1,5 @@ from collections.optional import Optional -from ..builtins import Byte, Error +from ..builtins import Byte alias Rune = Int32 diff --git a/gojo/net/net.mojo b/gojo/net/net.mojo index 324681a..1c20df8 100644 --- a/gojo/net/net.mojo +++ b/gojo/net/net.mojo @@ -1,4 +1,4 @@ -from memory._arc import Arc +from memory.arc import Arc import ..io from ..builtins import Byte from .socket import Socket diff --git a/gojo/net/socket.mojo b/gojo/net/socket.mojo index 594cc23..10fcd7b 100644 --- a/gojo/net/socket.mojo +++ b/gojo/net/socket.mojo @@ -348,7 +348,7 @@ struct Socket(FileDescriptorBase): src: The data to send. max_attempts: The maximum number of attempts to send the data. 
""" - var header_pointer = Pointer[Int8](src.data.address).bitcast[UInt8]() + var header_pointer = src.unsafe_ptr() var total_bytes_sent = 0 var attempts = 0 diff --git a/gojo/strings/builder.mojo b/gojo/strings/builder.mojo index e7a76ed..4ae03de 100644 --- a/gojo/strings/builder.mojo +++ b/gojo/strings/builder.mojo @@ -1,5 +1,5 @@ # Adapted from https://github.com/maniartech/mojo-strings/blob/master/strings/builder.mojo -# Modified to use List[Int8] instead of List[String] +# Modified to use List[Byte] instead of List[String] import ..io from ..builtins import Byte @@ -48,7 +48,7 @@ struct StringBuilder(Stringable, Sized, io.Writer, io.ByteWriter, io.StringWrite copy.append(0) return String(copy) - fn get_bytes(self) -> List[Int8]: + fn get_bytes(self) -> List[Byte]: """ Returns a deepcopy of the byte array of the string builder. @@ -57,7 +57,7 @@ struct StringBuilder(Stringable, Sized, io.Writer, io.ByteWriter, io.StringWrite """ return List[Byte](self._vector) - fn get_null_terminated_bytes(self) -> List[Int8]: + fn get_null_terminated_bytes(self) -> List[Byte]: """ Returns a deepcopy of the byte array of the string builder with a null terminator. @@ -80,7 +80,7 @@ struct StringBuilder(Stringable, Sized, io.Writer, io.ByteWriter, io.StringWrite self._vector.extend(src) return len(src), Error() - fn write_byte(inout self, byte: Int8) -> (Int, Error): + fn write_byte(inout self, byte: Byte) -> (Int, Error): """ Appends a byte array to the builder buffer. @@ -122,7 +122,7 @@ struct StringBuilder(Stringable, Sized, io.Writer, io.ByteWriter, io.StringWrite """ return self._vector[index] - fn __setitem__(inout self, index: Int, value: Int8): + fn __setitem__(inout self, index: Int, value: Byte): """ Sets the string at the given index. @@ -131,3 +131,126 @@ struct StringBuilder(Stringable, Sized, io.Writer, io.ByteWriter, io.StringWrite value: The value to set. 
""" self._vector[index] = value + + +@value +struct NewStringBuilder(Stringable, Sized): + """ + A string builder class that allows for efficient string management and concatenation. + This class is useful when you need to build a string by appending multiple strings + together. It is around 20x faster than using the `+` operator to concatenate + strings because it avoids the overhead of creating and destroying many + intermediate strings and performs memcopy operations. + + The result is more efficient when building larger string concatenations. It + is generally not recommended to use this class for small concatenations such as + a few strings like `a + b + c + d` because the overhead of creating the string + builder and appending the strings is not worth the performance gain. + + The builder owns a heap buffer that grows on demand and is released in `__del__`. + NOTE(review): `@value` also synthesizes a memberwise copy, so a copied builder + would share (and later double free) this buffer; confirm builders are only moved. + + Example: + ``` + from gojo.strings.builder import NewStringBuilder + + var sb = NewStringBuilder() + sb.write_string("mojo") + sb.write_string("jojo") + print(sb) # mojojojo + ``` + """ + + var _vector: DTypePointer[DType.uint8] + var _size: Int + var _capacity: Int + + @always_inline + fn __init__(inout self, *, size: Int = 4096): + """ + Allocates the initial buffer. + + Args: + size: The initial capacity of the buffer in bytes. + """ + self._vector = DTypePointer[DType.uint8]().alloc(size) + self._size = 0 + self._capacity = size + + @always_inline + fn __str__(self) -> String: + """ + Converts the string builder to a string. + + Returns: + The string representation of the string builder. Returns an empty + string if the string builder is empty. + """ + # The returned String copies the referenced bytes and adds its own null + # terminator, so no scratch buffer is needed here. + return StringRef(self._vector, self._size) + + @always_inline + fn __del__(owned self): + if self._vector: + self._vector.free() + + fn _resize(inout self, capacity: Int): + """ + Moves the written bytes into a new buffer of the given capacity. + + Args: + capacity: The new buffer capacity in bytes; must be at least the current size. + """ + var new_vector = DTypePointer[DType.uint8]().alloc(capacity) + memcpy(new_vector, self._vector, self._size) + self._vector.free() + self._vector = new_vector + self._capacity = capacity + + @always_inline + fn write(inout self, src: Span[Byte]) -> (Int, Error): + """ + Appends a byte Span to the builder buffer. + + Args: + src: The byte array to append. 
+ + Returns: + The number of bytes written and a default `Error()`. + """ + var required = self._size + len(src) + if required > self._capacity: + # Grow geometrically so repeated appends do not reallocate every time. + var new_capacity = self._capacity * 2 + if new_capacity < required: + new_capacity = required + self._resize(new_capacity) + + # Append after the bytes already written instead of overwriting from index 0. + for i in range(len(src)): + self._vector[self._size] = src._data[i] + self._size += 1 + + return len(src), Error() + + @always_inline + fn write_string(inout self, src: String) -> (Int, Error): + """ + Appends a string to the builder buffer. + + Args: + src: The string to append. + """ + return self.write(src.as_bytes_slice()) + + @always_inline + fn __len__(self) -> Int: + """ + Returns the length of the string builder. + + Returns: + The length of the string builder. + """ + return self._size diff --git a/gojo/syscall/net.mojo b/gojo/syscall/net.mojo index e396d3f..f3cdb02 100644 --- a/gojo/syscall/net.mojo +++ b/gojo/syscall/net.mojo @@ -1,5 +1,6 @@ from .types import c_char, c_int, c_ushort, c_uint, c_void, c_size_t, c_ssize_t, strlen from .file import O_CLOEXEC, O_NONBLOCK +from utils.static_tuple import StaticTuple alias IPPROTO_IPV6 = 41 alias IPV6_V6ONLY = 26 @@ -64,7 +65,7 @@ fn to_char_ptr(s: String) -> Pointer[c_char]: fn c_charptr_to_string(s: Pointer[c_char]) -> String: - return String(s.bitcast[Int8](), strlen(s)) + return String(s.bitcast[UInt8](), strlen(s)) fn cftob(val: c_int) -> Bool: diff --git a/gojo/unicode/utf8/runes.mojo b/gojo/unicode/utf8/runes.mojo index 56da84b..7346162 100644 --- a/gojo/unicode/utf8/runes.mojo +++ b/gojo/unicode/utf8/runes.mojo @@ -6,7 +6,7 @@ from ...builtins import Rune from algorithm.functional import vectorize from memory.unsafe import DTypePointer from sys.info import simdwidthof -from math.bit import ctlz +from bit import countl_zero # The default lowest and highest continuation byte. @@ -322,32 +322,13 @@ fn rune_count_in_string(s: String) -> Int: Returns: The number of runes in the string. 
""" - var p = s._as_ptr().bitcast[DType.uint8]() + var p = DTypePointer[DType.uint8](s.unsafe_uint8_ptr()) var string_byte_length = len(s) var result = 0 @parameter fn count[simd_width: Int](offset: Int): - result += int(((p.load[width=simd_width](offset) >> 6) != 0b10).cast[DType.uint8]().reduce_add()) + result += int(((p.load[width=simd_width](offset) >> 6) != 0b10).reduce_add()) vectorize[count, simd_width_u8](string_byte_length) return result - - -fn string_iterator(s: String, func: fn (String) -> None): - """Iterate over the runes in a string and call the given function with each rune. - - Args: - s: The string to iterate over. - func: The function to call with each rune. - """ - var bytes = len(s) - var p = s._as_ptr().bitcast[DType.uint8]() - while bytes > 0: - var char_length = int((p.load() >> 7 == 0).cast[DType.uint8]() * 1 + ctlz(~p.load())) - var sp = DTypePointer[DType.int8].alloc(char_length + 1) - memcpy(sp, p.bitcast[DType.int8](), char_length) - sp[char_length] = 0 - func(String(sp, char_length + 1)) - bytes -= char_length - p += char_length diff --git a/tests/test_performance.mojo b/tests/test_performance.mojo index d0449f2..a2ace9f 100644 --- a/tests/test_performance.mojo +++ b/tests/test_performance.mojo @@ -1,5 +1,5 @@ from time import now -from gojo.strings import StringBuilder +from gojo.strings.builder import StringBuilder, NewStringBuilder from gojo.bytes import buffer from goodies import STDWriter @@ -7,8 +7,9 @@ from goodies import STDWriter fn test_string_builder() raises: print("Testing string builder performance") # Create a string from the buffer + var builder_write_start_time = now() var builder = StringBuilder() - for i in range(100): + for _ in range(100): _ = builder.write_string( "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod" " tempor incididunt ut labore et dolore magna aliqua. 
Ut enim ad minim" @@ -18,11 +19,34 @@ fn test_string_builder() raises: " occaecat cupidatat non proident, sunt in culpa qui officia deserunt" " mollit anim id est laborum." ) + var builder_write_execution_time = now() - builder_write_start_time + print("StringBuilder:", "(", builder_write_execution_time, "ns)") var builder_start_time = now() var output = str(builder) var builder_execution_time = now() - builder_start_time + print("Testing new string builder performance") + # Create a string from the buffer + var new_builder_write_start_time = now() + var new_builder = NewStringBuilder() + for _ in range(100): + _ = new_builder.write_string( + "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod" + " tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim" + " veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea" + " commodo consequat. Duis aute irure dolor in reprehenderit in voluptate" + " velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint" + " occaecat cupidatat non proident, sunt in culpa qui officia deserunt" + " mollit anim id est laborum." 
+ ) + var new_builder_write_execution_time = now() - new_builder_write_start_time + print("NewStringBuilder:", "(", new_builder_write_execution_time, "ns)") + + var new_builder_start_time = now() + var new_output = str(new_builder) + var new_builder_execution_time = now() - new_builder_start_time + # Create a string using the + operator print("Testing string concatenation performance") var vec = List[String]() @@ -62,6 +86,7 @@ fn test_string_builder() raises: var buffer_execution_time = now() - buffer_start_time print("StringBuilder:", "(", builder_execution_time, "ns)") + print("NewStringBuilder:", "(", new_builder_execution_time, "ns)") print("String concat:", "(", concat_execution_time, "ns)") print("Bytes Buffer:", "(", buffer_execution_time, "ns)") print( @@ -69,6 +94,8 @@ fn test_string_builder() raises: str(concat_execution_time - builder_execution_time) + "ns", ": StringBuilder is ", str(concat_execution_time // builder_execution_time) + "x faster", + ": NewStringBuilder is ", + str(concat_execution_time // new_builder_execution_time) + "x faster", ": Bytes Buffer is ", str(concat_execution_time // buffer_execution_time) + "x faster", ) @@ -96,3 +123,27 @@ fn test_std_writer_speed() raises: fn main() raises: # test_std_writer_speed() test_string_builder() + + print("Testing new string builder performance") + # Create a string from the buffer + # var new_builder_write_start_time = now() + # var new_builder = NewStringBuilder() + # for _ in range(100): + # _ = new_builder.write_string( + # "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod" + # " tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim" + # " veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea" + # " commodo consequat. Duis aute irure dolor in reprehenderit in voluptate" + # " velit esse cillum dolore eu fugiat nulla pariatur. 
Excepteur sint" + # " occaecat cupidatat non proident, sunt in culpa qui officia deserunt" + # " mollit anim id est laborum." + # ) + # var new_builder_write_execution_time = now() - new_builder_write_start_time + # print("NewStringBuilder:", "(", new_builder_write_execution_time, "ns)") + + # var new_builder_start_time = now() + # var new_output = str(new_builder) + # var new_builder_execution_time = now() - new_builder_start_time + # print("NewStringBuilder:", "(", new_builder_execution_time, "ns)") + # # print(new_output) + # print("done")