Skip to content

Commit

Permalink
Feature/24.2 updates (#28)
Browse files Browse the repository at this point in the history
* quick fixes, probably didn't cover everything. Waiting for changelog

* pull mojo-csv changes to csv module
  • Loading branch information
thatstoasty authored Apr 1, 2024
1 parent 1af654d commit 49601d4
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 36 deletions.
10 changes: 3 additions & 7 deletions external/csv/csv_builder.mojo
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
"""https://github.com/mzaks/mojo-csv/tree/main all sourced from Maxim's mojo-csv repository!"""

from memory.memory import memcpy
from memory.buffer import Buffer, Dim
from buffer import Buffer, Dim
from .string_utils import find_indices, contains_any_of, string_from_pointer

alias BufferType = Buffer[DType.int8]
Expand Down Expand Up @@ -52,9 +50,7 @@ struct CsvBuilder:
var size = len(s)
self.push(s, False)

fn push_stringabel[
T: Stringable
](inout self, value: T, consider_escaping: Bool = False):
fn push_stringabel[T: Stringable](inout self, value: T, consider_escaping: Bool = False):
self.push(str(value), consider_escaping)

fn push_empty(inout self):
Expand Down Expand Up @@ -134,4 +130,4 @@ fn escape_quotes_in(s: String) -> String:

var last_index = indices[i_size - 1].to_int()
memcpy(p_result.offset(offset), p_current.offset(last_index), size - last_index)
return string_from_pointer(p_result, size + i_size)
return string_from_pointer(p_result, size + i_size)
6 changes: 2 additions & 4 deletions external/csv/csv_table.mojo
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
"""https://github.com/mzaks/mojo-csv/tree/main all sourced from Maxim's mojo-csv repository!"""

from .string_utils import find_indices, string_from_pointer
from algorithm.functional import vectorize
from sys.info import simdwidthof
Expand Down Expand Up @@ -82,7 +80,7 @@ struct CsvTable:
@always_inline
@parameter
fn find_indicies[simd_width: Int](offset: Int):
var chars = p.simd_load[simd_width](offset)
var chars = p.load[width=simd_width](offset)
var quotes = chars == QUOTE
var commas = chars == COMMA
var lfs = chars == LF
Expand Down Expand Up @@ -163,4 +161,4 @@ struct CsvTable:
return self._inner_string[self._starts[index] : self._ends[index]]

fn row_count(self) -> Int:
return len(self._starts) // self.column_count
return len(self._starts) // self.column_count
34 changes: 10 additions & 24 deletions external/csv/string_utils.mojo
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
"""https://github.com/mzaks/mojo-csv/tree/main all sourced from Maxim's mojo-csv repository!"""

from algorithm.functional import vectorize
from sys.info import simdwidthof
from sys.intrinsics import compressed_store
Expand All @@ -10,45 +8,37 @@ from collections.vector import InlinedFixedVector

alias simd_width_i8 = simdwidthof[DType.int8]()


fn vectorize_and_exit[
simd_width: Int, workgroup_function: fn[i: Int] (Int) capturing -> Bool
](size: Int):
fn vectorize_and_exit[simd_width: Int, workgroup_function: fn[i: Int](Int) capturing -> Bool](size: Int):
var loops = size // simd_width
for i in range(loops):
if workgroup_function[simd_width](i * simd_width):
return

var rest = size & (simd_width - 1)

@parameter
if simd_width >= 64:
if rest >= 32:
if workgroup_function[32](size - rest):
return
rest -= 32

@parameter
if simd_width >= 32:
if rest >= 16:
if workgroup_function[16](size - rest):
return
rest -= 16

@parameter
if simd_width >= 16:
if rest >= 8:
if workgroup_function[8](size - rest):
return
rest -= 8

@parameter
if simd_width >= 8:
if rest >= 4:
if workgroup_function[4](size - rest):
return
rest -= 4

@parameter
if simd_width >= 4:
if rest >= 2:
Expand All @@ -57,7 +47,7 @@ fn vectorize_and_exit[
rest -= 2

if rest == 1:
_ = workgroup_function[1](size - rest)
_= workgroup_function[1](size - rest)


fn find_indices(s: String, c: String) -> List[UInt64]:
Expand All @@ -73,18 +63,14 @@ fn find_indices(s: String, c: String) -> List[UInt64]:
if p.offset(offset).load() == char:
return result.append(offset)
else:
var chunk = p.simd_load[simd_width](offset)
var chunk = p.load[width=simd_width](offset)
var occurrence = chunk == char
var offsets = iota[DType.uint64, simd_width]() + offset
var occurrence_count = reduce_bit_count(occurrence)
var current_len = len(result)
result.reserve(current_len + occurrence_count)
result.resize(current_len + occurrence_count, 0)
compressed_store(
offsets,
DTypePointer[DType.uint64](result.data.value).offset(current_len),
occurrence,
)
compressed_store(offsets, DTypePointer[DType.uint64](result.data.value).offset(current_len), occurrence)

vectorize[find, simd_width_i8](size)
return result
Expand All @@ -108,7 +94,7 @@ fn occurrence_count(s: String, *c: String) -> Int:
result += 1
return
else:
var chunk = p.simd_load[simd_width](offset)
var chunk = p.load[width=simd_width](offset)

var occurrence = SIMD[DType.bool, simd_width](False)
for i in range(len(chars)):
Expand All @@ -131,7 +117,7 @@ fn contains_any_of(s: String, *c: String) -> Bool:

@parameter
fn find[simd_width: Int](i: Int) -> Bool:
var chunk = p.simd_load[simd_width]()
var chunk = p.load[width=simd_width]()
p = p.offset(simd_width)
for i in range(len(chars)):
var occurrence = chunk == chars[i]
Expand All @@ -153,7 +139,7 @@ fn string_from_pointer(p: DTypePointer[DType.int8], length: Int) -> String:


fn print_v(v: List[UInt64]):
print_no_newline("(", len(v), ")", "[")
print("(" + str(len(v)) + ")[")
for i in range(len(v)):
print_no_newline(v[i], ",")
print("]")
var end = ", " if i < len(v) - 1 else "]\n"
print(v[i], ",")
1 change: 0 additions & 1 deletion gojo/builtins/_bytes.mojo
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from .errors import panic


alias Byte = Int8


Expand Down

0 comments on commit 49601d4

Please sign in to comment.