Skip to content
This repository was archived by the owner on Feb 14, 2025. It is now read-only.

Commit f342d48

Browse files
committed
use inlinelist for bufio
1 parent 8da061b commit f342d48

File tree

9 files changed

+567
-664
lines changed

9 files changed

+567
-664
lines changed

gojo/bufio/bufio.mojo

Lines changed: 403 additions & 375 deletions
Large diffs are not rendered by default.

gojo/bufio/scan.mojo

Lines changed: 49 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@ struct Scanner[R: io.Reader]():
3030
var split: SplitFunction # The function to split the tokens.
3131
var max_token_size: Int # Maximum size of a token; modified by tests.
3232
var token: List[Byte] # Last token returned by split.
33-
var buf: List[Byte] # buffer used as argument to split.
33+
var data: UnsafePointer[UInt8] # Pointer to the buffer used as argument to split.
34+
var size: Int
35+
var capacity: Int
3436
var start: Int # First non-processed byte in buf.
3537
var end: Int # End of data in buf.
3638
var empties: Int # Count of successive empty tokens.
@@ -44,7 +46,9 @@ struct Scanner[R: io.Reader]():
4446
split: SplitFunction = scan_lines,
4547
max_token_size: Int = MAX_SCAN_TOKEN_SIZE,
4648
token: List[Byte] = List[Byte](capacity=io.BUFFER_SIZE),
47-
buf: List[Byte] = List[Byte](capacity=io.BUFFER_SIZE),
49+
data: UnsafePointer[UInt8] = UnsafePointer[UInt8](),
50+
size: Int = 0,
51+
capacity: Int = io.BUFFER_SIZE,
4852
start: Int = 0,
4953
end: Int = 0,
5054
empties: Int = 0,
@@ -55,14 +59,42 @@ struct Scanner[R: io.Reader]():
5559
self.split = split
5660
self.max_token_size = max_token_size
5761
self.token = token
58-
self.buf = buf
62+
self.data = data
63+
self.size = size
64+
self.capacity = capacity
5965
self.start = start
6066
self.end = end
6167
self.empties = empties
6268
self.scan_called = scan_called
6369
self.done = done
6470
self.err = Error()
6571

72+
@always_inline
73+
fn _resize(inout self, capacity: Int) -> None:
74+
"""
75+
Resizes the scanner's internal buffer.
76+
77+
Args:
78+
capacity: The new capacity of the scanner's internal buffer.
79+
"""
80+
var new_data = UnsafePointer[UInt8]().alloc(capacity)
81+
memcpy(new_data, self.data, self.size)
82+
self.data.free()
83+
self.data = new_data
84+
self.capacity = capacity
85+
86+
return None
87+
88+
@always_inline
89+
fn __del__(owned self):
90+
if self.data:
91+
self.data.free()
92+
93+
@always_inline
94+
fn as_bytes_slice(self: Reference[Self]) -> Span[UInt8, self.is_mutable, self.lifetime]:
95+
"""Returns the internal buffer data as a Span[UInt8]."""
96+
return Span[UInt8, self.is_mutable, self.lifetime](unsafe_ptr=self[].data, len=self[].size)
97+
6698
fn current_token_as_bytes(self) -> List[Byte]:
6799
"""Returns the most recent token generated by a call to [Scanner.Scan].
68100
The underlying array may point to data that will be overwritten
@@ -73,7 +105,9 @@ struct Scanner[R: io.Reader]():
73105
fn current_token(self) -> String:
74106
"""Returns the most recent token generated by a call to [Scanner.Scan]
75107
as a newly allocated string holding its bytes."""
76-
return String(self.token)
108+
var copy = self.token
109+
copy.append(0)
110+
return String(copy)
77111

78112
fn scan(inout self) raises -> Bool:
79113
"""Advances the [Scanner] to the next token, which will then be
@@ -101,7 +135,7 @@ struct Scanner[R: io.Reader]():
101135
var at_eof = False
102136
if self.err:
103137
at_eof = True
104-
advance, token, err = self.split(self.buf[self.start : self.end], at_eof)
138+
advance, token, err = self.split(self.as_bytes_slice()[self.start : self.end], at_eof)
105139
if err:
106140
if str(err) == str(ERR_FINAL_TOKEN):
107141
self.token = token
@@ -140,27 +174,24 @@ struct Scanner[R: io.Reader]():
140174
# Must read more data.
141175
# First, shift data to beginning of buffer if there's lots of empty space
142176
# or space is needed.
143-
if self.start > 0 and (self.end == len(self.buf) or self.start > int(len(self.buf) / 2)):
144-
_ = copy(self.buf, self.buf[self.start : self.end])
177+
if self.start > 0 and (self.end == self.size or self.start > int(self.size / 2)):
178+
self.data = self.as_bytes_slice()[self.start : self.end].unsafe_ptr()
145179
self.end -= self.start
146180
self.start = 0
147181

148182
# Is the buffer full? If so, resize.
149-
if self.end == len(self.buf):
183+
if self.end == self.size:
150184
# Guarantee no overflow in the multiplication below.
151-
if len(self.buf) >= self.max_token_size or len(self.buf) > int(MAX_INT / 2):
152-
self.set_err(Error(str(ERR_TOO_LONG)))
185+
if self.size >= self.max_token_size or self.size > int(MAX_INT / 2):
186+
self.set_err(ERR_TOO_LONG)
153187
return False
154188

155-
var new_size = len(self.buf) * 2
189+
var new_size = self.size * 2
156190
if new_size == 0:
157191
new_size = START_BUF_SIZE
158192

159-
# Make a new List[Byte] buffer and copy the elements in
160193
new_size = min(new_size, self.max_token_size)
161-
var new_buf = List[Byte](capacity=new_size)
162-
_ = copy(new_buf, self.buf[self.start : self.end])
163-
self.buf = new_buf
194+
self._resize(new_size)
164195
self.end -= self.start
165196
self.start = 0
166197

@@ -170,13 +201,13 @@ struct Scanner[R: io.Reader]():
170201
var loop = 0
171202
while True:
172203
var bytes_read: Int
173-
var sl = self.buf[self.end : len(self.buf)]
204+
var sl = List[UInt8](self.as_bytes_slice()[self.end : self.size])
174205
var err: Error
175206

176207
# Catch any reader errors and set the internal error field to that err instead of bubbling it up.
177208
bytes_read, err = self.reader.read(sl)
178-
_ = copy(self.buf, sl, self.end)
179-
if bytes_read < 0 or len(self.buf) - self.end < bytes_read:
209+
self.data = sl.steal_data()
210+
if bytes_read < 0 or self.size - self.end < bytes_read:
180211
self.set_err(Error(str(ERR_BAD_READ_COUNT)))
181212
break
182213

gojo/builtins/attributes.mojo

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
from collections import InlineList
2+
3+
14
fn copy[T: CollectionElement](inout target: List[T], source: List[T], start: Int = 0) -> Int:
25
"""Copies the contents of source into target at the same index. Returns the number of bytes copied.
36
Added a start parameter to specify the index to start copying into.
@@ -46,6 +49,30 @@ fn copy[T: CollectionElement](inout target: List[T], source: Span[T], start: Int
4649
return count
4750

4851

52+
fn copy[T: CollectionElement](inout list: InlineList[T], source: Span[T], start: Int = 0) -> Int:
53+
"""Copies the contents of source into list at the same index. Returns the number of elements copied.
54+
Added a start parameter to specify the index to start copying into.
55+
56+
Args:
57+
list: The buffer to copy into.
58+
source: The buffer to copy from.
59+
start: The index to start copying into.
60+
61+
Returns:
62+
The number of elements copied.
63+
"""
64+
var count = 0
65+
66+
for i in range(len(source)):
67+
if i + start > len(list):
68+
list[i + start] = source[i]
69+
else:
70+
list.append(source[i])
71+
count += 1
72+
73+
return count
74+
75+
4976
fn copy(
5077
inout target: List[UInt8],
5178
source: DTypePointer[DType.uint8],
@@ -78,27 +105,6 @@ fn copy(
78105
return count
79106

80107

81-
# fn copy[T: CollectionElement](inout target: Span[T], source: Span[T], start: Int = 0) -> Int:
82-
# """Copies the contents of source into target at the same index. Returns the number of bytes copied.
83-
# Added a start parameter to specify the index to start copying into.
84-
85-
# Args:
86-
# target: The buffer to copy into.
87-
# source: The buffer to copy from.
88-
# start: The index to start copying into.
89-
90-
# Returns:
91-
# The number of bytes copied.
92-
# """
93-
# var count = 0
94-
95-
# for i in range(len(source)):
96-
# target[i + start] = source[i]
97-
# count += 1
98-
99-
# return count
100-
101-
102108
fn cap[T: CollectionElement](iterable: List[T]) -> Int:
103109
"""Returns the capacity of the List.
104110

gojo/bytes/buffer.mojo

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -485,5 +485,4 @@ fn new_buffer(owned s: String) -> Buffer:
485485
Returns:
486486
A new [Buffer] initialized with the provided string.
487487
"""
488-
var bytes_buffer = List[Byte](s.as_bytes())
489-
return Buffer(bytes_buffer^)
488+
return Buffer(s.as_bytes())

gojo/bytes/reader.mojo

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@ struct Reader(
5656
slice."""
5757
return self.size - int(self.index)
5858

59+
@always_inline
60+
fn __del__(owned self):
61+
if self.data:
62+
self.data.free()
63+
5964
@always_inline
6065
fn as_bytes_slice(self: Reference[Self]) -> Span[UInt8, self.is_mutable, self.lifetime]:
6166
"""Returns the internal data as a Span[UInt8]."""

gojo/io/io.mojo

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
from collections.optional import Optional
2-
from ..builtins import cap, copy, Byte, panic
1+
from ..builtins import copy, Byte, panic
32
from .traits import ERR_UNEXPECTED_EOF
43

54
alias BUFFER_SIZE = 4096

0 commit comments

Comments
 (0)