@@ -30,7 +30,9 @@ struct Scanner[R: io.Reader]():
30
30
var split : SplitFunction # The function to split the tokens.
31
31
var max_token_size : Int # Maximum size of a token; modified by tests.
32
32
var token : List[Byte] # Last token returned by split.
33
- var buf : List[Byte] # buffer used as argument to split.
33
+ var data : UnsafePointer[UInt8] # contents are the bytes buf[off : len(buf)]
34
+ var size : Int
35
+ var capacity : Int
34
36
var start : Int # First non-processed byte in buf.
35
37
var end : Int # End of data in buf.
36
38
var empties : Int # Count of successive empty tokens.
@@ -44,7 +46,9 @@ struct Scanner[R: io.Reader]():
44
46
split : SplitFunction = scan_lines,
45
47
max_token_size : Int = MAX_SCAN_TOKEN_SIZE ,
46
48
token : List[Byte] = List[Byte](capacity = io.BUFFER_SIZE ),
47
- buf : List[Byte] = List[Byte](capacity = io.BUFFER_SIZE ),
49
+ data : UnsafePointer[UInt8] = UnsafePointer[UInt8](),
50
+ size : Int = 0 ,
51
+ capacity : Int = io.BUFFER_SIZE ,
48
52
start : Int = 0 ,
49
53
end : Int = 0 ,
50
54
empties : Int = 0 ,
@@ -55,14 +59,42 @@ struct Scanner[R: io.Reader]():
55
59
self .split = split
56
60
self .max_token_size = max_token_size
57
61
self .token = token
58
- self .buf = buf
62
+ self .data = data
63
+ self .size = size
64
+ self .capacity = capacity
59
65
self .start = start
60
66
self .end = end
61
67
self .empties = empties
62
68
self .scan_called = scan_called
63
69
self .done = done
64
70
self .err = Error()
65
71
72
@always_inline
fn _resize(inout self, capacity: Int) -> None:
    """
    Reallocates the scanner's internal byte buffer with a new capacity.

    The bytes currently held (at most `self.size` of them) are copied into
    the new allocation, the previous allocation is freed, and `self.data`
    and `self.capacity` are updated to describe the new buffer.

    Args:
        capacity: The capacity, in bytes, of the new buffer.
    """
    var new_data = UnsafePointer[UInt8].alloc(capacity)

    # Never copy more bytes than the new allocation can hold; copying
    # `self.size` bytes unconditionally would overrun `new_data` whenever
    # the buffer is being shrunk.
    var preserved = min(self.size, capacity)

    # `data` may still be the null pointer the constructor defaults to, so
    # guard both the copy and the free, the same way `__del__` does.
    if self.data:
        if preserved > 0:
            memcpy(new_data, self.data, preserved)
        self.data.free()

    self.data = new_data
    # Only changes `size` when shrinking, keeping `size <= capacity`.
    self.size = preserved
    self.capacity = capacity
87
+
88
@always_inline
fn __del__(owned self):
    """Frees the buffer behind `data` when the scanner is destroyed.

    Nothing is freed when `data` evaluates to False.
    """
    if not self.data:
        return
    self.data.free()
92
+
93
@always_inline
fn as_bytes_slice(self: Reference[Self]) -> Span[UInt8, self.is_mutable, self.lifetime]:
    """Exposes the scanner's buffer as a `Span[UInt8]`.

    The span is built directly over the `data` pointer with a length of
    `size`, and is parameterized by the mutability and lifetime of the
    `self` reference.
    """
    var buffer_ptr = self[].data
    var buffer_len = self[].size
    return Span[UInt8, self.is_mutable, self.lifetime](unsafe_ptr=buffer_ptr, len=buffer_len)
97
+
66
98
fn current_token_as_bytes (self ) -> List[Byte]:
67
99
""" Returns the most recent token generated by a call to [Scanner.Scan].
68
100
The underlying array may point to data that will be overwritten
@@ -73,7 +105,9 @@ struct Scanner[R: io.Reader]():
73
105
fn current_token(self) -> String:
    """Returns the most recent token generated by a call to [Scanner.Scan]
    as a newly allocated string holding its bytes.

    The stored token is left untouched: a copy is made and a trailing 0 byte
    is appended to that copy before it is handed to `String`.
    """
    var terminated = self.token
    # NOTE(review): the 0 byte is presumably the NUL terminator that
    # `String` expects at the end of a byte list - confirm against the
    # Mojo version in use.
    terminated.append(0)
    return String(terminated^)
77
111
78
112
fn scan (inout self ) raises -> Bool:
79
113
""" Advances the [Scanner] to the next token, which will then be
@@ -101,7 +135,7 @@ struct Scanner[R: io.Reader]():
101
135
var at_eof = False
102
136
if self .err:
103
137
at_eof = True
104
- advance, token, err = self .split(self .buf [self .start : self .end], at_eof)
138
+ advance, token, err = self .split(self .as_bytes_slice() [self .start : self .end], at_eof)
105
139
if err:
106
140
if str (err) == str (ERR_FINAL_TOKEN ):
107
141
self .token = token
@@ -140,27 +174,24 @@ struct Scanner[R: io.Reader]():
140
174
# Must read more data.
141
175
# First, shift data to beginning of buffer if there's lots of empty space
142
176
# or space is needed.
143
- if self .start > 0 and (self .end == len ( self .buf) or self .start > int (len ( self .buf) / 2 )):
144
- _ = copy( self .buf, self .buf [self .start : self .end])
177
+ if self .start > 0 and (self .end == self .size or self .start > int (self .size / 2 )):
178
+ self .data = self .as_bytes_slice() [self .start : self .end].unsafe_ptr( )
145
179
self .end -= self .start
146
180
self .start = 0
147
181
148
182
# Is the buffer full? If so, resize.
149
- if self .end == len ( self .buf) :
183
+ if self .end == self .size :
150
184
# Guarantee no overflow in the multiplication below.
151
- if len ( self .buf) >= self .max_token_size or len ( self .buf) > int (MAX_INT / 2 ):
152
- self .set_err(Error( str ( ERR_TOO_LONG )) )
185
+ if self .size >= self .max_token_size or self .size > int (MAX_INT / 2 ):
186
+ self .set_err(ERR_TOO_LONG )
153
187
return False
154
188
155
- var new_size = len ( self .buf) * 2
189
+ var new_size = self .size * 2
156
190
if new_size == 0 :
157
191
new_size = START_BUF_SIZE
158
192
159
- # Make a new List[Byte] buffer and copy the elements in
160
193
new_size = min (new_size, self .max_token_size)
161
- var new_buf = List[Byte](capacity = new_size)
162
- _ = copy(new_buf, self .buf[self .start : self .end])
163
- self .buf = new_buf
194
+ self ._resize(new_size)
164
195
self .end -= self .start
165
196
self .start = 0
166
197
@@ -170,13 +201,13 @@ struct Scanner[R: io.Reader]():
170
201
var loop = 0
171
202
while True :
172
203
var bytes_read : Int
173
- var sl = self .buf [self .end : len ( self .buf)]
204
+ var sl = List[UInt8]( self .as_bytes_slice() [self .end : self .size])
174
205
var err : Error
175
206
176
207
# Catch any reader errors and set the internal error field to that err instead of bubbling it up.
177
208
bytes_read, err = self .reader.read(sl)
178
- _ = copy( self .buf, sl, self .end )
179
- if bytes_read < 0 or len ( self .buf) - self .end < bytes_read:
209
+ self .data = sl.steal_data( )
210
+ if bytes_read < 0 or self .size - self .end < bytes_read:
180
211
self .set_err(Error(str (ERR_BAD_READ_COUNT )))
181
212
break
182
213
0 commit comments