1- # This file is a part of Julia . License is MIT: http://julialang.org/license
1+ # This file is a part of StringEncodings.jl . License is MIT: http://julialang.org/license
22
33module StringEncodings
44import Base: close, eof, flush, read, readall, write, show
@@ -8,6 +8,7 @@ export StringEncoder, StringDecoder, encode, decode, encodings
88export StringEncodingError, OutputBufferError, IConvError
99export InvalidEncodingError, InvalidSequenceError, IncompleteSequenceError
1010
11+ include (" encodings.jl" )
1112
1213abstract StringEncodingError
1314
@@ -62,7 +63,7 @@ function iconv_close(cd::Ptr{Void})
6263 end
6364end
6465
65- function iconv_open (tocode, fromcode)
66+ function iconv_open (tocode:: ASCIIString , fromcode:: ASCIIString )
6667 p = ccall ((:iconv_open , libiconv), Ptr{Void}, (Cstring, Cstring), tocode, fromcode)
6768 if p != Ptr {Void} (- 1 )
6869 return p
@@ -173,14 +174,16 @@ end
173174# # StringEncoder
174175
175176"""
176- StringEncoder(istream, to, from="UTF-8")
177+ StringEncoder(ostream, to, from=enc "UTF-8")
177178
178179Returns a new write-only I/O stream, which converts any text in the encoding `from`
179180written to it into text in the encoding `to` written to ostream. Calling `close` on the
180181stream is necessary to complete the encoding (but does not close `ostream`).
182+
183+ `to` and `from` can be specified either as a string or as an `Encoding` object.
181184"""
182- function StringEncoder (ostream:: IO , to:: ASCIIString , from:: ASCIIString = " UTF-8" )
183- cd = iconv_open (to, from)
185+ function StringEncoder (ostream:: IO , to:: Encoding , from:: Encoding = enc " UTF-8" )
186+ cd = iconv_open (ASCIIString (to), ASCIIString ( from) )
184187 inbuf = Vector {UInt8} (BUFSIZE)
185188 outbuf = Vector {UInt8} (BUFSIZE)
186189 s = StringEncoder (ostream, cd, inbuf, outbuf,
@@ -190,6 +193,11 @@ function StringEncoder(ostream::IO, to::ASCIIString, from::ASCIIString="UTF-8")
190193 s
191194end
192195
# Convenience constructors accepting encoding names given as strings.
# Each string argument is wrapped in an `Encoding` and the call is forwarded to
# the `StringEncoder(ostream, to::Encoding, from::Encoding)` method, so `to` and
# `from` may each independently be a string or an `Encoding` object.
StringEncoder(ostream::IO, to::AbstractString, from::Encoding=enc"UTF-8") =
    StringEncoder(ostream, Encoding(to), from)
StringEncoder(ostream::IO, to::AbstractString, from::AbstractString) =
    StringEncoder(ostream, Encoding(to), Encoding(from))
# Mixed case: `to` given as an `Encoding` and `from` given as a string
StringEncoder(ostream::IO, to::Encoding, from::AbstractString) =
    StringEncoder(ostream, to, Encoding(from))
200+
193201# Flush input buffer and convert it into output buffer
194202# Returns the number of bytes written to output buffer
195203function flush (s:: StringEncoder )
@@ -226,16 +234,18 @@ end
226234# # StringDecoder
227235
228236"""
229- StringDecoder(istream, from, to= "UTF-8")
237+ StringDecoder(istream, from, to=enc "UTF-8")
230238
231239Returns a new read-only I/O stream, which converts text in the encoding `from`
232240read from `istream` into text in the encoding `to`.
233241
242+ `to` and `from` can be specified either as a string or as an `Encoding` object.
243+
234244Note that some implementations (notably the Windows one) may accept invalid sequences
235245in the input data without raising an error.
236246"""
237- function StringDecoder (istream:: IO , from:: ASCIIString , to:: ASCIIString = " UTF-8" )
238- cd = iconv_open (to, from)
247+ function StringDecoder (istream:: IO , from:: Encoding , to:: Encoding = enc " UTF-8" )
248+ cd = iconv_open (ASCIIString (to), ASCIIString ( from) )
239249 inbuf = Vector {UInt8} (BUFSIZE)
240250 outbuf = Vector {UInt8} (BUFSIZE)
241251 s = StringDecoder (istream, cd, inbuf, outbuf,
@@ -245,6 +255,11 @@ function StringDecoder(istream::IO, from::ASCIIString, to::ASCIIString="UTF-8")
245255 s
246256end
247257
# Convenience constructors accepting encoding names given as strings.
# Each string argument is wrapped in an `Encoding` and the call is forwarded to
# the `StringDecoder(istream, from::Encoding, to::Encoding)` method, so `from` and
# `to` may each independently be a string or an `Encoding` object.
StringDecoder(istream::IO, from::AbstractString, to::Encoding=enc"UTF-8") =
    StringDecoder(istream, Encoding(from), to)
StringDecoder(istream::IO, from::AbstractString, to::AbstractString) =
    StringDecoder(istream, Encoding(from), Encoding(to))
# Mixed case: `from` given as an `Encoding` and `to` given as a string
StringDecoder(istream::IO, from::Encoding, to::AbstractString) =
    StringDecoder(istream, from, Encoding(to))
262+
248263# Fill input buffer and convert it into output buffer
249264# Returns the number of bytes written to output buffer
250265function fill_buffer! (s:: StringDecoder )
@@ -289,68 +304,67 @@ end
289304# # Convenience I/O functions
if isdefined(Base, :readstring)
    @doc """
        readstring(stream or filename, enc)

    Read the entire contents of an I/O stream or a file in encoding `enc` as a string.

    `enc` can be specified either as a string or as an `Encoding` object.
    """ ->
    Base.readstring(s::IO, enc::Encoding) = readstring(StringDecoder(s, enc))
    Base.readstring(filename::AbstractString, enc::Encoding) = open(io->readstring(io, enc), filename)
    # Encoding names given as plain strings are converted to `Encoding` objects first,
    # so that calls such as `readstring(io, "UTF-16")` keep working
    Base.readstring(s::IO, enc::AbstractString) = readstring(s, Encoding(enc))
    Base.readstring(filename::AbstractString, enc::AbstractString) = readstring(filename, Encoding(enc))
else # Compatibility with Julia 0.4
    @doc """
        readall(stream or filename, enc)

    Read the entire contents of an I/O stream or a file in encoding `enc` as a string.

    `enc` can be specified either as a string or as an `Encoding` object.
    """ ->
    Base.readall(s::IO, enc::Encoding) = readall(StringDecoder(s, enc))
    Base.readall(filename::AbstractString, enc::Encoding) = open(io->readall(io, enc), filename)
    # Encoding names given as plain strings are converted to `Encoding` objects first,
    # so that calls such as `readall(io, "UTF-16")` keep working
    Base.readall(s::IO, enc::AbstractString) = readall(s, Encoding(enc))
    Base.readall(filename::AbstractString, enc::AbstractString) = readall(filename, Encoding(enc))
end
307322
308323
309324# # Functions to encode/decode strings
310325
311- encoding_string (:: Type{ASCIIString} ) = " ASCII"
312- encoding_string (:: Type{UTF8String} ) = " UTF-8"
313- encoding_string (:: Type{UTF16String} ) = (ENDIAN_BOM == 0x04030201 ) ? " UTF-16LE" : " UTF-16BE"
314- encoding_string (:: Type{UTF32String} ) = (ENDIAN_BOM == 0x04030201 ) ? " UTF-32LE" : " UTF-32BE"
315-
"""
    decode([T,] a::Vector{UInt8}, enc)

Decode the bytes in `a`, interpreted as text in encoding `enc`, into a string of type `T`.
When `T` is omitted, a `UTF8String` is returned.

`enc` can be specified either as a string or as an `Encoding` object.

Note that some implementations (notably the Windows one) may accept invalid sequences
in the input data without raising an error.
"""
function decode{T<:AbstractString}(::Type{T}, a::Vector{UInt8}, enc::Encoding)
    input = IOBuffer(a)
    try
        # Convert from `enc` into the encoding associated with the string type `T`
        decoder = StringDecoder(input, enc, encoding(T))
        return T(readbytes(decoder))
    finally
        # Close the input buffer whether or not decoding succeeded
        close(input)
    end
end
333345
# Encoding names given as strings are converted to `Encoding` objects before decoding
decode{T<:AbstractString}(::Type{T}, a::Vector{UInt8}, enc::AbstractString) =
    decode(T, a, Encoding(enc))

# Without an explicit string type, decode to `UTF8String`.
# One method per kind of `enc`, so that no two methods overlap.
decode(a::Vector{UInt8}, enc::AbstractString) = decode(UTF8String, a, Encoding(enc))
decode(a::Vector{UInt8}, enc::Encoding) = decode(UTF8String, a, enc)
335350
"""
    encode(s::AbstractString, enc)

Return an array of bytes holding the text of string `s` converted to encoding `enc`.

`enc` can be specified either as a string or as an `Encoding` object.
"""
function encode(s::AbstractString, enc::Encoding)
    sink = IOBuffer()
    # The source encoding is the one associated with the type of `s`
    encoder = StringEncoder(sink, enc, encoding(typeof(s)))
    write(encoder, s)
    # Closing the encoder completes the encoding; the underlying buffer stays open
    close(encoder)
    return takebuf_array(sink)
end

# Encoding names given as strings are converted to `Encoding` objects first
function encode(s::AbstractString, enc::AbstractString)
    return encode(s, Encoding(enc))
end
349366
350- # # Function to list supported encodings
351- include (" encodings.jl" )
352-
353- function test_encoding (enc)
367+ function test_encoding (enc:: ASCIIString )
354368 # We assume that an encoding is supported if it's possible to convert from it to UTF-8:
355369 cd = ccall ((:iconv_open , libiconv), Ptr{Void}, (Cstring, Cstring), enc, " UTF-8" )
356370 if cd == Ptr {Void} (- 1 )
0 commit comments