Skip to content

Commit

Permalink
deprecate utf8 for String
Browse files Browse the repository at this point in the history
  • Loading branch information
StefanKarpinski committed May 20, 2016
1 parent 8b1b350 commit 3347632
Show file tree
Hide file tree
Showing 23 changed files with 40 additions and 98 deletions.
4 changes: 2 additions & 2 deletions base/REPL.jl
Original file line number Diff line number Diff line change
Expand Up @@ -323,11 +323,11 @@ An editor may have converted tabs to spaces at line """

function hist_getline(file)
while !eof(file)
line = utf8(readline(file))
line = readline(file)
isempty(line) && return line
line[1] in "\r\n" || return line
end
return utf8("")
return ""
end

function hist_from_file(hp, file)
Expand Down
25 changes: 1 addition & 24 deletions base/docs/helpdb/Base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1854,7 +1854,7 @@ Dict{String,Float64} with 2 entries:
"bar" => 42.0
"foo" => 0.0
julia> b = Dict(utf8("baz") => 17, utf8("bar") => 4711)
julia> b = Dict("baz" => 17, "bar" => 4711)
Dict{String,Int64} with 2 entries:
"bar" => 4711
"baz" => 17
Expand Down Expand Up @@ -2962,29 +2962,6 @@ Extract a named field from a `value` of composite type. The syntax `a.b` calls
"""
getfield

"""
utf8(::Array{UInt8,1})
Create a UTF-8 string from a byte array.
"""
utf8(::Vector{UInt8})

"""
utf8(::Ptr{UInt8}, [length])
Create a UTF-8 string from the address of a C (0-terminated) string encoded in UTF-8. A copy
is made; the ptr can be safely freed. If `length` is specified, the string does not have to
be 0-terminated.
"""
utf8(::Ptr{UInt8}, length::Int = 1)

"""
utf8(s)
Convert a string to a contiguous UTF-8 string (all characters must be valid UTF-8 characters).
"""
utf8(s)

"""
hvcat(rows::Tuple{Vararg{Int}}, values...)
Expand Down
2 changes: 1 addition & 1 deletion base/env.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ function access_env(onError::Function, str::AbstractString)
var = cwstring(str)
len = _getenvlen(var)
if len == 0
return Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND ? utf8("") : onError(str)
return Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND ? "" : onError(str)
end
val = zeros(UInt16,len)
ret = ccall(:GetEnvironmentVariableW,stdcall,UInt32,(Ptr{UInt16},Ptr{UInt16},UInt32),var,val,len)
Expand Down
1 change: 0 additions & 1 deletion base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -887,7 +887,6 @@ export
ucfirst,
unescape_string,
uppercase,
utf8,
utf16,
utf32,
warn,
Expand Down
2 changes: 1 addition & 1 deletion base/libc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ function FormatMessage end
FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_MAX_WIDTH_MASK,
C_NULL, e, 0, lpMsgBuf, 0, C_NULL)
p = lpMsgBuf[1]
len == 0 && return utf8("")
len == 0 && return ""
buf = Array(UInt16, len)
unsafe_copy!(pointer(buf), p, len)
ccall(:LocalFree,stdcall,Ptr{Void},(Ptr{Void},),p)
Expand Down
2 changes: 1 addition & 1 deletion base/libgit2.jl
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,7 @@ function snapshot(repo::GitRepo)
work = try
with(GitIndex, repo) do idx
if length(readdir(path(repo))) > 1
add!(idx, utf8("."))
add!(idx, ".")
write!(idx)
end
write_tree!(idx)
Expand Down
1 change: 0 additions & 1 deletion base/precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,6 @@ precompile(Base.UInt, (UInt,))
precompile(Base.unsafe_copy!, (Array{Dict{Any, Any}, 1}, Int, Array{Dict{Any, Any}, 1}, Int, Int))
precompile(Base.unsafe_copy!, (Ptr{Dict{Any, Any}}, Ptr{Dict{Any, Any}}, Int))
precompile(Base.unshift!, (Array{Any,1}, Task))
precompile(Base.utf8, (String,))
precompile(Base.uv_error, (String, Bool))
precompile(Base.uvfinalize, (Base.TTY,))
precompile(Base.vcat, (Base.LineEdit.Prompt,))
Expand Down
4 changes: 2 additions & 2 deletions base/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,8 @@ function matchall(re::Regex, str::String, overlap::Bool=false)
matches
end

matchall(re::Regex, str::Union{String,SubString}, overlap::Bool=false) =
matchall(re, utf8(str), overlap)
matchall(re::Regex, str::SubString, overlap::Bool=false) =
matchall(re, String(str), overlap)

function search(str::Union{String,SubString}, re::Regex, idx::Integer)
if idx > nextind(str,endof(str))
Expand Down
2 changes: 1 addition & 1 deletion base/show.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1189,7 +1189,7 @@ Accept keyword args `c` for alternate single character marker.
"""
function replace_with_centered_mark(s::AbstractString;c::Char = '⋅')
N = length(s)
return join(setindex!([utf8(" ") for i=1:N],string(c),ceil(Int,N/2)))
return join(setindex!([" " for i=1:N],string(c),ceil(Int,N/2)))
end

"""
Expand Down
8 changes: 0 additions & 8 deletions base/unicode/utf8.jl
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,6 @@ write(io::IO, s::String) = write(io, s.data)

## transcoding to UTF-8 ##

utf8(x) = convert(String, x)
convert(::Type{String}, s::String) = s

function convert(::Type{String}, dat::Vector{UInt8})
Expand Down Expand Up @@ -350,10 +349,3 @@ function encode_to_utf8{T<:Union{UInt16, UInt32}}(::Type{T}, dat, len)
end
String(buf)
end

utf8(p::Ptr{UInt8}) =
utf8(p, p == C_NULL ? Csize_t(0) : ccall(:strlen, Csize_t, (Ptr{UInt8},), p))
function utf8(p::Ptr{UInt8}, len::Integer)
p == C_NULL && throw(ArgumentError("cannot convert NULL to string"))
String(ccall(:jl_pchar_to_array, Vector{UInt8}, (Ptr{UInt8}, Csize_t), p, len))
end
1 change: 0 additions & 1 deletion contrib/BBEditTextWrangler-julia.plist
Original file line number Diff line number Diff line change
Expand Up @@ -1177,7 +1177,6 @@
<string>using</string>
<string>utf16</string>
<string>utf32</string>
<string>utf8</string>
<string>values</string>
<string>var</string>
<string>varm</string>
Expand Down
2 changes: 1 addition & 1 deletion doc/stdlib/collections.rst
Original file line number Diff line number Diff line change
Expand Up @@ -842,7 +842,7 @@ Given a dictionary ``D``, the syntax ``D[x]`` returns the value of key ``x`` (if
"bar" => 42.0
"foo" => 0.0

julia> b = Dict(utf8("baz") => 17, utf8("bar") => 4711)
julia> b = Dict("baz" => 17, "bar" => 4711)
Dict{String,Int64} with 2 entries:
"bar" => 4711
"baz" => 17
Expand Down
18 changes: 0 additions & 18 deletions doc/stdlib/strings.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,24 +68,6 @@
Convert a string to ``String`` type and check that it contains only ASCII data, otherwise throwing an ``ArgumentError`` indicating the position of the first non-ASCII byte.

.. function:: utf8(::Array{UInt8,1})

.. Docstring generated from Julia source
Create a UTF-8 string from a byte array.

.. function:: utf8(::Ptr{UInt8}, [length])

.. Docstring generated from Julia source
Create a UTF-8 string from the address of a C (0-terminated) string encoded in UTF-8. A copy is made; the ptr can be safely freed. If ``length`` is specified, the string does not have to be 0-terminated.

.. function:: utf8(s)

.. Docstring generated from Julia source
Convert a string to a contiguous UTF-8 string (all characters must be valid UTF-8 characters).

.. function:: @r_str -> Regex

.. Docstring generated from Julia source
Expand Down
2 changes: 1 addition & 1 deletion test/base64.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ end
rm(fname)

# Encode to string and decode
@test utf8(base64decode(base64encode(inputText))) == inputText
@test String(base64decode(base64encode(inputText))) == inputText

# Decode with max line chars = 76 and padding
ipipe = Base64DecodePipe(IOBuffer(encodedMaxLine76))
Expand Down
2 changes: 1 addition & 1 deletion test/dict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ end
for d in (Dict("\n" => "\n", "1" => "\n", "\n" => "2"),
[string(i) => i for i = 1:30],
[reshape(1:i^2,i,i) => reshape(1:i^2,i,i) for i = 1:24],
[utf8(Char['α':'α'+i;]) => utf8(Char['α':'α'+i;]) for i = (1:10)*10],
[String(Char['α':'α'+i;]) => String(Char['α':'α'+i;]) for i = (1:10)*10],
Dict("key" => zeros(0, 0)))
for cols in (12, 40, 80), rows in (2, 10, 24)
# Ensure output is limited as requested
Expand Down
2 changes: 1 addition & 1 deletion test/replcompletions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -569,7 +569,7 @@ c, r, res = test_scomplete(s)
withenv("PATH" => string(tempdir(), ":", dir)) do
s = string("repl-completio")
c,r = test_scomplete(s)
@test [utf8("repl-completion")] == c
@test ["repl-completion"] == c
@test s[r] == "repl-completio"
end

Expand Down
23 changes: 9 additions & 14 deletions test/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -215,14 +215,14 @@ end
# issue #11142
s = "abcdefghij"
sp = pointer(s)
@test utf8(sp) == s
@test utf8(sp,5) == "abcde"
@test typeof(utf8(sp)) == String
@test String(sp) == s
@test String(sp,5) == "abcde"
@test typeof(String(sp)) == String
s = "abcde\uff\u2000\U1f596"
sp = pointer(s)
@test utf8(sp) == s
@test utf8(sp,5) == "abcde"
@test typeof(utf8(sp)) == String
@test String(sp) == s
@test String(sp,5) == "abcde"
@test typeof(String(sp)) == String

@test get(tryparse(BigInt, "1234567890")) == BigInt(1234567890)
@test isnull(tryparse(BigInt, "1234567890-"))
Expand Down Expand Up @@ -464,11 +464,11 @@ end
# issue # 11464: uppercase/lowercase of UTF16String becomes a String
str = "abcdef\uff\uffff\u10ffffABCDEF"
@test typeof(uppercase("abcdef")) == String
@test typeof(uppercase(utf8(str))) == String
@test typeof(uppercase(String(str))) == String
@test typeof(uppercase(utf16(str))) == UTF16String
@test typeof(uppercase(utf32(str))) == UTF32String
@test typeof(lowercase("ABCDEF")) == String
@test typeof(lowercase(utf8(str))) == String
@test typeof(lowercase(String(str))) == String
@test typeof(lowercase(utf16(str))) == UTF16String
@test typeof(lowercase(utf32(str))) == UTF32String

Expand All @@ -481,16 +481,11 @@ foobaz(ch) = reinterpret(Char, typemax(UInt32))

@test "a".*["b","c"] == ["ab","ac"]
@test ["b","c"].*"a" == ["ba","ca"]
@test utf8("a").*["b","c"] == ["ab","ac"]
@test "a".*map(utf8,["b","c"]) == ["ab","ac"]
@test ["a","b"].*["c","d"]' == ["ac" "ad"; "bc" "bd"]

# Make sure NULL pointer are handled consistently by
# `String`, `ascii` and `utf8`
# Make sure NULL pointer are handled consistently by String
@test_throws ArgumentError String(Ptr{UInt8}(0))
@test_throws ArgumentError String(Ptr{UInt8}(0), 10)
@test_throws ArgumentError utf8(Ptr{UInt8}(0))
@test_throws ArgumentError utf8(Ptr{UInt8}(0), 10)

# ascii works on ASCII strings and fails on non-ASCII strings
@test ascii("Hello, world") == "Hello, world"
Expand Down
7 changes: 3 additions & 4 deletions test/strings/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ slen_u8str2 = length(u8str2)
@test len_u8str2 == 2 * len_u8str
@test slen_u8str2 == 2 * slen_u8str

u8str2plain = utf8(u8str2)
u8str2plain = String(u8str2)

for i1 = 1:length(u8str2)
if !isvalid(u8str2, i1); continue; end
Expand Down Expand Up @@ -93,8 +93,7 @@ u = SubString(str, 1, 5)
@test prevind(SubString("{var}",2,4),4) == 3

# issue #4183
@test split(SubString(ascii("x"), 2, 0), "y") == AbstractString[""]
@test split(SubString(utf8("x"), 2, 0), "y") == AbstractString[""]
@test split(SubString("x", 2, 0), "y") == AbstractString[""]

# issue #6772
@test float(SubString("10",1,1)) === 1.0
Expand Down Expand Up @@ -132,7 +131,7 @@ let s="lorem ipsum",
end #let

#for isvalid(SubString{String})
let s = utf8("Σx + βz - 2")
let s = String("Σx + βz - 2")
for i in -1:length(s)+2
ss=SubString(s,1,i)
@test isvalid(ss,i)==isvalid(s,i)
Expand Down
6 changes: 3 additions & 3 deletions test/unicode/checkstring.jl
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,10 @@ try
end

# Long encoding of 0x01
@test_throws UnicodeError utf8(b"\xf0\x80\x80\x80")
@test_throws UnicodeError String(b"\xf0\x80\x80\x80")
# Test ends of long encoded surrogates
@test_throws UnicodeError utf8(b"\xf0\x8d\xa0\x80")
@test_throws UnicodeError utf8(b"\xf0\x8d\xbf\xbf")
@test_throws UnicodeError String(b"\xf0\x8d\xa0\x80")
@test_throws UnicodeError String(b"\xf0\x8d\xbf\xbf")
@test_throws UnicodeError Base.checkstring(b"\xf0\x80\x80\x80")
@test Base.checkstring(b"\xc0\x81"; accept_long_char=true) == (1,0x1,0,0,0)
@test Base.checkstring(b"\xf0\x80\x80\x80"; accept_long_char=true) == (1,0x1,0,0,0)
Expand Down
2 changes: 1 addition & 1 deletion test/unicode/utf16.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ u16 = utf16(u8)
@test sizeof(u16) == 18
@test length(u16.data) == 10 && u16.data[end] == 0
@test length(u16) == 5
@test utf8(u16) == u8
@test String(u16) == u8
@test collect(u8) == collect(u16)
@test u8 == utf16(u16.data[1:end-1]) == utf16(copy!(Array(UInt8, 18), 1, reinterpret(UInt8, u16.data), 1, 18))
@test u8 == utf16(pointer(u16)) == utf16(convert(Ptr{Int16}, pointer(u16)))
Expand Down
18 changes: 9 additions & 9 deletions test/unicode/utf32.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ u32 = utf32(u8)
@test sizeof(u32) == 20
@test length(u32.data) == 6 && u32.data[end] == 0
@test length(u32) == 5
@test utf8(u32) == u8
@test String(u32) == u8
@test collect(u8) == collect(u32)
@test u8 == utf32(u32.data[1:end-1]) == utf32(copy!(Array(UInt8, 20), 1, reinterpret(UInt8, u32.data), 1, 20))
@test u8 == utf32(pointer(u32)) == utf32(convert(Ptr{Int32}, pointer(u32)))
Expand All @@ -16,9 +16,9 @@ u32 = utf32(u8)
function tstcvt(strUTF8::String, strUTF16::UTF16String, strUTF32::UTF32String)
@test utf16(strUTF8) == strUTF16
@test utf32(strUTF8) == strUTF32
@test utf8(strUTF16) == strUTF8
@test String(strUTF16) == strUTF8
@test utf32(strUTF16) == strUTF32
@test utf8(strUTF32) == strUTF8
@test String(strUTF32) == strUTF8
@test utf16(strUTF32) == strUTF16
end

Expand Down Expand Up @@ -49,7 +49,7 @@ str3_UTF32 = utf32(str3_UTF8)
str4_UTF32 = utf32(str4_UTF8)
strS_UTF32 = utf32(strS_UTF8)

@test utf8(strAscii) == strAscii
@test String(strAscii) == strAscii
@test utf16(strAscii) == strAscii
@test utf32(strAscii) == strAscii

Expand All @@ -62,13 +62,13 @@ tstcvt(str4_UTF8,str4_UTF16,str4_UTF32)
# Test converting surrogate pairs
@test utf16(strS_UTF8) == strC_UTF8
@test utf32(strS_UTF8) == strC_UTF8
@test utf8(strS_UTF16) == strC_UTF8
@test String(strS_UTF16) == strC_UTF8
@test utf32(strS_UTF16) == strC_UTF8
@test utf8(strS_UTF32) == strC_UTF8
@test String(strS_UTF32) == strC_UTF8
@test utf16(strS_UTF32) == strC_UTF8

# Test converting overlong \0
@test utf8(strZ) == strz_UTF8
@test String(strZ) == strz_UTF8
@test utf16(String(strZ)) == strz_UTF8
@test utf32(String(strZ)) == strz_UTF8

Expand Down Expand Up @@ -172,7 +172,7 @@ end
# Wstring
u8 = "\U10ffff\U1d565\U1d7f6\U00066\U2008a"
w = wstring(u8)
@test length(w) == 5 && utf8(w) == u8 && collect(u8) == collect(w)
@test length(w) == 5 && String(w) == u8 && collect(u8) == collect(w)
@test u8 == WString(w.data)

# 12268
Expand Down Expand Up @@ -211,7 +211,7 @@ end

# Test pointer() functions
let str = ascii("this ")
u8 = utf8(str)
u8 = String(str)
u16 = utf16(str)
u32 = utf32(str)
pa = pointer(str)
Expand Down
2 changes: 1 addition & 1 deletion test/unicode/utf8.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
let ch = 0x10000
for hichar = 0xd800:0xdbff
for lochar = 0xdc00:0xdfff
@test convert(String, utf8(Char[hichar, lochar]).data) == string(Char(ch))
@test convert(String, String(Char[hichar, lochar]).data) == string(Char(ch))
ch += 1
end
end
Expand Down
2 changes: 1 addition & 1 deletion test/unicode/utf8proc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ let grphtest = (("b\u0300lahβlahb\u0302láh", ["b\u0300","l","a","h",
"\U1d4c1\u0300"]),
("x",["x"]),
("abc",["a","b","c"]))
for T in (utf8,utf16,utf32)
for T in (String,utf16,utf32)
for nf in (:NFC, :NFD)
for (s, g) in grphtest
s_ = T(normalize_string(s, nf))
Expand Down

0 comments on commit 3347632

Please sign in to comment.