From 334763277d077b8085455a3dbb58593573a0fba4 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Thu, 19 May 2016 22:47:11 -0400 Subject: [PATCH] deprecate utf8 for String --- base/REPL.jl | 4 ++-- base/docs/helpdb/Base.jl | 25 +------------------------ base/env.jl | 2 +- base/exports.jl | 1 - base/libc.jl | 2 +- base/libgit2.jl | 2 +- base/precompile.jl | 1 - base/regex.jl | 4 ++-- base/show.jl | 2 +- base/unicode/utf8.jl | 8 -------- contrib/BBEditTextWrangler-julia.plist | 1 - doc/stdlib/collections.rst | 2 +- doc/stdlib/strings.rst | 18 ------------------ test/base64.jl | 2 +- test/dict.jl | 2 +- test/replcompletions.jl | 2 +- test/strings/basic.jl | 23 +++++++++-------------- test/strings/types.jl | 7 +++---- test/unicode/checkstring.jl | 6 +++--- test/unicode/utf16.jl | 2 +- test/unicode/utf32.jl | 18 +++++++++--------- test/unicode/utf8.jl | 2 +- test/unicode/utf8proc.jl | 2 +- 23 files changed, 40 insertions(+), 98 deletions(-) diff --git a/base/REPL.jl b/base/REPL.jl index d5ac3865de569c..8b156ec8c820e3 100644 --- a/base/REPL.jl +++ b/base/REPL.jl @@ -323,11 +323,11 @@ An editor may have converted tabs to spaces at line """ function hist_getline(file) while !eof(file) - line = utf8(readline(file)) + line = readline(file) isempty(line) && return line line[1] in "\r\n" || return line end - return utf8("") + return "" end function hist_from_file(hp, file) diff --git a/base/docs/helpdb/Base.jl b/base/docs/helpdb/Base.jl index 412daf429e2f27..0d0bd501447dcd 100644 --- a/base/docs/helpdb/Base.jl +++ b/base/docs/helpdb/Base.jl @@ -1854,7 +1854,7 @@ Dict{String,Float64} with 2 entries: "bar" => 42.0 "foo" => 0.0 -julia> b = Dict(utf8("baz") => 17, utf8("bar") => 4711) +julia> b = Dict("baz" => 17, "bar" => 4711) Dict{String,Int64} with 2 entries: "bar" => 4711 "baz" => 17 @@ -2962,29 +2962,6 @@ Extract a named field from a `value` of composite type. The syntax `a.b` calls """ getfield -""" - utf8(::Array{UInt8,1}) - -Create a UTF-8 string from a byte array. -""" -utf8(::Vector{UInt8}) - -""" - utf8(::Ptr{UInt8}, [length]) - -Create a UTF-8 string from the address of a C (0-terminated) string encoded in UTF-8. A copy -is made; the ptr can be safely freed. If `length` is specified, the string does not have to -be 0-terminated. -""" -utf8(::Ptr{UInt8}, length::Int = 1) - -""" - utf8(s) - -Convert a string to a contiguous UTF-8 string (all characters must be valid UTF-8 characters). -""" -utf8(s) - """ hvcat(rows::Tuple{Vararg{Int}}, values...) diff --git a/base/env.jl b/base/env.jl index bc0482c2e0702a..7b3e7f1da3ef6a 100644 --- a/base/env.jl +++ b/base/env.jl @@ -34,7 +34,7 @@ function access_env(onError::Function, str::AbstractString) var = cwstring(str) len = _getenvlen(var) if len == 0 - return Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND ? utf8("") : onError(str) + return Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND ? "" : onError(str) end val = zeros(UInt16,len) ret = ccall(:GetEnvironmentVariableW,stdcall,UInt32,(Ptr{UInt16},Ptr{UInt16},UInt32),var,val,len) diff --git a/base/exports.jl b/base/exports.jl index d0810e0e9d814f..bf47c04a31ca11 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -887,7 +887,6 @@ export ucfirst, unescape_string, uppercase, - utf8, utf16, utf32, warn, diff --git a/base/libc.jl b/base/libc.jl index 6585e4753f1f34..648b1a4024c409 100644 --- a/base/libc.jl +++ b/base/libc.jl @@ -259,7 +259,7 @@ function FormatMessage end FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_MAX_WIDTH_MASK, C_NULL, e, 0, lpMsgBuf, 0, C_NULL) p = lpMsgBuf[1] - len == 0 && return utf8("") + len == 0 && return "" buf = Array(UInt16, len) unsafe_copy!(pointer(buf), p, len) ccall(:LocalFree,stdcall,Ptr{Void},(Ptr{Void},),p) diff --git a/base/libgit2.jl b/base/libgit2.jl index cada5eae0e7f52..4148e26465153e 100644 --- a/base/libgit2.jl +++ b/base/libgit2.jl @@ -465,7 +465,7 @@ function snapshot(repo::GitRepo) work = try with(GitIndex, repo) do idx if length(readdir(path(repo))) > 1 - add!(idx, utf8(".")) + add!(idx, ".") write!(idx) end write_tree!(idx) diff --git a/base/precompile.jl b/base/precompile.jl index a9f6935a6abcd0..b70c331c18f0ea 100644 --- a/base/precompile.jl +++ b/base/precompile.jl @@ -400,7 +400,6 @@ precompile(Base.UInt, (UInt,)) precompile(Base.unsafe_copy!, (Array{Dict{Any, Any}, 1}, Int, Array{Dict{Any, Any}, 1}, Int, Int)) precompile(Base.unsafe_copy!, (Ptr{Dict{Any, Any}}, Ptr{Dict{Any, Any}}, Int)) precompile(Base.unshift!, (Array{Any,1}, Task)) -precompile(Base.utf8, (String,)) precompile(Base.uv_error, (String, Bool)) precompile(Base.uvfinalize, (Base.TTY,)) precompile(Base.vcat, (Base.LineEdit.Prompt,)) diff --git a/base/regex.jl b/base/regex.jl index fd2eb708a9b645..42b606bbf359fb 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -209,8 +209,8 @@ function matchall(re::Regex, str::String, overlap::Bool=false) matches end -matchall(re::Regex, str::Union{String,SubString}, overlap::Bool=false) = - matchall(re, utf8(str), overlap) +matchall(re::Regex, str::SubString, overlap::Bool=false) = + matchall(re, String(str), overlap) function search(str::Union{String,SubString}, re::Regex, idx::Integer) if idx > nextind(str,endof(str)) diff --git a/base/show.jl b/base/show.jl index 938bd78b7c1694..617229b261a606 100644 --- a/base/show.jl +++ b/base/show.jl @@ -1189,7 +1189,7 @@ Accept keyword args `c` for alternate single character marker. """ function replace_with_centered_mark(s::AbstractString;c::Char = '⋅') N = length(s) - return join(setindex!([utf8(" ") for i=1:N],string(c),ceil(Int,N/2))) + return join(setindex!([" " for i=1:N],string(c),ceil(Int,N/2))) end """ diff --git a/base/unicode/utf8.jl b/base/unicode/utf8.jl index 0e7da162a405cf..28d0a0f9f609a2 100644 --- a/base/unicode/utf8.jl +++ b/base/unicode/utf8.jl @@ -230,7 +230,6 @@ write(io::IO, s::String) = write(io, s.data) ## transcoding to UTF-8 ## -utf8(x) = convert(String, x) convert(::Type{String}, s::String) = s function convert(::Type{String}, dat::Vector{UInt8}) @@ -350,10 +349,3 @@ function encode_to_utf8{T<:Union{UInt16, UInt32}}(::Type{T}, dat, len) end String(buf) end - -utf8(p::Ptr{UInt8}) = - utf8(p, p == C_NULL ? Csize_t(0) : ccall(:strlen, Csize_t, (Ptr{UInt8},), p)) -function utf8(p::Ptr{UInt8}, len::Integer) - p == C_NULL && throw(ArgumentError("cannot convert NULL to string")) - String(ccall(:jl_pchar_to_array, Vector{UInt8}, (Ptr{UInt8}, Csize_t), p, len)) -end diff --git a/contrib/BBEditTextWrangler-julia.plist b/contrib/BBEditTextWrangler-julia.plist index fb6a20cb8f1e8f..b4c88a9ccfc554 100644 --- a/contrib/BBEditTextWrangler-julia.plist +++ b/contrib/BBEditTextWrangler-julia.plist @@ -1177,7 +1177,6 @@ using utf16 utf32 - utf8 values var varm diff --git a/doc/stdlib/collections.rst b/doc/stdlib/collections.rst index 4e401e3cd64454..fcbe58b194c289 100644 --- a/doc/stdlib/collections.rst +++ b/doc/stdlib/collections.rst @@ -842,7 +842,7 @@ Given a dictionary ``D``, the syntax ``D[x]`` returns the value of key ``x`` (if "bar" => 42.0 "foo" => 0.0 - julia> b = Dict(utf8("baz") => 17, utf8("bar") => 4711) + julia> b = Dict("baz" => 17, "bar" => 4711) Dict{String,Int64} with 2 entries: "bar" => 4711 "baz" => 17 diff --git a/doc/stdlib/strings.rst b/doc/stdlib/strings.rst index fc378b859f7bac..a1bb0175c94774 100644 --- a/doc/stdlib/strings.rst +++ b/doc/stdlib/strings.rst @@ -68,24 +68,6 @@ Convert a string to ``String`` type and check that it contains only ASCII data, otherwise throwing an ``ArugmentError`` indicating the position of the first non-ASCII byte. -.. function:: utf8(::Array{UInt8,1}) - - .. Docstring generated from Julia source - - Create a UTF-8 string from a byte array. - -.. function:: utf8(::Ptr{UInt8}, [length]) - - .. Docstring generated from Julia source - - Create a UTF-8 string from the address of a C (0-terminated) string encoded in UTF-8. A copy is made; the ptr can be safely freed. If ``length`` is specified, the string does not have to be 0-terminated. - -.. function:: utf8(s) - - .. Docstring generated from Julia source - - Convert a string to a contiguous UTF-8 string (all characters must be valid UTF-8 characters). - .. function:: @r_str -> Regex .. Docstring generated from Julia source diff --git a/test/base64.jl b/test/base64.jl index e3e71d79a5989a..1055cc1c302f90 100644 --- a/test/base64.jl +++ b/test/base64.jl @@ -24,7 +24,7 @@ end rm(fname) # Encode to string and decode -@test utf8(base64decode(base64encode(inputText))) == inputText +@test String(base64decode(base64encode(inputText))) == inputText # Decode with max line chars = 76 and padding ipipe = Base64DecodePipe(IOBuffer(encodedMaxLine76)) diff --git a/test/dict.jl b/test/dict.jl index d31da6008edb81..a9351357df14db 100644 --- a/test/dict.jl +++ b/test/dict.jl @@ -263,7 +263,7 @@ end for d in (Dict("\n" => "\n", "1" => "\n", "\n" => "2"), [string(i) => i for i = 1:30], [reshape(1:i^2,i,i) => reshape(1:i^2,i,i) for i = 1:24], - [utf8(Char['α':'α'+i;]) => utf8(Char['α':'α'+i;]) for i = (1:10)*10], + [String(Char['α':'α'+i;]) => String(Char['α':'α'+i;]) for i = (1:10)*10], Dict("key" => zeros(0, 0))) for cols in (12, 40, 80), rows in (2, 10, 24) # Ensure output is limited as requested diff --git a/test/replcompletions.jl b/test/replcompletions.jl index f446004fdccbda..26a1cfb034e0d0 100644 --- a/test/replcompletions.jl +++ b/test/replcompletions.jl @@ -569,7 +569,7 @@ c, r, res = test_scomplete(s) withenv("PATH" => string(tempdir(), ":", dir)) do s = string("repl-completio") c,r = test_scomplete(s) - @test [utf8("repl-completion")] == c + @test ["repl-completion"] == c @test s[r] == "repl-completio" end diff --git a/test/strings/basic.jl b/test/strings/basic.jl index a45e0d7a5e08f4..52a653a0c89723 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -215,14 +215,14 @@ end # issue #11142 s = "abcdefghij" sp = pointer(s) -@test utf8(sp) == s -@test utf8(sp,5) == "abcde" -@test typeof(utf8(sp)) == String +@test String(sp) == s +@test String(sp,5) == "abcde" +@test typeof(String(sp)) == String s = "abcde\uff\u2000\U1f596" sp = pointer(s) -@test utf8(sp) == s -@test utf8(sp,5) == "abcde" -@test typeof(utf8(sp)) == String +@test String(sp) == s +@test String(sp,5) == "abcde" +@test typeof(String(sp)) == String @test get(tryparse(BigInt, "1234567890")) == BigInt(1234567890) @test isnull(tryparse(BigInt, "1234567890-")) @@ -464,11 +464,11 @@ end # issue # 11464: uppercase/lowercase of UTF16String becomes a String str = "abcdef\uff\uffff\u10ffffABCDEF" @test typeof(uppercase("abcdef")) == String -@test typeof(uppercase(utf8(str))) == String +@test typeof(uppercase(String(str))) == String @test typeof(uppercase(utf16(str))) == UTF16String @test typeof(uppercase(utf32(str))) == UTF32String @test typeof(lowercase("ABCDEF")) == String -@test typeof(lowercase(utf8(str))) == String +@test typeof(lowercase(String(str))) == String @test typeof(lowercase(utf16(str))) == UTF16String @test typeof(lowercase(utf32(str))) == UTF32String @@ -481,16 +481,11 @@ foobaz(ch) = reinterpret(Char, typemax(UInt32)) @test "a".*["b","c"] == ["ab","ac"] @test ["b","c"].*"a" == ["ba","ca"] -@test utf8("a").*["b","c"] == ["ab","ac"] -@test "a".*map(utf8,["b","c"]) == ["ab","ac"] @test ["a","b"].*["c","d"]' == ["ac" "ad"; "bc" "bd"] -# Make sure NULL pointer are handled consistently by -# `String`, `ascii` and `utf8` +# Make sure NULL pointer are handled consistently by String @test_throws ArgumentError String(Ptr{UInt8}(0)) @test_throws ArgumentError String(Ptr{UInt8}(0), 10) -@test_throws ArgumentError utf8(Ptr{UInt8}(0)) -@test_throws ArgumentError utf8(Ptr{UInt8}(0), 10) # ascii works on ASCII strings and fails on non-ASCII strings @test ascii("Hello, world") == "Hello, world" diff --git a/test/strings/types.jl b/test/strings/types.jl index 0a510b8355b4cb..7b27fcec7837d3 100644 --- a/test/strings/types.jl +++ b/test/strings/types.jl @@ -13,7 +13,7 @@ slen_u8str2 = length(u8str2) @test len_u8str2 == 2 * len_u8str @test slen_u8str2 == 2 * slen_u8str -u8str2plain = utf8(u8str2) +u8str2plain = String(u8str2) for i1 = 1:length(u8str2) if !isvalid(u8str2, i1); continue; end @@ -93,8 +93,7 @@ u = SubString(str, 1, 5) @test prevind(SubString("{var}",2,4),4) == 3 # issue #4183 -@test split(SubString(ascii("x"), 2, 0), "y") == AbstractString[""] -@test split(SubString(utf8("x"), 2, 0), "y") == AbstractString[""] +@test split(SubString("x", 2, 0), "y") == AbstractString[""] # issue #6772 @test float(SubString("10",1,1)) === 1.0 @@ -132,7 +131,7 @@ let s="lorem ipsum", end #let #for isvalid(SubString{String}) -let s = utf8("Σx + βz - 2") +let s = String("Σx + βz - 2") for i in -1:length(s)+2 ss=SubString(s,1,i) @test isvalid(ss,i)==isvalid(s,i) diff --git a/test/unicode/checkstring.jl b/test/unicode/checkstring.jl index c19b8b9a7324ce..0b694f0bfeab95 100644 --- a/test/unicode/checkstring.jl +++ b/test/unicode/checkstring.jl @@ -90,10 +90,10 @@ try end # Long encoding of 0x01 - @test_throws UnicodeError utf8(b"\xf0\x80\x80\x80") + @test_throws UnicodeError String(b"\xf0\x80\x80\x80") # Test ends of long encoded surrogates - @test_throws UnicodeError utf8(b"\xf0\x8d\xa0\x80") - @test_throws UnicodeError utf8(b"\xf0\x8d\xbf\xbf") + @test_throws UnicodeError String(b"\xf0\x8d\xa0\x80") + @test_throws UnicodeError String(b"\xf0\x8d\xbf\xbf") @test_throws UnicodeError Base.checkstring(b"\xf0\x80\x80\x80") @test Base.checkstring(b"\xc0\x81"; accept_long_char=true) == (1,0x1,0,0,0) @test Base.checkstring(b"\xf0\x80\x80\x80"; accept_long_char=true) == (1,0x1,0,0,0) diff --git a/test/unicode/utf16.jl b/test/unicode/utf16.jl index 6e0a0b14ec03fb..1c8e31cdece981 100644 --- a/test/unicode/utf16.jl +++ b/test/unicode/utf16.jl @@ -6,7 +6,7 @@ u16 = utf16(u8) @test sizeof(u16) == 18 @test length(u16.data) == 10 && u16.data[end] == 0 @test length(u16) == 5 -@test utf8(u16) == u8 +@test String(u16) == u8 @test collect(u8) == collect(u16) @test u8 == utf16(u16.data[1:end-1]) == utf16(copy!(Array(UInt8, 18), 1, reinterpret(UInt8, u16.data), 1, 18)) @test u8 == utf16(pointer(u16)) == utf16(convert(Ptr{Int16}, pointer(u16))) diff --git a/test/unicode/utf32.jl b/test/unicode/utf32.jl index 8165a6aaecc6d9..c8049b90c29767 100644 --- a/test/unicode/utf32.jl +++ b/test/unicode/utf32.jl @@ -6,7 +6,7 @@ u32 = utf32(u8) @test sizeof(u32) == 20 @test length(u32.data) == 6 && u32.data[end] == 0 @test length(u32) == 5 -@test utf8(u32) == u8 +@test String(u32) == u8 @test collect(u8) == collect(u32) @test u8 == utf32(u32.data[1:end-1]) == utf32(copy!(Array(UInt8, 20), 1, reinterpret(UInt8, u32.data), 1, 20)) @test u8 == utf32(pointer(u32)) == utf32(convert(Ptr{Int32}, pointer(u32))) @@ -16,9 +16,9 @@ u32 = utf32(u8) function tstcvt(strUTF8::String, strUTF16::UTF16String, strUTF32::UTF32String) @test utf16(strUTF8) == strUTF16 @test utf32(strUTF8) == strUTF32 - @test utf8(strUTF16) == strUTF8 + @test String(strUTF16) == strUTF8 @test utf32(strUTF16) == strUTF32 - @test utf8(strUTF32) == strUTF8 + @test String(strUTF32) == strUTF8 @test utf16(strUTF32) == strUTF16 end @@ -49,7 +49,7 @@ str3_UTF32 = utf32(str3_UTF8) str4_UTF32 = utf32(str4_UTF8) strS_UTF32 = utf32(strS_UTF8) -@test utf8(strAscii) == strAscii +@test String(strAscii) == strAscii @test utf16(strAscii) == strAscii @test utf32(strAscii) == strAscii @@ -62,13 +62,13 @@ tstcvt(str4_UTF8,str4_UTF16,str4_UTF32) # Test converting surrogate pairs @test utf16(strS_UTF8) == strC_UTF8 @test utf32(strS_UTF8) == strC_UTF8 -@test utf8(strS_UTF16) == strC_UTF8 +@test String(strS_UTF16) == strC_UTF8 @test utf32(strS_UTF16) == strC_UTF8 -@test utf8(strS_UTF32) == strC_UTF8 +@test String(strS_UTF32) == strC_UTF8 @test utf16(strS_UTF32) == strC_UTF8 # Test converting overlong \0 -@test utf8(strZ) == strz_UTF8 +@test String(strZ) == strz_UTF8 @test utf16(String(strZ)) == strz_UTF8 @test utf32(String(strZ)) == strz_UTF8 @@ -172,7 +172,7 @@ end # Wstring u8 = "\U10ffff\U1d565\U1d7f6\U00066\U2008a" w = wstring(u8) -@test length(w) == 5 && utf8(w) == u8 && collect(u8) == collect(w) +@test length(w) == 5 && String(w) == u8 && collect(u8) == collect(w) @test u8 == WString(w.data) # 12268 @@ -211,7 +211,7 @@ end # Test pointer() functions let str = ascii("this ") - u8 = utf8(str) + u8 = String(str) u16 = utf16(str) u32 = utf32(str) pa = pointer(str) diff --git a/test/unicode/utf8.jl b/test/unicode/utf8.jl index 073f5f2b4d29cd..c3037a7d624dfd 100644 --- a/test/unicode/utf8.jl +++ b/test/unicode/utf8.jl @@ -5,7 +5,7 @@ let ch = 0x10000 for hichar = 0xd800:0xdbff for lochar = 0xdc00:0xdfff - @test convert(String, utf8(Char[hichar, lochar]).data) == string(Char(ch)) + @test convert(String, String(Char[hichar, lochar]).data) == string(Char(ch)) ch += 1 end end diff --git a/test/unicode/utf8proc.jl b/test/unicode/utf8proc.jl index 6ba0ac6f4e8128..2a829a5717d55f 100644 --- a/test/unicode/utf8proc.jl +++ b/test/unicode/utf8proc.jl @@ -234,7 +234,7 @@ let grphtest = (("b\u0300lahβlahb\u0302láh", ["b\u0300","l","a","h", "\U1d4c1\u0300"]), ("x",["x"]), ("abc",["a","b","c"])) - for T in (utf8,utf16,utf32) + for T in (String,utf16,utf32) for nf in (:NFC, :NFD) for (s, g) in grphtest s_ = T(normalize_string(s, nf))