From 334763277d077b8085455a3dbb58593573a0fba4 Mon Sep 17 00:00:00 2001
From: Stefan Karpinski <stefan@karpinski.org>
Date: Thu, 19 May 2016 22:47:11 -0400
Subject: [PATCH] deprecate utf8 for String

---
 base/REPL.jl                           |  4 ++--
 base/docs/helpdb/Base.jl               | 25 +------------------------
 base/env.jl                            |  2 +-
 base/exports.jl                        |  1 -
 base/libc.jl                           |  2 +-
 base/libgit2.jl                        |  2 +-
 base/precompile.jl                     |  1 -
 base/regex.jl                          |  4 ++--
 base/show.jl                           |  2 +-
 base/unicode/utf8.jl                   |  8 --------
 contrib/BBEditTextWrangler-julia.plist |  1 -
 doc/stdlib/collections.rst             |  2 +-
 doc/stdlib/strings.rst                 | 18 ------------------
 test/base64.jl                         |  2 +-
 test/dict.jl                           |  2 +-
 test/replcompletions.jl                |  2 +-
 test/strings/basic.jl                  | 23 +++++++++--------------
 test/strings/types.jl                  |  7 +++----
 test/unicode/checkstring.jl            |  6 +++---
 test/unicode/utf16.jl                  |  2 +-
 test/unicode/utf32.jl                  | 18 +++++++++---------
 test/unicode/utf8.jl                   |  2 +-
 test/unicode/utf8proc.jl               |  2 +-
 23 files changed, 40 insertions(+), 98 deletions(-)

diff --git a/base/REPL.jl b/base/REPL.jl
index d5ac3865de569c..8b156ec8c820e3 100644
--- a/base/REPL.jl
+++ b/base/REPL.jl
@@ -323,11 +323,11 @@ An editor may have converted tabs to spaces at line """
 
 function hist_getline(file)
     while !eof(file)
-        line = utf8(readline(file))
+        line = readline(file)
         isempty(line) && return line
         line[1] in "\r\n" || return line
     end
-    return utf8("")
+    return ""
 end
 
 function hist_from_file(hp, file)
diff --git a/base/docs/helpdb/Base.jl b/base/docs/helpdb/Base.jl
index 412daf429e2f27..0d0bd501447dcd 100644
--- a/base/docs/helpdb/Base.jl
+++ b/base/docs/helpdb/Base.jl
@@ -1854,7 +1854,7 @@ Dict{String,Float64} with 2 entries:
   "bar" => 42.0
   "foo" => 0.0
 
-julia> b = Dict(utf8("baz") => 17, utf8("bar") => 4711)
+julia> b = Dict("baz" => 17, "bar" => 4711)
 Dict{String,Int64} with 2 entries:
   "bar" => 4711
   "baz" => 17
@@ -2962,29 +2962,6 @@ Extract a named field from a `value` of composite type. The syntax `a.b` calls
 """
 getfield
 
-"""
-    utf8(::Array{UInt8,1})
-
-Create a UTF-8 string from a byte array.
-"""
-utf8(::Vector{UInt8})
-
-"""
-    utf8(::Ptr{UInt8}, [length])
-
-Create a UTF-8 string from the address of a C (0-terminated) string encoded in UTF-8. A copy
-is made; the ptr can be safely freed. If `length` is specified, the string does not have to
-be 0-terminated.
-"""
-utf8(::Ptr{UInt8}, length::Int = 1)
-
-"""
-    utf8(s)
-
-Convert a string to a contiguous UTF-8 string (all characters must be valid UTF-8 characters).
-"""
-utf8(s)
-
 """
     hvcat(rows::Tuple{Vararg{Int}}, values...)
 
diff --git a/base/env.jl b/base/env.jl
index bc0482c2e0702a..7b3e7f1da3ef6a 100644
--- a/base/env.jl
+++ b/base/env.jl
@@ -34,7 +34,7 @@ function access_env(onError::Function, str::AbstractString)
     var = cwstring(str)
     len = _getenvlen(var)
     if len == 0
-        return Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND ? utf8("") : onError(str)
+        return Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND ? "" : onError(str)
     end
     val = zeros(UInt16,len)
     ret = ccall(:GetEnvironmentVariableW,stdcall,UInt32,(Ptr{UInt16},Ptr{UInt16},UInt32),var,val,len)
diff --git a/base/exports.jl b/base/exports.jl
index d0810e0e9d814f..bf47c04a31ca11 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -887,7 +887,6 @@ export
     ucfirst,
     unescape_string,
     uppercase,
-    utf8,
     utf16,
     utf32,
     warn,
diff --git a/base/libc.jl b/base/libc.jl
index 6585e4753f1f34..648b1a4024c409 100644
--- a/base/libc.jl
+++ b/base/libc.jl
@@ -259,7 +259,7 @@ function FormatMessage end
                     FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_MAX_WIDTH_MASK,
                     C_NULL, e, 0, lpMsgBuf, 0, C_NULL)
         p = lpMsgBuf[1]
-        len == 0 && return utf8("")
+        len == 0 && return ""
         buf = Array(UInt16, len)
         unsafe_copy!(pointer(buf), p, len)
         ccall(:LocalFree,stdcall,Ptr{Void},(Ptr{Void},),p)
diff --git a/base/libgit2.jl b/base/libgit2.jl
index cada5eae0e7f52..4148e26465153e 100644
--- a/base/libgit2.jl
+++ b/base/libgit2.jl
@@ -465,7 +465,7 @@ function snapshot(repo::GitRepo)
     work = try
         with(GitIndex, repo) do idx
             if length(readdir(path(repo))) > 1
-                add!(idx, utf8("."))
+                add!(idx, ".")
                 write!(idx)
             end
             write_tree!(idx)
diff --git a/base/precompile.jl b/base/precompile.jl
index a9f6935a6abcd0..b70c331c18f0ea 100644
--- a/base/precompile.jl
+++ b/base/precompile.jl
@@ -400,7 +400,6 @@ precompile(Base.UInt, (UInt,))
 precompile(Base.unsafe_copy!, (Array{Dict{Any, Any}, 1}, Int, Array{Dict{Any, Any}, 1}, Int, Int))
 precompile(Base.unsafe_copy!, (Ptr{Dict{Any, Any}}, Ptr{Dict{Any, Any}}, Int))
 precompile(Base.unshift!, (Array{Any,1}, Task))
-precompile(Base.utf8, (String,))
 precompile(Base.uv_error, (String, Bool))
 precompile(Base.uvfinalize, (Base.TTY,))
 precompile(Base.vcat, (Base.LineEdit.Prompt,))
diff --git a/base/regex.jl b/base/regex.jl
index fd2eb708a9b645..42b606bbf359fb 100644
--- a/base/regex.jl
+++ b/base/regex.jl
@@ -209,8 +209,8 @@ function matchall(re::Regex, str::String, overlap::Bool=false)
     matches
 end
 
-matchall(re::Regex, str::Union{String,SubString}, overlap::Bool=false) =
-    matchall(re, utf8(str), overlap)
+matchall(re::Regex, str::SubString, overlap::Bool=false) =
+    matchall(re, String(str), overlap)
 
 function search(str::Union{String,SubString}, re::Regex, idx::Integer)
     if idx > nextind(str,endof(str))
diff --git a/base/show.jl b/base/show.jl
index 938bd78b7c1694..617229b261a606 100644
--- a/base/show.jl
+++ b/base/show.jl
@@ -1189,7 +1189,7 @@ Accept keyword args `c` for alternate single character marker.
 """
 function replace_with_centered_mark(s::AbstractString;c::Char = '⋅')
     N = length(s)
-    return join(setindex!([utf8(" ") for i=1:N],string(c),ceil(Int,N/2)))
+    return join(setindex!([" " for i=1:N],string(c),ceil(Int,N/2)))
 end
 
 """
diff --git a/base/unicode/utf8.jl b/base/unicode/utf8.jl
index 0e7da162a405cf..28d0a0f9f609a2 100644
--- a/base/unicode/utf8.jl
+++ b/base/unicode/utf8.jl
@@ -230,7 +230,6 @@ write(io::IO, s::String) = write(io, s.data)
 
 ## transcoding to UTF-8 ##
 
-utf8(x) = convert(String, x)
 convert(::Type{String}, s::String) = s
 
 function convert(::Type{String}, dat::Vector{UInt8})
@@ -350,10 +349,3 @@ function encode_to_utf8{T<:Union{UInt16, UInt32}}(::Type{T}, dat, len)
     end
     String(buf)
 end
-
-utf8(p::Ptr{UInt8}) =
-    utf8(p, p == C_NULL ? Csize_t(0) : ccall(:strlen, Csize_t, (Ptr{UInt8},), p))
-function utf8(p::Ptr{UInt8}, len::Integer)
-    p == C_NULL && throw(ArgumentError("cannot convert NULL to string"))
-    String(ccall(:jl_pchar_to_array, Vector{UInt8}, (Ptr{UInt8}, Csize_t), p, len))
-end
diff --git a/contrib/BBEditTextWrangler-julia.plist b/contrib/BBEditTextWrangler-julia.plist
index fb6a20cb8f1e8f..b4c88a9ccfc554 100644
--- a/contrib/BBEditTextWrangler-julia.plist
+++ b/contrib/BBEditTextWrangler-julia.plist
@@ -1177,7 +1177,6 @@
             <string>using</string>
             <string>utf16</string>
             <string>utf32</string>
-            <string>utf8</string>
             <string>values</string>
             <string>var</string>
             <string>varm</string>
diff --git a/doc/stdlib/collections.rst b/doc/stdlib/collections.rst
index 4e401e3cd64454..fcbe58b194c289 100644
--- a/doc/stdlib/collections.rst
+++ b/doc/stdlib/collections.rst
@@ -842,7 +842,7 @@ Given a dictionary ``D``, the syntax ``D[x]`` returns the value of key ``x`` (if
          "bar" => 42.0
          "foo" => 0.0
 
-       julia> b = Dict(utf8("baz") => 17, utf8("bar") => 4711)
+       julia> b = Dict("baz" => 17, "bar" => 4711)
        Dict{String,Int64} with 2 entries:
          "bar" => 4711
          "baz" => 17
diff --git a/doc/stdlib/strings.rst b/doc/stdlib/strings.rst
index fc378b859f7bac..a1bb0175c94774 100644
--- a/doc/stdlib/strings.rst
+++ b/doc/stdlib/strings.rst
@@ -68,24 +68,6 @@
 
    Convert a string to ``String`` type and check that it contains only ASCII data, otherwise throwing an ``ArugmentError`` indicating the position of the first non-ASCII byte.
 
-.. function:: utf8(::Array{UInt8,1})
-
-   .. Docstring generated from Julia source
-
-   Create a UTF-8 string from a byte array.
-
-.. function:: utf8(::Ptr{UInt8}, [length])
-
-   .. Docstring generated from Julia source
-
-   Create a UTF-8 string from the address of a C (0-terminated) string encoded in UTF-8. A copy is made; the ptr can be safely freed. If ``length`` is specified, the string does not have to be 0-terminated.
-
-.. function:: utf8(s)
-
-   .. Docstring generated from Julia source
-
-   Convert a string to a contiguous UTF-8 string (all characters must be valid UTF-8 characters).
-
 .. function:: @r_str -> Regex
 
    .. Docstring generated from Julia source
diff --git a/test/base64.jl b/test/base64.jl
index e3e71d79a5989a..1055cc1c302f90 100644
--- a/test/base64.jl
+++ b/test/base64.jl
@@ -24,7 +24,7 @@ end
 rm(fname)
 
 # Encode to string and decode
-@test utf8(base64decode(base64encode(inputText))) == inputText
+@test String(base64decode(base64encode(inputText))) == inputText
 
 # Decode with max line chars = 76 and padding
 ipipe = Base64DecodePipe(IOBuffer(encodedMaxLine76))
diff --git a/test/dict.jl b/test/dict.jl
index d31da6008edb81..a9351357df14db 100644
--- a/test/dict.jl
+++ b/test/dict.jl
@@ -263,7 +263,7 @@ end
 for d in (Dict("\n" => "\n", "1" => "\n", "\n" => "2"),
           [string(i) => i for i = 1:30],
           [reshape(1:i^2,i,i) => reshape(1:i^2,i,i) for i = 1:24],
-          [utf8(Char['α':'α'+i;]) => utf8(Char['α':'α'+i;]) for i = (1:10)*10],
+          [String(Char['α':'α'+i;]) => String(Char['α':'α'+i;]) for i = (1:10)*10],
           Dict("key" => zeros(0, 0)))
     for cols in (12, 40, 80), rows in (2, 10, 24)
         # Ensure output is limited as requested
diff --git a/test/replcompletions.jl b/test/replcompletions.jl
index f446004fdccbda..26a1cfb034e0d0 100644
--- a/test/replcompletions.jl
+++ b/test/replcompletions.jl
@@ -569,7 +569,7 @@ c, r, res = test_scomplete(s)
             withenv("PATH" => string(tempdir(), ":", dir)) do
                 s = string("repl-completio")
                 c,r = test_scomplete(s)
-                @test [utf8("repl-completion")] == c
+                @test ["repl-completion"] == c
                 @test s[r] == "repl-completio"
             end
 
diff --git a/test/strings/basic.jl b/test/strings/basic.jl
index a45e0d7a5e08f4..52a653a0c89723 100644
--- a/test/strings/basic.jl
+++ b/test/strings/basic.jl
@@ -215,14 +215,14 @@ end
 # issue #11142
 s = "abcdefghij"
 sp = pointer(s)
-@test utf8(sp) == s
-@test utf8(sp,5) == "abcde"
-@test typeof(utf8(sp)) == String
+@test String(sp) == s
+@test String(sp,5) == "abcde"
+@test typeof(String(sp)) == String
 s = "abcde\uff\u2000\U1f596"
 sp = pointer(s)
-@test utf8(sp) == s
-@test utf8(sp,5) == "abcde"
-@test typeof(utf8(sp)) == String
+@test String(sp) == s
+@test String(sp,5) == "abcde"
+@test typeof(String(sp)) == String
 
 @test get(tryparse(BigInt, "1234567890")) == BigInt(1234567890)
 @test isnull(tryparse(BigInt, "1234567890-"))
@@ -464,11 +464,11 @@ end
 # issue # 11464: uppercase/lowercase of UTF16String becomes a String
 str = "abcdef\uff\uffff\u10ffffABCDEF"
 @test typeof(uppercase("abcdef")) == String
-@test typeof(uppercase(utf8(str))) == String
+@test typeof(uppercase(String(str))) == String
 @test typeof(uppercase(utf16(str))) == UTF16String
 @test typeof(uppercase(utf32(str))) == UTF32String
 @test typeof(lowercase("ABCDEF")) == String
-@test typeof(lowercase(utf8(str))) == String
+@test typeof(lowercase(String(str))) == String
 @test typeof(lowercase(utf16(str))) == UTF16String
 @test typeof(lowercase(utf32(str))) == UTF32String
 
@@ -481,16 +481,11 @@ foobaz(ch) = reinterpret(Char, typemax(UInt32))
 
 @test "a".*["b","c"] == ["ab","ac"]
 @test ["b","c"].*"a" == ["ba","ca"]
-@test utf8("a").*["b","c"] == ["ab","ac"]
-@test "a".*map(utf8,["b","c"]) == ["ab","ac"]
 @test ["a","b"].*["c","d"]' == ["ac" "ad"; "bc" "bd"]
 
-# Make sure NULL pointer are handled consistently by
-# `String`, `ascii` and `utf8`
+# Make sure NULL pointers are handled consistently by String
 @test_throws ArgumentError String(Ptr{UInt8}(0))
 @test_throws ArgumentError String(Ptr{UInt8}(0), 10)
-@test_throws ArgumentError utf8(Ptr{UInt8}(0))
-@test_throws ArgumentError utf8(Ptr{UInt8}(0), 10)
 
 # ascii works on ASCII strings and fails on non-ASCII strings
 @test ascii("Hello, world") == "Hello, world"
diff --git a/test/strings/types.jl b/test/strings/types.jl
index 0a510b8355b4cb..7b27fcec7837d3 100644
--- a/test/strings/types.jl
+++ b/test/strings/types.jl
@@ -13,7 +13,7 @@ slen_u8str2 = length(u8str2)
 @test len_u8str2 == 2 * len_u8str
 @test slen_u8str2 == 2 * slen_u8str
 
-u8str2plain = utf8(u8str2)
+u8str2plain = String(u8str2)
 
 for i1 = 1:length(u8str2)
     if !isvalid(u8str2, i1); continue; end
@@ -93,8 +93,7 @@ u = SubString(str, 1, 5)
 @test prevind(SubString("{var}",2,4),4) == 3
 
 # issue #4183
-@test split(SubString(ascii("x"), 2, 0), "y") == AbstractString[""]
-@test split(SubString(utf8("x"), 2, 0), "y") == AbstractString[""]
+@test split(SubString("x", 2, 0), "y") == AbstractString[""]
 
 # issue #6772
 @test float(SubString("10",1,1)) === 1.0
@@ -132,7 +131,7 @@ let s="lorem ipsum",
 end #let
 
 #for isvalid(SubString{String})
-let s = utf8("Σx + βz - 2")
+let s = String("Σx + βz - 2")
   for i in -1:length(s)+2
       ss=SubString(s,1,i)
       @test isvalid(ss,i)==isvalid(s,i)
diff --git a/test/unicode/checkstring.jl b/test/unicode/checkstring.jl
index c19b8b9a7324ce..0b694f0bfeab95 100644
--- a/test/unicode/checkstring.jl
+++ b/test/unicode/checkstring.jl
@@ -90,10 +90,10 @@ try
     end
 
     # Long encoding of 0x01
-    @test_throws UnicodeError utf8(b"\xf0\x80\x80\x80")
+    @test_throws UnicodeError String(b"\xf0\x80\x80\x80")
     # Test ends of long encoded surrogates
-    @test_throws UnicodeError utf8(b"\xf0\x8d\xa0\x80")
-    @test_throws UnicodeError utf8(b"\xf0\x8d\xbf\xbf")
+    @test_throws UnicodeError String(b"\xf0\x8d\xa0\x80")
+    @test_throws UnicodeError String(b"\xf0\x8d\xbf\xbf")
     @test_throws UnicodeError Base.checkstring(b"\xf0\x80\x80\x80")
     @test Base.checkstring(b"\xc0\x81"; accept_long_char=true) == (1,0x1,0,0,0)
     @test Base.checkstring(b"\xf0\x80\x80\x80"; accept_long_char=true) == (1,0x1,0,0,0)
diff --git a/test/unicode/utf16.jl b/test/unicode/utf16.jl
index 6e0a0b14ec03fb..1c8e31cdece981 100644
--- a/test/unicode/utf16.jl
+++ b/test/unicode/utf16.jl
@@ -6,7 +6,7 @@ u16 = utf16(u8)
 @test sizeof(u16) == 18
 @test length(u16.data) == 10 && u16.data[end] == 0
 @test length(u16) == 5
-@test utf8(u16) == u8
+@test String(u16) == u8
 @test collect(u8) == collect(u16)
 @test u8 == utf16(u16.data[1:end-1]) == utf16(copy!(Array(UInt8, 18), 1, reinterpret(UInt8, u16.data), 1, 18))
 @test u8 == utf16(pointer(u16)) == utf16(convert(Ptr{Int16}, pointer(u16)))
diff --git a/test/unicode/utf32.jl b/test/unicode/utf32.jl
index 8165a6aaecc6d9..c8049b90c29767 100644
--- a/test/unicode/utf32.jl
+++ b/test/unicode/utf32.jl
@@ -6,7 +6,7 @@ u32 = utf32(u8)
 @test sizeof(u32) == 20
 @test length(u32.data) == 6 && u32.data[end] == 0
 @test length(u32) == 5
-@test utf8(u32) == u8
+@test String(u32) == u8
 @test collect(u8) == collect(u32)
 @test u8 == utf32(u32.data[1:end-1]) == utf32(copy!(Array(UInt8, 20), 1, reinterpret(UInt8, u32.data), 1, 20))
 @test u8 == utf32(pointer(u32)) == utf32(convert(Ptr{Int32}, pointer(u32)))
@@ -16,9 +16,9 @@ u32 = utf32(u8)
 function tstcvt(strUTF8::String, strUTF16::UTF16String, strUTF32::UTF32String)
     @test utf16(strUTF8) == strUTF16
     @test utf32(strUTF8) == strUTF32
-    @test utf8(strUTF16) == strUTF8
+    @test String(strUTF16) == strUTF8
     @test utf32(strUTF16) == strUTF32
-    @test utf8(strUTF32)  == strUTF8
+    @test String(strUTF32)  == strUTF8
     @test utf16(strUTF32) == strUTF16
 end
 
@@ -49,7 +49,7 @@ str3_UTF32 = utf32(str3_UTF8)
 str4_UTF32 = utf32(str4_UTF8)
 strS_UTF32 = utf32(strS_UTF8)
 
-@test utf8(strAscii) == strAscii
+@test String(strAscii) == strAscii
 @test utf16(strAscii) == strAscii
 @test utf32(strAscii) == strAscii
 
@@ -62,13 +62,13 @@ tstcvt(str4_UTF8,str4_UTF16,str4_UTF32)
 # Test converting surrogate pairs
 @test utf16(strS_UTF8) == strC_UTF8
 @test utf32(strS_UTF8) == strC_UTF8
-@test utf8(strS_UTF16) == strC_UTF8
+@test String(strS_UTF16) == strC_UTF8
 @test utf32(strS_UTF16) == strC_UTF8
-@test utf8(strS_UTF32)  == strC_UTF8
+@test String(strS_UTF32)  == strC_UTF8
 @test utf16(strS_UTF32) == strC_UTF8
 
 # Test converting overlong \0
-@test utf8(strZ)  == strz_UTF8
+@test String(strZ)  == strz_UTF8
 @test utf16(String(strZ)) == strz_UTF8
 @test utf32(String(strZ)) == strz_UTF8
 
@@ -172,7 +172,7 @@ end
 # Wstring
 u8 = "\U10ffff\U1d565\U1d7f6\U00066\U2008a"
 w = wstring(u8)
-@test length(w) == 5 && utf8(w) == u8 && collect(u8) == collect(w)
+@test length(w) == 5 && String(w) == u8 && collect(u8) == collect(w)
 @test u8 == WString(w.data)
 
 # 12268
@@ -211,7 +211,7 @@ end
 
 # Test pointer() functions
 let str = ascii("this ")
-    u8  = utf8(str)
+    u8  = String(str)
     u16 = utf16(str)
     u32 = utf32(str)
     pa  = pointer(str)
diff --git a/test/unicode/utf8.jl b/test/unicode/utf8.jl
index 073f5f2b4d29cd..c3037a7d624dfd 100644
--- a/test/unicode/utf8.jl
+++ b/test/unicode/utf8.jl
@@ -5,7 +5,7 @@
 let ch = 0x10000
     for hichar = 0xd800:0xdbff
         for lochar = 0xdc00:0xdfff
-            @test convert(String, utf8(Char[hichar, lochar]).data) == string(Char(ch))
+            @test convert(String, String(Char[hichar, lochar]).data) == string(Char(ch))
             ch += 1
         end
     end
diff --git a/test/unicode/utf8proc.jl b/test/unicode/utf8proc.jl
index 6ba0ac6f4e8128..2a829a5717d55f 100644
--- a/test/unicode/utf8proc.jl
+++ b/test/unicode/utf8proc.jl
@@ -234,7 +234,7 @@ let grphtest = (("b\u0300lahβlahb\u0302láh", ["b\u0300","l","a","h",
                                                 "\U1d4c1\u0300"]),
                 ("x",["x"]),
                 ("abc",["a","b","c"]))
-    for T in (utf8,utf16,utf32)
+    for T in (String,utf16,utf32)
         for nf in (:NFC, :NFD)
             for (s, g) in grphtest
                 s_ = T(normalize_string(s, nf))