Skip to content

Commit

Permalink
Allow user to specify integer parsing type (fixes #223) (#224)
Browse files Browse the repository at this point in the history
* Allow user to specify integer parsing type

* Added a ParserContext object, containing the dicttype and
  inttype to use for parsing JSON

* Use keytype instead of accessing DictType.parameters[1]

* four space indent

* Rearrange function argument order to put ParserContext first

* These are unexported, internal functions, and this ordering makes more sense.

* Test inttype=BigInt as well

* Update README with `inttype` information.
  • Loading branch information
kmsquire authored and TotalVerb committed Nov 18, 2017
1 parent d1edcf8 commit 8d5ad9b
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 27 deletions.
14 changes: 11 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ json(a::Any)
Returns a compact JSON representation as an `AbstractString`.

```julia
JSON.parse(s::AbstractString; dicttype=Dict)
JSON.parse(io::IO; dicttype=Dict)
JSON.parsefile(filename::AbstractString; dicttype=Dict, use_mmap=true)
JSON.parse(s::AbstractString; dicttype=Dict, inttype=Int64)
JSON.parse(io::IO; dicttype=Dict, inttype=Int64)
JSON.parsefile(filename::AbstractString; dicttype=Dict, inttype=Int64, use_mmap=true)
```

Parses a JSON `AbstractString` or IO stream into a nested `Array` or `Dict`.
Expand All @@ -70,6 +70,14 @@ package](https://github.com/JuliaLang/DataStructures.jl) is
installed), you can pass `dicttype=DataStructures.OrderedDict` to
maintain the insertion order of the items in the object.

The `inttype` argument controls how integers are parsed. If a number in the JSON
input is recognized as an integer, it is parsed as an `inttype`; otherwise it is
parsed as a `Float64`. `inttype` defaults to `Int64`. If you know that all of your
integers are small and want to save space, you can pass `inttype=Int32`;
if your JSON input contains integers that are too large for `Int64`, you can pass
`inttype=Int128` or `inttype=BigInt`. `inttype` can be any subtype of `Real`.

```julia
JSON.lower(p::Point2D) = [p.x, p.y]
```
Expand Down
63 changes: 39 additions & 24 deletions src/Parser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ mutable struct StreamingParserState{T <: IO} <: ParserState
end
StreamingParserState(io::IO) = StreamingParserState(io, 0x00, true)

# Parsing options carried in the type domain. The struct has no fields: the
# dictionary type used for JSON objects and the type used for JSON integers are
# held purely as the type parameters `DictType` and `IntType`, so that parser
# methods can dispatch on them.
struct ParserContext{DictType,IntType}
end

"""
Return the byte at the current position of the `ParserState`. If there is no
byte (that is, the `ParserState` is done), then an error is thrown that the
Expand Down Expand Up @@ -146,18 +148,18 @@ end
Given a `ParserState`, after possibly any amount of whitespace, return the next
parseable value.
"""
function parse_value(ps::ParserState, dictT::Type)
function parse_value(pc::ParserContext, ps::ParserState)
chomp_space!(ps)

@inbounds byte = byteat(ps)
if byte == STRING_DELIM
parse_string(ps)
elseif isjsondigit(byte) || byte == MINUS_SIGN
parse_number(ps)
parse_number(pc, ps)
elseif byte == OBJECT_BEGIN
parse_object(ps, dictT)
parse_object(pc, ps)
elseif byte == ARRAY_BEGIN
parse_array(ps, dictT)
parse_array(pc, ps)
else
parse_jsconstant(ps::ParserState)
end
Expand All @@ -179,13 +181,13 @@ function parse_jsconstant(ps::ParserState)
end
end

function parse_array(ps::ParserState, dictT::Type)
function parse_array(pc::ParserContext, ps::ParserState)
result = Any[]
@inbounds incr!(ps) # Skip over opening '['
chomp_space!(ps)
if byteat(ps) ≠ ARRAY_END # special case for empty array
@inbounds while true
push!(result, parse_value(ps, dictT))
push!(result, parse_value(pc, ps))
chomp_space!(ps)
byteat(ps) == ARRAY_END && break
skip!(ps, DELIMITER)
Expand All @@ -197,9 +199,9 @@ function parse_array(ps::ParserState, dictT::Type)
end


function parse_object(ps::ParserState, dictT::Type)
obj = dictT()
keyT = dictT.parameters[1]
function parse_object(pc::ParserContext{DictType, <:Real}, ps::ParserState) where DictType
obj = DictType()
keyT = keytype(DictType)

incr!(ps) # Skip over opening '{'
chomp_space!(ps)
Expand All @@ -212,7 +214,7 @@ function parse_object(ps::ParserState, dictT::Type)
chomp_space!(ps)
skip!(ps, SEPARATOR)
# Read value
value = parse_value(ps, dictT)
value = parse_value(pc, ps)
chomp_space!(ps)
obj[convert(keyT, key)] = value
byteat(ps) == OBJECT_END && break
Expand Down Expand Up @@ -313,17 +315,25 @@ end
Parse an integer from the given bytes vector, starting at `from` and ending at
the byte before `to`. Bytes enclosed should all be ASCII characters.
"""
function int_from_bytes(bytes::Vector{UInt8}, from::Int, to::Int)
function int_from_bytes(pc::ParserContext{<:Associative,IntType},
ps::ParserState,
bytes::Vector{UInt8},
from::Int,
to::Int) where IntType <: Real
@inbounds isnegative = bytes[from] == MINUS_SIGN ? (from += 1; true) : false
num = Int64(0)
num = IntType(0)
@inbounds for i in from:to
num = Int64(10) * num + Int64(bytes[i] - DIGIT_ZERO)
num = IntType(10) * num + IntType(bytes[i] - DIGIT_ZERO)
end
ifelse(isnegative, -num, num)
end

function number_from_bytes(
ps::ParserState, isint::Bool, bytes::Vector{UInt8}, from::Int, to::Int)
function number_from_bytes(pc::ParserContext,
ps::ParserState,
isint::Bool,
bytes::Vector{UInt8},
from::Int,
to::Int)
@inbounds if hasleadingzero(bytes, from, to)
_error(E_LEADING_ZERO, ps)
end
Expand All @@ -332,15 +342,15 @@ function number_from_bytes(
@inbounds if to == from && bytes[from] == MINUS_SIGN
_error(E_BAD_NUMBER, ps)
end
int_from_bytes(bytes, from, to)
int_from_bytes(pc, ps, bytes, from, to)
else
res = float_from_bytes(bytes, from, to)
isnull(res) ? _error(E_BAD_NUMBER, ps) : get(res)
end
end


function parse_number(ps::ParserState)
function parse_number(pc::ParserContext, ps::ParserState)
# Determine the end of the floating point by skipping past ASCII values
# 0-9, +, -, e, E, and .
number = UInt8[]
Expand All @@ -361,7 +371,7 @@ function parse_number(ps::ParserState)
incr!(ps)
end

number_from_bytes(ps, isint, number, 1, length(number))
number_from_bytes(pc, ps, isint, number, 1, length(number))
end


Expand All @@ -370,26 +380,31 @@ function unparameterize_type(T::Type)
candidate <: Union{} ? T : candidate
end

function parse(str::AbstractString; dicttype::Type{<:Associative}=Dict{String,Any})
function parse(str::AbstractString; dicttype::Type{<:Associative}=Dict{String,Any}, inttype::Type{<:Real}=Int64)
pc = ParserContext{unparameterize_type(dicttype), inttype}()
ps = MemoryParserState(Vector{UInt8}(String(str)), 1)
v = parse_value(ps, unparameterize_type(dicttype))
v = parse_value(pc, ps)
chomp_space!(ps)
if hasmore(ps)
_error(E_EXPECTED_EOF, ps)
end
v
end

function parse(io::IO; dicttype::Type{<:Associative}=Dict{String,Any})
function parse(io::IO; dicttype::Type{<:Associative}=Dict{String,Any}, inttype::Type{<:Real}=Int64)
pc = ParserContext{unparameterize_type(dicttype), inttype}()
ps = StreamingParserState(io)
parse_value(ps, unparameterize_type(dicttype))
parse_value(pc, ps)
end

function parsefile(filename::AbstractString; dicttype::Type{<:Associative}=Dict{String, Any}, use_mmap=true)
function parsefile(filename::AbstractString;
dicttype::Type{<:Associative}=Dict{String, Any},
inttype::Type{<:Real}=Int64,
use_mmap=true)
sz = filesize(filename)
open(filename) do io
s = use_mmap ? String(Mmap.mmap(io, Vector{UInt8}, sz)) : read(io, String)
parse(s; dicttype=dicttype)
parse(s; dicttype=dicttype, inttype=inttype)
end
end

Expand Down
16 changes: 16 additions & 0 deletions test/parser/inttype.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# For every integer type in the list, a JSON integer must come back as exactly
# that type, while the enclosing object is still a `Dict{String, Any}` with the
# single key "x".
@testset for T in [Int32, Int64, Int128, BigInt]
    parsed = JSON.parse("{\"x\": 3}", inttype=T)
    @test parsed isa Dict{String, Any}
    @test length(parsed) == 1
    k = first(keys(parsed))
    @test string(k) == "x"
    v = parsed[k]
    @test v == 3
    @test typeof(v) === T
end

# 18005722827070440994 is larger than typemax(Int64), so the document is parsed
# with inttype=Int128 and compared against the expected dictionary.
@testset begin
    input = """{"201736327611975630": 18005722827070440994}"""
    expected = Dict{String,Any}("201736327611975630" => 18005722827070440994)
    @test JSON.parse(input, inttype=Int128) == expected
end
4 changes: 4 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ include("json-samples.jl")
include("parser/dicttype.jl")
end

# Run the tests for the `inttype` keyword; the cases live in their own file.
@testset "inttype" begin
    include("parser/inttype.jl")
end

@testset "Miscellaneous" begin
# test for single values
@test JSON.parse("true") == true
Expand Down

0 comments on commit 8d5ad9b

Please sign in to comment.