Skip to content

Make only one extra bytes vlr per dataset #30

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/LAS.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ export SpatialInfo, AxisInfo, Range

export LasVariableLengthRecord, get_user_id, get_record_id, get_description, get_data, is_extended
export GeoKeys, GeoDoubleParamsTag, GeoAsciiParamsTag, OGC_WKT
export ClassificationLookup, TextAreaDescription, ExtraBytes, WaveformPacketDescriptor, WaveformDataPackets
export ClassificationLookup, TextAreaDescription, ExtraBytes, ExtraBytesCollection, WaveformPacketDescriptor, WaveformDataPackets
export get_horizontal_unit, get_vertical_unit, get_wkt_string
export get_classes, get_description, set_description!
export @register_vlr_type, read_vlr_data, extract_vlr_type
Expand Down
2 changes: 1 addition & 1 deletion src/constants.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ const ID_SUPERSEDED = UInt16(7)
const ID_WAVEFORMPACKETDATA = UInt16(65535)

const DEFAULT_LAS_COLUMNS = (:position, :intensity, :classification, :returnnumber, :numberofreturns, :color, :point_source_id, :gps_time, :overlap)
const ALL_LAS_COLUMNS = SVector{0,Symbol}()
const ALL_LAS_COLUMNS = nothing

POINT_SCALE = 0.0001
global const _VLR_TYPE_MAP = Dict()
Expand Down
65 changes: 37 additions & 28 deletions src/dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,18 @@ mutable struct LasDataset
check_user_type(col_type)

# grab information about the existing ExtraBytes VLRs - need to see if we need to update them or not
extra_bytes_vlrs = extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)
extra_bytes_data = get_data.(extra_bytes_vlrs)
extra_bytes_vlr = extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)
@assert length(extra_bytes_vlr) ≤ 1 "Found multiple Extra Bytes VLRs in LAS file!"
if isempty(extra_bytes_vlr)
extra_bytes_vlr = LasVariableLengthRecord(LAS_SPEC_USER_ID, ID_EXTRABYTES, "Extra Bytes", ExtraBytesCollection())
# make sure we add the VLR to our collection and update any header info
push!(vlrs, extra_bytes_vlr)
header.n_vlr += 1
header.data_offset += sizeof(extra_bytes_vlr)
else
extra_bytes_vlr = extra_bytes_vlr[1]
end
extra_bytes_data = get_extra_bytes(get_data(extra_bytes_vlr))
user_field_names = Symbol.(name.(extra_bytes_data))
user_field_types = data_type.(extra_bytes_data)

Expand All @@ -95,13 +105,11 @@ mutable struct LasDataset
continue
elseif !isnothing(matches_name_idx)
# if we find one with matching name (not type), we'll need to update the header record length to account for this new type
header.data_record_length -= sizeof(data_type(get_data(vlrs[matches_name_idx])))
header.data_record_length -= sizeof(data_type(extra_bytes_data[matches_name_idx]))
end
# now make a new ExtraBytes VLR and add it to our dataset, updating the header information as we go
extra_bytes_vlr = construct_extra_bytes_vlr(col_name, eltype(type_to_check))
push!(vlrs, extra_bytes_vlr)
header.n_vlr += 1
header.data_offset += sizeof(extra_bytes_vlr)
add_extra_bytes_to_collection!(get_data(extra_bytes_vlr), col_name, eltype(type_to_check))
header.data_offset += sizeof(ExtraBytes)
header.data_record_length += sizeof(type_to_check)
end
end
Expand Down Expand Up @@ -287,15 +295,22 @@ function add_column!(las::LasDataset, column::Symbol, values::AbstractVector{T})
las.header.data_record_length += sizeof(T)
vlrs = get_vlrs(las)
extra_bytes_vlrs = extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)

@assert length(extra_bytes_vlrs) ≤ 1 "Found $(length(extra_bytes_vlrs)) Extra Bytes VLRs when we can only have a max of 1"
if isempty(extra_bytes_vlrs)
extra_bytes_vlr = LasVariableLengthRecord(LAS_SPEC_USER_ID, ID_EXTRABYTES, "Extra Bytes Records", ExtraBytesCollection())
# make sure we add it to the dataset to account for offsets in the header etc.
add_vlr!(las, extra_bytes_vlr)
else
extra_bytes_vlr = extra_bytes_vlrs[1]
end
if T <: SVector
# user field arrays have to be saved as sequential extra bytes records with names of the form "column [i]" (zero indexing encouraged)
split_col_name = split_column_name(column, length(T))
for i ∈ 1:length(T)
add_extra_bytes!(las, split_col_name[i], eltype(T), extra_bytes_vlrs)
add_extra_bytes!(las, split_col_name[i], eltype(T), extra_bytes_vlr)
end
else
add_extra_bytes!(las, column, T, extra_bytes_vlrs)
add_extra_bytes!(las, column, T, extra_bytes_vlr)
end
nothing
end
Expand Down Expand Up @@ -341,24 +356,18 @@ Add an extra bytes VLR to a LAS dataset to document an extra user-field for poin
* `las` : LAS dataset to add extra bytes to
* `col_name` : Name to save the user field as
* `T` : Data type for the user field (must be a base type as specified in the spec or a static vector of one of these types)
* `extra_bytes_vlr` : Set of existing extra bytes VLRs already present in the LAS dataset
* `extra_bytes_vlr` : An Extra Bytes Collection VLR that already exists in the dataset
"""
function add_extra_bytes!(las::LasDataset, col_name::Symbol, ::Type{T}, extra_bytes_vlrs::Vector{LasVariableLengthRecord}) where T
matching_extra_bytes_vlr = findfirst(Symbol.(name.(get_data.(extra_bytes_vlrs))) .== col_name)
if !isnothing(matching_extra_bytes_vlr)
remove_vlr!(las, extra_bytes_vlrs[matching_extra_bytes_vlr])
function add_extra_bytes!(las::LasDataset, col_name::Symbol, ::Type{T}, extra_bytes_vlr::LasVariableLengthRecord{ExtraBytesCollection}) where T
extra_bytes = get_extra_bytes(get_data(extra_bytes_vlr))
matching_extra_bytes = findfirst(Symbol.(name.(extra_bytes)) .== col_name)
if !isnothing(matching_extra_bytes)
deleteat!(extra_bytes, matching_extra_bytes)
header = get_header(las)
header.data_offset -= (length(matching_extra_bytes) * sizeof(ExtraBytes))
@assert header.data_offset > 0 "Inconsistent data configuration! Got data offset of $(header.data_offset) after removing Extra Bytes Record"
end
extra_bytes_vlr = construct_extra_bytes_vlr(col_name, T)
add_vlr!(las, extra_bytes_vlr)
end

"""
$(TYPEDSIGNATURES)

Construct an extra bytes VLR with a field name `col_name` and data type `T`
"""
function construct_extra_bytes_vlr(col_name::Symbol, ::Type{T}) where T
@assert length(String(col_name)) ≤ 32 "Custom column name $(col_name) too long! Must be ≤ 32 Bytes, got $(length(String(col_name))) Bytes"
extra_bytes = ExtraBytes(0x00, String(col_name), zero(T), zero(T), zero(T), zero(T), zero(T), "$(col_name)")
LasVariableLengthRecord(LAS_SPEC_USER_ID, ID_EXTRABYTES, String(col_name), extra_bytes)
add_extra_bytes_to_collection!(get_data(extra_bytes_vlr), col_name, T)
header = get_header(las)
header.data_offset += sizeof(ExtraBytes)
end
2 changes: 1 addition & 1 deletion src/header.jl
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ creation_year(h::LasHeader) = h.creation_year

Get the size of a header `h` in bytes
"""
header_size(h::LasHeader) = h.header_size
header_size(h::LasHeader) = Int(h.header_size)

"""
$(TYPEDSIGNATURES)
Expand Down
13 changes: 8 additions & 5 deletions src/read.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Load a LAS dataset from a source file

# Arguments
* `file_name` : Name of the LAS file to extract data from
* `fields` : Name of the LAS point fields to extract as columns in the output data. Default `DEFAULT_LAS_COLUMNS`
* `fields` : Name of the LAS point fields to extract as columns in the output data. If set to `nothing`, ingest all available columns. Default `DEFAULT_LAS_COLUMNS`
"""
function load_las(file_name::AbstractString,
fields::TFields = DEFAULT_LAS_COLUMNS;
Expand All @@ -25,7 +25,7 @@ end

Ingest LAS point data in a tabular format
"""
function load_pointcloud(file_name::AbstractString, fields::AbstractVector{Symbol} = collect(DEFAULT_LAS_COLUMNS); kwargs...)
function load_pointcloud(file_name::AbstractString, fields::Union{Nothing, AbstractVector{Symbol}} = collect(DEFAULT_LAS_COLUMNS); kwargs...)
las = load_las(file_name, fields; kwargs...)
return get_pointcloud(las)
end
Expand Down Expand Up @@ -106,7 +106,9 @@ function read_las_data(io::TIO, required_columns::TTuple=DEFAULT_LAS_COLUMNS;
pos = header.header_size + vlr_length
user_defined_bytes = read(io, header.data_offset - pos)

extra_bytes = Vector{ExtraBytes}(map(vlr -> get_data(vlr), extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)))
extra_bytes_vlr = extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)
@assert length(extra_bytes_vlr) ≤ 1 "Found multiple extra bytes columns!"
extra_bytes = isempty(extra_bytes_vlr) ? ExtraBytes[] : get_extra_bytes(get_data(extra_bytes_vlr[1]))

this_format = record_format(header, extra_bytes)
xyz = spatial_info(header)
Expand Down Expand Up @@ -194,11 +196,12 @@ Helper function that finds the names of user-defined point fields that have been
Note according to spec that user-defined array field names must be of the form `col [0], col[1], ..., col[N]` where `N` is the dimension of the user field
"""
function get_user_fields_for_table(records::Vector{TRecord}, Names::Tuple, required_columns::TTuple) where {TRecord <: Union{ExtendedPointRecord, FullRecord}, TTuple}
user_fields = filter(field -> get_base_field_name(field) ∈ required_columns, Names)
get_all_fields = isnothing(required_columns)
user_fields = filter(field -> get_all_fields || get_base_field_name(field) ∈ required_columns, Names)
raw_user_data = Dict{Symbol, Vector}(field => getproperty.(getproperty.(records, :user_fields), field) for field ∈ user_fields)
user_field_map = get_user_field_map(user_fields)
grouped_field_names = collect(keys(user_field_map))
user_fields = filter(field -> field ∈ required_columns, grouped_field_names)
user_fields = filter(field -> get_all_fields || field ∈ required_columns, grouped_field_names)
grouped_user_fields = group_user_fields(raw_user_data, user_field_map)
return user_fields, grouped_user_fields
end
Expand Down
64 changes: 62 additions & 2 deletions src/registered_vlrs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,6 @@ end
Base.sizeof(::Type{ExtraBytes}) = 192
Base.sizeof(::ExtraBytes) = Base.sizeof(ExtraBytes)

@register_vlr_type ExtraBytes LAS_SPEC_USER_ID ID_EXTRABYTES

# we can rely on this indexing safely since we're restricted to TData being in SUPPORTED_EXTRA_BYTES_TYPES
data_code_from_type(::Type{TData}) where TData = (TData == Missing ? 0x00 : UInt8(indexin([TData], SUPPORTED_EXTRA_BYTES_TYPES)[1]))
data_code_from_type(::ExtraBytes{TData}) where TData = data_code_from_type(TData)
Expand Down Expand Up @@ -214,6 +212,68 @@ function Base.write(io::IO, extra_bytes::ExtraBytes{TData}) where TData
writestring(io, extra_bytes.description, 32)
end

"""
$(TYPEDEF)

Container for the Extra Bytes records of a dataset; the whole collection is
serialised into a single *VLR* rather than one VLR per record

$(TYPEDFIELDS)
---
$(METHODLIST)
"""
struct ExtraBytesCollection
    """The Extra Bytes records held by this collection, each documenting one user field in the dataset"""
    extra_bytes::Vector{ExtraBytes}

    # accept any abstract vector whose eltype is an ExtraBytes (possibly parametric);
    # `new` converts it to the concrete Vector{ExtraBytes} field type
    ExtraBytesCollection(records::AbstractVector{T}) where {T <: ExtraBytes} = new(records)
end

# an empty collection to start from when a dataset has no user fields yet
ExtraBytesCollection() = ExtraBytesCollection(Vector{ExtraBytes}())

"""
$(TYPEDSIGNATURES)

Return the vector of Extra Bytes records stored in an Extra Bytes `collection`
"""
function get_extra_bytes(collection::ExtraBytesCollection)
    return collection.extra_bytes
end

@register_vlr_type ExtraBytesCollection LAS_SPEC_USER_ID ID_EXTRABYTES

# packed size on disk: each record occupies a fixed number of bytes with no padding between records
Base.sizeof(collection::ExtraBytesCollection) = Base.sizeof(ExtraBytes) * length(collection.extra_bytes)

# two collections are equal exactly when they hold equal records in the same order
function Base.:(==)(c1::ExtraBytesCollection, c2::ExtraBytesCollection)
    lhs = get_extra_bytes(c1)
    rhs = get_extra_bytes(c2)
    length(lhs) == length(rhs) || return false
    return all(a == b for (a, b) in zip(lhs, rhs))
end

"""
$(TYPEDSIGNATURES)

Read the payload of an Extra Bytes *VLR*: parse `nb` bytes from `io` as a whole
number of `ExtraBytes` records and wrap them in an `ExtraBytesCollection`
"""
function read_vlr_data(io::IO, ::Type{ExtraBytesCollection}, nb::Integer)
    @assert nb % sizeof(ExtraBytes) == 0 "Number of bytes $(nb) is not a multiple of Extra Bytes record size $(sizeof(ExtraBytes))"
    # exact integer division - avoids the float round-trip of Int(nb/sizeof(ExtraBytes))
    num_extra_bytes_records = div(nb, sizeof(ExtraBytes))
    # typed comprehension guarantees a Vector{ExtraBytes} even when there are zero records,
    # so the ExtraBytesCollection constructor always dispatches correctly
    extra_bytes = ExtraBytes[read(io, ExtraBytes) for _ in 1:num_extra_bytes_records]
    return ExtraBytesCollection(extra_bytes)
end

# serialise the collection as the concatenation of its records, in order
function Base.write(io::IO, collection::ExtraBytesCollection)
    foreach(record -> write(io, record), get_extra_bytes(collection))
end

"""
$(TYPEDSIGNATURES)

Construct an `ExtraBytes` record documenting a user field with name `col_name` and
data type `T`, and append it to an Extra Bytes `collection`

Note that the name must fit in the 32-Byte name field of the record as per the LAS spec
"""
function add_extra_bytes_to_collection!(collection::ExtraBytesCollection, col_name::Symbol, ::Type{T}) where T
    field_name = String(col_name)
    # compare the encoded size in Bytes (not characters) against the 32-Byte name field -
    # length() undercounts for non-ASCII names
    @assert sizeof(field_name) ≤ 32 "Custom column name $(col_name) too long! Must be ≤ 32 Bytes, got $(sizeof(field_name)) Bytes"
    extra_bytes = ExtraBytes(0x00, field_name, zero(T), zero(T), zero(T), zero(T), zero(T), "$(col_name)")
    push!(get_extra_bytes(collection), extra_bytes)
end

"""
$(TYPEDEF)

Expand Down
18 changes: 11 additions & 7 deletions src/write.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,14 +106,18 @@ function write_las(io::IO, las::LasDataset, compressed::Bool = false)
user_fields = ()
# find indices of existing extra bytes VLRs
extra_bytes_idxs = findall(vlr -> (get_user_id(vlr) == LAS_SPEC_USER_ID) && (get_record_id(vlr) == ID_EXTRABYTES), vlrs)
# need to adjust the data record length in the header to remove these extra bytes
for i ∈ extra_bytes_idxs
header.data_record_length -= sizeof(data_type(get_data(vlrs[i])))
if !isempty(extra_bytes_idxs)
@assert length(extra_bytes_idxs) == 1 "Found $(length(extra_bytes_idxs)) Extra Bytes VLRs when we should only have 1"
extra_bytes_vlr = vlrs[extra_bytes_idxs[1]]
# need to adjust the data record length in the header to remove these extra bytes
for extra_bytes ∈ get_extra_bytes(get_data(extra_bytes_vlr))
header.data_record_length -= sizeof(data_type(extra_bytes))
end
# make sure we remove the extra bytes vlrs and adjust the header info
header.n_vlr -= 1
header.data_offset -= sizeof(extra_bytes_vlr)
deleteat!(vlrs, extra_bytes_idxs)
end
# make sure we remove the extra bytes vlrs and adjust the header info
header.n_vlr -= length(extra_bytes_idxs)
header.data_offset -= sum(sizeof.(vlrs[extra_bytes_idxs]))
deleteat!(vlrs, extra_bytes_idxs)
end

write(io, header)
Expand Down
42 changes: 22 additions & 20 deletions test/dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,28 +60,30 @@
@test this_pc.other_thing == spicy_pc.other_thing
# we should have documented our columns as extra bytes VLRs now
vlrs = get_vlrs(las)
@test length(vlrs) == 2
@test length(vlrs) == 1
vlr_data = get_data.(vlrs)
@test all(isa.(vlr_data, ExtraBytes))
@test (LAS.name(vlr_data[1]) == "thing") && (LAS.data_type(vlr_data[1]) == Float64)
@test (LAS.name(vlr_data[2]) == "other_thing") && (LAS.data_type(vlr_data[2]) == Int16)
@test vlr_data[1] isa ExtraBytesCollection
extra_bytes = LAS.get_extra_bytes(vlr_data[1])
@test (LAS.name(extra_bytes[1]) == "thing") && (LAS.data_type(extra_bytes[1]) == Float64)
@test (LAS.name(extra_bytes[2]) == "other_thing") && (LAS.data_type(extra_bytes[2]) == Int16)
# and our header should be updated appropriately
@test number_of_vlrs(header) == 2
@test number_of_vlrs(header) == 1
@test point_data_offset(header) == header_size(header) + sum(sizeof.(vlrs))
# now add another user field directly to the dataset
new_thing = rand(Float32, num_points)
add_column!(las, :new_thing, new_thing)
vlrs = get_vlrs(las)
@test length(vlrs) == 3
new_extra_bytes = get_data(vlrs[3])
@test length(vlrs) == 1
new_extra_bytes = LAS.get_extra_bytes(get_data(vlrs[1]))[3]
@test (LAS.name(new_extra_bytes) == "new_thing") && (LAS.data_type(new_extra_bytes) == Float32)
# we shouldn't be able to add columns of different length to the LAS data
@test_throws AssertionError add_column!(las, :bad, rand(10))
# now if we replace the values of one of the user fields with a different type, it should work
new_thing = rand(UInt8, num_points)
add_column!(las, :thing, new_thing)
vlrs = get_vlrs(las)
@test length(vlrs) == 3
new_extra_bytes = get_data(vlrs[3])
@test length(vlrs) == 1
new_extra_bytes = LAS.get_extra_bytes(get_data(vlrs[1]))[3]
@test (LAS.name(new_extra_bytes) == "thing") && (LAS.data_type(new_extra_bytes) == UInt8)

# merge some data into our dataset
Expand All @@ -98,11 +100,11 @@
)
add_vlr!(las, desc)
vlrs = get_vlrs(las)
@test length(vlrs) == 4
@test vlrs[4] == desc
@test length(vlrs) == 2
@test vlrs[2] == desc
# make sure we've updated the header correctly
header = get_header(las)
@test number_of_vlrs(header) == 4
@test number_of_vlrs(header) == 2
@test point_data_offset(header) == header_size(header) + sum(sizeof.(vlrs))
# now let's replace this description for another one
new_desc = LasVariableLengthRecord(
Expand All @@ -115,9 +117,9 @@
# mark the old one as superseded
set_superseded!(las, desc)
vlrs = get_vlrs(las)
@test length(vlrs) == 5
@test vlrs[5] == new_desc
superseded_desc = vlrs[4]
@test length(vlrs) == 3
@test vlrs[3] == new_desc
superseded_desc = vlrs[2]
@test get_user_id(superseded_desc) == get_user_id(desc)
@test get_record_id(superseded_desc) == LAS.ID_SUPERSEDED
@test get_description(superseded_desc) == get_description(desc)
Expand All @@ -126,9 +128,9 @@
# we can also remove the old one entirely
remove_vlr!(las, superseded_desc)
vlrs = get_vlrs(las)
@test length(vlrs) == 4
@test vlrs[4] == new_desc
@test number_of_vlrs(get_header(las)) == 4
@test length(vlrs) == 2
@test vlrs[2] == new_desc
@test number_of_vlrs(get_header(las)) == 2

# this stuff should also work for EVLRs
struct Comment
Expand All @@ -140,8 +142,8 @@
add_vlr!(las, long_comment)
header = get_header(las)
# vlrs should stay the same
@test length(get_vlrs(las)) == 4
@test number_of_vlrs(header) == 4
@test length(get_vlrs(las)) == 2
@test number_of_vlrs(header) == 2
# should have updated the EVLRs
evlrs = get_evlrs(las)
@test length(evlrs) == 1
Expand Down
14 changes: 8 additions & 6 deletions test/file_io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -497,12 +497,14 @@ end
save_las(file_name, pc)
new_las = load_las(file_name, columnnames(pc))
vlrs = get_vlrs(new_las)
# 5 Extra Bytes VLRs for "thing" and 1 VLR for "other_thing"
@test length(vlrs) == 6
@test all(map(i -> LAS.name(get_data(vlrs[i])) == "thing [$(i - 1)]", 1:5))
@test LAS.name(get_data(vlrs[6])) == "other_thing"
@test all(LAS.data_type.(get_data.(vlrs[1:5])) .== Float64)
@test LAS.data_type(get_data(vlrs[6])) == Int
# 1 Extra Bytes VLR with 5 entries for "thing" and 1 entry for "other_thing"
@test length(vlrs) == 1
extra_bytes = LAS.get_extra_bytes(get_data(vlrs[1]))
@test length(extra_bytes) == 6
@test all(map(i -> LAS.name(extra_bytes[i]) == "thing [$(i - 1)]", 1:5))
@test LAS.name(extra_bytes[6]) == "other_thing"
@test all(LAS.data_type.(extra_bytes[1:5]) .== Float64)
@test LAS.data_type(extra_bytes[6]) == Int
new_pc = get_pointcloud(new_las)
for col ∈ columnnames(pc)
@test all(isapprox.(getproperty(new_pc, col), getproperty(pc, col); atol = LAS.POINT_SCALE))
Expand Down
Loading