Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make only one extra bytes vlr per dataset #30

Merged
merged 3 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "LAS"
uuid = "cc498e2a-d443-4943-8f26-2a8a0f3c7cdb"
authors = ["BenCurran98 <b.curran@fugro.com>"]
version = "0.1.1"
version = "0.2.0"

[deps]
ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3"
Expand Down
2 changes: 1 addition & 1 deletion src/LAS.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ export SpatialInfo, AxisInfo, Range

export LasVariableLengthRecord, get_user_id, get_record_id, get_description, get_data, is_extended
export GeoKeys, GeoDoubleParamsTag, GeoAsciiParamsTag, OGC_WKT
export ClassificationLookup, TextAreaDescription, ExtraBytes, WaveformPacketDescriptor, WaveformDataPackets
export ClassificationLookup, TextAreaDescription, ExtraBytes, ExtraBytesCollection, WaveformPacketDescriptor, WaveformDataPackets
export get_horizontal_unit, get_vertical_unit, get_wkt_string
export get_classes, get_description, set_description!
export @register_vlr_type, read_vlr_data, extract_vlr_type
Expand Down
2 changes: 1 addition & 1 deletion src/constants.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ const ID_SUPERSEDED = UInt16(7)
const ID_WAVEFORMPACKETDATA = UInt16(65535)

const DEFAULT_LAS_COLUMNS = (:position, :intensity, :classification, :returnnumber, :numberofreturns, :color, :point_source_id, :gps_time, :overlap)
const ALL_LAS_COLUMNS = SVector{0,Symbol}()
const ALL_LAS_COLUMNS = nothing

POINT_SCALE = 0.0001
global const _VLR_TYPE_MAP = Dict()
Expand Down
65 changes: 37 additions & 28 deletions src/dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,18 @@ mutable struct LasDataset
check_user_type(col_type)

# grab information about the existing ExtraBytes VLRs - need to see if we need to update them or not
extra_bytes_vlrs = extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)
extra_bytes_data = get_data.(extra_bytes_vlrs)
extra_bytes_vlr = extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)
@assert length(extra_bytes_vlr) ≤ 1 "Found multiple Extra Bytes VLRs in LAS file!"
if isempty(extra_bytes_vlr)
extra_bytes_vlr = LasVariableLengthRecord(LAS_SPEC_USER_ID, ID_EXTRABYTES, "Extra Bytes", ExtraBytesCollection())
# make sure we add the VLR to our collection and update any header info
push!(vlrs, extra_bytes_vlr)
header.n_vlr += 1
header.data_offset += sizeof(extra_bytes_vlr)
else
extra_bytes_vlr = extra_bytes_vlr[1]
end
extra_bytes_data = get_extra_bytes(get_data(extra_bytes_vlr))
user_field_names = Symbol.(name.(extra_bytes_data))
user_field_types = data_type.(extra_bytes_data)

Expand All @@ -95,13 +105,11 @@ mutable struct LasDataset
continue
elseif !isnothing(matches_name_idx)
# if we find one with matching name (not type), we'll need to update the header record length to account for this new type
header.data_record_length -= sizeof(data_type(get_data(vlrs[matches_name_idx])))
header.data_record_length -= sizeof(data_type(extra_bytes_data[matches_name_idx]))
end
# now make a new ExtraBytes VLR and add it to our dataset, updating the header information as we go
extra_bytes_vlr = construct_extra_bytes_vlr(col_name, eltype(type_to_check))
push!(vlrs, extra_bytes_vlr)
header.n_vlr += 1
header.data_offset += sizeof(extra_bytes_vlr)
add_extra_bytes_to_collection!(get_data(extra_bytes_vlr), col_name, eltype(type_to_check))
header.data_offset += sizeof(ExtraBytes)
header.data_record_length += sizeof(type_to_check)
end
end
Expand Down Expand Up @@ -287,15 +295,22 @@ function add_column!(las::LasDataset, column::Symbol, values::AbstractVector{T})
las.header.data_record_length += sizeof(T)
vlrs = get_vlrs(las)
extra_bytes_vlrs = extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)

@assert length(extra_bytes_vlrs) ≤ 1 "Found $(length(extra_bytes_vlrs)) Extra Bytes VLRs when we can only have a max of 1"
if isempty(extra_bytes_vlrs)
extra_bytes_vlr = LasVariableLengthRecord(LAS_SPEC_USER_ID, ID_EXTRABYTES, "Extra Bytes Records", ExtraBytesCollection())
# make sure we add it to the dataset to account for offsets in the header etc.
add_vlr!(las, extra_bytes_vlr)
else
extra_bytes_vlr = extra_bytes_vlrs[1]
MeganDawson42 marked this conversation as resolved.
Show resolved Hide resolved
end
if T <: SVector
# user field arrays have to be saved as sequential extra bytes records with names of the form "column [i]" (zero indexing encouraged)
split_col_name = split_column_name(column, length(T))
for i ∈ 1:length(T)
add_extra_bytes!(las, split_col_name[i], eltype(T), extra_bytes_vlrs)
add_extra_bytes!(las, split_col_name[i], eltype(T), extra_bytes_vlr)
end
else
add_extra_bytes!(las, column, T, extra_bytes_vlrs)
add_extra_bytes!(las, column, T, extra_bytes_vlr)
end
nothing
end
Expand Down Expand Up @@ -341,24 +356,18 @@ Add an extra bytes VLR to a LAS dataset to document an extra user-field for poin
* `las` : LAS dataset to add extra bytes to
* `col_name` : Name to save the user field as
* `T` : Data type for the user field (must be a base type as specified in the spec or a static vector of one of these types)
* `extra_bytes_vlr` : Set of existing extra bytes VLRs already present in the LAS dataset
* `extra_bytes_vlr` : An Extra Bytes Collection VLR that already exists in the dataset
"""
function add_extra_bytes!(las::LasDataset, col_name::Symbol, ::Type{T}, extra_bytes_vlrs::Vector{LasVariableLengthRecord}) where T
matching_extra_bytes_vlr = findfirst(Symbol.(name.(get_data.(extra_bytes_vlrs))) .== col_name)
if !isnothing(matching_extra_bytes_vlr)
remove_vlr!(las, extra_bytes_vlrs[matching_extra_bytes_vlr])
function add_extra_bytes!(las::LasDataset, col_name::Symbol, ::Type{T}, extra_bytes_vlr::LasVariableLengthRecord{ExtraBytesCollection}) where T
BenCurran98 marked this conversation as resolved.
Show resolved Hide resolved
extra_bytes = get_extra_bytes(get_data(extra_bytes_vlr))
matching_extra_bytes = findfirst(Symbol.(name.(extra_bytes)) .== col_name)
if !isnothing(matching_extra_bytes)
deleteat!(extra_bytes, matching_extra_bytes)
header = get_header(las)
header.data_offset -= (length(matching_extra_bytes) * sizeof(ExtraBytes))
@assert header.data_offset > 0 "Inconsistent data configuration! Got data offset of $(header.data_offset) after removing Extra Bytes Record"
end
extra_bytes_vlr = construct_extra_bytes_vlr(col_name, T)
add_vlr!(las, extra_bytes_vlr)
end

"""
$(TYPEDSIGNATURES)

Construct an extra bytes VLR with a field name `col_name` and data type `T`
"""
function construct_extra_bytes_vlr(col_name::Symbol, ::Type{T}) where T
@assert length(String(col_name)) ≤ 32 "Custom column name $(col_name) too long! Must be ≤ 32 Bytes, got $(length(String(col_name))) Bytes"
extra_bytes = ExtraBytes(0x00, String(col_name), zero(T), zero(T), zero(T), zero(T), zero(T), "$(col_name)")
LasVariableLengthRecord(LAS_SPEC_USER_ID, ID_EXTRABYTES, String(col_name), extra_bytes)
add_extra_bytes_to_collection!(get_data(extra_bytes_vlr), col_name, T)
header = get_header(las)
header.data_offset += sizeof(ExtraBytes)
end
2 changes: 1 addition & 1 deletion src/header.jl
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ creation_year(h::LasHeader) = h.creation_year

Get the size of a header `h` in bytes
"""
header_size(h::LasHeader) = h.header_size
header_size(h::LasHeader) = Int(h.header_size)

"""
$(TYPEDSIGNATURES)
Expand Down
13 changes: 8 additions & 5 deletions src/read.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Load a LAS dataset from a source file

# Arguments
* `file_name` : Name of the LAS file to extract data from
* `fields` : Name of the LAS point fields to extract as columns in the output data. Default `DEFAULT_LAS_COLUMNS`
* `fields` : Name of the LAS point fields to extract as columns in the output data. If set to `nothing`, ingest all available columns. Default `DEFAULT_LAS_COLUMNS`
"""
function load_las(file_name::AbstractString,
fields::TFields = DEFAULT_LAS_COLUMNS;
Expand All @@ -25,7 +25,7 @@ end

Ingest LAS point data in a tabular format
"""
function load_pointcloud(file_name::AbstractString, fields::AbstractVector{Symbol} = collect(DEFAULT_LAS_COLUMNS); kwargs...)
function load_pointcloud(file_name::AbstractString, fields::Union{Nothing, AbstractVector{Symbol}} = collect(DEFAULT_LAS_COLUMNS); kwargs...)
las = load_las(file_name, fields; kwargs...)
return get_pointcloud(las)
end
Expand Down Expand Up @@ -106,7 +106,9 @@ function read_las_data(io::TIO, required_columns::TTuple=DEFAULT_LAS_COLUMNS;
pos = header.header_size + vlr_length
user_defined_bytes = read(io, header.data_offset - pos)

extra_bytes = Vector{ExtraBytes}(map(vlr -> get_data(vlr), extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)))
extra_bytes_vlr = extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)
@assert length(extra_bytes_vlr) ≤ 1 "Found multiple extra bytes columns!"
extra_bytes = isempty(extra_bytes_vlr) ? ExtraBytes[] : get_extra_bytes(get_data(extra_bytes_vlr[1]))

this_format = record_format(header, extra_bytes)
xyz = spatial_info(header)
Expand Down Expand Up @@ -194,11 +196,12 @@ Helper function that finds the names of user-defined point fields that have been
Note according to spec that user-defined array field names must be of the form `col [0], col[1], ..., col[N]` where `N` is the dimension of the user field
"""
function get_user_fields_for_table(records::Vector{TRecord}, Names::Tuple, required_columns::TTuple) where {TRecord <: Union{ExtendedPointRecord, FullRecord}, TTuple}
user_fields = filter(field -> get_base_field_name(field) ∈ required_columns, Names)
get_all_fields = isnothing(required_columns)
user_fields = filter(field -> get_all_fields || get_base_field_name(field) ∈ required_columns, Names)
raw_user_data = Dict{Symbol, Vector}(field => getproperty.(getproperty.(records, :user_fields), field) for field ∈ user_fields)
user_field_map = get_user_field_map(user_fields)
grouped_field_names = collect(keys(user_field_map))
user_fields = filter(field -> field ∈ required_columns, grouped_field_names)
user_fields = filter(field -> get_all_fields || field ∈ required_columns, grouped_field_names)
grouped_user_fields = group_user_fields(raw_user_data, user_field_map)
return user_fields, grouped_user_fields
end
Expand Down
64 changes: 62 additions & 2 deletions src/registered_vlrs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,6 @@ end
Base.sizeof(::Type{ExtraBytes}) = 192
Base.sizeof(::ExtraBytes) = Base.sizeof(ExtraBytes)

@register_vlr_type ExtraBytes LAS_SPEC_USER_ID ID_EXTRABYTES

# we can rely on this indexing safely since we're restricted to TData being in SUPPORTED_EXTRA_BYTES_TYPES
data_code_from_type(::Type{TData}) where TData = (TData == Missing ? 0x00 : UInt8(indexin([TData], SUPPORTED_EXTRA_BYTES_TYPES)[1]))
data_code_from_type(::ExtraBytes{TData}) where TData = data_code_from_type(TData)
Expand Down Expand Up @@ -214,6 +212,68 @@ function Base.write(io::IO, extra_bytes::ExtraBytes{TData}) where TData
writestring(io, extra_bytes.description, 32)
end

"""
$(TYPEDEF)

A collection of Extra Bytes records that gets packed into a *VLR*

$(TYPEDFIELDS)
---
$(METHODLIST)
"""
struct ExtraBytesCollection
"""Collection of Extra Bytes Records, each documenting one user field in the dataset"""
extra_bytes::Vector{ExtraBytes}

function ExtraBytesCollection(extra_bytes::AbstractVector{T}) where {T <: ExtraBytes}
new(extra_bytes)
end
end

ExtraBytesCollection() = ExtraBytesCollection(ExtraBytes[])

"""
$(TYPEDSIGNATURES)

Helper function that gets the set of Extra Bytes records from an Extra Bytes `collection`
"""
get_extra_bytes(collection::ExtraBytesCollection) = collection.extra_bytes

@register_vlr_type ExtraBytesCollection LAS_SPEC_USER_ID ID_EXTRABYTES

Base.sizeof(collection::ExtraBytesCollection) = length(collection.extra_bytes) * Base.sizeof(ExtraBytes)

# two collections are equal exactly when they hold equal records in the same order
function Base.:(==)(c1::ExtraBytesCollection, c2::ExtraBytesCollection)
    lhs = get_extra_bytes(c1)
    rhs = get_extra_bytes(c2)
    length(lhs) == length(rhs) || return false
    return all(i -> lhs[i] == rhs[i], eachindex(lhs))
end

"""
$(TYPEDSIGNATURES)

Read an `ExtraBytesCollection` from a VLR payload of `nb` Bytes in `io`

The payload must consist of a whole number of fixed-size Extra Bytes records
"""
function read_vlr_data(io::IO, ::Type{ExtraBytesCollection}, nb::Integer)
    @assert nb % sizeof(ExtraBytes) == 0 "Number of bytes $(nb) is not a multiple of Extra Bytes record size $(sizeof(ExtraBytes))"
    # integer division rather than Int(nb/…): avoids a pointless round-trip through
    # Float64 (and any precision concerns for very large payloads)
    num_extra_bytes_records = nb ÷ sizeof(ExtraBytes)
    extra_bytes = ExtraBytes[read(io, ExtraBytes) for _ ∈ 1:num_extra_bytes_records]
    return ExtraBytesCollection(extra_bytes)
end

# serialising a collection is simply writing each fixed-size record back-to-back
function Base.write(io::IO, collection::ExtraBytesCollection)
    foreach(record -> write(io, record), get_extra_bytes(collection))
end

"""
$(TYPEDSIGNATURES)

Construct an extra bytes VLR with a field name `col_name` and data type `T`
"""
function add_extra_bytes_to_collection!(collection::ExtraBytesCollection, col_name::Symbol, ::Type{T}) where T
@assert length(String(col_name)) ≤ 32 "Custom column name $(col_name) too long! Must be ≤ 32 Bytes, got $(length(String(col_name))) Bytes"
extra_bytes = ExtraBytes(0x00, String(col_name), zero(T), zero(T), zero(T), zero(T), zero(T), "$(col_name)")
push!(get_extra_bytes(collection), extra_bytes)
end

"""
$(TYPEDEF)

Expand Down
42 changes: 16 additions & 26 deletions src/write.jl
Original file line number Diff line number Diff line change
Expand Up @@ -97,25 +97,6 @@ function write_las(io::IO, las::LasDataset, compressed::Bool = false)

user_fields = ismissing(las._user_data) ? () : filter(c -> c != :undocumented_bytes, columnnames(las._user_data))

# LASzip doesn't support extra bytes :(
if compressed && !isempty(user_fields)
# need to make copies here so we don't permanently modify the LAS dataset
header = deepcopy(get_header(las))
vlrs = deepcopy(get_vlrs(las))
@warn "Can't compress custom user fields into LAZ! Ignoring user fields and extra bytes VLRs..."
user_fields = ()
# find indices of existing extra bytes VLRs
extra_bytes_idxs = findall(vlr -> (get_user_id(vlr) == LAS_SPEC_USER_ID) && (get_record_id(vlr) == ID_EXTRABYTES), vlrs)
# need to adjust the data record length in the header to remove these extra bytes
for i ∈ extra_bytes_idxs
header.data_record_length -= sizeof(data_type(get_data(vlrs[i])))
end
# make sure we remove the extra bytes vlrs and adjust the header info
header.n_vlr -= length(extra_bytes_idxs)
header.data_offset -= sum(sizeof.(vlrs[extra_bytes_idxs]))
deleteat!(vlrs, extra_bytes_idxs)
end

write(io, header)

for vlr ∈ vlrs
Expand All @@ -128,7 +109,7 @@ function write_las(io::IO, las::LasDataset, compressed::Bool = false)

# packing points into a StructVector makes operations where you have to access per-point fields many times like in get_record_bytes below faster
las_records = StructVector(las_record.(this_point_format, pc, Ref(xyz), undoc_bytes, Ref(user_fields)); unwrap = t -> (t <: LasPoint) || (t <: UserFields))
byte_vector = get_record_bytes(las_records)
byte_vector = get_record_bytes(las_records, vlrs)
write(io, byte_vector)

for evlr ∈ get_evlrs(las)
Expand All @@ -143,7 +124,7 @@ end

Construct an array of bytes that correctly encodes the information stored in a set of LAS `records` according to the spec
"""
function get_record_bytes(records::StructVector{TRecord}) where {TRecord <: LasRecord}
function get_record_bytes(records::StructVector{TRecord}, vlrs::Vector{LasVariableLengthRecord}) where {TRecord <: LasRecord}
point_format = get_point_format(TRecord)
point_fields = collect(fieldnames(point_format))
bytes_per_point_field = sizeof.(fieldtypes(point_format))
Expand All @@ -168,16 +149,25 @@ function get_record_bytes(records::StructVector{TRecord}) where {TRecord <: LasR
end

if user_field_bytes > 0
# need to write the extra bytes fields in the same order as they appear in the VLR
extra_bytes_vlrs = extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)
@assert length(extra_bytes_vlrs) == 1 "Expected to find 1 Extra Bytes VLR, instead found $(length(extra_bytes_vlrs))"
# get the order they appear in the VLR
user_field_names = unique(get_base_field_name.(Symbol.(name.(get_extra_bytes(get_data(extra_bytes_vlrs[1]))))))
# create a mapping between the order in the VLR and the order in the record
per_record_user_field_names = get_user_field_names(TRecord)
user_field_idxs = indexin(user_field_names, collect(per_record_user_field_names))
user_field_types = get_user_field_types(TRecord)
bytes_per_user_field = sizeof.(user_field_types)
for (i, user_field) ∈ enumerate(get_user_field_names(TRecord))
for (i, user_field) ∈ enumerate(user_field_names)
field_byte_vec = reinterpret(UInt8, getproperty(lazy.user_fields, user_field))
if bytes_per_user_field[i] ∉ keys(field_idxs)
field_idxs[bytes_per_user_field[i]] = reduce(vcat, map(j -> (0:bytes_per_user_field[i] - 1) .+ j, 1:bytes_per_record:total_num_bytes))
idx = user_field_idxs[i]
if bytes_per_user_field[idx] ∉ keys(field_idxs)
field_idxs[bytes_per_user_field[idx]] = reduce(vcat, map(j -> (0:bytes_per_user_field[idx] - 1) .+ j, 1:bytes_per_record:total_num_bytes))
end
this_field_idxs = field_idxs[bytes_per_user_field[i]] .+ byte_offset
this_field_idxs = field_idxs[bytes_per_user_field[idx]] .+ byte_offset
view(whole_byte_vec, this_field_idxs) .= field_byte_vec
byte_offset += bytes_per_user_field[i]
byte_offset += bytes_per_user_field[idx]
end
end

Expand Down
Loading
Loading