Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make only one extra bytes vlr per dataset #30

Merged
merged 3 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "LAS"
uuid = "cc498e2a-d443-4943-8f26-2a8a0f3c7cdb"
authors = ["BenCurran98 <b.curran@fugro.com>"]
version = "0.1.1"
version = "0.2.0"

[deps]
ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3"
Expand Down
2 changes: 1 addition & 1 deletion src/LAS.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ export SpatialInfo, AxisInfo, Range

export LasVariableLengthRecord, get_user_id, get_record_id, get_description, get_data, is_extended
export GeoKeys, GeoDoubleParamsTag, GeoAsciiParamsTag, OGC_WKT
export ClassificationLookup, TextAreaDescription, ExtraBytes, WaveformPacketDescriptor, WaveformDataPackets
export ClassificationLookup, TextAreaDescription, ExtraBytes, ExtraBytesCollection, WaveformPacketDescriptor, WaveformDataPackets
export get_horizontal_unit, get_vertical_unit, get_wkt_string
export get_classes, get_description, set_description!
export @register_vlr_type, read_vlr_data, extract_vlr_type
Expand Down
2 changes: 1 addition & 1 deletion src/constants.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ const ID_SUPERSEDED = UInt16(7)
const ID_WAVEFORMPACKETDATA = UInt16(65535)

const DEFAULT_LAS_COLUMNS = (:position, :intensity, :classification, :returnnumber, :numberofreturns, :color, :point_source_id, :gps_time, :overlap)
const ALL_LAS_COLUMNS = SVector{0,Symbol}()
const ALL_LAS_COLUMNS = nothing

POINT_SCALE = 0.0001
global const _VLR_TYPE_MAP = Dict()
Expand Down
65 changes: 37 additions & 28 deletions src/dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,18 @@ mutable struct LasDataset
check_user_type(col_type)

# grab information about the existing ExtraBytes VLRs - need to see if we need to update them or not
extra_bytes_vlrs = extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)
extra_bytes_data = get_data.(extra_bytes_vlrs)
extra_bytes_vlr = extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)
@assert length(extra_bytes_vlr) ≤ 1 "Found multiple Extra Bytes VLRs in LAS file!"
if isempty(extra_bytes_vlr)
extra_bytes_vlr = LasVariableLengthRecord(LAS_SPEC_USER_ID, ID_EXTRABYTES, "Extra Bytes", ExtraBytesCollection())
# make sure we add the VLR to our collection and update any header info
push!(vlrs, extra_bytes_vlr)
header.n_vlr += 1
header.data_offset += sizeof(extra_bytes_vlr)
else
extra_bytes_vlr = extra_bytes_vlr[1]
end
extra_bytes_data = get_extra_bytes(get_data(extra_bytes_vlr))
user_field_names = Symbol.(name.(extra_bytes_data))
user_field_types = data_type.(extra_bytes_data)

Expand All @@ -95,13 +105,11 @@ mutable struct LasDataset
continue
elseif !isnothing(matches_name_idx)
# if we find one with matching name (not type), we'll need to update the header record length to account for this new type
header.data_record_length -= sizeof(data_type(get_data(vlrs[matches_name_idx])))
header.data_record_length -= sizeof(data_type(extra_bytes_data[matches_name_idx]))
end
# now make a new ExtraBytes VLR and add it to our dataset, updating the header information as we go
extra_bytes_vlr = construct_extra_bytes_vlr(col_name, eltype(type_to_check))
push!(vlrs, extra_bytes_vlr)
header.n_vlr += 1
header.data_offset += sizeof(extra_bytes_vlr)
add_extra_bytes_to_collection!(get_data(extra_bytes_vlr), col_name, eltype(type_to_check))
header.data_offset += sizeof(ExtraBytes)
header.data_record_length += sizeof(type_to_check)
end
end
Expand Down Expand Up @@ -287,15 +295,22 @@ function add_column!(las::LasDataset, column::Symbol, values::AbstractVector{T})
las.header.data_record_length += sizeof(T)
vlrs = get_vlrs(las)
extra_bytes_vlrs = extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)

@assert length(extra_bytes_vlrs) ≤ 1 "Found $(length(extra_bytes_vlrs)) Extra Bytes VLRs when we can only have a max of 1"
if isempty(extra_bytes_vlrs)
extra_bytes_vlr = LasVariableLengthRecord(LAS_SPEC_USER_ID, ID_EXTRABYTES, "Extra Bytes Records", ExtraBytesCollection())
# make sure we add it to the dataset to account for offsets in the header etc.
add_vlr!(las, extra_bytes_vlr)
else
extra_bytes_vlr = extra_bytes_vlrs[1]
MeganDawson42 marked this conversation as resolved.
Show resolved Hide resolved
end
if T <: SVector
# user field arrays have to be saved as sequential extra bytes records with names of the form "column [i]" (zero indexing encouraged)
split_col_name = split_column_name(column, length(T))
for i ∈ 1:length(T)
add_extra_bytes!(las, split_col_name[i], eltype(T), extra_bytes_vlrs)
add_extra_bytes!(las, split_col_name[i], eltype(T), extra_bytes_vlr)
end
else
add_extra_bytes!(las, column, T, extra_bytes_vlrs)
add_extra_bytes!(las, column, T, extra_bytes_vlr)
end
nothing
end
Expand Down Expand Up @@ -341,24 +356,18 @@ Add an extra bytes VLR to a LAS dataset to document an extra user-field for poin
* `las` : LAS dataset to add extra bytes to
* `col_name` : Name to save the user field as
* `T` : Data type for the user field (must be a base type as specified in the spec or a static vector of one of these types)
* `extra_bytes_vlr` : Set of existing extra bytes VLRs already present in the LAS dataset
* `extra_bytes_vlr` : An Extra Bytes Collection VLR that already exists in the dataset
"""
function add_extra_bytes!(las::LasDataset, col_name::Symbol, ::Type{T}, extra_bytes_vlrs::Vector{LasVariableLengthRecord}) where T
matching_extra_bytes_vlr = findfirst(Symbol.(name.(get_data.(extra_bytes_vlrs))) .== col_name)
if !isnothing(matching_extra_bytes_vlr)
remove_vlr!(las, extra_bytes_vlrs[matching_extra_bytes_vlr])
function add_extra_bytes!(las::LasDataset, col_name::Symbol, ::Type{T}, extra_bytes_vlr::LasVariableLengthRecord{ExtraBytesCollection}) where T
BenCurran98 marked this conversation as resolved.
Show resolved Hide resolved
extra_bytes = get_extra_bytes(get_data(extra_bytes_vlr))
matching_extra_bytes = findfirst(Symbol.(name.(extra_bytes)) .== col_name)
if !isnothing(matching_extra_bytes)
deleteat!(extra_bytes, matching_extra_bytes)
header = get_header(las)
header.data_offset -= (length(matching_extra_bytes) * sizeof(ExtraBytes))
@assert header.data_offset > 0 "Inconsistent data configuration! Got data offset of $(header.data_offset) after removing Extra Bytes Record"
end
extra_bytes_vlr = construct_extra_bytes_vlr(col_name, T)
add_vlr!(las, extra_bytes_vlr)
end

"""
$(TYPEDSIGNATURES)

Construct an extra bytes VLR with a field name `col_name` and data type `T`
"""
function construct_extra_bytes_vlr(col_name::Symbol, ::Type{T}) where T
@assert length(String(col_name)) ≤ 32 "Custom column name $(col_name) too long! Must be ≤ 32 Bytes, got $(length(String(col_name))) Bytes"
extra_bytes = ExtraBytes(0x00, String(col_name), zero(T), zero(T), zero(T), zero(T), zero(T), "$(col_name)")
LasVariableLengthRecord(LAS_SPEC_USER_ID, ID_EXTRABYTES, String(col_name), extra_bytes)
add_extra_bytes_to_collection!(get_data(extra_bytes_vlr), col_name, T)
header = get_header(las)
header.data_offset += sizeof(ExtraBytes)
end
2 changes: 1 addition & 1 deletion src/header.jl
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ creation_year(h::LasHeader) = h.creation_year

Get the size of a header `h` in bytes
"""
header_size(h::LasHeader) = h.header_size
header_size(h::LasHeader) = Int(h.header_size)

"""
$(TYPEDSIGNATURES)
Expand Down
13 changes: 8 additions & 5 deletions src/read.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Load a LAS dataset from a source file

# Arguments
* `file_name` : Name of the LAS file to extract data from
* `fields` : Name of the LAS point fields to extract as columns in the output data. Default `DEFAULT_LAS_COLUMNS`
* `fields` : Name of the LAS point fields to extract as columns in the output data. If set to `nothing`, ingest all available columns. Default `DEFAULT_LAS_COLUMNS`
"""
function load_las(file_name::AbstractString,
fields::TFields = DEFAULT_LAS_COLUMNS;
Expand All @@ -25,7 +25,7 @@ end

Ingest LAS point data in a tabular format
"""
function load_pointcloud(file_name::AbstractString, fields::AbstractVector{Symbol} = collect(DEFAULT_LAS_COLUMNS); kwargs...)
function load_pointcloud(file_name::AbstractString, fields::Union{Nothing, AbstractVector{Symbol}} = collect(DEFAULT_LAS_COLUMNS); kwargs...)
las = load_las(file_name, fields; kwargs...)
return get_pointcloud(las)
end
Expand Down Expand Up @@ -106,7 +106,9 @@ function read_las_data(io::TIO, required_columns::TTuple=DEFAULT_LAS_COLUMNS;
pos = header.header_size + vlr_length
user_defined_bytes = read(io, header.data_offset - pos)

extra_bytes = Vector{ExtraBytes}(map(vlr -> get_data(vlr), extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)))
extra_bytes_vlr = extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)
@assert length(extra_bytes_vlr) ≤ 1 "Found multiple extra bytes columns!"
extra_bytes = isempty(extra_bytes_vlr) ? ExtraBytes[] : get_extra_bytes(get_data(extra_bytes_vlr[1]))

this_format = record_format(header, extra_bytes)
xyz = spatial_info(header)
Expand Down Expand Up @@ -194,11 +196,12 @@ Helper function that finds the names of user-defined point fields that have been
Note according to spec that user-defined array field names must be of the form `col [0], col[1], ..., col[N]` where `N` is the dimension of the user field
"""
function get_user_fields_for_table(records::Vector{TRecord}, Names::Tuple, required_columns::TTuple) where {TRecord <: Union{ExtendedPointRecord, FullRecord}, TTuple}
user_fields = filter(field -> get_base_field_name(field) ∈ required_columns, Names)
get_all_fields = isnothing(required_columns)
user_fields = filter(field -> get_all_fields || get_base_field_name(field) ∈ required_columns, Names)
raw_user_data = Dict{Symbol, Vector}(field => getproperty.(getproperty.(records, :user_fields), field) for field ∈ user_fields)
user_field_map = get_user_field_map(user_fields)
grouped_field_names = collect(keys(user_field_map))
user_fields = filter(field -> field ∈ required_columns, grouped_field_names)
user_fields = filter(field -> get_all_fields || field ∈ required_columns, grouped_field_names)
grouped_user_fields = group_user_fields(raw_user_data, user_field_map)
return user_fields, grouped_user_fields
end
Expand Down
64 changes: 62 additions & 2 deletions src/registered_vlrs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,6 @@ end
Base.sizeof(::Type{ExtraBytes}) = 192
Base.sizeof(::ExtraBytes) = Base.sizeof(ExtraBytes)

@register_vlr_type ExtraBytes LAS_SPEC_USER_ID ID_EXTRABYTES

# we can rely on this indexing safely since we're restricted to TData being in SUPPORTED_EXTRA_BYTES_TYPES
data_code_from_type(::Type{TData}) where TData = (TData == Missing ? 0x00 : UInt8(indexin([TData], SUPPORTED_EXTRA_BYTES_TYPES)[1]))
data_code_from_type(::ExtraBytes{TData}) where TData = data_code_from_type(TData)
Expand Down Expand Up @@ -214,6 +212,68 @@ function Base.write(io::IO, extra_bytes::ExtraBytes{TData}) where TData
writestring(io, extra_bytes.description, 32)
end

"""
$(TYPEDEF)

A collection of Extra Bytes records that gets packed into a *VLR*

$(TYPEDFIELDS)
---
$(METHODLIST)
"""
struct ExtraBytesCollection
"""Collection of Extra Bytes Records, each documenting one user field in the dataset"""
extra_bytes::Vector{ExtraBytes}

function ExtraBytesCollection(extra_bytes::AbstractVector{T}) where {T <: ExtraBytes}
new(extra_bytes)
end
end

ExtraBytesCollection() = ExtraBytesCollection(ExtraBytes[])

"""
$(TYPEDSIGNATURES)

Helper function that gets the set of Extra Bytes records from an Extra Bytes `collection`
"""
get_extra_bytes(collection::ExtraBytesCollection) = collection.extra_bytes

@register_vlr_type ExtraBytesCollection LAS_SPEC_USER_ID ID_EXTRABYTES

Base.sizeof(collection::ExtraBytesCollection) = length(collection.extra_bytes) * Base.sizeof(ExtraBytes)

# two collections are equal exactly when they hold equal records in the same order
function Base.:(==)(c1::ExtraBytesCollection, c2::ExtraBytesCollection)
    lhs = get_extra_bytes(c1)
    rhs = get_extra_bytes(c2)
    length(lhs) == length(rhs) || return false
    return all(i -> lhs[i] == rhs[i], eachindex(lhs))
end

"""
$(TYPEDSIGNATURES)

Read an `ExtraBytesCollection` from a VLR payload of `nb` Bytes in `io`

The payload must consist of a whole number of fixed-size Extra Bytes records
"""
function read_vlr_data(io::IO, ::Type{ExtraBytesCollection}, nb::Integer)
    @assert nb % sizeof(ExtraBytes) == 0 "Number of bytes $(nb) is not a multiple of Extra Bytes record size $(sizeof(ExtraBytes))"
    # integer division rather than Int(nb/…): avoids a pointless round-trip through
    # Float64 (and any precision concerns for very large payloads)
    num_extra_bytes_records = nb ÷ sizeof(ExtraBytes)
    extra_bytes = ExtraBytes[read(io, ExtraBytes) for _ ∈ 1:num_extra_bytes_records]
    return ExtraBytesCollection(extra_bytes)
end

# serialising a collection is simply writing each fixed-size record back-to-back
function Base.write(io::IO, collection::ExtraBytesCollection)
    foreach(record -> write(io, record), get_extra_bytes(collection))
end

"""
$(TYPEDSIGNATURES)

Construct an extra bytes VLR with a field name `col_name` and data type `T`
"""
function add_extra_bytes_to_collection!(collection::ExtraBytesCollection, col_name::Symbol, ::Type{T}) where T
@assert length(String(col_name)) ≤ 32 "Custom column name $(col_name) too long! Must be ≤ 32 Bytes, got $(length(String(col_name))) Bytes"
extra_bytes = ExtraBytes(0x00, String(col_name), zero(T), zero(T), zero(T), zero(T), zero(T), "$(col_name)")
push!(get_extra_bytes(collection), extra_bytes)
end

"""
$(TYPEDEF)

Expand Down
42 changes: 16 additions & 26 deletions src/write.jl
Original file line number Diff line number Diff line change
Expand Up @@ -97,25 +97,6 @@ function write_las(io::IO, las::LasDataset, compressed::Bool = false)

user_fields = ismissing(las._user_data) ? () : filter(c -> c != :undocumented_bytes, columnnames(las._user_data))

# LASzip doesn't support extra bytes :(
if compressed && !isempty(user_fields)
# need to make copies here so we don't permanently modify the LAS dataset
header = deepcopy(get_header(las))
vlrs = deepcopy(get_vlrs(las))
@warn "Can't compress custom user fields into LAZ! Ignoring user fields and extra bytes VLRs..."
user_fields = ()
# find indices of existing extra bytes VLRs
extra_bytes_idxs = findall(vlr -> (get_user_id(vlr) == LAS_SPEC_USER_ID) && (get_record_id(vlr) == ID_EXTRABYTES), vlrs)
# need to adjust the data record length in the header to remove these extra bytes
for i ∈ extra_bytes_idxs
header.data_record_length -= sizeof(data_type(get_data(vlrs[i])))
end
# make sure we remove the extra bytes vlrs and adjust the header info
header.n_vlr -= length(extra_bytes_idxs)
header.data_offset -= sum(sizeof.(vlrs[extra_bytes_idxs]))
deleteat!(vlrs, extra_bytes_idxs)
end

write(io, header)

for vlr ∈ vlrs
Expand All @@ -128,7 +109,7 @@ function write_las(io::IO, las::LasDataset, compressed::Bool = false)

# packing points into a StructVector makes operations where you have to access per-point fields many times like in get_record_bytes below faster
las_records = StructVector(las_record.(this_point_format, pc, Ref(xyz), undoc_bytes, Ref(user_fields)); unwrap = t -> (t <: LasPoint) || (t <: UserFields))
byte_vector = get_record_bytes(las_records)
byte_vector = get_record_bytes(las_records, vlrs)
write(io, byte_vector)

for evlr ∈ get_evlrs(las)
Expand All @@ -143,7 +124,7 @@ end

Construct an array of bytes that correctly encodes the information stored in a set of LAS `records` according to the spec
"""
function get_record_bytes(records::StructVector{TRecord}) where {TRecord <: LasRecord}
function get_record_bytes(records::StructVector{TRecord}, vlrs::Vector{LasVariableLengthRecord}) where {TRecord <: LasRecord}
point_format = get_point_format(TRecord)
point_fields = collect(fieldnames(point_format))
bytes_per_point_field = sizeof.(fieldtypes(point_format))
Expand All @@ -168,16 +149,25 @@ function get_record_bytes(records::StructVector{TRecord}) where {TRecord <: LasR
end

if user_field_bytes > 0
# need to write the extra bytes fields in the same order as they appear in the VLR
extra_bytes_vlrs = extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)
@assert length(extra_bytes_vlrs) == 1 "Expected to find 1 Extra Bytes VLR, instead found $(length(extra_bytes_vlrs))"
# get the order they appear in the VLR
user_field_names = unique(get_base_field_name.(Symbol.(name.(get_extra_bytes(get_data(extra_bytes_vlrs[1]))))))
# create a mapping between the order in the VLR and the order in the record
per_record_user_field_names = get_user_field_names(TRecord)
user_field_idxs = indexin(user_field_names, collect(per_record_user_field_names))
user_field_types = get_user_field_types(TRecord)
bytes_per_user_field = sizeof.(user_field_types)
for (i, user_field) ∈ enumerate(get_user_field_names(TRecord))
for (i, user_field) ∈ enumerate(user_field_names)
field_byte_vec = reinterpret(UInt8, getproperty(lazy.user_fields, user_field))
if bytes_per_user_field[i] ∉ keys(field_idxs)
field_idxs[bytes_per_user_field[i]] = reduce(vcat, map(j -> (0:bytes_per_user_field[i] - 1) .+ j, 1:bytes_per_record:total_num_bytes))
idx = user_field_idxs[i]
if bytes_per_user_field[idx] ∉ keys(field_idxs)
field_idxs[bytes_per_user_field[idx]] = reduce(vcat, map(j -> (0:bytes_per_user_field[idx] - 1) .+ j, 1:bytes_per_record:total_num_bytes))
end
this_field_idxs = field_idxs[bytes_per_user_field[i]] .+ byte_offset
this_field_idxs = field_idxs[bytes_per_user_field[idx]] .+ byte_offset
view(whole_byte_vec, this_field_idxs) .= field_byte_vec
byte_offset += bytes_per_user_field[i]
byte_offset += bytes_per_user_field[idx]
end
end

Expand Down
Loading
Loading