Skip to content

Make only one extra bytes vlr per dataset #30

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/LAS.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ export SpatialInfo, AxisInfo, Range

export LasVariableLengthRecord, get_user_id, get_record_id, get_description, get_data, is_extended
export GeoKeys, GeoDoubleParamsTag, GeoAsciiParamsTag, OGC_WKT
export ClassificationLookup, TextAreaDescription, ExtraBytes, WaveformPacketDescriptor, WaveformDataPackets
export ClassificationLookup, TextAreaDescription, ExtraBytes, ExtraBytesCollection, WaveformPacketDescriptor, WaveformDataPackets
export get_horizontal_unit, get_vertical_unit, get_wkt_string
export get_classes, get_description, set_description!
export @register_vlr_type, read_vlr_data, extract_vlr_type
Expand Down
2 changes: 1 addition & 1 deletion src/constants.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ const ID_SUPERSEDED = UInt16(7)
const ID_WAVEFORMPACKETDATA = UInt16(65535)

const DEFAULT_LAS_COLUMNS = (:position, :intensity, :classification, :returnnumber, :numberofreturns, :color, :point_source_id, :gps_time, :overlap)
const ALL_LAS_COLUMNS = SVector{0,Symbol}()
const ALL_LAS_COLUMNS = nothing

POINT_SCALE = 0.0001
global const _VLR_TYPE_MAP = Dict()
Expand Down
65 changes: 37 additions & 28 deletions src/dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,18 @@ mutable struct LasDataset
check_user_type(col_type)

# grab information about the existing ExtraBytes VLRs - need to see if we need to update them or not
extra_bytes_vlrs = extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)
extra_bytes_data = get_data.(extra_bytes_vlrs)
extra_bytes_vlr = extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)
@assert length(extra_bytes_vlr) ≤ 1 "Found multiple Extra Bytes VLRs in LAS file!"
if isempty(extra_bytes_vlr)
extra_bytes_vlr = LasVariableLengthRecord(LAS_SPEC_USER_ID, ID_EXTRABYTES, "Extra Bytes", ExtraBytesCollection())
# make sure we add the VLR to our collection and update any header info
push!(vlrs, extra_bytes_vlr)
header.n_vlr += 1
header.data_offset += sizeof(extra_bytes_vlr)
else
extra_bytes_vlr = extra_bytes_vlr[1]
end
extra_bytes_data = get_extra_bytes(get_data(extra_bytes_vlr))
user_field_names = Symbol.(name.(extra_bytes_data))
user_field_types = data_type.(extra_bytes_data)

Expand All @@ -95,13 +105,11 @@ mutable struct LasDataset
continue
elseif !isnothing(matches_name_idx)
# if we find one with matching name (not type), we'll need to update the header record length to account for this new type
header.data_record_length -= sizeof(data_type(get_data(vlrs[matches_name_idx])))
header.data_record_length -= sizeof(data_type(extra_bytes_data[matches_name_idx]))
end
# now make a new ExtraBytes VLR and add it to our dataset, updating the header information as we go
extra_bytes_vlr = construct_extra_bytes_vlr(col_name, eltype(type_to_check))
push!(vlrs, extra_bytes_vlr)
header.n_vlr += 1
header.data_offset += sizeof(extra_bytes_vlr)
add_extra_bytes_to_collection!(get_data(extra_bytes_vlr), col_name, eltype(type_to_check))
header.data_offset += sizeof(ExtraBytes)
header.data_record_length += sizeof(type_to_check)
end
end
Expand Down Expand Up @@ -287,15 +295,22 @@ function add_column!(las::LasDataset, column::Symbol, values::AbstractVector{T})
las.header.data_record_length += sizeof(T)
vlrs = get_vlrs(las)
extra_bytes_vlrs = extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)

@assert length(extra_bytes_vlrs) ≤ 1 "Found $(length(extra_bytes_vlrs)) Extra Bytes VLRs when we can only have a max of 1"
if isempty(extra_bytes_vlrs)
extra_bytes_vlr = LasVariableLengthRecord(LAS_SPEC_USER_ID, ID_EXTRABYTES, "Extra Bytes Records", ExtraBytesCollection())
# make sure we add it to the dataset to account for offsets in the header etc.
add_vlr!(las, extra_bytes_vlr)
else
extra_bytes_vlr = extra_bytes_vlrs[1]
end
if T <: SVector
# user field arrays have to be saved as sequential extra bytes records with names of the form "column [i]" (zero indexing encouraged)
split_col_name = split_column_name(column, length(T))
for i ∈ 1:length(T)
add_extra_bytes!(las, split_col_name[i], eltype(T), extra_bytes_vlrs)
add_extra_bytes!(las, split_col_name[i], eltype(T), extra_bytes_vlr)
end
else
add_extra_bytes!(las, column, T, extra_bytes_vlrs)
add_extra_bytes!(las, column, T, extra_bytes_vlr)
end
nothing
end
Expand Down Expand Up @@ -341,24 +356,18 @@ Add an extra bytes VLR to a LAS dataset to document an extra user-field for poin
* `las` : LAS dataset to add extra bytes to
* `col_name` : Name to save the user field as
* `T` : Data type for the user field (must be a base type as specified in the spec or a static vector of one of these types)
* `extra_bytes_vlr` : Set of existing extra bytes VLRs already present in the LAS dataset
* `extra_bytes_vlr` : An Extra Bytes Collection VLR that already exists in the dataset
"""
function add_extra_bytes!(las::LasDataset, col_name::Symbol, ::Type{T}, extra_bytes_vlrs::Vector{LasVariableLengthRecord}) where T
matching_extra_bytes_vlr = findfirst(Symbol.(name.(get_data.(extra_bytes_vlrs))) .== col_name)
if !isnothing(matching_extra_bytes_vlr)
remove_vlr!(las, extra_bytes_vlrs[matching_extra_bytes_vlr])
function add_extra_bytes!(las::LasDataset, col_name::Symbol, ::Type{T}, extra_bytes_vlr::LasVariableLengthRecord{ExtraBytesCollection}) where T
extra_bytes = get_extra_bytes(get_data(extra_bytes_vlr))
matching_extra_bytes = findfirst(Symbol.(name.(extra_bytes)) .== col_name)
if !isnothing(matching_extra_bytes)
deleteat!(extra_bytes, matching_extra_bytes)
header = get_header(las)
header.data_offset -= (length(matching_extra_bytes) * sizeof(ExtraBytes))
@assert header.data_offset > 0 "Inconsistent data configuration! Got data offset of $(header.data_offset) after removing Extra Bytes Record"
end
extra_bytes_vlr = construct_extra_bytes_vlr(col_name, T)
add_vlr!(las, extra_bytes_vlr)
end

"""
$(TYPEDSIGNATURES)

Construct an extra bytes VLR with a field name `col_name` and data type `T`
"""
function construct_extra_bytes_vlr(col_name::Symbol, ::Type{T}) where T
@assert length(String(col_name)) ≤ 32 "Custom column name $(col_name) too long! Must be ≤ 32 Bytes, got $(length(String(col_name))) Bytes"
extra_bytes = ExtraBytes(0x00, String(col_name), zero(T), zero(T), zero(T), zero(T), zero(T), "$(col_name)")
LasVariableLengthRecord(LAS_SPEC_USER_ID, ID_EXTRABYTES, String(col_name), extra_bytes)
add_extra_bytes_to_collection!(get_data(extra_bytes_vlr), col_name, T)
header = get_header(las)
header.data_offset += sizeof(ExtraBytes)
end
2 changes: 1 addition & 1 deletion src/header.jl
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ creation_year(h::LasHeader) = h.creation_year

Get the size of a header `h` in bytes
"""
header_size(h::LasHeader) = h.header_size
header_size(h::LasHeader) = Int(h.header_size)

"""
$(TYPEDSIGNATURES)
Expand Down
13 changes: 8 additions & 5 deletions src/read.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Load a LAS dataset from a source file

# Arguments
* `file_name` : Name of the LAS file to extract data from
* `fields` : Name of the LAS point fields to extract as columns in the output data. Default `DEFAULT_LAS_COLUMNS`
* `fields` : Name of the LAS point fields to extract as columns in the output data. If set to `nothing`, ingest all available columns. Default `DEFAULT_LAS_COLUMNS`
"""
function load_las(file_name::AbstractString,
fields::TFields = DEFAULT_LAS_COLUMNS;
Expand All @@ -25,7 +25,7 @@ end

Ingest LAS point data in a tabular format
"""
function load_pointcloud(file_name::AbstractString, fields::AbstractVector{Symbol} = collect(DEFAULT_LAS_COLUMNS); kwargs...)
function load_pointcloud(file_name::AbstractString, fields::Union{Nothing, AbstractVector{Symbol}} = collect(DEFAULT_LAS_COLUMNS); kwargs...)
las = load_las(file_name, fields; kwargs...)
return get_pointcloud(las)
end
Expand Down Expand Up @@ -106,7 +106,9 @@ function read_las_data(io::TIO, required_columns::TTuple=DEFAULT_LAS_COLUMNS;
pos = header.header_size + vlr_length
user_defined_bytes = read(io, header.data_offset - pos)

extra_bytes = Vector{ExtraBytes}(map(vlr -> get_data(vlr), extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)))
extra_bytes_vlr = extract_vlr_type(vlrs, LAS_SPEC_USER_ID, ID_EXTRABYTES)
@assert length(extra_bytes_vlr) ≤ 1 "Found multiple extra bytes columns!"
extra_bytes = isempty(extra_bytes_vlr) ? ExtraBytes[] : get_extra_bytes(get_data(extra_bytes_vlr[1]))

this_format = record_format(header, extra_bytes)
xyz = spatial_info(header)
Expand Down Expand Up @@ -194,11 +196,12 @@ Helper function that finds the names of user-defined point fields that have been
Note according to spec that user-defined array field names must be of the form `col [0], col[1], ..., col[N]` where `N` is the dimension of the user field
"""
function get_user_fields_for_table(records::Vector{TRecord}, Names::Tuple, required_columns::TTuple) where {TRecord <: Union{ExtendedPointRecord, FullRecord}, TTuple}
user_fields = filter(field -> get_base_field_name(field) ∈ required_columns, Names)
get_all_fields = isnothing(required_columns)
user_fields = filter(field -> get_all_fields || get_base_field_name(field) ∈ required_columns, Names)
raw_user_data = Dict{Symbol, Vector}(field => getproperty.(getproperty.(records, :user_fields), field) for field ∈ user_fields)
user_field_map = get_user_field_map(user_fields)
grouped_field_names = collect(keys(user_field_map))
user_fields = filter(field -> field ∈ required_columns, grouped_field_names)
user_fields = filter(field -> get_all_fields || field ∈ required_columns, grouped_field_names)
grouped_user_fields = group_user_fields(raw_user_data, user_field_map)
return user_fields, grouped_user_fields
end
Expand Down
64 changes: 62 additions & 2 deletions src/registered_vlrs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,6 @@ end
Base.sizeof(::Type{ExtraBytes}) = 192
Base.sizeof(::ExtraBytes) = Base.sizeof(ExtraBytes)

@register_vlr_type ExtraBytes LAS_SPEC_USER_ID ID_EXTRABYTES

# we can rely on this indexing safely since we're restricted to TData being in SUPPORTED_EXTRA_BYTES_TYPES
data_code_from_type(::Type{TData}) where TData = (TData == Missing ? 0x00 : UInt8(indexin([TData], SUPPORTED_EXTRA_BYTES_TYPES)[1]))
data_code_from_type(::ExtraBytes{TData}) where TData = data_code_from_type(TData)
Expand Down Expand Up @@ -214,6 +212,68 @@ function Base.write(io::IO, extra_bytes::ExtraBytes{TData}) where TData
writestring(io, extra_bytes.description, 32)
end

"""
$(TYPEDEF)

Container for the Extra Bytes records of a dataset; the whole collection is
serialised into a single *VLR* rather than one VLR per record

$(TYPEDFIELDS)
---
$(METHODLIST)
"""
struct ExtraBytesCollection
    """The Extra Bytes records held by this collection, each documenting one user field in the dataset"""
    extra_bytes::Vector{ExtraBytes}

    # accept any abstract vector whose eltype is an ExtraBytes (possibly parametric);
    # `new` converts it to the concrete Vector{ExtraBytes} field type
    ExtraBytesCollection(records::AbstractVector{T}) where {T <: ExtraBytes} = new(records)
end

# an empty collection to start from when a dataset has no user fields yet
ExtraBytesCollection() = ExtraBytesCollection(Vector{ExtraBytes}())

"""
$(TYPEDSIGNATURES)

Return the vector of Extra Bytes records stored in an Extra Bytes `collection`
"""
function get_extra_bytes(collection::ExtraBytesCollection)
    return collection.extra_bytes
end

@register_vlr_type ExtraBytesCollection LAS_SPEC_USER_ID ID_EXTRABYTES

# packed size on disk: each record occupies a fixed number of bytes with no padding between records
Base.sizeof(collection::ExtraBytesCollection) = Base.sizeof(ExtraBytes) * length(collection.extra_bytes)

# two collections are equal exactly when they hold equal records in the same order
function Base.:(==)(c1::ExtraBytesCollection, c2::ExtraBytesCollection)
    lhs = get_extra_bytes(c1)
    rhs = get_extra_bytes(c2)
    length(lhs) == length(rhs) || return false
    return all(a == b for (a, b) in zip(lhs, rhs))
end

"""
$(TYPEDSIGNATURES)

Read the payload of an Extra Bytes *VLR*: parse `nb` bytes from `io` as a whole
number of `ExtraBytes` records and wrap them in an `ExtraBytesCollection`
"""
function read_vlr_data(io::IO, ::Type{ExtraBytesCollection}, nb::Integer)
    @assert nb % sizeof(ExtraBytes) == 0 "Number of bytes $(nb) is not a multiple of Extra Bytes record size $(sizeof(ExtraBytes))"
    # exact integer division - avoids the float round-trip of Int(nb/sizeof(ExtraBytes))
    num_extra_bytes_records = div(nb, sizeof(ExtraBytes))
    # typed comprehension guarantees a Vector{ExtraBytes} even when there are zero records,
    # so the ExtraBytesCollection constructor always dispatches correctly
    extra_bytes = ExtraBytes[read(io, ExtraBytes) for _ in 1:num_extra_bytes_records]
    return ExtraBytesCollection(extra_bytes)
end

# serialise the collection as the concatenation of its records, in order
function Base.write(io::IO, collection::ExtraBytesCollection)
    foreach(record -> write(io, record), get_extra_bytes(collection))
end

"""
$(TYPEDSIGNATURES)

Construct an `ExtraBytes` record documenting a user field with name `col_name` and
data type `T`, and append it to an Extra Bytes `collection`

Note that the name must fit in the 32-Byte name field of the record as per the LAS spec
"""
function add_extra_bytes_to_collection!(collection::ExtraBytesCollection, col_name::Symbol, ::Type{T}) where T
    field_name = String(col_name)
    # compare the encoded size in Bytes (not characters) against the 32-Byte name field -
    # length() undercounts for non-ASCII names
    @assert sizeof(field_name) ≤ 32 "Custom column name $(col_name) too long! Must be ≤ 32 Bytes, got $(sizeof(field_name)) Bytes"
    extra_bytes = ExtraBytes(0x00, field_name, zero(T), zero(T), zero(T), zero(T), zero(T), "$(col_name)")
    push!(get_extra_bytes(collection), extra_bytes)
end

"""
$(TYPEDEF)

Expand Down
18 changes: 11 additions & 7 deletions src/write.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,14 +106,18 @@ function write_las(io::IO, las::LasDataset, compressed::Bool = false)
user_fields = ()
# find indices of existing extra bytes VLRs
extra_bytes_idxs = findall(vlr -> (get_user_id(vlr) == LAS_SPEC_USER_ID) && (get_record_id(vlr) == ID_EXTRABYTES), vlrs)
# need to adjust the data record length in the header to remove these extra bytes
for i ∈ extra_bytes_idxs
header.data_record_length -= sizeof(data_type(get_data(vlrs[i])))
if !isempty(extra_bytes_idxs)
@assert length(extra_bytes_idxs) == 1 "Found $(length(extra_bytes_idxs)) Extra Bytes VLRs when we should only have 1"
extra_bytes_vlr = vlrs[extra_bytes_idxs[1]]
# need to adjust the data record length in the header to remove these extra bytes
for extra_bytes ∈ get_extra_bytes(get_data(extra_bytes_vlr))
header.data_record_length -= sizeof(data_type(extra_bytes))
end
# make sure we remove the extra bytes vlrs and adjust the header info
header.n_vlr -= 1
header.data_offset -= sizeof(extra_bytes_vlr)
deleteat!(vlrs, extra_bytes_idxs)
end
# make sure we remove the extra bytes vlrs and adjust the header info
header.n_vlr -= length(extra_bytes_idxs)
header.data_offset -= sum(sizeof.(vlrs[extra_bytes_idxs]))
deleteat!(vlrs, extra_bytes_idxs)
end

write(io, header)
Expand Down
42 changes: 22 additions & 20 deletions test/dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,28 +60,30 @@
@test this_pc.other_thing == spicy_pc.other_thing
# we should have documented our columns as extra bytes VLRs now
vlrs = get_vlrs(las)
@test length(vlrs) == 2
@test length(vlrs) == 1
vlr_data = get_data.(vlrs)
@test all(isa.(vlr_data, ExtraBytes))
@test (LAS.name(vlr_data[1]) == "thing") && (LAS.data_type(vlr_data[1]) == Float64)
@test (LAS.name(vlr_data[2]) == "other_thing") && (LAS.data_type(vlr_data[2]) == Int16)
@test vlr_data[1] isa ExtraBytesCollection
extra_bytes = LAS.get_extra_bytes(vlr_data[1])
@test (LAS.name(extra_bytes[1]) == "thing") && (LAS.data_type(extra_bytes[1]) == Float64)
@test (LAS.name(extra_bytes[2]) == "other_thing") && (LAS.data_type(extra_bytes[2]) == Int16)
# and our header should be updated appropriately
@test number_of_vlrs(header) == 2
@test number_of_vlrs(header) == 1
@test point_data_offset(header) == header_size(header) + sum(sizeof.(vlrs))
# now add another user field directly to the dataset
new_thing = rand(Float32, num_points)
add_column!(las, :new_thing, new_thing)
vlrs = get_vlrs(las)
@test length(vlrs) == 3
new_extra_bytes = get_data(vlrs[3])
@test length(vlrs) == 1
new_extra_bytes = LAS.get_extra_bytes(get_data(vlrs[1]))[3]
@test (LAS.name(new_extra_bytes) == "new_thing") && (LAS.data_type(new_extra_bytes) == Float32)
# we shouldn't be able to add columns of different length to the LAS data
@test_throws AssertionError add_column!(las, :bad, rand(10))
# now if we replace the values of one of the user fields with a different type, it should work
new_thing = rand(UInt8, num_points)
add_column!(las, :thing, new_thing)
vlrs = get_vlrs(las)
@test length(vlrs) == 3
new_extra_bytes = get_data(vlrs[3])
@test length(vlrs) == 1
new_extra_bytes = LAS.get_extra_bytes(get_data(vlrs[1]))[3]
@test (LAS.name(new_extra_bytes) == "thing") && (LAS.data_type(new_extra_bytes) == UInt8)

# merge some data into our dataset
Expand All @@ -98,11 +100,11 @@
)
add_vlr!(las, desc)
vlrs = get_vlrs(las)
@test length(vlrs) == 4
@test vlrs[4] == desc
@test length(vlrs) == 2
@test vlrs[2] == desc
# make sure we've updated the header correctly
header = get_header(las)
@test number_of_vlrs(header) == 4
@test number_of_vlrs(header) == 2
@test point_data_offset(header) == header_size(header) + sum(sizeof.(vlrs))
# now let's replace this description for another one
new_desc = LasVariableLengthRecord(
Expand All @@ -115,9 +117,9 @@
# mark the old one as superseded
set_superseded!(las, desc)
vlrs = get_vlrs(las)
@test length(vlrs) == 5
@test vlrs[5] == new_desc
superseded_desc = vlrs[4]
@test length(vlrs) == 3
@test vlrs[3] == new_desc
superseded_desc = vlrs[2]
@test get_user_id(superseded_desc) == get_user_id(desc)
@test get_record_id(superseded_desc) == LAS.ID_SUPERSEDED
@test get_description(superseded_desc) == get_description(desc)
Expand All @@ -126,9 +128,9 @@
# we can also remove the old one entirely
remove_vlr!(las, superseded_desc)
vlrs = get_vlrs(las)
@test length(vlrs) == 4
@test vlrs[4] == new_desc
@test number_of_vlrs(get_header(las)) == 4
@test length(vlrs) == 2
@test vlrs[2] == new_desc
@test number_of_vlrs(get_header(las)) == 2

# this stuff should also work for EVLRs
struct Comment
Expand All @@ -140,8 +142,8 @@
add_vlr!(las, long_comment)
header = get_header(las)
# vlrs should stay the same
@test length(get_vlrs(las)) == 4
@test number_of_vlrs(header) == 4
@test length(get_vlrs(las)) == 2
@test number_of_vlrs(header) == 2
# should have updated the EVLRs
evlrs = get_evlrs(las)
@test length(evlrs) == 1
Expand Down
14 changes: 8 additions & 6 deletions test/file_io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -497,12 +497,14 @@ end
save_las(file_name, pc)
new_las = load_las(file_name, columnnames(pc))
vlrs = get_vlrs(new_las)
# 5 Extra Bytes VLRs for "thing" and 1 VLR for "other_thing"
@test length(vlrs) == 6
@test all(map(i -> LAS.name(get_data(vlrs[i])) == "thing [$(i - 1)]", 1:5))
@test LAS.name(get_data(vlrs[6])) == "other_thing"
@test all(LAS.data_type.(get_data.(vlrs[1:5])) .== Float64)
@test LAS.data_type(get_data(vlrs[6])) == Int
# 1 Extra Bytes VLR with 5 entries for "thing" and 1 entry for "other_thing"
@test length(vlrs) == 1
extra_bytes = LAS.get_extra_bytes(get_data(vlrs[1]))
@test length(extra_bytes) == 6
@test all(map(i -> LAS.name(extra_bytes[i]) == "thing [$(i - 1)]", 1:5))
@test LAS.name(extra_bytes[6]) == "other_thing"
@test all(LAS.data_type.(extra_bytes[1:5]) .== Float64)
@test LAS.data_type(extra_bytes[6]) == Int
new_pc = get_pointcloud(new_las)
for col ∈ columnnames(pc)
@test all(isapprox.(getproperty(new_pc, col), getproperty(pc, col); atol = LAS.POINT_SCALE))
Expand Down
Loading