Skip to content

Commit

Permalink
Remove orc::gpu namespace (rapidsai#17891)
Browse files Browse the repository at this point in the history
Moving forward with removal of the (redundant) `gpu` namespace in cuIO.
Also moved the entire ORC implementation to `cudf::io::orc::detail`, leaving only the implementation of the public API in `cudf::io::orc`.

Also removed a few unused headers and moved others so that they are included only in the files that actually use them.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Yunsong Wang (https://github.com/PointKernel)
  - Muhammad Haseeb (https://github.com/mhaseeb123)

URL: rapidsai#17891
  • Loading branch information
vuule authored Feb 1, 2025
1 parent aeb90de commit a6acba0
Show file tree
Hide file tree
Showing 19 changed files with 282 additions and 329 deletions.
10 changes: 5 additions & 5 deletions cpp/include/cudf/io/orc_metadata.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -171,12 +171,12 @@ using statistics_type = std::variant<no_statistics,
timestamp_statistics>;

//! Orc I/O interfaces
namespace orc {
namespace orc::detail {
// forward declare the type that ProtobufReader uses. The `cudf::io::column_statistics` objects,
// returned from `read_parsed_orc_statistics`, are constructed from
// `cudf::io::orc::column_statistics` objects that `ProtobufReader` initializes.
// `cudf::io::orc::detail::column_statistics` objects that `ProtobufReader` initializes.
struct column_statistics;
} // namespace orc
} // namespace orc::detail

/**
* @brief Contains per-column ORC statistics.
Expand All @@ -194,7 +194,7 @@ struct column_statistics {
*
* @param detail_statistics The statistics to initialize the object with
*/
column_statistics(orc::column_statistics&& detail_statistics);
column_statistics(orc::detail::column_statistics&& detail_statistics);
};

/**
Expand Down
14 changes: 7 additions & 7 deletions cpp/src/io/functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ raw_orc_statistics read_raw_orc_statistics(source_info const& src_info,
CUDF_FAIL("Unsupported source type");
}

orc::metadata const metadata(source.get(), stream);
orc::detail::metadata const metadata(source.get(), stream);

// Initialize statistics to return
raw_orc_statistics result;
Expand All @@ -318,7 +318,7 @@ raw_orc_statistics read_raw_orc_statistics(source_info const& src_info,
return result;
}

column_statistics::column_statistics(orc::column_statistics&& cs)
column_statistics::column_statistics(orc::detail::column_statistics&& cs)
{
number_of_values = cs.number_of_values;
has_null = cs.has_null;
Expand Down Expand Up @@ -350,9 +350,9 @@ parsed_orc_statistics read_parsed_orc_statistics(source_info const& src_info,
result.column_names = raw_stats.column_names;

auto parse_column_statistics = [](auto const& raw_col_stats) {
orc::column_statistics stats_internal;
orc::ProtobufReader(reinterpret_cast<uint8_t const*>(raw_col_stats.c_str()),
raw_col_stats.size())
orc::detail::column_statistics stats_internal;
orc::detail::ProtobufReader(reinterpret_cast<uint8_t const*>(raw_col_stats.c_str()),
raw_col_stats.size())
.read(stats_internal);
return column_statistics(std::move(stats_internal));
};
Expand All @@ -373,7 +373,7 @@ parsed_orc_statistics read_parsed_orc_statistics(source_info const& src_info,
return result;
}
namespace {
orc_column_schema make_orc_column_schema(host_span<orc::SchemaType const> orc_schema,
orc_column_schema make_orc_column_schema(host_span<orc::detail::SchemaType const> orc_schema,
uint32_t column_id,
std::string column_name)
{
Expand All @@ -400,7 +400,7 @@ orc_metadata read_orc_metadata(source_info const& src_info, rmm::cuda_stream_vie
auto sources = make_datasources(src_info);

CUDF_EXPECTS(sources.size() == 1, "Only a single source is currently supported.");
auto const footer = orc::metadata(sources.front().get(), stream).ff;
auto const footer = orc::detail::metadata(sources.front().get(), stream).ff;

return {{make_orc_column_schema(footer.types, 0, "")},
footer.numberOfRows,
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/orc/dict_enc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

#include <rmm/cuda_stream_view.hpp>

namespace cudf::io::orc::gpu {
namespace cudf::io::orc::detail {

/**
* @brief Counts the number of characters in each rowgroup of each string column.
Expand Down Expand Up @@ -266,4 +266,4 @@ void get_dictionary_indices(device_2dspan<stripe_dictionary> dictionaries,
<<<dictionaries.count(), block_size, 0, stream.value()>>>(dictionaries, columns);
}

} // namespace cudf::io::orc::gpu
} // namespace cudf::io::orc::detail
9 changes: 5 additions & 4 deletions cpp/src/io/orc/orc.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -16,6 +16,7 @@

#include "orc.hpp"

#include "io/comp/io_uncomp.hpp"
#include "orc_field_reader.hpp"
#include "orc_field_writer.hpp"

Expand All @@ -25,7 +26,7 @@

#include <string>

namespace cudf::io::orc {
namespace cudf::io::orc::detail {

namespace {
[[nodiscard]] constexpr uint32_t varint_size(uint64_t val)
Expand Down Expand Up @@ -496,7 +497,7 @@ metadata::metadata(datasource* const src, rmm::cuda_stream_view stream) : source
buffer =
source->host_read(len - ps_length - 1 - ps.footerLength - ps.metadataLength, ps.metadataLength);
auto const md_data = decompressor->decompress_blocks({buffer->data(), buffer->size()}, stream);
orc::ProtobufReader(md_data.data(), md_data.size()).read(md);
ProtobufReader(md_data.data(), md_data.size()).read(md);

init_parent_descriptors();
init_column_names();
Expand Down Expand Up @@ -546,4 +547,4 @@ void metadata::init_parent_descriptors()
}
}

} // namespace cudf::io::orc
} // namespace cudf::io::orc::detail
10 changes: 2 additions & 8 deletions cpp/src/io/orc/orc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

#pragma once

#include "io/comp/io_uncomp.hpp"

#include <cudf/column/column_device_view.cuh>
#include <cudf/io/datasource.hpp>
#include <cudf/io/orc_metadata.hpp>
Expand All @@ -34,9 +32,7 @@
#include <string>
#include <vector>

namespace cudf {
namespace io {
namespace orc {
namespace cudf::io::orc::detail {

static constexpr uint32_t block_header_size = 3;
// Seconds from January 1st, 1970 to January 1st, 2015
Expand Down Expand Up @@ -710,6 +706,4 @@ struct rowgroup_rows {
[[nodiscard]] CUDF_HOST_DEVICE constexpr auto size() const noexcept { return end - begin; }
};

} // namespace orc
} // namespace io
} // namespace cudf
} // namespace cudf::io::orc::detail
12 changes: 4 additions & 8 deletions cpp/src/io/orc/orc_field_reader.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,17 +17,15 @@

#include "orc.hpp"

#include <string>
#include <cstdint>

/**
* @file orc_field_reader.hpp
* @brief Functors to encapsulate common functionality required to implement
* ProtobufReader::read(...) functions
*/

namespace cudf {
namespace io {
namespace orc {
namespace cudf::io::orc::detail {

/**
* @brief Functor to run an operator for a specified field.
Expand Down Expand Up @@ -90,6 +88,4 @@ inline void ProtobufReader::function_builder(T& s, size_t maxlen, std::tuple<Ope
CUDF_EXPECTS(m_cur <= end, "Current pointer to metadata stream is out of bounds");
}

} // namespace orc
} // namespace io
} // namespace cudf
} // namespace cudf::io::orc::detail
10 changes: 3 additions & 7 deletions cpp/src/io/orc/orc_field_writer.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -26,9 +26,7 @@
* `ProtobufWriter::write(...)` functions
*/

namespace cudf {
namespace io {
namespace orc {
namespace cudf::io::orc::detail {

struct ProtobufWriter::ProtobufFieldWriter {
int struct_size{0};
Expand Down Expand Up @@ -129,6 +127,4 @@ struct ProtobufWriter::ProtobufFieldWriter {
size_t value() { return struct_size; }
};

} // namespace orc
} // namespace io
} // namespace cudf
} // namespace cudf::io::orc::detail
15 changes: 3 additions & 12 deletions cpp/src/io/orc/orc_gpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

#include "io/comp/comp.hpp"
#include "io/statistics/statistics.cuh"
#include "io/utilities/column_buffer.hpp"
#include "orc.hpp"

#include <cudf/detail/cuco_helpers.hpp>
Expand All @@ -33,10 +32,7 @@

#include <cuco/static_map.cuh>

namespace cudf {
namespace io {
namespace orc {
namespace gpu {
namespace cudf::io::orc::detail {

using cudf::detail::device_2dspan;
using cudf::detail::host_2dspan;
Expand Down Expand Up @@ -65,9 +61,7 @@ auto constexpr VALUE_SENTINEL = size_type{-1};
struct CompressedStreamInfo {
CompressedStreamInfo() = default;
explicit constexpr CompressedStreamInfo(uint8_t const* compressed_data_, size_t compressed_size_)
: compressed_data(compressed_data_),
uncompressed_data(nullptr),
compressed_data_size(compressed_size_)
: compressed_data(compressed_data_), compressed_data_size(compressed_size_)
{
}
uint8_t const* compressed_data{}; // [in] base ptr to compressed stream data
Expand Down Expand Up @@ -500,7 +494,4 @@ void reduce_pushdown_masks(device_span<orc_column_device_view const> orc_columns
device_2dspan<cudf::size_type> set_counts,
rmm::cuda_stream_view stream);

} // namespace gpu
} // namespace orc
} // namespace io
} // namespace cudf
} // namespace cudf::io::orc::detail
Loading

0 comments on commit a6acba0

Please sign in to comment.