Skip to content

Commit

Permalink
Faster async MDS buf init. Experiments with mem init.
Browse files Browse the repository at this point in the history
  • Loading branch information
kautlenbachs committed Nov 20, 2023
1 parent bd731f7 commit e815b39
Show file tree
Hide file tree
Showing 13 changed files with 271 additions and 62 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ jobs:
ssh -o "StrictHostKeyChecking no" $INSTANCE_USERNAME@$instance_ip "cp -r /home/$INSTANCE_USERNAME/e2e /dev/shm/"
- uses: actions/checkout@v4
- name: End-to-end tests
id: e2e-run
continue-on-error: true
env:
INSTANCE_USERNAME: ${{ secrets.EAGLE_USERNAME }}
Expand All @@ -84,6 +85,10 @@ jobs:
ssh -o "StrictHostKeyChecking no" $INSTANCE_USERNAME@$instance_ip "sudo chown -R $INSTANCE_USERNAME:$INSTANCE_USERNAME /dev/shm/e2e"
ssh -o "StrictHostKeyChecking no" $INSTANCE_USERNAME@$instance_ip "rsync -av /dev/shm/e2e /home/$INSTANCE_USERNAME/"
ssh -o "StrictHostKeyChecking no" $INSTANCE_USERNAME@$instance_ip "rm -rf /dev/shm/e2e"
# This is needed because 'continue-on-error: true' for 'e2e-run' would not make this step failure in actions UI
- name: Report E2E run error
if: steps.e2e-run.outcome != 'success'
run: exit 1

stop-gpu-instance:
name: Stop GPU instance
Expand Down
9 changes: 9 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,12 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_
else ()
target_link_libraries(asar_focus PRIVATE gdal cufft Eigen3::Eigen app-static util-static envisat-format-static cuda-util-static)
endif()

if (DEFINED ENV{ALUS_ENABLE_EXPERIMENTS})
set(ALUS_ENABLE_EXPERIMENTS $ENV{ALUS_ENABLE_EXPERIMENTS})
endif ()

if (ALUS_ENABLE_EXPERIMENTS)
set(ALUS_EXPERIMENTS_BINARY_OUTPUT_DIR ${PROJECT_BINARY_DIR}/experiments)
add_subdirectory(experiments)
endif ()
4 changes: 2 additions & 2 deletions cuda_util/cuda_algorithm.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
namespace alus::cuda::algorithm {

template <typename T>
__device__ inline const T clamp(const T& v, const T& lo, const T& hi) {
return max(lo, min(hi, v));
__device__ __host__ inline const T clamp(const T& v, const T& lo, const T& hi) {
return v > hi ? hi : (v < lo ? lo : v);
}

} // namespace alus::cuda::algorithm
1 change: 1 addition & 0 deletions envisat_format/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ list(APPEND ENVISAT_FORMAT_SOURCES
${CMAKE_CURRENT_LIST_DIR}/src/asar_constants.cc
${CMAKE_CURRENT_LIST_DIR}/src/doris_orbit.cc
${CMAKE_CURRENT_LIST_DIR}/src/envisat_aux_file.cc
${CMAKE_CURRENT_LIST_DIR}/src/envisat_format_kernels.cu
${CMAKE_CURRENT_LIST_DIR}/src/envisat_im_parse.cc
${CMAKE_CURRENT_LIST_DIR}/src/envisat_lvl0_parser.cc
${CMAKE_CURRENT_LIST_DIR}/src/envisat_lvl1_writer.cc
Expand Down
20 changes: 20 additions & 0 deletions envisat_format/include/envisat_format_kernels.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/**
* ENVISAT and ERS ASAR instrument focusser for QA4EO activity (c) by CGI Estonia AS
*
* ENVISAT and ERS ASAR instrument focusser for QA4EO activity is licensed under a
* Creative Commons Attribution-ShareAlike 4.0 International License.
*
* You should have received a copy of the license along with this
* work. If not, see http://creativecommons.org/licenses/by-sa/4.0/
*/
#pragma once

#include "cuda_util/device_padded_image.cuh"
#include "envisat_format/include/envisat_types.h"

namespace alus::asar::envformat {

// dest_buffer must be on device and have enough capacity to accommodate original image (complex float) without padding.
void ConditionResults(DevicePaddedImage& img, char* dest_space, size_t record_header_size, float calibration_constant);

}
67 changes: 67 additions & 0 deletions envisat_format/src/envisat_format_kernels.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/**
* ENVISAT and ERS ASAR instrument focusser for QA4EO activity (c) by CGI Estonia AS
*
* ENVISAT and ERS ASAR instrument focusser for QA4EO activity is licensed under a
* Creative Commons Attribution-ShareAlike 4.0 International License.
*
* You should have received a copy of the license along with this
* work. If not, see http://creativecommons.org/licenses/by-sa/4.0/
*/

#include "envisat_format_kernels.h"

#include "cuda_util/cuda_algorithm.cuh"
#include "cuda_util/cuda_bit.cuh"
#include "envisat_types.h"

namespace alus::asar::envformat {

__global__ void CalibrateClampToBigEndian(cufftComplex* data, int x_size_stride, int x_size, int y_size,
char* dest_space, int record_header_size, float calibration_constant) {
const int x = threadIdx.x + blockIdx.x * blockDim.x;
const int y = threadIdx.y + blockIdx.y * blockDim.y;

const int src_idx = (y * x_size_stride) + x;
const int mds_x_size = x_size * sizeof(IQ16) + record_header_size;

if (x < x_size && y < y_size) {
auto pix = data[src_idx];
IQ16 result;
result.i = static_cast<int16_t>(
alus::cuda::algorithm::clamp<float>(pix.x * calibration_constant, INT16_MIN, INT16_MAX));
result.q = static_cast<int16_t>(
alus::cuda::algorithm::clamp<float>(pix.y * calibration_constant, INT16_MIN, INT16_MAX));

result.i = alus::cuda::bit::Byteswap(result.i);
result.q = alus::cuda::bit::Byteswap(result.q);

// |HEADER...|packet data....................................|
// |HEADER...|packet data....................................|
// ...
const int dest_idx = (y * mds_x_size) + record_header_size + (x * sizeof(IQ16));
if (x == 0) {
for (int i{record_header_size}; i >= 0; i--) {
dest_space[dest_idx - i] = 0x00;
}
}

dest_space[dest_idx] = static_cast<char>(result.i & 0x00FF);
dest_space[dest_idx + 1] = static_cast<char>((result.i >> 8) & 0x00FF);
dest_space[dest_idx + 2] = static_cast<char>(result.q & 0x00FF);
dest_space[dest_idx + 3] = static_cast<char>((result.q >> 8) & 0x00FF);
}
}

void ConditionResults(DevicePaddedImage& img, char* dest_space, size_t record_header_size, float calibration_constant) {
const auto x_size_stride = img.XStride(); // Need to count for FFT padding when rows are concerned
const auto x_size = img.XSize();
const auto y_size = img.YSize(); // No need to calculate on Y padded FFT data
dim3 block_sz(16, 16);
dim3 grid_sz((x_size_stride + 15) / 16, (y_size + 15) / 16);
CalibrateClampToBigEndian<<<grid_sz, block_sz>>>(img.Data(), x_size_stride, x_size, y_size, dest_space,
record_header_size, calibration_constant);
CHECK_CUDA_ERR(cudaDeviceSynchronize());
CHECK_CUDA_ERR(cudaGetLastError());
}

} // namespace alus::asar::envformat
11 changes: 11 additions & 0 deletions experiments/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

add_executable(host-memory-allocation ${CMAKE_CURRENT_LIST_DIR}/host_memory_allocation.cc)

target_link_libraries(host-memory-allocation
PRIVATE
util-static)

set_target_properties(host-memory-allocation
PROPERTIES
RUNTIME_OUTPUT_DIRECTORY ${ALUS_EXPERIMENTS_BINARY_OUTPUT_DIR}
)
116 changes: 116 additions & 0 deletions experiments/host_memory_allocation.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#include <unistd.h>
#include <array>
#include <chrono>
#include <climits>
#include <cstddef>
#include <cstring>
#include <iostream>
#include <random>
#include <string_view>

#include <sys/mman.h>

namespace {
constexpr int MiB = 1 << 20; // 1 MB = 2^20 bytes

std::array<char, 1000 * MiB> BUFFER_OF_1GB{};

auto TimePoint() { return std::chrono::steady_clock::now(); }

void TimeLog(std::chrono::steady_clock::time_point beg, std::chrono::steady_clock::time_point end, const char* msg) {
auto diff = std::chrono::duration_cast<std::chrono::milliseconds>(end - beg).count();
std::cout << msg << " time = " << diff << " ms" << std::endl;
}

void Init() {
// Use random_device to obtain seed for the random number engine
std::random_device rd;
// Use Mersenne Twister 19937 as the random number engine
std::mt19937 gen(rd());
// Define the distribution for random values (you can adjust the range as needed)
std::uniform_int_distribution<char> dis(CHAR_MIN, CHAR_MAX);

// Fill the array with random values
for (auto& e : BUFFER_OF_1GB) {
e = dis(gen);
}
}

enum class InitializeBufferStrategy {
ALLOCATE_ONLY,
ALLOCATE_WRITE_SINGLE_ITEM,
ALLOCATE_MEMSET_ALL_BUFFER,
ALLOCATE_WRITE_SINGLE_BYTE_TO_EACH_PAGE,
ALLOCATE_MMAP
};
char* InitializeBufferFor(size_t bytes, InitializeBufferStrategy strategy) {
char* buf = new char[bytes];
if (strategy == InitializeBufferStrategy::ALLOCATE_ONLY) {
return buf;
} else if (strategy == InitializeBufferStrategy::ALLOCATE_WRITE_SINGLE_ITEM) {
buf[0] = 0x12;
return buf;
} else if (strategy == InitializeBufferStrategy::ALLOCATE_MEMSET_ALL_BUFFER) {
std::memset(buf, 0xAA, BUFFER_OF_1GB.size());
return buf;
} else if (strategy == InitializeBufferStrategy::ALLOCATE_WRITE_SINGLE_BYTE_TO_EACH_PAGE) {
for (size_t pg{0}; pg < BUFFER_OF_1GB.size() / sysconf(_SC_PAGE_SIZE); pg++) {
buf[pg * 4096] = 0x12;
}
return buf;
} else if (strategy == InitializeBufferStrategy::ALLOCATE_MMAP) {
void* ptr = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);
return (char*)ptr;
}

return nullptr;
}

std::string_view InitializeBufferStrategyStr(const InitializeBufferStrategy strategy) {
switch (strategy) {
case InitializeBufferStrategy::ALLOCATE_ONLY:
return "ALLOCATE_ONLY";
case InitializeBufferStrategy::ALLOCATE_WRITE_SINGLE_ITEM:
return "ALLOCATE_WRITE_SINGLE_ITEM";
case InitializeBufferStrategy::ALLOCATE_MEMSET_ALL_BUFFER:
return "ALLOCATE_MEMSET_ALL_BUFFER";
case InitializeBufferStrategy::ALLOCATE_WRITE_SINGLE_BYTE_TO_EACH_PAGE:
return "ALLOCATE_WRITE_SINGLE_BYTE_TO_EACH_PAGE";
case InitializeBufferStrategy::ALLOCATE_MMAP:
return "ALLOCATE_MMAP";
}

return "";
}

void RunTest(const char* data_from, size_t size, const InitializeBufferStrategy strategy) {
const auto strategy_name = std::string(InitializeBufferStrategyStr(strategy));
const auto the_begin = TimePoint();
auto* buf = InitializeBufferFor(size, strategy);
TimeLog(the_begin, TimePoint(), strategy_name.c_str());
const auto write_start = TimePoint();
std::memcpy(buf, data_from, size);
TimeLog(write_start, TimePoint(), "Buffer write time");
TimeLog(the_begin, TimePoint(), (strategy_name + " total").c_str());
if (strategy == InitializeBufferStrategy::ALLOCATE_MMAP) {
munmap(buf, size);
} else {
delete[] buf;
}
}

} // namespace

int main(int, char*[]) {
std::cout << "Initializing " << BUFFER_OF_1GB.size() / MiB << "MiB" << std::endl;
Init();

RunTest(BUFFER_OF_1GB.data(), BUFFER_OF_1GB.size(), InitializeBufferStrategy::ALLOCATE_ONLY);
RunTest(BUFFER_OF_1GB.data(), BUFFER_OF_1GB.size(), InitializeBufferStrategy::ALLOCATE_WRITE_SINGLE_ITEM);
RunTest(BUFFER_OF_1GB.data(), BUFFER_OF_1GB.size(), InitializeBufferStrategy::ALLOCATE_MEMSET_ALL_BUFFER);
RunTest(BUFFER_OF_1GB.data(), BUFFER_OF_1GB.size(),
InitializeBufferStrategy::ALLOCATE_WRITE_SINGLE_BYTE_TO_EACH_PAGE);
RunTest(BUFFER_OF_1GB.data(), BUFFER_OF_1GB.size(), InitializeBufferStrategy::ALLOCATE_MMAP);

return 0;
}
4 changes: 2 additions & 2 deletions main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "img_output.h"
#include "main_flow.h"
#include "math_utils.h"
#include "mem_alloc.h"
#include "plot.h"
#include "sar/fractional_doppler_centroid.cuh"
#include "sar/iq_correction.cuh"
Expand Down Expand Up @@ -271,8 +272,7 @@ int main(int argc, char* argv[]) {
mds.n_records = mds_record_count;
mds.record_size = mds_record_size;
auto mds_buffer_init = std::async(std::launch::async, [&mds] {
mds.buf = new char[mds.n_records * mds.record_size];
mds.buf[0] = 0;
mds.buf = static_cast<char*>(alus::util::Memalloc(mds.n_records * mds.record_size));
});

std::string lvl1_out_name = asar_meta.product_name;
Expand Down
3 changes: 2 additions & 1 deletion main_flow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include "alus_log.h"
#include "date_time_util.h"
#include "envisat_format_kernels.h"
#include "ers_aux_file.h"
#include "sar/iq_correction.cuh"
#include "status_assembly.h"
Expand Down Expand Up @@ -106,6 +107,6 @@ void FetchAuxFiles(InstrumentFile& ins_file, ConfigurationFile& conf_file, ASARM
}

void FormatResults(DevicePaddedImage& img, char* dest_space, size_t record_header_size, float calibration_constant) {
ConditionResults(img, dest_space, record_header_size, calibration_constant);
envformat::ConditionResults(img, dest_space, record_header_size, calibration_constant);
}
} // namespace alus::asar::mainflow
52 changes: 1 addition & 51 deletions sar/iq_correction.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@
#include "alus_log.h"
#include "checks.h"
#include "cuda_util/cuda_algorithm.cuh"
#include "cuda_util/cuda_bit.cuh"
#include "cuda_util/cuda_cleanup.h"
#include "envisat_types.h"

namespace {

Expand Down Expand Up @@ -324,52 +322,4 @@ void RawDataCorrection(DevicePaddedImage& img, CorrectionParams par, SARResults&
ApplyPhaseCorrection<<<grid_sz, block_sz>>>(img.Data(), x_size, y_size, sin_corr, cos_corr);
}
}
}

__global__ void CalibrateClampToBigEndian(cufftComplex* data, int x_size_stride, int x_size, int y_size,
char* dest_space, int record_header_size, float calibration_constant) {
const int x = threadIdx.x + blockIdx.x * blockDim.x;
const int y = threadIdx.y + blockIdx.y * blockDim.y;

const int src_idx = (y * x_size_stride) + x;
const int mds_x_size = x_size * sizeof(IQ16) + record_header_size;

if (x < x_size && y < y_size) {
auto pix = data[src_idx];
IQ16 result;
result.i = static_cast<int16_t>(
alus::cuda::algorithm::clamp<float>(pix.x * calibration_constant, INT16_MIN, INT16_MAX));
result.q = static_cast<int16_t>(
alus::cuda::algorithm::clamp<float>(pix.y * calibration_constant, INT16_MIN, INT16_MAX));

result.i = alus::cuda::bit::Byteswap(result.i);
result.q = alus::cuda::bit::Byteswap(result.q);

// |HEADER...|packet data....................................|
// |HEADER...|packet data....................................|
// ...
const int dest_idx = (y * mds_x_size) + record_header_size + (x * sizeof(IQ16));
if (x == 0) {
for (int i{record_header_size}; i >= 0; i--) {
dest_space[dest_idx - i] = 0x00;
}
}

dest_space[dest_idx] = static_cast<char>(result.i & 0x00FF);
dest_space[dest_idx + 1] = static_cast<char>((result.i >> 8) & 0x00FF);
dest_space[dest_idx + 2] = static_cast<char>(result.q & 0x00FF);
dest_space[dest_idx + 3] = static_cast<char>((result.q >> 8) & 0x00FF);
}
}

void ConditionResults(DevicePaddedImage& img, char* dest_space, size_t record_header_size, float calibration_constant) {
const auto x_size_stride = img.XStride(); // Need to count for FFT padding when rows are concerned
const auto x_size = img.XSize();
const auto y_size = img.YSize(); // No need to calculate on Y padded FFT data
dim3 block_sz(16, 16);
dim3 grid_sz((x_size_stride + 15) / 16, (y_size + 15) / 16);
CalibrateClampToBigEndian<<<grid_sz, block_sz>>>(img.Data(), x_size_stride, x_size, y_size, dest_space,
record_header_size, calibration_constant);
CHECK_CUDA_ERR(cudaDeviceSynchronize());
CHECK_CUDA_ERR(cudaGetLastError());
}
}
15 changes: 9 additions & 6 deletions sar/iq_correction.cuh
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
/**
* ENVISAT and ERS ASAR instrument focusser for QA4EO activity (c) by CGI Estonia AS
*
* ENVISAT and ERS ASAR instrument focusser for QA4EO activity is licensed under a
* Creative Commons Attribution-ShareAlike 4.0 International License.
*
* You should have received a copy of the license along with this
* work. If not, see http://creativecommons.org/licenses/by-sa/4.0/
*/
#pragma once

#include <memory>

#include "cuda_util/device_padded_image.cuh"
#include "envisat_format/include/envisat_types.h"
#include "sar_metadata.h"

struct CorrectionParams {
size_t n_total_samples;
// int n_sm;
};
void RawDataCorrection(DevicePaddedImage& img, CorrectionParams par, SARResults& results);

// dest_buffer must be on device and have enough capacity to accommodate original image (complex float) without padding.
void ConditionResults(DevicePaddedImage& img, char* dest_space, size_t record_header_size, float calibration_constant);
Loading

0 comments on commit e815b39

Please sign in to comment.