Skip to content

Commit

Permalink
ERS fields definitions and CUDA util
Browse files Browse the repository at this point in the history
* Doc added to ERS fields. Some fields now correctly parsed.
* Cuda device initialization and separate cuda_util module.
* Fix E2E problems when stashing E2E dir
  • Loading branch information
kautlenbachs authored Nov 10, 2023
1 parent 4e61011 commit bbaa0d3
Show file tree
Hide file tree
Showing 22 changed files with 506 additions and 144 deletions.
25 changes: 4 additions & 21 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ jobs:
ssh -o "StrictHostKeyChecking no" $INSTANCE_USERNAME@$instance_ip "cp -r /home/$INSTANCE_USERNAME/e2e /dev/shm/"
- uses: actions/checkout@v4
- name: End-to-end tests
continue-on-error: true
env:
INSTANCE_USERNAME: ${{ secrets.EAGLE_USERNAME }}
ACCESS_KEY_ID: ${{ secrets.PALMY_ACCESS_KEY_ID }}
Expand All @@ -75,37 +76,19 @@ jobs:
echo "aws_secret_access_key=${SECRET_ACCESS_KEY}" >> /tmp/cred
scp -o "StrictHostKeyChecking no" /tmp/cred $INSTANCE_USERNAME@$instance_ip:/tmp/
ssh -o "StrictHostKeyChecking no" $INSTANCE_USERNAME@$instance_ip "/tmp/asar-focus/build-automation/run_e2e_container.sh /dev/shm/e2e $exe_image_name /tmp/cred"
# Separate step because when tests fail we would like to analyze the results still
stash-assets-from-ramdisk:
name: Stash E2E assets from ramdisk to persistent disk
runs-on: ubuntu-latest
needs: [start-gpu-instance, end-to-end-testing]
if: |
needs.end-to-end-testing.result != 'skipped' &&
needs.end-to-end-testing.result != 'cancelled'
steps:
- name: Create SSH key
env:
SSH_PRIVATE_KEY: ${{ secrets.EAGLE_SSH_KEY }}
run: |
key_location=~/.ssh/
mkdir -p $key_location
key_path=$key_location/id_rsa
echo "$SSH_PRIVATE_KEY" > $key_path
sudo chmod 600 $key_path
ssh-keygen -f $key_path -y > $key_path.pub
- name: Modify ownership and move assets
- name: Stash assets from ramdisk
env:
INSTANCE_USERNAME: ${{ secrets.EAGLE_USERNAME }}
run: |
instance_ip=${{ needs.start-gpu-instance.outputs.instance_ip }}
ssh -o "StrictHostKeyChecking no" $INSTANCE_USERNAME@$instance_ip "sudo chown -R $INSTANCE_USERNAME:$INSTANCE_USERNAME /dev/shm/e2e"
ssh -o "StrictHostKeyChecking no" $INSTANCE_USERNAME@$instance_ip "rsync -av /dev/shm/e2e /home/$INSTANCE_USERNAME/"
ssh -o "StrictHostKeyChecking no" $INSTANCE_USERNAME@$instance_ip "rm -rf /dev/shm/e2e"
stop-gpu-instance:
name: Stop GPU instance
runs-on: ubuntu-latest
needs: [start-gpu-instance, end-to-end-testing, stash-assets-from-ramdisk]
needs: [start-gpu-instance, end-to-end-testing]
if: |
always() &&
needs.start-gpu-instance.result == 'success'
Expand Down
17 changes: 4 additions & 13 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,8 @@ set(SAR_SOURCES
sar/sar_metadata.h
)

set(CUDA_UTIL_SOURCES
cuda_util/cuda_cleanup.h
cuda_util/cuda_util.h
cuda_util/cuda_workplace.h
cuda_util/cufft_plan.cpp
cuda_util/cufft_plan.h
cuda_util/device_padded_image.cu
cuda_util/device_padded_image.cuh
)

set(SOURCES
main.cc main_flow.cc ${SAR_SOURCES} ${CUDA_UTIL_SOURCES}
main.cc main_flow.cc ${SAR_SOURCES}
)

add_executable(asar_focus ${SOURCES})
Expand Down Expand Up @@ -76,13 +66,14 @@ include(FetchContent)
include(dependencies/FetchContents.cmake)

add_subdirectory(app)
add_subdirectory(cuda_util)
add_subdirectory(envisat_format)
add_subdirectory(util)

target_include_directories(asar_focus PRIVATE ${CMAKE_CURRENT_LIST_DIR})
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9)
message("Also adding 'stdc++fs' for linking since the g++ version ${CMAKE_CXX_COMPILER_VERSION} requires it.")
target_link_libraries(asar_focus PRIVATE gdal cufft Eigen3::Eigen stdc++fs app-static util-static envisat-format-static)
target_link_libraries(asar_focus PRIVATE gdal cufft Eigen3::Eigen stdc++fs app-static util-static envisat-format-static cuda-util-static)
else ()
target_link_libraries(asar_focus PRIVATE gdal cufft Eigen3::Eigen app-static util-static envisat-format-static)
target_link_libraries(asar_focus PRIVATE gdal cufft Eigen3::Eigen app-static util-static envisat-format-static cuda-util-static)
endif()
2 changes: 2 additions & 0 deletions build-automation/create_exe_in_container.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ docker pull "${docker_image}"
set +e
docker stop "${container_name}"
docker rm "${container_name}"
# Also remove the previously built image along with its child images.
docker rmi -f "${container_name}-exe"
set -e
docker run -t -d --name "${container_name}" "${docker_image}"
docker cp "${repo_dir}" "${container_name}":"${container_work_dir}/"
Expand Down
2 changes: 1 addition & 1 deletion build-automation/run_e2e_container.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ if [ -d "$e2e_dir" ]; then
# Extract the last folder name
e2e_dir_name=$(basename "$e2e_dir")
else
echo "Given repo dir ${e2e_dir} is not a directory or does not exist."
echo "Given E2E dir '${e2e_dir}' is not a directory or does not exist."
exit 1
fi

Expand Down
25 changes: 25 additions & 0 deletions cuda_util/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Build script of the 'cuda-util-static' library, which compiles the CUDA utility sources of this directory.

# Make this directory available as an include path of the library (consumed below).
list(APPEND CUDA_UTIL_INCLUDE_DIRS
${CMAKE_CURRENT_LIST_DIR})

# NOTE(review): list(APPEND) extends any CUDA_UTIL_SOURCES value inherited from the parent scope -
# confirm that no parent CMakeLists.txt still sets this variable.
list(APPEND CUDA_UTIL_SOURCES
${CMAKE_CURRENT_LIST_DIR}/cuda_device.cu
${CMAKE_CURRENT_LIST_DIR}/cuda_device_init.cu
${CMAKE_CURRENT_LIST_DIR}/cufft_plan.cc
${CMAKE_CURRENT_LIST_DIR}/device_padded_image.cu
${CMAKE_CURRENT_LIST_DIR}/memory_policy.cc
)

add_library(cuda-util-static STATIC ${CUDA_UTIL_SOURCES})
# PUBLIC: targets that link against cuda-util-static inherit the include directory as well.
target_include_directories(cuda-util-static
PUBLIC
${CUDA_UTIL_INCLUDE_DIRS}
)
# NOTE(review): 'CUDA::cudart' is presumably the imported target created by find_package(CUDAToolkit)
# somewhere outside this file - confirm it is found before this directory is added.
target_link_libraries(cuda-util-static
PUBLIC
CUDA::cudart
app-static
)
# The produced archive gets the base name 'cuda-util' instead of the target name.
set_target_properties(cuda-util-static
PROPERTIES
OUTPUT_NAME cuda-util
)
44 changes: 44 additions & 0 deletions cuda_util/cuda_device.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/**
* ENVISAT and ERS ASAR instrument focusser for QA4EO activity (c) by CGI Estonia AS
*
* ENVISAT and ERS ASAR instrument focusser for QA4EO activity is licensed under a
* Creative Commons Attribution-ShareAlike 4.0 International License.
*
* You should have received a copy of the license along with this
* work. If not, see http://creativecommons.org/licenses/by-sa/4.0/
*/

#include "cuda_device.h"

#include <stdexcept>

#include <cuda_runtime_api.h>

#include "cuda_util.h"

namespace alus::cuda {

/**
 * Builds a device description by copying selected fields out of a 'cudaDeviceProp' struct.
 *
 * @param device_nr   Device number, stored as is and later handed to cudaSetDevice() by Set().
 * @param device_prop Type-erased pointer to a 'cudaDeviceProp'. It is only read during construction,
 *                    the pointer itself is not retained.
 * @throws std::invalid_argument when 'device_prop' is a null pointer.
 */
CudaDevice::CudaDevice(int device_nr, void* device_prop) : device_nr_{device_nr} {
    // The header keeps the parameter opaque, so nothing stops a caller from passing a null pointer.
    // Fail loudly instead of dereferencing it.
    if (device_prop == nullptr) {
        throw std::invalid_argument("CudaDevice requires a non-null 'cudaDeviceProp' pointer.");
    }

    // Converting a void pointer back to its original object type is a static_cast.
    // The properties are only read, hence the pointer to const.
    const auto* dev = static_cast<const cudaDeviceProp*>(device_prop);
    cc_major_ = dev->major;
    cc_minor_ = dev->minor;
    name_ = dev->name;  // Copies the character array into the owned std::string.
    sm_count_ = dev->multiProcessorCount;
    max_threads_per_sm_ = dev->maxThreadsPerMultiProcessor;
    warp_size_ = dev->warpSize;
    total_global_memory_ = dev->totalGlobalMem;
    alignment_ = dev->textureAlignment;
}

/**
 * Selects this device as the current CUDA device by passing the stored device number to cudaSetDevice().
 *
 * The returned status is handed to CHECK_CUDA_ERR.
 * NOTE(review): the macro is presumably defined in cuda_util.h and turns a failure into an exception - confirm.
 * NOTE(review): according to the CUDA runtime documentation the selection applies to the calling host thread,
 * so other threads presumably have to call Set() themselves - confirm against the callers.
 */
void CudaDevice::Set() const {
CHECK_CUDA_ERR(cudaSetDevice(device_nr_));
}

/**
 * Queries how much global memory is currently free on this device.
 *
 * Calls Set() first, therefore the current CUDA device is switched to this one as a side effect and it is
 * not switched back afterwards.
 *
 * @return The 'free' value reported by cudaMemGetInfo(). The 'total' value is fetched as well but discarded.
 */
size_t CudaDevice::GetFreeGlobalMemory() const {
Set();
// Both variables are pure output arguments of cudaMemGetInfo(), hence no initial values.
size_t total;
size_t free;
CHECK_CUDA_ERR(cudaMemGetInfo(&free, &total));

return free;
}

} // namespace alus::cuda
51 changes: 51 additions & 0 deletions cuda_util/cuda_device.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/**
* ENVISAT and ERS ASAR instrument focusser for QA4EO activity (c) by CGI Estonia AS
*
* ENVISAT and ERS ASAR instrument focusser for QA4EO activity is licensed under a
* Creative Commons Attribution-ShareAlike 4.0 International License.
*
* You should have received a copy of the license along with this
* work. If not, see http://creativecommons.org/licenses/by-sa/4.0/
*/

#pragma once

#include <cstddef>
#include <string>
#include <string_view>

namespace alus::cuda {
/**
 * Description of a single GPU device.
 *
 * All values except the free memory are captured once at construction time from a 'cudaDeviceProp' struct and
 * are afterwards only exposed through read-only accessors.
 */
class CudaDevice final {
public:
    /**
     * Parses GPU device properties from a 'cudaDeviceProp' struct pointer.
     * The pointer is an opaque one here in order to not pull CUDA SDK headers into host compilation.
     *
     * @param device_nr   Device number this description belongs to.
     * @param device_prop Pointer to a 'cudaDeviceProp' instance.
     */
    CudaDevice(int device_nr, void* device_prop);

    /// A device description can not exist without the properties it is parsed from.
    CudaDevice() = delete;

    /// Selects this device via cudaSetDevice(), see the implementation in cuda_device.cu.
    void Set() const;

    /// Device number given at construction.
    [[nodiscard]] int GetDeviceNr() const { return device_nr_; }

    /// Device name. The view refers to memory owned by this object.
    [[nodiscard]] std::string_view GetName() const { return name_; }

    /// Value of 'cudaDeviceProp::major'.
    [[nodiscard]] size_t GetCcMajor() const { return cc_major_; }

    /// Value of 'cudaDeviceProp::minor'.
    [[nodiscard]] size_t GetCcMinor() const { return cc_minor_; }

    /// Value of 'cudaDeviceProp::multiProcessorCount'.
    [[nodiscard]] size_t GetSmCount() const { return sm_count_; }

    /// Value of 'cudaDeviceProp::maxThreadsPerMultiProcessor'.
    [[nodiscard]] size_t GetMaxThreadsPerSm() const { return max_threads_per_sm_; }

    /// Value of 'cudaDeviceProp::warpSize'.
    [[nodiscard]] size_t GetWarpSize() const { return warp_size_; }

    /// Value of 'cudaDeviceProp::totalGlobalMem'.
    [[nodiscard]] size_t GetTotalGlobalMemory() const { return total_global_memory_; }

    /// Unlike the other accessors this one asks the CUDA runtime on every call, see cuda_device.cu.
    [[nodiscard]] size_t GetFreeGlobalMemory() const;

    /// Value of 'cudaDeviceProp::textureAlignment'.
    [[nodiscard]] size_t GetMemoryAlignment() const { return alignment_; }

private:
    int device_nr_;
    size_t cc_major_;
    size_t cc_minor_;
    std::string name_;
    size_t sm_count_;
    size_t max_threads_per_sm_;
    size_t warp_size_;
    size_t total_global_memory_;
    size_t alignment_;
};
} // namespace alus::cuda
73 changes: 73 additions & 0 deletions cuda_util/cuda_device_init.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/**
* ENVISAT and ERS ASAR instrument focusser for QA4EO activity (c) by CGI Estonia AS
*
* ENVISAT and ERS ASAR instrument focusser for QA4EO activity is licensed under a
* Creative Commons Attribution-ShareAlike 4.0 International License.
*
* You should have received a copy of the license along with this
* work. If not, see http://creativecommons.org/licenses/by-sa/4.0/
*/

#include "cuda_device_init.h"

#include <stdexcept>
#include <thread>

#include <cuda_runtime.h>

#include "cuda_util.h"

namespace alus::cuda {
/**
 * Starts the device query on a separate thread right away and returns without waiting for it.
 * The outcome is observed later through IsFinished() and CheckErrors().
 */
CudaInit::CudaInit() {
    // std::launch::async requests execution on a separate thread instead of deferring the call.
    init_future_ = std::async(std::launch::async, &CudaInit::QueryDevices, this);
}

/**
 * Polls the background device query without blocking.
 *
 * @return true when the query has completed, false when it is still in progress.
 * @throws std::runtime_error when the future holds no state anymore, e.g. after CheckErrors() consumed it.
 */
bool CudaInit::IsFinished() const {
    if (init_future_.valid()) {
        // A zero timeout turns wait_for() into a plain status probe.
        const auto status = init_future_.wait_for(std::chrono::milliseconds(0));
        return status == std::future_status::ready;
    }
    throw std::runtime_error("The future is already a past, invalid state queried.");
}

/**
 * Enumerates the GPU devices, stores a description of each one in 'devices_' and starts one warm-up thread
 * per device.
 *
 * @throws std::runtime_error when the reported device count is zero. Failing CUDA calls are reported through
 *         CHECK_CUDA_ERR.
 */
void CudaInit::QueryDevices() {
    int device_count = 0;
    CHECK_CUDA_ERR(cudaGetDeviceCount(&device_count));
    if (device_count == 0) {
        throw std::runtime_error("No GPU devices detected");
    }

    for (int i = 0; i < device_count; ++i) {
        cudaDeviceProp deviceProp;
        CHECK_CUDA_ERR(cudaGetDeviceProperties(&deviceProp, i));
        // CudaDevice copies what it needs, so handing over the address of the loop local is fine.
        devices_.emplace_back(i, &deviceProp);

        // The device is poked on a dedicated thread, so that this query does not have to wait for it.
        // The flip side is that whichever code invokes the GPU first might get delayed while a warm-up is still
        // in progress. Return codes are ignored on purpose - if there are errors, then sooner or later they
        // will pop out somewhere else.
        const auto warm_up = [i]() {
            cudaSetDevice(i);
            cudaFree(nullptr);
        };
        device_warmups_.emplace_back(warm_up);
    }
}

/**
 * Blocks until the background device query has completed and rethrows whatever exception it ended with.
 *
 * Retrieving the result consumes the future, therefore this can be called successfully only once.
 *
 * @throws std::runtime_error when the future holds no state anymore.
 */
void CudaInit::CheckErrors() {
    if (init_future_.valid()) {
        init_future_.get();
        return;
    }
    throw std::runtime_error("The future is already a past, invalid state queried.");
}

/**
 * Waits for the background device query to complete and joins every warm-up thread.
 *
 * An exception stored in the future is not rethrown here, because wait() only blocks. CheckErrors() is the
 * place to observe it.
 */
CudaInit::~CudaInit() {
    // Wait without a timeout. QueryDevices() appends to 'device_warmups_' from the async task, so the join loop
    // below may start only after that task is done. A bounded wait would not make the shutdown any shorter:
    // the destructor of a future obtained from std::async blocks until the task completes. It would only open a
    // window where the task still adds threads to a vector that is being iterated here and that gets destroyed
    // before 'init_future_' (members are destroyed in reverse declaration order), ending in std::terminate()
    // for a thread that was never joined.
    if (init_future_.valid()) {
        init_future_.wait();
    }

    for (auto& warmup : device_warmups_) {
        if (warmup.joinable()) {
            warmup.join();
        }
    }
}

} // namespace alus::cuda
38 changes: 38 additions & 0 deletions cuda_util/cuda_device_init.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/**
* ENVISAT and ERS ASAR instrument focusser for QA4EO activity (c) by CGI Estonia AS
*
* ENVISAT and ERS ASAR instrument focusser for QA4EO activity is licensed under a
* Creative Commons Attribution-ShareAlike 4.0 International License.
*
* You should have received a copy of the license along with this
* work. If not, see http://creativecommons.org/licenses/by-sa/4.0/
*/

#pragma once

#include <future>
#include <thread>
#include <vector>

#include "cuda_device.h"

namespace alus::cuda {
/*
 * Discovers the GPU devices in the background.
 *
 * The constructor (see cuda_device_init.cu) launches QueryDevices() via std::async. That task fills 'devices_'
 * and starts one warm-up thread per device, collected in 'device_warmups_'.
 * The background work refers to this object, which is fine because the class can neither be copied
 * (std::future member) nor moved (user declared destructor).
 */
class CudaInit final {
public:
// Starts the asynchronous device query and returns without waiting for it.
CudaInit();

// Non-blocking check whether the device query has completed.
// Throws std::runtime_error when the future holds no state anymore, e.g. after CheckErrors().
[[nodiscard]] bool IsFinished() const;
// Blocks until the device query has completed and rethrows the exception it ended with, if any.
// Consumes the future, so a second call throws std::runtime_error.
void CheckErrors();

// NOTE(review): 'devices_' is written by the background task without any synchronization visible here -
// presumably callers must wait for CheckErrors() or a true IsFinished() before reading it; confirm.
[[nodiscard]] const std::vector<CudaDevice>& GetDevices() const { return devices_; }

// Waits on the background query and joins the warm-up threads, details in cuda_device_init.cu.
~CudaInit();

private:
// Body of the background task: enumerates devices, fills 'devices_' and starts the warm-up threads.
void QueryDevices();

// The declaration order matters, members are destroyed in reverse order:
// 'device_warmups_' first, then 'init_future_', then 'devices_'.
std::vector<CudaDevice> devices_;
std::future<void> init_future_;
std::vector<std::thread> device_warmups_;
};
} // namespace alus::cuda
1 change: 1 addition & 0 deletions cuda_util/cuda_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
* You should have received a copy of the license along with this
* work. If not, see http://creativecommons.org/licenses/by-sa/4.0/
*/

#pragma once

#include <stdexcept>
Expand Down
Loading

0 comments on commit bbaa0d3

Please sign in to comment.