The Gaudi Framework  master (181af51f)
Loading...
Searching...
No Matches
CUDADeviceArray.cpp
Go to the documentation of this file.
1/***********************************************************************************\
2* (c) Copyright 2024 CERN for the benefit of the LHCb and ATLAS collaborations *
3* *
4* This software is distributed under the terms of the Apache version 2 licence, *
5* copied verbatim in the file "LICENSE". *
6* *
7* In applying this licence, CERN does not waive the privileges and immunities *
8* granted to it by virtue of its status as an Intergovernmental Organization *
9* or submit itself to any jurisdiction. *
10\***********************************************************************************/
11#include "CUDADeviceArray.h"
12
13// Gaudi
16
17// CUDA
18#ifndef __CUDACC__
19# include <cuda_runtime.h>
20#endif
21
22// Fibers
23#include <boost/fiber/condition_variable.hpp>
24#include <boost/fiber/mutex.hpp>
25
26// standard library
27#include <chrono>
28#include <format>
29#include <string>
30#include <thread>
31
using namespace std::chrono_literals;
namespace {
  /// Exception tag used for every GaudiException thrown from this file.
  const std::string DEVARREXC = "CUDADeviceArrayException";

  /// Format a CUDA error code into a human-readable message including the
  /// source location. `file` is a string_view (typically bound to __FILE__)
  /// rather than std::string to avoid a heap allocation on every call.
  std::string err_fmt( cudaError_t err, std::string_view file, int line ) {
    const char* errname = cudaGetErrorName( err );
    const char* errstr  = cudaGetErrorString( err );
    return std::format( "Encountered CUDA error {} [{}]: {} on {}:{}", errname, int( err ), errstr, file, line );
  }

  // Fiber-aware synchronization for out-of-memory back-off: allocators wait
  // on gpu_mem_cv when the device is full, and every free notifies it.
  boost::fibers::mutex              gpu_mem_mtx;
  boost::fibers::condition_variable gpu_mem_cv;
} // namespace
47
48 void* allocateWithStream( std::size_t size, Stream& stream ) {
49 void* devPtr = nullptr;
50 cudaError_t err = cudaSuccess;
51 auto start_time = std::chrono::steady_clock::now();
52 do {
53 err = cudaMallocAsync( &devPtr, size, stream );
54 if ( err == cudaSuccess ) { break; }
55 if ( err == cudaErrorMemoryAllocation ) {
56 cudaGetLastError();
57 std::unique_lock lck( gpu_mem_mtx );
58 gpu_mem_cv.wait( lck );
59 } else {
60 throw GaudiException( err_fmt( err, __FILE__, __LINE__ ), DEVARREXC, StatusCode::FAILURE );
61 }
62 } while ( err == cudaErrorMemoryAllocation );
63 // In case we suspended
64 stream.parent()->restoreAfterSuspend().orThrow( "Error restoring", DEVARREXC );
65 return devPtr;
66 }
67
68 void* allocateNoStream( std::size_t size, Gaudi::AsynchronousAlgorithm* parent ) {
69 void* devPtr = nullptr;
70 cudaError_t err = cudaSuccess;
71 auto start_time = std::chrono::steady_clock::now();
72 do {
73 err = cudaMalloc( &devPtr, size );
74 if ( err == cudaSuccess ) { break; }
75 if ( err == cudaErrorMemoryAllocation ) {
76 cudaGetLastError();
77 // If called from an AsynchronousAlgorithm, wait as in the with stream variant
78 // Otherwise, the thread should sleep
79 if ( parent != nullptr ) {
80 std::unique_lock lck( gpu_mem_mtx );
81 gpu_mem_cv.wait( lck );
82 parent->restoreAfterSuspend().orThrow( "Error restoring", DEVARREXC );
83 } else {
84 std::this_thread::sleep_for( 100ms );
85 }
86 } else {
87 throw GaudiException( err_fmt( err, __FILE__, __LINE__ ), DEVARREXC, StatusCode::FAILURE );
88 }
89 } while ( err == cudaErrorMemoryAllocation );
90 return devPtr;
91 }
92
93 void freeWithStream( void* ptr, Stream& stream ) {
94 cudaError_t err = cudaFreeAsync( ptr, stream );
95 if ( err != cudaSuccess ) {
96 throw GaudiException( err_fmt( err, __FILE__, __LINE__ ), DEVARREXC, StatusCode::FAILURE );
97 }
98 gpu_mem_cv.notify_all();
99 }
100
101 void freeNoStream( void* ptr ) {
102 cudaError_t err = cudaFree( ptr );
103 if ( err != cudaSuccess ) {
104 throw GaudiException( err_fmt( err, __FILE__, __LINE__ ), DEVARREXC, StatusCode::FAILURE );
105 }
106 gpu_mem_cv.notify_all();
107 }
108
109 void copyHostToDeviceWithStream( void* devPtr, const void* hstPtr, std::size_t size, Stream& stream ) {
110 cudaError_t err = cudaMemcpyAsync( devPtr, hstPtr, size, cudaMemcpyHostToDevice, stream );
111 if ( err != cudaSuccess ) {
112 throw GaudiException( err_fmt( err, __FILE__, __LINE__ ), DEVARREXC, StatusCode::FAILURE );
113 }
114 // await stream to avoid deleting host memory before copy is done
115 stream.await().orThrow( "Await error", DEVARREXC );
116 }
117
118 void copyHostToDeviceNoStream( void* devPtr, const void* hstPtr, std::size_t size ) {
119 cudaError_t err = cudaMemcpy( devPtr, hstPtr, size, cudaMemcpyHostToDevice );
120 if ( err != cudaSuccess ) {
121 throw GaudiException( err_fmt( err, __FILE__, __LINE__ ), DEVARREXC, StatusCode::FAILURE );
122 }
123 }
124
125 void copyDeviceToHostWithStream( void* hstPtr, const void* devPtr, std::size_t size, Stream& stream ) {
126 cudaError_t err = cudaMemcpyAsync( hstPtr, devPtr, size, cudaMemcpyDeviceToHost, stream );
127 if ( err != cudaSuccess ) {
128 throw GaudiException( err_fmt( err, __FILE__, __LINE__ ), DEVARREXC, StatusCode::FAILURE );
129 }
130 // await stream to avoid deleting host memory before copy is done
131 stream.await().orThrow( "Await error", DEVARREXC );
132 }
133
134 void copyDeviceToHostNoStream( void* hstPtr, const void* devPtr, std::size_t size ) {
135 cudaError_t err = cudaMemcpy( hstPtr, devPtr, size, cudaMemcpyDeviceToHost );
136 if ( err != cudaSuccess ) {
137 throw GaudiException( err_fmt( err, __FILE__, __LINE__ ), DEVARREXC, StatusCode::FAILURE );
138 }
139 }
140
141 void copyDeviceToDeviceWithStream( void* destDevPtr, const void* srcDevPtr, std::size_t size, Stream& stream ) {
142 cudaError_t err = cudaMemcpyAsync( destDevPtr, srcDevPtr, size, cudaMemcpyDeviceToDevice, stream );
143 if ( err != cudaSuccess ) {
144 throw GaudiException( err_fmt( err, __FILE__, __LINE__ ), DEVARREXC, StatusCode::FAILURE );
145 }
146 }
147
148 void copyDeviceToDeviceNoStream( void* destDevPtr, const void* srcDevPtr, std::size_t size ) {
149 cudaError_t err = cudaMemcpy( destDevPtr, srcDevPtr, size, cudaMemcpyDeviceToDevice );
150 if ( err != cudaSuccess ) {
151 throw GaudiException( err_fmt( err, __FILE__, __LINE__ ), DEVARREXC, StatusCode::FAILURE );
152 }
153 }
154} // namespace Gaudi::CUDA::Detail
Base class for asynchronous algorithms.
virtual StatusCode restoreAfterSuspend() const
Restore after suspend.
Define general base for Gaudi exception.
const StatusCode & orThrow(std::string_view message, std::string_view tag) const
Throw a GaudiException in case of failures.
Definition StatusCode.h:206
constexpr static const auto FAILURE
Definition StatusCode.h:100
void copyDeviceToDeviceNoStream(void *destDevPtr, const void *srcDevPtr, std::size_t size)
void * allocateNoStream(std::size_t size, Gaudi::AsynchronousAlgorithm *parent)
void copyDeviceToHostWithStream(void *hstPtr, const void *devPtr, std::size_t size, Stream &stream)
void copyHostToDeviceNoStream(void *devPtr, const void *hstPtr, std::size_t size)
void * allocateWithStream(std::size_t size, Stream &stream)
void copyDeviceToHostNoStream(void *hstPtr, const void *devPtr, std::size_t size)
void copyDeviceToDeviceWithStream(void *destDevPtr, const void *srcDevPtr, std::size_t size, Stream &stream)
void freeNoStream(void *ptr)
void copyHostToDeviceWithStream(void *devPtr, const void *hstPtr, std::size_t size, Stream &stream)
void freeWithStream(void *ptr, Stream &stream)