19# include <cuda_runtime.h>
23#include <boost/fiber/condition_variable.hpp>
24#include <boost/fiber/mutex.hpp>
// Pull in std::chrono duration literals (e.g. the 100ms retry sleep used below).
33 using namespace std::chrono_literals;
// Exception tag passed to StatusCode::orThrow when reporting CUDA device-array errors.
35 const std::string DEVARREXC =
"CUDADeviceArrayException";
// Build a human-readable description of a CUDA error: its symbolic name,
// numeric code, description string, and the source location (file:line).
// NOTE(review): lines are elided in this extract -- the std::format result is
// presumably returned (or thrown) on a line not shown here; confirm in the
// full source.
36 std::string err_fmt( cudaError_t err, std::string file,
int line ) {
// Both calls return pointers to static strings owned by the CUDA runtime.
37 const char* errname = cudaGetErrorName( err );
38 const char* errstr = cudaGetErrorString( err );
40 std::format(
"Encountered CUDA error {} [{}]: {} on {}:{}", errname,
int( err ), errstr, file, line );
// Mutex/condition-variable pair used to park boost fibers whose GPU allocation
// failed with cudaErrorMemoryAllocation; the free* functions below call
// gpu_mem_cv.notify_all() after releasing device memory so waiters can retry.
44 boost::fibers::mutex gpu_mem_mtx;
45 boost::fibers::condition_variable gpu_mem_cv;
// Body fragment of allocateWithStream( size, stream ): stream-ordered device
// allocation that retries while the device is out of memory.
// NOTE(review): several lines are elided in this extract -- the enclosing
// do { ... }, the non-OOM error path, and any timeout check using start_time
// are not visible here.
49 void* devPtr =
nullptr;
50 cudaError_t err = cudaSuccess;
// start_time presumably feeds an allocation-timeout check on an elided line.
51 auto start_time = std::chrono::steady_clock::now();
// Stream-ordered allocation (CUDA stream-ordered memory allocator).
53 err = cudaMallocAsync( &devPtr, size, stream );
54 if ( err == cudaSuccess ) {
break; }
// Out of memory: block this fiber until a free* call signals gpu_mem_cv.
55 if ( err == cudaErrorMemoryAllocation ) {
57 std::unique_lock lck( gpu_mem_mtx );
58 gpu_mem_cv.wait( lck );
62 }
while ( err == cudaErrorMemoryAllocation );
// The fiber may have been suspended above; restore the owning algorithm's
// context before returning (failure is reported via DEVARREXC).
64 stream.parent()->restoreAfterSuspend().orThrow(
"Error restoring", DEVARREXC );
// Body fragment of allocateNoStream( size, parent ): synchronous cudaMalloc
// with the same out-of-memory retry protocol as allocateWithStream.
// NOTE(review): lines are elided in this extract -- the enclosing do { ... },
// the non-OOM error path, and any timeout handling are not visible here.
69 void* devPtr =
nullptr;
70 cudaError_t err = cudaSuccess;
// start_time presumably feeds an allocation-timeout check on an elided line.
71 auto start_time = std::chrono::steady_clock::now();
73 err = cudaMalloc( &devPtr, size );
74 if ( err == cudaSuccess ) {
break; }
75 if ( err == cudaErrorMemoryAllocation ) {
// With a parent AsynchronousAlgorithm we can suspend the fiber on the CV...
79 if ( parent !=
nullptr ) {
80 std::unique_lock lck( gpu_mem_mtx );
81 gpu_mem_cv.wait( lck );
// ...otherwise (no fiber context) poll: sleep the OS thread briefly and retry.
84 std::this_thread::sleep_for( 100ms );
89 }
while ( err == cudaErrorMemoryAllocation );
// Body fragment of freeWithStream( ptr, stream ): stream-ordered free, then
// wake every fiber parked in the allocators waiting for device memory.
94 cudaError_t err = cudaFreeAsync( ptr, stream );
// NOTE(review): the error-branch body is elided in this extract.
95 if ( err != cudaSuccess ) {
98 gpu_mem_cv.notify_all();
// Body fragment of freeNoStream( ptr ): synchronous cudaFree, then wake every
// fiber parked in the allocators waiting for device memory.
102 cudaError_t err = cudaFree( ptr );
// NOTE(review): the error-branch body is elided in this extract.
103 if ( err != cudaSuccess ) {
106 gpu_mem_cv.notify_all();
// Body fragment of copyHostToDeviceWithStream: asynchronous H2D copy enqueued
// on the given stream, then wait (via the stream's await) for completion.
110 cudaError_t err = cudaMemcpyAsync( devPtr, hstPtr, size, cudaMemcpyHostToDevice, stream );
// NOTE(review): the error-branch body is elided in this extract.
111 if ( err != cudaSuccess ) {
// Suspends until the stream's queued work completes; failure raises DEVARREXC.
115 stream.await().orThrow(
"Await error", DEVARREXC );
// Body fragment of copyHostToDeviceNoStream: blocking H2D cudaMemcpy.
119 cudaError_t err = cudaMemcpy( devPtr, hstPtr, size, cudaMemcpyHostToDevice );
// NOTE(review): the error-branch body is elided in this extract.
120 if ( err != cudaSuccess ) {
// Body fragment of copyDeviceToHostWithStream: asynchronous D2H copy enqueued
// on the given stream, then wait (via the stream's await) for completion.
126 cudaError_t err = cudaMemcpyAsync( hstPtr, devPtr, size, cudaMemcpyDeviceToHost, stream );
// NOTE(review): the error-branch body is elided in this extract.
127 if ( err != cudaSuccess ) {
// Suspends until the stream's queued work completes; failure raises DEVARREXC.
131 stream.await().orThrow(
"Await error", DEVARREXC );
// Body fragment of copyDeviceToHostNoStream: blocking D2H cudaMemcpy.
135 cudaError_t err = cudaMemcpy( hstPtr, devPtr, size, cudaMemcpyDeviceToHost );
// NOTE(review): the error-branch body is elided in this extract.
136 if ( err != cudaSuccess ) {
// Body fragment of copyDeviceToDeviceWithStream: asynchronous D2D copy
// enqueued on the given stream.
// NOTE(review): unlike the H2D/D2H variants above, no stream.await() is
// visible here -- it may sit on an elided line; confirm in the full source.
142 cudaError_t err = cudaMemcpyAsync( destDevPtr, srcDevPtr, size, cudaMemcpyDeviceToDevice, stream );
// NOTE(review): the error-branch body is elided in this extract.
143 if ( err != cudaSuccess ) {
// Body fragment of copyDeviceToDeviceNoStream: blocking D2D cudaMemcpy.
149 cudaError_t err = cudaMemcpy( destDevPtr, srcDevPtr, size, cudaMemcpyDeviceToDevice );
// NOTE(review): the error-branch body is elided in this extract.
150 if ( err != cudaSuccess ) {
Base class for asynchronous algorithms.
virtual StatusCode restoreAfterSuspend() const
Restore after suspend.
Defines the general base for Gaudi exceptions.
const StatusCode & orThrow(std::string_view message, std::string_view tag) const
Throw a GaudiException in case of failures.
constexpr static const auto FAILURE
void copyDeviceToDeviceNoStream(void *destDevPtr, const void *srcDevPtr, std::size_t size)
void * allocateNoStream(std::size_t size, Gaudi::AsynchronousAlgorithm *parent)
void copyDeviceToHostWithStream(void *hstPtr, const void *devPtr, std::size_t size, Stream &stream)
void copyHostToDeviceNoStream(void *devPtr, const void *hstPtr, std::size_t size)
void * allocateWithStream(std::size_t size, Stream &stream)
void copyDeviceToHostNoStream(void *hstPtr, const void *devPtr, std::size_t size)
void copyDeviceToDeviceWithStream(void *destDevPtr, const void *srcDevPtr, std::size_t size, Stream &stream)
void freeNoStream(void *ptr)
void copyHostToDeviceWithStream(void *devPtr, const void *hstPtr, std::size_t size, Stream &stream)
void freeWithStream(void *ptr, Stream &stream)