21 # include <Gaudi/CUDAAsynchronousAlgHelper.cuh>
25 #include <boost/fiber/all.hpp>
26 #include <boost/unordered/unordered_flat_set.hpp>
28 #include <fmt/format.h>
41 #define ACCALG_AWAIT( stmt ) \
46 if ( restoreAfterSuspend().isFailure() ) return StatusCode::FAILURE;
// Fiber-local record of the whiteboard slot number owned by the fiber that is
// currently executing this algorithm.  It is read after a fiber suspends and
// resumes, to re-select the matching event store via
// whiteboard()->selectStore( *s_currentSlot ); a null value means the fiber
// has not yet been bound to a slot (see the sysExecute diagnostics below).
boost::fibers::fiber_specific_ptr<std::size_t> s_currentSlot{};
55 if ( !
whiteboard()->selectStore( *s_currentSlot ).isSuccess() ) {
64 setAsynchronous(
true );
70 msg() <<
MSG::DEBUG <<
"Starting sysExecute for AsynchronousAlgorithm on slot " <<
ctx.slot()
71 <<
"with s_currentSlot = " << fmt::to_string( fmt::ptr( s_currentSlot.get() ) ) <<
endmsg;
72 if ( s_currentSlot.get() ==
nullptr ) {
74 }
else if ( *s_currentSlot !=
ctx.slot() ) {
75 error() <<
"Current slot is " <<
ctx.slot() <<
" but s_currentSlot exists and is " << *s_currentSlot <<
endmsg;
83 boost::this_fiber::yield();
84 return restoreAfterSuspend();
88 template <
typename Clock,
typename Duration>
90 boost::this_fiber::sleep_until( sleep_time );
91 return restoreAfterSuspend();
95 template <
typename Rep,
typename Period>
97 boost::this_fiber::sleep_for( dur );
98 return restoreAfterSuspend();
101 #ifdef GAUDI_USE_CUDA
102 StatusCode cuda_stream_await( cudaStream_t cudaStream )
const {
104 CUDA_CHECK( Gaudi::CUDA::cuda_stream_await( cudaStream ) );
105 return restoreAfterSuspend();
116 #ifdef GAUDI_USE_CUDA
118 using namespace std::chrono_literals;
// Set of device pointers handed out by this stream's malloc() (inserted there,
// erased in free()).  free() consults it to reject pointers that were not
// allocated through this stream before calling cudaFreeAsync.
boost::unordered_flat_set<void*> allocations{};
130 operator cudaStream_t() {
return stream; }
132 template <
typename T>
134 void* devPtr =
nullptr;
135 cudaError_t err = cudaSuccess;
136 if constexpr ( !std::is_same_v<T, void> ) { len *=
sizeof( T ); }
139 err = cudaMallocAsync( &devPtr, len,
stream );
140 if ( err == cudaErrorMemoryAllocation ) {
142 if ( sc.
isFailure() ) { parent->print_cuda_error(
"Yield error" ); }
144 }
while ( err == cudaErrorMemoryAllocation );
145 const double waittime =
149 if ( waittime >= 0.01 ) {
150 fmt::print(
"Waited {} to allocate {} of GPU memory\n", SI( waittime,
"s" ), SI( len,
"B" ) );
152 allocations.insert( devPtr );
153 return static_cast<T*
>( devPtr );
156 template <
typename T>
157 void free( T* d_ptr ) {
158 auto iter = allocations.find( d_ptr );
159 if ( iter == allocations.end() ) {
160 parent->print_cuda_error(
"Called stream.free on an allocation not created by this stream" );
162 cudaFreeAsync(
static_cast<void*
>( d_ptr ),
stream );
163 allocations.erase( iter );