The Gaudi Framework  master (37c0b60a)
CUDADeviceArray.h
Go to the documentation of this file.
1 /***********************************************************************************\
2 * (c) Copyright 2024 CERN for the benefit of the LHCb and ATLAS collaborations *
3 * *
4 * This software is distributed under the terms of the Apache version 2 licence, *
5 * copied verbatim in the file "LICENSE". *
6 * *
7 * In applying this licence, CERN does not waive the privileges and immunities *
8 * granted to it by virtue of its status as an Intergovernmental Organization *
9 * or submit itself to any jurisdiction. *
10 \***********************************************************************************/
11 // Gaudi
12 #include <Gaudi/CUDA/CUDAStream.h>
13 #include <GaudiKernel/StatusCode.h>
14 
15 // Standard Library
16 #include <ranges>
17 #include <span>
18 #include <type_traits>
19 
20 namespace Gaudi::CUDA {
22  template <class T>
24 
26  template <class R, class T>
27  concept HostRange = std::ranges::contiguous_range<R> && std::ranges::sized_range<R> &&
28  std::is_same_v<T, std::ranges::range_value_t<R>>;
29 
31  const bool hasAsyncParent;
32  union {
35  };
36 
38  DeviceArrayGlobalTag_t( Gaudi::Algorithm* parent ) : hasAsyncParent( false ), syncPtr( parent ) {}
39  };
40 
41  template <TriviallyCopyable T>
42  class DeviceArray {
43  public:
49  ~DeviceArray();
50 
52  template <HostRange<T> R>
53  DeviceArray& operator=( const R& src );
55  DeviceArray& operator=( const T& src ) { return this->operator=( std::span<T, 1>( &src, 1 ) ); }
56 
60  DeviceArray& operator=( const DeviceArray& rhs );
62  template <HostRange<T> R>
63  void toHost( R& dest );
65  void toHost( T& dest );
67  T* devPtr() { return m_ptr; };
69  T* operator&() { return m_ptr; }
71  T* operator->() { return m_ptr; }
72 
73  private:
74  T* m_ptr;
77  Stream* const m_stream;
78  };
79 
80  namespace Detail {
83 
84  void freeWithStream( void* ptr, Stream& stream );
85  void freeNoStream( void* ptr );
86 
87  void copyHostToDeviceWithStream( void* devPtr, const void* hstPtr, std::size_t size, Stream& stream );
88  void copyHostToDeviceNoStream( void* devPtr, const void* hstPtr, std::size_t size );
89 
90  void copyDeviceToHostWithStream( void* hstPtr, const void* devPtr, std::size_t size, Stream& stream );
91  void copyDeviceToHostNoStream( void* hstPtr, const void* devPtr, std::size_t size );
92 
93  void copyDeviceToDeviceWithStream( void* destDevPtr, const void* srcDevPtr, std::size_t size, Stream& stream );
94  void copyDeviceToDeviceNoStream( void* destDevPtr, const void* srcDevPtr, std::size_t size );
95  } // namespace Detail
96 
97  template <TriviallyCopyable T>
99  : m_ptr( static_cast<T*>( Detail::allocateWithStream( len * sizeof( T ), stream ) ) )
100  , m_len( len )
101  , m_size( len * sizeof( T ) )
102  , m_stream( &stream ) {
103  stream.registerDependency();
104  }
105 
106  template <TriviallyCopyable T>
108  : m_ptr( static_cast<T*>(
109  Detail::allocateNoStream( len * sizeof( T ), globalTag.hasAsyncParent ? globalTag.asyncPtr : nullptr ) ) )
110  , m_len( len )
111  , m_size( len * sizeof( T ) )
112  , m_stream( nullptr ) {}
113 
114  template <TriviallyCopyable T>
116  if ( m_stream == nullptr ) {
117  Detail::freeNoStream( m_ptr );
118  } else {
119  Detail::freeWithStream( m_ptr, *m_stream );
120  }
121  m_stream->removeDependency();
122  }
123 
124  template <TriviallyCopyable T>
125  template <HostRange<T> R>
127  // Guard against mismatched sizes
128  if ( std::ranges::size( src ) != m_len ) {
129  if ( m_stream != nullptr ) {
130  m_stream->parent()->error() << "Host to device copy with mismatched sizes: " << std::ranges::size( src )
131  << " on host and " << m_len << " on device" << endmsg;
132  throw GaudiException( "Host to device copy with mismatched sizes", "CUDADeviceArrayException",
134  } // We want the error message so we throw an exception
135  throw GaudiException( "Host to device copy with mismatched sizes", "CUDADeviceArrayException",
137  }
138  if ( m_stream == nullptr ) {
139  Detail::copyHostToDeviceNoStream( m_ptr, std::ranges::data( src ), m_size );
140  return *this;
141  }
142  Detail::copyHostToDeviceWithStream( m_ptr, std::ranges::data( src ), m_size, *m_stream );
143  return *this;
144  }
145 
146  template <TriviallyCopyable T>
148  if ( this == &rhs ) { return *this; }
149  if ( m_stream != nullptr && rhs.m_stream != nullptr && m_stream != rhs.m_stream ) {
150  m_stream->parent()->error()
151  << "Device to device copies between DeviceArrays on different streams are not allowed!" << endmsg;
152  throw GaudiException( "Device to device copy with mismatched streams", "CUDADeviceArrayException",
154  }
155  if ( m_len != rhs.m_len ) {
156  if ( m_stream != nullptr ) {
157  m_stream->parent()->error() << "Device to device copy with mismatched sizes: " << rhs.m_len << " and " << m_len
158  << endmsg;
159  throw GaudiException( "Device to device copy with mismatched sizes", "CUDADeviceArrayException",
161  }
162  if ( rhs.m_stream != nullptr ) {
163  rhs.m_stream->parent()->error() << "Device to device copy with mismatched sizes: " << rhs.m_len << " and "
164  << m_len << endmsg;
165  throw GaudiException( "Device to device copy with mismatched sizes", "CUDADeviceArrayException",
167  }
168  // We want the error message so we throw an exception
169  throw GaudiException( "Device to device copy with mismatched sizes", "CUDADeviceArrayException",
171  }
172 
173  // Do the copy
174  if ( m_stream != nullptr ) {
175  Detail::copyDeviceToDeviceWithStream( m_ptr, rhs.m_ptr, m_size, *m_stream );
176  } else if ( rhs.m_stream != nullptr ) {
177  Detail::copyDeviceToDeviceWithStream( m_ptr, rhs.m_ptr, m_size, *rhs.m_stream );
178  } else {
179  Detail::copyDeviceToDeviceNoStream( m_ptr, rhs.m_ptr, m_size );
180  }
181  return *this;
182  }
183 
184  template <TriviallyCopyable T>
185  template <HostRange<T> R>
187  // Guard against mismatched sizes
188  if ( std::ranges::size( dest ) != m_len ) {
189  if ( m_stream != nullptr ) {
190  m_stream->parent()->error() << "Device to host copy with mismatched sizes: " << m_len << " on device and "
191  << std::ranges::size( dest ) << " on host" << endmsg;
192  throw GaudiException( "Device to host copy with mismatched sizes", "CUDADeviceArrayException",
194  } // We want the error message so we throw an exception
195  throw GaudiException( "Device to host copy with mismatched sizes", "CUDADeviceArrayException",
197  }
198  if ( m_stream == nullptr ) {
199  Detail::copyDeviceToHostNoStream( std::ranges::data( dest ), m_ptr, m_size );
200  return;
201  }
202  Detail::copyDeviceToHostWithStream( std::ranges::data( dest ), m_ptr, m_size, *m_stream );
203  }
204 
205  template <TriviallyCopyable T>
207  std::span<T, 1> dest_span( &dest, 1 );
208  this->toHost( dest_span );
209  }
210 
211 } // namespace Gaudi::CUDA
Gaudi::CUDA::DeviceArray::operator=
DeviceArray & operator=(const T &src)
Copy from a host value (for size 1)
Definition: CUDADeviceArray.h:55
Gaudi::CUDA::DeviceArray::toHost
void toHost(R &dest)
Copy to a HostRange.
Definition: CUDADeviceArray.h:186
Gaudi::CUDA::HostRange
concept HostRange
Constrain ranges that can represent host memory compatible with this array.
Definition: CUDADeviceArray.h:27
Gaudi::CUDA::DeviceArrayGlobalTag_t::syncPtr
Gaudi::Algorithm *const syncPtr
Definition: CUDADeviceArray.h:34
Write.stream
stream
Definition: Write.py:32
Gaudi::CUDA::DeviceArray::operator->
T * operator->()
Allow access to struct members.
Definition: CUDADeviceArray.h:71
Gaudi::CUDA::DeviceArray::operator&
T * operator&()
Alias for devPtr.
Definition: CUDADeviceArray.h:69
details::size
constexpr auto size(const T &, Args &&...) noexcept
Definition: AnyDataWrapper.h:23
Gaudi::CUDA::DeviceArrayGlobalTag_t::hasAsyncParent
const bool hasAsyncParent
Definition: CUDADeviceArray.h:31
Gaudi::CUDA::Detail::copyDeviceToDeviceNoStream
void copyDeviceToDeviceNoStream(void *destDevPtr, const void *srcDevPtr, std::size_t size)
Definition: CUDADeviceArray.cpp:148
GaudiException
Definition: GaudiException.h:31
Gaudi::CUDA::DeviceArray::m_size
const std::size_t m_size
Definition: CUDADeviceArray.h:76
Gaudi::CUDA::DeviceArrayGlobalTag_t::DeviceArrayGlobalTag_t
DeviceArrayGlobalTag_t(Gaudi::Algorithm *parent)
Definition: CUDADeviceArray.h:38
Gaudi::CUDA::Detail::copyDeviceToDeviceWithStream
void copyDeviceToDeviceWithStream(void *destDevPtr, const void *srcDevPtr, std::size_t size, Stream &stream)
Definition: CUDADeviceArray.cpp:141
CUDAStream.h
Gaudi::CUDA::Stream::parent
const Gaudi::AsynchronousAlgorithm * parent()
Access the parent algorithm.
Definition: CUDAStream.h:36
Gaudi::CUDA::DeviceArrayGlobalTag_t::DeviceArrayGlobalTag_t
DeviceArrayGlobalTag_t(Gaudi::AsynchronousAlgorithm *parent)
Definition: CUDADeviceArray.h:37
StatusCode.h
Gaudi::CUDA::Detail::copyHostToDeviceNoStream
void copyHostToDeviceNoStream(void *devPtr, const void *hstPtr, std::size_t size)
Definition: CUDADeviceArray.cpp:118
Gaudi::CUDA::Detail::freeWithStream
void freeWithStream(void *ptr, Stream &stream)
Definition: CUDADeviceArray.cpp:93
Gaudi::CUDA::Detail::allocateNoStream
void * allocateNoStream(std::size_t size, Gaudi::AsynchronousAlgorithm *parent)
Definition: CUDADeviceArray.cpp:68
Gaudi::CUDA::DeviceArray::devPtr
T * devPtr()
Return raw device pointer.
Definition: CUDADeviceArray.h:67
Gaudi::CUDA::Detail::allocateWithStream
void * allocateWithStream(std::size_t size, Stream &stream)
Definition: CUDADeviceArray.cpp:48
std::is_trivially_copyable
Gaudi::CUDA::DeviceArray::~DeviceArray
~DeviceArray()
Destructor.
Definition: CUDADeviceArray.h:115
Gaudi::Algorithm
Base class from which all concrete algorithm classes should be derived.
Definition: Algorithm.h:90
Gaudi::CUDA::Stream
Definition: CUDAStream.h:21
Gaudi::CUDA::DeviceArray::m_stream
Stream *const m_stream
Definition: CUDADeviceArray.h:77
GaudiPython.Bindings.nullptr
nullptr
Definition: Bindings.py:87
Gaudi::AsynchronousAlgorithm
Base class for asynchronous algorithms.
Definition: AsynchronousAlgorithm.h:34
endmsg
MsgStream & endmsg(MsgStream &s)
MsgStream Modifier: endmsg. Calls the output method of the MsgStream.
Definition: MsgStream.h:202
Gaudi::CUDA::Detail::copyDeviceToHostWithStream
void copyDeviceToHostWithStream(void *hstPtr, const void *devPtr, std::size_t size, Stream &stream)
Definition: CUDADeviceArray.cpp:125
Gaudi::CUDA::DeviceArrayGlobalTag_t
Definition: CUDADeviceArray.h:30
Gaudi::CUDA::TriviallyCopyable
concept TriviallyCopyable
Ensure type is trivially copyable (conceptization of std::is_trivially_copyable)
Definition: CUDADeviceArray.h:23
Gaudi::CUDA::Detail::freeNoStream
void freeNoStream(void *ptr)
Definition: CUDADeviceArray.cpp:101
gaudirun.dest
dest
Definition: gaudirun.py:224
Gaudi::CUDA::DeviceArray::operator=
DeviceArray & operator=(const R &src)
Copy from a HostRange.
Gaudi::CUDA
Definition: CUDAStream.h:20
Gaudi::CUDA::DeviceArray::DeviceArray
DeviceArray(Stream &stream, std::size_t len)
Construct DeviceArray local to a Stream (i.e. Algorithm). May not be stored in whiteboard.
Definition: CUDADeviceArray.h:98
Gaudi::CUDA::Detail::copyHostToDeviceWithStream
void copyHostToDeviceWithStream(void *devPtr, const void *hstPtr, std::size_t size, Stream &stream)
Definition: CUDADeviceArray.cpp:109
std::size_t
Gaudi::CUDA::DeviceArray::m_len
const std::size_t m_len
Definition: CUDADeviceArray.h:75
StatusCode::FAILURE
constexpr static const auto FAILURE
Definition: StatusCode.h:101
Gaudi::CUDA::Detail::copyDeviceToHostNoStream
void copyDeviceToHostNoStream(void *hstPtr, const void *devPtr, std::size_t size)
Definition: CUDADeviceArray.cpp:134
Gaudi::CUDA::DeviceArrayGlobalTag_t::asyncPtr
Gaudi::AsynchronousAlgorithm *const asyncPtr
Definition: CUDADeviceArray.h:33
Gaudi::CUDA::DeviceArray::m_ptr
T * m_ptr
Definition: CUDADeviceArray.h:74
Gaudi::CUDA::DeviceArray
Definition: CUDADeviceArray.h:42