The Gaudi Framework  master (b9786168)
Loading...
Searching...
No Matches
CUDADeviceArray.h
Go to the documentation of this file.
1/***********************************************************************************\
2* (c) Copyright 2024-2025 CERN for the benefit of the LHCb and ATLAS collaborations *
3* *
4* This software is distributed under the terms of the Apache version 2 licence, *
5* copied verbatim in the file "LICENSE". *
6* *
7* In applying this licence, CERN does not waive the privileges and immunities *
8* granted to it by virtue of its status as an Intergovernmental Organization *
9* or submit itself to any jurisdiction. *
10\***********************************************************************************/
11#pragma once
12
13// Gaudi
16
17// Standard Library
18#include <ranges>
19#include <span>
20#include <type_traits>
21
22namespace Gaudi::CUDA {
24 template <class T>
25 concept TriviallyCopyable = std::is_trivially_copyable<T>::value;
26
28 template <class R, class T>
29 concept HostRange = std::ranges::contiguous_range<R> && std::ranges::sized_range<R> &&
30 std::is_same_v<T, std::ranges::range_value_t<R>>;
31
42
43 template <TriviallyCopyable T>
45 public:
47 DeviceArray( Stream& stream, std::size_t len );
49 DeviceArray( DeviceArrayGlobalTag_t globalTag, std::size_t len );
52
54 template <HostRange<T> R>
55 DeviceArray& operator=( const R& src );
57 DeviceArray& operator=( const T& src ) { return this->operator=( std::span<T, 1>( &src, 1 ) ); }
58
62 DeviceArray& operator=( const DeviceArray& rhs );
64 template <HostRange<T> R>
65 void toHost( R& dest );
67 void toHost( T& dest );
69 T* devPtr() { return m_ptr; };
71 T* operator&() { return m_ptr; }
73 T* operator->() { return m_ptr; }
74
75 private:
77 const std::size_t m_len;
78 const std::size_t m_size;
80 };
81
82 namespace Detail {
83 void* allocateWithStream( std::size_t size, Stream& stream );
84 void* allocateNoStream( std::size_t size, Gaudi::AsynchronousAlgorithm* parent );
85
86 void freeWithStream( void* ptr, Stream& stream );
87 void freeNoStream( void* ptr );
88
89 void copyHostToDeviceWithStream( void* devPtr, const void* hstPtr, std::size_t size, Stream& stream );
90 void copyHostToDeviceNoStream( void* devPtr, const void* hstPtr, std::size_t size );
91
92 void copyDeviceToHostWithStream( void* hstPtr, const void* devPtr, std::size_t size, Stream& stream );
93 void copyDeviceToHostNoStream( void* hstPtr, const void* devPtr, std::size_t size );
94
95 void copyDeviceToDeviceWithStream( void* destDevPtr, const void* srcDevPtr, std::size_t size, Stream& stream );
96 void copyDeviceToDeviceNoStream( void* destDevPtr, const void* srcDevPtr, std::size_t size );
97 } // namespace Detail
98
99 template <TriviallyCopyable T>
100 DeviceArray<T>::DeviceArray( Stream& stream, std::size_t len )
101 : m_ptr( static_cast<T*>( Detail::allocateWithStream( len * sizeof( T ), stream ) ) )
102 , m_len( len )
103 , m_size( len * sizeof( T ) )
104 , m_stream( &stream ) {
105 stream.registerDependency();
106 }
107
108 template <TriviallyCopyable T>
110 : m_ptr( static_cast<T*>(
111 Detail::allocateNoStream( len * sizeof( T ), globalTag.hasAsyncParent ? globalTag.asyncPtr : nullptr ) ) )
112 , m_len( len )
113 , m_size( len * sizeof( T ) )
114 , m_stream( nullptr ) {}
115
116 template <TriviallyCopyable T>
118 if ( m_stream == nullptr ) {
120 } else {
122 }
123 m_stream->removeDependency();
124 }
125
126 template <TriviallyCopyable T>
127 template <HostRange<T> R>
129 // Guard against mismatched sizes
130 if ( std::ranges::size( src ) != m_len ) {
131 if ( m_stream != nullptr ) {
132 m_stream->parent()->error() << "Host to device copy with mismatched sizes: " << std::ranges::size( src )
133 << " on host and " << m_len << " on device" << endmsg;
134 throw GaudiException( "Host to device copy with mismatched sizes", "CUDADeviceArrayException",
136 } // We want the error message so we throw an exception
137 throw GaudiException( "Host to device copy with mismatched sizes", "CUDADeviceArrayException",
139 }
140 if ( m_stream == nullptr ) {
141 Detail::copyHostToDeviceNoStream( m_ptr, std::ranges::data( src ), m_size );
142 return *this;
143 }
144 Detail::copyHostToDeviceWithStream( m_ptr, std::ranges::data( src ), m_size, *m_stream );
145 return *this;
146 }
147
148 template <TriviallyCopyable T>
150 if ( this == &rhs ) { return *this; }
151 if ( m_stream != nullptr && rhs.m_stream != nullptr && m_stream != rhs.m_stream ) {
152 m_stream->parent()->error()
153 << "Device to device copies between DeviceArrays on different streams are not allowed!" << endmsg;
154 throw GaudiException( "Device to device copy with mismatched streams", "CUDADeviceArrayException",
156 }
157 if ( m_len != rhs.m_len ) {
158 if ( m_stream != nullptr ) {
159 m_stream->parent()->error() << "Device to device copy with mismatched sizes: " << rhs.m_len << " and " << m_len
160 << endmsg;
161 throw GaudiException( "Device to device copy with mismatched sizes", "CUDADeviceArrayException",
163 }
164 if ( rhs.m_stream != nullptr ) {
165 rhs.m_stream->parent()->error() << "Device to device copy with mismatched sizes: " << rhs.m_len << " and "
166 << m_len << endmsg;
167 throw GaudiException( "Device to device copy with mismatched sizes", "CUDADeviceArrayException",
169 }
170 // We want the error message so we throw an exception
171 throw GaudiException( "Device to device copy with mismatched sizes", "CUDADeviceArrayException",
173 }
174
175 // Do the copy
176 if ( m_stream != nullptr ) {
178 } else if ( rhs.m_stream != nullptr ) {
180 } else {
182 }
183 return *this;
184 }
185
186 template <TriviallyCopyable T>
187 template <HostRange<T> R>
188 void DeviceArray<T>::toHost( R& dest ) {
189 // Guard against mismatched sizes
190 if ( std::ranges::size( dest ) != m_len ) {
191 if ( m_stream != nullptr ) {
192 m_stream->parent()->error() << "Device to host copy with mismatched sizes: " << m_len << " on device and "
193 << std::ranges::size( dest ) << " on host" << endmsg;
194 throw GaudiException( "Device to host copy with mismatched sizes", "CUDADeviceArrayException",
196 } // We want the error message so we throw an exception
197 throw GaudiException( "Device to host copy with mismatched sizes", "CUDADeviceArrayException",
199 }
200 if ( m_stream == nullptr ) {
201 Detail::copyDeviceToHostNoStream( std::ranges::data( dest ), m_ptr, m_size );
202 return;
203 }
204 Detail::copyDeviceToHostWithStream( std::ranges::data( dest ), m_ptr, m_size, *m_stream );
205 }
206
207 template <TriviallyCopyable T>
208 void DeviceArray<T>::toHost( T& dest ) {
209 std::span<T, 1> dest_span( &dest, 1 );
210 this->toHost( dest_span );
211 }
212
213} // namespace Gaudi::CUDA
MsgStream & endmsg(MsgStream &s)
MsgStream Modifier: endmsg. Calls the output method of the MsgStream.
Definition MsgStream.h:198
MsgStream & error() const
shortcut for the method msgStream(MSG::ERROR)
Base class from which all concrete algorithm classes should be derived.
Definition Algorithm.h:87
Base class for asynchronous algorithms.
DeviceArray & operator=(const T &src)
Copy from a host value (for size 1)
const std::size_t m_size
void toHost(R &dest)
Copy to a HostRange.
T * devPtr()
Return raw device pointer.
DeviceArray & operator=(const R &src)
Copy from a HostRange.
const std::size_t m_len
T * operator->()
Allow access to struct members.
DeviceArray(Stream &stream, std::size_t len)
Construct DeviceArray local to a Stream (i.e. Algorithm). May not be stored in whiteboard.
T * operator&()
Alias for devPtr.
const Gaudi::AsynchronousAlgorithm * parent()
Access the parent algorithm.
Definition CUDAStream.h:36
Define general base for Gaudi exception.
constexpr static const auto FAILURE
Definition StatusCode.h:100
Constrain ranges that can represent host memory compatible with this array.
Ensure type is trivially copyable (conceptization of std::is_trivially_copyable)
void copyDeviceToDeviceNoStream(void *destDevPtr, const void *srcDevPtr, std::size_t size)
void * allocateNoStream(std::size_t size, Gaudi::AsynchronousAlgorithm *parent)
void copyDeviceToHostWithStream(void *hstPtr, const void *devPtr, std::size_t size, Stream &stream)
void copyHostToDeviceNoStream(void *devPtr, const void *hstPtr, std::size_t size)
void * allocateWithStream(std::size_t size, Stream &stream)
void copyDeviceToHostNoStream(void *hstPtr, const void *devPtr, std::size_t size)
void copyDeviceToDeviceWithStream(void *destDevPtr, const void *srcDevPtr, std::size_t size, Stream &stream)
void freeNoStream(void *ptr)
void copyHostToDeviceWithStream(void *devPtr, const void *hstPtr, std::size_t size, Stream &stream)
void freeWithStream(void *ptr, Stream &stream)
DeviceArrayGlobalTag_t(Gaudi::Algorithm *parent)
Gaudi::AsynchronousAlgorithm *const asyncPtr
DeviceArrayGlobalTag_t(Gaudi::AsynchronousAlgorithm *parent)