The Gaudi Framework  v39r1 (adb068b2)
GPUCruncher Class Reference

#include </builds/gaudi/Gaudi/GaudiHive/src/GPUCruncher.h>

Inheritance diagram for GPUCruncher:
Collaboration diagram for GPUCruncher:

Public Types

typedef tbb::concurrent_hash_map< std::string, unsigned int > CHM
 

Public Member Functions

bool isClonable () const override
 
StatusCode execute (const EventContext &ctx) const override
 the execution of the algorithm More...
 
StatusCode initialize () override
 Its initialization. More...
 
StatusCode finalize () override
 the finalization of the algorithm More...
 
double get_runtime () const
 
 GPUCruncher (const std::string &name, ISvcLocator *pSvc)
 
virtual ~GPUCruncher ()
 virtual & protected destructor More...
 

Private Member Functions

 GPUCruncher ()
 the default constructor is disabled More...
 
 GPUCruncher (const GPUCruncher &)
 the copy constructor is disabled More...
 
GPUCruncher & operator= (const GPUCruncher &)
 the assignment operator is disabled More...
 
StatusCode gpuExecute (const std::pmr::vector< double > &in, std::vector< double > &out) const
 The GPU intensive function. More...
 

Private Attributes

Gaudi::Property< std::vector< std::string > > m_inpKeys { this, "inpKeys", {}, "" }
 
Gaudi::Property< std::vector< std::string > > m_outKeys { this, "outKeys", {}, "" }
 
Gaudi::Property< double > m_avg_runtime { this, "avgRuntime", 1., "Average runtime of the module." }
 
Gaudi::Property< double > m_var_runtime { this, "varRuntime", 0.01, "Variance of the runtime of the module." }
 
Gaudi::Property< bool > m_local_rndm_gen
 
std::pmr::memory_resource * pinned
 
const uint MAX_INPUTS = 40
 
const uint MAX_OUTPUTS = 10
 
std::vector< DataObjectHandle< DataObject > * > m_inputHandles
 
std::vector< DataObjectHandle< DataObject > * > m_outputHandles
 

Static Private Attributes

static CHM m_name_ncopies_map
 

Detailed Description

A test asynchronous algorithm. Might eventually run computations on a GPU but for now it just sleeps for a few seconds.

Definition at line 26 of file GPUCruncher.h.

Member Typedef Documentation

◆ CHM

typedef tbb::concurrent_hash_map<std::string, unsigned int> GPUCruncher::CHM

Definition at line 29 of file GPUCruncher.h.

Constructor & Destructor Documentation

◆ GPUCruncher() [1/3]

GPUCruncher::GPUCruncher ( const std::string &  name,
ISvcLocator *  pSvc 
)

Definition at line 34 of file GPUCruncher.cpp.

36  : AsynchronousAlgorithm( name, pSvc ) {
37 
38  // Register the algo in the static concurrent hash map in order to
39  // monitor the # of copies
40  CHM::accessor name_ninstances;
41  m_name_ncopies_map.insert( name_ninstances, name );
42  name_ninstances->second += 1;
43 }

◆ ~GPUCruncher()

GPUCruncher::~GPUCruncher ( )
virtual

virtual & protected destructor

Definition at line 45 of file GPUCruncher.cpp.

45  {
46  for ( uint i = 0; i < m_inputHandles.size(); ++i ) delete m_inputHandles[i];
47 
48  for ( uint i = 0; i < m_outputHandles.size(); ++i ) delete m_outputHandles[i];
49 }

◆ GPUCruncher() [2/3]

GPUCruncher::GPUCruncher ( )
private

the default constructor is disabled

◆ GPUCruncher() [3/3]

GPUCruncher::GPUCruncher ( const GPUCruncher & )
private

the copy constructor is disabled

Member Function Documentation

◆ execute()

StatusCode GPUCruncher::execute ( const EventContext &  ctx) const
override

the execution of the algorithm

Definition at line 84 of file GPUCruncher.cpp.

85 {
86 
87  double crunchtime;
88  std::pmr::vector<double> input( pinned );
89  if ( m_local_rndm_gen ) {
90  /* This will disappear with a thread safe random number generator service.
91  * Use basic Box-Muller to generate Gaussian random numbers.
92  * The quality is not good for in depth study given that the generator is a
93  * linear congruent.
94  * Throw away basically a free number: we are in a ~~cpu~~ /gpu/ cruncher after all.
95  * The seed is taken from the clock, but we could assign a seed per module to
96  * ensure reproducibility.
97  *
98  * This is not an overkill but rather an exercise towards a thread safe
99  * random number generation.
100  */
101 
102  auto getGausRandom = []( double mean, double sigma ) -> double {
103  unsigned int seed = std::clock();
104 
105  auto getUnifRandom = []( unsigned int& seed ) -> double {
106  // from "Numerical Recipes"
107  constexpr unsigned int m = 232;
108  constexpr unsigned int a = 1664525;
109  constexpr unsigned int c = 1013904223;
110  seed = ( a * seed + c ) % m;
111  const double unif = double( seed ) / m;
112  return unif;
113  };
114 
115  double unif1, unif2;
116  do {
117  unif1 = getUnifRandom( seed );
118  unif2 = getUnifRandom( seed );
119  } while ( unif1 < std::numeric_limits<double>::epsilon() );
120 
121  const double normal = sqrt( -2. * log( unif1 ) ) * cos( 2 * M_PI * unif2 );
122 
123  return normal * sigma + mean;
124  };
125 
126  crunchtime = fabs( getGausRandom( m_avg_runtime, m_var_runtime ) );
127  // Generate input vector
128  input.reserve( 40000 * crunchtime );
129  for ( int i = 0; i < 40000 * crunchtime; ++i ) { input.push_back( getGausRandom( 10.0, 1.0 ) ); }
130  // End Of temp block
131  } else {
132  // Should be a member.
134  crunchtime = std::fabs( rndmgaus() );
135  // Generate input vector
136  for ( int i = 0; i < 2000 * crunchtime; ++i ) { input.push_back( rndmgaus() ); }
137  }
138  unsigned int crunchtime_ms = 1000 * crunchtime;
139 
140  DEBUG_MSG << "Crunching time will be: " << crunchtime_ms << " ms" << endmsg;
141  DEBUG_MSG << "Start event " << ctx.evt() << " in slot " << ctx.slot() << " on pthreadID " << std::hex
142  << pthread_self() << std::dec << endmsg;
143 
144  // start timer
145  tbb::tick_count starttbb = tbb::tick_count::now();
146 
147  VERBOSE_MSG << "inputs number: " << m_inputHandles.size() << endmsg;
148  for ( auto& inputHandle : m_inputHandles ) {
149  if ( !inputHandle->isValid() ) continue;
150 
151  VERBOSE_MSG << "get from TS: " << inputHandle->objKey() << endmsg;
152  DataObject* obj = nullptr;
153  obj = inputHandle->get();
154  if ( obj == nullptr ) error() << "A read object was a null pointer." << endmsg;
155  }
156 
157  // Use fiber sleep, should eventually be a GPU computation
158  info() << "Crunching..." << endmsg;
159  auto startcrunch = std::chrono::steady_clock::now();
160  std::vector<double> out{ 3.0, 5.0 };
161  gpuExecute( input, out ).orThrow( "GPU_EXECUTE" );
162  auto endcrunch = std::chrono::steady_clock::now();
163  info() << "Crunched." << endmsg;
164  fmt::print( "{} GPU Crunch time: {}. Input length {}, output length {}.\n", name(),
165  Gaudi::CUDA::SI(
166  std::chrono::duration_cast<std::chrono::milliseconds>( endcrunch - startcrunch ).count() / 1e3, "s" ),
167  input.size(), out.size() );
168 
169  VERBOSE_MSG << "outputs number: " << m_outputHandles.size() << endmsg;
170  for ( auto& outputHandle : m_outputHandles ) {
171  if ( !outputHandle->isValid() ) continue;
172 
173  VERBOSE_MSG << "put to TS: " << outputHandle->objKey() << endmsg;
174  outputHandle->put( std::make_unique<DataObject>() );
175  }
176 
177  tbb::tick_count endtbb = tbb::tick_count::now();
178  const double actualRuntime = ( endtbb - starttbb ).seconds();
179 
180  DEBUG_MSG << "Finish event " << ctx.evt() << " in " << int( 1000 * actualRuntime ) << " ms" << endmsg;
181 
182  DEBUG_MSG << "Timing: ExpectedCrunchtime= " << crunchtime_ms
183  << " ms. ActualTotalRuntime= " << int( 1000 * actualRuntime )
184  << " ms. Ratio= " << crunchtime / actualRuntime << endmsg;
185 
186  return StatusCode::SUCCESS;
187 }

◆ finalize()

StatusCode GPUCruncher::finalize ( )
override

the finalization of the algorithm

Definition at line 191 of file GPUCruncher.cpp.

192 {
193  MsgStream log( msgSvc(), name() );
194 
195  unsigned int ninstances;
196 
197  {
198  CHM::const_accessor const_name_ninstances;
199  m_name_ncopies_map.find( const_name_ninstances, name() );
200  ninstances = const_name_ninstances->second;
201  }
202 
203  constexpr double s2ms = 1000.;
204  // do not show repetitions
205  if ( ninstances != 0 ) {
206  info() << "Summary: name= " << name() << "\t avg_runtime= " << m_avg_runtime * s2ms << "\t n_clones= " << ninstances
207  << endmsg;
208 
209  CHM::accessor name_ninstances;
210  m_name_ncopies_map.find( name_ninstances, name() );
211  name_ninstances->second = 0;
212  }
213 
214  return Algorithm::finalize();
215 }

◆ get_runtime()

double GPUCruncher::get_runtime ( ) const
inline

Definition at line 40 of file GPUCruncher.h.

40 { return m_avg_runtime; }

◆ gpuExecute()

StatusCode GPUCruncher::gpuExecute ( const std::pmr::vector< double > &  in,
std::vector< double > &  out 
) const
private

The GPU intensive function.

◆ initialize()

StatusCode GPUCruncher::initialize ( )
override

Its initialization.

Definition at line 51 of file GPUCruncher.cpp.

51  {
52  auto sc = Algorithm::initialize();
53  if ( !sc ) return sc;
54 
55  pinned = Gaudi::CUDA::get_pinned_memory_resource();
56 
57  // This is a bit ugly. There is no way to declare a vector of DataObjectHandles, so
58  // we need to wait until initialize when we've read in the input and output key
59  // properties, and know their size, and then turn them
60  // into Handles and register them with the framework by calling declareProperty. We
61  // could call declareInput/declareOutput on them too.
62 
63  int i = 0;
64  for ( auto k : m_inpKeys ) {
65  DEBUG_MSG << "adding input key " << k << endmsg;
67  declareProperty( "dummy_in_" + std::to_string( i ), *( m_inputHandles.back() ) );
68  i++;
69  }
70 
71  i = 0;
72  for ( auto k : m_outKeys ) {
73  DEBUG_MSG << "adding output key " << k << endmsg;
75  declareProperty( "dummy_out_" + std::to_string( i ), *( m_outputHandles.back() ) );
76  i++;
77  }
78 
79  return sc;
80 }

◆ isClonable()

bool GPUCruncher::isClonable ( ) const
inline override

Definition at line 31 of file GPUCruncher.h.

31 { return true; }

◆ operator=()

GPUCruncher& GPUCruncher::operator= ( const GPUCruncher & )
private

the assignment operator is disabled

Member Data Documentation

◆ m_avg_runtime

Gaudi::Property<double> GPUCruncher::m_avg_runtime { this, "avgRuntime", 1., "Average runtime of the module." }
private

Definition at line 61 of file GPUCruncher.h.

◆ m_inpKeys

Gaudi::Property<std::vector<std::string> > GPUCruncher::m_inpKeys { this, "inpKeys", {}, "" }
private

Definition at line 58 of file GPUCruncher.h.

◆ m_inputHandles

std::vector<DataObjectHandle<DataObject>*> GPUCruncher::m_inputHandles
private

Definition at line 73 of file GPUCruncher.h.

◆ m_local_rndm_gen

Gaudi::Property<bool> GPUCruncher::m_local_rndm_gen
private
Initial value:
{ this, "localRndm", true,
"Decide if the local random generator is to be used" }

Definition at line 63 of file GPUCruncher.h.

◆ m_name_ncopies_map

GPUCruncher::CHM GPUCruncher::m_name_ncopies_map
static private

Definition at line 76 of file GPUCruncher.h.

◆ m_outKeys

Gaudi::Property<std::vector<std::string> > GPUCruncher::m_outKeys { this, "outKeys", {}, "" }
private

Definition at line 59 of file GPUCruncher.h.

◆ m_outputHandles

std::vector<DataObjectHandle<DataObject>*> GPUCruncher::m_outputHandles
private

Definition at line 74 of file GPUCruncher.h.

◆ m_var_runtime

Gaudi::Property<double> GPUCruncher::m_var_runtime { this, "varRuntime", 0.01, "Variance of the runtime of the module." }
private

Definition at line 62 of file GPUCruncher.h.

◆ MAX_INPUTS

const uint GPUCruncher::MAX_INPUTS = 40
private

Definition at line 70 of file GPUCruncher.h.

◆ MAX_OUTPUTS

const uint GPUCruncher::MAX_OUTPUTS = 10
private

Definition at line 71 of file GPUCruncher.h.

◆ pinned

std::pmr::memory_resource* GPUCruncher::pinned
private

Definition at line 67 of file GPUCruncher.h.


The documentation for this class was generated from the following files:
Gaudi::Accumulators::sqrt
auto sqrt(std::chrono::duration< Rep, Period > d)
sqrt for std::chrono::duration
Definition: Counters.h:34
GPUCruncher::m_inpKeys
Gaudi::Property< std::vector< std::string > > m_inpKeys
Definition: GPUCruncher.h:58
DEBUG_MSG
#define DEBUG_MSG
Definition: GPUCruncher.cpp:27
GPUCruncher::m_outputHandles
std::vector< DataObjectHandle< DataObject > * > m_outputHandles
Definition: GPUCruncher.h:74
Gaudi.Configuration.log
log
Definition: Configuration.py:28
std::fabs
T fabs(T... args)
StatusCode::orThrow
const StatusCode & orThrow(std::string_view message, std::string_view tag) const
Throw a GaudiException in case of failures.
Definition: StatusCode.h:206
std::cos
T cos(T... args)
std::vector< double >
std::vector::size
T size(T... args)
GPUCruncher::m_var_runtime
Gaudi::Property< double > m_var_runtime
Definition: GPUCruncher.h:62
Gaudi::Algorithm::initialize
StatusCode initialize() override
the default (empty) implementation of IStateful::initialize() method
Definition: Algorithm.h:178
gaudirun.c
c
Definition: gaudirun.py:525
std::vector::back
T back(T... args)
DataObjectHandle< DataObject >
Gaudi::DataHandle::Writer
@ Writer
Definition: DataHandle.h:40
AvalancheSchedulerErrorTest.msgSvc
msgSvc
Definition: AvalancheSchedulerErrorTest.py:80
std::hex
T hex(T... args)
std::vector::push_back
T push_back(T... args)
std::clock
T clock(T... args)
GPUCruncher::m_inputHandles
std::vector< DataObjectHandle< DataObject > * > m_inputHandles
Definition: GPUCruncher.h:73
GaudiPython.Pythonizations.ctx
ctx
Definition: Pythonizations.py:578
Rndm::Gauss
Parameters for the Gauss random number generation.
Definition: RndmGenerators.h:32
GPUCruncher::pinned
std::pmr::memory_resource * pinned
Definition: GPUCruncher.h:67
Gaudi::Units::m
constexpr double m
Definition: SystemOfUnits.h:108
VERBOSE_MSG
#define VERBOSE_MSG
Definition: GPUCruncher.cpp:30
std::to_string
T to_string(T... args)
GPUCruncher::gpuExecute
StatusCode gpuExecute(const std::pmr::vector< double > &in, std::vector< double > &out) const
The GPU intensive function.
endmsg
MsgStream & endmsg(MsgStream &s)
MsgStream Modifier: endmsg. Calls the output method of the MsgStream.
Definition: MsgStream.h:203
MsgStream
Definition: MsgStream.h:34
Gaudi::Algorithm::finalize
StatusCode finalize() override
the default (empty) implementation of IStateful::finalize() method
Definition: Algorithm.h:184
StatusCode::SUCCESS
constexpr static const auto SUCCESS
Definition: StatusCode.h:100
ConditionsStallTest.name
name
Definition: ConditionsStallTest.py:77
OffloadAtlasMCRecoScenario.seed
seed
Definition: OffloadAtlasMCRecoScenario.py:52
DataObject
Definition: DataObject.h:36
std::count
T count(T... args)
GPUCruncher::m_outKeys
Gaudi::Property< std::vector< std::string > > m_outKeys
Definition: GPUCruncher.h:59
GPUCruncher::m_name_ncopies_map
static CHM m_name_ncopies_map
Definition: GPUCruncher.h:76
Gaudi::DataHandle::Reader
@ Reader
Definition: DataHandle.h:40
HepRndm::Engine
Definition: HepRndmEngine.h:35
GPUCruncher::m_avg_runtime
Gaudi::Property< double > m_avg_runtime
Definition: GPUCruncher.h:61
HiveRndm::HiveNumbers
Definition: HiveNumbers.h:38
std::numeric_limits
GPUCruncher::m_local_rndm_gen
Gaudi::Property< bool > m_local_rndm_gen
Definition: GPUCruncher.h:63
PrepareBase.out
out
Definition: PrepareBase.py:20
std::chrono::steady_clock::now
T now(T... args)