The Gaudi Framework  v39r1 (adb068b2)
GPUCruncher.cpp
/***********************************************************************************\
* (c) Copyright 2023-2024 CERN for the benefit of the LHCb and ATLAS collaborations *
*                                                                                   *
* This software is distributed under the terms of the Apache version 2 licence,     *
* copied verbatim in the file "LICENSE".                                            *
*                                                                                   *
* In applying this licence, CERN does not waive the privileges and immunities       *
* granted to it by virtue of its status as an Intergovernmental Organization        *
* or submit itself to any jurisdiction.                                             *
\***********************************************************************************/

#include "GPUCruncher.h"
#include "HiveNumbers.h"
#include <chrono>
#include <cmath>
#include <ctime>
#include <fmt/format.h>
#include <limits>
#include <sys/resource.h>
#include <sys/times.h>
#include <tbb/tick_count.h>
#include <thread>

GPUCruncher::CHM GPUCruncher::m_name_ncopies_map{};

DECLARE_COMPONENT( GPUCruncher )

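// Logging helpers: ON_DEBUG / ON_VERBOSE guard the statement that follows with a check
// of the active output level, so DEBUG_MSG / VERBOSE_MSG only touch the message stream
// when that level is enabled.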
#define ON_DEBUG if ( msgLevel( MSG::DEBUG ) )
#define DEBUG_MSG ON_DEBUG debug()

#define ON_VERBOSE if ( msgLevel( MSG::VERBOSE ) )
#define VERBOSE_MSG ON_VERBOSE verbose()

//------------------------------------------------------------------------------

GPUCruncher::GPUCruncher( const std::string& name, // the algorithm instance name
                          ISvcLocator*       pSvc )
    : AsynchronousAlgorithm( name, pSvc ) {

  // Register the algo in the static concurrent hash map in order to
  // monitor the # of copies
  CHM::accessor name_ninstances;
  m_name_ncopies_map.insert( name_ninstances, name );
  name_ninstances->second += 1;
}
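
// A minimal standalone sketch (not from this file) of the tbb::concurrent_hash_map
// accessor pattern used in the constructor above, assuming oneTBB:
//
//   tbb::concurrent_hash_map<std::string, unsigned int> counts;
//   {
//     tbb::concurrent_hash_map<std::string, unsigned int>::accessor acc;
//     counts.insert( acc, "SomeAlg" ); // creates the entry (value-initialised to 0) if absent
//     acc->second += 1;                // exclusive access to the element while acc is alive
//   } // the per-element lock is released when acc goes out of scope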

GPUCruncher::~GPUCruncher() {
  for ( uint i = 0; i < m_inputHandles.size(); ++i ) delete m_inputHandles[i];

  for ( uint i = 0; i < m_outputHandles.size(); ++i ) delete m_outputHandles[i];
}

StatusCode GPUCruncher::initialize() {
  auto sc = Algorithm::initialize();
  if ( !sc ) return sc;

  pinned = Gaudi::CUDA::get_pinned_memory_resource();

  // This is a bit ugly. There is no way to declare a vector of DataObjectHandles, so
  // we have to wait until initialize, when the input and output key properties have
  // been read and their sizes are known, and only then turn the keys into handles and
  // register them with the framework by calling declareProperty. We could call
  // declareInput/declareOutput on them too.

  int i = 0;
  for ( auto k : m_inpKeys ) {
    DEBUG_MSG << "adding input key " << k << endmsg;
    m_inputHandles.push_back( new DataObjectHandle<DataObject>( k, Gaudi::DataHandle::Reader, this ) );
    declareProperty( "dummy_in_" + std::to_string( i ), *( m_inputHandles.back() ) );
    i++;
  }

  i = 0;
  for ( auto k : m_outKeys ) {
    DEBUG_MSG << "adding output key " << k << endmsg;
    m_outputHandles.push_back( new DataObjectHandle<DataObject>( k, Gaudi::DataHandle::Writer, this ) );
    declareProperty( "dummy_out_" + std::to_string( i ), *( m_outputHandles.back() ) );
    i++;
  }

  return sc;
}
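
// For comparison, a hedged sketch of the static alternative used when the number of
// inputs and outputs is fixed at compile time (member declarations in the header;
// the names, keys and exact constructor arguments below are illustrative only):
//
//   DataObjectReadHandle<DataObject>  m_in{ this, "Input", "/Event/A" };
//   DataObjectWriteHandle<DataObject> m_out{ this, "Output", "/Event/B" };
//
// Here the keys are only known once the input/output key properties are set, hence the
// dynamically created handles registered in initialize() above.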

//------------------------------------------------------------------------------

StatusCode GPUCruncher::execute( const EventContext& ctx ) const // the execution of the algorithm
{

  double                   crunchtime;
  std::pmr::vector<double> input( pinned );
  if ( m_local_rndm_gen ) {
    /* This will disappear with a thread-safe random number generator service.
     * Use basic Box-Muller to generate Gaussian random numbers.
     * The quality is not good enough for an in-depth study, given that the generator
     * is a linear congruential one.
     * We throw away what is basically a free number: we are in a GPU cruncher after all.
     * The seed is taken from the clock, but we could assign a seed per module to
     * ensure reproducibility.
     *
     * This is not overkill but rather an exercise towards thread-safe
     * random number generation.
     */

    auto getGausRandom = []( double mean, double sigma ) -> double {
      unsigned int seed = std::clock();

      auto getUnifRandom = []( unsigned int& seed ) -> double {
        // Linear congruential generator from "Numerical Recipes": the modulus 2^32 is
        // applied implicitly by the wrap-around of 32-bit unsigned arithmetic.
        constexpr unsigned int a = 1664525u;
        constexpr unsigned int c = 1013904223u;
        seed                     = a * seed + c;
        const double unif        = double( seed ) / 4294967296.0; // 2^32, maps into [0, 1)
        return unif;
      };

      double unif1, unif2;
      do {
        unif1 = getUnifRandom( seed );
        unif2 = getUnifRandom( seed );
      } while ( unif1 < std::numeric_limits<double>::epsilon() );

      const double normal = std::sqrt( -2. * std::log( unif1 ) ) * std::cos( 2 * M_PI * unif2 );

      return normal * sigma + mean;
    };

    crunchtime = std::fabs( getGausRandom( m_avg_runtime, m_var_runtime ) );
    // Generate the input vector
    input.reserve( 40000 * crunchtime );
    for ( int i = 0; i < 40000 * crunchtime; ++i ) { input.push_back( getGausRandom( 10.0, 1.0 ) ); }
    // End of temporary block
  } else {
    // Should be a member.
    HiveRndm::HiveNumbers rndmgaus( randSvc(), Rndm::Gauss( m_avg_runtime, m_var_runtime ) );
    crunchtime = std::fabs( rndmgaus() );
    // Generate the input vector
    for ( int i = 0; i < 2000 * crunchtime; ++i ) { input.push_back( rndmgaus() ); }
  }
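
  // Box-Muller, as used in the branch above, in brief: for independent u1 and u2
  // uniform on (0, 1), z = sqrt( -2 * ln(u1) ) * cos( 2 * pi * u2 ) follows a standard
  // normal distribution; it is then scaled by sigma and shifted by mean.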
  unsigned int crunchtime_ms = 1000 * crunchtime;

  DEBUG_MSG << "Crunching time will be: " << crunchtime_ms << " ms" << endmsg;
  DEBUG_MSG << "Start event " << ctx.evt() << " in slot " << ctx.slot() << " on pthreadID " << std::hex
            << pthread_self() << std::dec << endmsg;

  // start timer
  tbb::tick_count starttbb = tbb::tick_count::now();

  VERBOSE_MSG << "inputs number: " << m_inputHandles.size() << endmsg;
  for ( auto& inputHandle : m_inputHandles ) {
    if ( !inputHandle->isValid() ) continue;

    VERBOSE_MSG << "get from TS: " << inputHandle->objKey() << endmsg;
    DataObject* obj = inputHandle->get();
    if ( obj == nullptr ) error() << "A read object was a null pointer." << endmsg;
  }

  // Use fiber sleep, should eventually be a GPU computation
  info() << "Crunching..." << endmsg;
  auto                startcrunch = std::chrono::steady_clock::now();
  std::vector<double> out{ 3.0, 5.0 };
  gpuExecute( input, out ).orThrow( "GPU_EXECUTE" );
  auto endcrunch = std::chrono::steady_clock::now();
  info() << "Crunched." << endmsg;
  fmt::print( "{} GPU Crunch time: {}. Input length {}, output length {}.\n", name(),
              Gaudi::CUDA::SI(
                  std::chrono::duration_cast<std::chrono::milliseconds>( endcrunch - startcrunch ).count() / 1e3, "s" ),
              input.size(), out.size() );
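
  // Generic wall-clock timing pattern used above, for reference (steady_clock is
  // monotonic, so the difference is immune to system-clock adjustments):
  //
  //   auto t0 = std::chrono::steady_clock::now();
  //   /* ... offloaded work ... */
  //   auto elapsed_ms =
  //       std::chrono::duration_cast<std::chrono::milliseconds>( std::chrono::steady_clock::now() - t0 ).count();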

  VERBOSE_MSG << "outputs number: " << m_outputHandles.size() << endmsg;
  for ( auto& outputHandle : m_outputHandles ) {
    if ( !outputHandle->isValid() ) continue;

    VERBOSE_MSG << "put to TS: " << outputHandle->objKey() << endmsg;
    outputHandle->put( std::make_unique<DataObject>() );
  }

  tbb::tick_count endtbb        = tbb::tick_count::now();
  const double    actualRuntime = ( endtbb - starttbb ).seconds();

  DEBUG_MSG << "Finish event " << ctx.evt() << " in " << int( 1000 * actualRuntime ) << " ms" << endmsg;

  DEBUG_MSG << "Timing: ExpectedCrunchtime= " << crunchtime_ms
            << " ms. ActualTotalRuntime= " << int( 1000 * actualRuntime )
            << " ms. Ratio= " << crunchtime / actualRuntime << endmsg;

  return StatusCode::SUCCESS;
}

//------------------------------------------------------------------------------

StatusCode GPUCruncher::finalize() // the finalization of the algorithm
{
  MsgStream log( msgSvc(), name() );

  unsigned int ninstances;

  {
    CHM::const_accessor const_name_ninstances;
    m_name_ncopies_map.find( const_name_ninstances, name() );
    ninstances = const_name_ninstances->second;
  }
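
  // The braced scope above limits the lifetime of the const_accessor: its shared lock
  // on the map entry is released before a writable accessor to the same entry is
  // acquired below (holding both at once from the same thread would block).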

  constexpr double s2ms = 1000.;
  // Do not show repetitions: report once per name, then reset the counter so that
  // clones finalized later skip the summary.
  if ( ninstances != 0 ) {
    info() << "Summary: name= " << name() << "\t avg_runtime= " << m_avg_runtime * s2ms << "\t n_clones= " << ninstances
           << endmsg;

    CHM::accessor name_ninstances;
    m_name_ncopies_map.find( name_ninstances, name() );
    name_ninstances->second = 0;
  }

  return Algorithm::finalize();
}

//------------------------------------------------------------------------------