The Gaudi Framework  master (37c0b60a)
GPUCruncher.cpp
Go to the documentation of this file.
1 /***********************************************************************************\
2 * (c) Copyright 2023-2024 CERN for the benefit of the LHCb and ATLAS collaborations *
3 * *
4 * This software is distributed under the terms of the Apache version 2 licence, *
5 * copied verbatim in the file "LICENSE". *
6 * *
7 * In applying this licence, CERN does not waive the privileges and immunities *
8 * granted to it by virtue of its status as an Intergovernmental Organization *
9 * or submit itself to any jurisdiction. *
10 \***********************************************************************************/
11 
12 #include "GPUCruncher.h"
13 #include <algorithm>
14 #include <chrono>
15 #include <ctime>
16 #include <sys/resource.h>
17 #include <sys/times.h>
18 #include <tbb/tick_count.h>
19 
21 
23 
// Guard for DEBUG-level output: the statement that follows ON_DEBUG runs only
// when msgLevel( MSG::DEBUG ) is true. DEBUG_MSG combines the guard with the
// debug() stream, so `DEBUG_MSG << ... << endmsg;` is skipped entirely
// (arguments included) when the DEBUG level is not enabled.
24 #define ON_DEBUG if ( msgLevel( MSG::DEBUG ) )
25 #define DEBUG_MSG ON_DEBUG debug()
26 
// Guard for VERBOSE-level output: the statement that follows ON_VERBOSE runs
// only when msgLevel( MSG::VERBOSE ) is true. VERBOSE_MSG combines the guard
// with the verbose() stream, so `VERBOSE_MSG << ... << endmsg;` is skipped
// entirely when the VERBOSE level is not enabled.
27 #define ON_VERBOSE if ( msgLevel( MSG::VERBOSE ) )
28 #define VERBOSE_MSG ON_VERBOSE verbose()
29 
30 //------------------------------------------------------------------------------
31 
32 GPUCruncher::GPUCruncher( const std::string& name, // the algorithm instance name
33  ISvcLocator* pSvc )
34  : AsynchronousAlgorithm( name, pSvc ) {
35 
36  // Register the algo in the static concurrent hash map in order to
37  // monitor the # of copies
38  CHM::accessor name_ninstances;
39  m_name_ncopies_map.insert( name_ninstances, name );
40  name_ninstances->second += 1;
41 }
42 
44  for ( uint i = 0; i < m_inputHandles.size(); ++i ) delete m_inputHandles[i];
45 
46  for ( uint i = 0; i < m_outputHandles.size(); ++i ) delete m_outputHandles[i];
47 }
48 
50  auto sc = Algorithm::initialize();
51  if ( !sc ) return sc;
52 
53  // This is a bit ugly. There is no way to declare a vector of DataObjectHandles, so
54  // we need to wait until initialize when we've read in the input and output key
55  // properties, and know their size, and then turn them
56  // into Handles and register them with the framework by calling declareProperty. We
57  // could call declareInput/declareOutput on them too.
58 
59  int i = 0;
60  for ( auto k : m_inpKeys ) {
61  DEBUG_MSG << "adding input key " << k << endmsg;
63  declareProperty( "dummy_in_" + std::to_string( i ), *( m_inputHandles.back() ) );
64  i++;
65  }
66 
67  i = 0;
68  for ( auto k : m_outKeys ) {
69  DEBUG_MSG << "adding output key " << k << endmsg;
71  declareProperty( "dummy_out_" + std::to_string( i ), *( m_outputHandles.back() ) );
72  i++;
73  }
74 
75  return sc;
76 }
77 
78 //------------------------------------------------------------------------------
79 
/// The execution of the algorithm.
///
/// Draws a target "crunch time" from a Gaussian around m_avg_runtime, builds a
/// random input vector whose length scales with that time, reads every valid
/// input handle, runs gpuExecute() on the vector, cross-checks the result
/// against values computed on the host, and finally puts an empty DataObject
/// into every valid output handle.
///
/// @param ctx  the event context (event number and slot are used for logging)
/// @return StatusCode::SUCCESS; a failing gpuExecute() is turned into an
///         exception by orThrow(), and GaudiExceptions raised by the data
///         handles are logged and rethrown.
StatusCode GPUCruncher::execute( const EventContext& ctx ) const // the execution of the algorithm
{

  double              crunchtime;
  std::vector<double> input{};
  /* This will disappear with a thread safe random number generator service.
   * Use basic Box-Muller to generate Gaussian random numbers.
   * The quality is not good for in depth study given that the generator is a
   * linear congruential one.
   * The second Box-Muller number is simply thrown away: we are in a GPU
   * cruncher after all.
   * The seed is taken from the clock, but we could assign a seed per module to
   * ensure reproducibility.
   *
   * This is not an overkill but rather an exercise towards a thread safe
   * random number generation.
   */

  auto getGausRandom = []( double mean, double sigma ) -> double {
    // Re-seeded from the processor clock on every call, so two calls that fall
    // into the same clock tick return the same value.
    unsigned int seed = std::clock();

    auto getUnifRandom = []( unsigned int& seed ) -> double {
      // Linear congruential generator from "Numerical Recipes":
      //   seed <- ( a * seed + c ) mod 2^32
      // The modulus is 2^32; the mask keeps the state in 32 bits regardless of
      // the width of unsigned int.
      constexpr unsigned int a       = 1664525;
      constexpr unsigned int c       = 1013904223;
      constexpr double       modulus = 4294967296.0; // 2^32
      seed                           = ( a * seed + c ) & 0xFFFFFFFFu;
      return double( seed ) / modulus; // uniform in [0, 1)
    };

    double unif1, unif2;
    do {
      unif1 = getUnifRandom( seed );
      unif2 = getUnifRandom( seed );
    } while ( unif1 < std::numeric_limits<double>::epsilon() ); // keep log( unif1 ) finite

    // Box-Muller transform: one standard normal deviate from two uniforms.
    const double normal = sqrt( -2. * log( unif1 ) ) * cos( 2 * M_PI * unif2 );

    return normal * sigma + mean;
  };

  crunchtime = fabs( getGausRandom( m_avg_runtime, m_var_runtime ) );
  // Generate input vector: 50000 samples per second of requested crunch time.
  input.reserve( static_cast<std::size_t>( 50000 * crunchtime ) );
  for ( int i = 0; i < 50000 * crunchtime; ++i ) { input.push_back( getGausRandom( 20.0, 1.0 ) ); }
  // A crunch time of exactly zero would leave the vector empty, and taking the
  // min/max of an empty range below is undefined: always keep one sample.
  if ( input.empty() ) { input.push_back( getGausRandom( 20.0, 1.0 ) ); }
  unsigned int crunchtime_ms = 1000 * crunchtime;

  // First figure out what output should be
  double lower_bound = std::ranges::min( input );
  double upper_bound = std::ranges::max( input ) * 256;
  DEBUG_MSG << "Crunching time will be: " << crunchtime_ms << " ms" << endmsg;
  DEBUG_MSG << "Start event " << ctx.evt() << " in slot " << ctx.slot() << " on pthreadID " << std::hex
            << pthread_self() << std::dec << endmsg;

  // start timer
  tbb::tick_count starttbb = tbb::tick_count::now();

  VERBOSE_MSG << "inputs number: " << m_inputHandles.size() << endmsg;
  for ( auto& inputHandle : m_inputHandles ) {
    if ( !inputHandle->isValid() ) continue;

    VERBOSE_MSG << "get from TS: " << inputHandle->objKey() << endmsg;
    DataObject* obj = nullptr;
    try {
      obj = inputHandle->get();
    } catch ( const GaudiException& e ) {
      error() << "Caught exception with message " << e.what() << " in evt " << ctx.evt() << endmsg;
      throw;
    }
    if ( obj == nullptr ) error() << "A read object was a null pointer." << endmsg;
  }

  info() << "Crunching..." << endmsg;
  auto startcrunch = std::chrono::steady_clock::now();
  // NOTE(review): result buffer filled by gpuExecute(). Element type assumed to
  // be double because entries 0 and 1 are compared with the double bounds
  // below; whether gpuExecute() expects it pre-sized is not visible here -
  // confirm against the gpuExecute() declaration in GPUCruncher.h.
  std::vector<double> out;
  gpuExecute( input, out ).orThrow( "GPU_EXECUTE" );
  auto endcrunch = std::chrono::steady_clock::now();

  // Layout of `out` as used here: [0] lower bound, [1] upper bound, [2...] the
  // per-bin counts, which must add up to 256 entries per input value.
  int             total_entries    = std::accumulate( out.begin() + 2, out.end(), 0, std::plus{} );
  const long long expected_entries = 256LL * static_cast<long long>( input.size() );
  const bool      lower_ok         = ( out.at( 0 ) == lower_bound );
  const bool      upper_ok         = ( out.at( 1 ) == upper_bound );
  const bool      entries_ok       = ( total_entries == expected_entries );
  bool            match            = lower_ok && upper_ok && entries_ok;
  info() << "Crunched." << endmsg;
  // Signed difference: a surplus of entries shows up as a negative "missing"
  // count instead of wrapping around.
  ( match ? info() : warning() )
      << std::format(
             "GPU Crunch time: {} s. Input length {}, total entries {}. Pass: Lower {}, Upper {}, Entries {} ({} "
             "missing)",
             std::chrono::duration_cast<std::chrono::milliseconds>( endcrunch - startcrunch ).count() / 1e3,
             input.size(), total_entries, lower_ok, upper_ok, entries_ok, expected_entries - total_entries )
      << endmsg;

  VERBOSE_MSG << "outputs number: " << m_outputHandles.size() << endmsg;
  for ( auto& outputHandle : m_outputHandles ) {
    if ( !outputHandle->isValid() ) continue;

    VERBOSE_MSG << "put to TS: " << outputHandle->objKey() << endmsg;
    try {
      outputHandle->put( std::make_unique<DataObject>() );
    } catch ( const GaudiException& e ) {
      error() << "Caught exception with message " << e.what() << " in evt " << ctx.evt() << endmsg;
      throw;
    }
  }

  tbb::tick_count endtbb        = tbb::tick_count::now();
  const double    actualRuntime = ( endtbb - starttbb ).seconds();

  DEBUG_MSG << "Finish event " << ctx.evt() << " in " << int( 1000 * actualRuntime ) << " ms" << endmsg;

  DEBUG_MSG << "Timing: ExpectedCrunchtime= " << crunchtime_ms
            << " ms. ActualTotalRuntime= " << int( 1000 * actualRuntime )
            << " ms. Ratio= " << crunchtime / actualRuntime << endmsg;

  return StatusCode::SUCCESS;
}
194 
195 //------------------------------------------------------------------------------
196 
197 StatusCode GPUCruncher::finalize() // the finalization of the algorithm
198 {
199  MsgStream log( msgSvc(), name() );
200 
201  unsigned int ninstances;
202 
203  {
204  CHM::const_accessor const_name_ninstances;
205  m_name_ncopies_map.find( const_name_ninstances, name() );
206  ninstances = const_name_ninstances->second;
207  }
208 
209  constexpr double s2ms = 1000.;
210  // do not show repetitions
211  if ( ninstances != 0 ) {
212  info() << "Summary: name= " << name() << "\t avg_runtime= " << m_avg_runtime * s2ms << "\t n_clones= " << ninstances
213  << endmsg;
214 
215  CHM::accessor name_ninstances;
216  m_name_ncopies_map.find( name_ninstances, name() );
217  name_ninstances->second = 0;
218  }
219 
220  return Algorithm::finalize();
221 }
222 
223 //------------------------------------------------------------------------------
Gaudi::Accumulators::sqrt
auto sqrt(std::chrono::duration< Rep, Period > d)
sqrt for std::chrono::duration
Definition: Counters.h:34
GPUCruncher::m_inpKeys
Gaudi::Property< std::vector< std::string > > m_inpKeys
Definition: GPUCruncher.h:57
DEBUG_MSG
#define DEBUG_MSG
Definition: GPUCruncher.cpp:25
std::string
STL class.
GPUCruncher::finalize
StatusCode finalize() override
the finalization of the algorithm
Definition: GPUCruncher.cpp:197
GPUCruncher::m_outputHandles
std::vector< DataObjectHandle< DataObject > * > m_outputHandles
Definition: GPUCruncher.h:68
Gaudi.Configuration.log
log
Definition: Configuration.py:28
Gaudi::Algorithm::name
const std::string & name() const override
The identifying name of the algorithm object.
Definition: Algorithm.cpp:526
std::vector< double >
std::vector::size
T size(T... args)
ISvcLocator
Definition: ISvcLocator.h:46
Gaudi::Algorithm::initialize
StatusCode initialize() override
the default (empty) implementation of IStateful::initialize() method
Definition: Algorithm.h:178
GaudiException
Definition: GaudiException.h:31
GPUCruncher::execute
StatusCode execute(const EventContext &ctx) const override
the execution of the algorithm
Definition: GPUCruncher.cpp:80
Gaudi::Algorithm::declareProperty
Gaudi::Details::PropertyBase * declareProperty(const std::string &name, ToolHandle< T > &hndl, const std::string &doc="none")
Definition: Algorithm.h:304
GPUCruncher::~GPUCruncher
virtual ~GPUCruncher()
virtual & protected destructor
Definition: GPUCruncher.cpp:43
gaudirun.c
c
Definition: gaudirun.py:525
std::vector::back
T back(T... args)
DataObjectHandle< DataObject >
Gaudi::DataHandle::Writer
@ Writer
Definition: DataHandle.h:40
AvalancheSchedulerErrorTest.msgSvc
msgSvc
Definition: AvalancheSchedulerErrorTest.py:80
GPUCruncher::initialize
StatusCode initialize() override
Its initialization.
Definition: GPUCruncher.cpp:49
std::hex
T hex(T... args)
std::vector::push_back
T push_back(T... args)
std::clock
T clock(T... args)
GPUCruncher::m_inputHandles
std::vector< DataObjectHandle< DataObject > * > m_inputHandles
Definition: GPUCruncher.h:67
GaudiPython.Pythonizations.ctx
ctx
Definition: Pythonizations.py:578
StatusCode
Definition: StatusCode.h:65
Gaudi::Units::m
constexpr double m
Definition: SystemOfUnits.h:108
std::plus
VERBOSE_MSG
#define VERBOSE_MSG
Definition: GPUCruncher.cpp:28
std::to_string
T to_string(T... args)
std::accumulate
T accumulate(T... args)
format
GAUDI_API std::string format(const char *,...)
MsgStream format utility "a la sprintf(...)".
Definition: MsgStream.cpp:119
endmsg
MsgStream & endmsg(MsgStream &s)
MsgStream Modifier: endmsg. Calls the output method of the MsgStream.
Definition: MsgStream.h:202
MsgStream
Definition: MsgStream.h:33
GPUCruncher::GPUCruncher
GPUCruncher()
the default constructor is disabled
Gaudi::Algorithm::finalize
StatusCode finalize() override
the default (empty) implementation of IStateful::finalize() method
Definition: Algorithm.h:184
StatusCode::SUCCESS
constexpr static const auto SUCCESS
Definition: StatusCode.h:100
ConditionsStallTest.name
name
Definition: ConditionsStallTest.py:77
DECLARE_COMPONENT
#define DECLARE_COMPONENT(type)
Definition: PluginServiceV1.h:46
OffloadAtlasMCRecoScenario.seed
seed
Definition: OffloadAtlasMCRecoScenario.py:52
EventContext
Definition: EventContext.h:34
GPUCruncher::CHM
tbb::concurrent_hash_map< std::string, unsigned int > CHM
Definition: GPUCruncher.h:28
DataObject
Definition: DataObject.h:36
GPUCruncher::m_outKeys
Gaudi::Property< std::vector< std::string > > m_outKeys
Definition: GPUCruncher.h:58
GPUCruncher::m_name_ncopies_map
static CHM m_name_ncopies_map
Definition: GPUCruncher.h:70
Gaudi::DataHandle::Reader
@ Reader
Definition: DataHandle.h:40
GPUCruncher::m_avg_runtime
Gaudi::Property< double > m_avg_runtime
Definition: GPUCruncher.h:60
GPUCruncher
Definition: GPUCruncher.h:25
GPUCruncher.h
std::numeric_limits
e3
HepRndm::Engine< DRand48Engine > e3
Definition: HepRndmEngines.cpp:205
GaudiException::what
const char * what() const override
method from std::exception
Definition: GaudiException.h:110
PrepareBase.out
out
Definition: PrepareBase.py:20
std::chrono::steady_clock::now
T now(T... args)