The Gaudi Framework  v30r3 (a5ef0a68)
CPUCruncher.cpp
Go to the documentation of this file.
1 #include "CPUCruncher.h"
3 #include "HiveNumbers.h"
4 #include <ctime>
5 #include <sys/resource.h>
6 #include <sys/times.h>
7 
8 #include <tbb/tick_count.h>
9 #include <thread>
10 
14 
16 
// Logging shortcuts. ON_DEBUG / ON_VERBOSE expand to a bare `if` on the current
// message level, so the statement that follows only runs when that level is
// enabled; DEBUG_MSG / VERBOSE_MSG additionally open the matching stream.
// Because the expansion is an unbraced `if`, an `else` written right after a
// statement using these macros would bind to the macro's `if`.
17 #define ON_DEBUG if ( msgLevel( MSG::DEBUG ) )
18 #define DEBUG_MSG ON_DEBUG debug()
19 
20 #define ON_VERBOSE if ( msgLevel( MSG::VERBOSE ) )
21 #define VERBOSE_MSG ON_VERBOSE verbose()
22 
23 //------------------------------------------------------------------------------
24 
25 CPUCruncher::CPUCruncher( const std::string& name, // the algorithm instance name
26  ISvcLocator* pSvc )
27  : GaudiAlgorithm( name, pSvc )
28 {
29 
30  declareProperty( "NIterationsVect", m_niters_vect, "Number of iterations for the calibration." );
31  declareProperty( "NTimesVect", m_times_vect, "Number of seconds for the calibration." );
32 
33  // Register the algo in the static concurrent hash map in order to
34  // monitor the # of copies
35  CHM::accessor name_ninstances;
36  m_name_ncopies_map.insert( name_ninstances, name );
37  name_ninstances->second += 1;
38 }
39 
41 {
// Destructor body (the signature line is not part of this listing).
// The handle vectors store raw pointers; every element is deleted here.
42  for ( uint i = 0; i < m_inputHandles.size(); ++i ) delete m_inputHandles[i];
43 
44  for ( uint i = 0; i < m_outputHandles.size(); ++i ) delete m_outputHandles[i];
45 }
46 
48 {
// Body of initialize() (the signature line is not part of this listing).
// Runs the base-class initialisation, calibrates when no timing table has been
// supplied, marks the algorithm as I/O-bound when a sleep fraction is set, and
// declares one property per input and output key.
49  auto sc = GaudiAlgorithm::initialize();
// Propagate a failed base-class initialisation unchanged.
50  if ( !sc ) return sc;
51 
// An empty timing table means no calibration was provided: measure one now.
52  if ( m_times_vect.size() == 0 ) calibrate();
53 
54  // if an algorithm was setup to sleep, for whatever period, it effectively becomes I/O-bound
55  if ( m_sleepFraction != 0.0f ) setIOBound( true );
56 
57  // This is a bit ugly. There is no way to declare a vector of DataObjectHandles, so
58  // we need to wait until initialize when we've read in the input and output key
59  // properties, and know their size, and then turn them
60  // into Handles and register them with the framework by calling declareProperty. We
61  // could call declareInput/declareOutput on them too.
62 
// i numbers the generated property names "dummy_in_<i>" / "dummy_out_<i>".
63  int i = 0;
64  for ( auto k : m_inpKeys ) {
65  DEBUG_MSG << "adding input key " << k << endmsg;
// NOTE(review): the listing skips line 66; m_inputHandles.back() is dereferenced
// next, so the handle for this key is presumably created and appended there — confirm.
67  declareProperty( "dummy_in_" + std::to_string( i ), *( m_inputHandles.back() ) );
68  i++;
69  }
70 
71  i = 0;
72  for ( auto k : m_outKeys ) {
73  DEBUG_MSG << "adding output key " << k << endmsg;
// NOTE(review): the listing skips line 74; presumably the output handle for this
// key is created and appended to m_outputHandles there — confirm.
75  declareProperty( "dummy_out_" + std::to_string( i ), *( m_outputHandles.back() ) );
76  i++;
77  }
78 
79  return sc;
80 }
81 
82 /*
83 Calibrate the crunching by finding the relation between the maximum number to be searched and the time spent.
84 The relation is a square root for times greater than 10^-4 seconds.
85 */
87 {
// Body of calibrate() (the signature line is not part of this listing).
// Fills m_niters_vect with a fixed ladder of iteration counts and measures,
// for each of them, how long findPrimes() takes; the timings go to m_times_vect.
88  m_niters_vect = {0, 500, 600, 700, 800, 1000, 1300, 1600, 2000, 2300, 2600, 3000, 3300, 3500, 3900,
89  4200, 5000, 6000, 8000, 10000, 12000, 15000, 17000, 20000, 25000, 30000, 35000, 40000, 60000};
// The two largest points are only added when the short calibration is not requested.
90  if ( !m_shortCalib ) {
91  m_niters_vect.push_back( 100000 );
92  m_niters_vect.push_back( 200000 );
93  }
94 
// NOTE(review): the listing skips line 95; m_times_vect is indexed right below,
// so it is presumably resized to match m_niters_vect there — confirm.
96  m_times_vect[0] = 0.;
97 
98  info() << "Starting calibration..." << endmsg;
// Index 0 is the (0 iterations, 0 seconds) point set above, so measuring starts at 1.
99  for ( unsigned int i = 1; i < m_niters_vect.size(); ++i ) {
100  unsigned long niters = m_niters_vect[i];
// Upper bound on the number of re-measurements for one point.
101  unsigned int trials = 30;
102  do {
103  auto start_cali = tbb::tick_count::now();
104  findPrimes( niters );
105  auto stop_cali = tbb::tick_count::now();
106  double deltat = ( stop_cali - start_cali ).seconds();
107  m_times_vect[i] = deltat;
108  DEBUG_MSG << "Calibration: # iters = " << niters << " => " << deltat << endmsg;
109  trials--;
// Re-measure while the new time is below the previous point's time and trials remain;
// after 30 trials the last measurement is kept even if it is still smaller.
110  } while ( trials > 0 and m_times_vect[i] < m_times_vect[i - 1] ); // make sure that they are monotonic
111  }
112  info() << "Calibration finished!" << endmsg;
113 }
114 
115 unsigned long CPUCruncher::getNCaliIters( double runtime )
116 {
117 
118  unsigned int smaller_i = 0;
119  double time = 0.;
120  bool found = false;
121  // We know that the first entry is 0, so we start to iterate from 1
122  for ( unsigned int i = 1; i < m_times_vect.size(); i++ ) {
123  time = m_times_vect[i];
124  if ( time > runtime ) {
125  smaller_i = i - 1;
126  found = true;
127  break;
128  }
129  }
130 
131  // Case 1: we are outside the interpolation range, we take the last 2 points
132  if ( not found ) smaller_i = m_times_vect.size() - 2;
133 
134  // Case 2: we maeke a linear interpolation
135  // y=mx+q
136  const double x0 = m_times_vect[smaller_i];
137  const double x1 = m_times_vect[smaller_i + 1];
138  const double y0 = m_niters_vect[smaller_i];
139  const double y1 = m_niters_vect[smaller_i + 1];
140  const double m = ( y1 - y0 ) / ( x1 - x0 );
141  const double q = y0 - m * x0;
142 
143  const unsigned long nCaliIters = m * runtime + q;
144  // always() << x0 << "<" << runtime << "<" << x1 << " Corresponding to " << nCaliIters << " iterations" << endmsg;
145 
146  return nCaliIters;
147 }
148 
149 void CPUCruncher::findPrimes( const unsigned long int n_iterations )
150 {
151  // Flag to trigger the allocation
152  bool is_prime;
153 
154  // Let's prepare the material for the allocations
155  unsigned int primes_size = 1;
156  unsigned long* primes = new unsigned long[primes_size];
157  primes[0] = 2;
158 
159  unsigned long i = 2;
160 
161  // Loop on numbers
162  for ( unsigned long int iiter = 0; iiter < n_iterations; iiter++ ) {
163  // Once at max, it returns to 0
164  i += 1;
165 
166  // Check if it can be divided by the smaller ones
167  is_prime = true;
168  for ( unsigned long j = 2; j < i && is_prime; ++j ) {
169  if ( i % j == 0 ) is_prime = false;
170  } // end loop on numbers < than tested one
171 
172  if ( is_prime ) {
173  // copy the array of primes (INEFFICIENT ON PURPOSE!)
174  unsigned int new_primes_size = 1 + primes_size;
175  unsigned long* new_primes = new unsigned long[new_primes_size];
176 
177  for ( unsigned int prime_index = 0; prime_index < primes_size; prime_index++ ) {
178  new_primes[prime_index] = primes[prime_index];
179  }
180  // attach the last prime
181  new_primes[primes_size] = i;
182 
183  // Update primes array
184  delete[] primes;
185  primes = new_primes;
186  primes_size = new_primes_size;
187  } // end is prime
188 
189  } // end of while loop
190 
191  // Fool Compiler optimisations:
192  for ( unsigned int prime_index = 0; prime_index < primes_size; prime_index++ )
193  if ( primes[prime_index] == 4 )
194  debug() << "This does never happen, but it's necessary too fool aggressive compiler optimisations!" << endmsg;
195 
196  delete[] primes;
197 }
198 
199 //------------------------------------------------------------------------------
201 {
// Body of declareRuntimeRequestedOutputs() (the signature line is not part of
// this listing). For every key returned by outputDataObjs() a writer handle is
// created, stored in m_outputHandles and declared as a property.
202  //
203  for ( const auto& k : outputDataObjs() ) {
// The handle is allocated with new and kept as a raw pointer in m_outputHandles.
204  auto outputHandle = new DataObjectHandle<DataObject>( k, Gaudi::DataHandle::Writer, this );
205  VERBOSE_MSG << "found late-attributed output: " << outputHandle->objKey() << endmsg;
206  m_outputHandles.push_back( outputHandle );
207  declareProperty( "dummy_out_" + outputHandle->objKey(), *( m_outputHandles.back() ) );
208  }
209 
// NOTE(review): the listing skips line 210 here — a statement may be missing; confirm against the original file.
211 
// Records that the late-attributed outputs have been declared.
212  m_declAugmented = true;
213 }
214 
215 //------------------------------------------------------------------------------
216 
// Per-event work of the algorithm: choose a crunch time (absolute value of a
// Gaussian random number), sleep for m_avg_runtime * m_sleepFraction when the
// algorithm is I/O-bound, read every valid input handle, burn CPU in
// findPrimes(), write a new DataObject to every valid output handle and log
// the timings at DEBUG level.
// Returns StatusCode::FAILURE for events whose number is a positive multiple
// of m_failNEvents (when m_failNEvents > 0), StatusCode::SUCCESS otherwise.
217 StatusCode CPUCruncher::execute() // the execution of the algorithm
218 {
219 
// NOTE(review): the listing skips line 220 here — a statement may be missing; confirm against the original file.
221 
222  float crunchtime;
223 
224  if ( m_local_rndm_gen ) {
225  /* This will disappear with a thread safe random number generator service.
226  * Use basic Box-Muller to generate Gaussian random numbers.
227  * The quality is not good for in depth study given that the generator is a
228  * linear congruent.
229  * Throw away basically a free number: we are in a cpu cruncher after all.
230  * The seed is taken from the clock, but we could assign a seed per module to
231  * ensure reproducibility.
232  *
233  * This is not an overkill but rather an exercise towards a thread safe
234  * random number generation.
235  */
236 
237  auto getGausRandom = []( double mean, double sigma ) -> double {
238 
// The seed is re-read from std::clock() on every call of this lambda.
239  unsigned int seed = std::clock();
240 
// Linear congruential step: updates seed in place and returns seed / m.
241  auto getUnifRandom = []( unsigned int& seed ) -> double {
242  // from "Numerical Recipes"
// NOTE(review): with m = 232 the seed is reduced modulo 232, so unif takes at
// most 232 distinct values. The multiplier/increment pair is presumably the
// "Numerical Recipes" generator with modulus 2^32, which suggests 232 is a
// mangled "2^32" — confirm before relying on the distribution.
243  constexpr unsigned int m = 232;
244  constexpr unsigned int a = 1664525;
245  constexpr unsigned int c = 1013904223;
246  seed = ( a * seed + c ) % m;
247  const double unif = double( seed ) / m;
248  return unif;
249  };
250 
251  double unif1, unif2;
// Redraw while unif1 is exactly 0, since log( unif1 ) is taken below.
252  do {
253  unif1 = getUnifRandom( seed );
254  unif2 = getUnifRandom( seed );
255  } while ( unif1 == 0. );
256 
// Box-Muller transform of the two uniform numbers.
257  const double normal = sqrt( -2. * log( unif1 ) ) * cos( 2 * M_PI * unif2 );
258 
259  return normal * sigma + mean;
260  };
261 
// Mean is the non-sleeping share of the average runtime; m_var_runtime is passed as sigma.
262  crunchtime = fabs( getGausRandom( m_avg_runtime * ( 1. - m_sleepFraction ), m_var_runtime ) );
263  // End Of temp block
264  } else {
265  // Should be a member.
// NOTE(review): 'rndmgaus' is not declared in the visible lines (the listing skips line 266) — confirm.
267  crunchtime = std::fabs( rndmgaus() );
268  }
269 
270  // Prepare to sleep (even if we won't enter the following if clause for sleeping).
271  // This is needed to distribute evenly among all algorithms the overhead (around sleeping) which is harmful when
272  // trying to achieve uniform distribution of algorithm timings.
273  const double dreamtime = m_avg_runtime * m_sleepFraction;
274  const std::chrono::duration<double> dreamtime_duration( dreamtime );
275  tbb::tick_count startSleeptbb;
276  tbb::tick_count endSleeptbb;
277 
278  // Start to measure the total time here, together with the dreaming process straight ahead
279  tbb::tick_count starttbb = tbb::tick_count::now();
280 
281  // If the algorithm was set as I/O-bound, we will replace requested part of crunching with plain sleeping
282  if ( isIOBound() ) {
283  // in this block (and not in other places around) msgLevel is checked for the same reason as above, when
284  // preparing to sleep several lines above: to reduce as much as possible the overhead around sleeping
285  DEBUG_MSG << "Dreaming time will be: " << dreamtime << endmsg;
286 
287  ON_DEBUG startSleeptbb = tbb::tick_count::now();
288  std::this_thread::sleep_for( dreamtime_duration );
289  ON_DEBUG endSleeptbb = tbb::tick_count::now();
290 
291  // actual sleeping time can be longer due to scheduling or resource contention delays
292  ON_DEBUG
293  {
294  const double actualDreamTime = ( endSleeptbb - startSleeptbb ).seconds();
295  debug() << "Actual dreaming time was: " << actualDreamTime << "s" << endmsg;
296  }
297  } // end of "sleeping block"
298 
299  DEBUG_MSG << "Crunching time will be: " << crunchtime << endmsg;
// NOTE(review): 'context' is used below via .evt() and .slot(); its declaration
// is not in the visible lines (the listing skips line 300) — confirm.
301  DEBUG_MSG << "Start event " << context.evt() << " in slot " << context.slot() << " on pthreadID " << std::hex
302  << pthread_self() << std::dec << endmsg;
303 
304  VERBOSE_MSG << "inputs number: " << m_inputHandles.size() << endmsg;
305  for ( auto& inputHandle : m_inputHandles ) {
306  if ( !inputHandle->isValid() ) continue;
307 
308  VERBOSE_MSG << "get from TS: " << inputHandle->objKey() << endmsg;
309  DataObject* obj = nullptr;
// The read is repeated m_rwRepetitions times; with 0 repetitions obj stays null
// and the error below is reported.
310  for ( unsigned int i = 0; i < m_rwRepetitions; ++i ) {
311  obj = inputHandle->get();
312  }
313  if ( obj == nullptr ) error() << "A read object was a null pointer." << endmsg;
314  }
315 
// Convert the requested crunch time into iterations and burn the CPU.
316  const unsigned long n_iters = getNCaliIters( crunchtime );
317  findPrimes( n_iters );
318 
319  // Return error on fraction of events if configured
// This early return happens before any output is written.
320  if ( m_failNEvents > 0 && context.evt() > 0 && ( context.evt() % m_failNEvents ) == 0 ) {
321  return StatusCode::FAILURE;
322  }
323 
324  VERBOSE_MSG << "outputs number: " << m_outputHandles.size() << endmsg;
325  for ( auto& outputHandle : m_outputHandles ) {
326  if ( !outputHandle->isValid() ) continue;
327 
328  VERBOSE_MSG << "put to TS: " << outputHandle->objKey() << endmsg;
329  outputHandle->put( new DataObject() );
330  }
331 
332  tbb::tick_count endtbb = tbb::tick_count::now();
333 
// Total wall time since starttbb, which includes the sleeping block above.
334  const double actualRuntime = ( endtbb - starttbb ).seconds();
335 
336  DEBUG_MSG << "Finish event " << context.evt()
337  // << " on pthreadID " << context.m_thread_id
338  << " in " << actualRuntime << " seconds" << endmsg;
339 
340  DEBUG_MSG << "Timing: ExpectedCrunchtime= " << crunchtime << " ExpectedDreamtime= " << dreamtime
341  << " ActualTotalRuntime= " << actualRuntime << " Ratio= " << ( crunchtime + dreamtime ) / actualRuntime
342  << " Niters= " << n_iters << endmsg;
343 
// NOTE(review): the listing skips line 344 here — a statement may be missing; confirm against the original file.
345 
346  return StatusCode::SUCCESS;
347 }
348 
349 //------------------------------------------------------------------------------
350 
351 StatusCode CPUCruncher::finalize() // the finalization of the algorithm
352 {
353  MsgStream log( msgSvc(), name() );
354 
355  unsigned int ninstances;
356 
357  {
358  CHM::const_accessor const_name_ninstances;
359  m_name_ncopies_map.find( const_name_ninstances, name() );
360  ninstances = const_name_ninstances->second;
361  }
362 
363  constexpr double s2ms = 1000.;
364  // do not show repetitions
365  if ( ninstances != 0 ) {
366  info() << "Summary: name= " << name() << "\t avg_runtime= " << m_avg_runtime * s2ms << "\t n_clones= " << ninstances
367  << endmsg;
368 
369  CHM::accessor name_ninstances;
370  m_name_ncopies_map.find( name_ninstances, name() );
371  name_ninstances->second = 0;
372  }
373 
374  return GaudiAlgorithm::finalize();
375 }
376 
377 //------------------------------------------------------------------------------
StatusCode execute() override
the execution of the algorithm
constexpr static const auto FAILURE
Definition: StatusCode.h:88
Definition of the MsgStream class used to transmit messages.
Definition: MsgStream.h:24
SmartIF< IRndmGenSvc > & randSvc() const
The standard RandomGen service, Return a pointer to the service if present.
Definition: Algorithm.cpp:827
Gaudi::Property< float > m_sleepFraction
Definition: CPUCruncher.h:68
The ISvcLocator is the interface implemented by the Service Factory in the Application Manager to loc...
Definition: ISvcLocator.h:25
const std::string & name() const override
The identifying name of the algorithm object.
Definition: Algorithm.cpp:765
A class that implements a search for prime numbers.
Definition: CPUCruncher.h:18
void setFilterPassed(bool state) const override
Set the filter passed flag to the specified state.
Definition: Algorithm.cpp:791
ContextID_t slot() const
Definition: EventContext.h:40
Gaudi::Property< bool > m_loader
Definition: CPUCruncher.h:58
MsgStream & info() const
shortcut for the method msgStream(MSG::INFO)
void setIOBound(bool value)
Definition: Algorithm.h:478
StatusCode initialize() override
standard initialization method
virtual ~CPUCruncher()
virtual & protected destructor
Definition: CPUCruncher.cpp:40
T to_string(T...args)
void findPrimes(const unsigned long int)
The CPU intensive function.
T clock(T...args)
Gaudi::Property< unsigned int > m_rwRepetitions
Definition: CPUCruncher.h:67
void initDataHandleHolder()
initializes all handles - called by the sysInitialize method of any descendant of this ...
T sleep_for(T...args)
const std::string & context() const
Returns the "context" string. Used to identify different processing states.
Definition: GaudiCommon.h:748
Parameters for the Gauss random number generation.
std::vector< DataObjectHandle< DataObject > * > m_outputHandles
Definition: CPUCruncher.h:83
void calibrate()
Calibrate.
Definition: CPUCruncher.cpp:86
This class represents an entry point to all the event specific data.
Definition: EventContext.h:24
long unsigned int getNCaliIters(double)
#define VERBOSE_MSG
Definition: CPUCruncher.cpp:21
T resize(T...args)
ContextEvt_t evt() const
Definition: EventContext.h:39
STL class.
#define DECLARE_COMPONENT(type)
tbb::concurrent_hash_map< std::string, unsigned int > CHM
Definition: CPUCruncher.h:22
T push_back(T...args)
MsgStream & error() const
shortcut for the method msgStream(MSG::ERROR)
static std::vector< unsigned int > m_niters_vect
Definition: CPUCruncher.h:75
static CHM m_name_ncopies_map
Definition: CPUCruncher.h:85
This class is used for returning status codes from appropriate routines.
Definition: StatusCode.h:51
constexpr double m
Definition: SystemOfUnits.h:94
StatusCode finalize() override
standard finalization method
Gaudi::Property< bool > m_invertCFD
Definition: CPUCruncher.h:71
The useful base class for data processing algorithms.
std::vector< DataObjectHandle< DataObject > * > m_inputHandles
Definition: CPUCruncher.h:82
GAUDI_API const EventContext & currentContext()
const SmartIF< IMessageSvc > & msgSvc() const
The standard message service.
T fabs(T...args)
constexpr static const auto SUCCESS
Definition: StatusCode.h:87
static std::vector< double > m_times_vect
Definition: CPUCruncher.h:76
#define DEBUG_MSG
Definition: CPUCruncher.cpp:18
T size(T...args)
Gaudi::Property< bool > m_shortCalib
Definition: CPUCruncher.h:66
void declareRuntimeRequestedOutputs()
Pick up late-attributed data outputs.
MsgStream & debug() const
shortcut for the method msgStream(MSG::DEBUG)
Gaudi::Property< double > m_avg_runtime
Definition: CPUCruncher.h:63
bool isIOBound() const
Definition: Algorithm.h:476
StatusCode initialize() override
Its initialization.
Definition: CPUCruncher.cpp:47
T back(T...args)
CPUCruncher()
the default constructor is disabled
Gaudi::Property< unsigned int > m_failNEvents
Definition: CPUCruncher.h:72
T hex(T...args)
Gaudi::Property< std::vector< std::string > > m_outKeys
Definition: CPUCruncher.h:61
Gaudi::Details::PropertyBase * declareProperty(const std::string &name, ToolHandle< T > &hndl, const std::string &doc="none")
Definition: Algorithm.h:370
Gaudi::Property< std::vector< std::string > > m_inpKeys
Definition: CPUCruncher.h:60
A DataObject is the base class of any identifiable object on any data store.
Definition: DataObject.h:30
StatusCode finalize() override
the finalization of the algorithm
MsgStream & endmsg(MsgStream &s)
MsgStream Modifier: endmsg. Calls the output method of the MsgStream.
Definition: MsgStream.h:209
Gaudi::Property< bool > m_local_rndm_gen
Definition: CPUCruncher.h:65
Gaudi::Property< double > m_var_runtime
Definition: CPUCruncher.h:64
#define ON_DEBUG
Definition: CPUCruncher.cpp:17
bool m_declAugmented
Definition: CPUCruncher.h:57