The Gaudi Framework  v29r0 (ff2e7097)
CPUCruncher.cpp
Go to the documentation of this file.
1 #include "CPUCruncher.h"
3 #include "HiveNumbers.h"
4 #include <ctime>
5 #include <sys/resource.h>
6 #include <sys/times.h>
7 
8 #include <tbb/tick_count.h>
9 #include <thread>
10 
14 
16 
// Guard macro: the statement that follows ON_DEBUG is executed only when
// msgLevel( MSG::DEBUG ) evaluates to true.
// NOTE: it expands to a bare `if`, so an `else` written after a use of the
// macro binds to this hidden `if`.
17 #define ON_DEBUG if ( msgLevel( MSG::DEBUG ) )
// Streams into debug() under the ON_DEBUG guard.
18 #define DEBUG_MSG ON_DEBUG debug()
19 
// Same pair of helpers for the MSG::VERBOSE level / verbose() stream.
20 #define ON_VERBOSE if ( msgLevel( MSG::VERBOSE ) )
21 #define VERBOSE_MSG ON_VERBOSE verbose()
22 
23 //------------------------------------------------------------------------------
24 
25 CPUCruncher::CPUCruncher( const std::string& name, // the algorithm instance name
26  ISvcLocator* pSvc )
27  : GaudiAlgorithm( name, pSvc )
28 {
29 
30  declareProperty( "NIterationsVect", m_niters_vect, "Number of iterations for the calibration." );
31  declareProperty( "NTimesVect", m_times_vect, "Number of seconds for the calibration." );
32 
33  // Register the algo in the static concurrent hash map in order to
34  // monitor the # of copies
35  CHM::accessor name_ninstances;
36  m_name_ncopies_map.insert( name_ninstances, name );
37  name_ninstances->second += 1;
38 }
39 
41 {
42  for ( uint i = 0; i < m_inputHandles.size(); ++i ) delete m_inputHandles[i];
43 
44  for ( uint i = 0; i < m_outputHandles.size(); ++i ) delete m_outputHandles[i];
45 }
46 
48 {
49  auto sc = GaudiAlgorithm::initialize();
50  if ( !sc ) return sc;
51 
52  if ( m_times_vect.size() == 0 ) calibrate();
53 
54  // if an algorithm was setup to sleep, for whatever period, it effectively becomes I/O-bound
55  if ( m_sleepFraction != 0.0f ) setIOBound( true );
56 
57  // This is a bit ugly. There is no way to declare a vector of DataObjectHandles, so
58  // we need to wait until initialize when we've read in the input and output key
59  // properties, and know their size, and then turn them
60  // into Handles and register them with the framework by calling declareProperty. We
61  // could call declareInput/declareOutput on them too.
62 
63  int i = 0;
64  for ( auto k : m_inpKeys ) {
65  DEBUG_MSG << "adding input key " << k << endmsg;
67  declareProperty( "dummy_in_" + std::to_string( i ), *( m_inputHandles.back() ) );
68  i++;
69  }
70 
71  i = 0;
72  for ( auto k : m_outKeys ) {
73  DEBUG_MSG << "adding output key " << k << endmsg;
75  declareProperty( "dummy_out_" + std::to_string( i ), *( m_outputHandles.back() ) );
76  i++;
77  }
78 
79  return sc;
80 }
81 
82 /*
83 Calibrate the crunching by finding the relation between the maximum number to be searched and the time spent.
84 The relation is a square root for times greater than 10^-4 seconds.
85 */
87 {
88  m_niters_vect = {0, 500, 600, 700, 800, 1000, 1300, 1600, 2000, 2300, 2600, 3000, 3300, 3500, 3900,
89  4200, 5000, 6000, 8000, 10000, 12000, 15000, 17000, 20000, 25000, 30000, 35000, 40000, 60000};
90  if ( !m_shortCalib ) {
91  m_niters_vect.push_back( 100000 );
92  m_niters_vect.push_back( 200000 );
93  }
94 
96  m_times_vect[0] = 0.;
97 
98  info() << "Starting calibration..." << endmsg;
99  for ( unsigned int i = 1; i < m_niters_vect.size(); ++i ) {
100  unsigned long niters = m_niters_vect[i];
101  unsigned int trials = 30;
102  do {
103  auto start_cali = tbb::tick_count::now();
104  findPrimes( niters );
105  auto stop_cali = tbb::tick_count::now();
106  double deltat = ( stop_cali - start_cali ).seconds();
107  m_times_vect[i] = deltat;
108  DEBUG_MSG << "Calibration: # iters = " << niters << " => " << deltat << endmsg;
109  trials--;
110  } while ( trials > 0 and m_times_vect[i] < m_times_vect[i - 1] ); // make sure that they are monotonic
111  }
112  info() << "Calibration finished!" << endmsg;
113 }
114 
115 unsigned long CPUCruncher::getNCaliIters( double runtime )
116 {
117 
118  unsigned int smaller_i = 0;
119  double time = 0.;
120  bool found = false;
121  // We know that the first entry is 0, so we start to iterate from 1
122  for ( unsigned int i = 1; i < m_times_vect.size(); i++ ) {
123  time = m_times_vect[i];
124  if ( time > runtime ) {
125  smaller_i = i - 1;
126  found = true;
127  break;
128  }
129  }
130 
131  // Case 1: we are outside the interpolation range, we take the last 2 points
132  if ( not found ) smaller_i = m_times_vect.size() - 2;
133 
134  // Case 2: we maeke a linear interpolation
135  // y=mx+q
136  const double x0 = m_times_vect[smaller_i];
137  const double x1 = m_times_vect[smaller_i + 1];
138  const double y0 = m_niters_vect[smaller_i];
139  const double y1 = m_niters_vect[smaller_i + 1];
140  const double m = ( y1 - y0 ) / ( x1 - x0 );
141  const double q = y0 - m * x0;
142 
143  const unsigned long nCaliIters = m * runtime + q;
144  // always() << x0 << "<" << runtime << "<" << x1 << " Corresponding to " << nCaliIters << " iterations" << endmsg;
145 
146  return nCaliIters;
147 }
148 
149 void CPUCruncher::findPrimes( const unsigned long int n_iterations )
150 {
151  // Flag to trigger the allocation
152  bool is_prime;
153 
154  // Let's prepare the material for the allocations
155  unsigned int primes_size = 1;
156  unsigned long* primes = new unsigned long[primes_size];
157  primes[0] = 2;
158 
159  unsigned long i = 2;
160 
161  // Loop on numbers
162  for ( unsigned long int iiter = 0; iiter < n_iterations; iiter++ ) {
163  // Once at max, it returns to 0
164  i += 1;
165 
166  // Check if it can be divided by the smaller ones
167  is_prime = true;
168  for ( unsigned long j = 2; j < i && is_prime; ++j ) {
169  if ( i % j == 0 ) is_prime = false;
170  } // end loop on numbers < than tested one
171 
172  if ( is_prime ) {
173  // copy the array of primes (INEFFICIENT ON PURPOSE!)
174  unsigned int new_primes_size = 1 + primes_size;
175  unsigned long* new_primes = new unsigned long[new_primes_size];
176 
177  for ( unsigned int prime_index = 0; prime_index < primes_size; prime_index++ ) {
178  new_primes[prime_index] = primes[prime_index];
179  }
180  // attach the last prime
181  new_primes[primes_size] = i;
182 
183  // Update primes array
184  delete[] primes;
185  primes = new_primes;
186  primes_size = new_primes_size;
187  } // end is prime
188 
189  } // end of while loop
190 
191  // Fool Compiler optimisations:
192  for ( unsigned int prime_index = 0; prime_index < primes_size; prime_index++ )
193  if ( primes[prime_index] == 4 )
194  debug() << "This does never happen, but it's necessary too fool aggressive compiler optimisations!" << endmsg;
195 
196  delete[] primes;
197 }
198 
199 //------------------------------------------------------------------------------
200 
201 StatusCode CPUCruncher::execute() // the execution of the algorithm
202 {
203  float crunchtime;
204 
205  if ( m_local_rndm_gen ) {
206  /* This will disappear with a thread safe random number generator service.
207  * Use basic Box-Muller to generate Gaussian random numbers.
208  * The quality is not good for in depth study given that the generator is a
209  * linear congruent.
210  * Throw away basically a free number: we are in a cpu cruncher after all.
211  * The seed is taken from the clock, but we could assign a seed per module to
212  * ensure reproducibility.
213  *
214  * This is not an overkill but rather an exercise towards a thread safe
215  * random number generation.
216  */
217 
// Draw one Gaussian-distributed random number with the given mean and sigma
// using the basic Box-Muller transform on two uniform numbers produced by a
// linear congruential generator. The generator is re-seeded from std::clock()
// on every call.
auto getGausRandom = []( double mean, double sigma ) -> double {

  unsigned int seed = std::clock();

  // Advance the generator state and return a uniform number in [0, 1).
  auto getUnifRandom = []( unsigned int& state ) -> double {
    // from "Numerical Recipes": modulus 2^32, a = 1664525, c = 1013904223.
    // The modulus used to read 232, which restricted the generator to at
    // most 232 distinct values. The arithmetic is done in 64 bits so that
    // the reduction modulo 2^32 is explicit and independent of overflow.
    constexpr unsigned long long m = 1ULL << 32;
    constexpr unsigned long long a = 1664525;
    constexpr unsigned long long c = 1013904223;
    state = static_cast<unsigned int>( ( a * state + c ) % m );
    return static_cast<double>( state ) / static_cast<double>( m );
  };

  // unif1 feeds the logarithm and therefore must not be zero.
  double unif1 = 0.;
  double unif2 = 0.;
  do {
    unif1 = getUnifRandom( seed );
    unif2 = getUnifRandom( seed );
  } while ( unif1 == 0. );

  // 2*pi spelled out to avoid relying on the non-standard M_PI macro.
  constexpr double two_pi = 6.283185307179586476925286766559;
  const double     normal = std::sqrt( -2. * std::log( unif1 ) ) * std::cos( two_pi * unif2 );

  return normal * sigma + mean;
};
242 
243  crunchtime = fabs( getGausRandom( m_avg_runtime * ( 1. - m_sleepFraction ), m_var_runtime ) );
244  // End Of temp block
245  } else {
246  // Should be a member.
248  crunchtime = std::fabs( rndmgaus() );
249  }
250 
251  // Prepare to sleep (even if we won't enter the following if clause for sleeping).
252  // This is needed to distribute evenly among all algorithms the overhead (around sleeping) which is harmful when
253  // trying to achieve uniform distribution of algorithm timings.
254  const double dreamtime = m_avg_runtime * m_sleepFraction;
255  const std::chrono::duration<double> dreamtime_duration( dreamtime );
256  tbb::tick_count startSleeptbb;
257  tbb::tick_count endSleeptbb;
258 
259  // Start to measure the total time here, together with the dreaming process straight ahead
260  tbb::tick_count starttbb = tbb::tick_count::now();
261 
262  // If the algorithm was set as I/O-bound, we will replace requested part of crunching with plain sleeping
263  if ( isIOBound() ) {
264  // in this block (and not in other places around) msgLevel is checked for the same reason as above, when
265  // preparing to sleep several lines above: to reduce as much as possible the overhead around sleeping
266  DEBUG_MSG << "Dreaming time will be: " << dreamtime << endmsg;
267 
268  ON_DEBUG startSleeptbb = tbb::tick_count::now();
269  std::this_thread::sleep_for( dreamtime_duration );
270  ON_DEBUG endSleeptbb = tbb::tick_count::now();
271 
272  // actual sleeping time can be longer due to scheduling or resource contention delays
273  ON_DEBUG
274  {
275  const double actualDreamTime = ( endSleeptbb - startSleeptbb ).seconds();
276  debug() << "Actual dreaming time was: " << actualDreamTime << "s" << endmsg;
277  }
278  } // end of "sleeping block"
279 
280  DEBUG_MSG << "Crunching time will be: " << crunchtime << endmsg;
282  DEBUG_MSG << "Start event " << context.evt() << " in slot " << context.slot() << " on pthreadID " << std::hex
283  << pthread_self() << std::dec << endmsg;
284 
285  VERBOSE_MSG << "inputs number: " << m_inputHandles.size() << endmsg;
286  for ( auto& inputHandle : m_inputHandles ) {
287  if ( !inputHandle->isValid() ) continue;
288 
289  VERBOSE_MSG << "get from TS: " << inputHandle->objKey() << endmsg;
290  DataObject* obj = nullptr;
291  for ( unsigned int i = 0; i < m_rwRepetitions; ++i ) {
292  obj = inputHandle->get();
293  }
294  if ( obj == nullptr ) error() << "A read object was a null pointer." << endmsg;
295  }
296 
297  const unsigned long n_iters = getNCaliIters( crunchtime );
298  findPrimes( n_iters );
299 
300  VERBOSE_MSG << "outputs number: " << m_outputHandles.size() << endmsg;
301  for ( auto& outputHandle : m_outputHandles ) {
302  if ( !outputHandle->isValid() ) continue;
303 
304  VERBOSE_MSG << "put to TS: " << outputHandle->objKey() << endmsg;
305  outputHandle->put( new DataObject() );
306  }
307 
308  tbb::tick_count endtbb = tbb::tick_count::now();
309 
310  const double actualRuntime = ( endtbb - starttbb ).seconds();
311 
312  DEBUG_MSG << "Finish event " << context.evt()
313  // << " on pthreadID " << context.m_thread_id
314  << " in " << actualRuntime << " seconds" << endmsg;
315 
316  DEBUG_MSG << "Timing: ExpectedCrunchtime= " << crunchtime << " ExpectedDreamtime= " << dreamtime
317  << " ActualTotalRuntime= " << actualRuntime << " Ratio= " << ( crunchtime + dreamtime ) / actualRuntime
318  << " Niters= " << n_iters << endmsg;
319 
321 
322  return StatusCode::SUCCESS;
323 }
324 
325 //------------------------------------------------------------------------------
326 
327 StatusCode CPUCruncher::finalize() // the finalization of the algorithm
328 {
329  MsgStream log( msgSvc(), name() );
330 
331  unsigned int ninstances;
332 
333  {
334  CHM::const_accessor const_name_ninstances;
335  m_name_ncopies_map.find( const_name_ninstances, name() );
336  ninstances = const_name_ninstances->second;
337  }
338 
339  constexpr double s2ms = 1000.;
340  // do not show repetitions
341  if ( ninstances != 0 ) {
342  info() << "Summary: name= " << name() << "\t avg_runtime= " << m_avg_runtime * s2ms << "\t n_clones= " << ninstances
343  << endmsg;
344 
345  CHM::accessor name_ninstances;
346  m_name_ncopies_map.find( name_ninstances, name() );
347  name_ninstances->second = 0;
348  }
349 
350  return GaudiAlgorithm::finalize();
351 }
352 
353 //------------------------------------------------------------------------------
StatusCode execute() override
the execution of the algorithm
Definition of the MsgStream class used to transmit messages.
Definition: MsgStream.h:24
SmartIF< IRndmGenSvc > & randSvc() const
The standard RandomGen service, Return a pointer to the service if present.
Definition: Algorithm.cpp:802
Gaudi::Property< float > m_sleepFraction
Definition: CPUCruncher.h:64
The ISvcLocator is the interface implemented by the Service Factory in the Application Manager to loc...
Definition: ISvcLocator.h:25
const std::string & name() const override
The identifying name of the algorithm object.
Definition: Algorithm.cpp:731
A class that implements a search for prime numbers.
Definition: CPUCruncher.h:19
void setFilterPassed(bool state) const override
Set the filter passed flag to the specified state.
Definition: Algorithm.cpp:768
ContextID_t slot() const
Definition: EventContext.h:40
MsgStream & info() const
shortcut for the method msgStream(MSG::INFO)
void setIOBound(bool value)
Definition: Algorithm.h:474
StatusCode initialize() override
standard initialization method
virtual ~CPUCruncher()
virtual & protected destructor
Definition: CPUCruncher.cpp:40
T to_string(T...args)
void findPrimes(const unsigned long int)
The CPU intensive function.
T clock(T...args)
Gaudi::Property< unsigned int > m_rwRepetitions
Definition: CPUCruncher.h:63
T sleep_for(T...args)
const std::string & context() const
Returns the "context" string. Used to identify different processing states.
Definition: GaudiCommon.h:704
Parameters for the Gauss random number generation.
std::vector< DataObjectHandle< DataObject > * > m_outputHandles
Definition: CPUCruncher.h:78
void calibrate()
Calibrate.
Definition: CPUCruncher.cpp:86
This class represents an entry point to all the event specific data.
Definition: EventContext.h:24
long unsigned int getNCaliIters(double)
#define VERBOSE_MSG
Definition: CPUCruncher.cpp:21
#define DECLARE_COMPONENT(type)
Definition: PluginService.h:33
T resize(T...args)
ContextEvt_t evt() const
Definition: EventContext.h:39
STL class.
tbb::concurrent_hash_map< std::string, unsigned int > CHM
Definition: CPUCruncher.h:23
T push_back(T...args)
MsgStream & error() const
shortcut for the method msgStream(MSG::ERROR)
static std::vector< unsigned int > m_niters_vect
Definition: CPUCruncher.h:70
static CHM m_name_ncopies_map
Definition: CPUCruncher.h:80
This class is used for returning status codes from appropriate routines.
Definition: StatusCode.h:28
constexpr double m
Definition: SystemOfUnits.h:94
StatusCode finalize() override
standard finalization method
Gaudi::Property< bool > m_invertCFD
Definition: CPUCruncher.h:67
The useful base class for data processing algorithms.
std::vector< DataObjectHandle< DataObject > * > m_inputHandles
Definition: CPUCruncher.h:77
GAUDI_API const EventContext & currentContext()
T fabs(T...args)
static std::vector< double > m_times_vect
Definition: CPUCruncher.h:71
#define DEBUG_MSG
Definition: CPUCruncher.cpp:18
T size(T...args)
Gaudi::Property< bool > m_shortCalib
Definition: CPUCruncher.h:62
MsgStream & debug() const
shortcut for the method msgStream(MSG::DEBUG)
Gaudi::Property< double > m_avg_runtime
Definition: CPUCruncher.h:59
bool isIOBound() const
Definition: Algorithm.h:472
StatusCode initialize() override
Its initialization.
Definition: CPUCruncher.cpp:47
T back(T...args)
CPUCruncher()
the default constructor is disabled
SmartIF< IMessageSvc > & msgSvc() const
The standard message service.
T hex(T...args)
Gaudi::Property< std::vector< std::string > > m_outKeys
Definition: CPUCruncher.h:57
Gaudi::Details::PropertyBase * declareProperty(const std::string &name, ToolHandle< T > &hndl, const std::string &doc="none")
Definition: Algorithm.h:370
Gaudi::Property< std::vector< std::string > > m_inpKeys
Definition: CPUCruncher.h:56
A DataObject is the base class of any identifiable object on any data store.
Definition: DataObject.h:29
StatusCode finalize() override
the finalization of the algorithm
MsgStream & endmsg(MsgStream &s)
MsgStream Modifier: endmsg. Calls the output method of the MsgStream.
Definition: MsgStream.h:209
Gaudi::Property< bool > m_local_rndm_gen
Definition: CPUCruncher.h:61
Gaudi::Property< double > m_var_runtime
Definition: CPUCruncher.h:60
#define ON_DEBUG
Definition: CPUCruncher.cpp:17