All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
CPUCruncher.cpp
Go to the documentation of this file.
1 #include "CPUCruncher.h"
2 #include "HiveNumbers.h"
3 #include <ctime>
4 #include <sys/resource.h>
5 #include <sys/times.h>
6 
7 #include <tbb/tick_count.h>
8 #include <thread>
9 
13 
15 
16 #define ON_DEBUG if (msgLevel(MSG::DEBUG))
17 #define DEBUG_MSG ON_DEBUG debug()
18 
19 #define ON_VERBOSE if (msgLevel(MSG::VERBOSE))
20 #define VERBOSE_MSG ON_VERBOSE verbose()
21 
22 //------------------------------------------------------------------------------
23 
24 CPUCruncher::CPUCruncher( const std::string& name, // the algorithm instance name
25  ISvcLocator* pSvc )
26  : GaudiAlgorithm( name, pSvc )
27 {
28 
29  declareProperty( "NIterationsVect", m_niters_vect, "Number of iterations for the calibration." );
30  declareProperty( "NTimesVect", m_times_vect, "Number of seconds for the calibration." );
31 
32  // Register the algo in the static concurrent hash map in order to
33  // monitor the # of copies
34  CHM::accessor name_ninstances;
35  m_name_ncopies_map.insert( name_ninstances, name );
36  name_ninstances->second += 1;
37 }
38 
40 {
41  for ( uint i = 0; i < m_inputHandles.size(); ++i ) delete m_inputHandles[i];
42 
43  for ( uint i = 0; i < m_outputHandles.size(); ++i ) delete m_outputHandles[i];
44 }
45 
47 {
48  auto sc = GaudiAlgorithm::initialize();
49  if ( !sc ) return sc;
50 
51  if ( m_times_vect.size() == 0 ) calibrate();
52 
53  // if an algorithm was setup to sleep, for whatever period, it effectively becomes I/O-bound
54  if ( m_sleepFraction != 0.0f ) setIOBound( true );
55 
56  // This is a bit ugly. There is no way to declare a vector of DataObjectHandles, so
57  // we need to wait until initialize when we've read in the input and output key
58  // properties, and know their size, and then turn them
59  // into Handles and register them with the framework by calling declareProperty. We
60  // could call declareInput/declareOutput on them too.
61 
62  int i = 0;
63  for ( auto k : m_inpKeys ) {
64  DEBUG_MSG << "adding input key " << k << endmsg;
66  declareProperty( "dummy_in_" + std::to_string( i ), *( m_inputHandles.back() ) );
67  i++;
68  }
69 
70  i = 0;
71  for ( auto k : m_outKeys ) {
72  DEBUG_MSG << "adding output key " << k << endmsg;
74  declareProperty( "dummy_out_" + std::to_string( i ), *( m_outputHandles.back() ) );
75  i++;
76  }
77 
78  return sc;
79 }
80 
81 /*
82 Calibrate the crunching finding the right relation between max number to be searched and time spent.
83 The relation is a sqrt for times greater than 10^-4 seconds.
84 */
86 {
87  m_niters_vect = { 0, 500, 600, 700, 800,
88  1000, 1300, 1600,
89  2000, 2300, 2600,
90  3000, 3300, 3500, 3900,
91  4200, 5000, 6000, 8000,
92  10000, 12000, 15000, 17000,
93  20000, 25000,
94  30000, 35000,
95  40000, 60000 };
96  if ( !m_shortCalib ) {
97  m_niters_vect.push_back( 100000 );
98  m_niters_vect.push_back( 200000 );
99  }
100 
102  m_times_vect[0] = 0.;
103 
104  info() << "Starting calibration..." << endmsg;
105  for ( unsigned int i = 1; i < m_niters_vect.size(); ++i ) {
106  unsigned long niters = m_niters_vect[i];
107  unsigned int trials = 30;
108  do {
109  auto start_cali = tbb::tick_count::now();
110  findPrimes( niters );
111  auto stop_cali = tbb::tick_count::now();
112  double deltat = ( stop_cali - start_cali ).seconds();
113  m_times_vect[i] = deltat;
114  DEBUG_MSG << "Calibration: # iters = " << niters << " => " << deltat << endmsg;
115  trials--;
116  } while ( trials > 0 and m_times_vect[i] < m_times_vect[i - 1] ); // make sure that they are monotonic
117  }
118  info() << "Calibration finished!" << endmsg;
119 }
120 
121 unsigned long CPUCruncher::getNCaliIters( double runtime )
122 {
123 
124  unsigned int smaller_i = 0;
125  double time = 0.;
126  bool found = false;
127  // We know that the first entry is 0, so we start to iterate from 1
128  for ( unsigned int i = 1; i < m_times_vect.size(); i++ ) {
129  time = m_times_vect[i];
130  if ( time > runtime ) {
131  smaller_i = i - 1;
132  found = true;
133  break;
134  }
135  }
136 
137  // Case 1: we are outside the interpolation range, we take the last 2 points
138  if ( not found ) smaller_i = m_times_vect.size() - 2;
139 
140  // Case 2: we maeke a linear interpolation
141  // y=mx+q
142  const double x0 = m_times_vect[smaller_i];
143  const double x1 = m_times_vect[smaller_i + 1];
144  const double y0 = m_niters_vect[smaller_i];
145  const double y1 = m_niters_vect[smaller_i + 1];
146  const double m = ( y1 - y0 ) / ( x1 - x0 );
147  const double q = y0 - m * x0;
148 
149  const unsigned long nCaliIters = m * runtime + q;
150  // always() << x0 << "<" << runtime << "<" << x1 << " Corresponding to " << nCaliIters << " iterations" << endmsg;
151 
152  return nCaliIters;
153 }
154 
155 void CPUCruncher::findPrimes( const unsigned long int n_iterations )
156 {
157  // Flag to trigger the allocation
158  bool is_prime;
159 
160  // Let's prepare the material for the allocations
161  unsigned int primes_size = 1;
162  unsigned long* primes = new unsigned long[primes_size];
163  primes[0] = 2;
164 
165  unsigned long i = 2;
166 
167  // Loop on numbers
168  for ( unsigned long int iiter = 0; iiter < n_iterations; iiter++ ) {
169  // Once at max, it returns to 0
170  i += 1;
171 
172  // Check if it can be divided by the smaller ones
173  is_prime = true;
174  for ( unsigned long j = 2; j < i && is_prime; ++j ) {
175  if ( i % j == 0 ) is_prime = false;
176  } // end loop on numbers < than tested one
177 
178  if ( is_prime ) {
179  // copy the array of primes (INEFFICIENT ON PURPOSE!)
180  unsigned int new_primes_size = 1 + primes_size;
181  unsigned long* new_primes = new unsigned long[new_primes_size];
182 
183  for ( unsigned int prime_index = 0; prime_index < primes_size; prime_index++ ) {
184  new_primes[prime_index] = primes[prime_index];
185  }
186  // attach the last prime
187  new_primes[primes_size] = i;
188 
189  // Update primes array
190  delete[] primes;
191  primes = new_primes;
192  primes_size = new_primes_size;
193  } // end is prime
194 
195  } // end of while loop
196 
197  // Fool Compiler optimisations:
198  for ( unsigned int prime_index = 0; prime_index < primes_size; prime_index++ )
199  if ( primes[prime_index] == 4 )
200  debug() << "This does never happen, but it's necessary too fool aggressive compiler optimisations!" << endmsg;
201 
202  delete[] primes;
203 }
204 
205 //------------------------------------------------------------------------------
206 
207 StatusCode CPUCruncher::execute() // the execution of the algorithm
208 {
209  float crunchtime;
210 
211  if ( m_local_rndm_gen ) {
212  /* This will disappear with a thread safe random number generator svc
213  * Use box mueller to generate gaussian randoms
214  * The quality is not good for in depth study given that the generator is a
215  * linear congruent.
216  * Throw away basically a free number: we are in a cpu cruncher after all.
217  * The seed is taken from the clock, but we could assign a seed per module to
218  * ensure reproducibility.
219  *
220  * This is not an overkill but rather an exercise towards a thread safe
221  * random number generation.
222  */
223 
224  auto getGausRandom = []( double mean, double sigma ) -> double {
225 
226  unsigned int seed = std::clock();
227 
228  auto getUnifRandom = []( unsigned int& seed ) -> double {
229  // from numerical recipies
230  constexpr unsigned int m = 232;
231  constexpr unsigned int a = 1664525;
232  constexpr unsigned int c = 1013904223;
233  seed = ( a * seed + c ) % m;
234  const double unif = double( seed ) / m;
235  return unif;
236  };
237 
238  const double unif1 = getUnifRandom( seed );
239  const double unif2 = getUnifRandom( seed );
240  const double normal = sqrt( -2. * log( unif1 ) ) * cos( 2 * M_PI * unif2 );
241  return normal * sigma + mean;
242  };
243  crunchtime = fabs( getGausRandom( m_avg_runtime * ( 1. - m_sleepFraction ), m_var_runtime ) );
244  // End Of temp block
245  } else {
246  // Should be a member.
248  crunchtime = std::fabs( rndmgaus() );
249  }
250 
251  // Prepare to sleep (even if we won't enter the following if clause for sleeping).
252  // This is needed to distribute evenly among all algorithms the overhead (around sleeping) which is harmful when
253  // trying to achieve uniform distribution of algorithm timings.
254  const double dreamtime = m_avg_runtime * m_sleepFraction;
255  const std::chrono::duration<double> dreamtime_duration( dreamtime );
256  tbb::tick_count startSleeptbb;
257  tbb::tick_count endSleeptbb;
258 
259  // Start to measure the total time here, together with the dreaming process straight ahead
260  tbb::tick_count starttbb = tbb::tick_count::now();
261 
262  // If the algorithm was set as I/O-bound, we will replace requested part of crunching with plain sleeping
263  if ( isIOBound() ) {
264  // in this block (and not in other places around) msgLevel is checked for the same reason as above, when
265  // preparing to sleep several lines above: to reduce as much as possible the overhead around sleeping
266  DEBUG_MSG << "Dreaming time will be: " << dreamtime << endmsg;
267 
268  ON_DEBUG startSleeptbb = tbb::tick_count::now();
269  std::this_thread::sleep_for( dreamtime_duration );
270  ON_DEBUG endSleeptbb = tbb::tick_count::now();
271 
272  // actual sleeping time can be longer due to scheduling or resource contention delays
273  ON_DEBUG {
274  const double actualDreamTime = ( endSleeptbb - startSleeptbb ).seconds();
275  debug() << "Actual dreaming time was: " << actualDreamTime << "s" << endmsg;
276  }
277  } // end of "sleeping block"
278 
279  DEBUG_MSG << "Crunching time will be: " << crunchtime << endmsg;
280  if ( getContext() )
281  DEBUG_MSG << "Start event " << getContext()->evt() << " in slot " << getContext()->slot()
282  << " on pthreadID " << std::hex << pthread_self() << std::dec << endmsg;
283 
284  VERBOSE_MSG << "inputs number: " << m_inputHandles.size() << endmsg;
285  for ( auto& inputHandle : m_inputHandles ) {
286  if ( !inputHandle->isValid() ) continue;
287 
288  VERBOSE_MSG << "get from TS: " << inputHandle->objKey() << endmsg;
289  DataObject* obj = nullptr;
290  for ( unsigned int i = 0; i < m_rwRepetitions; ++i ) {
291  obj = inputHandle->get();
292  }
293  if ( obj == nullptr ) error() << "A read object was a null pointer." << endmsg;
294  }
295 
296  const unsigned long n_iters = getNCaliIters( crunchtime );
297  findPrimes( n_iters );
298 
299  VERBOSE_MSG << "outputs number: " << m_outputHandles.size() << endmsg;
300  for ( auto& outputHandle : m_outputHandles ) {
301  if ( !outputHandle->isValid() ) continue;
302 
303  VERBOSE_MSG << "put to TS: " << outputHandle->objKey() << endmsg;
304  outputHandle->put( new DataObject() );
305  }
306 
307  tbb::tick_count endtbb = tbb::tick_count::now();
308 
309  const double actualRuntime = ( endtbb - starttbb ).seconds();
310 
311  if ( getContext() )
312  DEBUG_MSG << "Finish event " << getContext()->evt()
313  // << " on pthreadID " << getContext()->m_thread_id
314  << " in " << actualRuntime << " seconds" << endmsg;
315 
316  DEBUG_MSG << "Timing: ExpectedCrunchtime= " << crunchtime << " ExpectedDreamtime= " << dreamtime
317  << " ActualTotalRuntime= " << actualRuntime << " Ratio= " << ( crunchtime + dreamtime ) / actualRuntime
318  << " Niters= " << n_iters << endmsg;
319 
320  return StatusCode::SUCCESS;
321 }
322 
323 //------------------------------------------------------------------------------
324 
325 StatusCode CPUCruncher::finalize() // the finalization of the algorithm
326 {
327  MsgStream log( msgSvc(), name() );
328 
329  unsigned int ninstances;
330 
331  {
332  CHM::const_accessor const_name_ninstances;
333  m_name_ncopies_map.find( const_name_ninstances, name() );
334  ninstances = const_name_ninstances->second;
335  }
336 
337  constexpr double s2ms = 1000.;
338  // do not show repetitions
339  if ( ninstances != 0 ) {
340  info() << "Summary: name= " << name() << "\t avg_runtime= " << m_avg_runtime * s2ms
341  << "\t n_clones= " << ninstances << endmsg;
342 
343  CHM::accessor name_ninstances;
344  m_name_ncopies_map.find( name_ninstances, name() );
345  name_ninstances->second = 0;
346  }
347 
348  return GaudiAlgorithm::finalize();
349 }
350 
351 //------------------------------------------------------------------------------
StatusCode execute() override
the execution of the algorithm
Definition of the MsgStream class used to transmit messages.
Definition: MsgStream.h:24
SmartIF< IRndmGenSvc > & randSvc() const
The standard RandomGen service, Return a pointer to the service if present.
Definition: Algorithm.cpp:827
Gaudi::Property< float > m_sleepFraction
Definition: CPUCruncher.h:62
The ISvcLocator is the interface implemented by the Service Factory in the Application Manager to loc...
Definition: ISvcLocator.h:25
const std::string & name() const override
The identifying name of the algorithm object.
Definition: Algorithm.cpp:725
A class that implements a search for prime numbers.
Definition: CPUCruncher.h:19
ContextID_t slot() const
Definition: EventContext.h:41
MsgStream & info() const
shortcut for the method msgStream(MSG::INFO)
void setIOBound(bool value)
Definition: Algorithm.h:491
StatusCode initialize() override
standard initialization method
virtual ~CPUCruncher()
virtual & protected desctrustor
Definition: CPUCruncher.cpp:39
T to_string(T...args)
void findPrimes(const unsigned long int)
The CPU intensive function.
T clock(T...args)
Gaudi::Property< unsigned int > m_rwRepetitions
Definition: CPUCruncher.h:61
T sleep_for(T...args)
Parameters for the Gauss random number generation.
std::vector< DataObjectHandle< DataObject > * > m_outputHandles
Definition: CPUCruncher.h:75
void calibrate()
Calibrate.
Definition: CPUCruncher.cpp:85
long unsigned int getNCaliIters(double)
#define VERBOSE_MSG
Definition: CPUCruncher.cpp:20
#define DECLARE_COMPONENT(type)
Definition: PluginService.h:36
T resize(T...args)
ContextEvt_t evt() const
Definition: EventContext.h:40
STL class.
tbb::concurrent_hash_map< std::string, unsigned int > CHM
Definition: CPUCruncher.h:23
T push_back(T...args)
MsgStream & error() const
shortcut for the method msgStream(MSG::ERROR)
static std::vector< unsigned int > m_niters_vect
Definition: CPUCruncher.h:67
static CHM m_name_ncopies_map
Definition: CPUCruncher.h:77
This class is used for returning status codes from appropriate routines.
Definition: StatusCode.h:26
constexpr double m
Definition: SystemOfUnits.h:93
StatusCode finalize() override
standard finalization method
The useful base class for data processing algorithms.
std::vector< DataObjectHandle< DataObject > * > m_inputHandles
Definition: CPUCruncher.h:74
T fabs(T...args)
static std::vector< double > m_times_vect
Definition: CPUCruncher.h:68
#define DEBUG_MSG
Definition: CPUCruncher.cpp:17
T size(T...args)
Gaudi::Property< bool > m_shortCalib
Definition: CPUCruncher.h:60
MsgStream & debug() const
shortcut for the method msgStream(MSG::DEBUG)
Gaudi::Property< double > m_avg_runtime
Definition: CPUCruncher.h:57
bool isIOBound() const
Definition: Algorithm.h:489
StatusCode initialize() override
Its initialization.
Definition: CPUCruncher.cpp:46
T back(T...args)
CPUCruncher()
the default constructor is disabled
SmartIF< IMessageSvc > & msgSvc() const
The standard message service.
T hex(T...args)
Gaudi::Property< std::vector< std::string > > m_outKeys
Definition: CPUCruncher.h:55
Gaudi::Property< std::vector< std::string > > m_inpKeys
Definition: CPUCruncher.h:54
A DataObject is the base class of any identifiable object on any data store.
Definition: DataObject.h:30
StatusCode finalize() override
the finalization of the algorithm
MsgStream & endmsg(MsgStream &s)
MsgStream Modifier: endmsg. Calls the output method of the MsgStream.
Definition: MsgStream.h:244
const EventContext * getContext() const override
get the context
Definition: Algorithm.h:435
Gaudi::Property< bool > m_local_rndm_gen
Definition: CPUCruncher.h:59
Gaudi::Property< double > m_var_runtime
Definition: CPUCruncher.h:58
#define ON_DEBUG
Definition: CPUCruncher.cpp:16
Gaudi::Details::PropertyBase * declareProperty(const std::string &name, DataObjectHandle< T > &hndl, const std::string &doc="none")