All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
CPUCruncher.cpp
Go to the documentation of this file.
1 #include "CPUCruncher.h"
2 #include "HiveNumbers.h"
3 #include <ctime>
4 #include <sys/resource.h>
5 #include <sys/times.h>
7 
8 #include <tbb/tick_count.h>
9 #include <thread>
10 
14 
16 
// Guard macros around the message streams. ON_DEBUG / ON_VERBOSE expand to a bare `if` on the
// current message level, so they must be followed by exactly one statement or a braced block,
// and an `else` written after them would bind to the hidden `if`.
17 #define ON_DEBUG if (msgLevel(MSG::DEBUG))
// Streams to debug() only when the DEBUG level is enabled.
18 #define DEBUG_MSG ON_DEBUG debug()
19 
20 #define ON_VERBOSE if (msgLevel(MSG::VERBOSE))
// Streams to verbose() only when the VERBOSE level is enabled.
21 #define VERBOSE_MSG ON_VERBOSE verbose()
22 
23 //------------------------------------------------------------------------------
24 
25 CPUCruncher::CPUCruncher( const std::string& name, // the algorithm instance name
26  ISvcLocator* pSvc )
27  : GaudiAlgorithm( name, pSvc )
28 {
29 
30  declareProperty( "NIterationsVect", m_niters_vect, "Number of iterations for the calibration." );
31  declareProperty( "NTimesVect", m_times_vect, "Number of seconds for the calibration." );
32 
33  // Register the algo in the static concurrent hash map in order to
34  // monitor the # of copies
35  CHM::accessor name_ninstances;
36  m_name_ncopies_map.insert( name_ninstances, name );
37  name_ninstances->second += 1;
38 }
39 
41 {
42  for ( uint i = 0; i < m_inputHandles.size(); ++i ) delete m_inputHandles[i];
43 
44  for ( uint i = 0; i < m_outputHandles.size(); ++i ) delete m_outputHandles[i];
45 }
46 
48 {
49  auto sc = GaudiAlgorithm::initialize();
50  if ( !sc ) return sc;
51 
52  if ( m_times_vect.size() == 0 ) calibrate();
53 
54  // if an algorithm was setup to sleep, for whatever period, it effectively becomes I/O-bound
55  if ( m_sleepFraction != 0.0f ) setIOBound( true );
56 
57  // This is a bit ugly. There is no way to declare a vector of DataObjectHandles, so
58  // we need to wait until initialize when we've read in the input and output key
59  // properties, and know their size, and then turn them
60  // into Handles and register them with the framework by calling declareProperty. We
61  // could call declareInput/declareOutput on them too.
62 
63  int i = 0;
64  for ( auto k : m_inpKeys ) {
65  DEBUG_MSG << "adding input key " << k << endmsg;
67  declareProperty( "dummy_in_" + std::to_string( i ), *( m_inputHandles.back() ) );
68  i++;
69  }
70 
71  i = 0;
72  for ( auto k : m_outKeys ) {
73  DEBUG_MSG << "adding output key " << k << endmsg;
75  declareProperty( "dummy_out_" + std::to_string( i ), *( m_outputHandles.back() ) );
76  i++;
77  }
78 
79  return sc;
80 }
81 
82 /*
83 Calibrate the crunching by finding the relation between the maximum number to be searched and the time spent.
84 The relation is a sqrt for times greater than 10^-4 seconds.
85 */
87 {
88  m_niters_vect = { 0, 500, 600, 700, 800,
89  1000, 1300, 1600,
90  2000, 2300, 2600,
91  3000, 3300, 3500, 3900,
92  4200, 5000, 6000, 8000,
93  10000, 12000, 15000, 17000,
94  20000, 25000,
95  30000, 35000,
96  40000, 60000 };
97  if ( !m_shortCalib ) {
98  m_niters_vect.push_back( 100000 );
99  m_niters_vect.push_back( 200000 );
100  }
101 
103  m_times_vect[0] = 0.;
104 
105  info() << "Starting calibration..." << endmsg;
106  for ( unsigned int i = 1; i < m_niters_vect.size(); ++i ) {
107  unsigned long niters = m_niters_vect[i];
108  unsigned int trials = 30;
109  do {
110  auto start_cali = tbb::tick_count::now();
111  findPrimes( niters );
112  auto stop_cali = tbb::tick_count::now();
113  double deltat = ( stop_cali - start_cali ).seconds();
114  m_times_vect[i] = deltat;
115  DEBUG_MSG << "Calibration: # iters = " << niters << " => " << deltat << endmsg;
116  trials--;
117  } while ( trials > 0 and m_times_vect[i] < m_times_vect[i - 1] ); // make sure that they are monotonic
118  }
119  info() << "Calibration finished!" << endmsg;
120 }
121 
122 unsigned long CPUCruncher::getNCaliIters( double runtime )
123 {
124 
125  unsigned int smaller_i = 0;
126  double time = 0.;
127  bool found = false;
128  // We know that the first entry is 0, so we start to iterate from 1
129  for ( unsigned int i = 1; i < m_times_vect.size(); i++ ) {
130  time = m_times_vect[i];
131  if ( time > runtime ) {
132  smaller_i = i - 1;
133  found = true;
134  break;
135  }
136  }
137 
138  // Case 1: we are outside the interpolation range, we take the last 2 points
139  if ( not found ) smaller_i = m_times_vect.size() - 2;
140 
141  // Case 2: we maeke a linear interpolation
142  // y=mx+q
143  const double x0 = m_times_vect[smaller_i];
144  const double x1 = m_times_vect[smaller_i + 1];
145  const double y0 = m_niters_vect[smaller_i];
146  const double y1 = m_niters_vect[smaller_i + 1];
147  const double m = ( y1 - y0 ) / ( x1 - x0 );
148  const double q = y0 - m * x0;
149 
150  const unsigned long nCaliIters = m * runtime + q;
151  // always() << x0 << "<" << runtime << "<" << x1 << " Corresponding to " << nCaliIters << " iterations" << endmsg;
152 
153  return nCaliIters;
154 }
155 
156 void CPUCruncher::findPrimes( const unsigned long int n_iterations )
157 {
158  // Flag to trigger the allocation
159  bool is_prime;
160 
161  // Let's prepare the material for the allocations
162  unsigned int primes_size = 1;
163  unsigned long* primes = new unsigned long[primes_size];
164  primes[0] = 2;
165 
166  unsigned long i = 2;
167 
168  // Loop on numbers
169  for ( unsigned long int iiter = 0; iiter < n_iterations; iiter++ ) {
170  // Once at max, it returns to 0
171  i += 1;
172 
173  // Check if it can be divided by the smaller ones
174  is_prime = true;
175  for ( unsigned long j = 2; j < i && is_prime; ++j ) {
176  if ( i % j == 0 ) is_prime = false;
177  } // end loop on numbers < than tested one
178 
179  if ( is_prime ) {
180  // copy the array of primes (INEFFICIENT ON PURPOSE!)
181  unsigned int new_primes_size = 1 + primes_size;
182  unsigned long* new_primes = new unsigned long[new_primes_size];
183 
184  for ( unsigned int prime_index = 0; prime_index < primes_size; prime_index++ ) {
185  new_primes[prime_index] = primes[prime_index];
186  }
187  // attach the last prime
188  new_primes[primes_size] = i;
189 
190  // Update primes array
191  delete[] primes;
192  primes = new_primes;
193  primes_size = new_primes_size;
194  } // end is prime
195 
196  } // end of while loop
197 
198  // Fool Compiler optimisations:
199  for ( unsigned int prime_index = 0; prime_index < primes_size; prime_index++ )
200  if ( primes[prime_index] == 4 )
201  debug() << "This does never happen, but it's necessary too fool aggressive compiler optimisations!" << endmsg;
202 
203  delete[] primes;
204 }
205 
206 //------------------------------------------------------------------------------
207 
// Process one event: pick a crunch time, sleep for the I/O-bound share if the algorithm is
// I/O-bound, read every valid input handle, burn CPU in findPrimes() and put a new DataObject
// on every valid output handle. The function always returns StatusCode::SUCCESS.
208 StatusCode CPUCruncher::execute() // the execution of the algorithm
209 {
210  float crunchtime;
211 
212  if ( m_local_rndm_gen ) {
213  /* This will disappear with a thread safe random number generator svc
214  * Use box mueller to generate gaussian randoms
215  * The quality is not good for in depth study given that the generator is a
216  * linear congruent.
217  * Throw away basically a free number: we are in a cpu cruncher after all.
218  * The seed is taken from the clock, but we could assign a seed per module to
219  * ensure reproducibility.
220  *
221  * This is not an overkill but rather an exercise towards a thread safe
222  * random number generation.
223  */
224 
225  auto getGausRandom = []( double mean, double sigma ) -> double {
226 
// The seed is re-read from std::clock() on every call of this lambda.
227  unsigned int seed = std::clock();
228 
229  auto getUnifRandom = []( unsigned int& seed ) -> double {
230  // from numerical recipies
// NOTE(review): a modulus of 232 looks like a mangled 2^32, the modulus normally paired with
// a = 1664525 and c = 1013904223 -- confirm. As written, seed only takes values in [0, 232).
231  constexpr unsigned int m = 232;
232  constexpr unsigned int a = 1664525;
233  constexpr unsigned int c = 1013904223;
234  seed = ( a * seed + c ) % m;
235  const double unif = double( seed ) / m;
236  return unif;
237  };
238 
239  const double unif1 = getUnifRandom( seed );
240  const double unif2 = getUnifRandom( seed );
// NOTE(review): unif1 is exactly 0 whenever the updated seed is a multiple of m, and log( 0 )
// is -infinity; the resulting crunchtime is then not finite -- confirm this is acceptable.
241  const double normal = sqrt( -2. * log( unif1 ) ) * cos( 2 * M_PI * unif2 );
242  return normal * sigma + mean;
243  };
// Mean is the non-sleeping share of the average runtime; the absolute value keeps it non-negative.
244  crunchtime = fabs( getGausRandom( m_avg_runtime * ( 1. - m_sleepFraction ), m_var_runtime ) );
245  // End Of temp block
246  } else {
247  // Should be a member.
// NOTE(review): rndmgaus is not declared in the lines visible here; its declaration presumably
// sits on a line missing from this listing -- verify against the real file.
249  crunchtime = std::fabs( rndmgaus() );
250  }
251 
252  // Prepare to sleep (even if we won't enter the following if clause for sleeping).
253  // This is needed to distribute evenly among all algorithms the overhead (around sleeping) which is harmful when
254  // trying to achieve uniform distribution of algorithm timings.
255  const double dreamtime = m_avg_runtime * m_sleepFraction;
256  const std::chrono::duration<double> dreamtime_duration( dreamtime );
257  tbb::tick_count startSleeptbb;
258  tbb::tick_count endSleeptbb;
259 
260  // Start to measure the total time here, together with the dreaming process straight ahead
261  tbb::tick_count starttbb = tbb::tick_count::now();
262 
263  // If the algorithm was set as I/O-bound, we will replace requested part of crunching with plain sleeping
264  if ( isIOBound() ) {
265  // in this block (and not in other places around) msgLevel is checked for the same reason as above, when
266  // preparing to sleep several lines above: to reduce as much as possible the overhead around sleeping
267  DEBUG_MSG << "Dreaming time will be: " << dreamtime << endmsg;
268 
// The two time stamps around the sleep are only taken when DEBUG output is enabled.
269  ON_DEBUG startSleeptbb = tbb::tick_count::now();
270  std::this_thread::sleep_for( dreamtime_duration );
271  ON_DEBUG endSleeptbb = tbb::tick_count::now();
272 
273  // actual sleeping time can be longer due to scheduling or resource contention delays
274  ON_DEBUG {
275  const double actualDreamTime = ( endSleeptbb - startSleeptbb ).seconds();
276  debug() << "Actual dreaming time was: " << actualDreamTime << "s" << endmsg;
277  }
278  } // end of "sleeping block"
279 
280  DEBUG_MSG << "Crunching time will be: " << crunchtime << endmsg;
// NOTE(review): `context` is used below as an object with evt() and slot(), but it is not
// declared in the lines visible here; its declaration presumably sits on a missing line -- verify.
282  DEBUG_MSG << "Start event " << context.evt() << " in slot " << context.slot()
283  << " on pthreadID " << std::hex << pthread_self() << std::dec << endmsg;
284 
285  VERBOSE_MSG << "inputs number: " << m_inputHandles.size() << endmsg;
// Read phase: every valid input handle is fetched m_rwRepetitions times; only the last
// pointer obtained is checked against nullptr.
286  for ( auto& inputHandle : m_inputHandles ) {
287  if ( !inputHandle->isValid() ) continue;
288 
289  VERBOSE_MSG << "get from TS: " << inputHandle->objKey() << endmsg;
290  DataObject* obj = nullptr;
291  for ( unsigned int i = 0; i < m_rwRepetitions; ++i ) {
292  obj = inputHandle->get();
293  }
// With m_rwRepetitions == 0 the loop above never runs, so obj stays null and this error is reported.
294  if ( obj == nullptr ) error() << "A read object was a null pointer." << endmsg;
295  }
296 
// Crunch phase: translate the requested time into iterations through the calibration tables.
297  const unsigned long n_iters = getNCaliIters( crunchtime );
298  findPrimes( n_iters );
299 
300  VERBOSE_MSG << "outputs number: " << m_outputHandles.size() << endmsg;
// Write phase: one freshly allocated DataObject is handed to every valid output handle.
301  for ( auto& outputHandle : m_outputHandles ) {
302  if ( !outputHandle->isValid() ) continue;
303 
304  VERBOSE_MSG << "put to TS: " << outputHandle->objKey() << endmsg;
305  outputHandle->put( new DataObject() );
306  }
307 
308  tbb::tick_count endtbb = tbb::tick_count::now();
309 
// Wall-clock time since starttbb, i.e. sleeping + reading + crunching + writing.
310  const double actualRuntime = ( endtbb - starttbb ).seconds();
311 
312  DEBUG_MSG << "Finish event " << context.evt()
313  // << " on pthreadID " << context.m_thread_id
314  << " in " << actualRuntime << " seconds" << endmsg;
315 
316  DEBUG_MSG << "Timing: ExpectedCrunchtime= " << crunchtime << " ExpectedDreamtime= " << dreamtime
317  << " ActualTotalRuntime= " << actualRuntime << " Ratio= " << ( crunchtime + dreamtime ) / actualRuntime
318  << " Niters= " << n_iters << endmsg;
319 
320  return StatusCode::SUCCESS;
321 }
322 
323 //------------------------------------------------------------------------------
324 
325 StatusCode CPUCruncher::finalize() // the finalization of the algorithm
326 {
327  MsgStream log( msgSvc(), name() );
328 
329  unsigned int ninstances;
330 
331  {
332  CHM::const_accessor const_name_ninstances;
333  m_name_ncopies_map.find( const_name_ninstances, name() );
334  ninstances = const_name_ninstances->second;
335  }
336 
337  constexpr double s2ms = 1000.;
338  // do not show repetitions
339  if ( ninstances != 0 ) {
340  info() << "Summary: name= " << name() << "\t avg_runtime= " << m_avg_runtime * s2ms
341  << "\t n_clones= " << ninstances << endmsg;
342 
343  CHM::accessor name_ninstances;
344  m_name_ncopies_map.find( name_ninstances, name() );
345  name_ninstances->second = 0;
346  }
347 
348  return GaudiAlgorithm::finalize();
349 }
350 
351 //------------------------------------------------------------------------------
StatusCode execute() override
the execution of the algorithm
Definition of the MsgStream class used to transmit messages.
Definition: MsgStream.h:24
SmartIF< IRndmGenSvc > & randSvc() const
The standard RandomGen service, Return a pointer to the service if present.
Definition: Algorithm.cpp:780
Gaudi::Property< float > m_sleepFraction
Definition: CPUCruncher.h:62
The ISvcLocator is the interface implemented by the Service Factory in the Application Manager to loc...
Definition: ISvcLocator.h:25
const std::string & name() const override
The identifying name of the algorithm object.
Definition: Algorithm.cpp:715
A class that implements a search for prime numbers.
Definition: CPUCruncher.h:19
ContextID_t slot() const
Definition: EventContext.h:41
MsgStream & info() const
shortcut for the method msgStream(MSG::INFO)
void setIOBound(bool value)
Definition: Algorithm.h:467
StatusCode initialize() override
standard initialization method
virtual ~CPUCruncher()
virtual & protected destructor
Definition: CPUCruncher.cpp:40
T to_string(T...args)
void findPrimes(const unsigned long int)
The CPU intensive function.
T clock(T...args)
Gaudi::Property< unsigned int > m_rwRepetitions
Definition: CPUCruncher.h:61
T sleep_for(T...args)
const std::string & context() const
Returns the "context" string. Used to identify different processing states.
Definition: GaudiCommon.h:704
Parameters for the Gauss random number generation.
std::vector< DataObjectHandle< DataObject > * > m_outputHandles
Definition: CPUCruncher.h:75
void calibrate()
Calibrate.
Definition: CPUCruncher.cpp:86
This class represents an entry point to all the event specific data.
Definition: EventContext.h:25
long unsigned int getNCaliIters(double)
#define VERBOSE_MSG
Definition: CPUCruncher.cpp:21
#define DECLARE_COMPONENT(type)
Definition: PluginService.h:36
T resize(T...args)
ContextEvt_t evt() const
Definition: EventContext.h:40
STL class.
tbb::concurrent_hash_map< std::string, unsigned int > CHM
Definition: CPUCruncher.h:23
T push_back(T...args)
MsgStream & error() const
shortcut for the method msgStream(MSG::ERROR)
static std::vector< unsigned int > m_niters_vect
Definition: CPUCruncher.h:67
static CHM m_name_ncopies_map
Definition: CPUCruncher.h:77
This class is used for returning status codes from appropriate routines.
Definition: StatusCode.h:26
constexpr double m
Definition: SystemOfUnits.h:93
StatusCode finalize() override
standard finalization method
The useful base class for data processing algorithms.
std::vector< DataObjectHandle< DataObject > * > m_inputHandles
Definition: CPUCruncher.h:74
GAUDI_API const EventContext & currentContext()
T fabs(T...args)
static std::vector< double > m_times_vect
Definition: CPUCruncher.h:68
#define DEBUG_MSG
Definition: CPUCruncher.cpp:18
T size(T...args)
Gaudi::Property< bool > m_shortCalib
Definition: CPUCruncher.h:60
MsgStream & debug() const
shortcut for the method msgStream(MSG::DEBUG)
Gaudi::Property< double > m_avg_runtime
Definition: CPUCruncher.h:57
bool isIOBound() const
Definition: Algorithm.h:465
StatusCode initialize() override
Its initialization.
Definition: CPUCruncher.cpp:47
T back(T...args)
CPUCruncher()
the default constructor is disabled
SmartIF< IMessageSvc > & msgSvc() const
The standard message service.
T hex(T...args)
Gaudi::Property< std::vector< std::string > > m_outKeys
Definition: CPUCruncher.h:55
Gaudi::Details::PropertyBase * declareProperty(const std::string &name, ToolHandle< T > &hndl, const std::string &doc="none")
Definition: Algorithm.h:366
Gaudi::Property< std::vector< std::string > > m_inpKeys
Definition: CPUCruncher.h:54
A DataObject is the base class of any identifiable object on any data store.
Definition: DataObject.h:30
StatusCode finalize() override
the finalization of the algorithm
MsgStream & endmsg(MsgStream &s)
MsgStream Modifier: endmsg. Calls the output method of the MsgStream.
Definition: MsgStream.h:244
Gaudi::Property< bool > m_local_rndm_gen
Definition: CPUCruncher.h:59
Gaudi::Property< double > m_var_runtime
Definition: CPUCruncher.h:58
#define ON_DEBUG
Definition: CPUCruncher.cpp:17