CPUCruncher.cpp
Go to the documentation of this file.
1 #include "CPUCruncher.h"
2 #include "HiveNumbers.h"
3 #include <ctime>
4 #include <sys/resource.h>
5 #include <sys/times.h>
6 
7 #include <tbb/tick_count.h>
8 #include <thread>
9 
13 
14 //DECLARE_ALGORITHM_FACTORY(CPUCruncher)
16 
17 //------------------------------------------------------------------------------
18 
19 CPUCruncher::CPUCruncher(const std::string& name, // the algorithm instance name
20  ISvcLocator*pSvc) :
21  GaudiAlgorithm(name, pSvc), m_avg_runtime(1.), m_var_runtime(.01), m_shortCalib(
22  false) {
23 
24  declareProperty("inpKeys", m_inpKeys);
25  declareProperty("outKeys", m_outKeys);
26 
27 
28  declareProperty("avgRuntime", m_avg_runtime,
29  "Average runtime of the module.");
30  declareProperty("varRuntime", m_var_runtime,
31  "Variance of the runtime of the module.");
32  declareProperty("localRndm", m_local_rndm_gen = true,
33  "Decide if the local random generator is to be used");
34  declareProperty("NIterationsVect", m_niters_vect,
35  "Number of iterations for the calibration.");
36  declareProperty("NTimesVect", m_times_vect,
37  "Number of seconds for the calibration.");
38  declareProperty("shortCalib", m_shortCalib = false,
39  "Enable coarse grained calibration");
40  declareProperty("RwRepetitions", m_rwRepetitions = 1,
41  "Increase access to the WB");
42  declareProperty("SleepyExecution", m_sleepyExecution = false,
43  "Sleep during execution instead of crunching");
44 
45  // Register the algo in the static concurrent hash map in order to
46  // monitor the # of copies
47  CHM::accessor name_ninstances;
48  m_name_ncopies_map.insert(name_ninstances, name);
49  name_ninstances->second += 1;
50 }
51 
53  for (uint i = 0; i < m_inputHandles.size(); ++i) {
54  delete m_inputHandles[i];
55  }
56 
58  for (uint i = 0; i < m_outputHandles.size(); ++i) {
59  delete m_outputHandles[i];
60  }
61 }
62 
64  if (m_times_vect.size()==0){
65  calibrate();
66  }
67 
68  // This is a bit ugly. There is no way to declare a vector of DataObjectHandles, so
69  // we need to wait until initialize when we've read in the input and output key
70  // properties, and know their size, and then turn them
71  // into Handles and register them with the framework by calling declareProperty. We
72  // could call declareInput/declareOutput on them too.
73 
74  int i=0;
75  for (auto k: m_inpKeys) {
76  debug() << "adding input key " << k << endmsg;
78  declareProperty("dummy_in_" + std::to_string(i), *(m_inputHandles.back()) );
79  i++;
80  }
81 
82  i = 0;
83  for (auto k: m_outKeys) {
84  debug() << "adding output key " << k << endmsg;
86  declareProperty("dummy_out_" + std::to_string(i), *(m_outputHandles.back()) );
87  i++;
88  }
89 
90  return StatusCode::SUCCESS ;
91 }
92 
93 /*
94 Calibrate the crunching by finding the relation between the maximum number to be searched and the time spent.
95 The relation follows a square root for times greater than 10^-4 seconds.
96 */
98 
99  MsgStream log(msgSvc(), name());
105  m_niters_vect.push_back(1000);
106  m_niters_vect.push_back(1300);
107  m_niters_vect.push_back(1600);
108  m_niters_vect.push_back(2000);
109  m_niters_vect.push_back(2300);
110  m_niters_vect.push_back(2600);
111  m_niters_vect.push_back(3000);
112  m_niters_vect.push_back(3300);
113  m_niters_vect.push_back(3500);
114  m_niters_vect.push_back(3900);
115  m_niters_vect.push_back(4200);
116  m_niters_vect.push_back(5000);
117  m_niters_vect.push_back(6000);
118  m_niters_vect.push_back(8000);
119  m_niters_vect.push_back(10000);
120  m_niters_vect.push_back(12000);
121  m_niters_vect.push_back(15000);
122  m_niters_vect.push_back(17000);
123  m_niters_vect.push_back(20000);
124  m_niters_vect.push_back(25000);
125  m_niters_vect.push_back(30000);
126  m_niters_vect.push_back(35000);
127  m_niters_vect.push_back(40000);
128  m_niters_vect.push_back(60000);
129  if (!m_shortCalib){
130  m_niters_vect.push_back(100000);
131  m_niters_vect.push_back(200000);
132  }
133 
134 
136  m_times_vect[0]=0.;
137 
138 
139  log << MSG::INFO << "Starting calibration..." << endmsg;
140  for (unsigned int i=1;i<m_niters_vect.size();++i){
141  unsigned long niters=m_niters_vect[i];
142  unsigned int trials = 30;
143  do{
144  auto start_cali=tbb::tick_count::now();
145  findPrimes(niters);
146  auto stop_cali=tbb::tick_count::now();
147  double deltat = (stop_cali-start_cali).seconds();
148  m_times_vect[i]=deltat;
149  log << MSG::DEBUG << "Calibration: # iters = " << niters << " => " << deltat << endmsg;
150  trials--;
151  } while(trials > 0 and m_times_vect[i]<m_times_vect[i-1]); // make sure that they are monotonic
152  }
153  log << MSG::INFO << "Calibration finished!" << endmsg;
154 }
155 
156 unsigned long CPUCruncher::getNCaliIters(double runtime){
157 
158  unsigned int smaller_i=0;
159  double time=0.;
160  bool found=false;
161  // We know that the first entry is 0, so we start to iterate from 1
162  for (unsigned int i=1;i<m_times_vect.size();i++){
163  time = m_times_vect[i];
164  if (time>runtime){
165  smaller_i=i-1;
166  found=true;
167  break;
168  }
169  }
170 
171  // Case 1: we are outside the interpolation range, we take the last 2 points
172  if (not found)
173  smaller_i=m_times_vect.size()-2;
174 
175  // Case 2: we maeke a linear interpolation
176  // y=mx+q
177  const double x0=m_times_vect[smaller_i];
178  const double x1=m_times_vect[smaller_i+1];
179  const double y0=m_niters_vect[smaller_i];
180  const double y1=m_niters_vect[smaller_i+1];
181  const double m=(y1-y0)/(x1-x0);
182  const double q=y0-m*x0;
183 
184  const unsigned long nCaliIters = m * runtime + q ;
185  //always() << x0 << "<" << runtime << "<" << x1 << " Corresponding to " << nCaliIters << " iterations" << endmsg;
186 
187  return nCaliIters ;
188 }
189 
190 
191 void CPUCruncher::findPrimes (const unsigned long int n_iterations) {
192 
193 
194  MsgStream log(msgSvc(), name());
195 
196  // Flag to trigger the allocation
197  bool is_prime;
198 
199  // Let's prepare the material for the allocations
200  unsigned int primes_size=1;
201  unsigned long* primes = new unsigned long[primes_size];
202  primes[0]=2;
203 
204  unsigned long i = 2;
205 
206  // Loop on numbers
207  for (unsigned long int iiter=0;iiter<n_iterations;iiter++ ){
208  // Once at max, it returns to 0
209  i+=1;
210 
211  // Check if it can be divided by the smaller ones
212  is_prime = true;
213  for (unsigned long j=2;j<i && is_prime;++j){
214  if (i%j == 0){
215  is_prime = false;
216  }
217  }// end loop on numbers < than tested one
218  if (is_prime){
219  // copy the array of primes (INEFFICIENT ON PURPOSE!)
220  unsigned int new_primes_size = 1 + primes_size;
221  unsigned long* new_primes = new unsigned long[new_primes_size];
222 
223  for (unsigned int prime_index=0; prime_index<primes_size;prime_index++){
224  new_primes[prime_index]=primes[prime_index];
225  }
226  // attach the last prime
227  new_primes[primes_size]=i;
228 
229  // Update primes array
230  delete[] primes;
231  primes = new_primes;
232  primes_size=new_primes_size;
233  } // end is prime
234 
235  } // end of while loop
236 
237  // Fool Compiler optimisations:
238  for (unsigned int prime_index=0; prime_index<primes_size;prime_index++)
239  if (primes[prime_index] == 4)
240  log << "This does never happen, but it's necessary too fool aggressive compiler optimisations!"<< endmsg ;
241 
242  delete[] primes;
243 
244 }
245 
246 //------------------------------------------------------------------------------
247 
248 StatusCode CPUCruncher::execute () // the execution of the algorithm
249 {
250 
251  MsgStream logstream(msgSvc(), name());
252 
253  if (m_sleepyExecution) {
254  logstream << MSG::DEBUG << "Going to dream for: "<< m_avg_runtime << endmsg;
256 
257  tbb::tick_count starttbb=tbb::tick_count::now();
258  std::this_thread::sleep_for(dreamtime);
259  tbb::tick_count endtbb=tbb::tick_count::now();
260  // actual sleeping time can be longer due to scheduling or resource contention delays
261  const double actualDreamTime=(endtbb-starttbb).seconds();
262 
263  logstream << MSG::DEBUG << "Actual dreaming time was: "<< actualDreamTime << "s" << endmsg;
264 
265  return StatusCode::SUCCESS;
266  }
267 
268  float runtime;
269 
270  if (m_local_rndm_gen){
271  /* This will disappear with a thread safe random number generator svc
272  * Use box mueller to generate gaussian randoms
273  * The quality is not good for in depth study given that the generator is a
274  * linear congruent.
275  * Throw away basically a free number: we are in a cpu cruncher after all.
276  * The seed is taken from the clock, but we could assign a seed per module to
277  * ensure reproducibility.
278  *
279  * This is not an overkill but rather an exercise towards a thread safe
280  * random number generation.
281  */
282 
// Draw one Gaussian distributed number with the Box-Muller transform.
//   mean  : centre of the distribution
//   sigma : width of the distribution
// Every call re-seeds a small linear congruential generator from the clock.
auto getGausRandom = [] (double mean, double sigma) -> double {

  unsigned int seed = std::clock();

  // Uniform random number in (0, 1]; advances the generator state in `seed`.
  auto getUnifRandom = [] (unsigned int & seed) -> double {
    // Linear congruential generator from Numerical Recipes:
    // a = 1664525, c = 1013904223, modulus 2^32 (not 232).
    constexpr unsigned int a = 1664525;
    constexpr unsigned int c = 1013904223;
    // The mask keeps the state modulo 2^32 even where unsigned int is wider
    // than 32 bits.
    seed = (a * seed + c) & 0xFFFFFFFFu;
    // Shift by one so that 0 is never returned: log(0) would make the
    // generated runtime infinite.
    const double unif = (double(seed) + 1.) / 4294967296.;
    return unif;
  };

  const double unif1 = getUnifRandom(seed);
  const double unif2 = getUnifRandom(seed);
  const double normal = std::sqrt(-2.*std::log(unif1))*std::cos(2*M_PI*unif2);
  return normal*sigma + mean;
};
302  runtime = fabs(getGausRandom( m_avg_runtime , m_var_runtime ));
303  //End Of temp block
304  } else {
305  // Should be a member.
307  runtime = std::fabs(rndmgaus());
308  }
309 
310  tbb::tick_count starttbb=tbb::tick_count::now();
311  logstream << MSG::DEBUG << "Runtime will be: "<< runtime << endmsg;
312  if (getContext())
313  logstream << MSG::DEBUG << "Start event " << getContext()->evt()
314  << " in slot " << getContext()->slot()
315  << " on pthreadID " << std::hex << pthread_self() << std::dec
316  << endmsg;
317 
318  for (auto & inputHandle: m_inputHandles){
319  if(!inputHandle->isValid())
320  continue;
321 
322  DataObject* obj = nullptr;
323  for (unsigned int i=0; i<m_rwRepetitions;++i){
324  obj = inputHandle->get();
325  }
326  if (obj == nullptr)
327  logstream << MSG::ERROR << "A read object was a null pointer." << endmsg;
328  }
329 
330  const unsigned long n_iters= getNCaliIters(runtime);
331  findPrimes( n_iters );
332 
333  for (auto & outputHandle: m_outputHandles){
334  if(!outputHandle->isValid())
335  continue;
336 
337  outputHandle->put(new DataObject());
338  }
339 
340  for (auto & inputHandle: m_inputHandles){
341  if(!inputHandle->isValid())
342  continue;
343 
344  for (unsigned int i=1; i<m_rwRepetitions;++i){
345  inputHandle->get();
346  }
347  }
348 
349  tbb::tick_count endtbb=tbb::tick_count::now();
350 
351  const double actualRuntime=(endtbb-starttbb).seconds();
352 
353  if (getContext())
354  logstream << MSG::DEBUG << "Finish event " << getContext()->evt()
355  // << " on pthreadID " << getContext()->m_thread_id
356  << " in " << actualRuntime << " seconds" << endmsg;
357 
358  logstream << MSG::DEBUG << "Timing: ExpectedRuntime= " << runtime
359  << " ActualRuntime= " << actualRuntime
360  << " Ratio= " << runtime/actualRuntime
361  << " Niters= " << n_iters << endmsg;
362 
363 
364  return StatusCode::SUCCESS ;
365 }
366 
367 //------------------------------------------------------------------------------
368 
369 StatusCode CPUCruncher::finalize () // the finalization of the algorithm
370 {
371  MsgStream log(msgSvc(), name());
372 
373  unsigned int ninstances;
374 
375  {
376  CHM::const_accessor const_name_ninstances;
377  m_name_ncopies_map.find(const_name_ninstances,name());
378  ninstances=const_name_ninstances->second;
379  }
380 
381  constexpr double s2ms=1000.;
382  // do not show repetitions
383  if (ninstances!=0){
384  log << MSG::INFO << "Summary: name= "<< name()
385  <<"\t avg_runtime= " << m_avg_runtime*s2ms
386  << "\t n_clones= " << ninstances << endmsg;
387 
388  CHM::accessor name_ninstances;
389  m_name_ncopies_map.find(name_ninstances,name());
390  name_ninstances->second=0;
391  }
392 
393  return GaudiAlgorithm::finalize () ;
394 }
395 
396 //------------------------------------------------------------------------------
397 
398 
399 
bool m_sleepyExecution
Definition: CPUCruncher.h:81
Definition of the MsgStream class used to transmit messages.
Definition: MsgStream.h:24
The ISvcLocator is the interface implemented by the Service Factory in the Application Manager to loc...
Definition: ISvcLocator.h:25
virtual StatusCode initialize()
Its initialization.
Definition: CPUCruncher.cpp:63
bool m_shortCalib
Definition: CPUCruncher.h:60
ContextID_t slot() const
Definition: EventContext.h:41
const uint MAX_OUTPUTS
Definition: CPUCruncher.h:69
tuple c
Definition: gaudirun.py:391
virtual ~CPUCruncher()
virtual & protected destructor
Definition: CPUCruncher.cpp:52
T to_string(T...args)
void findPrimes(const unsigned long int)
The CPU intensive function.
T clock(T...args)
double m_avg_runtime
Definition: CPUCruncher.h:57
STL namespace.
T sleep_for(T...args)
Property * declareProperty(const std::string &name, DataObjectHandle< T > &hndl, const std::string &doc="none") const
Parameters for the Gauss random number generation.
EventContext * getContext() const
get the context
Definition: Algorithm.h:571
void calibrate()
Calibrate.
Definition: CPUCruncher.cpp:97
virtual StatusCode execute()
the execution of the algorithm
long unsigned int getNCaliIters(double)
#define DECLARE_COMPONENT(type)
Definition: PluginService.h:36
T resize(T...args)
ContextEvt_t evt() const
Definition: EventContext.h:40
const std::string & name() const override
The identifying name of the algorithm object.
Definition: Algorithm.cpp:820
virtual StatusCode finalize()
the finalization of the algorithm
T push_back(T...args)
static std::vector< unsigned int > m_niters_vect
Definition: CPUCruncher.h:63
static CHM m_name_ncopies_map
Definition: CPUCruncher.h:78
This class is used for returning status codes from appropriate routines.
Definition: StatusCode.h:26
constexpr double m
Definition: SystemOfUnits.h:93
StatusCode finalize() override
standard finalization method
The useful base class for data processing algorithms.
SmartIF< IRndmGenSvc > & randSvc() const
AIDA-based NTuple service Returns a pointer to the AIDATuple service if present.
T fabs(T...args)
static std::vector< double > m_times_vect
Definition: CPUCruncher.h:64
std::vector< DataObjectHandle< DataObject > * > m_inputHandles
Definition: CPUCruncher.h:71
T size(T...args)
MsgStream & debug() const
shortcut for the method msgStream(MSG::DEBUG)
T back(T...args)
SmartIF< IMessageSvc > & msgSvc() const
The standard message service.
T hex(T...args)
std::vector< std::string > m_outKeys
Definition: CPUCruncher.h:74
unsigned int m_rwRepetitions
Definition: CPUCruncher.h:76
std::vector< std::string > m_inpKeys
Definition: CPUCruncher.h:74
bool m_local_rndm_gen
Definition: CPUCruncher.h:59
std::vector< DataObjectHandle< DataObject > * > m_outputHandles
Definition: CPUCruncher.h:72
A DataObject is the base class of any identifiable object on any data store.
Definition: DataObject.h:30
list i
Definition: ana.py:128
double m_var_runtime
Definition: CPUCruncher.h:58
tbb::concurrent_hash_map< std::string, unsigned int > CHM
Definition: CPUCruncher.h:24
MsgStream & endmsg(MsgStream &s)
MsgStream Modifier: endmsg. Calls the output method of the MsgStream.
Definition: MsgStream.h:244