All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
CPUCruncher.cpp
Go to the documentation of this file.
1 #include "CPUCruncher.h"
2 #include "HiveNumbers.h"
3 #include <ctime>
4 #include <sys/resource.h>
5 #include <sys/times.h>
6 
7 #include <tbb/tick_count.h>
8 #include <thread>
9 
10 std::vector<unsigned int> CPUCruncher::m_niters_vect;
11 std::vector<double> CPUCruncher::m_times_vect;
13 
15 
16 //------------------------------------------------------------------------------
17 
18 CPUCruncher::CPUCruncher(const std::string& name, // the algorithm instance name
19  ISvcLocator*pSvc) :
20  GaudiAlgorithm(name, pSvc), m_avg_runtime(1.), m_var_runtime(.01), m_shortCalib(
21  false) {
22 
23  // For Concurrent run
24  m_inputHandles.resize(MAX_INPUTS);
25  for (uint i = 0; i < MAX_INPUTS; ++i){
26  m_inputHandles[i] = new DataObjectHandle<DataObject>();
27  declareInput("input_" + std::to_string(i), *m_inputHandles[i]);
28  }
29 
30  m_outputHandles.resize(MAX_OUTPUTS);
31  for (uint i = 0; i < MAX_OUTPUTS; ++i){
32  m_outputHandles[i] = new DataObjectHandle<DataObject>();
33  declareOutput("output_" + std::to_string(i), *m_outputHandles[i]);
34  }
35 
36  declareProperty("avgRuntime", m_avg_runtime,
37  "Average runtime of the module.");
38  declareProperty("varRuntime", m_var_runtime,
39  "Variance of the runtime of the module.");
40  declareProperty("localRndm", m_local_rndm_gen = true,
41  "Decide if the local random generator is to be used");
42  declareProperty("NIterationsVect", m_niters_vect,
43  "Number of iterations for the calibration.");
44  declareProperty("NTimesVect", m_times_vect,
45  "Number of seconds for the calibration.");
46  declareProperty("shortCalib", m_shortCalib = false,
47  "Enable coarse grained calibration");
48  declareProperty("RwRepetitions", m_rwRepetitions = 1,
49  "Increase access to the WB");
50  declareProperty("SleepyExecution", m_sleepyExecution = false,
51  "Sleep during execution instead of crunching");
52 
53  // Register the algo in the static concurrent hash map in order to
54  // monitor the # of copies
55  CHM::accessor name_ninstances;
56  m_name_ncopies_map.insert(name_ninstances, name);
57  name_ninstances->second += 1;
58 }
59 
61  for (uint i = 0; i < MAX_INPUTS; ++i) {
62  delete m_inputHandles[i];
63  }
64 
66  for (uint i = 0; i < MAX_OUTPUTS; ++i) {
67  delete m_outputHandles[i];
68  }
69 }
70 
72  if (m_times_vect.size()==0){
73  calibrate();
74  }
75 
76  return StatusCode::SUCCESS ;
77 }
78 
79 /*
80 Calibrate the crunching finding the right relation between max number to be searched and time spent.
81 The relation is a sqrt for times greater than 10^-4 seconds.
82 */
84 
85  MsgStream log(msgSvc(), name());
86  m_niters_vect.push_back(0);
87  m_niters_vect.push_back(500);
88  m_niters_vect.push_back(600);
89  m_niters_vect.push_back(700);
90  m_niters_vect.push_back(800);
91  m_niters_vect.push_back(1000);
92  m_niters_vect.push_back(1300);
93  m_niters_vect.push_back(1600);
94  m_niters_vect.push_back(2000);
95  m_niters_vect.push_back(2300);
96  m_niters_vect.push_back(2600);
97  m_niters_vect.push_back(3000);
98  m_niters_vect.push_back(3300);
99  m_niters_vect.push_back(3500);
100  m_niters_vect.push_back(3900);
101  m_niters_vect.push_back(4200);
102  m_niters_vect.push_back(5000);
103  m_niters_vect.push_back(6000);
104  m_niters_vect.push_back(8000);
105  m_niters_vect.push_back(10000);
106  m_niters_vect.push_back(12000);
107  m_niters_vect.push_back(15000);
108  m_niters_vect.push_back(17000);
109  m_niters_vect.push_back(20000);
110  m_niters_vect.push_back(25000);
111  m_niters_vect.push_back(30000);
112  m_niters_vect.push_back(35000);
113  m_niters_vect.push_back(40000);
114  m_niters_vect.push_back(60000);
115  if (!m_shortCalib){
116  m_niters_vect.push_back(100000);
117  m_niters_vect.push_back(200000);
118  }
119 
120 
121  m_times_vect.resize(m_niters_vect.size());
122  m_times_vect[0]=0.;
123 
124 
125  log << MSG::INFO << "Starting calibration..." << endmsg;
126  for (unsigned int i=1;i<m_niters_vect.size();++i){
127  unsigned long niters=m_niters_vect[i];
128  unsigned int trials = 30;
129  do{
130  auto start_cali=tbb::tick_count::now();
131  findPrimes(niters);
132  auto stop_cali=tbb::tick_count::now();
133  double deltat = (stop_cali-start_cali).seconds();
134  m_times_vect[i]=deltat;
135  log << MSG::DEBUG << "Calibration: # iters = " << niters << " => " << deltat << endmsg;
136  trials--;
137  } while(trials > 0 and m_times_vect[i]<m_times_vect[i-1]); // make sure that they are monotonic
138  }
139  log << MSG::INFO << "Calibration finished!" << endmsg;
140 }
141 
142 unsigned long CPUCruncher::getNCaliIters(double runtime){
143 
144  unsigned int smaller_i=0;
145  double time=0.;
146  bool found=false;
147  // We know that the first entry is 0, so we start to iterate from 1
148  for (unsigned int i=1;i<m_times_vect.size();i++){
149  time = m_times_vect[i];
150  if (time>runtime){
151  smaller_i=i-1;
152  found=true;
153  break;
154  }
155  }
156 
157  // Case 1: we are outside the interpolation range, we take the last 2 points
158  if (not found)
159  smaller_i=m_times_vect.size()-2;
160 
161  // Case 2: we maeke a linear interpolation
162  // y=mx+q
163  const double x0=m_times_vect[smaller_i];
164  const double x1=m_times_vect[smaller_i+1];
165  const double y0=m_niters_vect[smaller_i];
166  const double y1=m_niters_vect[smaller_i+1];
167  const double m=(y1-y0)/(x1-x0);
168  const double q=y0-m*x0;
169 
170  const unsigned long nCaliIters = m * runtime + q ;
171  //always() << x0 << "<" << runtime << "<" << x1 << " Corresponding to " << nCaliIters << " iterations" << endmsg;
172 
173  return nCaliIters ;
174 }
175 
176 
177 void CPUCruncher::findPrimes (const unsigned long int n_iterations) {
178 
179 
180  MsgStream log(msgSvc(), name());
181 
182  // Flag to trigger the allocation
183  bool is_prime;
184 
185  // Let's prepare the material for the allocations
186  unsigned int primes_size=1;
187  unsigned long* primes = new unsigned long[primes_size];
188  primes[0]=2;
189 
190  unsigned long i = 2;
191 
192  // Loop on numbers
193  for (unsigned long int iiter=0;iiter<n_iterations;iiter++ ){
194  // Once at max, it returns to 0
195  i+=1;
196 
197  // Check if it can be divided by the smaller ones
198  is_prime = true;
199  for (unsigned long j=2;j<i && is_prime;++j){
200  if (i%j == 0){
201  is_prime = false;
202  }
203  }// end loop on numbers < than tested one
204  if (is_prime){
205  // copy the array of primes (INEFFICIENT ON PURPOSE!)
206  unsigned int new_primes_size = 1 + primes_size;
207  unsigned long* new_primes = new unsigned long[new_primes_size];
208 
209  for (unsigned int prime_index=0; prime_index<primes_size;prime_index++){
210  new_primes[prime_index]=primes[prime_index];
211  }
212  // attach the last prime
213  new_primes[primes_size]=i;
214 
215  // Update primes array
216  delete[] primes;
217  primes = new_primes;
218  primes_size=new_primes_size;
219  } // end is prime
220 
221  } // end of while loop
222 
223  // Fool Compiler optimisations:
224  for (unsigned int prime_index=0; prime_index<primes_size;prime_index++)
225  if (primes[prime_index] == 4)
226  log << "This does never happen, but it's necessary too fool aggressive compiler optimisations!"<< endmsg ;
227 
228  delete[] primes;
229 
230 }
231 
232 //------------------------------------------------------------------------------
233 
234 StatusCode CPUCruncher::execute () // the execution of the algorithm
235 {
236 
237  MsgStream logstream(msgSvc(), name());
238 
239  if (m_sleepyExecution) {
240  logstream << MSG::DEBUG << "Going to dream for: "<< m_avg_runtime << endmsg;
241  std::chrono::duration<double> dreamtime( m_avg_runtime );
242 
243  tbb::tick_count starttbb=tbb::tick_count::now();
244  std::this_thread::sleep_for(dreamtime);
245  tbb::tick_count endtbb=tbb::tick_count::now();
246  // actual sleeping time can be longer due to scheduling or resource contention delays
247  const double actualDreamTime=(endtbb-starttbb).seconds();
248 
249  logstream << MSG::DEBUG << "Actual dreaming time was: "<< actualDreamTime << "s" << endmsg;
250 
251  return StatusCode::SUCCESS;
252  }
253 
254  float runtime;
255 
256  if (m_local_rndm_gen){
257  /* This will disappear with a thread safe random number generator svc
258  * Use box mueller to generate gaussian randoms
259  * The quality is not good for in depth study given that the generator is a
260  * linear congruent.
261  * Throw away basically a free number: we are in a cpu cruncher after all.
262  * The seed is taken from the clock, but we could assign a seed per module to
263  * ensure reproducibility.
264  *
265  * This is not an overkill but rather an exercise towards a thread safe
266  * random number generation.
267  */
268 
// Draw one Gaussian-distributed number with the given mean and sigma using
// the Box-Muller transform on two uniform numbers from a local linear
// congruential generator seeded from the process clock.
auto getGausRandom = [] (double mean, double sigma) -> double {

  unsigned int seed = std::clock();

  // Advance the generator and return a uniform number in (0, 1].
  auto getUnifRandom = [] (unsigned int & seed) -> double {
    // Numerical Recipes constants; the modulus is 2^32 (a literal "232"
    // would restrict the generator to 232 distinct values).
    constexpr unsigned long long m = 1ULL << 32;
    constexpr unsigned long long a = 1664525ULL;
    constexpr unsigned long long c = 1013904223ULL;
    // 64-bit arithmetic so that the modulus is applied explicitly
    seed = static_cast<unsigned int>((a * seed + c) % m);
    // Shift by one so that 0 is never returned: the caller takes log() of it
    return (static_cast<double>(seed) + 1.) / static_cast<double>(m);
  };

  constexpr double two_pi = 6.283185307179586476925286766559;

  const double unif1 = getUnifRandom(seed);
  const double unif2 = getUnifRandom(seed);
  // unif1 is in (0, 1], hence log(unif1) <= 0 and the square root is finite
  const double normal = std::sqrt(-2.*std::log(unif1))*std::cos(two_pi*unif2);
  return normal*sigma + mean;
};
288  runtime = fabs(getGausRandom( m_avg_runtime , m_var_runtime ));
289  //End Of temp block
290  } else {
291  // Should be a member.
293  runtime = std::fabs(rndmgaus());
294  }
295 
296  tbb::tick_count starttbb=tbb::tick_count::now();
297  logstream << MSG::DEBUG << "Runtime will be: "<< runtime << endmsg;
298  if (getContext())
299  logstream << MSG::DEBUG << "Start event " << getContext()->evt()
300  << " in slot " << getContext()->slot()
301  << " on pthreadID " << std::hex << pthread_self() << std::dec
302  << endmsg;
303 
304  for (auto & inputHandle: m_inputHandles){
305  if(!inputHandle->isValid())
306  continue;
307 
308  DataObject* obj = nullptr;
309  for (unsigned int i=0; i<m_rwRepetitions;++i){
310  obj = inputHandle->get();
311  }
312  if (obj == nullptr)
313  logstream << MSG::ERROR << "A read object was a null pointer." << endmsg;
314  }
315 
316  const unsigned long n_iters= getNCaliIters(runtime);
317  findPrimes( n_iters );
318 
319  for (auto & outputHandle: m_outputHandles){
320  if(!outputHandle->isValid())
321  continue;
322 
323  outputHandle->put(new DataObject());
324  }
325 
326  for (auto & inputHandle: m_inputHandles){
327  if(!inputHandle->isValid())
328  continue;
329 
330  for (unsigned int i=1; i<m_rwRepetitions;++i){
331  inputHandle->get();
332  }
333  }
334 
335  tbb::tick_count endtbb=tbb::tick_count::now();
336 
337  const double actualRuntime=(endtbb-starttbb).seconds();
338 
339  if (getContext())
340  logstream << MSG::DEBUG << "Finish event " << getContext()->evt()
341  // << " on pthreadID " << getContext()->m_thread_id
342  << " in " << actualRuntime << " seconds" << endmsg;
343 
344  logstream << MSG::DEBUG << "Timing: ExpectedRuntime= " << runtime
345  << " ActualRuntime= " << actualRuntime
346  << " Ratio= " << runtime/actualRuntime
347  << " Niters= " << n_iters << endmsg;
348 
349 
350  return StatusCode::SUCCESS ;
351 }
352 
353 //------------------------------------------------------------------------------
354 
355 StatusCode CPUCruncher::finalize () // the finalization of the algorithm
356 {
357  MsgStream log(msgSvc(), name());
358 
359  unsigned int ninstances;
360 
361  {
362  CHM::const_accessor const_name_ninstances;
363  m_name_ncopies_map.find(const_name_ninstances,name());
364  ninstances=const_name_ninstances->second;
365  }
366 
367  constexpr double s2ms=1000.;
368  // do not show repetitions
369  if (ninstances!=0){
370  log << MSG::INFO << "Summary: name= "<< name()
371  <<"\t avg_runtime= " << m_avg_runtime*s2ms
372  << "\t n_clones= " << ninstances << endmsg;
373 
374  CHM::accessor name_ninstances;
375  m_name_ncopies_map.find(name_ninstances,name());
376  name_ninstances->second=0;
377  }
378 
379  return GaudiAlgorithm::finalize () ;
380 }
381 
382 //------------------------------------------------------------------------------
383 
384 const std::vector<std::string>
386 {
387  std::vector<std::string> di;
388  for (auto & h: m_inputHandles){
389  if(h->isValid())
390  di.push_back(h->dataProductName());
391  }
392 
393  return di;
394 }
395 
396 //------------------------------------------------------------------------------
397 
398 const std::vector<std::string>
400 {
401  std::vector<std::string> di;
402  for (auto & h: m_outputHandles){
403  if(h->isValid())
404  di.push_back(h->dataProductName());
405  }
406 
407  return di;
408 }
409 
410 //------------------------------------------------------------------------------
411 
412 
413 
tuple c
Definition: gaudirun.py:391
bool m_sleepyExecution
Definition: CPUCruncher.h:87
Definition of the MsgStream class used to transmit messages.
Definition: MsgStream.h:24
string to_string(const T &value)
Definition: mergesort.cpp:40
#define DECLARE_ALGORITHM_FACTORY(x)
Definition: Algorithm.h:946
The ISvcLocator is the interface implemented by the Service Factory in the Application Manager to loc...
Definition: ISvcLocator.h:25
virtual StatusCode initialize()
Its initialization.
Definition: CPUCruncher.cpp:71
MsgStream & endmsg(MsgStream &s)
MsgStream Modifier: endmsg. Calls the output method of the MsgStream.
Definition: MsgStream.h:244
bool m_shortCalib
Definition: CPUCruncher.h:68
const uint MAX_OUTPUTS
Definition: CPUCruncher.h:77
SmartIF< IMessageSvc > & msgSvc() const
The standard message service.
virtual ~CPUCruncher()
virtual & protected desctrustor
Definition: CPUCruncher.cpp:60
void findPrimes(const unsigned long int)
The CPU intensive function.
double m_avg_runtime
Definition: CPUCruncher.h:65
STL namespace.
Parameters for the Gauss random number generation.
void calibrate()
Calibrate.
Definition: CPUCruncher.cpp:83
virtual const std::vector< std::string > get_inputs()
Get the inputs.
virtual StatusCode execute()
the execution of the algorithm
long unsigned int getNCaliIters(double)
virtual StatusCode finalize()
the finalization of the algorithm
static std::vector< unsigned int > m_niters_vect
Definition: CPUCruncher.h:71
static CHM m_name_ncopies_map
Definition: CPUCruncher.h:84
This class is used for returning status codes from appropriate routines.
Definition: StatusCode.h:26
constexpr double m
Definition: SystemOfUnits.h:93
StatusCode finalize() override
standard finalization method
The useful base class for data processing algorithms.
static std::vector< double > m_times_vect
Definition: CPUCruncher.h:72
std::vector< DataObjectHandle< DataObject > * > m_inputHandles
Definition: CPUCruncher.h:79
unsigned int m_rwRepetitions
Definition: CPUCruncher.h:82
const uint MAX_INPUTS
Definition: CPUCruncher.h:76
virtual const std::vector< std::string > get_outputs()
Get the outputs.
bool m_local_rndm_gen
Definition: CPUCruncher.h:67
std::vector< DataObjectHandle< DataObject > * > m_outputHandles
Definition: CPUCruncher.h:80
A DataObject is the base class of any identifiable object on any data store.
Definition: DataObject.h:30
list i
Definition: ana.py:128
double m_var_runtime
Definition: CPUCruncher.h:66
tbb::concurrent_hash_map< std::string, unsigned int > CHM
Definition: CPUCruncher.h:26