pfm_gen_analysis.cpp
Go to the documentation of this file.
1 /*
2 Name: pfm_analysis.cpp
3 Author: Daniele Francesco Kruse
4 E-mail: daniele.francesco.kruse@cern.ch
5 Version: 0.9 (16/02/2010)
6 
7 This code is responsible for analysing results generated by the PerfmonService of CMSSW.
8 It takes 42 files as input (21 simple text files and 21 gzipped text files) and
9 produces a HTML directory containing the results of the analysis (both counting and sampling).
10 
11 compile linking zlib: g++ -Wall -lz pfm_analysis.cpp
12 */
13 
14 #include <ctype.h>
15 #include <cxxabi.h>
16 #include <fcntl.h>
17 #include <math.h>
18 #include <stdint.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <unistd.h>
25 #include <zlib.h>
26 
27 #include <algorithm>
28 #include <list>
29 #include <map>
30 #include <string>
31 #include <vector>
32 #include <sstream>
33 #include <iostream>
34 
35 #include <dirent.h>
36 #include <errno.h>
37 
38 //Core
// Per-event cost in cycles for the Core architecture. calc_core_deriv_values()
// multiplies the matching raw event count by these to obtain the "... Impact" values.
39 #define CORE_L2_MISS_CYCLES 200
40 #define CORE_L2_HIT_CYCLES 14.5
41 #define CORE_L1_DTLB_MISS_CYCLES 10
42 #define CORE_LCP_STALL_CYCLES 6
43 #define CORE_UNKNOWN_ADDR_STORE_CYCLES 5
44 #define CORE_OVERLAPPING_CYCLES 6
45 #define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES 20
46 
47 //Nehalem
// Per-event cost in cycles for Nehalem. calc_nhm_deriv_values() multiplies the
// matching raw event count by these to obtain the "... Impact" and
// "Cycles IFETCH served by ..." values.
48 #define I7_L1_DTLB_WALK_COMPLETED_CYCLES 35
49 #define I7_L1_ITLB_WALK_COMPLETED_CYCLES 35
50 #define I7_L2_HIT_CYCLES 6
51 #define I7_L3_UNSHARED_HIT_CYCLES 35
52 #define I7_OTHER_CORE_L2_HIT_CYCLES 60
53 #define I7_OTHER_CORE_L2_HITM_CYCLES 75
54 #define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES 225 //average of 200 (not modified) and 225-250 (modified)
55 #define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES 360 //average of 350 (not modified) and 370 (modified)
56 #define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES 180
57 #define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT 200
58 #define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT 350
59 #define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP 35
60 #define I7_IFETCH_L2_MISS_L3_HIT_SNOOP 60
61 #define I7_IFETCH_L2_MISS_L3_HITM 75
62 #define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD 180
63 
// NOTE(review): MAX_MODULES is not used in the visible part of the file; presumably an upper bound on the number of modules - confirm.
64 #define MAX_MODULES 1000
65 
// Reference CPI: calc_core_deriv_values() divides a module's CPI by this value when computing "iMargin".
66 #define EXPECTED_CPI 0.25
67 
// NOTE(review): the MAX_*_LENGTH values below are not used in the visible part of the file;
// presumably sizes of fixed character buffers used while parsing the input files - confirm.
68 #define MAX_FILENAME_LENGTH 1024
69 #define MAX_SAMPLE_INDEX_LENGTH 10000
70 #define MAX_SYM_LENGTH 15000
71 #define MAX_SYM_MOD_LENGTH 20000
72 #define MAX_LIB_LENGTH 5000
73 #define MAX_LIB_MOD_LENGTH 7000
74 #define MAX_SIMPLE_SYM_LENGTH 300
75 #define MAX_SIMPLE_SYM_MOD_LENGTH 500
76 #define MAX_SIMPLE_LIB_LENGTH 300
77 #define MAX_SIMPLE_LIB_MOD_LENGTH 500
78 #define MAX_LINE_LENGTH 20000
79 #define MAX_EVENT_NAME_LENGTH 150
80 #define MAX_MODULE_NAME_LENGTH 250
81 #define MAX_VALUE_STRING_LENGTH 250
82 #define MAX_ARCH_NAME_LENGTH 20
83 #define MAX_CMASK_STR_LENGTH 5
84 #define MAX_INV_STR_LENGTH 5
85 #define MAX_SP_STR_LENGTH 50
86 
// Size of the character buffer PipeReader passes to fgets() when draining a command's output.
87 #define PIPE_BUFFER_LENGTH 1000
88 
90 {
91  public:
92  PipeReader(const char *cmd)
93  {
94  pipe = popen(cmd, "r");
95  if(!pipe)
96  {
97  printf("Cannot open pipe. Exiting...\n");
98  exit(1);
99  }
100  char buffer[PIPE_BUFFER_LENGTH];
101  bzero(buffer, PIPE_BUFFER_LENGTH);
102  std::string result = "";
103  while(!feof(pipe))
104  {
105  if(fgets(buffer, PIPE_BUFFER_LENGTH, pipe)!=NULL)
106  {
107  result += buffer;
108  }
109  bzero(buffer, PIPE_BUFFER_LENGTH);
110  }
111  iss = new std::istringstream(result, std::istringstream::in);
112  }
113 
115  {
116  pclose(pipe);
117  delete iss;
118  }
119 
121  {
122  return *iss;
123  }
124  private:
125  FILE* pipe;
127 };
128 
// skipWhitespaces()
// const char *srcbuffer   : source string
// const char **destbuffer : receives the position of the first char after the white spaces
// Skips a run of one or more white space characters at the start of srcbuffer.
// Returns true and stores the position following the run in *destbuffer.
// Returns false, leaving *destbuffer untouched, when srcbuffer does not start
// with a white space character (this includes the empty string).
bool skipWhitespaces(const char *srcbuffer, const char **destbuffer)
{
 // isspace() is only defined for values representable as unsigned char (or EOF),
 // so the plain char is converted before every call.
 if(!isspace(static_cast<unsigned char>(*srcbuffer)))
 {
  return false;
 }
 do
 {
  ++srcbuffer;
 }
 while(isspace(static_cast<unsigned char>(*srcbuffer)));
 *destbuffer = srcbuffer;
 return true;
}
146 
// skipString()
// const char *strptr     : string expected at the very start of srcbuffer
// const char *srcbuffer  : source string
// const char **dstbuffer : receives the position right after the matched string
// Checks whether srcbuffer begins with strptr. The match is exact: no white
// space is skipped before or after it.
// Returns true and stores the position following the match in *dstbuffer.
// Returns false, leaving *dstbuffer untouched, when srcbuffer does not begin
// with strptr.
bool skipString(const char *strptr, const char *srcbuffer, const char **dstbuffer)
{
 const size_t length = strlen(strptr);
 if(strncmp(srcbuffer, strptr, length) != 0)
 {
  return false;
 }
 *dstbuffer = srcbuffer + length;
 return true;
}
163 
164 class FileInfo
165 {
166  public:
167  typedef int Offset;
169  FileInfo(void) : NAME("<dynamically generated>") {}
170  FileInfo(const std::string &name, bool useGdb) : NAME(name)
171  {
172  if(useGdb)
173  {
174  this->createOffsetMap();
175  }
176  }
177 
178  const char *symbolByOffset(Offset offset)
179  {
180  if(m_symbolCache.empty())
181  {
182  return 0;
183  }
184 
185  SymbolCache::iterator i = lower_bound(m_symbolCache.begin(), m_symbolCache.end(), offset, CacheItemComparator());
186  if(i->OFFSET == offset)
187  {
188  return i->NAME.c_str();
189  }
190 
191  if(i == m_symbolCache.begin())
192  {
193  return m_symbolCache.begin()->NAME.c_str();
194  }
195 
196  --i;
197 
198  return i->NAME.c_str();
199  }
200 
201  Offset next(Offset offset)
202  {
203  SymbolCache::iterator i = upper_bound(m_symbolCache.begin(), m_symbolCache.end(), offset, CacheItemComparator());
204  if(i == m_symbolCache.end())
205  {
206  return 0;
207  }
208  return i->OFFSET;
209  }
210 
211  private:
212  struct CacheItem
213  {
214  CacheItem(Offset offset, const std::string &name) : OFFSET(offset), NAME(name) {};
215  Offset OFFSET;
217  };
218 
220  SymbolCache m_symbolCache;
221 
223  {
224  bool operator()(const CacheItem& a, const int &b) const
225  {
226  return a.OFFSET < b;
227  }
228  bool operator()(const int& a, const CacheItem &b) const
229  {
230  return a < b.OFFSET;
231  }
232  };
233 
234  void createOffsetMap(void)
235  {
236  std::string commandLine = "objdump -p " + NAME;
237  PipeReader objdump(commandLine.c_str());
238  std::string oldname;
239  std::string suffix;
240  int vmbase = 0;
241  bool matched = false;
242  while(objdump.output())
243  {
244  // Checks the following regexp
245  //
246  // LOAD\\s+off\\s+(0x[0-9A-Fa-f]+)\\s+vaddr\\s+(0x[0-9A-Fa-f]+)
247  //
248  // and sets vmbase to be $2 - $1 of the first matched entry.
249 
251  std::getline(objdump.output(), line);
252 
253  if(!objdump.output()) break;
254  if(line.empty()) continue;
255  const char *lineptr = line.c_str();
256  if(!skipWhitespaces(lineptr, &lineptr)) continue;
257  if(!skipString("LOAD", lineptr, &lineptr)) continue;
258  if(!skipWhitespaces(lineptr, &lineptr)) continue;
259  if(!skipString("off", lineptr, &lineptr)) continue;
260  char *endptr = 0;
261  int initialBase = strtol(lineptr, &endptr, 16);
262  if(lineptr == endptr) continue;
263  lineptr = endptr;
264  if(!skipWhitespaces(lineptr, &lineptr)) continue;
265  if(!skipString("vaddr", lineptr, &lineptr)) continue;
266  if(!skipWhitespaces(lineptr, &lineptr)) continue;
267  int finalBase = strtol(lineptr, &endptr, 16);
268  if(lineptr == endptr) continue;
269  vmbase=finalBase - initialBase;
270  matched = true;
271  break;
272  }
273  if(!matched)
274  {
275  fprintf(stderr, "Cannot determine VM base address for %s\n", NAME.c_str());
276  fprintf(stderr, "Error while running `objdump -p %s`\n", NAME.c_str());
277  exit(1);
278  }
279  std::string commandLine2 = "nm -t d -n " + NAME;
280  PipeReader nm(commandLine2.c_str());
281  while(nm.output())
282  {
284  std::getline(nm.output(), line);
285  if(!nm.output()) break;
286  if(line.empty()) continue;
287  // If line does not match "^(\\d+)[ ]\\S[ ](\S+)$", exit.
288  const char *begin = line.c_str();
289  char *endptr = 0;
290  int address = strtol(begin, &endptr, 10);
291  if(endptr == begin) continue;
292  if(*endptr++ != ' ') continue;
293  if(isspace(*endptr++)) continue;
294  if(*endptr++ != ' ') continue;
295  char *symbolName = endptr;
296  while(*endptr && !isspace(*endptr)) endptr++;
297  if(*endptr != 0) continue;
298  // If line starts with '.' forget about it.
299  if(symbolName[0] == '.') continue;
300  // Create a new symbol with the given fileoffset.
301  // The symbol is automatically saved in the FileInfo cache by offset.
302  // If a symbol with the same offset is already there, the new one
303  // replaces the old one.
304  int offset = address-vmbase;
305  if(m_symbolCache.size() && (m_symbolCache.back().OFFSET == offset)) m_symbolCache.back().NAME = symbolName;
306  else m_symbolCache.push_back(CacheItem(address-vmbase, symbolName));
307  }
308  }
309 };
310 
// File-scope state shared by the functions of this file.
// NOTE(review): presumably total number of samples per module name; not used in the visible part of the file - confirm.
311 static std::map<std::string, unsigned int> modules_tot_samples;
// FileInfo objects by name. NOTE(review): presumably keyed by library file name - confirm.
312 static std::map<std::string, FileInfo> libsInfo;
// NOTE(review): presumably non-zero when the input comes from a Nehalem machine (Core otherwise) - confirm.
313 static int nehalem;
314 
// NOTE(review): C_modules, which calc_core_deriv_values() and calc_nhm_deriv_values() iterate over,
// is not declared in the visible part of the file; its declaration line appears to be missing from this listing.
// Event names available in the input. C_events is the list searched when the
// presence of the required core/nhm events is verified.
// NOTE(review): presumably C_ = counting and S_ = sampling events - confirm.
316 static std::vector<std::string> C_events;
317 static std::vector<std::string> S_events;
318 
// Raw event names required by the cycle accounting analysis (one list per architecture)
// and the row labels displayed for the derived values (the *_displ lists).
319 static std::vector<std::string> core_caa_events;
320 static std::vector<std::string> nhm_caa_events;
321 static std::vector<std::string> core_caa_events_displ;
322 static std::vector<std::string> nhm_caa_events_displ;
323 
// NOTE(review): the signature line of this function is missing from this listing.
// Appends to core_caa_events the names of the raw events read by
// calc_core_deriv_values(). That function also reads "ITLB_MISS_RETIRED",
// which is not appended here, so its presence is not covered by the check
// that walks core_caa_events.
325 {
326  core_caa_events.push_back("BRANCH_INSTRUCTIONS_RETIRED");
327  core_caa_events.push_back("ILD_STALL");
328  core_caa_events.push_back("INST_RETIRED:LOADS");
329  core_caa_events.push_back("INST_RETIRED:OTHER");
330  core_caa_events.push_back("INST_RETIRED:STORES");
331  core_caa_events.push_back("INSTRUCTIONS_RETIRED");
332  core_caa_events.push_back("LOAD_BLOCK:OVERLAP_STORE");
333  core_caa_events.push_back("LOAD_BLOCK:STA");
334  core_caa_events.push_back("LOAD_BLOCK:UNTIL_RETIRE");
335  core_caa_events.push_back("MEM_LOAD_RETIRED:DTLB_MISS");
336  core_caa_events.push_back("MEM_LOAD_RETIRED:L1D_LINE_MISS");
337  core_caa_events.push_back("MEM_LOAD_RETIRED:L2_LINE_MISS");
338  core_caa_events.push_back("MISPREDICTED_BRANCH_RETIRED");
339  //core_caa_events.push_back("RS_UOPS_DISPATCHED");
340  //core_caa_events.push_back("RS_UOPS_DISPATCHED CMASK=1");
341  core_caa_events.push_back("RS_UOPS_DISPATCHED CMASK=1 INV=1");
342  core_caa_events.push_back("SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE");
343  core_caa_events.push_back("UNHALTED_CORE_CYCLES");
344  //core_caa_events.push_back("UOPS_RETIRED:ANY");
345  //core_caa_events.push_back("UOPS_RETIRED:FUSED");
346  //core_caa_events.push_back("IDLE_DURING_DIV");
347 }
348 
// NOTE(review): the signature line of this function is missing from this listing.
// Appends to nhm_caa_events the names of the raw Nehalem events; the names
// are used as keys of the per-module value maps by calc_nhm_deriv_values().
350 {
351  nhm_caa_events.push_back("ARITH:CYCLES_DIV_BUSY");
352  nhm_caa_events.push_back("BR_INST_EXEC:ANY");
353  nhm_caa_events.push_back("BR_INST_EXEC:DIRECT_NEAR_CALL");
354  nhm_caa_events.push_back("BR_INST_EXEC:INDIRECT_NEAR_CALL");
355  nhm_caa_events.push_back("BR_INST_EXEC:INDIRECT_NON_CALL");
356  nhm_caa_events.push_back("BR_INST_EXEC:NEAR_CALLS");
357  nhm_caa_events.push_back("BR_INST_EXEC:NON_CALLS");
358  nhm_caa_events.push_back("BR_INST_EXEC:RETURN_NEAR");
359  nhm_caa_events.push_back("BR_INST_RETIRED:ALL_BRANCHES");
360  nhm_caa_events.push_back("BR_INST_RETIRED:CONDITIONAL");
361  nhm_caa_events.push_back("BR_INST_RETIRED:NEAR_CALL");
362  nhm_caa_events.push_back("BR_MISP_EXEC:ANY");
363  nhm_caa_events.push_back("CPU_CLK_UNHALTED:THREAD_P");
364  nhm_caa_events.push_back("DTLB_LOAD_MISSES:WALK_COMPLETED");
365  nhm_caa_events.push_back("INST_RETIRED:ANY_P");
366  nhm_caa_events.push_back("ITLB_MISSES:WALK_COMPLETED");
367  nhm_caa_events.push_back("L2_RQSTS:IFETCH_HIT");
368  nhm_caa_events.push_back("L2_RQSTS:IFETCH_MISS");
369  nhm_caa_events.push_back("MEM_INST_RETIRED:LOADS");
370  nhm_caa_events.push_back("MEM_INST_RETIRED:STORES");
371  nhm_caa_events.push_back("MEM_LOAD_RETIRED:L2_HIT");
372  nhm_caa_events.push_back("MEM_LOAD_RETIRED:L3_MISS");
373  nhm_caa_events.push_back("MEM_LOAD_RETIRED:L3_UNSHARED_HIT");
374  nhm_caa_events.push_back("MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM");
375  nhm_caa_events.push_back("MEM_UNCORE_RETIRED:LOCAL_DRAM");
376  nhm_caa_events.push_back("MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM");
377  nhm_caa_events.push_back("MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT");
378  nhm_caa_events.push_back("MEM_UNCORE_RETIRED:REMOTE_DRAM");
379  nhm_caa_events.push_back("OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM");
380  nhm_caa_events.push_back("OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM");
381  nhm_caa_events.push_back("OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP");
382  nhm_caa_events.push_back("OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD");
383  nhm_caa_events.push_back("OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM");
384  nhm_caa_events.push_back("OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT");
385  nhm_caa_events.push_back("RESOURCE_STALLS:ANY");
386  nhm_caa_events.push_back("SSEX_UOPS_RETIRED:PACKED_DOUBLE");
387  nhm_caa_events.push_back("SSEX_UOPS_RETIRED:PACKED_SINGLE");
388  nhm_caa_events.push_back("UOPS_DECODED:MS CMASK=1");
389  nhm_caa_events.push_back("UOPS_ISSUED:ANY CMASK=1 INV=1");
390  nhm_caa_events.push_back("ITLB_MISS_RETIRED");
391  nhm_caa_events.push_back("UOPS_RETIRED:ANY");
392 }
393 
// NOTE(review): the signature line of this function is missing from this listing.
// Verifies that every event name listed in core_caa_events is present in C_events.
// Returns true when all of them are found. At the first missing event an
// error naming it is written to stderr and false is returned.
395 {
396  for(std::vector<std::string>::const_iterator it=core_caa_events.begin(); it!=core_caa_events.end(); ++it)
397  {
398  if(find(C_events.begin(), C_events.end(), (*it))==C_events.end())
399  {
400  fprintf(stderr, "ERROR: Cannot find event %s!!!\naborting...\n", (*it).c_str());
401  return false;
402  }
403  }
404  return true;
405 }
406 
// NOTE(review): the signature line of this function is missing from this listing.
// Verifies that every event name listed in nhm_caa_events is present in C_events.
// Returns true when all of them are found. At the first missing event an
// error naming it is written to stderr and false is returned.
408 {
409  for(std::vector<std::string>::const_iterator it=nhm_caa_events.begin(); it!=nhm_caa_events.end(); ++it)
410  {
411  if(find(C_events.begin(), C_events.end(), (*it))==C_events.end())
412  {
413  fprintf(stderr, "ERROR: Cannot find event %s!!!\naborting...\n", (*it).c_str());
414  return false;
415  }
416  }
417  return true;
418 }
419 
// NOTE(review): the signature line of this function is missing from this listing.
// Appends to core_caa_events_displ the labels of the derived Core values, in
// the order listed here. Every non-empty label except "iFactor" is a key
// written by calc_core_deriv_values(); "iFactor" is not computed there.
// NOTE(review): the empty strings presumably produce separator rows in the output - confirm against the code that consumes this list.
421 {
422  core_caa_events_displ.push_back("Total Cycles");
423  core_caa_events_displ.push_back("Stalled Cycles");
424  core_caa_events_displ.push_back("% of Total Cycles");
425  core_caa_events_displ.push_back("Instructions Retired");
426  core_caa_events_displ.push_back("CPI");
427  core_caa_events_displ.push_back("");
428  core_caa_events_displ.push_back("iMargin");
429  core_caa_events_displ.push_back("iFactor");
430  core_caa_events_displ.push_back("");
431  core_caa_events_displ.push_back("Counted Stalled Cycles");
432  core_caa_events_displ.push_back("");
433  core_caa_events_displ.push_back("L2 Miss Impact");
434  core_caa_events_displ.push_back("L2 Miss % of counted Stalled Cycles");
435  core_caa_events_displ.push_back("");
436  core_caa_events_displ.push_back("L2 Hit Impact");
437  core_caa_events_displ.push_back("L2 Hit % of counted Stalled Cycles");
438  core_caa_events_displ.push_back("");
439  core_caa_events_displ.push_back("L1 DTLB Miss Impact");
440  core_caa_events_displ.push_back("L1 DTLB Miss % of counted Stalled Cycles");
441  core_caa_events_displ.push_back("");
442  core_caa_events_displ.push_back("LCP Stalls Impact");
443  core_caa_events_displ.push_back("LCP Stalls % of counted Stalled Cycles");
444  core_caa_events_displ.push_back("");
445  core_caa_events_displ.push_back("Store-Fwd Stalls Impact");
446  core_caa_events_displ.push_back("Store-Fwd Stalls % of counted Stalled Cycles");
447  core_caa_events_displ.push_back("");
448  core_caa_events_displ.push_back("Loads Blocked by Unknown Address Store Impact");
449  core_caa_events_displ.push_back("Loads Blocked % of Store-Fwd Stalls Cycles");
450  core_caa_events_displ.push_back("Loads Overlapped with Stores Impact");
451  core_caa_events_displ.push_back("Loads Overlapped % of Store-Fwd Stalls Cycles");
452  core_caa_events_displ.push_back("Loads Spanning across Cache Lines Impact");
453  core_caa_events_displ.push_back("Loads Spanning % of Store-Fwd Stalls Cycles");
454  core_caa_events_displ.push_back("");
455  core_caa_events_displ.push_back("Load Instructions");
456  core_caa_events_displ.push_back("Load % of all Instructions");
457  core_caa_events_displ.push_back("Store Instructions");
458  core_caa_events_displ.push_back("Store % of all Instructions");
459  core_caa_events_displ.push_back("Branch Instructions");
460  core_caa_events_displ.push_back("Branch % of all Instructions");
461  core_caa_events_displ.push_back("Packed SIMD Computational Instructions");
462  core_caa_events_displ.push_back("Packed SIMD % of all Instructions");
463  core_caa_events_displ.push_back("Other Instructions");
464  core_caa_events_displ.push_back("Other % of all Instructions");
465  core_caa_events_displ.push_back("");
466  core_caa_events_displ.push_back("ITLB Miss Rate in %");
467  core_caa_events_displ.push_back("% of Mispredicted Branches");
468 }
469 
470 void calc_core_deriv_values(double totalCycles)
471 {
472  for(std::map<std::string, std::map<std::string, double> >::iterator it = C_modules.begin(); it != C_modules.end(); ++it)
473  {
474  (it->second)["Total Cycles"] = (it->second)["UNHALTED_CORE_CYCLES"];
475  (it->second)["Stalled Cycles"] = (it->second)["RS_UOPS_DISPATCHED CMASK=1 INV=1"];
476  (it->second)["L2 Miss Impact"] = (it->second)["MEM_LOAD_RETIRED:L2_LINE_MISS"] * CORE_L2_MISS_CYCLES;
477  (it->second)["L2 Hit Impact"] = ((it->second)["MEM_LOAD_RETIRED:L1D_LINE_MISS"] - (it->second)["MEM_LOAD_RETIRED:L2_LINE_MISS"]) * CORE_L2_HIT_CYCLES;
478  (it->second)["L1 DTLB Miss Impact"] = (it->second)["MEM_LOAD_RETIRED:DTLB_MISS"] * CORE_L1_DTLB_MISS_CYCLES;
479  (it->second)["LCP Stalls Impact"] = (it->second)["ILD_STALL"] * CORE_LCP_STALL_CYCLES;
480  (it->second)["Loads Blocked by Unknown Address Store Impact"] = (it->second)["LOAD_BLOCK:STA"] * CORE_UNKNOWN_ADDR_STORE_CYCLES;
481  (it->second)["Loads Overlapped with Stores Impact"] = (it->second)["LOAD_BLOCK:OVERLAP_STORE"] * CORE_OVERLAPPING_CYCLES;
482  (it->second)["Loads Spanning across Cache Lines Impact"] = (it->second)["LOAD_BLOCK:UNTIL_RETIRE"] * CORE_SPAN_ACROSS_CACHE_LINE_CYCLES;
483  (it->second)["Store-Fwd Stalls Impact"] = (it->second)["Loads Blocked by Unknown Address Store Impact"] + (it->second)["Loads Overlapped with Stores Impact"] + (it->second)["Loads Spanning across Cache Lines Impact"];
484  (it->second)["Counted Stalled Cycles"] = (it->second)["L2 Miss Impact"] + (it->second)["L2 Hit Impact"] + (it->second)["LCP Stalls Impact"] + (it->second)["L1 DTLB Miss Impact"] + (it->second)["Store-Fwd Stalls Impact"];
485  (it->second)["Instructions Retired"] = (it->second)["INSTRUCTIONS_RETIRED"];
486  (it->second)["ITLB Miss Rate in %"] = ((it->second)["ITLB_MISS_RETIRED"]/(it->second)["INSTRUCTIONS_RETIRED"])*100;
487  (it->second)["Branch Instructions"] = (it->second)["BRANCH_INSTRUCTIONS_RETIRED"];
488  (it->second)["Load Instructions"] = (it->second)["INST_RETIRED:LOADS"];
489  (it->second)["Store Instructions"] = (it->second)["INST_RETIRED:STORES"];
490  (it->second)["Other Instructions"] = (it->second)["INST_RETIRED:OTHER"] - (it->second)["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] - (it->second)["BRANCH_INSTRUCTIONS_RETIRED"];
491  (it->second)["% of Mispredicted Branches"] = ((it->second)["MISPREDICTED_BRANCH_RETIRED"]/(it->second)["BRANCH_INSTRUCTIONS_RETIRED"])*100;
492  (it->second)["Packed SIMD Computational Instructions"] = (it->second)["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"];
493  (it->second)["Counted Instructions Retired"] = (it->second)["Branch Instructions"] + (it->second)["Load Instructions"] + (it->second)["Store Instructions"] + (it->second)["Other Instructions"] + (it->second)["Packed SIMD Computational Instructions"];
494  (it->second)["CPI"] = (it->second)["UNHALTED_CORE_CYCLES"]/(it->second)["INSTRUCTIONS_RETIRED"];
495 
496  double localPerformanceImprovement = (it->second)["CPI"]/EXPECTED_CPI;
497  double cyclesAfterImprovement = (it->second)["UNHALTED_CORE_CYCLES"]/localPerformanceImprovement;
498  double totalCyclesAfterImprovement = totalCycles-(it->second)["UNHALTED_CORE_CYCLES"]+cyclesAfterImprovement;
499  (it->second)["iMargin"] = 100-(totalCyclesAfterImprovement/totalCycles)*100;
500 
501  (it->second)["% of Total Cycles"] = (it->second)["RS_UOPS_DISPATCHED CMASK=1 INV=1"]*100/(it->second)["UNHALTED_CORE_CYCLES"];
502  (it->second)["L2 Miss % of counted Stalled Cycles"] =(it->second)["L2 Miss Impact"]*100/(it->second)["Counted Stalled Cycles"];
503  (it->second)["L2 Hit % of counted Stalled Cycles"] =(it->second)["L2 Hit Impact"]*100/(it->second)["Counted Stalled Cycles"];
504  (it->second)["L1 DTLB Miss % of counted Stalled Cycles"] =(it->second)["L1 DTLB Miss Impact"]*100/(it->second)["Counted Stalled Cycles"];
505  (it->second)["LCP Stalls % of counted Stalled Cycles"] =(it->second)["LCP Stalls Impact"]*100/(it->second)["Counted Stalled Cycles"];
506  (it->second)["Store-Fwd Stalls % of counted Stalled Cycles"] =(it->second)["Store-Fwd Stalls Impact"]*100/(it->second)["Counted Stalled Cycles"];
507  (it->second)["Loads Blocked % of Store-Fwd Stalls Cycles"] =(it->second)["Loads Blocked by Unknown Address Store Impact"]*100/(it->second)["Store-Fwd Stalls Impact"];
508  (it->second)["Loads Overlapped % of Store-Fwd Stalls Cycles"] =(it->second)["Loads Overlapped with Stores Impact"]*100/(it->second)["Store-Fwd Stalls Impact"];
509  (it->second)["Loads Spanning % of Store-Fwd Stalls Cycles"] =(it->second)["Loads Spanning across Cache Lines Impact"]*100/(it->second)["Store-Fwd Stalls Impact"];
510 
511  (it->second)["Load % of all Instructions"] =(it->second)["INST_RETIRED:LOADS"]*100/(it->second)["Counted Instructions Retired"];
512  (it->second)["Store % of all Instructions"] =(it->second)["INST_RETIRED:STORES"]*100/(it->second)["Counted Instructions Retired"];
513  (it->second)["Branch % of all Instructions"] =(it->second)["BRANCH_INSTRUCTIONS_RETIRED"]*100/(it->second)["Counted Instructions Retired"];
514  (it->second)["Packed SIMD % of all Instructions"] =(it->second)["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"]*100/(it->second)["Counted Instructions Retired"];
515  (it->second)["Other % of all Instructions"] =(it->second)["Other Instructions"]*100/(it->second)["Counted Instructions Retired"];
516  }
517 }
518 
// NOTE(review): the signature line of this function is missing from this listing.
// Appends to nhm_caa_events_displ the labels of the derived Nehalem values, in
// the order listed here. Labels are also the keys under which
// calc_nhm_deriv_values() stores the values (e.g. "L2 Hit Impact"), so a
// label must be changed in both places at once; this includes the
// "IFECTHes" spelling, which is used identically there.
// NOTE(review): the empty strings presumably produce separator rows in the output - confirm against the code that consumes this list.
520 {
521  nhm_caa_events_displ.push_back("Total Cycles");
522  nhm_caa_events_displ.push_back("Instructions Retired");
523  nhm_caa_events_displ.push_back("CPI");
524  nhm_caa_events_displ.push_back("");
525  nhm_caa_events_displ.push_back("iMargin");
526  nhm_caa_events_displ.push_back("iFactor");
527  nhm_caa_events_displ.push_back("");
528  nhm_caa_events_displ.push_back("Stalled Cycles");
529  nhm_caa_events_displ.push_back("% of Total Cycles");
530  nhm_caa_events_displ.push_back("Total Counted Stalled Cycles");
531  nhm_caa_events_displ.push_back("");
532  nhm_caa_events_displ.push_back("Instruction Starvation % of Total Cycles");
533  nhm_caa_events_displ.push_back("# of Instructions per Call");
534  nhm_caa_events_displ.push_back("% of Total Cycles spent handling FP exceptions");
535  nhm_caa_events_displ.push_back("");
536  nhm_caa_events_displ.push_back("Counted Stalled Cycles due to Load Ops");
537  nhm_caa_events_displ.push_back("");
538  nhm_caa_events_displ.push_back("L2 Hit Impact");
539  nhm_caa_events_displ.push_back("L2 Hit % of Load Stalls");
540  nhm_caa_events_displ.push_back("");
541  nhm_caa_events_displ.push_back("L3 Unshared Hit Impact");
542  nhm_caa_events_displ.push_back("L3 Unshared Hit % of Load Stalls");
543  nhm_caa_events_displ.push_back("");
544  nhm_caa_events_displ.push_back("L2 Other Core Hit Impact");
545  nhm_caa_events_displ.push_back("L2 Other Core Hit % of Load Stalls");
546  nhm_caa_events_displ.push_back("");
547  nhm_caa_events_displ.push_back("L2 Other Core Hit Modified Impact");
548  nhm_caa_events_displ.push_back("L2 Other Core Hit Modified % of Load Stalls");
549  nhm_caa_events_displ.push_back("");
550  nhm_caa_events_displ.push_back("L3 Miss -> Local DRAM Hit Impact");
551  nhm_caa_events_displ.push_back("L3 Miss -> Remote DRAM Hit Impact");
552  nhm_caa_events_displ.push_back("L3 Miss -> Remote Cache Hit Impact");
553  nhm_caa_events_displ.push_back("L3 Miss -> Total Impact");
554  nhm_caa_events_displ.push_back("L3 Miss % of Load Stalls");
555  nhm_caa_events_displ.push_back("");
556  nhm_caa_events_displ.push_back("L1 DTLB Miss Impact");
557  nhm_caa_events_displ.push_back("L1 DTLB Miss % of Load Stalls");
558  nhm_caa_events_displ.push_back("");
559  nhm_caa_events_displ.push_back("Cycles spent during DIV & SQRT Ops");
560  nhm_caa_events_displ.push_back("DIV & SQRT Ops % of counted Stalled Cycles");
561  nhm_caa_events_displ.push_back("");
562  nhm_caa_events_displ.push_back("Total L2 IFETCH misses");
563  nhm_caa_events_displ.push_back("% of L2 IFETCH misses");
564  nhm_caa_events_displ.push_back("");
565  nhm_caa_events_displ.push_back("% of IFETCHes served by Local DRAM");
566  nhm_caa_events_displ.push_back("% of IFETCHes served by L3 (Modified)");
567  nhm_caa_events_displ.push_back("% of IFETCHes served by L3 (Clean Snoop)");
568  nhm_caa_events_displ.push_back("% of IFETCHes served by Remote L2");
569  nhm_caa_events_displ.push_back("% of IFETCHes served by Remote DRAM");
570  nhm_caa_events_displ.push_back("% of IFETCHes served by L3 (No Snoop)");
571  nhm_caa_events_displ.push_back("");
572  nhm_caa_events_displ.push_back("Total L2 IFETCH miss Impact");
573  nhm_caa_events_displ.push_back("");
574  nhm_caa_events_displ.push_back("Cycles IFETCH served by Local DRAM");
575  nhm_caa_events_displ.push_back("Local DRAM IFECTHes % Impact");
576  nhm_caa_events_displ.push_back("");
577  nhm_caa_events_displ.push_back("Cycles IFETCH served by L3 (Modified)");
578  nhm_caa_events_displ.push_back("L3 (Modified) IFECTHes % Impact");
579  nhm_caa_events_displ.push_back("");
580  nhm_caa_events_displ.push_back("Cycles IFETCH served by L3 (Clean Snoop)");
581  nhm_caa_events_displ.push_back("L3 (Clean Snoop) IFECTHes % Impact");
582  nhm_caa_events_displ.push_back("");
583  nhm_caa_events_displ.push_back("Cycles IFETCH served by Remote L2");
584  nhm_caa_events_displ.push_back("Remote L2 IFECTHes % Impact");
585  nhm_caa_events_displ.push_back("");
586  nhm_caa_events_displ.push_back("Cycles IFETCH served by Remote DRAM");
587  nhm_caa_events_displ.push_back("Remote DRAM IFECTHes % Impact");
588  nhm_caa_events_displ.push_back("");
589  nhm_caa_events_displ.push_back("Cycles IFETCH served by L3 (No Snoop)");
590  nhm_caa_events_displ.push_back("L3 (No Snoop) IFECTHes % Impact");
591  nhm_caa_events_displ.push_back("");
592  nhm_caa_events_displ.push_back("Total Branch Instructions Executed");
593  nhm_caa_events_displ.push_back("% of Mispredicted Branches");
594  nhm_caa_events_displ.push_back("");
595  nhm_caa_events_displ.push_back("Direct Near Calls % of Total Branches Executed");
596  nhm_caa_events_displ.push_back("Indirect Near Calls % of Total Branches Executed");
597  nhm_caa_events_displ.push_back("Indirect Near Non-Calls % of Total Branches Executed");
598  nhm_caa_events_displ.push_back("All Near Calls % of Total Branches Executed");
599  nhm_caa_events_displ.push_back("All Non Calls % of Total Branches Executed");
600  nhm_caa_events_displ.push_back("All Returns % of Total Branches Executed");
601  nhm_caa_events_displ.push_back("");
602  nhm_caa_events_displ.push_back("Total Branch Instructions Retired");
603  nhm_caa_events_displ.push_back("Conditionals % of Total Branches Retired");
604  nhm_caa_events_displ.push_back("Near Calls % of Total Branches Retired");
605  nhm_caa_events_displ.push_back("");
606  nhm_caa_events_displ.push_back("L1 ITLB Miss Impact");
607  nhm_caa_events_displ.push_back("ITLB Miss Rate in %");
608  nhm_caa_events_displ.push_back("");
609  nhm_caa_events_displ.push_back("Branch Instructions");
610  nhm_caa_events_displ.push_back("Branch % of all Instructions");
611  nhm_caa_events_displ.push_back("");
612  nhm_caa_events_displ.push_back("Load Instructions");
613  nhm_caa_events_displ.push_back("Load % of all Instructions");
614  nhm_caa_events_displ.push_back("");
615  nhm_caa_events_displ.push_back("Store Instructions");
616  nhm_caa_events_displ.push_back("Store % of all Instructions");
617  nhm_caa_events_displ.push_back("");
618  nhm_caa_events_displ.push_back("Other Instructions");
619  nhm_caa_events_displ.push_back("Other % of all Instructions");
620  nhm_caa_events_displ.push_back("");
621  nhm_caa_events_displ.push_back("Packed UOPS Retired");
622  nhm_caa_events_displ.push_back("Packed % of all UOPS Retired");
623 }
624 
625 void calc_nhm_deriv_values(double totalCycles)
626 {
627  for(std::map<std::string, std::map<std::string, double> >::iterator it = C_modules.begin(); it != C_modules.end(); ++it)
628  {
629  (it->second)["Total Cycles"] = (it->second)["CPU_CLK_UNHALTED:THREAD_P"];
630 
631  (it->second)["L2 Hit Impact"] = (it->second)["MEM_LOAD_RETIRED:L2_HIT"] * I7_L2_HIT_CYCLES;
632  (it->second)["L3 Unshared Hit Impact"] = (it->second)["MEM_LOAD_RETIRED:L3_UNSHARED_HIT"] * I7_L3_UNSHARED_HIT_CYCLES;
633  if((it->second)["MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"]>(it->second)["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"])
634  {
635  (it->second)["L2 Other Core Hit Impact"] = ((it->second)["MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] - (it->second)["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"])* I7_OTHER_CORE_L2_HIT_CYCLES;
636  }
637  else
638  {
639  (it->second)["L2 Other Core Hit Impact"] = 0.0;
640  }
641  (it->second)["L2 Other Core Hit Modified Impact"] = (it->second)["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] * I7_OTHER_CORE_L2_HITM_CYCLES;
642  (it->second)["L3 Miss -> Local DRAM Hit Impact"] = (it->second)["MEM_UNCORE_RETIRED:LOCAL_DRAM"] * I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES;
643  (it->second)["L3 Miss -> Remote DRAM Hit Impact"] = (it->second)["MEM_UNCORE_RETIRED:REMOTE_DRAM"] * I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES;
644  (it->second)["L3 Miss -> Remote Cache Hit Impact"] = (it->second)["MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT"] * I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES;
645  (it->second)["L3 Miss -> Total Impact"] = (it->second)["L3 Miss -> Local DRAM Hit Impact"] + (it->second)["L3 Miss -> Remote DRAM Hit Impact"] + (it->second)["L3 Miss -> Remote Cache Hit Impact"];
646  (it->second)["L1 DTLB Miss Impact"] = (it->second)["DTLB_LOAD_MISSES:WALK_COMPLETED"] * I7_L1_DTLB_WALK_COMPLETED_CYCLES;
647  (it->second)["Counted Stalled Cycles due to Load Ops"] = (it->second)["L3 Miss -> Total Impact"] + (it->second)["L2 Hit Impact"] + (it->second)["L1 DTLB Miss Impact"] + (it->second)["L3 Unshared Hit Impact"] + (it->second)["L2 Other Core Hit Modified Impact"] + (it->second)["L2 Other Core Hit Impact"];
648  (it->second)["Cycles spent during DIV & SQRT Ops"] = (it->second)["ARITH:CYCLES_DIV_BUSY"];
649  (it->second)["Total Counted Stalled Cycles"] = (it->second)["Counted Stalled Cycles due to Load Ops"] + (it->second)["Cycles spent during DIV & SQRT Ops"];
650  (it->second)["Stalled Cycles"] = (it->second)["Total Counted Stalled Cycles"]; //TO BE FIXED when UOPS_EXECUTED:0x3f is fixed!!
651  (it->second)["% of Total Cycles"] = (it->second)["Stalled Cycles"] * 100 / (it->second)["CPU_CLK_UNHALTED:THREAD_P"]; //TO BE FIXED!! see above
652  (it->second)["L3 Miss % of Load Stalls"] = (it->second)["L3 Miss -> Total Impact"] * 100 / (it->second)["Counted Stalled Cycles due to Load Ops"];
653  (it->second)["L2 Hit % of Load Stalls"] = (it->second)["L2 Hit Impact"] * 100 / (it->second)["Counted Stalled Cycles due to Load Ops"];
654  (it->second)["L1 DTLB Miss % of Load Stalls"] = (it->second)["L1 DTLB Miss Impact"] * 100 / (it->second)["Counted Stalled Cycles due to Load Ops"];
655  (it->second)["L3 Unshared Hit % of Load Stalls"] = (it->second)["L3 Unshared Hit Impact"] * 100 / (it->second)["Counted Stalled Cycles due to Load Ops"];
656  (it->second)["L2 Other Core Hit % of Load Stalls"] = (it->second)["L2 Other Core Hit Impact"] * 100 / (it->second)["Counted Stalled Cycles due to Load Ops"];
657  (it->second)["L2 Other Core Hit Modified % of Load Stalls"] = (it->second)["L2 Other Core Hit Modified Impact"] * 100 / (it->second)["Counted Stalled Cycles due to Load Ops"];
658  (it->second)["DIV & SQRT Ops % of counted Stalled Cycles"] = (it->second)["Cycles spent during DIV & SQRT Ops"] * 100 / (it->second)["Total Counted Stalled Cycles"];
659 
660  (it->second)["Cycles IFETCH served by Local DRAM"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT;
661  (it->second)["Cycles IFETCH served by L3 (Modified)"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * I7_IFETCH_L2_MISS_L3_HITM;
662  (it->second)["Cycles IFETCH served by L3 (Clean Snoop)"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * I7_IFETCH_L2_MISS_L3_HIT_SNOOP;
663  (it->second)["Cycles IFETCH served by Remote L2"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD;
664  (it->second)["Cycles IFETCH served by Remote DRAM"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT;
665  (it->second)["Cycles IFETCH served by L3 (No Snoop)"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP;
666  (it->second)["Total L2 IFETCH miss Impact"] = (it->second)["Cycles IFETCH served by Local DRAM"] + (it->second)["Cycles IFETCH served by L3 (Modified)"] + (it->second)["Cycles IFETCH served by L3 (Clean Snoop)"] + (it->second)["Cycles IFETCH served by Remote L2"] + (it->second)["Cycles IFETCH served by Remote DRAM"] + (it->second)["Cycles IFETCH served by L3 (No Snoop)"];
667  (it->second)["Local DRAM IFECTHes % Impact"] = (it->second)["Cycles IFETCH served by Local DRAM"] * 100 / (it->second)["Total L2 IFETCH miss Impact"];
668  (it->second)["L3 (Modified) IFECTHes % Impact"] = (it->second)["Cycles IFETCH served by L3 (Modified)"] * 100 / (it->second)["Total L2 IFETCH miss Impact"];
669  (it->second)["L3 (Clean Snoop) IFECTHes % Impact"] = (it->second)["Cycles IFETCH served by L3 (Clean Snoop)"] * 100 / (it->second)["Total L2 IFETCH miss Impact"];
670  (it->second)["Remote L2 IFECTHes % Impact"] = (it->second)["Cycles IFETCH served by Remote L2"] * 100 / (it->second)["Total L2 IFETCH miss Impact"];
671  (it->second)["Remote DRAM IFECTHes % Impact"] = (it->second)["Cycles IFETCH served by Remote DRAM"] * 100 / (it->second)["Total L2 IFETCH miss Impact"];
672  (it->second)["L3 (No Snoop) IFECTHes % Impact"] = (it->second)["Cycles IFETCH served by L3 (No Snoop)"] * 100 / (it->second)["Total L2 IFETCH miss Impact"];
673  (it->second)["Total L2 IFETCH misses"] = (it->second)["L2_RQSTS:IFETCH_MISS"];
674  (it->second)["% of IFETCHes served by Local DRAM"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * 100 / (it->second)["L2_RQSTS:IFETCH_MISS"];
675  (it->second)["% of IFETCHes served by L3 (Modified)"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * 100 / (it->second)["L2_RQSTS:IFETCH_MISS"];
676  (it->second)["% of IFETCHes served by L3 (Clean Snoop)"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * 100 / (it->second)["L2_RQSTS:IFETCH_MISS"];
677  (it->second)["% of IFETCHes served by Remote L2"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * 100 / (it->second)["L2_RQSTS:IFETCH_MISS"];
678  (it->second)["% of IFETCHes served by Remote DRAM"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * 100 / (it->second)["L2_RQSTS:IFETCH_MISS"];
679  (it->second)["% of IFETCHes served by L3 (No Snoop)"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * 100 / (it->second)["L2_RQSTS:IFETCH_MISS"];
680  (it->second)["% of L2 IFETCH misses"] = (it->second)["L2_RQSTS:IFETCH_MISS"] * 100 / ((it->second)["L2_RQSTS:IFETCH_MISS"] + (it->second)["L2_RQSTS:IFETCH_HIT"]);
681  (it->second)["L1 ITLB Miss Impact"] = (it->second)["ITLB_MISSES:WALK_COMPLETED"] * I7_L1_ITLB_WALK_COMPLETED_CYCLES;
682 
683  (it->second)["Total Branch Instructions Executed"] = (it->second)["BR_INST_EXEC:ANY"];
684  (it->second)["% of Mispredicted Branches"] = (it->second)["BR_MISP_EXEC:ANY"] * 100 / (it->second)["BR_INST_EXEC:ANY"];
685  (it->second)["Direct Near Calls % of Total Branches Executed"] = (it->second)["BR_INST_EXEC:DIRECT_NEAR_CALL"] * 100 / (it->second)["Total Branch Instructions Executed"];
686  (it->second)["Indirect Near Calls % of Total Branches Executed"] = (it->second)["BR_INST_EXEC:INDIRECT_NEAR_CALL"] * 100 / (it->second)["Total Branch Instructions Executed"];
687  (it->second)["Indirect Near Non-Calls % of Total Branches Executed"] = (it->second)["BR_INST_EXEC:INDIRECT_NON_CALL"] * 100 / (it->second)["Total Branch Instructions Executed"];
688  (it->second)["All Near Calls % of Total Branches Executed"] = (it->second)["BR_INST_EXEC:NEAR_CALLS"] * 100 / (it->second)["Total Branch Instructions Executed"];
689  (it->second)["All Non Calls % of Total Branches Executed"] = (it->second)["BR_INST_EXEC:NON_CALLS"] * 100 / (it->second)["Total Branch Instructions Executed"];
690  (it->second)["All Returns % of Total Branches Executed"] = (it->second)["BR_INST_EXEC:RETURN_NEAR"] * 100 / (it->second)["Total Branch Instructions Executed"];
691  (it->second)["Total Branch Instructions Retired"] = (it->second)["BR_INST_RETIRED:ALL_BRANCHES"];
692  (it->second)["Conditionals % of Total Branches Retired"] = (it->second)["BR_INST_RETIRED:CONDITIONAL"] * 100 / (it->second)["Total Branch Instructions Retired"];
693  (it->second)["Near Calls % of Total Branches Retired"] = (it->second)["BR_INST_RETIRED:NEAR_CALL"] * 100 / (it->second)["Total Branch Instructions Retired"];
694 
695  (it->second)["Instruction Starvation % of Total Cycles"] = ((it->second)["UOPS_ISSUED:ANY CMASK=1 INV=1"] - (it->second)["RESOURCE_STALLS:ANY"])* 100 / (it->second)["CPU_CLK_UNHALTED:THREAD_P"];
696  (it->second)["% of Total Cycles spent handling FP exceptions"] = (it->second)["UOPS_DECODED:MS CMASK=1"]* 100 / (it->second)["CPU_CLK_UNHALTED:THREAD_P"];
697  (it->second)["# of Instructions per Call"] = (it->second)["INST_RETIRED:ANY_P"] / (it->second)["BR_INST_EXEC:NEAR_CALLS"];
698 
699  (it->second)["Instructions Retired"] = (it->second)["INST_RETIRED:ANY_P"];
700  (it->second)["ITLB Miss Rate in %"] = ((it->second)["ITLB_MISS_RETIRED"] / (it->second)["INST_RETIRED:ANY_P"]) * 100;
701 
702  (it->second)["Branch Instructions"] = (it->second)["BR_INST_RETIRED:ALL_BRANCHES"];
703  (it->second)["Load Instructions"] = (it->second)["MEM_INST_RETIRED:LOADS"];
704  (it->second)["Store Instructions"] = (it->second)["MEM_INST_RETIRED:STORES"];
705  (it->second)["Other Instructions"] = (it->second)["Instructions Retired"] - (it->second)["MEM_INST_RETIRED:LOADS"] - (it->second)["MEM_INST_RETIRED:STORES"] - (it->second)["BR_INST_RETIRED:ALL_BRANCHES"];
706  (it->second)["Packed UOPS Retired"] = (it->second)["SSEX_UOPS_RETIRED:PACKED_DOUBLE"] + (it->second)["SSEX_UOPS_RETIRED:PACKED_SINGLE"];
707  (it->second)["CPI"] = (it->second)["CPU_CLK_UNHALTED:THREAD_P"] / (it->second)["INST_RETIRED:ANY_P"];
708 
709  double localPerformanceImprovement = (it->second)["CPI"]/EXPECTED_CPI;
710  double cyclesAfterImprovement = (it->second)["CPU_CLK_UNHALTED:THREAD_P"]/localPerformanceImprovement;
711  double totalCyclesAfterImprovement = totalCycles-(it->second)["CPU_CLK_UNHALTED:THREAD_P"]+cyclesAfterImprovement;
712  (it->second)["iMargin"] = 100-(totalCyclesAfterImprovement/totalCycles)*100;
713 
714  (it->second)["Load % of all Instructions"] = (it->second)["MEM_INST_RETIRED:LOADS"] * 100 / (it->second)["INST_RETIRED:ANY_P"];
715  (it->second)["Store % of all Instructions"] = (it->second)["MEM_INST_RETIRED:STORES"] * 100 / (it->second)["INST_RETIRED:ANY_P"];
716  (it->second)["Branch % of all Instructions"] = (it->second)["BR_INST_RETIRED:ALL_BRANCHES"] * 100 / (it->second)["INST_RETIRED:ANY_P"];
717  (it->second)["Other % of all Instructions"] = (it->second)["Other Instructions"] * 100 / (it->second)["INST_RETIRED:ANY_P"];
718 
719  (it->second)["Packed % of all UOPS Retired"] = (it->second)["Packed UOPS Retired"] * 100 / (it->second)["UOPS_RETIRED:ANY"];
720  }
721 }
722 
// S_module class defining the objects containing sampling results for each module.
// A module is described by its name, the architecture and event it was sampled with,
// the counter mask, invert mask and sampling period, plus a map of accumulated
// sample values keyed by an arbitrary string (read_S_file() uses "symbol%library").
class S_module
{
 private:
  std::string module_name;
  std::string arch;
  std::string event;
  unsigned int total_num_samples;
  std::map<std::string, unsigned int> samples;
  unsigned int cmask;
  unsigned int inv;
  unsigned int sp;

 public:
  // Builds an empty module (all strings empty, all counters zero).
  S_module()
  {
    clear();
  }

  // Resets every field, including the architecture and event names, so that no
  // state from a previous module survives when the object is reused.
  void clear()
  {
    samples.clear();
    total_num_samples = 0;
    module_name = "";
    arch = "";
    event = "";
    cmask = 0;
    inv = 0;
    sp = 0;
  }

  // Sets the descriptive fields of the module. Stored samples and the total are left untouched.
  void init(const char* name, const char* architecture, const char* event_name, unsigned int c_mask, unsigned int inv_mask, unsigned int smpl_period)
  {
    module_name = name;
    arch = architecture;
    event = event_name;
    cmask = c_mask;
    inv = inv_mask;
    sp = smpl_period;
  }

  // Stores the total number of samples reported for this module.
  void set_total(unsigned int total)
  {
    total_num_samples = total;
  }

  unsigned int get_smpl_period() const
  {
    return sp;
  }

  unsigned int get_inv_mask() const
  {
    return inv;
  }

  unsigned int get_c_mask() const
  {
    return cmask;
  }

  std::string get_arch() const
  {
    return arch;
  }

  std::string get_event() const
  {
    return event;
  }

  // Adds value to the entry stored under index, creating the entry if needed.
  void add_sample(const char* index, unsigned int value)
  {
    samples[index] += value;
  }

  // Extracts the entry with the largest value.
  // char *index         : receives the key of the entry; the caller must provide a buffer
  //                       large enough for the key and its terminating NUL
  // unsigned int *value : receives the value of the entry
  // The entry is removed from the module, so repeated calls walk the samples in
  // decreasing order of value. Returns false (and writes nothing) when no entry is left.
  bool get_max(char *index, unsigned int *value)
  {
    if(samples.empty()) return false;
    // Start from the first entry so that a valid iterator is selected even when
    // every stored value is 0 (the strict comparison below never fires then).
    std::map<std::string, unsigned int>::iterator max_pos = samples.begin();
    unsigned int cur_max = 0;
    for(std::map<std::string, unsigned int>::iterator it = samples.begin(); it != samples.end(); ++it)
    {
      if(it->second > cur_max)
      {
        cur_max = it->second;
        max_pos = it;
      }
    }
    strcpy(index, (max_pos->first).c_str());
    *value = max_pos->second;
    samples.erase(max_pos);
    return true;
  }

  std::string get_module_name() const
  {
    return module_name;
  }

  unsigned int get_total_num_samples() const
  {
    return total_num_samples;
  }
};
817 
// void html_special_chars()
// const char *s : source string (NUL-terminated)
// char *s_mod : destination string; must not overlap s and must be large enough for the
//               escaped text (at most 6 bytes per source character, plus the terminating NUL)
// replaces the special HTML characters < > & " with &lt; &gt; &amp; &quot; so that the
// text can be used inside HTML code; every other character is copied unchanged
void html_special_chars(const char *s, char *s_mod)
{
  // A write pointer is advanced through the destination instead of calling strcat()
  // for every character, which would rescan the destination from its start each time.
  char *out = s_mod;
  for(const char *in = s; *in != '\0'; ++in)
  {
    const char *entity = NULL;
    switch(*in)
    {
      case '<':
        entity = "&lt;";
        break;
      case '>':
        entity = "&gt;";
        break;
      case '&':
        entity = "&amp;";
        break;
      case '"':
        entity = "&quot;";
        break;
      default:
        break;
    }
    if(entity != NULL)
    {
      const size_t entity_length = strlen(entity);
      memcpy(out, entity, entity_length);
      out += entity_length;
    }
    else
    {
      *out++ = *in;
    }
  }
  *out = '\0';
  return;
}
852 
// func_name()
// const char *demangled_symbol : string corresponding to the demangled symbol found by the read_S_file() function
// parses the argument and returns just the function name without arguments or return types
// WARNING: despite the const qualifier the string is modified in place (NUL characters are
// written into it to cut off the argument list and template arguments), so the caller must
// pass a writable buffer it does not need afterwards. The returned pointer points into that buffer.
// - If the symbol contains "operator", the result starts at "operator" and is cut right after
//   the operator token when that token is one of the recognised ones; otherwise the rest of
//   the string is returned unchanged.
// - If the symbol contains no ')', it is returned unchanged.
// - Otherwise the last parenthesised group and, if present, the template argument list right
//   before it are cut off, and the trailing run of [A-Za-z0-9_~] characters is returned.
const char *func_name(const char *demangled_symbol)
{
  // Tokens are tried in this order; longer tokens come before their own prefixes
  // (e.g. ">>=" before ">>" before ">") so that the longest one wins.
  static const char *const operator_tokens[] = {
    "delete[]", "delete", "new[]", "new",
    ">>=", "<<=", "->*",
    "<<", ">>", ">=", "<=", "==", "!=", "|=", "&=", "^=", "%=", "/=", "*=", "-=", "+=",
    "&&", "||", "[]", "()", "++", "--", "->",
    "<", ">", "~", "!", "+", "-", "*", "/", "%", "^", "&", "|", ",", "="
  };
  static const size_t operator_token_count = sizeof(operator_tokens)/sizeof(operator_tokens[0]);

  char *operator_string_begin = const_cast<char *>(strstr(demangled_symbol, "operator"));
  if(operator_string_begin != NULL)
  {
    char *operator_string_end = operator_string_begin+8; // 8 == strlen("operator")
    while(*operator_string_end == ' ') operator_string_end++;
    for(size_t i=0; i<operator_token_count; i++)
    {
      const size_t token_length = strlen(operator_tokens[i]);
      if(strncmp(operator_string_end, operator_tokens[i], token_length)==0)
      {
        operator_string_end[token_length] = '\0';
        break;
      }
    }
    return operator_string_begin;
  }

  char *args_begin = const_cast<char *>(strrchr(demangled_symbol, ')'));
  if(args_begin == NULL)
  {
    return demangled_symbol;
  }

  // Walk back from the last ')' to its matching '(' (or to the start of the string
  // when the parentheses are unbalanced).
  int pars = 1;
  while(pars>0 && args_begin!=demangled_symbol)
  {
    const char c = *(--args_begin);
    if(c==')')
    {
      pars++;
    }
    else if(c=='(')
    {
      pars--;
    }
  }
  *args_begin = '\0';
  if(args_begin == demangled_symbol)
  {
    // Nothing precedes the argument list: stop here instead of reading before the buffer.
    return demangled_symbol;
  }

  char *name_pos = args_begin-1;
  if(*name_pos=='>')
  {
    // Skip the template argument list and cut the string at its opening '<'.
    int angles = 1;
    while(angles>0 && name_pos!=demangled_symbol)
    {
      const char c = *(--name_pos);
      if(c=='>')
      {
        angles++;
      }
      else if(c=='<')
      {
        angles--;
      }
    }
    *name_pos = '\0';
  }

  // Extend the name to the left while identifier characters precede it, never moving
  // past the beginning of the buffer. The character currently at name_pos is accepted
  // as it is (last character of the name, or the NUL that replaced '<').
  while(name_pos != demangled_symbol)
  {
    const char c = *(name_pos-1);
    if(!(isalnum(static_cast<unsigned char>(c)) || c=='_' || c=='~'))
    {
      break;
    }
    --name_pos;
  }
  return name_pos;
}
1128 
1129 // put_module()
1130 // S_module *cur_module : pointer to the current module object to be written out in to HTML file
1131 // const char *event : name of architectural event being analysed
1132 // const char *dir : directory where sampling results input files are located
1133 // creates or updates the HTML output file using information contained inside the module object given as a parameter
1134 void put_S_module(S_module *cur_module, const char *dir)
1135 {
1136  char module_name[MAX_MODULE_NAME_LENGTH];
1137  bzero(module_name, MAX_MODULE_NAME_LENGTH);
1138  strcpy(module_name, (cur_module->get_module_name()).c_str());
1139  char module_filename[MAX_FILENAME_LENGTH];
1140  bzero(module_filename, MAX_FILENAME_LENGTH);
1141  strcpy(module_filename, dir);
1142  strcat(module_filename, "/HTML/");
1143  strcat(module_filename, module_name);
1144  strcat(module_filename, ".html");
1145  char event[MAX_EVENT_NAME_LENGTH];
1146  bzero(event, MAX_EVENT_NAME_LENGTH);
1147  strcpy(event, (cur_module->get_event()).c_str());
1148  std::map<std::string, unsigned int>::iterator result = modules_tot_samples.find(cur_module->get_module_name());
1149  FILE *module_file;
1150  if(result == modules_tot_samples.end()) //not found
1151  {
1152  if((!strcmp(event, "UNHALTED_CORE_CYCLES") && !nehalem) || (!strcmp(event, "CPU_CLK_UNHALTED:THREAD_P") && nehalem))
1153  {
1154  modules_tot_samples.insert(std::pair<std::string, unsigned int>(cur_module->get_module_name(), cur_module->get_total_num_samples()));
1155  }
1156  else
1157  {
1158  modules_tot_samples.insert(std::pair<std::string, unsigned int>(cur_module->get_module_name(), 0));
1159  }
1160  module_file = fopen(module_filename, "w");
1161  if(module_file == NULL)
1162  {
1163  fprintf(stderr, "ERROR: Cannot create file %s!!!\naborting...\n", module_filename);
1164  exit(1);
1165  }
1166  fprintf(module_file, "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\n");
1167  fprintf(module_file, "<html>\n");
1168  fprintf(module_file, "<head>\n");
1169  fprintf(module_file, "<title>\n");
1170  fprintf(module_file, "%s\n", module_name);
1171  fprintf(module_file, "</title>\n");
1172  fprintf(module_file, "</head>\n");
1173  fprintf(module_file, "<body>\n");
1174  fprintf(module_file, "<h2>%s</h2><br/>Events Sampled:<br/>\n", module_name);
1175  fprintf(module_file, "<ul>\n");
1176  for(std::vector<std::string>::const_iterator it = S_events.begin(); it != S_events.end(); ++it)
1177  {
1178  fprintf(module_file, "<li><a href=\"#%s\">%s</a></li>\n", it->c_str(), it->c_str());
1179  }
1180  fprintf(module_file, "</ul>\n");
1181  }// if(result == modules_tot_samples.end()) //not found
1182  else
1183  {
1184  if((!strcmp(event, "UNHALTED_CORE_CYCLES") && !nehalem) || (!strcmp(event, "CPU_CLK_UNHALTED:THREAD_P") && nehalem))
1185  {
1186  modules_tot_samples[cur_module->get_module_name()] = cur_module->get_total_num_samples();
1187  }
1188  module_file = fopen(module_filename, "a");
1189  }//else:: if(result != modules_tot_samples.end()) //found!!
1190  char event_str[MAX_EVENT_NAME_LENGTH];
1191  bzero(event_str, MAX_EVENT_NAME_LENGTH);
1192  strcpy(event_str, event);
1193  if(cur_module->get_c_mask()>0)
1194  {
1195  sprintf(event_str, "%s CMASK=%d", event_str, cur_module->get_c_mask());
1196  }
1197  if(cur_module->get_inv_mask()>0)
1198  {
1199  sprintf(event_str, "%s INV=%d", event_str, cur_module->get_inv_mask());
1200  }
1201  fprintf(module_file, "<a name=\"%s\"><a>\n", event_str);
1202  fprintf(module_file, "<table cellpadding=\"5\">\n");
1203  fprintf(module_file, "<tr bgcolor=\"#EEEEEE\">\n");
1204  fprintf(module_file, "<th colspan=\"6\" align=\"left\">%s -- cmask: %u -- invmask: %u -- Total Samples: %u -- Sampling Period: %d</th>\n", event, cur_module->get_c_mask(), cur_module->get_inv_mask(), cur_module->get_total_num_samples(), cur_module->get_smpl_period());
1205  fprintf(module_file, "</tr>\n");
1206  fprintf(module_file, "<tr bgcolor=\"#EEEEEE\">\n");
1207  fprintf(module_file, "<th align=\"left\">Samples</th>\n");
1208  fprintf(module_file, "<th align=\"left\">Percentage</th>\n");
1209  fprintf(module_file, "<th align=\"left\">Symbol Name</th>\n");
1210  fprintf(module_file, "<th align=\"left\">Library Name</th>\n");
1211  fprintf(module_file, "<th align=\"left\">Complete Signature</th>\n");
1212  fprintf(module_file, "<th align=\"left\">Library Pathname</th>\n");
1213  fprintf(module_file, "</tr>\n");
1214  for(int j=0; j<20; j++)
1215  {
1216  char sym[MAX_SYM_LENGTH];
1217  char sym_mod[MAX_SYM_MOD_LENGTH];
1218  char lib[MAX_LIB_LENGTH];
1219  char lib_mod[MAX_LIB_MOD_LENGTH];
1220  char simple_sym[MAX_SIMPLE_SYM_LENGTH];
1221  char simple_sym_mod[MAX_SIMPLE_SYM_MOD_LENGTH];
1222  char simple_lib[MAX_SIMPLE_LIB_LENGTH];
1223  char simple_lib_mod[MAX_SIMPLE_LIB_MOD_LENGTH];
1224 
1225  bzero(sym, MAX_SYM_LENGTH);
1226  bzero(sym_mod, MAX_SYM_MOD_LENGTH);
1227  bzero(lib, MAX_LIB_LENGTH);
1228  bzero(lib_mod, MAX_LIB_MOD_LENGTH);
1229  bzero(simple_sym, MAX_SIMPLE_SYM_LENGTH);
1230  bzero(simple_sym_mod, MAX_SIMPLE_SYM_MOD_LENGTH);
1231  bzero(simple_lib, MAX_SIMPLE_LIB_LENGTH);
1232  bzero(simple_lib_mod, MAX_SIMPLE_LIB_MOD_LENGTH);
1233 
1234  char index[MAX_SAMPLE_INDEX_LENGTH];
1235  bzero(index, MAX_SAMPLE_INDEX_LENGTH);
1236  unsigned int value;
1237  bool res = cur_module->get_max(index, &value);
1238  if(!res) break;
1239  char *sym_end = strchr(index, '%');
1240  if(sym_end==NULL) //error
1241  {
1242  fprintf(stderr, "ERROR: Invalid sym and lib name! : %s\naborting...\n", index);
1243  exit(1);
1244  }
1245  strncpy(sym, index, strlen(index)-strlen(sym_end));
1246  strcpy(lib, sym_end+1);
1247  char temp[MAX_SYM_LENGTH];
1248  bzero(temp, MAX_SYM_LENGTH);
1249  strcpy(temp, sym);
1250  strcpy(simple_sym, (func_name(temp)));
1251  if(strrchr(lib, '/')!=NULL && *(strrchr(lib, '/')+1)!='\0')
1252  {
1253  strcpy(simple_lib, strrchr(lib, '/')+1);
1254  }
1255  else
1256  {
1257  strcpy(simple_lib, lib);
1258  }
1259  if(j%2!=0)
1260  {
1261  fprintf(module_file, "<tr bgcolor=\"#FFFFCC\">\n");
1262  }
1263  else
1264  {
1265  fprintf(module_file, "<tr bgcolor=\"#CCFFCC\">\n");
1266  }
1267  fprintf(module_file, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%u</td>\n", value);
1268  fprintf(module_file, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%f%%</td>\n", (((double)(value))/((double)(cur_module->get_total_num_samples())))*100);
1269  html_special_chars(simple_sym, simple_sym_mod);
1270  html_special_chars(simple_lib, simple_lib_mod);
1271  html_special_chars(sym, sym_mod);
1272  html_special_chars(lib, lib_mod);
1273  fprintf(module_file, "<td style=\"font-family:courier;\">%s</td>\n", simple_sym_mod);
1274  fprintf(module_file, "<td style=\"font-family:courier;\">%s</td>\n", simple_lib_mod);
1275  fprintf(module_file, "<td style=\"font-family:courier;\">%s</td>\n", sym_mod);
1276  fprintf(module_file, "<td style=\"font-family:courier;\">%s</td>\n</tr>\n", lib_mod);
1277  }
1278  fprintf(module_file, "</table><br/><br/>\n");
1279  int res = fclose(module_file);
1280  if(res)
1281  {
1282  fprintf(stderr, "ERROR: Cannot close file %s!!!\naborting...\n", module_filename);
1283  exit(1);
1284  }
1285  return;
1286 }
1287 
1288 // read_S_file()
1289 // const char *dir : directory where sampling results input files are located
1290 // const char *filename : name of the current file to analyse
1291 // reads content of a gzipped sampling result file, finds names of symbols inside libraries using their offsets,
1292 // demangles them to make them human-readable, creates the module objects (with their sampling values),
1293 // and calls the put_module() function to create (or update) the corresponding HTML output file
1294 // returns 0 on success
1295 int read_S_file(const char *dir, const char *filename)
1296 {
1297  char line[MAX_LINE_LENGTH];
1298  char event[MAX_EVENT_NAME_LENGTH];
1299  char arch[MAX_ARCH_NAME_LENGTH];
1300  unsigned int cmask;
1301  unsigned int inv;
1302  unsigned int sp;
1303  char cur_module_name[MAX_MODULE_NAME_LENGTH];
1304  bzero(line, MAX_LINE_LENGTH);
1305  bzero(event, MAX_EVENT_NAME_LENGTH);
1306  bzero(cur_module_name, MAX_MODULE_NAME_LENGTH);
1307  bzero(arch, MAX_ARCH_NAME_LENGTH);
1308 
1309  S_module *cur_module = new S_module();
1310  unsigned int module_num = 0;
1311 
1312  char path_name[MAX_FILENAME_LENGTH];
1313  bzero(path_name, MAX_FILENAME_LENGTH);
1314  strcpy(path_name, dir);
1315  strcat(path_name, "/");
1316  strcat(path_name, filename);
1317  gzFile res_file = gzopen(path_name, "rb");
1318 
1319  if(res_file != NULL)
1320  {
1321  bzero(line, MAX_LINE_LENGTH);
1322  gzgets(res_file, line, MAX_LINE_LENGTH);
1323  if(line[strlen(line)-1]=='\n') line[strlen(line)-1]='\0';
1324  bzero(event, MAX_EVENT_NAME_LENGTH);
1325  sscanf(line, "%s %s %u %u %u", arch, event, &cmask, &inv, &sp);
1326  if(!strcmp(arch, "NHM")) nehalem = true; else nehalem = false;
1327  bzero(line, MAX_LINE_LENGTH);
1328  while(gzgets(res_file, line, MAX_LINE_LENGTH)!=Z_NULL)
1329  {
1330  if(line[strlen(line)-1]=='\n') line[strlen(line)-1]='\0';
1331  if(strchr(line, ' ')==NULL) //module
1332  {
1333  if(module_num>0)
1334  {
1335  put_S_module(cur_module, dir);
1336  cur_module->clear();
1337  }
1338  module_num++;
1339  char *end_sym = strchr(line, '%');
1340  if(end_sym == NULL) //error
1341  {
1342  fprintf(stderr, "ERROR: Invalid module name. \nLINE: %s\naborting...\n", line);
1343  exit(1);
1344  }
1345  bzero(cur_module_name, MAX_MODULE_NAME_LENGTH);
1346  strncpy(cur_module_name, line, strlen(line)-strlen(end_sym));
1347  cur_module->init(cur_module_name, arch, event, cmask, inv, sp);
1348  cur_module->set_total(atoi(end_sym+1));
1349  } //module
1350  else //symbol, libName, libOffset, value
1351  {
1352  unsigned int value=0, libOffset=0;
1353  char symbol[MAX_SYM_LENGTH];
1354  char libName[MAX_LIB_LENGTH];
1355  char final_sym[MAX_SYM_MOD_LENGTH];
1356  char final_lib[MAX_LIB_MOD_LENGTH];
1357  bzero(symbol, MAX_SYM_LENGTH);
1358  bzero(libName, MAX_LIB_LENGTH);
1359  bzero(final_sym, MAX_SYM_MOD_LENGTH);
1360  bzero(final_lib, MAX_LIB_MOD_LENGTH);
1361 
1362  sscanf(line, "%s %s %u %u", symbol, libName, &libOffset, &value);
1363  char realPathName_s[FILENAME_MAX];
1364  bzero(realPathName_s, FILENAME_MAX);
1365  char *realPathName = realpath(libName, realPathName_s);
1366  if(realPathName!=NULL && strlen(realPathName)>0)
1367  {
1369  result = libsInfo.find(realPathName);
1370  if(result == libsInfo.end())
1371  {
1372  libsInfo[realPathName] = FileInfo(realPathName, true);
1373  }
1374  const char *temp_sym = libsInfo[realPathName].symbolByOffset(libOffset);
1375  if(temp_sym!=NULL && strlen(temp_sym)>0)
1376  {
1377  int status;
1378  char *demangled_symbol = abi::__cxa_demangle(temp_sym, NULL, NULL, &status);
1379  if(status == 0)
1380  {
1381  strcpy(final_sym, demangled_symbol);
1382  free(demangled_symbol);
1383  }
1384  else
1385  {
1386  strcpy(final_sym, temp_sym);
1387  }
1388  }
1389  else
1390  {
1391  strcpy(final_sym, "???");
1392  }
1393  strcpy(final_lib, realPathName);
1394  }
1395  else
1396  {
1397  strcpy(final_sym, symbol);
1398  strcpy(final_lib, libName);
1399  }
1400  char index[MAX_LINE_LENGTH];
1401  bzero(index, MAX_LINE_LENGTH);
1402  strcpy(index, final_sym);
1403  strcat(index, "%");
1404  strcat(index, final_lib);
1405  cur_module->add_sample(index, value);
1406  }// symbol, libName, libOffset, value
1407  bzero(line, MAX_LINE_LENGTH);
1408  }// while(gzgets(res_file, line, MAX_LINE_LENGTH)!=Z_NULL)
1409  put_S_module(cur_module, dir);//last module!
1410  cur_module->clear();
1411  gzclose(res_file);
1412  }// if(res_file != NULL)
1413  else
1414  {
1415  fprintf(stderr, "ERROR: Unable to open input file: %s\naborting...\n", filename);
1416  exit(1);
1417  }
1418  delete cur_module; //delete it!
1419  return 0;
1420 }
1421 
1422 int read_S_events(const char *dir, const char *filename)
1423 {
1424  char event[MAX_EVENT_NAME_LENGTH];
1425  char arch[MAX_ARCH_NAME_LENGTH];
1426  char line[MAX_LINE_LENGTH];
1427  char cmask_str[MAX_CMASK_STR_LENGTH];
1428  char inv_str[MAX_INV_STR_LENGTH];
1429  char sp_str[MAX_SP_STR_LENGTH];
1430  bzero(line, MAX_LINE_LENGTH);
1431  bzero(event, MAX_EVENT_NAME_LENGTH);
1432  bzero(arch, MAX_ARCH_NAME_LENGTH);
1433  bzero(cmask_str, MAX_CMASK_STR_LENGTH);
1434  bzero(inv_str, MAX_INV_STR_LENGTH);
1435  bzero(sp_str, MAX_SP_STR_LENGTH);
1436  char path_name[MAX_FILENAME_LENGTH];
1437  bzero(path_name, MAX_FILENAME_LENGTH);
1438  strcpy(path_name, dir);
1439  strcat(path_name, "/");
1440  strcat(path_name, filename);
1441  gzFile res_file = gzopen(path_name, "rb");
1442  if(res_file != NULL)
1443  {
1444  bzero(line, MAX_LINE_LENGTH);
1445  gzgets(res_file, line, MAX_LINE_LENGTH);
1446  if(line[strlen(line)-1]=='\n') line[strlen(line)-1]='\0';
1447  bzero(event, MAX_EVENT_NAME_LENGTH);
1448  sscanf(line, "%s %s %s %s %s\n", arch, event, cmask_str, inv_str, sp_str);
1449  std::string event_str(event);
1450  if(atoi(cmask_str)>0)
1451  {
1452  event_str += " CMASK=";
1453  event_str += cmask_str;
1454  }
1455  if(atoi(inv_str)>0)
1456  {
1457  event_str += " INV=";
1458  event_str += inv_str;
1459  }
1460  S_events.push_back(event_str);
1461  }// if(res_file != NULL)
1462  else
1463  {
1464  fprintf(stderr, "ERROR: Unable to open input file: %s\naborting...\n", filename);
1465  exit(1);
1466  }
1467  return 0;
1468 }
1469 
1470 // finalize_html_pages()
1471 // const char *dir : directory contating sampling result files
1472 // puts footers in module HTML pages and creates index file
1473 int finalize_S_html_pages(const char *dir)
1474 {
1475  for(std::map<std::string, unsigned int>::const_iterator i = modules_tot_samples.begin(); i != modules_tot_samples.end(); i++)
1476  {
1477  char module_filename[MAX_FILENAME_LENGTH];
1478  strcpy(module_filename, dir);
1479  strcat(module_filename, "/HTML/");
1480  strcat(module_filename, (i->first).c_str());
1481  strcat(module_filename, ".html");
1482  FILE *module_file = fopen(module_filename, "a");
1483  if(module_file == NULL)
1484  {
1485  fprintf(stderr, "ERROR: Unable to append to file: %s\naborting...\n", module_filename);
1486  exit(1);
1487  }
1488  fprintf(module_file, "</body>\n</html>\n");
1489  if(fclose(module_file))
1490  {
1491  fprintf(stderr, "ERROR: Cannot close file %s!!!\naborting...\n", module_filename);
1492  exit(1);
1493  }
1494  }
1495  return 0;
1496 }
1497 
1498 // read_file()
1499 // const char *filename : input file to analyse
1500 // analyses the event file and updates the list of modules with counter information found in the file
1501 // returns the number of modules found in the file
1502 int read_C_file(const char *dir, const char *filename)
1503 {
1504  char event[MAX_EVENT_NAME_LENGTH];
1505  char arch[MAX_ARCH_NAME_LENGTH];
1506  char line[MAX_LINE_LENGTH];
1507  char cmask_str[MAX_CMASK_STR_LENGTH];
1508  char inv_str[MAX_INV_STR_LENGTH];
1509  char sp_str[MAX_SP_STR_LENGTH];
1510  char cur_module_name[MAX_MODULE_NAME_LENGTH];
1511  bzero(line, MAX_LINE_LENGTH);
1512  bzero(event, MAX_EVENT_NAME_LENGTH);
1513  bzero(cur_module_name, MAX_MODULE_NAME_LENGTH);
1514  bzero(arch, MAX_ARCH_NAME_LENGTH);
1515  bzero(line, MAX_LINE_LENGTH);
1516  bzero(cmask_str, MAX_CMASK_STR_LENGTH);
1517  bzero(inv_str, MAX_INV_STR_LENGTH);
1518  bzero(sp_str, MAX_SP_STR_LENGTH);
1519  int number_of_modules = 0;
1520  long cur_sum = 0;
1521  int no_of_values = 0;
1522  char path_name[MAX_FILENAME_LENGTH];
1523  bzero(path_name, MAX_FILENAME_LENGTH);
1524  strcpy(path_name, dir);
1525  strcat(path_name, "/");
1526  strcat(path_name, filename);
1527  FILE *fp = fopen(path_name, "r");
1528  int stat = fscanf(fp, "%s %s %s %s %s\n", arch, event, cmask_str, inv_str, sp_str);
1529  if ( stat != 5 ) {
1530  std::cerr << "ERROR: failed to parse " << path_name << std::endl;
1531  exit(1);
1532  }
1533  if(!strcmp(arch, "NHM")) nehalem = true; else nehalem = false;
1534  std::string event_str(event);
1535  if(atoi(cmask_str)>0)
1536  {
1537  event_str += " CMASK=";
1538  event_str += cmask_str;
1539  }
1540  if(atoi(inv_str)>0)
1541  {
1542  event_str += " INV=";
1543  event_str += inv_str;
1544  }
1545  C_events.push_back(event_str);
1546  while(fscanf(fp, "%s\n", line)!=EOF)
1547  {
1548  if(isalpha(line[0])) //module
1549  {
1550  if(number_of_modules>0)
1551  {
1552  C_modules[cur_module_name][event_str]=(double)cur_sum/no_of_values;
1553  cur_sum = 0;
1554  no_of_values = 0;
1555  }
1556  strcpy(cur_module_name, line);
1557  number_of_modules++;
1558  }
1559  else if(isdigit(line[0])) //value
1560  {
1561  cur_sum += strtol(line, NULL, 10);
1562  no_of_values++;
1563  }
1564  }
1565  C_modules[cur_module_name][event_str]=(double)cur_sum/no_of_values; //last module
1566  fclose(fp);
1567  return number_of_modules;
1568 }
1569 
// put_C_header()
// FILE *fp                          : output stream of the HTML page
// std::vector<std::string> &columns : column titles; an empty title yields a blank white spacer cell
// writes the HTML preamble, opens the sortable table and emits its title row
void put_C_header(FILE *fp, std::vector<std::string> &columns)
{
  // fixed part of the page, up to and including the first title cell
  static const char *const preamble[] = {
    "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\n",
    "<html>\n",
    "<head>\n",
    "<title>\n",
    "Analysis Result\n",
    "</title>\n",
    "<script src=\"sorttable.js\"></script>\n",
    "<style>\ntable.sortable thead {\nbackground-color:#eee;\ncolor:#666666;\nfont-weight:bold;\ncursor:default;\nfont-family:courier;\n}\n</style>\n",
    "</head>\n",
    "<body link=\"black\">\n",
    "<h1>RESULTS:</h1><br/>Click for detailed symbol view...<p/>\n",
    "<table class=\"sortable\" cellpadding=\"5\">\n",
    "<tr>\n",
    "<th>MODULE NAME</th>\n"
  };
  const size_t preamble_lines = sizeof(preamble)/sizeof(preamble[0]);
  for(size_t k = 0; k < preamble_lines; ++k)
  {
    fputs(preamble[k], fp);
  }

  for(size_t col = 0; col < columns.size(); ++col)
  {
    const char *title = columns[col].c_str();
    if(title[0] != '\0')
    {
      fprintf(fp, "<th>%s</th>\n", title);
    }
    else
    {
      fputs("<th bgcolor=\"#FFFFFF\">&nbsp;</th>\n", fp);
    }
  }
  fputs("</tr>\n", fp);
}
1594 
1595 void put_C_modules(FILE *fp, std::vector<std::string> &columns)
1596 {
1597  int index = 0;
1598  for(std::map<std::string, std::map<std::string, double> >::iterator it = C_modules.begin(); it != C_modules.end(); ++it)
1599  {
1600  if(index%2) fprintf(fp, "<tr bgcolor=\"#FFFFCC\">\n");
1601  else fprintf(fp, "<tr bgcolor=\"#CCFFCC\">\n");
1602  fprintf(fp, "<td style=\"font-family:monospace;font-size:large;color:Black\"><a href=\"%s.html\">%s</a></td>\n", (it->first).c_str(), (it->first).c_str());
1603  for(std::vector<std::string>::const_iterator jt = columns.begin(); jt != columns.end(); ++jt)
1604  {
1605  if(strlen(jt->c_str())==0)
1606  {
1607  fprintf(fp, "<td bgcolor=\"#FFFFFF\">&nbsp;</td>");
1608  }
1609  else
1610  {
1611  if((it->second).find(*jt) == (it->second).end())
1612  {
1613  fprintf(stderr, "ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", (*jt).c_str());
1614  exit(1);
1615  }
1616  fprintf(fp, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\" align=\"right\">%.2f</td>\n", (it->second)[*jt]);
1617  }
1618  }
1619  fprintf(fp, "</tr>\n");
1620  index++;
1621  }
1622 }
1623 
// put_C_footer()
// FILE *fp : output stream of the HTML page
// closes the table, the body and the document
void put_C_footer(FILE *fp)
{
  fputs("</table>\n</body>\n</html>\n", fp);
}
1629 
// put_C_header_csv()
// FILE *fp                          : output stream of the CSV file
// std::vector<std::string> &columns : column titles; empty titles (spacers of the HTML layout) are skipped
// writes the CSV title line: "MODULE NAME" followed by one comma-separated title per non-empty column
void put_C_header_csv(FILE *fp, std::vector<std::string> &columns)
{
  fprintf(fp, "MODULE NAME");
  for(std::vector<std::string>::const_iterator it = columns.begin(); it != columns.end(); ++it)
  {
    if(strlen(it->c_str())!=0) fprintf(fp, ",%s", it->c_str());
  }
  fprintf(fp, "\n");
}
1641 
1643 {
1644  for(std::map<std::string, std::map<std::string, double> >::iterator it = C_modules.begin(); it != C_modules.end(); ++it)
1645  {
1646  fprintf(fp, "%s", (it->first).c_str()) ;
1647  for(std::vector<std::string>::const_iterator jt = columns.begin(); jt != columns.end(); ++jt)
1648  {
1649  if(strlen(jt->c_str())==0) {}
1650  else
1651  {
1652  if((it->second).find(*jt) == (it->second).end())
1653  {
1654  fprintf(stderr, "ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", (*jt).c_str());
1655  exit(1);
1656  }
1657  fprintf(fp, ",%.2f", (it->second)[*jt]);
1658  }
1659  }
1660  fprintf(fp, "\n");
1661  }
1662 }
1663 
1664 // normalize()
1665 // struct C_module *mod : pointer to the head of the list of modules
1666 // int counter : event selected (see C_module class for which event corresponds to which number)
1667 // int number_of_modules : length of the list
1668 // double value : value to be normalized
1669 // double normalizeTo : value to which the value above should be normalized
1670 // returns the normalized value
1671 double normalize(std::string field, double value, double normalizeTo)
1672 {
1673  double max = 0;
1674  double counter_value;
1675  for(std::map<std::string, std::map<std::string, double> >::iterator it = C_modules.begin(); it != C_modules.end(); ++it)
1676  {
1677  counter_value = (it->second)[field];
1678  if(max < counter_value) max = counter_value;
1679  }
1680  if(value>0 && max>0 && normalizeTo>0)
1681  {
1682  return 1.*value/max*normalizeTo;
1683  }
1684  else return 0;
1685 }
1686 
1687 // calc_post_deriv_values()
1688 // struct C_module *mod : pointer to the head of the list of modules
1689 // double totalCycles : total cycles spent by all the modules
1690 // int number_of_modules : length of the list
1691 // calculates the iFactor of each module
1693 {
1694  if(nehalem)
1695  {
1696  for(std::map<std::string, std::map<std::string, double> >::iterator it = C_modules.begin(); it != C_modules.end(); ++it)
1697  {
1698  double simdnorm = 1. - normalize("Packed % of all UOPS Retired", (it->second)["Packed % of all UOPS Retired"], 1);
1699  double misspnorm = normalize("% of Mispredicted Branches", (it->second)["% of Mispredicted Branches"], 1);
1700  double stallnorm = normalize("Stalled Cycles", (it->second)["Stalled Cycles"], 1);
1701  (it->second)["iFactor"] = stallnorm*(simdnorm + misspnorm + stallnorm);
1702  }
1703  }
1704  else
1705  {
1706  for(std::map<std::string, std::map<std::string, double> >::iterator it = C_modules.begin(); it != C_modules.end(); ++it)
1707  {
1708  double simdnorm = 1. - normalize("Packed SIMD % of all Instructions", (it->second)["Packed SIMD % of all Instructions"], 1);
1709  double misspnorm = normalize("% of Mispredicted Branches", (it->second)["% of Mispredicted Branches"], 1);
1710  double stallnorm = normalize("Stalled Cycles", (it->second)["Stalled Cycles"], 1);
1711  (it->second)["iFactor"] = stallnorm*(simdnorm + misspnorm + stallnorm);
1712  }
1713  }
1714 }
1715 
1716 // getTotalCycles()
1717 // struct C_module *mod : pointer to the head of the list of modules
1718 // int number_of_modules : length of the list
1719 // returns the number of total cycles spent by all the modules
1721 {
1722  double sum=0;
1723  if(nehalem)
1724  {
1725  for(std::map<std::string, std::map<std::string, double> >::iterator it = C_modules.begin(); it != C_modules.end(); ++it)
1726  {
1727  sum += (it->second)["CPU_CLK_UNHALTED:THREAD_P"];
1728  }
1729  }
1730  else
1731  {
1732  for(std::map<std::string, std::map<std::string, double> >::iterator it = C_modules.begin(); it != C_modules.end(); ++it)
1733  {
1734  sum += (it->second)["UNHALTED_CORE_CYCLES"];
1735  }
1736  }
1737  return sum;
1738 }
1739 
1740 // main()
1741 // takes as argument the directory containing results
1742 // and produces the HTML directory inside of it containing browsable statistics
1743 int main(int argc, char *argv[])
1744 {
1745  if(argc<2 || argc>4)
1746  {
1747  printf("\n\nUsage: %s DIRECTORY [--caa] [--csv]\n\n", argv[0]);
1748  exit(1);
1749  }
1750 
1751  bool caa = false;
1752  bool csv = false;
1753  for(int i=2; i<argc; i++)
1754  {
1755  if(!strcmp(argv[i], "--caa")) caa = true;
1756  if(!strcmp(argv[i], "--csv")) csv = true;
1757  }
1758 
1759  char dir[MAX_FILENAME_LENGTH];
1760  strcpy(dir, argv[1]);
1761  if(!csv)
1762  {
1763  strcat(dir, "/HTML");
1764  int res = mkdir(dir, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
1765  if(res!=0)
1766  {
1767  fprintf(stderr, "ERROR: Cannot create directory %s\naborting...\n", dir);
1768  exit(1);
1769  }
1770  }
1771 
1772  DIR *dp;
1773  struct dirent *dirp;
1774  int num_of_modules = 0;
1775  if((dp = opendir(argv[1]))==NULL)
1776  {
1777  printf("Error(%d) opening %s\n", errno, argv[1]);
1778  return errno;
1779  }
1780  while((dirp = readdir(dp))!=NULL)
1781  {
1782  if(strstr(dirp->d_name, "_S_")!=NULL && strstr(dirp->d_name, ".txt.gz")!=NULL && !csv)
1783  {
1784  if(read_S_events(argv[1], dirp->d_name))
1785  {
1786  fprintf(stderr, "ERROR: Cannot read file %s\naborting...\n", dirp->d_name);
1787  exit(1);
1788  }
1789  }
1790  }
1791  closedir(dp);
1792  sort(S_events.begin(), S_events.end());
1793  if((dp = opendir(argv[1]))==NULL)
1794  {
1795  printf("Error(%d) opening %s\n", errno, argv[1]);
1796  return errno;
1797  }
1798  while((dirp = readdir(dp))!=NULL)
1799  {
1800  if(strstr(dirp->d_name, "_S_")!=NULL && strstr(dirp->d_name, ".txt.gz")!=NULL && !csv)
1801  {
1802  if(read_S_file(argv[1], dirp->d_name))
1803  {
1804  fprintf(stderr, "ERROR: Cannot read file %s\naborting...\n", dirp->d_name);
1805  exit(1);
1806  }
1807  }
1808  else if(strstr(dirp->d_name, "_C_")!=NULL && strstr(dirp->d_name, ".txt")!=NULL)
1809  {
1810  int res = read_C_file(argv[1], dirp->d_name);
1811  if(res>num_of_modules)
1812  {
1813  num_of_modules = res;
1814  }
1815  }
1816  }
1817  closedir(dp);
1818 
1819  if(!csv)
1820  {
1821  if(finalize_S_html_pages(argv[1]))
1822  {
1823  fprintf(stderr, "ERROR: Cannot finalize HTML pages!!!\naborting...\n");
1824  exit(1);
1825  }
1826  }
1827 
1828  char filepath[MAX_FILENAME_LENGTH];
1829  bzero(filepath, MAX_FILENAME_LENGTH);
1830  if(!csv) sprintf(filepath, "%s/HTML/index.html", argv[1]);
1831  else sprintf(filepath, "%s/results.csv", argv[1]);
1832  FILE *fp = fopen(filepath, "w");
1833  if(fp == NULL)
1834  {
1835  fprintf(stderr, "ERROR: Cannot create file index.html!!!\naborting...\n");
1836  exit(1);
1837  }
1838 
1839  if(caa)
1840  {
1841  double totalCycles;
1842  if(!nehalem)
1843  {
1846  {
1847  fprintf(stderr, "(core) ERROR: One or more events for CAA missing!\naborting...\n");
1848  exit(1);
1849  }
1851  totalCycles = getTotalCycles();
1852  calc_core_deriv_values(totalCycles);
1854  if(!csv)
1855  {
1856  put_C_header(fp, core_caa_events_displ);
1857  put_C_modules(fp, core_caa_events_displ);
1858  }
1859  else
1860  {
1861  put_C_header_csv(fp, core_caa_events_displ);
1862  put_C_modules_csv(fp, core_caa_events_displ);
1863  }
1864  }
1865  else
1866  {
1869  {
1870  fprintf(stderr, "(nehalem) ERROR: One or more events for CAA missing!\naborting...\n");
1871  exit(1);
1872  }
1874  totalCycles = getTotalCycles();
1875  calc_nhm_deriv_values(totalCycles);
1877  if(!csv)
1878  {
1879  put_C_header(fp, nhm_caa_events_displ);
1880  put_C_modules(fp, nhm_caa_events_displ);
1881  }
1882  else
1883  {
1884  put_C_header_csv(fp, nhm_caa_events_displ);
1885  put_C_modules_csv(fp, nhm_caa_events_displ);
1886  }
1887  }
1888  if(!csv) put_C_footer(fp);
1889  fclose(fp);
1890  }
1891  else
1892  {
1893  if(!csv)
1894  {
1895  put_C_header(fp, C_events);
1896  put_C_modules(fp, C_events);
1897  put_C_footer(fp);
1898  }
1899  else
1900  {
1901  put_C_header_csv(fp, C_events);
1902  put_C_modules_csv(fp, C_events);
1903  }
1904  fclose(fp);
1905  }
1906  if(!csv)
1907  {
1908  char src[MAX_FILENAME_LENGTH];
1909  char dst[MAX_FILENAME_LENGTH];
1910  sprintf(src, "sorttable.js");
1911  sprintf(dst, "%s/HTML/sorttable.js", argv[1]);
1912  int fd_src = open(src, O_RDONLY);
1913  if(fd_src == -1)
1914  {
1915  fprintf(stderr, "ERROR: Cannot open file \"%s\"!\naborting...\n", src);
1916  exit(1);
1917  }
1918  int fd_dst = open(dst, O_WRONLY|O_CREAT|O_TRUNC, 0644);
1919  if(fd_dst == -1)
1920  {
1921  fprintf(stderr, "ERROR: Cannot open file \"%s\" (%s)!\naborting...\n", dst, strerror(errno));
1922  exit(1);
1923  }
1924  char c;
1925  while(read(fd_src, &c, 1))
1926  {
1927  if ( write(fd_dst, &c, 1) == -1 ) {
1928  std::cerr << "ERROR: failed to write to " << dst << std::endl;
1929  exit(1);
1930  }
1931  }
1932  close(fd_dst);
1933  close(fd_src);
1934  }
1935  return 0;
1936 }
std::istringstream * iss
std::string module_name
#define MAX_EVENT_NAME_LENGTH
std::map< std::string, unsigned int > samples
unsigned int sp
unsigned int get_inv_mask()
#define I7_L3_UNSHARED_HIT_CYCLES
T empty(T...args)
const char * func_name(const char *demangled_symbol)
const char * symbolByOffset(Offset offset)
bool skipString(const char *strptr, const char *srcbuffer, const char **dstbuffer)
#define MAX_SAMPLE_INDEX_LENGTH
#define CORE_UNKNOWN_ADDR_STORE_CYCLES
#define CORE_LCP_STALL_CYCLES
#define MAX_LIB_LENGTH
void put_C_footer(FILE *fp)
void put_S_module(S_module *cur_module, const char *dir)
double sum(double x, double y, double z)
#define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES
void init_core_caa_events_displ()
void put_C_modules(FILE *fp, std::vector< std::string > &columns)
list argv
Definition: gaudirun.py:227
std::string event
T getline(T...args)
bool get_max(char *index, unsigned int *value)
unsigned int cmask
T endl(T...args)
void init_core_caa_events()
#define MAX_SYM_LENGTH
def read(f, regex='.*', skipevents=0)
Definition: hivetimeline.py:19
#define I7_IFETCH_L2_MISS_L3_HITM
int read_S_file(const char *dir, const char *filename)
std::vector< CacheItem > SymbolCache
#define EXPECTED_CPI
#define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD
bool check_for_nhm_caa_events()
T end(T...args)
unsigned int get_smpl_period()
std::istringstream & output(void)
#define MAX_ARCH_NAME_LENGTH
void add_sample(const char *index, unsigned int value)
#define I7_OTHER_CORE_L2_HITM_CYCLES
#define MAX_LINE_LENGTH
double normalize(std::string field, double value, double normalizeTo)
#define CORE_L1_DTLB_MISS_CYCLES
FileInfo(const std::string &name, bool useGdb)
std::string arch
#define CORE_OVERLAPPING_CYCLES
void calc_post_deriv_values()
void calc_nhm_deriv_values(double totalCycles)
std::string NAME
#define MAX_SIMPLE_SYM_LENGTH
void init_nhm_caa_events_displ()
auto begin(reverse_wrapper< T > &w)
Definition: reverse.h:48
#define MAX_LIB_MOD_LENGTH
int finalize_S_html_pages(const char *dir)
STL class.
void put_C_header(FILE *fp, std::vector< std::string > &columns)
#define MAX_SIMPLE_LIB_MOD_LENGTH
void init_nhm_caa_events()
T push_back(T...args)
#define I7_L1_ITLB_WALK_COMPLETED_CYCLES
void calc_core_deriv_values(double totalCycles)
#define MAX_SP_STR_LENGTH
Offset next(Offset offset)
#define I7_L2_HIT_CYCLES
void init(const char *name, const char *architecture, const char *event_name, unsigned int c_mask, unsigned int inv_mask, unsigned int smpl_period)
#define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES
unsigned int total_num_samples
void set_total(unsigned int total)
#define I7_OTHER_CORE_L2_HIT_CYCLES
void html_special_chars(const char *s, char *s_mod)
std::string get_arch()
#define MAX_CMASK_STR_LENGTH
#define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES
T erase(T...args)
#define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES
int read_S_events(const char *dir, const char *filename)
auto end(reverse_wrapper< T > &w)
Definition: reverse.h:50
#define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP
#define I7_L1_DTLB_WALK_COMPLETED_CYCLES
T clear(T...args)
unsigned int inv
bool operator()(const int &a, const CacheItem &b) const
#define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT
#define MAX_FILENAME_LENGTH
#define CORE_L2_MISS_CYCLES
constexpr double nm
Definition: SystemOfUnits.h:82
T insert(T...args)
void put_C_header_csv(FILE *fp, std::vector< std::string > &columns)
#define PIPE_BUFFER_LENGTH
T find(T...args)
T size(T...args)
void createOffsetMap(void)
#define MAX_SIMPLE_SYM_MOD_LENGTH
T begin(T...args)
SymbolCache m_symbolCache
#define MAX_INV_STR_LENGTH
bool check_for_core_caa_events()
T c_str(T...args)
#define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT
T back(T...args)
string s
Definition: gaudirun.py:245
std::string get_event()
double getTotalCycles()
PipeReader(const char *cmd)
bool operator()(const CacheItem &a, const int &b) const
void put_C_modules_csv(FILE *fp, std::vector< std::string > &columns)
unsigned int get_total_num_samples()
#define MAX_MODULE_NAME_LENGTH
#define MAX_SIMPLE_LIB_LENGTH
bool skipWhitespaces(const char *srcbuffer, const char **destbuffer)
#define I7_IFETCH_L2_MISS_L3_HIT_SNOOP
int read_C_file(const char *dir, const char *filename)
int main(int argc, char *argv[])
std::string get_module_name()
#define CORE_L2_HIT_CYCLES
#define MAX_SYM_MOD_LENGTH
unsigned int get_c_mask()