The Gaudi Framework  v30r3 (a5ef0a68)
pfm_gen_analysis.cpp
Go to the documentation of this file.
1 /*
2 Name: pfm_gen_analysis.cpp
3 Author: Daniele Francesco Kruse
4 E-mail: daniele.francesco.kruse@cern.ch
5 Version: 0.9 (16/02/2010)
6 
7 This code is responsible for analysing results generated by the PerfmonService of CMSSW.
8 It takes 42 files as input (21 simple text files and 21 gzipped text files) and
9 produces a HTML directory containing the results of the analysis (both counting and sampling).
10 
11 compile linking zlib: g++ -Wall pfm_gen_analysis.cpp -lz
12 */
13 
14 #include <ctype.h>
15 #include <cxxabi.h>
16 #include <fcntl.h>
17 #include <math.h>
18 #include <stdint.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <unistd.h>
25 #include <zlib.h>
26 
27 #include <algorithm>
28 #include <iostream>
29 #include <list>
30 #include <map>
31 #include <sstream>
32 #include <string>
33 #include <vector>
34 
35 #include <dirent.h>
36 #include <errno.h>
37 
38 // Core
39 #define CORE_L2_MISS_CYCLES 200
40 #define CORE_L2_HIT_CYCLES 14.5
41 #define CORE_L1_DTLB_MISS_CYCLES 10
42 #define CORE_LCP_STALL_CYCLES 6
43 #define CORE_UNKNOWN_ADDR_STORE_CYCLES 5
44 #define CORE_OVERLAPPING_CYCLES 6
45 #define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES 20
46 
47 // Nehalem
48 #define I7_L1_DTLB_WALK_COMPLETED_CYCLES 35
49 #define I7_L1_ITLB_WALK_COMPLETED_CYCLES 35
50 #define I7_L2_HIT_CYCLES 6
51 #define I7_L3_UNSHARED_HIT_CYCLES 35
52 #define I7_OTHER_CORE_L2_HIT_CYCLES 60
53 #define I7_OTHER_CORE_L2_HITM_CYCLES 75
54 #define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES 225 // average of 200 (not modified) and 225-250 (modified)
55 #define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES 360 // average of 350 (not modified) and 370 (modified)
56 #define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES 180
57 #define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT 200
58 #define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT 350
59 #define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP 35
60 #define I7_IFETCH_L2_MISS_L3_HIT_SNOOP 60
61 #define I7_IFETCH_L2_MISS_L3_HITM 75
62 #define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD 180
63 
64 #define MAX_MODULES 1000
65 
66 #define EXPECTED_CPI 0.25
67 
68 #define MAX_FILENAME_LENGTH 1024
69 #define MAX_SAMPLE_INDEX_LENGTH 10000
70 #define MAX_SYM_LENGTH 15000
71 #define MAX_SYM_MOD_LENGTH 20000
72 #define MAX_LIB_LENGTH 5000
73 #define MAX_LIB_MOD_LENGTH 7000
74 #define MAX_SIMPLE_SYM_LENGTH 300
75 #define MAX_SIMPLE_SYM_MOD_LENGTH 500
76 #define MAX_SIMPLE_LIB_LENGTH 300
77 #define MAX_SIMPLE_LIB_MOD_LENGTH 500
78 #define MAX_LINE_LENGTH 20000
79 #define MAX_EVENT_NAME_LENGTH 150
80 #define MAX_MODULE_NAME_LENGTH 250
81 #define MAX_VALUE_STRING_LENGTH 250
82 #define MAX_ARCH_NAME_LENGTH 20
83 #define MAX_CMASK_STR_LENGTH 5
84 #define MAX_INV_STR_LENGTH 5
85 #define MAX_SP_STR_LENGTH 50
86 
87 #define PIPE_BUFFER_LENGTH 1000
88 
90 {
91 public:
92  PipeReader( const char* cmd )
93  {
94  pipe = popen( cmd, "r" );
95  if ( !pipe ) {
96  printf( "Cannot open pipe. Exiting...\n" );
97  exit( 1 );
98  }
99  char buffer[PIPE_BUFFER_LENGTH];
100  bzero( buffer, PIPE_BUFFER_LENGTH );
101  std::string result = "";
102  while ( !feof( pipe ) ) {
103  if ( fgets( buffer, PIPE_BUFFER_LENGTH, pipe ) != NULL ) {
104  result += buffer;
105  }
106  bzero( buffer, PIPE_BUFFER_LENGTH );
107  }
108  iss = new std::istringstream( result, std::istringstream::in );
109  }
110 
111  ~PipeReader( void )
112  {
113  pclose( pipe );
114  delete iss;
115  }
116 
117  std::istringstream& output( void ) { return *iss; }
118 
119 private:
120  FILE* pipe;
122 };
123 
// skipWhitespaces()
// const char *srcbuffer   : source string
// const char **destbuffer : receives the position following the whitespace run
// Requires at least one whitespace character at srcbuffer. On success stores in
// *destbuffer a pointer to the first non-whitespace character and returns true.
// Returns false, leaving *destbuffer untouched, when srcbuffer does not start
// with whitespace (this includes the empty string).
bool skipWhitespaces( const char* srcbuffer, const char** destbuffer )
{
  // The <ctype.h> classifiers are only defined for values representable as
  // unsigned char (or EOF); a plain char may be negative, hence the casts.
  if ( !isspace( static_cast<unsigned char>( *srcbuffer ) ) ) {
    return false;
  }
  const char* cursor = srcbuffer + 1;
  while ( isspace( static_cast<unsigned char>( *cursor ) ) ) {
    ++cursor;
  }
  *destbuffer = cursor;
  return true;
}
139 
// skipString()
// const char *strptr     : literal prefix expected at the start of srcbuffer
// const char *srcbuffer  : source string
// const char **dstbuffer : receives the position just past the prefix
// Tests whether srcbuffer begins with strptr (plain character comparison, no
// whitespace is skipped). On a match stores srcbuffer + strlen(strptr) in
// *dstbuffer and returns true; otherwise returns false and leaves *dstbuffer
// untouched.
bool skipString( const char* strptr, const char* srcbuffer, const char** dstbuffer )
{
  const size_t prefixLength = strlen( strptr );
  const bool   matches      = ( strncmp( srcbuffer, strptr, prefixLength ) == 0 );
  if ( matches ) {
    *dstbuffer = srcbuffer + prefixLength;
  }
  return matches;
}
155 
156 class FileInfo
157 {
158 public:
159  typedef int Offset;
161  FileInfo( void ) : NAME( "<dynamically generated>" ) {}
162  FileInfo( const std::string& name, bool useGdb ) : NAME( name )
163  {
164  if ( useGdb ) {
165  this->createOffsetMap();
166  }
167  }
168 
169  const char* symbolByOffset( Offset offset )
170  {
171  if ( m_symbolCache.empty() ) {
172  return 0;
173  }
174 
175  SymbolCache::iterator i = lower_bound( m_symbolCache.begin(), m_symbolCache.end(), offset, CacheItemComparator() );
176  if ( i->OFFSET == offset ) {
177  return i->NAME.c_str();
178  }
179 
180  if ( i == m_symbolCache.begin() ) {
181  return m_symbolCache.begin()->NAME.c_str();
182  }
183 
184  --i;
185 
186  return i->NAME.c_str();
187  }
188 
189  Offset next( Offset offset )
190  {
191  SymbolCache::iterator i = upper_bound( m_symbolCache.begin(), m_symbolCache.end(), offset, CacheItemComparator() );
192  if ( i == m_symbolCache.end() ) {
193  return 0;
194  }
195  return i->OFFSET;
196  }
197 
198 private:
199  struct CacheItem {
200  CacheItem( Offset offset, const std::string& name ) : OFFSET( offset ), NAME( name ){};
201  Offset OFFSET;
203  };
204 
206  SymbolCache m_symbolCache;
207 
209  bool operator()( const CacheItem& a, const int& b ) const { return a.OFFSET < b; }
210  bool operator()( const int& a, const CacheItem& b ) const { return a < b.OFFSET; }
211  };
212 
213  void createOffsetMap( void )
214  {
215  std::string commandLine = "objdump -p " + NAME;
216  PipeReader objdump( commandLine.c_str() );
217  std::string oldname;
218  std::string suffix;
219  int vmbase = 0;
220  bool matched = false;
221  while ( objdump.output() ) {
222  // Checks the following regexp
223  //
224  // LOAD\\s+off\\s+(0x[0-9A-Fa-f]+)\\s+vaddr\\s+(0x[0-9A-Fa-f]+)
225  //
226  // and sets vmbase to be $2 - $1 of the first matched entry.
227 
229  std::getline( objdump.output(), line );
230 
231  if ( !objdump.output() ) break;
232  if ( line.empty() ) continue;
233  const char* lineptr = line.c_str();
234  if ( !skipWhitespaces( lineptr, &lineptr ) ) continue;
235  if ( !skipString( "LOAD", lineptr, &lineptr ) ) continue;
236  if ( !skipWhitespaces( lineptr, &lineptr ) ) continue;
237  if ( !skipString( "off", lineptr, &lineptr ) ) continue;
238  char* endptr = 0;
239  int initialBase = strtol( lineptr, &endptr, 16 );
240  if ( lineptr == endptr ) continue;
241  lineptr = endptr;
242  if ( !skipWhitespaces( lineptr, &lineptr ) ) continue;
243  if ( !skipString( "vaddr", lineptr, &lineptr ) ) continue;
244  if ( !skipWhitespaces( lineptr, &lineptr ) ) continue;
245  int finalBase = strtol( lineptr, &endptr, 16 );
246  if ( lineptr == endptr ) continue;
247  vmbase = finalBase - initialBase;
248  matched = true;
249  break;
250  }
251  if ( !matched ) {
252  fprintf( stderr, "Cannot determine VM base address for %s\n", NAME.c_str() );
253  fprintf( stderr, "Error while running `objdump -p %s`\n", NAME.c_str() );
254  exit( 1 );
255  }
256  std::string commandLine2 = "nm -t d -n " + NAME;
257  PipeReader nm( commandLine2.c_str() );
258  while ( nm.output() ) {
260  std::getline( nm.output(), line );
261  if ( !nm.output() ) break;
262  if ( line.empty() ) continue;
263  // If line does not match "^(\\d+)[ ]\\S[ ](\S+)$", exit.
264  const char* begin = line.c_str();
265  char* endptr = 0;
266  int address = strtol( begin, &endptr, 10 );
267  if ( endptr == begin ) continue;
268  if ( *endptr++ != ' ' ) continue;
269  if ( isspace( *endptr++ ) ) continue;
270  if ( *endptr++ != ' ' ) continue;
271  char* symbolName = endptr;
272  while ( *endptr && !isspace( *endptr ) ) endptr++;
273  if ( *endptr != 0 ) continue;
274  // If line starts with '.' forget about it.
275  if ( symbolName[0] == '.' ) continue;
276  // Create a new symbol with the given fileoffset.
277  // The symbol is automatically saved in the FileInfo cache by offset.
278  // If a symbol with the same offset is already there, the new one
279  // replaces the old one.
280  int offset = address - vmbase;
281  if ( m_symbolCache.size() && ( m_symbolCache.back().OFFSET == offset ) )
282  m_symbolCache.back().NAME = symbolName;
283  else
284  m_symbolCache.push_back( CacheItem( address - vmbase, symbolName ) );
285  }
286  }
287 };
288 
289 static std::map<std::string, unsigned int> modules_tot_samples;
290 static std::map<std::string, FileInfo> libsInfo;
291 static int nehalem;
292 
294 static std::vector<std::string> C_events;
295 static std::vector<std::string> S_events;
296 
297 static std::vector<std::string> core_caa_events;
298 static std::vector<std::string> nhm_caa_events;
299 static std::vector<std::string> core_caa_events_displ;
300 static std::vector<std::string> nhm_caa_events_displ;
301 
303 {
  // Appends to core_caa_events the names of the counter events used by the
  // Core-architecture analysis. The commented-out entries below are not
  // registered.
304  core_caa_events.push_back( "BRANCH_INSTRUCTIONS_RETIRED" );
305  core_caa_events.push_back( "ILD_STALL" );
306  core_caa_events.push_back( "INST_RETIRED:LOADS" );
307  core_caa_events.push_back( "INST_RETIRED:OTHER" );
308  core_caa_events.push_back( "INST_RETIRED:STORES" );
309  core_caa_events.push_back( "INSTRUCTIONS_RETIRED" );
310  core_caa_events.push_back( "LOAD_BLOCK:OVERLAP_STORE" );
311  core_caa_events.push_back( "LOAD_BLOCK:STA" );
312  core_caa_events.push_back( "LOAD_BLOCK:UNTIL_RETIRE" );
313  core_caa_events.push_back( "MEM_LOAD_RETIRED:DTLB_MISS" );
314  core_caa_events.push_back( "MEM_LOAD_RETIRED:L1D_LINE_MISS" );
315  core_caa_events.push_back( "MEM_LOAD_RETIRED:L2_LINE_MISS" );
316  core_caa_events.push_back( "MISPREDICTED_BRANCH_RETIRED" );
317  // core_caa_events.push_back("RS_UOPS_DISPATCHED");
318  // core_caa_events.push_back("RS_UOPS_DISPATCHED CMASK=1");
319  core_caa_events.push_back( "RS_UOPS_DISPATCHED CMASK=1 INV=1" );
320  core_caa_events.push_back( "SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE" );
321  core_caa_events.push_back( "UNHALTED_CORE_CYCLES" );
322  // core_caa_events.push_back("UOPS_RETIRED:ANY");
323  // core_caa_events.push_back("UOPS_RETIRED:FUSED");
324  // core_caa_events.push_back("IDLE_DURING_DIV");
325 }
326 
328 {
  // Appends to nhm_caa_events the names of the counter events used by the
  // Nehalem analysis. Entries carrying " CMASK=..." / " INV=..." suffixes are
  // stored verbatim, suffix included.
329  nhm_caa_events.push_back( "ARITH:CYCLES_DIV_BUSY" );
330  nhm_caa_events.push_back( "BR_INST_EXEC:ANY" );
331  nhm_caa_events.push_back( "BR_INST_EXEC:DIRECT_NEAR_CALL" );
332  nhm_caa_events.push_back( "BR_INST_EXEC:INDIRECT_NEAR_CALL" );
333  nhm_caa_events.push_back( "BR_INST_EXEC:INDIRECT_NON_CALL" );
334  nhm_caa_events.push_back( "BR_INST_EXEC:NEAR_CALLS" );
335  nhm_caa_events.push_back( "BR_INST_EXEC:NON_CALLS" );
336  nhm_caa_events.push_back( "BR_INST_EXEC:RETURN_NEAR" );
337  nhm_caa_events.push_back( "BR_INST_RETIRED:ALL_BRANCHES" );
338  nhm_caa_events.push_back( "BR_INST_RETIRED:CONDITIONAL" );
339  nhm_caa_events.push_back( "BR_INST_RETIRED:NEAR_CALL" );
340  nhm_caa_events.push_back( "BR_MISP_EXEC:ANY" );
341  nhm_caa_events.push_back( "CPU_CLK_UNHALTED:THREAD_P" );
342  nhm_caa_events.push_back( "DTLB_LOAD_MISSES:WALK_COMPLETED" );
343  nhm_caa_events.push_back( "INST_RETIRED:ANY_P" );
344  nhm_caa_events.push_back( "ITLB_MISSES:WALK_COMPLETED" );
345  nhm_caa_events.push_back( "L2_RQSTS:IFETCH_HIT" );
346  nhm_caa_events.push_back( "L2_RQSTS:IFETCH_MISS" );
347  nhm_caa_events.push_back( "MEM_INST_RETIRED:LOADS" );
348  nhm_caa_events.push_back( "MEM_INST_RETIRED:STORES" );
349  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:L2_HIT" );
350  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:L3_MISS" );
351  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:L3_UNSHARED_HIT" );
352  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM" );
353  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:LOCAL_DRAM" );
354  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM" );
355  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT" );
356  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:REMOTE_DRAM" );
357  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM" );
358  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM" );
359  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP" );
360  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD" );
361  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM" );
362  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT" );
363  nhm_caa_events.push_back( "RESOURCE_STALLS:ANY" );
364  nhm_caa_events.push_back( "SSEX_UOPS_RETIRED:PACKED_DOUBLE" );
365  nhm_caa_events.push_back( "SSEX_UOPS_RETIRED:PACKED_SINGLE" );
366  nhm_caa_events.push_back( "UOPS_DECODED:MS CMASK=1" );
367  nhm_caa_events.push_back( "UOPS_ISSUED:ANY CMASK=1 INV=1" );
  // NOTE(review): ITLB_MISS_RETIRED is registered here, yet the only visible
  // reader of that key is calc_core_deriv_values() - confirm it belongs in
  // this list rather than (or in addition to) the Core list.
368  nhm_caa_events.push_back( "ITLB_MISS_RETIRED" );
369  nhm_caa_events.push_back( "UOPS_RETIRED:ANY" );
370 }
371 
373 {
  // Verifies that every name in core_caa_events also appears in C_events.
  // On the first missing name an error is written to stderr and false is
  // returned; true is returned when all names are present.
374  for ( std::vector<std::string>::const_iterator it = core_caa_events.begin(); it != core_caa_events.end(); ++it ) {
375  if ( find( C_events.begin(), C_events.end(), ( *it ) ) == C_events.end() ) {
376  fprintf( stderr, "ERROR: Cannot find event %s!!!\naborting...\n", ( *it ).c_str() );
377  return false;
378  }
379  }
380  return true;
381 }
382 
384 {
  // Verifies that every name in nhm_caa_events also appears in C_events.
  // On the first missing name an error is written to stderr and false is
  // returned; true is returned when all names are present.
385  for ( std::vector<std::string>::const_iterator it = nhm_caa_events.begin(); it != nhm_caa_events.end(); ++it ) {
386  if ( find( C_events.begin(), C_events.end(), ( *it ) ) == C_events.end() ) {
387  fprintf( stderr, "ERROR: Cannot find event %s!!!\naborting...\n", ( *it ).c_str() );
388  return false;
389  }
390  }
391  return true;
392 }
393 
395 {
  // Appends, in display order, the labels of the derived Core values to
  // core_caa_events_displ. The non-empty labels are the same strings that
  // calc_core_deriv_values() uses as map keys, except "iFactor", which that
  // function does not set. The empty strings presumably act as separators in
  // the generated report - TODO confirm against the code that prints the list.
396  core_caa_events_displ.push_back( "Total Cycles" );
397  core_caa_events_displ.push_back( "Stalled Cycles" );
398  core_caa_events_displ.push_back( "% of Total Cycles" );
399  core_caa_events_displ.push_back( "Instructions Retired" );
400  core_caa_events_displ.push_back( "CPI" );
401  core_caa_events_displ.push_back( "" );
402  core_caa_events_displ.push_back( "iMargin" );
403  core_caa_events_displ.push_back( "iFactor" );
404  core_caa_events_displ.push_back( "" );
405  core_caa_events_displ.push_back( "Counted Stalled Cycles" );
406  core_caa_events_displ.push_back( "" );
407  core_caa_events_displ.push_back( "L2 Miss Impact" );
408  core_caa_events_displ.push_back( "L2 Miss % of counted Stalled Cycles" );
409  core_caa_events_displ.push_back( "" );
410  core_caa_events_displ.push_back( "L2 Hit Impact" );
411  core_caa_events_displ.push_back( "L2 Hit % of counted Stalled Cycles" );
412  core_caa_events_displ.push_back( "" );
413  core_caa_events_displ.push_back( "L1 DTLB Miss Impact" );
414  core_caa_events_displ.push_back( "L1 DTLB Miss % of counted Stalled Cycles" );
415  core_caa_events_displ.push_back( "" );
416  core_caa_events_displ.push_back( "LCP Stalls Impact" );
417  core_caa_events_displ.push_back( "LCP Stalls % of counted Stalled Cycles" );
418  core_caa_events_displ.push_back( "" );
419  core_caa_events_displ.push_back( "Store-Fwd Stalls Impact" );
420  core_caa_events_displ.push_back( "Store-Fwd Stalls % of counted Stalled Cycles" );
421  core_caa_events_displ.push_back( "" );
422  core_caa_events_displ.push_back( "Loads Blocked by Unknown Address Store Impact" );
423  core_caa_events_displ.push_back( "Loads Blocked % of Store-Fwd Stalls Cycles" );
424  core_caa_events_displ.push_back( "Loads Overlapped with Stores Impact" );
425  core_caa_events_displ.push_back( "Loads Overlapped % of Store-Fwd Stalls Cycles" );
426  core_caa_events_displ.push_back( "Loads Spanning across Cache Lines Impact" );
427  core_caa_events_displ.push_back( "Loads Spanning % of Store-Fwd Stalls Cycles" );
428  core_caa_events_displ.push_back( "" );
429  core_caa_events_displ.push_back( "Load Instructions" );
430  core_caa_events_displ.push_back( "Load % of all Instructions" );
431  core_caa_events_displ.push_back( "Store Instructions" );
432  core_caa_events_displ.push_back( "Store % of all Instructions" );
433  core_caa_events_displ.push_back( "Branch Instructions" );
434  core_caa_events_displ.push_back( "Branch % of all Instructions" );
435  core_caa_events_displ.push_back( "Packed SIMD Computational Instructions" );
436  core_caa_events_displ.push_back( "Packed SIMD % of all Instructions" );
437  core_caa_events_displ.push_back( "Other Instructions" );
438  core_caa_events_displ.push_back( "Other % of all Instructions" );
439  core_caa_events_displ.push_back( "" );
440  core_caa_events_displ.push_back( "ITLB Miss Rate in %" );
441  core_caa_events_displ.push_back( "% of Mispredicted Branches" );
442 }
443 
444 void calc_core_deriv_values( double totalCycles )
445 {
446  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
447  ++it ) {
448  ( it->second )["Total Cycles"] = ( it->second )["UNHALTED_CORE_CYCLES"];
449  ( it->second )["Stalled Cycles"] = ( it->second )["RS_UOPS_DISPATCHED CMASK=1 INV=1"];
450  ( it->second )["L2 Miss Impact"] = ( it->second )["MEM_LOAD_RETIRED:L2_LINE_MISS"] * CORE_L2_MISS_CYCLES;
451  ( it->second )["L2 Hit Impact"] =
452  ( ( it->second )["MEM_LOAD_RETIRED:L1D_LINE_MISS"] - ( it->second )["MEM_LOAD_RETIRED:L2_LINE_MISS"] ) *
454  ( it->second )["L1 DTLB Miss Impact"] = ( it->second )["MEM_LOAD_RETIRED:DTLB_MISS"] * CORE_L1_DTLB_MISS_CYCLES;
455  ( it->second )["LCP Stalls Impact"] = ( it->second )["ILD_STALL"] * CORE_LCP_STALL_CYCLES;
456  ( it->second )["Loads Blocked by Unknown Address Store Impact"] =
457  ( it->second )["LOAD_BLOCK:STA"] * CORE_UNKNOWN_ADDR_STORE_CYCLES;
458  ( it->second )["Loads Overlapped with Stores Impact"] =
459  ( it->second )["LOAD_BLOCK:OVERLAP_STORE"] * CORE_OVERLAPPING_CYCLES;
460  ( it->second )["Loads Spanning across Cache Lines Impact"] =
461  ( it->second )["LOAD_BLOCK:UNTIL_RETIRE"] * CORE_SPAN_ACROSS_CACHE_LINE_CYCLES;
462  ( it->second )["Store-Fwd Stalls Impact"] = ( it->second )["Loads Blocked by Unknown Address Store Impact"] +
463  ( it->second )["Loads Overlapped with Stores Impact"] +
464  ( it->second )["Loads Spanning across Cache Lines Impact"];
465  ( it->second )["Counted Stalled Cycles"] =
466  ( it->second )["L2 Miss Impact"] + ( it->second )["L2 Hit Impact"] + ( it->second )["LCP Stalls Impact"] +
467  ( it->second )["L1 DTLB Miss Impact"] + ( it->second )["Store-Fwd Stalls Impact"];
468  ( it->second )["Instructions Retired"] = ( it->second )["INSTRUCTIONS_RETIRED"];
469  ( it->second )["ITLB Miss Rate in %"] =
470  ( ( it->second )["ITLB_MISS_RETIRED"] / ( it->second )["INSTRUCTIONS_RETIRED"] ) * 100;
471  ( it->second )["Branch Instructions"] = ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"];
472  ( it->second )["Load Instructions"] = ( it->second )["INST_RETIRED:LOADS"];
473  ( it->second )["Store Instructions"] = ( it->second )["INST_RETIRED:STORES"];
474  ( it->second )["Other Instructions"] = ( it->second )["INST_RETIRED:OTHER"] -
475  ( it->second )["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] -
476  ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"];
477  ( it->second )["% of Mispredicted Branches"] =
478  ( ( it->second )["MISPREDICTED_BRANCH_RETIRED"] / ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"] ) * 100;
479  ( it->second )["Packed SIMD Computational Instructions"] =
480  ( it->second )["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"];
481  ( it->second )["Counted Instructions Retired"] =
482  ( it->second )["Branch Instructions"] + ( it->second )["Load Instructions"] +
483  ( it->second )["Store Instructions"] + ( it->second )["Other Instructions"] +
484  ( it->second )["Packed SIMD Computational Instructions"];
485  ( it->second )["CPI"] = ( it->second )["UNHALTED_CORE_CYCLES"] / ( it->second )["INSTRUCTIONS_RETIRED"];
486 
487  double localPerformanceImprovement = ( it->second )["CPI"] / EXPECTED_CPI;
488  double cyclesAfterImprovement = ( it->second )["UNHALTED_CORE_CYCLES"] / localPerformanceImprovement;
489  double totalCyclesAfterImprovement = totalCycles - ( it->second )["UNHALTED_CORE_CYCLES"] + cyclesAfterImprovement;
490  ( it->second )["iMargin"] = 100 - ( totalCyclesAfterImprovement / totalCycles ) * 100;
491 
492  ( it->second )["% of Total Cycles"] =
493  ( it->second )["RS_UOPS_DISPATCHED CMASK=1 INV=1"] * 100 / ( it->second )["UNHALTED_CORE_CYCLES"];
494  ( it->second )["L2 Miss % of counted Stalled Cycles"] =
495  ( it->second )["L2 Miss Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
496  ( it->second )["L2 Hit % of counted Stalled Cycles"] =
497  ( it->second )["L2 Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
498  ( it->second )["L1 DTLB Miss % of counted Stalled Cycles"] =
499  ( it->second )["L1 DTLB Miss Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
500  ( it->second )["LCP Stalls % of counted Stalled Cycles"] =
501  ( it->second )["LCP Stalls Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
502  ( it->second )["Store-Fwd Stalls % of counted Stalled Cycles"] =
503  ( it->second )["Store-Fwd Stalls Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
504  ( it->second )["Loads Blocked % of Store-Fwd Stalls Cycles"] =
505  ( it->second )["Loads Blocked by Unknown Address Store Impact"] * 100 /
506  ( it->second )["Store-Fwd Stalls Impact"];
507  ( it->second )["Loads Overlapped % of Store-Fwd Stalls Cycles"] =
508  ( it->second )["Loads Overlapped with Stores Impact"] * 100 / ( it->second )["Store-Fwd Stalls Impact"];
509  ( it->second )["Loads Spanning % of Store-Fwd Stalls Cycles"] =
510  ( it->second )["Loads Spanning across Cache Lines Impact"] * 100 / ( it->second )["Store-Fwd Stalls Impact"];
511 
512  ( it->second )["Load % of all Instructions"] =
513  ( it->second )["INST_RETIRED:LOADS"] * 100 / ( it->second )["Counted Instructions Retired"];
514  ( it->second )["Store % of all Instructions"] =
515  ( it->second )["INST_RETIRED:STORES"] * 100 / ( it->second )["Counted Instructions Retired"];
516  ( it->second )["Branch % of all Instructions"] =
517  ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"] * 100 / ( it->second )["Counted Instructions Retired"];
518  ( it->second )["Packed SIMD % of all Instructions"] =
519  ( it->second )["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] * 100 /
520  ( it->second )["Counted Instructions Retired"];
521  ( it->second )["Other % of all Instructions"] =
522  ( it->second )["Other Instructions"] * 100 / ( it->second )["Counted Instructions Retired"];
523  }
524 }
525 
527 {
  // Appends, in display order, the labels of the derived Nehalem values to
  // nhm_caa_events_displ. The empty strings presumably act as separators in
  // the generated report - TODO confirm against the code that prints the list.
  // NOTE(review): several labels spell "IFECTHes" (sic). They are runtime
  // strings, so any map key meant to match them must use the same spelling -
  // check every user of these labels before correcting the typo.
528  nhm_caa_events_displ.push_back( "Total Cycles" );
529  nhm_caa_events_displ.push_back( "Instructions Retired" );
530  nhm_caa_events_displ.push_back( "CPI" );
531  nhm_caa_events_displ.push_back( "" );
532  nhm_caa_events_displ.push_back( "iMargin" );
533  nhm_caa_events_displ.push_back( "iFactor" );
534  nhm_caa_events_displ.push_back( "" );
535  nhm_caa_events_displ.push_back( "Stalled Cycles" );
536  nhm_caa_events_displ.push_back( "% of Total Cycles" );
537  nhm_caa_events_displ.push_back( "Total Counted Stalled Cycles" );
538  nhm_caa_events_displ.push_back( "" );
539  nhm_caa_events_displ.push_back( "Instruction Starvation % of Total Cycles" );
540  nhm_caa_events_displ.push_back( "# of Instructions per Call" );
541  nhm_caa_events_displ.push_back( "% of Total Cycles spent handling FP exceptions" );
542  nhm_caa_events_displ.push_back( "" );
543  nhm_caa_events_displ.push_back( "Counted Stalled Cycles due to Load Ops" );
544  nhm_caa_events_displ.push_back( "" );
545  nhm_caa_events_displ.push_back( "L2 Hit Impact" );
546  nhm_caa_events_displ.push_back( "L2 Hit % of Load Stalls" );
547  nhm_caa_events_displ.push_back( "" );
548  nhm_caa_events_displ.push_back( "L3 Unshared Hit Impact" );
549  nhm_caa_events_displ.push_back( "L3 Unshared Hit % of Load Stalls" );
550  nhm_caa_events_displ.push_back( "" );
551  nhm_caa_events_displ.push_back( "L2 Other Core Hit Impact" );
552  nhm_caa_events_displ.push_back( "L2 Other Core Hit % of Load Stalls" );
553  nhm_caa_events_displ.push_back( "" );
554  nhm_caa_events_displ.push_back( "L2 Other Core Hit Modified Impact" );
555  nhm_caa_events_displ.push_back( "L2 Other Core Hit Modified % of Load Stalls" );
556  nhm_caa_events_displ.push_back( "" );
557  nhm_caa_events_displ.push_back( "L3 Miss -> Local DRAM Hit Impact" );
558  nhm_caa_events_displ.push_back( "L3 Miss -> Remote DRAM Hit Impact" );
559  nhm_caa_events_displ.push_back( "L3 Miss -> Remote Cache Hit Impact" );
560  nhm_caa_events_displ.push_back( "L3 Miss -> Total Impact" );
561  nhm_caa_events_displ.push_back( "L3 Miss % of Load Stalls" );
562  nhm_caa_events_displ.push_back( "" );
563  nhm_caa_events_displ.push_back( "L1 DTLB Miss Impact" );
564  nhm_caa_events_displ.push_back( "L1 DTLB Miss % of Load Stalls" );
565  nhm_caa_events_displ.push_back( "" );
566  nhm_caa_events_displ.push_back( "Cycles spent during DIV & SQRT Ops" );
567  nhm_caa_events_displ.push_back( "DIV & SQRT Ops % of counted Stalled Cycles" );
568  nhm_caa_events_displ.push_back( "" );
569  nhm_caa_events_displ.push_back( "Total L2 IFETCH misses" );
570  nhm_caa_events_displ.push_back( "% of L2 IFETCH misses" );
571  nhm_caa_events_displ.push_back( "" );
572  nhm_caa_events_displ.push_back( "% of IFETCHes served by Local DRAM" );
573  nhm_caa_events_displ.push_back( "% of IFETCHes served by L3 (Modified)" );
574  nhm_caa_events_displ.push_back( "% of IFETCHes served by L3 (Clean Snoop)" );
575  nhm_caa_events_displ.push_back( "% of IFETCHes served by Remote L2" );
576  nhm_caa_events_displ.push_back( "% of IFETCHes served by Remote DRAM" );
577  nhm_caa_events_displ.push_back( "% of IFETCHes served by L3 (No Snoop)" );
578  nhm_caa_events_displ.push_back( "" );
579  nhm_caa_events_displ.push_back( "Total L2 IFETCH miss Impact" );
580  nhm_caa_events_displ.push_back( "" );
581  nhm_caa_events_displ.push_back( "Cycles IFETCH served by Local DRAM" );
582  nhm_caa_events_displ.push_back( "Local DRAM IFECTHes % Impact" );
583  nhm_caa_events_displ.push_back( "" );
584  nhm_caa_events_displ.push_back( "Cycles IFETCH served by L3 (Modified)" );
585  nhm_caa_events_displ.push_back( "L3 (Modified) IFECTHes % Impact" );
586  nhm_caa_events_displ.push_back( "" );
587  nhm_caa_events_displ.push_back( "Cycles IFETCH served by L3 (Clean Snoop)" );
588  nhm_caa_events_displ.push_back( "L3 (Clean Snoop) IFECTHes % Impact" );
589  nhm_caa_events_displ.push_back( "" );
590  nhm_caa_events_displ.push_back( "Cycles IFETCH served by Remote L2" );
591  nhm_caa_events_displ.push_back( "Remote L2 IFECTHes % Impact" );
592  nhm_caa_events_displ.push_back( "" );
593  nhm_caa_events_displ.push_back( "Cycles IFETCH served by Remote DRAM" );
594  nhm_caa_events_displ.push_back( "Remote DRAM IFECTHes % Impact" );
595  nhm_caa_events_displ.push_back( "" );
596  nhm_caa_events_displ.push_back( "Cycles IFETCH served by L3 (No Snoop)" );
597  nhm_caa_events_displ.push_back( "L3 (No Snoop) IFECTHes % Impact" );
598  nhm_caa_events_displ.push_back( "" );
599  nhm_caa_events_displ.push_back( "Total Branch Instructions Executed" );
600  nhm_caa_events_displ.push_back( "% of Mispredicted Branches" );
601  nhm_caa_events_displ.push_back( "" );
602  nhm_caa_events_displ.push_back( "Direct Near Calls % of Total Branches Executed" );
603  nhm_caa_events_displ.push_back( "Indirect Near Calls % of Total Branches Executed" );
604  nhm_caa_events_displ.push_back( "Indirect Near Non-Calls % of Total Branches Executed" );
605  nhm_caa_events_displ.push_back( "All Near Calls % of Total Branches Executed" );
606  nhm_caa_events_displ.push_back( "All Non Calls % of Total Branches Executed" );
607  nhm_caa_events_displ.push_back( "All Returns % of Total Branches Executed" );
608  nhm_caa_events_displ.push_back( "" );
609  nhm_caa_events_displ.push_back( "Total Branch Instructions Retired" );
610  nhm_caa_events_displ.push_back( "Conditionals % of Total Branches Retired" );
611  nhm_caa_events_displ.push_back( "Near Calls % of Total Branches Retired" );
612  nhm_caa_events_displ.push_back( "" );
613  nhm_caa_events_displ.push_back( "L1 ITLB Miss Impact" );
614  nhm_caa_events_displ.push_back( "ITLB Miss Rate in %" );
615  nhm_caa_events_displ.push_back( "" );
616  nhm_caa_events_displ.push_back( "Branch Instructions" );
617  nhm_caa_events_displ.push_back( "Branch % of all Instructions" );
618  nhm_caa_events_displ.push_back( "" );
619  nhm_caa_events_displ.push_back( "Load Instructions" );
620  nhm_caa_events_displ.push_back( "Load % of all Instructions" );
621  nhm_caa_events_displ.push_back( "" );
622  nhm_caa_events_displ.push_back( "Store Instructions" );
623  nhm_caa_events_displ.push_back( "Store % of all Instructions" );
624  nhm_caa_events_displ.push_back( "" );
625  nhm_caa_events_displ.push_back( "Other Instructions" );
626  nhm_caa_events_displ.push_back( "Other % of all Instructions" );
627  nhm_caa_events_displ.push_back( "" );
628  nhm_caa_events_displ.push_back( "Packed UOPS Retired" );
629  nhm_caa_events_displ.push_back( "Packed % of all UOPS Retired" );
630 }
631 
632 void calc_nhm_deriv_values( double totalCycles )
633 {
634  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
635  ++it ) {
636  ( it->second )["Total Cycles"] = ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
637 
638  ( it->second )["L2 Hit Impact"] = ( it->second )["MEM_LOAD_RETIRED:L2_HIT"] * I7_L2_HIT_CYCLES;
639  ( it->second )["L3 Unshared Hit Impact"] =
640  ( it->second )["MEM_LOAD_RETIRED:L3_UNSHARED_HIT"] * I7_L3_UNSHARED_HIT_CYCLES;
641  if ( ( it->second )["MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] >
642  ( it->second )["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] ) {
643  ( it->second )["L2 Other Core Hit Impact"] = ( ( it->second )["MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] -
644  ( it->second )["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] ) *
646  } else {
647  ( it->second )["L2 Other Core Hit Impact"] = 0.0;
648  }
649  ( it->second )["L2 Other Core Hit Modified Impact"] =
650  ( it->second )["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] * I7_OTHER_CORE_L2_HITM_CYCLES;
651  ( it->second )["L3 Miss -> Local DRAM Hit Impact"] =
652  ( it->second )["MEM_UNCORE_RETIRED:LOCAL_DRAM"] * I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES;
653  ( it->second )["L3 Miss -> Remote DRAM Hit Impact"] =
654  ( it->second )["MEM_UNCORE_RETIRED:REMOTE_DRAM"] * I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES;
655  ( it->second )["L3 Miss -> Remote Cache Hit Impact"] =
656  ( it->second )["MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT"] * I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES;
657  ( it->second )["L3 Miss -> Total Impact"] = ( it->second )["L3 Miss -> Local DRAM Hit Impact"] +
658  ( it->second )["L3 Miss -> Remote DRAM Hit Impact"] +
659  ( it->second )["L3 Miss -> Remote Cache Hit Impact"];
660  ( it->second )["L1 DTLB Miss Impact"] =
661  ( it->second )["DTLB_LOAD_MISSES:WALK_COMPLETED"] * I7_L1_DTLB_WALK_COMPLETED_CYCLES;
662  ( it->second )["Counted Stalled Cycles due to Load Ops"] =
663  ( it->second )["L3 Miss -> Total Impact"] + ( it->second )["L2 Hit Impact"] +
664  ( it->second )["L1 DTLB Miss Impact"] + ( it->second )["L3 Unshared Hit Impact"] +
665  ( it->second )["L2 Other Core Hit Modified Impact"] + ( it->second )["L2 Other Core Hit Impact"];
666  ( it->second )["Cycles spent during DIV & SQRT Ops"] = ( it->second )["ARITH:CYCLES_DIV_BUSY"];
667  ( it->second )["Total Counted Stalled Cycles"] =
668  ( it->second )["Counted Stalled Cycles due to Load Ops"] + ( it->second )["Cycles spent during DIV & SQRT Ops"];
669  ( it->second )["Stalled Cycles"] =
670  ( it->second )["Total Counted Stalled Cycles"]; // TO BE FIXED when UOPS_EXECUTED:0x3f is fixed!!
671  ( it->second )["% of Total Cycles"] =
672  ( it->second )["Stalled Cycles"] * 100 / ( it->second )["CPU_CLK_UNHALTED:THREAD_P"]; // TO BE FIXED!! see above
673  ( it->second )["L3 Miss % of Load Stalls"] =
674  ( it->second )["L3 Miss -> Total Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
675  ( it->second )["L2 Hit % of Load Stalls"] =
676  ( it->second )["L2 Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
677  ( it->second )["L1 DTLB Miss % of Load Stalls"] =
678  ( it->second )["L1 DTLB Miss Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
679  ( it->second )["L3 Unshared Hit % of Load Stalls"] =
680  ( it->second )["L3 Unshared Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
681  ( it->second )["L2 Other Core Hit % of Load Stalls"] =
682  ( it->second )["L2 Other Core Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
683  ( it->second )["L2 Other Core Hit Modified % of Load Stalls"] =
684  ( it->second )["L2 Other Core Hit Modified Impact"] * 100 /
685  ( it->second )["Counted Stalled Cycles due to Load Ops"];
686  ( it->second )["DIV & SQRT Ops % of counted Stalled Cycles"] =
687  ( it->second )["Cycles spent during DIV & SQRT Ops"] * 100 / ( it->second )["Total Counted Stalled Cycles"];
688 
689  ( it->second )["Cycles IFETCH served by Local DRAM"] =
690  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT;
691  ( it->second )["Cycles IFETCH served by L3 (Modified)"] =
692  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * I7_IFETCH_L2_MISS_L3_HITM;
693  ( it->second )["Cycles IFETCH served by L3 (Clean Snoop)"] =
694  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * I7_IFETCH_L2_MISS_L3_HIT_SNOOP;
695  ( it->second )["Cycles IFETCH served by Remote L2"] =
696  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD;
697  ( it->second )["Cycles IFETCH served by Remote DRAM"] =
698  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT;
699  ( it->second )["Cycles IFETCH served by L3 (No Snoop)"] =
700  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP;
701  ( it->second )["Total L2 IFETCH miss Impact"] =
702  ( it->second )["Cycles IFETCH served by Local DRAM"] + ( it->second )["Cycles IFETCH served by L3 (Modified)"] +
703  ( it->second )["Cycles IFETCH served by L3 (Clean Snoop)"] +
704  ( it->second )["Cycles IFETCH served by Remote L2"] + ( it->second )["Cycles IFETCH served by Remote DRAM"] +
705  ( it->second )["Cycles IFETCH served by L3 (No Snoop)"];
706  ( it->second )["Local DRAM IFECTHes % Impact"] =
707  ( it->second )["Cycles IFETCH served by Local DRAM"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
708  ( it->second )["L3 (Modified) IFECTHes % Impact"] =
709  ( it->second )["Cycles IFETCH served by L3 (Modified)"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
710  ( it->second )["L3 (Clean Snoop) IFECTHes % Impact"] = ( it->second )["Cycles IFETCH served by L3 (Clean Snoop)"] *
711  100 / ( it->second )["Total L2 IFETCH miss Impact"];
712  ( it->second )["Remote L2 IFECTHes % Impact"] =
713  ( it->second )["Cycles IFETCH served by Remote L2"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
714  ( it->second )["Remote DRAM IFECTHes % Impact"] =
715  ( it->second )["Cycles IFETCH served by Remote DRAM"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
716  ( it->second )["L3 (No Snoop) IFECTHes % Impact"] =
717  ( it->second )["Cycles IFETCH served by L3 (No Snoop)"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
718  ( it->second )["Total L2 IFETCH misses"] = ( it->second )["L2_RQSTS:IFETCH_MISS"];
719  ( it->second )["% of IFETCHes served by Local DRAM"] =
720  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
721  ( it->second )["% of IFETCHes served by L3 (Modified)"] =
722  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
723  ( it->second )["% of IFETCHes served by L3 (Clean Snoop)"] =
724  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * 100 /
725  ( it->second )["L2_RQSTS:IFETCH_MISS"];
726  ( it->second )["% of IFETCHes served by Remote L2"] =
727  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * 100 /
728  ( it->second )["L2_RQSTS:IFETCH_MISS"];
729  ( it->second )["% of IFETCHes served by Remote DRAM"] =
730  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
731  ( it->second )["% of IFETCHes served by L3 (No Snoop)"] =
732  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
733  ( it->second )["% of L2 IFETCH misses"] =
734  ( it->second )["L2_RQSTS:IFETCH_MISS"] * 100 /
735  ( ( it->second )["L2_RQSTS:IFETCH_MISS"] + ( it->second )["L2_RQSTS:IFETCH_HIT"] );
736  ( it->second )["L1 ITLB Miss Impact"] =
737  ( it->second )["ITLB_MISSES:WALK_COMPLETED"] * I7_L1_ITLB_WALK_COMPLETED_CYCLES;
738 
739  ( it->second )["Total Branch Instructions Executed"] = ( it->second )["BR_INST_EXEC:ANY"];
740  ( it->second )["% of Mispredicted Branches"] =
741  ( it->second )["BR_MISP_EXEC:ANY"] * 100 / ( it->second )["BR_INST_EXEC:ANY"];
742  ( it->second )["Direct Near Calls % of Total Branches Executed"] =
743  ( it->second )["BR_INST_EXEC:DIRECT_NEAR_CALL"] * 100 / ( it->second )["Total Branch Instructions Executed"];
744  ( it->second )["Indirect Near Calls % of Total Branches Executed"] =
745  ( it->second )["BR_INST_EXEC:INDIRECT_NEAR_CALL"] * 100 / ( it->second )["Total Branch Instructions Executed"];
746  ( it->second )["Indirect Near Non-Calls % of Total Branches Executed"] =
747  ( it->second )["BR_INST_EXEC:INDIRECT_NON_CALL"] * 100 / ( it->second )["Total Branch Instructions Executed"];
748  ( it->second )["All Near Calls % of Total Branches Executed"] =
749  ( it->second )["BR_INST_EXEC:NEAR_CALLS"] * 100 / ( it->second )["Total Branch Instructions Executed"];
750  ( it->second )["All Non Calls % of Total Branches Executed"] =
751  ( it->second )["BR_INST_EXEC:NON_CALLS"] * 100 / ( it->second )["Total Branch Instructions Executed"];
752  ( it->second )["All Returns % of Total Branches Executed"] =
753  ( it->second )["BR_INST_EXEC:RETURN_NEAR"] * 100 / ( it->second )["Total Branch Instructions Executed"];
754  ( it->second )["Total Branch Instructions Retired"] = ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"];
755  ( it->second )["Conditionals % of Total Branches Retired"] =
756  ( it->second )["BR_INST_RETIRED:CONDITIONAL"] * 100 / ( it->second )["Total Branch Instructions Retired"];
757  ( it->second )["Near Calls % of Total Branches Retired"] =
758  ( it->second )["BR_INST_RETIRED:NEAR_CALL"] * 100 / ( it->second )["Total Branch Instructions Retired"];
759 
760  ( it->second )["Instruction Starvation % of Total Cycles"] =
761  ( ( it->second )["UOPS_ISSUED:ANY CMASK=1 INV=1"] - ( it->second )["RESOURCE_STALLS:ANY"] ) * 100 /
762  ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
763  ( it->second )["% of Total Cycles spent handling FP exceptions"] =
764  ( it->second )["UOPS_DECODED:MS CMASK=1"] * 100 / ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
765  ( it->second )["# of Instructions per Call"] =
766  ( it->second )["INST_RETIRED:ANY_P"] / ( it->second )["BR_INST_EXEC:NEAR_CALLS"];
767 
768  ( it->second )["Instructions Retired"] = ( it->second )["INST_RETIRED:ANY_P"];
769  ( it->second )["ITLB Miss Rate in %"] =
770  ( ( it->second )["ITLB_MISS_RETIRED"] / ( it->second )["INST_RETIRED:ANY_P"] ) * 100;
771 
772  ( it->second )["Branch Instructions"] = ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"];
773  ( it->second )["Load Instructions"] = ( it->second )["MEM_INST_RETIRED:LOADS"];
774  ( it->second )["Store Instructions"] = ( it->second )["MEM_INST_RETIRED:STORES"];
775  ( it->second )["Other Instructions"] =
776  ( it->second )["Instructions Retired"] - ( it->second )["MEM_INST_RETIRED:LOADS"] -
777  ( it->second )["MEM_INST_RETIRED:STORES"] - ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"];
778  ( it->second )["Packed UOPS Retired"] =
779  ( it->second )["SSEX_UOPS_RETIRED:PACKED_DOUBLE"] + ( it->second )["SSEX_UOPS_RETIRED:PACKED_SINGLE"];
780  ( it->second )["CPI"] = ( it->second )["CPU_CLK_UNHALTED:THREAD_P"] / ( it->second )["INST_RETIRED:ANY_P"];
781 
782  double localPerformanceImprovement = ( it->second )["CPI"] / EXPECTED_CPI;
783  double cyclesAfterImprovement = ( it->second )["CPU_CLK_UNHALTED:THREAD_P"] / localPerformanceImprovement;
784  double totalCyclesAfterImprovement =
785  totalCycles - ( it->second )["CPU_CLK_UNHALTED:THREAD_P"] + cyclesAfterImprovement;
786  ( it->second )["iMargin"] = 100 - ( totalCyclesAfterImprovement / totalCycles ) * 100;
787 
788  ( it->second )["Load % of all Instructions"] =
789  ( it->second )["MEM_INST_RETIRED:LOADS"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
790  ( it->second )["Store % of all Instructions"] =
791  ( it->second )["MEM_INST_RETIRED:STORES"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
792  ( it->second )["Branch % of all Instructions"] =
793  ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
794  ( it->second )["Other % of all Instructions"] =
795  ( it->second )["Other Instructions"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
796 
797  ( it->second )["Packed % of all UOPS Retired"] =
798  ( it->second )["Packed UOPS Retired"] * 100 / ( it->second )["UOPS_RETIRED:ANY"];
799  }
800 }
801 
// S_module class defining the objects containing sampling results for each module.
// A module carries its name, the architecture/event description it was sampled with
// (event name, counter mask, invert mask, sampling period), the total number of samples
// reported for it, and a map from an index string to the sample count accumulated for it.
class S_module
{
private:
  std::string module_name;
  unsigned int total_num_samples;
  std::map<std::string, unsigned int> samples; // sample count per index string
  std::string arch;
  std::string event;
  unsigned int cmask;
  unsigned int inv;
  unsigned int sp;

public:
  S_module() { clear(); }

  // Resets the object to its freshly-constructed state (no samples, empty strings, zeroed numbers).
  void clear()
  {
    samples.clear();
    total_num_samples = 0;
    module_name.clear();
    arch.clear();
    event.clear();
    cmask = 0;
    inv = 0;
    sp = 0;
  }

  // Sets the descriptive fields of the module; samples and total are left untouched.
  void init( const char* name, const char* architecture, const char* event_name, unsigned int c_mask,
             unsigned int inv_mask, unsigned int smpl_period )
  {
    module_name = name;
    arch = architecture;
    event = event_name;
    cmask = c_mask;
    inv = inv_mask;
    sp = smpl_period;
  }

  void set_total( unsigned int total ) { total_num_samples = total; }

  unsigned int get_smpl_period() const { return sp; }
  unsigned int get_inv_mask() const { return inv; }
  unsigned int get_c_mask() const { return cmask; }
  std::string get_arch() const { return arch; }
  std::string get_event() const { return event; }

  // Adds `value` to the count stored under `index` (a missing entry starts at 0).
  void add_sample( const char* index, unsigned int value ) { samples[index] += value; }

  // Extracts the entry with the largest count.
  // char *index         : receives the entry's index string (NUL-terminated); the caller must provide a
  //                       buffer large enough for the longest index string that was added
  // unsigned int *value : receives the entry's count
  // The entry is removed from the module, so repeated calls enumerate entries in decreasing order of
  // count; entries with equal counts come out in the map's key order.
  // returns false (leaving both outputs untouched) when no entry is left
  bool get_max( char* index, unsigned int* value )
  {
    if ( samples.empty() ) return false;
    // Start from the first entry so that a valid iterator is selected even when every count is 0.
    std::map<std::string, unsigned int>::iterator max_pos = samples.begin();
    for ( std::map<std::string, unsigned int>::iterator it = samples.begin(); it != samples.end(); ++it ) {
      if ( it->second > max_pos->second ) {
        max_pos = it;
      }
    }
    strcpy( index, ( max_pos->first ).c_str() );
    *value = max_pos->second;
    samples.erase( max_pos );
    return true;
  }

  std::string get_module_name() const { return module_name; }
  unsigned int get_total_num_samples() const { return total_num_samples; }
};
871 
// void html_special_chars()
// const char *s : source string (NUL-terminated)
// char *s_mod : destination string; must not overlap s and must be large enough for the escaped text
//               (in the worst case 6 output characters per input character, plus the terminating NUL)
// replaces the special HTML characters < > & " with the escaped sequences &lt; &gt; &amp; &quot;
// so that the text can be used inside HTML code; every other character is copied unchanged
void html_special_chars( const char* s, char* s_mod )
{
  // A single write cursor is advanced instead of calling strcat() per character,
  // which would rescan the whole output every time.
  char* out = s_mod;
  for ( const char* in = s; *in != '\0'; ++in ) {
    const char* entity = NULL;
    switch ( *in ) {
    case '<':
      entity = "&lt;";
      break;
    case '>':
      entity = "&gt;";
      break;
    case '&':
      entity = "&amp;";
      break;
    case '"':
      entity = "&quot;";
      break;
    default:
      break;
    }
    if ( entity != NULL ) {
      const size_t entity_len = strlen( entity );
      memcpy( out, entity, entity_len );
      out += entity_len;
    } else {
      *out++ = *in;
    }
  }
  *out = '\0';
}
904 
// func_name()
// const char *demangled_symbol : string corresponding to the demangled symbol found by the read_file() function
// parses the argument and returns just the function name without arguments or return types
//
// WARNING: the string is modified in place (NUL characters are written into it to cut off the argument
// list / template arguments), so the argument must point to writable storage; the returned pointer
// points into that same buffer.
//
// - If the symbol contains the text "operator", the result starts at "operator" and is cut right after
//   the operator token when one of the known tokens follows; otherwise it is returned uncut.
// - Otherwise, if the symbol contains a ')', the parenthesised argument list that ends at the last ')'
//   (and a template argument list directly in front of it) is cut off, and the result is the trailing
//   run of identifier characters (letters, digits, '_' and '~').
// - Otherwise the symbol is returned unchanged.
const char* func_name( const char* demangled_symbol )
{
  // Operator tokens in matching order: a token must appear before any token that is a prefix of it.
  static const char* const operator_tokens[] = {
      "delete[]", "delete", "new[]", "new", ">>=", "<<=", "->*", "<<", ">>", ">=", "<=", "==", "!=", "|=",
      "&=",       "^=",     "%=",    "/=",  "*=",  "-=",  "+=",  "&&", "||", "[]", "()", "++", "--", "->",
      "<",        ">",      "~",     "!",   "+",   "-",   "*",   "/",  "%",  "^",  "&",  "|",  ",",  "="};
  static const size_t num_operator_tokens = sizeof( operator_tokens ) / sizeof( operator_tokens[0] );

  char* const symbol_begin = const_cast<char*>( demangled_symbol );

  char* operator_string_begin = strstr( symbol_begin, "operator" );
  if ( operator_string_begin != NULL ) {
    char* operator_string_end = operator_string_begin + 8; // strlen("operator")
    while ( *operator_string_end == ' ' ) operator_string_end++;
    for ( size_t i = 0; i < num_operator_tokens; ++i ) {
      const size_t token_len = strlen( operator_tokens[i] );
      if ( strncmp( operator_string_end, operator_tokens[i], token_len ) == 0 ) {
        operator_string_end[token_len] = '\0';
        break;
      }
    }
    return operator_string_begin;
  }

  char* cursor = strrchr( symbol_begin, ')' );
  if ( cursor == NULL ) {
    return demangled_symbol;
  }

  // Walk back from the last ')' to the '(' that matches it (stops at the start if unbalanced).
  int depth = 1;
  while ( depth > 0 && cursor != symbol_begin ) {
    const char c = *( --cursor );
    if ( c == ')' ) {
      depth++;
    } else if ( c == '(' ) {
      depth--;
    }
  }
  *cursor = '\0';
  // Nothing in front of the argument list: the name is empty. Stepping back further would read
  // before the start of the buffer.
  if ( cursor == symbol_begin ) {
    return cursor;
  }

  --cursor; // last character in front of the argument list
  if ( *cursor == '>' ) {
    // Template argument list: walk back to the matching '<' and cut it off as well.
    depth = 1;
    while ( depth > 0 && cursor != symbol_begin ) {
      const char c = *( --cursor );
      if ( c == '>' ) {
        depth++;
      } else if ( c == '<' ) {
        depth--;
      }
    }
    *cursor = '\0';
  }

  // Extend backwards over identifier characters, never past the start of the buffer.
  while ( cursor != symbol_begin ) {
    const unsigned char prev = static_cast<unsigned char>( cursor[-1] );
    if ( !( isalnum( prev ) || prev == '_' || prev == '~' ) ) break;
    --cursor;
  }
  return cursor;
}
1082 
1083 // put_module()
1084 // S_module *cur_module : pointer to the current module object to be written out in to HTML file
1085 // const char *event : name of architectural event being analysed
1086 // const char *dir : directory where sampling results input files are located
1087 // creates or updates the HTML output file using information contained inside the module object given as a parameter
1088 void put_S_module( S_module* cur_module, const char* dir )
1089 {
1090  char module_name[MAX_MODULE_NAME_LENGTH];
1091  bzero( module_name, MAX_MODULE_NAME_LENGTH );
1092  strcpy( module_name, ( cur_module->get_module_name() ).c_str() );
1093  char module_filename[MAX_FILENAME_LENGTH];
1094  bzero( module_filename, MAX_FILENAME_LENGTH );
1095  strcpy( module_filename, dir );
1096  strcat( module_filename, "/HTML/" );
1097  strcat( module_filename, module_name );
1098  strcat( module_filename, ".html" );
1099  char event[MAX_EVENT_NAME_LENGTH];
1100  bzero( event, MAX_EVENT_NAME_LENGTH );
1101  strcpy( event, ( cur_module->get_event() ).c_str() );
1102  std::map<std::string, unsigned int>::iterator result = modules_tot_samples.find( cur_module->get_module_name() );
1103  FILE* module_file;
1104  if ( result == modules_tot_samples.end() ) // not found
1105  {
1106  if ( ( !strcmp( event, "UNHALTED_CORE_CYCLES" ) && !nehalem ) ||
1107  ( !strcmp( event, "CPU_CLK_UNHALTED:THREAD_P" ) && nehalem ) ) {
1108  modules_tot_samples.insert(
1110  } else {
1111  modules_tot_samples.insert( std::pair<std::string, unsigned int>( cur_module->get_module_name(), 0 ) );
1112  }
1113  module_file = fopen( module_filename, "w" );
1114  if ( module_file == NULL ) {
1115  fprintf( stderr, "ERROR: Cannot create file %s!!!\naborting...\n", module_filename );
1116  exit( 1 );
1117  }
1118  fprintf( module_file, "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" "
1119  "\"http://www.w3.org/TR/html4/loose.dtd\">\n" );
1120  fprintf( module_file, "<html>\n" );
1121  fprintf( module_file, "<head>\n" );
1122  fprintf( module_file, "<title>\n" );
1123  fprintf( module_file, "%s\n", module_name );
1124  fprintf( module_file, "</title>\n" );
1125  fprintf( module_file, "</head>\n" );
1126  fprintf( module_file, "<body>\n" );
1127  fprintf( module_file, "<h2>%s</h2><br/>Events Sampled:<br/>\n", module_name );
1128  fprintf( module_file, "<ul>\n" );
1129  for ( std::vector<std::string>::const_iterator it = S_events.begin(); it != S_events.end(); ++it ) {
1130  fprintf( module_file, "<li><a href=\"#%s\">%s</a></li>\n", it->c_str(), it->c_str() );
1131  }
1132  fprintf( module_file, "</ul>\n" );
1133  } // if(result == modules_tot_samples.end()) //not found
1134  else {
1135  if ( ( !strcmp( event, "UNHALTED_CORE_CYCLES" ) && !nehalem ) ||
1136  ( !strcmp( event, "CPU_CLK_UNHALTED:THREAD_P" ) && nehalem ) ) {
1137  modules_tot_samples[cur_module->get_module_name()] = cur_module->get_total_num_samples();
1138  }
1139  module_file = fopen( module_filename, "a" );
1140  } // else:: if(result != modules_tot_samples.end()) //found!!
1141  char event_str[MAX_EVENT_NAME_LENGTH];
1142  bzero( event_str, MAX_EVENT_NAME_LENGTH );
1143  strcpy( event_str, event );
1144  if ( cur_module->get_c_mask() > 0 ) {
1145  sprintf( event_str, "%s CMASK=%d", event_str, cur_module->get_c_mask() );
1146  }
1147  if ( cur_module->get_inv_mask() > 0 ) {
1148  sprintf( event_str, "%s INV=%d", event_str, cur_module->get_inv_mask() );
1149  }
1150  fprintf( module_file, "<a name=\"%s\"><a>\n", event_str );
1151  fprintf( module_file, "<table cellpadding=\"5\">\n" );
1152  fprintf( module_file, "<tr bgcolor=\"#EEEEEE\">\n" );
1153  fprintf( module_file, "<th colspan=\"6\" align=\"left\">%s -- cmask: %u -- invmask: %u -- Total Samples: %u -- "
1154  "Sampling Period: %d</th>\n",
1155  event, cur_module->get_c_mask(), cur_module->get_inv_mask(), cur_module->get_total_num_samples(),
1156  cur_module->get_smpl_period() );
1157  fprintf( module_file, "</tr>\n" );
1158  fprintf( module_file, "<tr bgcolor=\"#EEEEEE\">\n" );
1159  fprintf( module_file, "<th align=\"left\">Samples</th>\n" );
1160  fprintf( module_file, "<th align=\"left\">Percentage</th>\n" );
1161  fprintf( module_file, "<th align=\"left\">Symbol Name</th>\n" );
1162  fprintf( module_file, "<th align=\"left\">Library Name</th>\n" );
1163  fprintf( module_file, "<th align=\"left\">Complete Signature</th>\n" );
1164  fprintf( module_file, "<th align=\"left\">Library Pathname</th>\n" );
1165  fprintf( module_file, "</tr>\n" );
1166  for ( int j = 0; j < 20; j++ ) {
1167  char sym[MAX_SYM_LENGTH];
1168  char sym_mod[MAX_SYM_MOD_LENGTH];
1169  char lib[MAX_LIB_LENGTH];
1170  char lib_mod[MAX_LIB_MOD_LENGTH];
1171  char simple_sym[MAX_SIMPLE_SYM_LENGTH];
1172  char simple_sym_mod[MAX_SIMPLE_SYM_MOD_LENGTH];
1173  char simple_lib[MAX_SIMPLE_LIB_LENGTH];
1174  char simple_lib_mod[MAX_SIMPLE_LIB_MOD_LENGTH];
1175 
1176  bzero( sym, MAX_SYM_LENGTH );
1177  bzero( sym_mod, MAX_SYM_MOD_LENGTH );
1178  bzero( lib, MAX_LIB_LENGTH );
1179  bzero( lib_mod, MAX_LIB_MOD_LENGTH );
1180  bzero( simple_sym, MAX_SIMPLE_SYM_LENGTH );
1181  bzero( simple_sym_mod, MAX_SIMPLE_SYM_MOD_LENGTH );
1182  bzero( simple_lib, MAX_SIMPLE_LIB_LENGTH );
1183  bzero( simple_lib_mod, MAX_SIMPLE_LIB_MOD_LENGTH );
1184 
1185  char index[MAX_SAMPLE_INDEX_LENGTH];
1186  bzero( index, MAX_SAMPLE_INDEX_LENGTH );
1187  unsigned int value;
1188  bool res = cur_module->get_max( index, &value );
1189  if ( !res ) break;
1190  char* sym_end = strchr( index, '%' );
1191  if ( sym_end == NULL ) // error
1192  {
1193  fprintf( stderr, "ERROR: Invalid sym and lib name! : %s\naborting...\n", index );
1194  exit( 1 );
1195  }
1196  strncpy( sym, index, strlen( index ) - strlen( sym_end ) );
1197  strcpy( lib, sym_end + 1 );
1198  char temp[MAX_SYM_LENGTH];
1199  bzero( temp, MAX_SYM_LENGTH );
1200  strcpy( temp, sym );
1201  strcpy( simple_sym, ( func_name( temp ) ) );
1202  if ( strrchr( lib, '/' ) != NULL && *( strrchr( lib, '/' ) + 1 ) != '\0' ) {
1203  strcpy( simple_lib, strrchr( lib, '/' ) + 1 );
1204  } else {
1205  strcpy( simple_lib, lib );
1206  }
1207  if ( j % 2 != 0 ) {
1208  fprintf( module_file, "<tr bgcolor=\"#FFFFCC\">\n" );
1209  } else {
1210  fprintf( module_file, "<tr bgcolor=\"#CCFFCC\">\n" );
1211  }
1212  fprintf( module_file, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%u</td>\n", value );
1213  fprintf( module_file, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%f%%</td>\n",
1214  ( ( (double)( value ) ) / ( (double)( cur_module->get_total_num_samples() ) ) ) * 100 );
1215  html_special_chars( simple_sym, simple_sym_mod );
1216  html_special_chars( simple_lib, simple_lib_mod );
1217  html_special_chars( sym, sym_mod );
1218  html_special_chars( lib, lib_mod );
1219  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n", simple_sym_mod );
1220  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n", simple_lib_mod );
1221  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n", sym_mod );
1222  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n</tr>\n", lib_mod );
1223  }
1224  fprintf( module_file, "</table><br/><br/>\n" );
1225  int res = fclose( module_file );
1226  if ( res ) {
1227  fprintf( stderr, "ERROR: Cannot close file %s!!!\naborting...\n", module_filename );
1228  exit( 1 );
1229  }
1230  return;
1231 }
1232 
1233 // read_S_file()
1234 // const char *dir : directory where sampling results input files are located
1235 // const char *filename : name of the current file to analyse
1236 // reads content of a gzipped sampling result file, finds names of symbols inside libraries using their offsets,
1237 // demangles them to make them human-readable, creates the module objects (with their sampling values),
1238 // and calls the put_module() function to create (or update) the corresponding HTML output file
1239 // returns 0 on success
1240 int read_S_file( const char* dir, const char* filename )
1241 {
1242  char line[MAX_LINE_LENGTH];
1243  char event[MAX_EVENT_NAME_LENGTH];
1244  char arch[MAX_ARCH_NAME_LENGTH];
1245  unsigned int cmask;
1246  unsigned int inv;
1247  unsigned int sp;
1248  char cur_module_name[MAX_MODULE_NAME_LENGTH];
1249  bzero( line, MAX_LINE_LENGTH );
1250  bzero( event, MAX_EVENT_NAME_LENGTH );
1251  bzero( cur_module_name, MAX_MODULE_NAME_LENGTH );
1252  bzero( arch, MAX_ARCH_NAME_LENGTH );
1253 
1254  S_module* cur_module = new S_module();
1255  unsigned int module_num = 0;
1256 
1257  char path_name[MAX_FILENAME_LENGTH];
1258  bzero( path_name, MAX_FILENAME_LENGTH );
1259  strcpy( path_name, dir );
1260  strcat( path_name, "/" );
1261  strcat( path_name, filename );
1262  gzFile res_file = gzopen( path_name, "rb" );
1263 
1264  if ( res_file != NULL ) {
1265  bzero( line, MAX_LINE_LENGTH );
1266  gzgets( res_file, line, MAX_LINE_LENGTH );
1267  if ( line[strlen( line ) - 1] == '\n' ) line[strlen( line ) - 1] = '\0';
1268  bzero( event, MAX_EVENT_NAME_LENGTH );
1269  sscanf( line, "%s %s %u %u %u", arch, event, &cmask, &inv, &sp );
1270  if ( !strcmp( arch, "NHM" ) )
1271  nehalem = true;
1272  else
1273  nehalem = false;
1274  bzero( line, MAX_LINE_LENGTH );
1275  while ( gzgets( res_file, line, MAX_LINE_LENGTH ) != Z_NULL ) {
1276  if ( line[strlen( line ) - 1] == '\n' ) line[strlen( line ) - 1] = '\0';
1277  if ( strchr( line, ' ' ) == NULL ) // module
1278  {
1279  if ( module_num > 0 ) {
1280  put_S_module( cur_module, dir );
1281  cur_module->clear();
1282  }
1283  module_num++;
1284  char* end_sym = strchr( line, '%' );
1285  if ( end_sym == NULL ) // error
1286  {
1287  fprintf( stderr, "ERROR: Invalid module name. \nLINE: %s\naborting...\n", line );
1288  exit( 1 );
1289  }
1290  bzero( cur_module_name, MAX_MODULE_NAME_LENGTH );
1291  strncpy( cur_module_name, line, strlen( line ) - strlen( end_sym ) );
1292  cur_module->init( cur_module_name, arch, event, cmask, inv, sp );
1293  cur_module->set_total( atoi( end_sym + 1 ) );
1294  } // module
1295  else // symbol, libName, libOffset, value
1296  {
1297  unsigned int value = 0, libOffset = 0;
1298  char symbol[MAX_SYM_LENGTH];
1299  char libName[MAX_LIB_LENGTH];
1300  char final_sym[MAX_SYM_MOD_LENGTH];
1301  char final_lib[MAX_LIB_MOD_LENGTH];
1302  bzero( symbol, MAX_SYM_LENGTH );
1303  bzero( libName, MAX_LIB_LENGTH );
1304  bzero( final_sym, MAX_SYM_MOD_LENGTH );
1305  bzero( final_lib, MAX_LIB_MOD_LENGTH );
1306 
1307  sscanf( line, "%s %s %u %u", symbol, libName, &libOffset, &value );
1308  char realPathName_s[FILENAME_MAX];
1309  bzero( realPathName_s, FILENAME_MAX );
1310  char* realPathName = realpath( libName, realPathName_s );
1311  if ( realPathName != NULL && strlen( realPathName ) > 0 ) {
1313  result = libsInfo.find( realPathName );
1314  if ( result == libsInfo.end() ) {
1315  libsInfo[realPathName] = FileInfo( realPathName, true );
1316  }
1317  const char* temp_sym = libsInfo[realPathName].symbolByOffset( libOffset );
1318  if ( temp_sym != NULL && strlen( temp_sym ) > 0 ) {
1319  int status;
1320  char* demangled_symbol = abi::__cxa_demangle( temp_sym, NULL, NULL, &status );
1321  if ( status == 0 ) {
1322  strcpy( final_sym, demangled_symbol );
1323  free( demangled_symbol );
1324  } else {
1325  strcpy( final_sym, temp_sym );
1326  }
1327  } else {
1328  strcpy( final_sym, "???" );
1329  }
1330  strcpy( final_lib, realPathName );
1331  } else {
1332  strcpy( final_sym, symbol );
1333  strcpy( final_lib, libName );
1334  }
1335  char index[MAX_LINE_LENGTH];
1336  bzero( index, MAX_LINE_LENGTH );
1337  strcpy( index, final_sym );
1338  strcat( index, "%" );
1339  strcat( index, final_lib );
1340  cur_module->add_sample( index, value );
1341  } // symbol, libName, libOffset, value
1342  bzero( line, MAX_LINE_LENGTH );
1343  } // while(gzgets(res_file, line, MAX_LINE_LENGTH)!=Z_NULL)
1344  put_S_module( cur_module, dir ); // last module!
1345  cur_module->clear();
1346  gzclose( res_file );
1347  } // if(res_file != NULL)
1348  else {
1349  fprintf( stderr, "ERROR: Unable to open input file: %s\naborting...\n", filename );
1350  exit( 1 );
1351  }
1352  delete cur_module; // delete it!
1353  return 0;
1354 }
1355 
1356 int read_S_events( const char* dir, const char* filename )
1357 {
1358  char event[MAX_EVENT_NAME_LENGTH];
1359  char arch[MAX_ARCH_NAME_LENGTH];
1360  char line[MAX_LINE_LENGTH];
1361  char cmask_str[MAX_CMASK_STR_LENGTH];
1362  char inv_str[MAX_INV_STR_LENGTH];
1363  char sp_str[MAX_SP_STR_LENGTH];
1364  bzero( line, MAX_LINE_LENGTH );
1365  bzero( event, MAX_EVENT_NAME_LENGTH );
1366  bzero( arch, MAX_ARCH_NAME_LENGTH );
1367  bzero( cmask_str, MAX_CMASK_STR_LENGTH );
1368  bzero( inv_str, MAX_INV_STR_LENGTH );
1369  bzero( sp_str, MAX_SP_STR_LENGTH );
1370  char path_name[MAX_FILENAME_LENGTH];
1371  bzero( path_name, MAX_FILENAME_LENGTH );
1372  strcpy( path_name, dir );
1373  strcat( path_name, "/" );
1374  strcat( path_name, filename );
1375  gzFile res_file = gzopen( path_name, "rb" );
1376  if ( res_file != NULL ) {
1377  bzero( line, MAX_LINE_LENGTH );
1378  gzgets( res_file, line, MAX_LINE_LENGTH );
1379  if ( line[strlen( line ) - 1] == '\n' ) line[strlen( line ) - 1] = '\0';
1380  bzero( event, MAX_EVENT_NAME_LENGTH );
1381  sscanf( line, "%s %s %s %s %s\n", arch, event, cmask_str, inv_str, sp_str );
1382  std::string event_str( event );
1383  if ( atoi( cmask_str ) > 0 ) {
1384  event_str += " CMASK=";
1385  event_str += cmask_str;
1386  }
1387  if ( atoi( inv_str ) > 0 ) {
1388  event_str += " INV=";
1389  event_str += inv_str;
1390  }
1391  S_events.push_back( event_str );
1392  } // if(res_file != NULL)
1393  else {
1394  fprintf( stderr, "ERROR: Unable to open input file: %s\naborting...\n", filename );
1395  exit( 1 );
1396  }
1397  return 0;
1398 }
1399 
1400 // finalize_html_pages()
1401 // const char *dir : directory contating sampling result files
1402 // puts footers in module HTML pages and creates index file
1403 int finalize_S_html_pages( const char* dir )
1404 {
1405  for ( std::map<std::string, unsigned int>::const_iterator i = modules_tot_samples.begin();
1406  i != modules_tot_samples.end(); i++ ) {
1407  char module_filename[MAX_FILENAME_LENGTH];
1408  strcpy( module_filename, dir );
1409  strcat( module_filename, "/HTML/" );
1410  strcat( module_filename, ( i->first ).c_str() );
1411  strcat( module_filename, ".html" );
1412  FILE* module_file = fopen( module_filename, "a" );
1413  if ( module_file == NULL ) {
1414  fprintf( stderr, "ERROR: Unable to append to file: %s\naborting...\n", module_filename );
1415  exit( 1 );
1416  }
1417  fprintf( module_file, "</body>\n</html>\n" );
1418  if ( fclose( module_file ) ) {
1419  fprintf( stderr, "ERROR: Cannot close file %s!!!\naborting...\n", module_filename );
1420  exit( 1 );
1421  }
1422  }
1423  return 0;
1424 }
1425 
1426 // read_file()
1427 // const char *filename : input file to analyse
1428 // analyses the event file and updates the list of modules with counter information found in the file
1429 // returns the number of modules found in the file
1430 int read_C_file( const char* dir, const char* filename )
1431 {
1432  char event[MAX_EVENT_NAME_LENGTH];
1433  char arch[MAX_ARCH_NAME_LENGTH];
1434  char line[MAX_LINE_LENGTH];
1435  char cmask_str[MAX_CMASK_STR_LENGTH];
1436  char inv_str[MAX_INV_STR_LENGTH];
1437  char sp_str[MAX_SP_STR_LENGTH];
1438  char cur_module_name[MAX_MODULE_NAME_LENGTH];
1439  bzero( line, MAX_LINE_LENGTH );
1440  bzero( event, MAX_EVENT_NAME_LENGTH );
1441  bzero( cur_module_name, MAX_MODULE_NAME_LENGTH );
1442  bzero( arch, MAX_ARCH_NAME_LENGTH );
1443  bzero( line, MAX_LINE_LENGTH );
1444  bzero( cmask_str, MAX_CMASK_STR_LENGTH );
1445  bzero( inv_str, MAX_INV_STR_LENGTH );
1446  bzero( sp_str, MAX_SP_STR_LENGTH );
1447  int number_of_modules = 0;
1448  long cur_sum = 0;
1449  int no_of_values = 0;
1450  char path_name[MAX_FILENAME_LENGTH];
1451  bzero( path_name, MAX_FILENAME_LENGTH );
1452  strcpy( path_name, dir );
1453  strcat( path_name, "/" );
1454  strcat( path_name, filename );
1455  FILE* fp = fopen( path_name, "r" );
1456  int stat = fscanf( fp, "%s %s %s %s %s\n", arch, event, cmask_str, inv_str, sp_str );
1457  if ( stat != 5 ) {
1458  std::cerr << "ERROR: failed to parse " << path_name << std::endl;
1459  exit( 1 );
1460  }
1461  if ( !strcmp( arch, "NHM" ) )
1462  nehalem = true;
1463  else
1464  nehalem = false;
1465  std::string event_str( event );
1466  if ( atoi( cmask_str ) > 0 ) {
1467  event_str += " CMASK=";
1468  event_str += cmask_str;
1469  }
1470  if ( atoi( inv_str ) > 0 ) {
1471  event_str += " INV=";
1472  event_str += inv_str;
1473  }
1474  C_events.push_back( event_str );
1475  while ( fscanf( fp, "%s\n", line ) != EOF ) {
1476  if ( isalpha( line[0] ) ) // module
1477  {
1478  if ( number_of_modules > 0 ) {
1479  C_modules[cur_module_name][event_str] = (double)cur_sum / no_of_values;
1480  cur_sum = 0;
1481  no_of_values = 0;
1482  }
1483  strcpy( cur_module_name, line );
1484  number_of_modules++;
1485  } else if ( isdigit( line[0] ) ) // value
1486  {
1487  cur_sum += strtol( line, NULL, 10 );
1488  no_of_values++;
1489  }
1490  }
1491  C_modules[cur_module_name][event_str] = (double)cur_sum / no_of_values; // last module
1492  fclose( fp );
1493  return number_of_modules;
1494 }
1495 
// put_C_header()
// FILE *fp                          : stream receiving the HTML
// std::vector<std::string> &columns : column titles; an empty title produces a blank white cell
// writes the page preamble (doctype, <head> with sorttable.js and the table style), the
// "RESULTS:" heading and the opening of the sortable table including its header row
void put_C_header( FILE* fp, std::vector<std::string>& columns )
{
  // Everything up to and including the fixed first header cell is constant text.
  fputs( "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\n"
         "<html>\n"
         "<head>\n"
         "<title>\n"
         "Analysis Result\n"
         "</title>\n"
         "<script src=\"sorttable.js\"></script>\n"
         "<style>\ntable.sortable thead "
         "{\nbackground-color:#eee;\ncolor:#666666;\nfont-weight:bold;\ncursor:default;\nfont-family:courier;\n}"
         "\n</style>\n"
         "</head>\n"
         "<body link=\"black\">\n"
         "<h1>RESULTS:</h1><br/>Click for detailed symbol view...<p/>\n"
         "<table class=\"sortable\" cellpadding=\"5\">\n"
         "<tr>\n"
         "<th>MODULE NAME</th>\n",
         fp );

  for ( const std::string& title : columns ) {
    if ( title.c_str()[0] == '\0' ) {
      fputs( "<th bgcolor=\"#FFFFFF\">&nbsp;</th>\n", fp );
    } else {
      fprintf( fp, "<th>%s</th>\n", title.c_str() );
    }
  }
  fputs( "</tr>\n", fp );
}
1525 
1526 void put_C_modules( FILE* fp, std::vector<std::string>& columns )
1527 {
1528  int index = 0;
1529  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1530  ++it ) {
1531  if ( index % 2 )
1532  fprintf( fp, "<tr bgcolor=\"#FFFFCC\">\n" );
1533  else
1534  fprintf( fp, "<tr bgcolor=\"#CCFFCC\">\n" );
1535  fprintf( fp, "<td style=\"font-family:monospace;font-size:large;color:Black\"><a href=\"%s.html\">%s</a></td>\n",
1536  ( it->first ).c_str(), ( it->first ).c_str() );
1537  for ( std::vector<std::string>::const_iterator jt = columns.begin(); jt != columns.end(); ++jt ) {
1538  if ( strlen( jt->c_str() ) == 0 ) {
1539  fprintf( fp, "<td bgcolor=\"#FFFFFF\">&nbsp;</td>" );
1540  } else {
1541  if ( ( it->second ).find( *jt ) == ( it->second ).end() ) {
1542  fprintf( stderr, "ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", ( *jt ).c_str() );
1543  exit( 1 );
1544  }
1545  fprintf( fp, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\" align=\"right\">%.2f</td>\n",
1546  ( it->second )[*jt] );
1547  }
1548  }
1549  fprintf( fp, "</tr>\n" );
1550  index++;
1551  }
1552 }
1553 
// put_C_footer()
// FILE *fp : stream receiving the HTML
// closes the table, the body and the document
void put_C_footer( FILE* fp )
{
  fputs( "</table>\n</body>\n</html>\n", fp );
}
1559 
// put_C_header_csv()
// FILE *fp                          : stream receiving the CSV text
// std::vector<std::string> &columns : column titles; empty titles are skipped
// writes the CSV header line: "MODULE NAME" followed by ",<title>" for each non-empty title
void put_C_header_csv( FILE* fp, std::vector<std::string>& columns )
{
  fputs( "MODULE NAME", fp );
  for ( const std::string& title : columns ) {
    if ( title.c_str()[0] == '\0' ) continue; // spacer columns have no CSV counterpart
    fprintf( fp, ",%s", title.c_str() );
  }
  fputc( '\n', fp );
}
1571 
1573 {
1574  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1575  ++it ) {
1576  fprintf( fp, "%s", ( it->first ).c_str() );
1577  for ( std::vector<std::string>::const_iterator jt = columns.begin(); jt != columns.end(); ++jt ) {
1578  if ( strlen( jt->c_str() ) == 0 ) {
1579  } else {
1580  if ( ( it->second ).find( *jt ) == ( it->second ).end() ) {
1581  fprintf( stderr, "ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", ( *jt ).c_str() );
1582  exit( 1 );
1583  }
1584  fprintf( fp, ",%.2f", ( it->second )[*jt] );
1585  }
1586  }
1587  fprintf( fp, "\n" );
1588  }
1589 }
1590 
1591 // normalize()
1592 // struct C_module *mod : pointer to the head of the list of modules
1593 // int counter : event selected (see C_module class for which event corresponds to which number)
1594 // int number_of_modules : length of the list
1595 // double value : value to be normalized
1596 // double normalizeTo : value to which the value above should be normalized
1597 // returns the normalized value
1598 double normalize( std::string field, double value, double normalizeTo )
1599 {
1600  double max = 0;
1601  double counter_value;
1602  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1603  ++it ) {
1604  counter_value = ( it->second )[field];
1605  if ( max < counter_value ) max = counter_value;
1606  }
1607  if ( value > 0 && max > 0 && normalizeTo > 0 ) {
1608  return 1. * value / max * normalizeTo;
1609  } else
1610  return 0;
1611 }
1612 
1613 // calc_post_deriv_values()
1614 // struct C_module *mod : pointer to the head of the list of modules
1615 // double totalCycles : total cycles spent by all the modules
1616 // int number_of_modules : length of the list
1617 // calculates the iFactor of each module
1619 {
1620  if ( nehalem ) {
1621  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1622  ++it ) {
1623  double simdnorm =
1624  1. - normalize( "Packed % of all UOPS Retired", ( it->second )["Packed % of all UOPS Retired"], 1 );
1625  double misspnorm = normalize( "% of Mispredicted Branches", ( it->second )["% of Mispredicted Branches"], 1 );
1626  double stallnorm = normalize( "Stalled Cycles", ( it->second )["Stalled Cycles"], 1 );
1627  ( it->second )["iFactor"] = stallnorm * ( simdnorm + misspnorm + stallnorm );
1628  }
1629  } else {
1630  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1631  ++it ) {
1632  double simdnorm =
1633  1. - normalize( "Packed SIMD % of all Instructions", ( it->second )["Packed SIMD % of all Instructions"], 1 );
1634  double misspnorm = normalize( "% of Mispredicted Branches", ( it->second )["% of Mispredicted Branches"], 1 );
1635  double stallnorm = normalize( "Stalled Cycles", ( it->second )["Stalled Cycles"], 1 );
1636  ( it->second )["iFactor"] = stallnorm * ( simdnorm + misspnorm + stallnorm );
1637  }
1638  }
1639 }
1640 
1641 // getTotalCycles()
1642 // struct C_module *mod : pointer to the head of the list of modules
1643 // int number_of_modules : length of the list
1644 // returns the number of total cycles spent by all the modules
1646 {
1647  double sum = 0;
1648  if ( nehalem ) {
1649  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1650  ++it ) {
1651  sum += ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
1652  }
1653  } else {
1654  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1655  ++it ) {
1656  sum += ( it->second )["UNHALTED_CORE_CYCLES"];
1657  }
1658  }
1659  return sum;
1660 }
1661 
1662 // main()
1663 // takes as argument the directory containing results
1664 // and produces the HTML directory inside of it containing browsable statistics
1665 int main( int argc, char* argv[] )
1666 {
1667  if ( argc < 2 || argc > 4 ) {
1668  printf( "\n\nUsage: %s DIRECTORY [--caa] [--csv]\n\n", argv[0] );
1669  exit( 1 );
1670  }
1671 
1672  bool caa = false;
1673  bool csv = false;
1674  for ( int i = 2; i < argc; i++ ) {
1675  if ( !strcmp( argv[i], "--caa" ) ) caa = true;
1676  if ( !strcmp( argv[i], "--csv" ) ) csv = true;
1677  }
1678 
1679  char dir[MAX_FILENAME_LENGTH];
1680  strcpy( dir, argv[1] );
1681  if ( !csv ) {
1682  strcat( dir, "/HTML" );
1683  int res = mkdir( dir, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH );
1684  if ( res != 0 ) {
1685  fprintf( stderr, "ERROR: Cannot create directory %s\naborting...\n", dir );
1686  exit( 1 );
1687  }
1688  }
1689 
1690  DIR* dp;
1691  struct dirent* dirp;
1692  int num_of_modules = 0;
1693  if ( ( dp = opendir( argv[1] ) ) == NULL ) {
1694  printf( "Error(%d) opening %s\n", errno, argv[1] );
1695  return errno;
1696  }
1697  while ( ( dirp = readdir( dp ) ) != NULL ) {
1698  if ( strstr( dirp->d_name, "_S_" ) != NULL && strstr( dirp->d_name, ".txt.gz" ) != NULL && !csv ) {
1699  if ( read_S_events( argv[1], dirp->d_name ) ) {
1700  fprintf( stderr, "ERROR: Cannot read file %s\naborting...\n", dirp->d_name );
1701  exit( 1 );
1702  }
1703  }
1704  }
1705  closedir( dp );
1706  sort( S_events.begin(), S_events.end() );
1707  if ( ( dp = opendir( argv[1] ) ) == NULL ) {
1708  printf( "Error(%d) opening %s\n", errno, argv[1] );
1709  return errno;
1710  }
1711  while ( ( dirp = readdir( dp ) ) != NULL ) {
1712  if ( strstr( dirp->d_name, "_S_" ) != NULL && strstr( dirp->d_name, ".txt.gz" ) != NULL && !csv ) {
1713  if ( read_S_file( argv[1], dirp->d_name ) ) {
1714  fprintf( stderr, "ERROR: Cannot read file %s\naborting...\n", dirp->d_name );
1715  exit( 1 );
1716  }
1717  } else if ( strstr( dirp->d_name, "_C_" ) != NULL && strstr( dirp->d_name, ".txt" ) != NULL ) {
1718  int res = read_C_file( argv[1], dirp->d_name );
1719  if ( res > num_of_modules ) {
1720  num_of_modules = res;
1721  }
1722  }
1723  }
1724  closedir( dp );
1725 
1726  if ( !csv ) {
1727  if ( finalize_S_html_pages( argv[1] ) ) {
1728  fprintf( stderr, "ERROR: Cannot finalize HTML pages!!!\naborting...\n" );
1729  exit( 1 );
1730  }
1731  }
1732 
1733  char filepath[MAX_FILENAME_LENGTH];
1734  bzero( filepath, MAX_FILENAME_LENGTH );
1735  if ( !csv )
1736  sprintf( filepath, "%s/HTML/index.html", argv[1] );
1737  else
1738  sprintf( filepath, "%s/results.csv", argv[1] );
1739  FILE* fp = fopen( filepath, "w" );
1740  if ( fp == NULL ) {
1741  fprintf( stderr, "ERROR: Cannot create file index.html!!!\naborting...\n" );
1742  exit( 1 );
1743  }
1744 
1745  if ( caa ) {
1746  double totalCycles;
1747  if ( !nehalem ) {
1749  if ( !check_for_core_caa_events() ) {
1750  fprintf( stderr, "(core) ERROR: One or more events for CAA missing!\naborting...\n" );
1751  exit( 1 );
1752  }
1754  totalCycles = getTotalCycles();
1755  calc_core_deriv_values( totalCycles );
1757  if ( !csv ) {
1758  put_C_header( fp, core_caa_events_displ );
1759  put_C_modules( fp, core_caa_events_displ );
1760  } else {
1761  put_C_header_csv( fp, core_caa_events_displ );
1762  put_C_modules_csv( fp, core_caa_events_displ );
1763  }
1764  } else {
1766  if ( !check_for_nhm_caa_events() ) {
1767  fprintf( stderr, "(nehalem) ERROR: One or more events for CAA missing!\naborting...\n" );
1768  exit( 1 );
1769  }
1771  totalCycles = getTotalCycles();
1772  calc_nhm_deriv_values( totalCycles );
1774  if ( !csv ) {
1775  put_C_header( fp, nhm_caa_events_displ );
1776  put_C_modules( fp, nhm_caa_events_displ );
1777  } else {
1778  put_C_header_csv( fp, nhm_caa_events_displ );
1779  put_C_modules_csv( fp, nhm_caa_events_displ );
1780  }
1781  }
1782  if ( !csv ) put_C_footer( fp );
1783  fclose( fp );
1784  } else {
1785  if ( !csv ) {
1786  put_C_header( fp, C_events );
1787  put_C_modules( fp, C_events );
1788  put_C_footer( fp );
1789  } else {
1790  put_C_header_csv( fp, C_events );
1791  put_C_modules_csv( fp, C_events );
1792  }
1793  fclose( fp );
1794  }
1795  if ( !csv ) {
1796  char src[MAX_FILENAME_LENGTH];
1797  char dst[MAX_FILENAME_LENGTH];
1798  sprintf( src, "sorttable.js" );
1799  sprintf( dst, "%s/HTML/sorttable.js", argv[1] );
1800  int fd_src = open( src, O_RDONLY );
1801  if ( fd_src == -1 ) {
1802  fprintf( stderr, "ERROR: Cannot open file \"%s\"!\naborting...\n", src );
1803  exit( 1 );
1804  }
1805  int fd_dst = open( dst, O_WRONLY | O_CREAT | O_TRUNC, 0644 );
1806  if ( fd_dst == -1 ) {
1807  fprintf( stderr, "ERROR: Cannot open file \"%s\" (%s)!\naborting...\n", dst, strerror( errno ) );
1808  exit( 1 );
1809  }
1810  char c;
1811  while ( read( fd_src, &c, 1 ) ) {
1812  if ( write( fd_dst, &c, 1 ) == -1 ) {
1813  std::cerr << "ERROR: failed to write to " << dst << std::endl;
1814  exit( 1 );
1815  }
1816  }
1817  close( fd_dst );
1818  close( fd_src );
1819  }
1820  return 0;
1821 }
std::istringstream * iss
std::string module_name
#define MAX_EVENT_NAME_LENGTH
std::map< std::string, unsigned int > samples
unsigned int sp
unsigned int get_inv_mask()
#define I7_L3_UNSHARED_HIT_CYCLES
T empty(T...args)
const char * func_name(const char *demangled_symbol)
const char * symbolByOffset(Offset offset)
bool skipString(const char *strptr, const char *srcbuffer, const char **dstbuffer)
#define MAX_SAMPLE_INDEX_LENGTH
#define CORE_UNKNOWN_ADDR_STORE_CYCLES
#define CORE_LCP_STALL_CYCLES
#define MAX_LIB_LENGTH
void put_C_footer(FILE *fp)
void put_S_module(S_module *cur_module, const char *dir)
double sum(double x, double y, double z)
#define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES
void init_core_caa_events_displ()
void put_C_modules(FILE *fp, std::vector< std::string > &columns)
list argv
Definition: gaudirun.py:235
std::string event
T getline(T...args)
bool get_max(char *index, unsigned int *value)
unsigned int cmask
T endl(T...args)
void init_core_caa_events()
#define MAX_SYM_LENGTH
def read(f, regex='.*', skipevents=0)
Definition: hivetimeline.py:22
#define I7_IFETCH_L2_MISS_L3_HITM
int read_S_file(const char *dir, const char *filename)
std::vector< CacheItem > SymbolCache
#define EXPECTED_CPI
#define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD
bool check_for_nhm_caa_events()
T end(T...args)
unsigned int get_smpl_period()
std::istringstream & output(void)
#define MAX_ARCH_NAME_LENGTH
void add_sample(const char *index, unsigned int value)
#define I7_OTHER_CORE_L2_HITM_CYCLES
#define MAX_LINE_LENGTH
double normalize(std::string field, double value, double normalizeTo)
#define CORE_L1_DTLB_MISS_CYCLES
FileInfo(const std::string &name, bool useGdb)
std::string arch
#define CORE_OVERLAPPING_CYCLES
void calc_post_deriv_values()
void calc_nhm_deriv_values(double totalCycles)
std::string NAME
#define MAX_SIMPLE_SYM_LENGTH
void init_nhm_caa_events_displ()
#define MAX_LIB_MOD_LENGTH
int finalize_S_html_pages(const char *dir)
STL class.
void put_C_header(FILE *fp, std::vector< std::string > &columns)
#define MAX_SIMPLE_LIB_MOD_LENGTH
void init_nhm_caa_events()
T push_back(T...args)
#define I7_L1_ITLB_WALK_COMPLETED_CYCLES
void calc_core_deriv_values(double totalCycles)
#define MAX_SP_STR_LENGTH
Offset next(Offset offset)
#define I7_L2_HIT_CYCLES
void init(const char *name, const char *architecture, const char *event_name, unsigned int c_mask, unsigned int inv_mask, unsigned int smpl_period)
#define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES
unsigned int total_num_samples
void set_total(unsigned int total)
#define I7_OTHER_CORE_L2_HIT_CYCLES
void html_special_chars(const char *s, char *s_mod)
std::string get_arch()
#define MAX_CMASK_STR_LENGTH
#define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES
T erase(T...args)
#define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES
int read_S_events(const char *dir, const char *filename)
#define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP
#define I7_L1_DTLB_WALK_COMPLETED_CYCLES
T clear(T...args)
unsigned int inv
bool operator()(const int &a, const CacheItem &b) const
#define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT
#define MAX_FILENAME_LENGTH
#define CORE_L2_MISS_CYCLES
constexpr double nm
Definition: SystemOfUnits.h:83
T insert(T...args)
void put_C_header_csv(FILE *fp, std::vector< std::string > &columns)
#define PIPE_BUFFER_LENGTH
T find(T...args)
T size(T...args)
void createOffsetMap(void)
#define MAX_SIMPLE_SYM_MOD_LENGTH
T begin(T...args)
SymbolCache m_symbolCache
#define MAX_INV_STR_LENGTH
bool check_for_core_caa_events()
T c_str(T...args)
#define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT
T back(T...args)
string s
Definition: gaudirun.py:253
std::string get_event()
double getTotalCycles()
PipeReader(const char *cmd)
bool operator()(const CacheItem &a, const int &b) const
void put_C_modules_csv(FILE *fp, std::vector< std::string > &columns)
unsigned int get_total_num_samples()
#define MAX_MODULE_NAME_LENGTH
AttribStringParser::Iterator begin(const AttribStringParser &parser)
#define MAX_SIMPLE_LIB_LENGTH
bool skipWhitespaces(const char *srcbuffer, const char **destbuffer)
#define I7_IFETCH_L2_MISS_L3_HIT_SNOOP
int read_C_file(const char *dir, const char *filename)
int main(int argc, char *argv[])
std::string get_module_name()
#define CORE_L2_HIT_CYCLES
#define MAX_SYM_MOD_LENGTH
unsigned int get_c_mask()