Loading [MathJax]/extensions/tex2jax.js
The Gaudi Framework  v36r16 (ea80daf8)
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
pfm_gen_analysis.cpp
Go to the documentation of this file.
1 /***********************************************************************************\
2 * (c) Copyright 1998-2019 CERN for the benefit of the LHCb and ATLAS collaborations *
3 * *
4 * This software is distributed under the terms of the Apache version 2 licence, *
5 * copied verbatim in the file "LICENSE". *
6 * *
7 * In applying this licence, CERN does not waive the privileges and immunities *
8 * granted to it by virtue of its status as an Intergovernmental Organization *
9 * or submit itself to any jurisdiction. *
10 \***********************************************************************************/
11 /*
12 Name: pfm_gen_analysis.cpp
13 Author: Daniele Francesco Kruse
14 E-mail: daniele.francesco.kruse@cern.ch
15 Version: 0.9 (16/02/2010)
16 
17 This code is responsible for analysing results generated by the PerfmonService of CMSSW.
18 It takes 42 files as input (21 simple text files and 21 gzipped text files) and
19 produces an HTML directory containing the results of the analysis (both counting and sampling).
20 
21 compile linking zlib: g++ -Wall pfm_gen_analysis.cpp -lz
22 */
23 
24 #include <ctype.h>
25 #include <cxxabi.h>
26 #include <fcntl.h>
27 #include <math.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <sys/stat.h>
33 #include <sys/types.h>
34 #include <unistd.h>
35 #include <zlib.h>
36 
37 #include <algorithm>
38 #include <iostream>
39 #include <list>
40 #include <map>
41 #include <sstream>
42 #include <string>
43 #include <vector>
44 
45 #include <dirent.h>
46 #include <errno.h>
47 
48 // Core
49 #define CORE_L2_MISS_CYCLES 200
50 #define CORE_L2_HIT_CYCLES 14.5
51 #define CORE_L1_DTLB_MISS_CYCLES 10
52 #define CORE_LCP_STALL_CYCLES 6
53 #define CORE_UNKNOWN_ADDR_STORE_CYCLES 5
54 #define CORE_OVERLAPPING_CYCLES 6
55 #define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES 20
56 
57 // Nehalem
58 #define I7_L1_DTLB_WALK_COMPLETED_CYCLES 35
59 #define I7_L1_ITLB_WALK_COMPLETED_CYCLES 35
60 #define I7_L2_HIT_CYCLES 6
61 #define I7_L3_UNSHARED_HIT_CYCLES 35
62 #define I7_OTHER_CORE_L2_HIT_CYCLES 60
63 #define I7_OTHER_CORE_L2_HITM_CYCLES 75
64 #define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES 225 // average of 200 (not modified) and 225-250 (modified)
65 #define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES 360 // average of 350 (not modified) and 370 (modified)
66 #define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES 180
67 #define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT 200
68 #define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT 350
69 #define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP 35
70 #define I7_IFETCH_L2_MISS_L3_HIT_SNOOP 60
71 #define I7_IFETCH_L2_MISS_L3_HITM 75
72 #define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD 180
73 
74 #define MAX_MODULES 1000
75 
76 #define EXPECTED_CPI 0.25
77 
78 #define MAX_FILENAME_LENGTH 1024
79 #define MAX_SAMPLE_INDEX_LENGTH 10000
80 #define MAX_SYM_LENGTH 15000
81 #define MAX_SYM_MOD_LENGTH 20000
82 #define MAX_LIB_LENGTH 5000
83 #define MAX_LIB_MOD_LENGTH 7000
84 #define MAX_SIMPLE_SYM_LENGTH 300
85 #define MAX_SIMPLE_SYM_MOD_LENGTH 500
86 #define MAX_SIMPLE_LIB_LENGTH 300
87 #define MAX_SIMPLE_LIB_MOD_LENGTH 500
88 #define MAX_LINE_LENGTH 20000
89 #define MAX_EVENT_NAME_LENGTH 150
90 #define MAX_MODULE_NAME_LENGTH 250
91 #define MAX_VALUE_STRING_LENGTH 250
92 #define MAX_ARCH_NAME_LENGTH 20
93 #define MAX_CMASK_STR_LENGTH 5
94 #define MAX_INV_STR_LENGTH 5
95 #define MAX_SP_STR_LENGTH 50
96 
#define PIPE_BUFFER_LENGTH 1000

/// Runs a shell command and exposes everything it wrote to stdout as an
/// in-memory input stream.
///
/// The constructor starts `cmd` through popen(), reads the command's standard
/// output until fgets() reports end-of-file or an error, and stores the
/// captured text in an std::istringstream that callers obtain via output().
/// If the pipe cannot be opened, a message is printed and the process exits
/// with status 1. The pipe is closed with pclose() in the destructor.
///
/// The class owns a FILE*, so copying is disabled: a copy would pclose() the
/// same stream twice.
class PipeReader {
public:
  /// @param cmd  command line handed verbatim to popen( cmd, "r" ).
  PipeReader( const char* cmd ) : pipe( popen( cmd, "r" ) ) {
    if ( !pipe ) {
      printf( "Cannot open pipe. Exiting...\n" );
      exit( 1 );
    }
    char        buffer[PIPE_BUFFER_LENGTH];
    std::string result;
    // Loop on the fgets() result rather than on feof(): after a read error
    // fgets() keeps returning NULL while feof() stays false, which would spin
    // forever.
    while ( fgets( buffer, sizeof( buffer ), pipe ) != nullptr ) { result += buffer; }
    iss.str( result );
  }

  ~PipeReader( void ) { pclose( pipe ); }

  PipeReader( const PipeReader& ) = delete;
  PipeReader& operator=( const PipeReader& ) = delete;

  /// @return the stream holding the captured output; reading from it consumes it.
  std::istringstream& output( void ) { return iss; }

private:
  FILE*              pipe; ///< stream returned by popen(); closed in the destructor
  std::istringstream iss;  ///< captured standard output of the command
};
128 
// skipWhitespaces()
// const char *srcbuffer   : source string (NUL-terminated)
// const char **destbuffer : receives the position after the leading whitespace
// Requires srcbuffer to start with at least one whitespace character.
// On success stores in *destbuffer a pointer to the first character after the
// leading run of whitespace and returns true. Otherwise returns false and
// leaves *destbuffer untouched. destbuffer may point at the variable that
// holds srcbuffer.
bool skipWhitespaces( const char* srcbuffer, const char** destbuffer ) {
  // The <ctype.h> classifiers are only defined for values representable as
  // unsigned char (or EOF), so convert before calling isspace().
  if ( !isspace( static_cast<unsigned char>( *srcbuffer ) ) ) { return false; }
  ++srcbuffer;
  while ( isspace( static_cast<unsigned char>( *srcbuffer ) ) ) { ++srcbuffer; }
  *destbuffer = srcbuffer;
  return true;
}
139 
// skipString()
// const char *strptr     : literal prefix to match (NUL-terminated)
// const char *srcbuffer  : source string (NUL-terminated)
// const char **dstbuffer : receives the position just past the matched prefix
// Checks whether srcbuffer begins with strptr (exact, case-sensitive match;
// no whitespace is skipped by this function).
// On a match stores srcbuffer + strlen(strptr) in *dstbuffer and returns true.
// Otherwise returns false and leaves *dstbuffer untouched. dstbuffer may point
// at the variable that holds srcbuffer.
bool skipString( const char* strptr, const char* srcbuffer, const char** dstbuffer ) {
  const size_t prefixLength = strlen( strptr );
  if ( strncmp( srcbuffer, strptr, prefixLength ) != 0 ) { return false; }
  *dstbuffer = srcbuffer + prefixLength;
  return true;
}
152 
153 class FileInfo {
154 public:
155  typedef int Offset;
157  FileInfo( void ) : NAME( "<dynamically generated>" ) {}
158  FileInfo( const std::string& name, bool useGdb ) : NAME( name ) {
159  if ( useGdb ) { this->createOffsetMap(); }
160  }
161 
162  const char* symbolByOffset( Offset offset ) {
163  if ( m_symbolCache.empty() ) { return 0; }
164 
165  SymbolCache::iterator i = lower_bound( m_symbolCache.begin(), m_symbolCache.end(), offset, CacheItemComparator() );
166  if ( i->OFFSET == offset ) { return i->NAME.c_str(); }
167 
168  if ( i == m_symbolCache.begin() ) { return m_symbolCache.begin()->NAME.c_str(); }
169 
170  --i;
171 
172  return i->NAME.c_str();
173  }
174 
175  Offset next( Offset offset ) {
176  SymbolCache::iterator i = upper_bound( m_symbolCache.begin(), m_symbolCache.end(), offset, CacheItemComparator() );
177  if ( i == m_symbolCache.end() ) { return 0; }
178  return i->OFFSET;
179  }
180 
181 private:
182  struct CacheItem {
183  CacheItem( Offset offset, const std::string& name ) : OFFSET( offset ), NAME( name ){};
186  };
187 
190 
192  bool operator()( const CacheItem& a, const int& b ) const { return a.OFFSET < b; }
193  bool operator()( const int& a, const CacheItem& b ) const { return a < b.OFFSET; }
194  };
195 
196  void createOffsetMap( void ) {
197  std::string commandLine = "objdump -p " + NAME;
198  PipeReader objdump( commandLine.c_str() );
199  std::string oldname;
200  std::string suffix;
201  int vmbase = 0;
202  bool matched = false;
203  while ( objdump.output() ) {
204  // Checks the following regexp
205  //
206  // LOAD\\s+off\\s+(0x[0-9A-Fa-f]+)\\s+vaddr\\s+(0x[0-9A-Fa-f]+)
207  //
208  // and sets vmbase to be $2 - $1 of the first matched entry.
209 
211  std::getline( objdump.output(), line );
212 
213  if ( !objdump.output() ) break;
214  if ( line.empty() ) continue;
215  const char* lineptr = line.c_str();
216  if ( !skipWhitespaces( lineptr, &lineptr ) ) continue;
217  if ( !skipString( "LOAD", lineptr, &lineptr ) ) continue;
218  if ( !skipWhitespaces( lineptr, &lineptr ) ) continue;
219  if ( !skipString( "off", lineptr, &lineptr ) ) continue;
220  char* endptr = 0;
221  int initialBase = strtol( lineptr, &endptr, 16 );
222  if ( lineptr == endptr ) continue;
223  lineptr = endptr;
224  if ( !skipWhitespaces( lineptr, &lineptr ) ) continue;
225  if ( !skipString( "vaddr", lineptr, &lineptr ) ) continue;
226  if ( !skipWhitespaces( lineptr, &lineptr ) ) continue;
227  int finalBase = strtol( lineptr, &endptr, 16 );
228  if ( lineptr == endptr ) continue;
229  vmbase = finalBase - initialBase;
230  matched = true;
231  break;
232  }
233  if ( !matched ) {
234  fprintf( stderr, "Cannot determine VM base address for %s\n", NAME.c_str() );
235  fprintf( stderr, "Error while running `objdump -p %s`\n", NAME.c_str() );
236  exit( 1 );
237  }
238  std::string commandLine2 = "nm -t d -n " + NAME;
239  PipeReader nm( commandLine2.c_str() );
240  while ( nm.output() ) {
242  std::getline( nm.output(), line );
243  if ( !nm.output() ) break;
244  if ( line.empty() ) continue;
245  // If line does not match "^(\\d+)[ ]\\S[ ](\S+)$", exit.
246  const char* begin = line.c_str();
247  char* endptr = 0;
248  int address = strtol( begin, &endptr, 10 );
249  if ( endptr == begin ) continue;
250  if ( *endptr++ != ' ' ) continue;
251  if ( isspace( *endptr++ ) ) continue;
252  if ( *endptr++ != ' ' ) continue;
253  char* symbolName = endptr;
254  while ( *endptr && !isspace( *endptr ) ) endptr++;
255  if ( *endptr != 0 ) continue;
256  // If line starts with '.' forget about it.
257  if ( symbolName[0] == '.' ) continue;
258  // Create a new symbol with the given fileoffset.
259  // The symbol is automatically saved in the FileInfo cache by offset.
260  // If a symbol with the same offset is already there, the new one
261  // replaces the old one.
262  int offset = address - vmbase;
263  if ( m_symbolCache.size() && ( m_symbolCache.back().OFFSET == offset ) )
264  m_symbolCache.back().NAME = symbolName;
265  else
266  m_symbolCache.push_back( CacheItem( address - vmbase, symbolName ) );
267  }
268  }
269 };
270 
271 static std::map<std::string, unsigned int> modules_tot_samples;
272 static std::map<std::string, FileInfo> libsInfo;
273 static int nehalem;
274 
276 static std::vector<std::string> C_events;
277 static std::vector<std::string> S_events;
278 
279 static std::vector<std::string> core_caa_events;
280 static std::vector<std::string> nhm_caa_events;
281 static std::vector<std::string> core_caa_events_displ;
282 static std::vector<std::string> nhm_caa_events_displ;
283 
285  core_caa_events.push_back( "BRANCH_INSTRUCTIONS_RETIRED" );
286  core_caa_events.push_back( "ILD_STALL" );
287  core_caa_events.push_back( "INST_RETIRED:LOADS" );
288  core_caa_events.push_back( "INST_RETIRED:OTHER" );
289  core_caa_events.push_back( "INST_RETIRED:STORES" );
290  core_caa_events.push_back( "INSTRUCTIONS_RETIRED" );
291  core_caa_events.push_back( "LOAD_BLOCK:OVERLAP_STORE" );
292  core_caa_events.push_back( "LOAD_BLOCK:STA" );
293  core_caa_events.push_back( "LOAD_BLOCK:UNTIL_RETIRE" );
294  core_caa_events.push_back( "MEM_LOAD_RETIRED:DTLB_MISS" );
295  core_caa_events.push_back( "MEM_LOAD_RETIRED:L1D_LINE_MISS" );
296  core_caa_events.push_back( "MEM_LOAD_RETIRED:L2_LINE_MISS" );
297  core_caa_events.push_back( "MISPREDICTED_BRANCH_RETIRED" );
298  // core_caa_events.push_back("RS_UOPS_DISPATCHED");
299  // core_caa_events.push_back("RS_UOPS_DISPATCHED CMASK=1");
300  core_caa_events.push_back( "RS_UOPS_DISPATCHED CMASK=1 INV=1" );
301  core_caa_events.push_back( "SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE" );
302  core_caa_events.push_back( "UNHALTED_CORE_CYCLES" );
303  // core_caa_events.push_back("UOPS_RETIRED:ANY");
304  // core_caa_events.push_back("UOPS_RETIRED:FUSED");
305  // core_caa_events.push_back("IDLE_DURING_DIV");
306 }
307 
309  nhm_caa_events.push_back( "ARITH:CYCLES_DIV_BUSY" );
310  nhm_caa_events.push_back( "BR_INST_EXEC:ANY" );
311  nhm_caa_events.push_back( "BR_INST_EXEC:DIRECT_NEAR_CALL" );
312  nhm_caa_events.push_back( "BR_INST_EXEC:INDIRECT_NEAR_CALL" );
313  nhm_caa_events.push_back( "BR_INST_EXEC:INDIRECT_NON_CALL" );
314  nhm_caa_events.push_back( "BR_INST_EXEC:NEAR_CALLS" );
315  nhm_caa_events.push_back( "BR_INST_EXEC:NON_CALLS" );
316  nhm_caa_events.push_back( "BR_INST_EXEC:RETURN_NEAR" );
317  nhm_caa_events.push_back( "BR_INST_RETIRED:ALL_BRANCHES" );
318  nhm_caa_events.push_back( "BR_INST_RETIRED:CONDITIONAL" );
319  nhm_caa_events.push_back( "BR_INST_RETIRED:NEAR_CALL" );
320  nhm_caa_events.push_back( "BR_MISP_EXEC:ANY" );
321  nhm_caa_events.push_back( "CPU_CLK_UNHALTED:THREAD_P" );
322  nhm_caa_events.push_back( "DTLB_LOAD_MISSES:WALK_COMPLETED" );
323  nhm_caa_events.push_back( "INST_RETIRED:ANY_P" );
324  nhm_caa_events.push_back( "ITLB_MISSES:WALK_COMPLETED" );
325  nhm_caa_events.push_back( "L2_RQSTS:IFETCH_HIT" );
326  nhm_caa_events.push_back( "L2_RQSTS:IFETCH_MISS" );
327  nhm_caa_events.push_back( "MEM_INST_RETIRED:LOADS" );
328  nhm_caa_events.push_back( "MEM_INST_RETIRED:STORES" );
329  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:L2_HIT" );
330  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:L3_MISS" );
331  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:L3_UNSHARED_HIT" );
332  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM" );
333  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:LOCAL_DRAM" );
334  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM" );
335  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT" );
336  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:REMOTE_DRAM" );
337  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM" );
338  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM" );
339  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP" );
340  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD" );
341  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM" );
342  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT" );
343  nhm_caa_events.push_back( "RESOURCE_STALLS:ANY" );
344  nhm_caa_events.push_back( "SSEX_UOPS_RETIRED:PACKED_DOUBLE" );
345  nhm_caa_events.push_back( "SSEX_UOPS_RETIRED:PACKED_SINGLE" );
346  nhm_caa_events.push_back( "UOPS_DECODED:MS CMASK=1" );
347  nhm_caa_events.push_back( "UOPS_ISSUED:ANY CMASK=1 INV=1" );
348  nhm_caa_events.push_back( "ITLB_MISS_RETIRED" );
349  nhm_caa_events.push_back( "UOPS_RETIRED:ANY" );
350 }
351 
353  for ( std::vector<std::string>::const_iterator it = core_caa_events.begin(); it != core_caa_events.end(); ++it ) {
354  if ( find( C_events.begin(), C_events.end(), ( *it ) ) == C_events.end() ) {
355  fprintf( stderr, "ERROR: Cannot find event %s!!!\naborting...\n", ( *it ).c_str() );
356  return false;
357  }
358  }
359  return true;
360 }
361 
363  for ( std::vector<std::string>::const_iterator it = nhm_caa_events.begin(); it != nhm_caa_events.end(); ++it ) {
364  if ( find( C_events.begin(), C_events.end(), ( *it ) ) == C_events.end() ) {
365  fprintf( stderr, "ERROR: Cannot find event %s!!!\naborting...\n", ( *it ).c_str() );
366  return false;
367  }
368  }
369  return true;
370 }
371 
373  core_caa_events_displ.push_back( "Total Cycles" );
374  core_caa_events_displ.push_back( "Stalled Cycles" );
375  core_caa_events_displ.push_back( "% of Total Cycles" );
376  core_caa_events_displ.push_back( "Instructions Retired" );
377  core_caa_events_displ.push_back( "CPI" );
378  core_caa_events_displ.push_back( "" );
379  core_caa_events_displ.push_back( "iMargin" );
380  core_caa_events_displ.push_back( "iFactor" );
381  core_caa_events_displ.push_back( "" );
382  core_caa_events_displ.push_back( "Counted Stalled Cycles" );
383  core_caa_events_displ.push_back( "" );
384  core_caa_events_displ.push_back( "L2 Miss Impact" );
385  core_caa_events_displ.push_back( "L2 Miss % of counted Stalled Cycles" );
386  core_caa_events_displ.push_back( "" );
387  core_caa_events_displ.push_back( "L2 Hit Impact" );
388  core_caa_events_displ.push_back( "L2 Hit % of counted Stalled Cycles" );
389  core_caa_events_displ.push_back( "" );
390  core_caa_events_displ.push_back( "L1 DTLB Miss Impact" );
391  core_caa_events_displ.push_back( "L1 DTLB Miss % of counted Stalled Cycles" );
392  core_caa_events_displ.push_back( "" );
393  core_caa_events_displ.push_back( "LCP Stalls Impact" );
394  core_caa_events_displ.push_back( "LCP Stalls % of counted Stalled Cycles" );
395  core_caa_events_displ.push_back( "" );
396  core_caa_events_displ.push_back( "Store-Fwd Stalls Impact" );
397  core_caa_events_displ.push_back( "Store-Fwd Stalls % of counted Stalled Cycles" );
398  core_caa_events_displ.push_back( "" );
399  core_caa_events_displ.push_back( "Loads Blocked by Unknown Address Store Impact" );
400  core_caa_events_displ.push_back( "Loads Blocked % of Store-Fwd Stalls Cycles" );
401  core_caa_events_displ.push_back( "Loads Overlapped with Stores Impact" );
402  core_caa_events_displ.push_back( "Loads Overlapped % of Store-Fwd Stalls Cycles" );
403  core_caa_events_displ.push_back( "Loads Spanning across Cache Lines Impact" );
404  core_caa_events_displ.push_back( "Loads Spanning % of Store-Fwd Stalls Cycles" );
405  core_caa_events_displ.push_back( "" );
406  core_caa_events_displ.push_back( "Load Instructions" );
407  core_caa_events_displ.push_back( "Load % of all Instructions" );
408  core_caa_events_displ.push_back( "Store Instructions" );
409  core_caa_events_displ.push_back( "Store % of all Instructions" );
410  core_caa_events_displ.push_back( "Branch Instructions" );
411  core_caa_events_displ.push_back( "Branch % of all Instructions" );
412  core_caa_events_displ.push_back( "Packed SIMD Computational Instructions" );
413  core_caa_events_displ.push_back( "Packed SIMD % of all Instructions" );
414  core_caa_events_displ.push_back( "Other Instructions" );
415  core_caa_events_displ.push_back( "Other % of all Instructions" );
416  core_caa_events_displ.push_back( "" );
417  core_caa_events_displ.push_back( "ITLB Miss Rate in %" );
418  core_caa_events_displ.push_back( "% of Mispredicted Branches" );
419 }
420 
421 void calc_core_deriv_values( double totalCycles ) {
422  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
423  ++it ) {
424  ( it->second )["Total Cycles"] = ( it->second )["UNHALTED_CORE_CYCLES"];
425  ( it->second )["Stalled Cycles"] = ( it->second )["RS_UOPS_DISPATCHED CMASK=1 INV=1"];
426  ( it->second )["L2 Miss Impact"] = ( it->second )["MEM_LOAD_RETIRED:L2_LINE_MISS"] * CORE_L2_MISS_CYCLES;
427  ( it->second )["L2 Hit Impact"] =
428  ( ( it->second )["MEM_LOAD_RETIRED:L1D_LINE_MISS"] - ( it->second )["MEM_LOAD_RETIRED:L2_LINE_MISS"] ) *
430  ( it->second )["L1 DTLB Miss Impact"] = ( it->second )["MEM_LOAD_RETIRED:DTLB_MISS"] * CORE_L1_DTLB_MISS_CYCLES;
431  ( it->second )["LCP Stalls Impact"] = ( it->second )["ILD_STALL"] * CORE_LCP_STALL_CYCLES;
432  ( it->second )["Loads Blocked by Unknown Address Store Impact"] =
433  ( it->second )["LOAD_BLOCK:STA"] * CORE_UNKNOWN_ADDR_STORE_CYCLES;
434  ( it->second )["Loads Overlapped with Stores Impact"] =
435  ( it->second )["LOAD_BLOCK:OVERLAP_STORE"] * CORE_OVERLAPPING_CYCLES;
436  ( it->second )["Loads Spanning across Cache Lines Impact"] =
437  ( it->second )["LOAD_BLOCK:UNTIL_RETIRE"] * CORE_SPAN_ACROSS_CACHE_LINE_CYCLES;
438  ( it->second )["Store-Fwd Stalls Impact"] = ( it->second )["Loads Blocked by Unknown Address Store Impact"] +
439  ( it->second )["Loads Overlapped with Stores Impact"] +
440  ( it->second )["Loads Spanning across Cache Lines Impact"];
441  ( it->second )["Counted Stalled Cycles"] =
442  ( it->second )["L2 Miss Impact"] + ( it->second )["L2 Hit Impact"] + ( it->second )["LCP Stalls Impact"] +
443  ( it->second )["L1 DTLB Miss Impact"] + ( it->second )["Store-Fwd Stalls Impact"];
444  ( it->second )["Instructions Retired"] = ( it->second )["INSTRUCTIONS_RETIRED"];
445  ( it->second )["ITLB Miss Rate in %"] =
446  ( ( it->second )["ITLB_MISS_RETIRED"] / ( it->second )["INSTRUCTIONS_RETIRED"] ) * 100;
447  ( it->second )["Branch Instructions"] = ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"];
448  ( it->second )["Load Instructions"] = ( it->second )["INST_RETIRED:LOADS"];
449  ( it->second )["Store Instructions"] = ( it->second )["INST_RETIRED:STORES"];
450  ( it->second )["Other Instructions"] = ( it->second )["INST_RETIRED:OTHER"] -
451  ( it->second )["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] -
452  ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"];
453  ( it->second )["% of Mispredicted Branches"] =
454  ( ( it->second )["MISPREDICTED_BRANCH_RETIRED"] / ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"] ) * 100;
455  ( it->second )["Packed SIMD Computational Instructions"] =
456  ( it->second )["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"];
457  ( it->second )["Counted Instructions Retired"] =
458  ( it->second )["Branch Instructions"] + ( it->second )["Load Instructions"] +
459  ( it->second )["Store Instructions"] + ( it->second )["Other Instructions"] +
460  ( it->second )["Packed SIMD Computational Instructions"];
461  ( it->second )["CPI"] = ( it->second )["UNHALTED_CORE_CYCLES"] / ( it->second )["INSTRUCTIONS_RETIRED"];
462 
463  double localPerformanceImprovement = ( it->second )["CPI"] / EXPECTED_CPI;
464  double cyclesAfterImprovement = ( it->second )["UNHALTED_CORE_CYCLES"] / localPerformanceImprovement;
465  double totalCyclesAfterImprovement = totalCycles - ( it->second )["UNHALTED_CORE_CYCLES"] + cyclesAfterImprovement;
466  ( it->second )["iMargin"] = 100 - ( totalCyclesAfterImprovement / totalCycles ) * 100;
467 
468  ( it->second )["% of Total Cycles"] =
469  ( it->second )["RS_UOPS_DISPATCHED CMASK=1 INV=1"] * 100 / ( it->second )["UNHALTED_CORE_CYCLES"];
470  ( it->second )["L2 Miss % of counted Stalled Cycles"] =
471  ( it->second )["L2 Miss Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
472  ( it->second )["L2 Hit % of counted Stalled Cycles"] =
473  ( it->second )["L2 Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
474  ( it->second )["L1 DTLB Miss % of counted Stalled Cycles"] =
475  ( it->second )["L1 DTLB Miss Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
476  ( it->second )["LCP Stalls % of counted Stalled Cycles"] =
477  ( it->second )["LCP Stalls Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
478  ( it->second )["Store-Fwd Stalls % of counted Stalled Cycles"] =
479  ( it->second )["Store-Fwd Stalls Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
480  ( it->second )["Loads Blocked % of Store-Fwd Stalls Cycles"] =
481  ( it->second )["Loads Blocked by Unknown Address Store Impact"] * 100 /
482  ( it->second )["Store-Fwd Stalls Impact"];
483  ( it->second )["Loads Overlapped % of Store-Fwd Stalls Cycles"] =
484  ( it->second )["Loads Overlapped with Stores Impact"] * 100 / ( it->second )["Store-Fwd Stalls Impact"];
485  ( it->second )["Loads Spanning % of Store-Fwd Stalls Cycles"] =
486  ( it->second )["Loads Spanning across Cache Lines Impact"] * 100 / ( it->second )["Store-Fwd Stalls Impact"];
487 
488  ( it->second )["Load % of all Instructions"] =
489  ( it->second )["INST_RETIRED:LOADS"] * 100 / ( it->second )["Counted Instructions Retired"];
490  ( it->second )["Store % of all Instructions"] =
491  ( it->second )["INST_RETIRED:STORES"] * 100 / ( it->second )["Counted Instructions Retired"];
492  ( it->second )["Branch % of all Instructions"] =
493  ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"] * 100 / ( it->second )["Counted Instructions Retired"];
494  ( it->second )["Packed SIMD % of all Instructions"] =
495  ( it->second )["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] * 100 /
496  ( it->second )["Counted Instructions Retired"];
497  ( it->second )["Other % of all Instructions"] =
498  ( it->second )["Other Instructions"] * 100 / ( it->second )["Counted Instructions Retired"];
499  }
500 }
501 
503  nhm_caa_events_displ.push_back( "Total Cycles" );
504  nhm_caa_events_displ.push_back( "Instructions Retired" );
505  nhm_caa_events_displ.push_back( "CPI" );
506  nhm_caa_events_displ.push_back( "" );
507  nhm_caa_events_displ.push_back( "iMargin" );
508  nhm_caa_events_displ.push_back( "iFactor" );
509  nhm_caa_events_displ.push_back( "" );
510  nhm_caa_events_displ.push_back( "Stalled Cycles" );
511  nhm_caa_events_displ.push_back( "% of Total Cycles" );
512  nhm_caa_events_displ.push_back( "Total Counted Stalled Cycles" );
513  nhm_caa_events_displ.push_back( "" );
514  nhm_caa_events_displ.push_back( "Instruction Starvation % of Total Cycles" );
515  nhm_caa_events_displ.push_back( "# of Instructions per Call" );
516  nhm_caa_events_displ.push_back( "% of Total Cycles spent handling FP exceptions" );
517  nhm_caa_events_displ.push_back( "" );
518  nhm_caa_events_displ.push_back( "Counted Stalled Cycles due to Load Ops" );
519  nhm_caa_events_displ.push_back( "" );
520  nhm_caa_events_displ.push_back( "L2 Hit Impact" );
521  nhm_caa_events_displ.push_back( "L2 Hit % of Load Stalls" );
522  nhm_caa_events_displ.push_back( "" );
523  nhm_caa_events_displ.push_back( "L3 Unshared Hit Impact" );
524  nhm_caa_events_displ.push_back( "L3 Unshared Hit % of Load Stalls" );
525  nhm_caa_events_displ.push_back( "" );
526  nhm_caa_events_displ.push_back( "L2 Other Core Hit Impact" );
527  nhm_caa_events_displ.push_back( "L2 Other Core Hit % of Load Stalls" );
528  nhm_caa_events_displ.push_back( "" );
529  nhm_caa_events_displ.push_back( "L2 Other Core Hit Modified Impact" );
530  nhm_caa_events_displ.push_back( "L2 Other Core Hit Modified % of Load Stalls" );
531  nhm_caa_events_displ.push_back( "" );
532  nhm_caa_events_displ.push_back( "L3 Miss -> Local DRAM Hit Impact" );
533  nhm_caa_events_displ.push_back( "L3 Miss -> Remote DRAM Hit Impact" );
534  nhm_caa_events_displ.push_back( "L3 Miss -> Remote Cache Hit Impact" );
535  nhm_caa_events_displ.push_back( "L3 Miss -> Total Impact" );
536  nhm_caa_events_displ.push_back( "L3 Miss % of Load Stalls" );
537  nhm_caa_events_displ.push_back( "" );
538  nhm_caa_events_displ.push_back( "L1 DTLB Miss Impact" );
539  nhm_caa_events_displ.push_back( "L1 DTLB Miss % of Load Stalls" );
540  nhm_caa_events_displ.push_back( "" );
541  nhm_caa_events_displ.push_back( "Cycles spent during DIV & SQRT Ops" );
542  nhm_caa_events_displ.push_back( "DIV & SQRT Ops % of counted Stalled Cycles" );
543  nhm_caa_events_displ.push_back( "" );
544  nhm_caa_events_displ.push_back( "Total L2 IFETCH misses" );
545  nhm_caa_events_displ.push_back( "% of L2 IFETCH misses" );
546  nhm_caa_events_displ.push_back( "" );
547  nhm_caa_events_displ.push_back( "% of IFETCHes served by Local DRAM" );
548  nhm_caa_events_displ.push_back( "% of IFETCHes served by L3 (Modified)" );
549  nhm_caa_events_displ.push_back( "% of IFETCHes served by L3 (Clean Snoop)" );
550  nhm_caa_events_displ.push_back( "% of IFETCHes served by Remote L2" );
551  nhm_caa_events_displ.push_back( "% of IFETCHes served by Remote DRAM" );
552  nhm_caa_events_displ.push_back( "% of IFETCHes served by L3 (No Snoop)" );
553  nhm_caa_events_displ.push_back( "" );
554  nhm_caa_events_displ.push_back( "Total L2 IFETCH miss Impact" );
555  nhm_caa_events_displ.push_back( "" );
556  nhm_caa_events_displ.push_back( "Cycles IFETCH served by Local DRAM" );
557  nhm_caa_events_displ.push_back( "Local DRAM IFECTHes % Impact" );
558  nhm_caa_events_displ.push_back( "" );
559  nhm_caa_events_displ.push_back( "Cycles IFETCH served by L3 (Modified)" );
560  nhm_caa_events_displ.push_back( "L3 (Modified) IFECTHes % Impact" );
561  nhm_caa_events_displ.push_back( "" );
562  nhm_caa_events_displ.push_back( "Cycles IFETCH served by L3 (Clean Snoop)" );
563  nhm_caa_events_displ.push_back( "L3 (Clean Snoop) IFECTHes % Impact" );
564  nhm_caa_events_displ.push_back( "" );
565  nhm_caa_events_displ.push_back( "Cycles IFETCH served by Remote L2" );
566  nhm_caa_events_displ.push_back( "Remote L2 IFECTHes % Impact" );
567  nhm_caa_events_displ.push_back( "" );
568  nhm_caa_events_displ.push_back( "Cycles IFETCH served by Remote DRAM" );
569  nhm_caa_events_displ.push_back( "Remote DRAM IFECTHes % Impact" );
570  nhm_caa_events_displ.push_back( "" );
571  nhm_caa_events_displ.push_back( "Cycles IFETCH served by L3 (No Snoop)" );
572  nhm_caa_events_displ.push_back( "L3 (No Snoop) IFECTHes % Impact" );
573  nhm_caa_events_displ.push_back( "" );
574  nhm_caa_events_displ.push_back( "Total Branch Instructions Executed" );
575  nhm_caa_events_displ.push_back( "% of Mispredicted Branches" );
576  nhm_caa_events_displ.push_back( "" );
577  nhm_caa_events_displ.push_back( "Direct Near Calls % of Total Branches Executed" );
578  nhm_caa_events_displ.push_back( "Indirect Near Calls % of Total Branches Executed" );
579  nhm_caa_events_displ.push_back( "Indirect Near Non-Calls % of Total Branches Executed" );
580  nhm_caa_events_displ.push_back( "All Near Calls % of Total Branches Executed" );
581  nhm_caa_events_displ.push_back( "All Non Calls % of Total Branches Executed" );
582  nhm_caa_events_displ.push_back( "All Returns % of Total Branches Executed" );
583  nhm_caa_events_displ.push_back( "" );
584  nhm_caa_events_displ.push_back( "Total Branch Instructions Retired" );
585  nhm_caa_events_displ.push_back( "Conditionals % of Total Branches Retired" );
586  nhm_caa_events_displ.push_back( "Near Calls % of Total Branches Retired" );
587  nhm_caa_events_displ.push_back( "" );
588  nhm_caa_events_displ.push_back( "L1 ITLB Miss Impact" );
589  nhm_caa_events_displ.push_back( "ITLB Miss Rate in %" );
590  nhm_caa_events_displ.push_back( "" );
591  nhm_caa_events_displ.push_back( "Branch Instructions" );
592  nhm_caa_events_displ.push_back( "Branch % of all Instructions" );
593  nhm_caa_events_displ.push_back( "" );
594  nhm_caa_events_displ.push_back( "Load Instructions" );
595  nhm_caa_events_displ.push_back( "Load % of all Instructions" );
596  nhm_caa_events_displ.push_back( "" );
597  nhm_caa_events_displ.push_back( "Store Instructions" );
598  nhm_caa_events_displ.push_back( "Store % of all Instructions" );
599  nhm_caa_events_displ.push_back( "" );
600  nhm_caa_events_displ.push_back( "Other Instructions" );
601  nhm_caa_events_displ.push_back( "Other % of all Instructions" );
602  nhm_caa_events_displ.push_back( "" );
603  nhm_caa_events_displ.push_back( "Packed UOPS Retired" );
604  nhm_caa_events_displ.push_back( "Packed % of all UOPS Retired" );
605 }
606 
// calc_nhm_deriv_values()
// double totalCycles : total cycle count used as the reference when computing "iMargin"
// For every entry of the global C_modules map, reads the raw counter values stored under their event-name keys
// and stores the derived metrics (cycle impacts, percentages, CPI, iMargin, ...) under display-name keys in the
// same per-module map. The I7_* constants are the per-event cycle costs applied to the raw counts.
// Note: std::map::operator[] inserts a 0.0 entry for any key that is not present yet, so a module with a missing
// counter still gets every derived key; the divisions below are not guarded, so a zero denominator produces
// inf/NaN values rather than an error.
607 void calc_nhm_deriv_values( double totalCycles ) {
608  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
609  ++it ) {
610  ( it->second )["Total Cycles"] = ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
611 
// --- Load-related stalls: raw event counts weighted by their per-event cycle cost ---
612  ( it->second )["L2 Hit Impact"] = ( it->second )["MEM_LOAD_RETIRED:L2_HIT"] * I7_L2_HIT_CYCLES;
613  ( it->second )["L3 Unshared Hit Impact"] =
614  ( it->second )["MEM_LOAD_RETIRED:L3_UNSHARED_HIT"] * I7_L3_UNSHARED_HIT_CYCLES;
// The "other core hit" impact is the HIT_HITM count minus the HITM count, clamped to 0 when the difference
// would not be positive.
615  if ( ( it->second )["MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] >
616  ( it->second )["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] ) {
617  ( it->second )["L2 Other Core Hit Impact"] = ( ( it->second )["MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] -
618  ( it->second )["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] ) *
// NOTE(review): the right-hand operand of the '*' above (listing line 619) is not present in this listing;
// presumably a per-hit cycle-cost constant -- restore it from the authoritative source.
620  } else {
621  ( it->second )["L2 Other Core Hit Impact"] = 0.0;
622  }
623  ( it->second )["L2 Other Core Hit Modified Impact"] =
624  ( it->second )["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] * I7_OTHER_CORE_L2_HITM_CYCLES;
625  ( it->second )["L3 Miss -> Local DRAM Hit Impact"] =
626  ( it->second )["MEM_UNCORE_RETIRED:LOCAL_DRAM"] * I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES;
627  ( it->second )["L3 Miss -> Remote DRAM Hit Impact"] =
628  ( it->second )["MEM_UNCORE_RETIRED:REMOTE_DRAM"] * I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES;
629  ( it->second )["L3 Miss -> Remote Cache Hit Impact"] =
630  ( it->second )["MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT"] * I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES;
631  ( it->second )["L3 Miss -> Total Impact"] = ( it->second )["L3 Miss -> Local DRAM Hit Impact"] +
632  ( it->second )["L3 Miss -> Remote DRAM Hit Impact"] +
633  ( it->second )["L3 Miss -> Remote Cache Hit Impact"];
634  ( it->second )["L1 DTLB Miss Impact"] =
635  ( it->second )["DTLB_LOAD_MISSES:WALK_COMPLETED"] * I7_L1_DTLB_WALK_COMPLETED_CYCLES;
// Sum of all the load-related impacts computed above.
636  ( it->second )["Counted Stalled Cycles due to Load Ops"] =
637  ( it->second )["L3 Miss -> Total Impact"] + ( it->second )["L2 Hit Impact"] +
638  ( it->second )["L1 DTLB Miss Impact"] + ( it->second )["L3 Unshared Hit Impact"] +
639  ( it->second )["L2 Other Core Hit Modified Impact"] + ( it->second )["L2 Other Core Hit Impact"];
640  ( it->second )["Cycles spent during DIV & SQRT Ops"] = ( it->second )["ARITH:CYCLES_DIV_BUSY"];
641  ( it->second )["Total Counted Stalled Cycles"] =
642  ( it->second )["Counted Stalled Cycles due to Load Ops"] + ( it->second )["Cycles spent during DIV & SQRT Ops"];
643  ( it->second )["Stalled Cycles"] =
644  ( it->second )["Total Counted Stalled Cycles"]; // TO BE FIXED when UOPS_EXECUTED:0x3f is fixed!!
645  ( it->second )["% of Total Cycles"] =
646  ( it->second )["Stalled Cycles"] * 100 / ( it->second )["CPU_CLK_UNHALTED:THREAD_P"]; // TO BE FIXED!! see above
// Share of each load-related impact in the counted load stalls (percent).
647  ( it->second )["L3 Miss % of Load Stalls"] =
648  ( it->second )["L3 Miss -> Total Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
649  ( it->second )["L2 Hit % of Load Stalls"] =
650  ( it->second )["L2 Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
651  ( it->second )["L1 DTLB Miss % of Load Stalls"] =
652  ( it->second )["L1 DTLB Miss Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
653  ( it->second )["L3 Unshared Hit % of Load Stalls"] =
654  ( it->second )["L3 Unshared Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
655  ( it->second )["L2 Other Core Hit % of Load Stalls"] =
656  ( it->second )["L2 Other Core Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
657  ( it->second )["L2 Other Core Hit Modified % of Load Stalls"] =
658  ( it->second )["L2 Other Core Hit Modified Impact"] * 100 /
659  ( it->second )["Counted Stalled Cycles due to Load Ops"];
660  ( it->second )["DIV & SQRT Ops % of counted Stalled Cycles"] =
661  ( it->second )["Cycles spent during DIV & SQRT Ops"] * 100 / ( it->second )["Total Counted Stalled Cycles"];
662 
// --- Instruction fetch: cycles per serving source, their share of the total, and share of L2 IFETCH misses ---
663  ( it->second )["Cycles IFETCH served by Local DRAM"] =
664  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT;
665  ( it->second )["Cycles IFETCH served by L3 (Modified)"] =
666  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * I7_IFETCH_L2_MISS_L3_HITM;
667  ( it->second )["Cycles IFETCH served by L3 (Clean Snoop)"] =
668  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * I7_IFETCH_L2_MISS_L3_HIT_SNOOP;
669  ( it->second )["Cycles IFETCH served by Remote L2"] =
670  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD;
671  ( it->second )["Cycles IFETCH served by Remote DRAM"] =
672  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT;
673  ( it->second )["Cycles IFETCH served by L3 (No Snoop)"] =
674  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP;
675  ( it->second )["Total L2 IFETCH miss Impact"] =
676  ( it->second )["Cycles IFETCH served by Local DRAM"] + ( it->second )["Cycles IFETCH served by L3 (Modified)"] +
677  ( it->second )["Cycles IFETCH served by L3 (Clean Snoop)"] +
678  ( it->second )["Cycles IFETCH served by Remote L2"] + ( it->second )["Cycles IFETCH served by Remote DRAM"] +
679  ( it->second )["Cycles IFETCH served by L3 (No Snoop)"];
// The "IFECTHes" spelling in the keys below is part of the stored key and must match wherever the keys are read.
680  ( it->second )["Local DRAM IFECTHes % Impact"] =
681  ( it->second )["Cycles IFETCH served by Local DRAM"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
682  ( it->second )["L3 (Modified) IFECTHes % Impact"] =
683  ( it->second )["Cycles IFETCH served by L3 (Modified)"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
684  ( it->second )["L3 (Clean Snoop) IFECTHes % Impact"] = ( it->second )["Cycles IFETCH served by L3 (Clean Snoop)"] *
685  100 / ( it->second )["Total L2 IFETCH miss Impact"];
686  ( it->second )["Remote L2 IFECTHes % Impact"] =
687  ( it->second )["Cycles IFETCH served by Remote L2"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
688  ( it->second )["Remote DRAM IFECTHes % Impact"] =
689  ( it->second )["Cycles IFETCH served by Remote DRAM"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
690  ( it->second )["L3 (No Snoop) IFECTHes % Impact"] =
691  ( it->second )["Cycles IFETCH served by L3 (No Snoop)"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
692  ( it->second )["Total L2 IFETCH misses"] = ( it->second )["L2_RQSTS:IFETCH_MISS"];
693  ( it->second )["% of IFETCHes served by Local DRAM"] =
694  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
695  ( it->second )["% of IFETCHes served by L3 (Modified)"] =
696  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
697  ( it->second )["% of IFETCHes served by L3 (Clean Snoop)"] =
698  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * 100 /
699  ( it->second )["L2_RQSTS:IFETCH_MISS"];
700  ( it->second )["% of IFETCHes served by Remote L2"] =
701  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * 100 /
702  ( it->second )["L2_RQSTS:IFETCH_MISS"];
703  ( it->second )["% of IFETCHes served by Remote DRAM"] =
704  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
705  ( it->second )["% of IFETCHes served by L3 (No Snoop)"] =
706  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
707  ( it->second )["% of L2 IFETCH misses"] =
708  ( it->second )["L2_RQSTS:IFETCH_MISS"] * 100 /
709  ( ( it->second )["L2_RQSTS:IFETCH_MISS"] + ( it->second )["L2_RQSTS:IFETCH_HIT"] );
710  ( it->second )["L1 ITLB Miss Impact"] =
711  ( it->second )["ITLB_MISSES:WALK_COMPLETED"] * I7_L1_ITLB_WALK_COMPLETED_CYCLES;
712 
// --- Branches: executed and retired branch breakdown (percent of the respective totals) ---
713  ( it->second )["Total Branch Instructions Executed"] = ( it->second )["BR_INST_EXEC:ANY"];
714  ( it->second )["% of Mispredicted Branches"] =
715  ( it->second )["BR_MISP_EXEC:ANY"] * 100 / ( it->second )["BR_INST_EXEC:ANY"];
716  ( it->second )["Direct Near Calls % of Total Branches Executed"] =
717  ( it->second )["BR_INST_EXEC:DIRECT_NEAR_CALL"] * 100 / ( it->second )["Total Branch Instructions Executed"];
718  ( it->second )["Indirect Near Calls % of Total Branches Executed"] =
719  ( it->second )["BR_INST_EXEC:INDIRECT_NEAR_CALL"] * 100 / ( it->second )["Total Branch Instructions Executed"];
720  ( it->second )["Indirect Near Non-Calls % of Total Branches Executed"] =
721  ( it->second )["BR_INST_EXEC:INDIRECT_NON_CALL"] * 100 / ( it->second )["Total Branch Instructions Executed"];
722  ( it->second )["All Near Calls % of Total Branches Executed"] =
723  ( it->second )["BR_INST_EXEC:NEAR_CALLS"] * 100 / ( it->second )["Total Branch Instructions Executed"];
724  ( it->second )["All Non Calls % of Total Branches Executed"] =
725  ( it->second )["BR_INST_EXEC:NON_CALLS"] * 100 / ( it->second )["Total Branch Instructions Executed"];
726  ( it->second )["All Returns % of Total Branches Executed"] =
727  ( it->second )["BR_INST_EXEC:RETURN_NEAR"] * 100 / ( it->second )["Total Branch Instructions Executed"];
728  ( it->second )["Total Branch Instructions Retired"] = ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"];
729  ( it->second )["Conditionals % of Total Branches Retired"] =
730  ( it->second )["BR_INST_RETIRED:CONDITIONAL"] * 100 / ( it->second )["Total Branch Instructions Retired"];
731  ( it->second )["Near Calls % of Total Branches Retired"] =
732  ( it->second )["BR_INST_RETIRED:NEAR_CALL"] * 100 / ( it->second )["Total Branch Instructions Retired"];
733 
// --- Front-end metrics expressed against the unhalted cycle count, plus instructions per call ---
734  ( it->second )["Instruction Starvation % of Total Cycles"] =
735  ( ( it->second )["UOPS_ISSUED:ANY CMASK=1 INV=1"] - ( it->second )["RESOURCE_STALLS:ANY"] ) * 100 /
736  ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
737  ( it->second )["% of Total Cycles spent handling FP exceptions"] =
738  ( it->second )["UOPS_DECODED:MS CMASK=1"] * 100 / ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
739  ( it->second )["# of Instructions per Call"] =
740  ( it->second )["INST_RETIRED:ANY_P"] / ( it->second )["BR_INST_EXEC:NEAR_CALLS"];
741 
742  ( it->second )["Instructions Retired"] = ( it->second )["INST_RETIRED:ANY_P"];
743  ( it->second )["ITLB Miss Rate in %"] =
744  ( ( it->second )["ITLB_MISS_RETIRED"] / ( it->second )["INST_RETIRED:ANY_P"] ) * 100;
745 
// --- Instruction mix and CPI ---
746  ( it->second )["Branch Instructions"] = ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"];
747  ( it->second )["Load Instructions"] = ( it->second )["MEM_INST_RETIRED:LOADS"];
748  ( it->second )["Store Instructions"] = ( it->second )["MEM_INST_RETIRED:STORES"];
749  ( it->second )["Other Instructions"] =
750  ( it->second )["Instructions Retired"] - ( it->second )["MEM_INST_RETIRED:LOADS"] -
751  ( it->second )["MEM_INST_RETIRED:STORES"] - ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"];
752  ( it->second )["Packed UOPS Retired"] =
753  ( it->second )["SSEX_UOPS_RETIRED:PACKED_DOUBLE"] + ( it->second )["SSEX_UOPS_RETIRED:PACKED_SINGLE"];
754  ( it->second )["CPI"] = ( it->second )["CPU_CLK_UNHALTED:THREAD_P"] / ( it->second )["INST_RETIRED:ANY_P"];
755 
// iMargin: percentage by which totalCycles would shrink if this module's cycle count were scaled down by
// the factor CPI / EXPECTED_CPI while every other module stayed unchanged.
756  double localPerformanceImprovement = ( it->second )["CPI"] / EXPECTED_CPI;
757  double cyclesAfterImprovement = ( it->second )["CPU_CLK_UNHALTED:THREAD_P"] / localPerformanceImprovement;
758  double totalCyclesAfterImprovement =
759  totalCycles - ( it->second )["CPU_CLK_UNHALTED:THREAD_P"] + cyclesAfterImprovement;
760  ( it->second )["iMargin"] = 100 - ( totalCyclesAfterImprovement / totalCycles ) * 100;
761 
762  ( it->second )["Load % of all Instructions"] =
763  ( it->second )["MEM_INST_RETIRED:LOADS"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
764  ( it->second )["Store % of all Instructions"] =
765  ( it->second )["MEM_INST_RETIRED:STORES"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
766  ( it->second )["Branch % of all Instructions"] =
767  ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
768  ( it->second )["Other % of all Instructions"] =
769  ( it->second )["Other Instructions"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
770 
771  ( it->second )["Packed % of all UOPS Retired"] =
772  ( it->second )["Packed UOPS Retired"] * 100 / ( it->second )["UOPS_RETIRED:ANY"];
773  }
774 }
775 
// S_module: sampling results of one module for one sampled event.
// Holds the event description (architecture, event name, counter mask, invert mask, sampling period), the total
// number of samples announced for the module, and a map from a sample key to the number of samples accumulated
// under that key.
class S_module {
private:
  // NOTE(review): the declarations of samples, module_name, arch and event, and the accessors get_event() and
  // get_module_name(), are not present in this listing. They are restored here with the types implied by how
  // they are used in this class and by put_S_module(); confirm against the authoritative source.
  std::map<std::string, unsigned int> samples;           // sample key -> accumulated sample count
  unsigned int                        total_num_samples; // total announced for the module (see set_total())
  std::string                         module_name;
  std::string                         arch;
  std::string                         event;
  unsigned int                        cmask;
  unsigned int                        inv;
  unsigned int                        sp; // sampling period

public:
  S_module() { clear(); }

  // Resets every field to its empty/zero state so the object can be reused for the next module.
  void clear() {
    samples.clear();
    total_num_samples = 0;
    module_name.clear();
    arch.clear();
    event.clear();
    cmask = 0;
    inv   = 0;
    sp    = 0;
  }

  // Stores the description of the module and of the sampled event. Samples and total are left untouched.
  void init( const char* name, const char* architecture, const char* event_name, unsigned int c_mask,
             unsigned int inv_mask, unsigned int smpl_period ) {
    module_name = name;
    arch        = architecture;
    event       = event_name;
    cmask       = c_mask;
    inv         = inv_mask;
    sp          = smpl_period;
  }

  void set_total( unsigned int total ) { total_num_samples = total; }

  unsigned int get_smpl_period() const { return sp; }
  unsigned int get_inv_mask() const { return inv; }
  unsigned int get_c_mask() const { return cmask; }
  std::string  get_arch() const { return arch; }
  std::string  get_event() const { return event; }

  // Adds value to the count stored under index (the entry is created at 0 if it does not exist yet).
  void add_sample( const char* index, unsigned int value ) { samples[index] += value; }

  // Extracts the entry with the highest count: copies its key into index (the caller must provide a buffer large
  // enough for the key and its terminating NUL), stores its count in value, and removes it from the map.
  // Among equal counts the entry that comes first in key order is returned.
  // Returns false, leaving index and value untouched, when no samples are left.
  bool get_max( char* index, unsigned int& value ) {
    auto max_pos = std::max_element( samples.begin(), samples.end(),
                                     []( const auto& lhs, const auto& rhs ) { return lhs.second < rhs.second; } );
    if ( max_pos == samples.end() ) return false;
    strcpy( index, max_pos->first.c_str() );
    value = max_pos->second;
    samples.erase( max_pos );
    return true;
  }

  std::string  get_module_name() const { return module_name; }
  unsigned int get_total_num_samples() const { return total_num_samples; }
};
833 
// html_special_chars()
// const char *s : source string (NUL-terminated)
// char *s_mod : destination buffer, receives the escaped NUL-terminated copy of s
// Copies s into s_mod, replacing the characters <, >, & and " by &lt;, &gt;, &amp; and &quot; so the result can
// be embedded in HTML code. The destination size is not known here: the caller must provide room for the worst
// case of 6 * strlen(s) + 1 bytes ("&quot;" is the longest replacement).
// The output is written in a single pass through a moving write pointer, so the cost is linear in the length
// of s (appending with strcat would rescan the destination for every character).
void html_special_chars( const char* s, char* s_mod ) {
  char* out = s_mod;
  for ( ; *s != '\0'; ++s ) {
    const char* entity = nullptr;
    switch ( *s ) {
    case '<':
      entity = "&lt;";
      break;
    case '>':
      entity = "&gt;";
      break;
    case '&':
      entity = "&amp;";
      break;
    case '"':
      entity = "&quot;";
      break;
    default:
      break;
    }
    if ( entity != nullptr ) {
      const size_t len = strlen( entity );
      memcpy( out, entity, len );
      out += len;
    } else {
      *out++ = *s; // ordinary character: copied unchanged
    }
  }
  *out = '\0';
}
865 
// True for the characters that may appear inside a C++ identifier.
static bool func_name_is_ident_char( char c ) {
  // isalnum() is only defined for unsigned char values (and EOF), hence the cast
  return isalnum( static_cast<unsigned char>( c ) ) || c == '_';
}

// Returns the first occurrence of the keyword "operator" in symbol that is a whole word (not preceded or
// followed by an identifier character, so e.g. "cooperator" does not match), or nullptr if there is none.
static char* func_name_find_operator( char* symbol ) {
  const size_t keyword_len = 8; // strlen( "operator" )
  for ( char* hit = strstr( symbol, "operator" ); hit != nullptr; hit = strstr( hit + 1, "operator" ) ) {
    const bool starts_word = ( hit == symbol ) || !func_name_is_ident_char( hit[-1] );
    const bool ends_word   = !func_name_is_ident_char( hit[keyword_len] );
    if ( starts_word && ends_word ) return hit;
  }
  return nullptr;
}

// func_name()
// const char *demangled_symbol : string corresponding to the demangled symbol found by the read_file() function
// Parses the argument and returns just the function name without arguments, template arguments, scope or return
// type. For operators the result starts at the keyword, e.g. "operator+=" or "operator new[]".
// IMPORTANT: despite the const qualifier the buffer behind demangled_symbol is modified (NUL bytes are written
// into it to cut the name) and the returned pointer points into that same buffer. Callers must therefore pass a
// writable scratch copy and keep it alive for as long as the result is used.
// The scan never reads before the start of the buffer; a symbol whose parentheses or angle brackets are
// unbalanced is returned as far as it has been cut so far instead of being scanned further.
const char* func_name( const char* demangled_symbol ) {
  char* const symbol = const_cast<char*>( demangled_symbol );

  // --- operators: keep "operator" plus the operator token, drop everything after it ---
  if ( char* const op_begin = func_name_find_operator( symbol ) ) {
    // Tokens are tried in this order and the first prefix match wins, so longer tokens must precede the shorter
    // tokens they start with ("delete[]" before "delete", ">>=" before ">>" before ">", ...).
    static const char* const tokens[] = { "delete[]", "delete", "new[]", "new", ">>=", "<<=", "->*", "<<", ">>",
                                          ">=",       "<=",     "==",    "!=",  "|=",  "&=",  "^=",  "%=", "/=",
                                          "*=",       "-=",     "+=",    "&&",  "||",  "[]",  "()",  "++", "--",
                                          "->",       "<",      ">",     "~",   "!",   "+",   "-",   "*",  "/",
                                          "%",        "^",      "&",     "|",   ",",   "=" };
    char* op_end = op_begin + 8; // first character after "operator"
    while ( *op_end == ' ' ) ++op_end;
    for ( const char* token : tokens ) {
      const size_t len = strlen( token );
      if ( strncmp( op_end, token, len ) == 0 ) {
        op_end[len] = '\0';
        break;
      }
    }
    // no token matched (e.g. a conversion operator): the text from "operator" on is returned uncut
    return op_begin;
  }

  // --- ordinary functions: cut the argument list that ends at the last ')' ---
  char* cursor = strrchr( symbol, ')' );
  if ( cursor == nullptr ) return demangled_symbol; // no argument list at all

  int depth = 1;
  while ( depth > 0 && cursor != symbol ) {
    const char c = *( --cursor );
    if ( c == ')' ) {
      ++depth;
    } else if ( c == '(' ) {
      --depth;
    }
  }
  if ( depth > 0 ) return demangled_symbol; // unbalanced parentheses: leave the symbol untouched
  *cursor = '\0';                           // cursor is on the '(' opening the argument list
  if ( cursor == symbol ) return symbol;    // nothing precedes the argument list
  --cursor;                                 // last character before the argument list

  // --- template functions: cut the trailing "<...>" as well ---
  if ( *cursor == '>' ) {
    depth = 1;
    while ( depth > 0 && cursor != symbol ) {
      const char c = *( --cursor );
      if ( c == '>' ) {
        ++depth;
      } else if ( c == '<' ) {
        --depth;
      }
    }
    if ( depth > 0 ) return demangled_symbol; // unbalanced angle brackets
    *cursor = '\0';                           // cursor is on the '<' opening the template arguments
  }
  // Step onto the next character to the left. Without template arguments this skips the last character of the
  // name unchecked, i.e. that character always belongs to the result whatever it is.
  if ( cursor == symbol ) return symbol;
  --cursor;

  // --- walk back over the identifier ('~' is accepted so destructors keep their tilde) ---
  while ( func_name_is_ident_char( *cursor ) || *cursor == '~' ) {
    if ( cursor == symbol ) return symbol; // the name starts at the very beginning of the buffer
    --cursor;
  }
  return cursor + 1;
}
1040 
1041 // put_module()
1042 // S_module *cur_module : pointer to the current module object to be written out in to HTML file
1043 // const char *event : name of architectural event being analysed
1044 // const char *dir : directory where sampling results input files are located
1045 // creates or updates the HTML output file using information contained inside the module object given as a parameter
1046 void put_S_module( S_module* cur_module, const char* dir ) {
1047  char module_name[MAX_MODULE_NAME_LENGTH];
1048  bzero( module_name, MAX_MODULE_NAME_LENGTH );
1049  strcpy( module_name, ( cur_module->get_module_name() ).c_str() );
1050  char module_filename[MAX_FILENAME_LENGTH];
1051  bzero( module_filename, MAX_FILENAME_LENGTH );
1052  strcpy( module_filename, dir );
1053  strcat( module_filename, "/HTML/" );
1054  strcat( module_filename, module_name );
1055  strcat( module_filename, ".html" );
1056  char event[MAX_EVENT_NAME_LENGTH];
1057  bzero( event, MAX_EVENT_NAME_LENGTH );
1058  strcpy( event, ( cur_module->get_event() ).c_str() );
1059  std::map<std::string, unsigned int>::iterator result = modules_tot_samples.find( cur_module->get_module_name() );
1060  FILE* module_file;
1061  if ( result == modules_tot_samples.end() ) // not found
1062  {
1063  if ( ( !strcmp( event, "UNHALTED_CORE_CYCLES" ) && !nehalem ) ||
1064  ( !strcmp( event, "CPU_CLK_UNHALTED:THREAD_P" ) && nehalem ) ) {
1065  modules_tot_samples.insert(
1067  } else {
1068  modules_tot_samples.insert( std::pair<std::string, unsigned int>( cur_module->get_module_name(), 0 ) );
1069  }
1070  module_file = fopen( module_filename, "w" );
1071  if ( module_file == NULL ) {
1072  fprintf( stderr, "ERROR: Cannot create file %s!!!\naborting...\n", module_filename );
1073  exit( 1 );
1074  }
1075  fprintf( module_file, "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" "
1076  "\"http://www.w3.org/TR/html4/loose.dtd\">\n" );
1077  fprintf( module_file, "<html>\n" );
1078  fprintf( module_file, "<head>\n" );
1079  fprintf( module_file, "<title>\n" );
1080  fprintf( module_file, "%s\n", module_name );
1081  fprintf( module_file, "</title>\n" );
1082  fprintf( module_file, "</head>\n" );
1083  fprintf( module_file, "<body>\n" );
1084  fprintf( module_file, "<h2>%s</h2><br/>Events Sampled:<br/>\n", module_name );
1085  fprintf( module_file, "<ul>\n" );
1086  for ( std::vector<std::string>::const_iterator it = S_events.begin(); it != S_events.end(); ++it ) {
1087  fprintf( module_file, "<li><a href=\"#%s\">%s</a></li>\n", it->c_str(), it->c_str() );
1088  }
1089  fprintf( module_file, "</ul>\n" );
1090  } // if(result == modules_tot_samples.end()) //not found
1091  else {
1092  if ( ( !strcmp( event, "UNHALTED_CORE_CYCLES" ) && !nehalem ) ||
1093  ( !strcmp( event, "CPU_CLK_UNHALTED:THREAD_P" ) && nehalem ) ) {
1094  modules_tot_samples[cur_module->get_module_name()] = cur_module->get_total_num_samples();
1095  }
1096  module_file = fopen( module_filename, "a" );
1097  } // else:: if(result != modules_tot_samples.end()) //found!!
1098  char event_str[MAX_EVENT_NAME_LENGTH];
1099  bzero( event_str, MAX_EVENT_NAME_LENGTH );
1100  strcpy( event_str, event );
1101  if ( cur_module->get_c_mask() > 0 ) {
1102  sprintf( event_str + strlen( event_str ), " CMASK=%d", cur_module->get_c_mask() );
1103  }
1104  if ( cur_module->get_inv_mask() > 0 ) {
1105  sprintf( event_str + strlen( event_str ), " INV=%d", cur_module->get_inv_mask() );
1106  }
1107  fprintf( module_file, "<a name=\"%s\"><a>\n", event_str );
1108  fprintf( module_file, "<table cellpadding=\"5\">\n" );
1109  fprintf( module_file, "<tr bgcolor=\"#EEEEEE\">\n" );
1110  fprintf( module_file,
1111  "<th colspan=\"6\" align=\"left\">%s -- cmask: %u -- invmask: %u -- Total Samples: %u -- "
1112  "Sampling Period: %d</th>\n",
1113  event, cur_module->get_c_mask(), cur_module->get_inv_mask(), cur_module->get_total_num_samples(),
1114  cur_module->get_smpl_period() );
1115  fprintf( module_file, "</tr>\n" );
1116  fprintf( module_file, "<tr bgcolor=\"#EEEEEE\">\n" );
1117  fprintf( module_file, "<th align=\"left\">Samples</th>\n" );
1118  fprintf( module_file, "<th align=\"left\">Percentage</th>\n" );
1119  fprintf( module_file, "<th align=\"left\">Symbol Name</th>\n" );
1120  fprintf( module_file, "<th align=\"left\">Library Name</th>\n" );
1121  fprintf( module_file, "<th align=\"left\">Complete Signature</th>\n" );
1122  fprintf( module_file, "<th align=\"left\">Library Pathname</th>\n" );
1123  fprintf( module_file, "</tr>\n" );
1124  for ( int j = 0; j < 20; j++ ) {
1125  char sym[MAX_SYM_LENGTH];
1126  char sym_mod[MAX_SYM_MOD_LENGTH];
1127  char lib[MAX_LIB_LENGTH];
1128  char lib_mod[MAX_LIB_MOD_LENGTH];
1129  char simple_sym[MAX_SIMPLE_SYM_LENGTH];
1130  char simple_sym_mod[MAX_SIMPLE_SYM_MOD_LENGTH];
1131  char simple_lib[MAX_SIMPLE_LIB_LENGTH];
1132  char simple_lib_mod[MAX_SIMPLE_LIB_MOD_LENGTH];
1133 
1134  bzero( sym, MAX_SYM_LENGTH );
1135  bzero( sym_mod, MAX_SYM_MOD_LENGTH );
1136  bzero( lib, MAX_LIB_LENGTH );
1137  bzero( lib_mod, MAX_LIB_MOD_LENGTH );
1138  bzero( simple_sym, MAX_SIMPLE_SYM_LENGTH );
1139  bzero( simple_sym_mod, MAX_SIMPLE_SYM_MOD_LENGTH );
1140  bzero( simple_lib, MAX_SIMPLE_LIB_LENGTH );
1141  bzero( simple_lib_mod, MAX_SIMPLE_LIB_MOD_LENGTH );
1142 
1143  char index[MAX_SAMPLE_INDEX_LENGTH];
1144  bzero( index, MAX_SAMPLE_INDEX_LENGTH );
1145  unsigned int value;
1146  bool res = cur_module->get_max( index, value );
1147  if ( !res ) break;
1148  char* sym_end = strchr( index, '%' );
1149  if ( sym_end == NULL ) // error
1150  {
1151  fprintf( stderr, "ERROR: Invalid sym and lib name! : %s\naborting...\n", index );
1152  exit( 1 );
1153  }
1154  memcpy( sym, index, strlen( index ) - strlen( sym_end ) );
1155  strcpy( lib, sym_end + 1 );
1156  char temp[MAX_SYM_LENGTH];
1157  bzero( temp, MAX_SYM_LENGTH );
1158  strcpy( temp, sym );
1159  strcpy( simple_sym, ( func_name( temp ) ) );
1160  if ( strrchr( lib, '/' ) != NULL && *( strrchr( lib, '/' ) + 1 ) != '\0' ) {
1161  strcpy( simple_lib, strrchr( lib, '/' ) + 1 );
1162  } else {
1163  strcpy( simple_lib, lib );
1164  }
1165  if ( j % 2 != 0 ) {
1166  fprintf( module_file, "<tr bgcolor=\"#FFFFCC\">\n" );
1167  } else {
1168  fprintf( module_file, "<tr bgcolor=\"#CCFFCC\">\n" );
1169  }
1170  fprintf( module_file, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%u</td>\n", value );
1171  fprintf( module_file, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%f%%</td>\n",
1172  ( ( (double)( value ) ) / ( (double)( cur_module->get_total_num_samples() ) ) ) * 100 );
1173  html_special_chars( simple_sym, simple_sym_mod );
1174  html_special_chars( simple_lib, simple_lib_mod );
1175  html_special_chars( sym, sym_mod );
1176  html_special_chars( lib, lib_mod );
1177  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n", simple_sym_mod );
1178  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n", simple_lib_mod );
1179  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n", sym_mod );
1180  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n</tr>\n", lib_mod );
1181  }
1182  fprintf( module_file, "</table><br/><br/>\n" );
1183  int res = fclose( module_file );
1184  if ( res ) {
1185  fprintf( stderr, "ERROR: Cannot close file %s!!!\naborting...\n", module_filename );
1186  exit( 1 );
1187  }
1188  return;
1189 }
1190 
1191 // read_S_file()
1192 // const char *dir : directory where sampling results input files are located
1193 // const char *filename : name of the current file to analyse
1194 // reads content of a gzipped sampling result file, finds names of symbols inside libraries using their offsets,
1195 // demangles them to make them human-readable, creates the module objects (with their sampling values),
1196 // and calls the put_module() function to create (or update) the corresponding HTML output file
1197 // returns 0 on success
1198 int read_S_file( const char* dir, const char* filename ) {
1199  char line[MAX_LINE_LENGTH];
1200  char event[MAX_EVENT_NAME_LENGTH];
1201  char arch[MAX_ARCH_NAME_LENGTH];
1202  unsigned int cmask;
1203  unsigned int inv;
1204  unsigned int sp;
1205  char cur_module_name[MAX_MODULE_NAME_LENGTH];
1206  bzero( line, MAX_LINE_LENGTH );
1207  bzero( event, MAX_EVENT_NAME_LENGTH );
1208  bzero( cur_module_name, MAX_MODULE_NAME_LENGTH );
1209  bzero( arch, MAX_ARCH_NAME_LENGTH );
1210 
1211  S_module* cur_module = new S_module();
1212  unsigned int module_num = 0;
1213 
1214  char path_name[MAX_FILENAME_LENGTH];
1215  bzero( path_name, MAX_FILENAME_LENGTH );
1216  strcpy( path_name, dir );
1217  strcat( path_name, "/" );
1218  strcat( path_name, filename );
1219  gzFile res_file = gzopen( path_name, "rb" );
1220 
1221  if ( res_file != NULL ) {
1222  bzero( line, MAX_LINE_LENGTH );
1223  gzgets( res_file, line, MAX_LINE_LENGTH );
1224  if ( line[strlen( line ) - 1] == '\n' ) line[strlen( line ) - 1] = '\0';
1225  bzero( event, MAX_EVENT_NAME_LENGTH );
1226  sscanf( line, "%s %s %u %u %u", arch, event, &cmask, &inv, &sp );
1227  if ( !strcmp( arch, "NHM" ) )
1228  nehalem = true;
1229  else
1230  nehalem = false;
1231  bzero( line, MAX_LINE_LENGTH );
1232  while ( gzgets( res_file, line, MAX_LINE_LENGTH ) != Z_NULL ) {
1233  if ( line[strlen( line ) - 1] == '\n' ) line[strlen( line ) - 1] = '\0';
1234  if ( strchr( line, ' ' ) == NULL ) // module
1235  {
1236  if ( module_num > 0 ) {
1237  put_S_module( cur_module, dir );
1238  cur_module->clear();
1239  }
1240  module_num++;
1241  char* end_sym = strchr( line, '%' );
1242  if ( end_sym == NULL ) // error
1243  {
1244  fprintf( stderr, "ERROR: Invalid module name. \nLINE: %s\naborting...\n", line );
1245  exit( 1 );
1246  }
1247  bzero( cur_module_name, MAX_MODULE_NAME_LENGTH );
1248  memcpy( cur_module_name, line, strlen( line ) - strlen( end_sym ) );
1249  cur_module->init( cur_module_name, arch, event, cmask, inv, sp );
1250  cur_module->set_total( atoi( end_sym + 1 ) );
1251  } // module
1252  else // symbol, libName, libOffset, value
1253  {
1254  unsigned int value = 0, libOffset = 0;
1255  char symbol[MAX_SYM_LENGTH];
1256  char libName[MAX_LIB_LENGTH];
1257  char final_sym[MAX_SYM_MOD_LENGTH];
1258  char final_lib[MAX_LIB_MOD_LENGTH];
1259  bzero( symbol, MAX_SYM_LENGTH );
1260  bzero( libName, MAX_LIB_LENGTH );
1261  bzero( final_sym, MAX_SYM_MOD_LENGTH );
1262  bzero( final_lib, MAX_LIB_MOD_LENGTH );
1263 
1264  sscanf( line, "%s %s %u %u", symbol, libName, &libOffset, &value );
1265  char realPathName_s[FILENAME_MAX];
1266  bzero( realPathName_s, FILENAME_MAX );
1267  char* realPathName = realpath( libName, realPathName_s );
1268  if ( realPathName != NULL && strlen( realPathName ) > 0 ) {
1270  result = libsInfo.find( realPathName );
1271  if ( result == libsInfo.end() ) { libsInfo[realPathName] = FileInfo( realPathName, true ); }
1272  const char* temp_sym = libsInfo[realPathName].symbolByOffset( libOffset );
1273  if ( temp_sym != NULL && strlen( temp_sym ) > 0 ) {
1274  int status;
1275  char* demangled_symbol = abi::__cxa_demangle( temp_sym, NULL, NULL, &status );
1276  if ( status == 0 ) {
1277  strcpy( final_sym, demangled_symbol );
1278  free( demangled_symbol );
1279  } else {
1280  strcpy( final_sym, temp_sym );
1281  }
1282  } else {
1283  strcpy( final_sym, "???" );
1284  }
1285  strcpy( final_lib, realPathName );
1286  } else {
1287  strcpy( final_sym, symbol );
1288  strcpy( final_lib, libName );
1289  }
1290  char index[MAX_LINE_LENGTH];
1291  bzero( index, MAX_LINE_LENGTH );
1292  strcpy( index, final_sym );
1293  strcat( index, "%" );
1294  strcat( index, final_lib );
1295  cur_module->add_sample( index, value );
1296  } // symbol, libName, libOffset, value
1297  bzero( line, MAX_LINE_LENGTH );
1298  } // while(gzgets(res_file, line, MAX_LINE_LENGTH)!=Z_NULL)
1299  put_S_module( cur_module, dir ); // last module!
1300  cur_module->clear();
1301  gzclose( res_file );
1302  } // if(res_file != NULL)
1303  else {
1304  fprintf( stderr, "ERROR: Unable to open input file: %s\naborting...\n", filename );
1305  exit( 1 );
1306  }
1307  delete cur_module; // delete it!
1308  return 0;
1309 }
1310 
1311 int read_S_events( const char* dir, const char* filename ) {
1312  char event[MAX_EVENT_NAME_LENGTH];
1313  char arch[MAX_ARCH_NAME_LENGTH];
1314  char line[MAX_LINE_LENGTH];
1315  char cmask_str[MAX_CMASK_STR_LENGTH];
1316  char inv_str[MAX_INV_STR_LENGTH];
1317  char sp_str[MAX_SP_STR_LENGTH];
1318  bzero( line, MAX_LINE_LENGTH );
1319  bzero( event, MAX_EVENT_NAME_LENGTH );
1320  bzero( arch, MAX_ARCH_NAME_LENGTH );
1321  bzero( cmask_str, MAX_CMASK_STR_LENGTH );
1322  bzero( inv_str, MAX_INV_STR_LENGTH );
1323  bzero( sp_str, MAX_SP_STR_LENGTH );
1324  char path_name[MAX_FILENAME_LENGTH];
1325  bzero( path_name, MAX_FILENAME_LENGTH );
1326  strcpy( path_name, dir );
1327  strcat( path_name, "/" );
1328  strcat( path_name, filename );
1329  gzFile res_file = gzopen( path_name, "rb" );
1330  if ( res_file != NULL ) {
1331  bzero( line, MAX_LINE_LENGTH );
1332  gzgets( res_file, line, MAX_LINE_LENGTH );
1333  if ( line[strlen( line ) - 1] == '\n' ) line[strlen( line ) - 1] = '\0';
1334  bzero( event, MAX_EVENT_NAME_LENGTH );
1335  sscanf( line, "%s %s %s %s %s\n", arch, event, cmask_str, inv_str, sp_str );
1336  std::string event_str( event );
1337  if ( atoi( cmask_str ) > 0 ) {
1338  event_str += " CMASK=";
1339  event_str += cmask_str;
1340  }
1341  if ( atoi( inv_str ) > 0 ) {
1342  event_str += " INV=";
1343  event_str += inv_str;
1344  }
1345  S_events.push_back( event_str );
1346  } // if(res_file != NULL)
1347  else {
1348  fprintf( stderr, "ERROR: Unable to open input file: %s\naborting...\n", filename );
1349  exit( 1 );
1350  }
1351  gzclose( res_file );
1352  return 0;
1353 }
1354 
1355 // finalize_html_pages()
1356 // const char *dir : directory contating sampling result files
1357 // puts footers in module HTML pages and creates index file
1358 int finalize_S_html_pages( const char* dir ) {
1359  for ( std::map<std::string, unsigned int>::const_iterator i = modules_tot_samples.begin();
1360  i != modules_tot_samples.end(); i++ ) {
1361  char module_filename[MAX_FILENAME_LENGTH];
1362  strcpy( module_filename, dir );
1363  strcat( module_filename, "/HTML/" );
1364  strcat( module_filename, ( i->first ).c_str() );
1365  strcat( module_filename, ".html" );
1366  FILE* module_file = fopen( module_filename, "a" );
1367  if ( module_file == NULL ) {
1368  fprintf( stderr, "ERROR: Unable to append to file: %s\naborting...\n", module_filename );
1369  exit( 1 );
1370  }
1371  fprintf( module_file, "</body>\n</html>\n" );
1372  if ( fclose( module_file ) ) {
1373  fprintf( stderr, "ERROR: Cannot close file %s!!!\naborting...\n", module_filename );
1374  exit( 1 );
1375  }
1376  }
1377  return 0;
1378 }
1379 
1380 // read_file()
1381 // const char *filename : input file to analyse
1382 // analyses the event file and updates the list of modules with counter information found in the file
1383 // returns the number of modules found in the file
1384 int read_C_file( const char* dir, const char* filename ) {
1385  char event[MAX_EVENT_NAME_LENGTH];
1386  char arch[MAX_ARCH_NAME_LENGTH];
1387  char line[MAX_LINE_LENGTH];
1388  char cmask_str[MAX_CMASK_STR_LENGTH];
1389  char inv_str[MAX_INV_STR_LENGTH];
1390  char sp_str[MAX_SP_STR_LENGTH];
1391  char cur_module_name[MAX_MODULE_NAME_LENGTH];
1392  bzero( line, MAX_LINE_LENGTH );
1393  bzero( event, MAX_EVENT_NAME_LENGTH );
1394  bzero( cur_module_name, MAX_MODULE_NAME_LENGTH );
1395  bzero( arch, MAX_ARCH_NAME_LENGTH );
1396  bzero( line, MAX_LINE_LENGTH );
1397  bzero( cmask_str, MAX_CMASK_STR_LENGTH );
1398  bzero( inv_str, MAX_INV_STR_LENGTH );
1399  bzero( sp_str, MAX_SP_STR_LENGTH );
1400  int number_of_modules = 0;
1401  long cur_sum = 0;
1402  int no_of_values = 0;
1403  char path_name[MAX_FILENAME_LENGTH];
1404  bzero( path_name, MAX_FILENAME_LENGTH );
1405  strcpy( path_name, dir );
1406  strcat( path_name, "/" );
1407  strcat( path_name, filename );
1408  FILE* fp = fopen( path_name, "r" );
1409  int stat = fscanf( fp, "%s %s %s %s %s\n", arch, event, cmask_str, inv_str, sp_str );
1410  if ( stat != 5 ) {
1411  std::cerr << "ERROR: failed to parse " << path_name << std::endl;
1412  exit( 1 );
1413  }
1414  if ( !strcmp( arch, "NHM" ) )
1415  nehalem = true;
1416  else
1417  nehalem = false;
1418  std::string event_str( event );
1419  if ( atoi( cmask_str ) > 0 ) {
1420  event_str += " CMASK=";
1421  event_str += cmask_str;
1422  }
1423  if ( atoi( inv_str ) > 0 ) {
1424  event_str += " INV=";
1425  event_str += inv_str;
1426  }
1427  C_events.push_back( event_str );
1428  while ( fscanf( fp, "%s\n", line ) != EOF ) {
1429  if ( isalpha( line[0] ) ) // module
1430  {
1431  if ( number_of_modules > 0 ) {
1432  C_modules[cur_module_name][event_str] = (double)cur_sum / no_of_values;
1433  cur_sum = 0;
1434  no_of_values = 0;
1435  }
1436  strcpy( cur_module_name, line );
1437  number_of_modules++;
1438  } else if ( isdigit( line[0] ) ) // value
1439  {
1440  cur_sum += strtol( line, NULL, 10 );
1441  no_of_values++;
1442  }
1443  }
1444  C_modules[cur_module_name][event_str] = (double)cur_sum / no_of_values; // last module
1445  fclose( fp );
1446  return number_of_modules;
1447 }
1448 
// put_C_header()
// FILE *fp : stream the HTML index page is written to
// columns  : column titles; a title that is an empty string becomes a blank white spacer cell
// writes the HTML prologue (doctype, head, style, page title), opens the sortable table and emits
// its header row: "MODULE NAME" followed by one <th> per entry of columns
void put_C_header( FILE* fp, std::vector<std::string>& columns ) {
  static const char* const prologue[] = {
      "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\n",
      "<html>\n",
      "<head>\n",
      "<title>\n",
      "Analysis Result\n",
      "</title>\n",
      "<script src=\"sorttable.js\"></script>\n",
      "<style>\ntable.sortable thead "
      "{\nbackground-color:#eee;\ncolor:#666666;\nfont-weight:bold;\ncursor:default;\nfont-family:courier;\n}"
      "\n</style>\n",
      "</head>\n",
      "<body link=\"black\">\n",
      "<h1>RESULTS:</h1><br/>Click for detailed symbol view...<p/>\n",
      "<table class=\"sortable\" cellpadding=\"5\">\n",
      "<tr>\n",
      "<th>MODULE NAME</th>\n",
  };
  for ( size_t k = 0; k < sizeof( prologue ) / sizeof( prologue[0] ); ++k ) { fputs( prologue[k], fp ); }

  for ( const std::string& title : columns ) {
    if ( title.c_str()[0] == '\0' ) {
      fputs( "<th bgcolor=\"#FFFFFF\">&nbsp;</th>\n", fp );
    } else {
      fprintf( fp, "<th>%s</th>\n", title.c_str() );
    }
  }
  fputs( "</tr>\n", fp );
}
1477 
1478 void put_C_modules( FILE* fp, std::vector<std::string>& columns ) {
1479  int index = 0;
1480  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1481  ++it ) {
1482  if ( index % 2 )
1483  fprintf( fp, "<tr bgcolor=\"#FFFFCC\">\n" );
1484  else
1485  fprintf( fp, "<tr bgcolor=\"#CCFFCC\">\n" );
1486  fprintf( fp, "<td style=\"font-family:monospace;font-size:large;color:Black\"><a href=\"%s.html\">%s</a></td>\n",
1487  ( it->first ).c_str(), ( it->first ).c_str() );
1488  for ( std::vector<std::string>::const_iterator jt = columns.begin(); jt != columns.end(); ++jt ) {
1489  if ( strlen( jt->c_str() ) == 0 ) {
1490  fprintf( fp, "<td bgcolor=\"#FFFFFF\">&nbsp;</td>" );
1491  } else {
1492  if ( ( it->second ).find( *jt ) == ( it->second ).end() ) {
1493  fprintf( stderr, "ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", ( *jt ).c_str() );
1494  exit( 1 );
1495  }
1496  fprintf( fp, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\" align=\"right\">%.2f</td>\n",
1497  ( it->second )[*jt] );
1498  }
1499  }
1500  fprintf( fp, "</tr>\n" );
1501  index++;
1502  }
1503 }
1504 
// put_C_footer()
// FILE *fp : stream the HTML index page is written to
// closes the table, body and html elements of the page
void put_C_footer( FILE* fp ) { fputs( "</table>\n</body>\n</html>\n", fp ); }
1509 
// put_C_header_csv()
// FILE *fp : stream the CSV file is written to
// columns  : column titles; entries that are empty strings are skipped
// writes the CSV header line: "MODULE NAME" followed by the non-empty titles, comma separated
void put_C_header_csv( FILE* fp, std::vector<std::string>& columns ) {
  fputs( "MODULE NAME", fp );
  for ( const std::string& title : columns ) {
    if ( title.c_str()[0] == '\0' ) continue; // spacer columns have no CSV counterpart
    fprintf( fp, ",%s", title.c_str() );
  }
  fputc( '\n', fp );
}
1520 
1521 void put_C_modules_csv( FILE* fp, std::vector<std::string>& columns ) {
1522  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1523  ++it ) {
1524  fprintf( fp, "%s", ( it->first ).c_str() );
1525  for ( std::vector<std::string>::const_iterator jt = columns.begin(); jt != columns.end(); ++jt ) {
1526  if ( strlen( jt->c_str() ) == 0 ) {
1527  } else {
1528  if ( ( it->second ).find( *jt ) == ( it->second ).end() ) {
1529  fprintf( stderr, "ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", ( *jt ).c_str() );
1530  exit( 1 );
1531  }
1532  fprintf( fp, ",%.2f", ( it->second )[*jt] );
1533  }
1534  }
1535  fprintf( fp, "\n" );
1536  }
1537 }
1538 
1539 // normalize()
1540 // struct C_module *mod : pointer to the head of the list of modules
1541 // int counter : event selected (see C_module class for which event corresponds to which number)
1542 // int number_of_modules : length of the list
1543 // double value : value to be normalized
1544 // double normalizeTo : value to which the value above should be normalized
1545 // returns the normalized value
1546 double normalize( std::string field, double value, double normalizeTo ) {
1547  double max = 0;
1548  double counter_value;
1549  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1550  ++it ) {
1551  counter_value = ( it->second )[field];
1552  if ( max < counter_value ) max = counter_value;
1553  }
1554  if ( value > 0 && max > 0 && normalizeTo > 0 ) {
1555  return 1. * value / max * normalizeTo;
1556  } else
1557  return 0;
1558 }
1559 
1560 // calc_post_deriv_values()
1561 // struct C_module *mod : pointer to the head of the list of modules
1562 // double totalCycles : total cycles spent by all the modules
1563 // int number_of_modules : length of the list
1564 // calculates the iFactor of each module
1566  if ( nehalem ) {
1567  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1568  ++it ) {
1569  double simdnorm =
1570  1. - normalize( "Packed % of all UOPS Retired", ( it->second )["Packed % of all UOPS Retired"], 1 );
1571  double misspnorm = normalize( "% of Mispredicted Branches", ( it->second )["% of Mispredicted Branches"], 1 );
1572  double stallnorm = normalize( "Stalled Cycles", ( it->second )["Stalled Cycles"], 1 );
1573  ( it->second )["iFactor"] = stallnorm * ( simdnorm + misspnorm + stallnorm );
1574  }
1575  } else {
1576  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1577  ++it ) {
1578  double simdnorm =
1579  1. - normalize( "Packed SIMD % of all Instructions", ( it->second )["Packed SIMD % of all Instructions"], 1 );
1580  double misspnorm = normalize( "% of Mispredicted Branches", ( it->second )["% of Mispredicted Branches"], 1 );
1581  double stallnorm = normalize( "Stalled Cycles", ( it->second )["Stalled Cycles"], 1 );
1582  ( it->second )["iFactor"] = stallnorm * ( simdnorm + misspnorm + stallnorm );
1583  }
1584  }
1585 }
1586 
1587 // getTotalCycles()
1588 // struct C_module *mod : pointer to the head of the list of modules
1589 // int number_of_modules : length of the list
1590 // returns the number of total cycles spent by all the modules
1591 double getTotalCycles() {
1592  double sum = 0;
1593  if ( nehalem ) {
1594  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1595  ++it ) {
1596  sum += ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
1597  }
1598  } else {
1599  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1600  ++it ) {
1601  sum += ( it->second )["UNHALTED_CORE_CYCLES"];
1602  }
1603  }
1604  return sum;
1605 }
1606 
1607 // main()
1608 // takes as argument the directory containing results
1609 // and produces the HTML directory inside of it containing browsable statistics
// Command line: DIRECTORY [--caa] [--csv]
//   --csv : write DIRECTORY/results.csv instead of the HTML pages; sampling ("_S_") files are skipped
//   --caa : print the derived CAA columns instead of the raw counting events
// Returns 0 on success; most failures print a message and call exit( 1 ), a failing opendir
// returns errno instead.
1610  int main( int argc, char* argv[] ) {
// Exactly one directory plus at most two option flags are accepted.
1611  if ( argc < 2 || argc > 4 ) {
1612  printf( "\n\nUsage: %s DIRECTORY [--caa] [--csv]\n\n", argv[0] );
1613  exit( 1 );
1614  }
1615 
1616  bool caa = false;
1617  bool csv = false;
// Arguments other than --caa / --csv are silently ignored.
1618  for ( int i = 2; i < argc; i++ ) {
1619  if ( !strcmp( argv[i], "--caa" ) ) caa = true;
1620  if ( !strcmp( argv[i], "--csv" ) ) csv = true;
1621  }
1622 
// NOTE(review): strcpy/strcat below copy argv[1] into a MAX_FILENAME_LENGTH buffer without a length check.
1623  char dir[MAX_FILENAME_LENGTH];
1624  strcpy( dir, argv[1] );
// HTML mode only: create DIRECTORY/HTML; any mkdir failure aborts
// (presumably this includes the directory already existing - confirm).
1625  if ( !csv ) {
1626  strcat( dir, "/HTML" );
1627  int res = mkdir( dir, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH );
1628  if ( res != 0 ) {
1629  fprintf( stderr, "ERROR: Cannot create directory %s\naborting...\n", dir );
1630  exit( 1 );
1631  }
1632  }
1633 
1634  DIR* dp;
1635  struct dirent* dirp;
// NOTE(review): num_of_modules is updated in the second pass but never read afterwards in this function.
1636  int num_of_modules = 0;
// First pass over DIRECTORY: collect the event name of every sampling file
// (name contains "_S_" and ".txt.gz"); skipped entirely in CSV mode.
// NOTE(review): errno is returned after printf has run; printf may have changed it - confirm.
1637  if ( ( dp = opendir( argv[1] ) ) == NULL ) {
1638  printf( "Error(%d) opening %s\n", errno, argv[1] );
1639  return errno;
1640  }
1641  while ( ( dirp = readdir( dp ) ) != NULL ) {
1642  if ( strstr( dirp->d_name, "_S_" ) != NULL && strstr( dirp->d_name, ".txt.gz" ) != NULL && !csv ) {
1643  if ( read_S_events( argv[1], dirp->d_name ) ) {
1644  fprintf( stderr, "ERROR: Cannot read file %s\naborting...\n", dirp->d_name );
1645  exit( 1 );
1646  }
1647  }
1648  }
1649  closedir( dp );
// The sampling event list is sorted before the second pass processes the files.
1650  sort( S_events.begin(), S_events.end() );
// Second pass: process sampling files with read_S_file (HTML mode only) and counting files
// (name contains "_C_" and ".txt") with read_C_file.
1651  if ( ( dp = opendir( argv[1] ) ) == NULL ) {
1652  printf( "Error(%d) opening %s\n", errno, argv[1] );
1653  return errno;
1654  }
1655  while ( ( dirp = readdir( dp ) ) != NULL ) {
1656  if ( strstr( dirp->d_name, "_S_" ) != NULL && strstr( dirp->d_name, ".txt.gz" ) != NULL && !csv ) {
1657  if ( read_S_file( argv[1], dirp->d_name ) ) {
1658  fprintf( stderr, "ERROR: Cannot read file %s\naborting...\n", dirp->d_name );
1659  exit( 1 );
1660  }
1661  } else if ( strstr( dirp->d_name, "_C_" ) != NULL && strstr( dirp->d_name, ".txt" ) != NULL ) {
1662  int res = read_C_file( argv[1], dirp->d_name );
1663  if ( res > num_of_modules ) { num_of_modules = res; }
1664  }
1665  }
1666  closedir( dp );
1667 
// HTML mode: append the closing tags to every per-module page.
1668  if ( !csv ) {
1669  if ( finalize_S_html_pages( argv[1] ) ) {
1670  fprintf( stderr, "ERROR: Cannot finalize HTML pages!!!\naborting...\n" );
1671  exit( 1 );
1672  }
1673  }
1674 
// Summary output: DIRECTORY/HTML/index.html, or DIRECTORY/results.csv in CSV mode.
// NOTE(review): sprintf into filepath is unbounded with respect to argv[1].
1675  char filepath[MAX_FILENAME_LENGTH];
1676  bzero( filepath, MAX_FILENAME_LENGTH );
1677  if ( !csv )
1678  sprintf( filepath, "%s/HTML/index.html", argv[1] );
1679  else
1680  sprintf( filepath, "%s/results.csv", argv[1] );
1681  FILE* fp = fopen( filepath, "w" );
// NOTE(review): the message below names index.html even when results.csv was the file requested.
1682  if ( fp == NULL ) {
1683  fprintf( stderr, "ERROR: Cannot create file index.html!!!\naborting...\n" );
1684  exit( 1 );
1685  }
1686 
// CAA mode: verify the required events are present, compute derived values and print the
// architecture-specific display columns (core when nehalem is false, nehalem otherwise).
// NOTE(review): the listing numbering skips 1690, 1695, 1698, 1707, 1712 and 1715 inside this
// branch; statements may be missing from this listing - confirm against the upstream source.
1687  if ( caa ) {
1688  double totalCycles;
1689  if ( !nehalem ) {
1691  if ( !check_for_core_caa_events() ) {
1692  fprintf( stderr, "(core) ERROR: One or more events for CAA missing!\naborting...\n" );
1693  exit( 1 );
1694  }
1696  totalCycles = getTotalCycles();
1697  calc_core_deriv_values( totalCycles );
1699  if ( !csv ) {
1700  put_C_header( fp, core_caa_events_displ );
1701  put_C_modules( fp, core_caa_events_displ );
1702  } else {
1703  put_C_header_csv( fp, core_caa_events_displ );
1704  put_C_modules_csv( fp, core_caa_events_displ );
1705  }
1706  } else {
1708  if ( !check_for_nhm_caa_events() ) {
1709  fprintf( stderr, "(nehalem) ERROR: One or more events for CAA missing!\naborting...\n" );
1710  exit( 1 );
1711  }
1713  totalCycles = getTotalCycles();
1714  calc_nhm_deriv_values( totalCycles );
1716  if ( !csv ) {
1717  put_C_header( fp, nhm_caa_events_displ );
1718  put_C_modules( fp, nhm_caa_events_displ );
1719  } else {
1720  put_C_header_csv( fp, nhm_caa_events_displ );
1721  put_C_modules_csv( fp, nhm_caa_events_displ );
1722  }
1723  }
// The HTML footer is only needed for the HTML page; the CSV file has none.
1724  if ( !csv ) put_C_footer( fp );
1725  fclose( fp );
1726  } else {
// Plain mode: one column per raw counting event collected in C_events.
1727  if ( !csv ) {
1728  put_C_header( fp, C_events );
1729  put_C_modules( fp, C_events );
1730  put_C_footer( fp );
1731  } else {
1732  put_C_header_csv( fp, C_events );
1733  put_C_modules_csv( fp, C_events );
1734  }
1735  fclose( fp );
1736  }
// HTML mode: copy "sorttable.js" (opened by relative name) into DIRECTORY/HTML so that the
// generated pages can load it.
1737  if ( !csv ) {
1738  char src[MAX_FILENAME_LENGTH];
1739  char dst[MAX_FILENAME_LENGTH];
1740  sprintf( src, "sorttable.js" );
1741  sprintf( dst, "%s/HTML/sorttable.js", argv[1] );
1742  int fd_src = open( src, O_RDONLY );
1743  if ( fd_src == -1 ) {
1744  fprintf( stderr, "ERROR: Cannot open file \"%s\"!\naborting...\n", src );
1745  exit( 1 );
1746  }
1747  int fd_dst = open( dst, O_WRONLY | O_CREAT | O_TRUNC, 0644 );
1748  if ( fd_dst == -1 ) {
1749  fprintf( stderr, "ERROR: Cannot open file \"%s\" (%s)!\naborting...\n", dst, strerror( errno ) );
1750  exit( 1 );
1751  }
// Byte-by-byte copy; the loop ends when read() returns 0.
// NOTE(review): a read() result of -1 is also non-zero, so a read error would keep the loop
// running and rewriting the last byte - consider testing for "> 0".
1752  char c;
1753  while ( read( fd_src, &c, 1 ) ) {
1754  if ( write( fd_dst, &c, 1 ) == -1 ) {
1755  std::cerr << "ERROR: failed to write to " << dst << std::endl;
1756  exit( 1 );
1757  }
1758  }
1759  close( fd_dst );
1760  close( fd_src );
1761  }
1762  return 0;
1763 }
MAX_CMASK_STR_LENGTH
#define MAX_CMASK_STR_LENGTH
Definition: pfm_gen_analysis.cpp:93
I7_L1_ITLB_WALK_COMPLETED_CYCLES
#define I7_L1_ITLB_WALK_COMPLETED_CYCLES
Definition: pfm_gen_analysis.cpp:59
std::max_element
T max_element(T... args)
init_core_caa_events
void init_core_caa_events()
Definition: pfm_gen_analysis.cpp:284
html_special_chars
void html_special_chars(const char *s, char *s_mod)
Definition: pfm_gen_analysis.cpp:838
I7_OTHER_CORE_L2_HITM_CYCLES
#define I7_OTHER_CORE_L2_HITM_CYCLES
Definition: pfm_gen_analysis.cpp:63
S_module::get_arch
std::string get_arch()
Definition: pfm_gen_analysis.cpp:815
FileInfo::symbolByOffset
const char * symbolByOffset(Offset offset)
Definition: pfm_gen_analysis.cpp:162
MAX_SYM_MOD_LENGTH
#define MAX_SYM_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:81
std::string
STL class.
PipeReader::output
std::istringstream & output(void)
Definition: pfm_gen_analysis.cpp:122
put_C_modules
void put_C_modules(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1478
FileInfo::CacheItemComparator
Definition: pfm_gen_analysis.cpp:191
FileInfo::CacheItem::CacheItem
CacheItem(Offset offset, const std::string &name)
Definition: pfm_gen_analysis.cpp:183
S_module::inv
unsigned int inv
Definition: pfm_gen_analysis.cpp:785
plotBacklogPyRoot.argc
argc
Definition: plotBacklogPyRoot.py:173
std::pair
init_nhm_caa_events
void init_nhm_caa_events()
Definition: pfm_gen_analysis.cpp:308
I7_IFETCH_L2_MISS_L3_HITM
#define I7_IFETCH_L2_MISS_L3_HITM
Definition: pfm_gen_analysis.cpp:71
gaudirun.s
string s
Definition: gaudirun.py:348
PipeReader::iss
std::istringstream * iss
Definition: pfm_gen_analysis.cpp:126
S_module::get_event
std::string get_event()
Definition: pfm_gen_analysis.cpp:816
std::vector< CacheItem >
std::map::find
T find(T... args)
MAX_LIB_MOD_LENGTH
#define MAX_LIB_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:83
finalize_S_html_pages
int finalize_S_html_pages(const char *dir)
Definition: pfm_gen_analysis.cpp:1358
std::vector::size
T size(T... args)
CORE_UNKNOWN_ADDR_STORE_CYCLES
#define CORE_UNKNOWN_ADDR_STORE_CYCLES
Definition: pfm_gen_analysis.cpp:53
put_C_header
void put_C_header(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1449
CORE_L1_DTLB_MISS_CYCLES
#define CORE_L1_DTLB_MISS_CYCLES
Definition: pfm_gen_analysis.cpp:51
I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES
#define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:64
Gaudi::Units::nm
constexpr double nm
Definition: SystemOfUnits.h:97
std::istringstream
STL class.
FileInfo::SymbolCache
std::vector< CacheItem > SymbolCache
Definition: pfm_gen_analysis.cpp:188
FileInfo::CacheItem::OFFSET
Offset OFFSET
Definition: pfm_gen_analysis.cpp:183
gaudirun.c
c
Definition: gaudirun.py:527
max
EventIDBase max(const EventIDBase &lhs, const EventIDBase &rhs)
Definition: EventIDBase.h:225
std::vector::back
T back(T... args)
MAX_EVENT_NAME_LENGTH
#define MAX_EVENT_NAME_LENGTH
Definition: pfm_gen_analysis.cpp:89
S_module::get_total_num_samples
unsigned int get_total_num_samples()
Definition: pfm_gen_analysis.cpp:831
std::map::clear
T clear(T... args)
I7_L2_HIT_CYCLES
#define I7_L2_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:60
EXPECTED_CPI
#define EXPECTED_CPI
Definition: pfm_gen_analysis.cpp:76
std::vector::push_back
T push_back(T... args)
S_module::get_max
bool get_max(char *index, unsigned int &value)
Definition: pfm_gen_analysis.cpp:821
compareOutputFiles.sp
sp
Definition: compareOutputFiles.py:515
I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT
#define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT
Definition: pfm_gen_analysis.cpp:68
I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD
#define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD
Definition: pfm_gen_analysis.cpp:72
read_C_file
int read_C_file(const char *dir, const char *filename)
Definition: pfm_gen_analysis.cpp:1384
S_module::sp
unsigned int sp
Definition: pfm_gen_analysis.cpp:786
S_module::clear
void clear()
Definition: pfm_gen_analysis.cpp:790
CORE_L2_HIT_CYCLES
#define CORE_L2_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:50
FileInfo::FileInfo
FileInfo(void)
Definition: pfm_gen_analysis.cpp:157
read_S_file
int read_S_file(const char *dir, const char *filename)
Definition: pfm_gen_analysis.cpp:1198
calc_nhm_deriv_values
void calc_nhm_deriv_values(double totalCycles)
Definition: pfm_gen_analysis.cpp:607
TimingHistograms.name
name
Definition: TimingHistograms.py:25
S_module::get_c_mask
unsigned int get_c_mask()
Definition: pfm_gen_analysis.cpp:814
PipeReader::pipe
FILE * pipe
Definition: pfm_gen_analysis.cpp:125
read_S_events
int read_S_events(const char *dir, const char *filename)
Definition: pfm_gen_analysis.cpp:1311
MAX_SIMPLE_LIB_MOD_LENGTH
#define MAX_SIMPLE_LIB_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:87
S_module::module_name
std::string module_name
Definition: pfm_gen_analysis.cpp:781
MAX_LIB_LENGTH
#define MAX_LIB_LENGTH
Definition: pfm_gen_analysis.cpp:82
FileInfo::createOffsetMap
void createOffsetMap(void)
Definition: pfm_gen_analysis.cpp:196
PIPE_BUFFER_LENGTH
#define PIPE_BUFFER_LENGTH
Definition: pfm_gen_analysis.cpp:97
I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP
#define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP
Definition: pfm_gen_analysis.cpp:69
MAX_SIMPLE_SYM_LENGTH
#define MAX_SIMPLE_SYM_LENGTH
Definition: pfm_gen_analysis.cpp:84
ProduceConsume.j
j
Definition: ProduceConsume.py:101
std::cerr
CLHEP::begin
double * begin(CLHEP::HepVector &v)
Definition: TupleAlg.cpp:45
I7_L1_DTLB_WALK_COMPLETED_CYCLES
#define I7_L1_DTLB_WALK_COMPLETED_CYCLES
Definition: pfm_gen_analysis.cpp:58
std::string::c_str
T c_str(T... args)
S_module::get_module_name
std::string get_module_name()
Definition: pfm_gen_analysis.cpp:830
Prepare.dst
dst
Definition: Prepare.py:22
I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT
#define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT
Definition: pfm_gen_analysis.cpp:67
func_name
const char * func_name(const char *demangled_symbol)
Definition: pfm_gen_analysis.cpp:869
std::map::erase
T erase(T... args)
getTotalCycles
double getTotalCycles()
Definition: pfm_gen_analysis.cpp:1591
FileInfo::CacheItem::NAME
std::string NAME
Definition: pfm_gen_analysis.cpp:185
PipeReader
Definition: pfm_gen_analysis.cpp:99
FileInfo::next
Offset next(Offset offset)
Definition: pfm_gen_analysis.cpp:175
S_module::samples
std::map< std::string, unsigned int > samples
Definition: pfm_gen_analysis.cpp:779
CORE_LCP_STALL_CYCLES
#define CORE_LCP_STALL_CYCLES
Definition: pfm_gen_analysis.cpp:52
FileInfo::FileInfo
FileInfo(const std::string &name, bool useGdb)
Definition: pfm_gen_analysis.cpp:158
check_for_nhm_caa_events
bool check_for_nhm_caa_events()
Definition: pfm_gen_analysis.cpp:362
CORE_OVERLAPPING_CYCLES
#define CORE_OVERLAPPING_CYCLES
Definition: pfm_gen_analysis.cpp:54
std::map< std::string, unsigned int >
S_module::get_inv_mask
unsigned int get_inv_mask()
Definition: pfm_gen_analysis.cpp:813
FileInfo::NAME
std::string NAME
Definition: pfm_gen_analysis.cpp:156
put_C_footer
void put_C_footer(FILE *fp)
Definition: pfm_gen_analysis.cpp:1505
put_C_modules_csv
void put_C_modules_csv(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1521
GaudiPluginService.cpluginsvc.n
n
Definition: cpluginsvc.py:235
MAX_MODULE_NAME_LENGTH
#define MAX_MODULE_NAME_LENGTH
Definition: pfm_gen_analysis.cpp:90
FileInfo::CacheItemComparator::operator()
bool operator()(const CacheItem &a, const int &b) const
Definition: pfm_gen_analysis.cpp:192
init_core_caa_events_displ
void init_core_caa_events_displ()
Definition: pfm_gen_analysis.cpp:372
S_module::add_sample
void add_sample(const char *index, unsigned int value)
Definition: pfm_gen_analysis.cpp:817
S_module::event
std::string event
Definition: pfm_gen_analysis.cpp:783
CORE_SPAN_ACROSS_CACHE_LINE_CYCLES
#define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES
Definition: pfm_gen_analysis.cpp:55
PipeReader::PipeReader
PipeReader(const char *cmd)
Definition: pfm_gen_analysis.cpp:101
MAX_SYM_LENGTH
#define MAX_SYM_LENGTH
Definition: pfm_gen_analysis.cpp:80
hivetimeline.read
def read(f, regex=".*", skipevents=0)
Definition: hivetimeline.py:33
FileInfo::CacheItem
Definition: pfm_gen_analysis.cpp:182
skipWhitespaces
bool skipWhitespaces(const char *srcbuffer, const char **destbuffer)
Definition: pfm_gen_analysis.cpp:133
std::endl
T endl(T... args)
put_C_header_csv
void put_C_header_csv(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1510
FileInfo::Offset
int Offset
Definition: pfm_gen_analysis.cpp:155
S_module::get_smpl_period
unsigned int get_smpl_period()
Definition: pfm_gen_analysis.cpp:812
std::vector::begin
T begin(T... args)
std::getline
T getline(T... args)
std::map::insert
T insert(T... args)
I7_OTHER_CORE_L2_HIT_CYCLES
#define I7_OTHER_CORE_L2_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:62
CORE_L2_MISS_CYCLES
#define CORE_L2_MISS_CYCLES
Definition: pfm_gen_analysis.cpp:49
MAX_SIMPLE_LIB_LENGTH
#define MAX_SIMPLE_LIB_LENGTH
Definition: pfm_gen_analysis.cpp:86
S_module::arch
std::string arch
Definition: pfm_gen_analysis.cpp:782
S_module::cmask
unsigned int cmask
Definition: pfm_gen_analysis.cpp:784
MAX_LINE_LENGTH
#define MAX_LINE_LENGTH
Definition: pfm_gen_analysis.cpp:88
FileInfo
Definition: pfm_gen_analysis.cpp:153
MAX_SAMPLE_INDEX_LENGTH
#define MAX_SAMPLE_INDEX_LENGTH
Definition: pfm_gen_analysis.cpp:79
I7_IFETCH_L2_MISS_L3_HIT_SNOOP
#define I7_IFETCH_L2_MISS_L3_HIT_SNOOP
Definition: pfm_gen_analysis.cpp:70
calc_post_deriv_values
void calc_post_deriv_values()
Definition: pfm_gen_analysis.cpp:1565
std::vector::empty
T empty(T... args)
plotSpeedupsPyRoot.line
line
Definition: plotSpeedupsPyRoot.py:198
put_S_module
void put_S_module(S_module *cur_module, const char *dir)
Definition: pfm_gen_analysis.cpp:1046
main
int main(int argc, char *argv[])
Definition: pfm_gen_analysis.cpp:1610
MAX_INV_STR_LENGTH
#define MAX_INV_STR_LENGTH
Definition: pfm_gen_analysis.cpp:94
check_for_core_caa_events
bool check_for_core_caa_events()
Definition: pfm_gen_analysis.cpp:352
std::vector::end
T end(T... args)
normalize
double normalize(std::string field, double value, double normalizeTo)
Definition: pfm_gen_analysis.cpp:1546
I7_L3_UNSHARED_HIT_CYCLES
#define I7_L3_UNSHARED_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:61
S_module::S_module
S_module()
Definition: pfm_gen_analysis.cpp:789
skipString
bool skipString(const char *strptr, const char *srcbuffer, const char **dstbuffer)
Definition: pfm_gen_analysis.cpp:147
FileInfo::CacheItemComparator::operator()
bool operator()(const int &a, const CacheItem &b) const
Definition: pfm_gen_analysis.cpp:193
graphanalysis.filename
filename
Definition: graphanalysis.py:131
PipeReader::~PipeReader
~PipeReader(void)
Definition: pfm_gen_analysis.cpp:117
S_module::init
void init(const char *name, const char *architecture, const char *event_name, unsigned int c_mask, unsigned int inv_mask, unsigned int smpl_period)
Definition: pfm_gen_analysis.cpp:799
S_module::total_num_samples
unsigned int total_num_samples
Definition: pfm_gen_analysis.cpp:780
MAX_FILENAME_LENGTH
#define MAX_FILENAME_LENGTH
Definition: pfm_gen_analysis.cpp:78
I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES
#define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:66
init_nhm_caa_events_displ
void init_nhm_caa_events_displ()
Definition: pfm_gen_analysis.cpp:502
calc_core_deriv_values
void calc_core_deriv_values(double totalCycles)
Definition: pfm_gen_analysis.cpp:421
I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES
#define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:65
S_module::set_total
void set_total(unsigned int total)
Definition: pfm_gen_analysis.cpp:808
MAX_SIMPLE_SYM_MOD_LENGTH
#define MAX_SIMPLE_SYM_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:85
gaudirun.argv
list argv
Definition: gaudirun.py:329
MAX_SP_STR_LENGTH
#define MAX_SP_STR_LENGTH
Definition: pfm_gen_analysis.cpp:95
FileInfo::m_symbolCache
SymbolCache m_symbolCache
Definition: pfm_gen_analysis.cpp:189
S_module
Definition: pfm_gen_analysis.cpp:777
MAX_ARCH_NAME_LENGTH
#define MAX_ARCH_NAME_LENGTH
Definition: pfm_gen_analysis.cpp:92