The Gaudi Framework  v39r1 (adb068b2)
pfm_gen_analysis.cpp
Go to the documentation of this file.
1 /***********************************************************************************\
2 * (c) Copyright 1998-2024 CERN for the benefit of the LHCb and ATLAS collaborations *
3 * *
4 * This software is distributed under the terms of the Apache version 2 licence, *
5 * copied verbatim in the file "LICENSE". *
6 * *
7 * In applying this licence, CERN does not waive the privileges and immunities *
8 * granted to it by virtue of its status as an Intergovernmental Organization *
9 * or submit itself to any jurisdiction. *
10 \***********************************************************************************/
11 /*
12 Name: pfm_analysis.cpp
13 Author: Daniele Francesco Kruse
14 E-mail: daniele.francesco.kruse@cern.ch
15 Version: 0.9 (16/02/2010)
16 
17 This code is responsible for analysing results generated by the PerfmonService of CMSSW.
18 It takes 42 files as input (21 simple text files and 21 gzipped text files) and
19 produces an HTML directory containing the results of the analysis (both counting and sampling).
20 
21 compile linking zlib: g++ -Wall -lz pfm_analysis.cpp
22 */
23 
24 #include <ctype.h>
25 #include <cxxabi.h>
26 #include <fcntl.h>
27 #include <math.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <sys/stat.h>
33 #include <sys/types.h>
34 #include <unistd.h>
35 #include <zlib.h>
36 
37 #include <algorithm>
38 #include <iostream>
39 #include <list>
40 #include <map>
41 #include <memory>
42 #include <sstream>
43 #include <string>
44 #include <vector>
45 
46 #include <dirent.h>
47 #include <errno.h>
48 
// ---------------------------------------------------------------------------
// Per-event cycle weights: each derived "Impact" value in the analysis is an
// event count multiplied by one of these constants.
// ---------------------------------------------------------------------------

// Core
#define CORE_L2_MISS_CYCLES 200
#define CORE_L2_HIT_CYCLES 14.5
#define CORE_L1_DTLB_MISS_CYCLES 10
#define CORE_LCP_STALL_CYCLES 6
#define CORE_UNKNOWN_ADDR_STORE_CYCLES 5
#define CORE_OVERLAPPING_CYCLES 6
#define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES 20

// Nehalem
#define I7_L1_DTLB_WALK_COMPLETED_CYCLES 35
#define I7_L1_ITLB_WALK_COMPLETED_CYCLES 35
#define I7_L2_HIT_CYCLES 6
#define I7_L3_UNSHARED_HIT_CYCLES 35
#define I7_OTHER_CORE_L2_HIT_CYCLES 60
#define I7_OTHER_CORE_L2_HITM_CYCLES 75
#define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES 225  // average of 200 (not modified) and 225-250 (modified)
#define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES 360 // average of 350 (not modified) and 370 (modified)
#define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES 180
#define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT 200
#define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT 350
#define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP 35
#define I7_IFETCH_L2_MISS_L3_HIT_SNOOP 60
#define I7_IFETCH_L2_MISS_L3_HITM 75
#define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD 180

// NOTE(review): presumably an upper bound on the number of analysed modules;
// its use is not visible in this part of the file.
#define MAX_MODULES 1000

// Reference CPI used when estimating the improvement margin ("iMargin").
#define EXPECTED_CPI 0.25

// Buffer-size limits (in characters) for the various strings handled by the parser.
#define MAX_FILENAME_LENGTH 1024
#define MAX_SAMPLE_INDEX_LENGTH 10000
#define MAX_SYM_LENGTH 15000
#define MAX_SYM_MOD_LENGTH 20000
#define MAX_LIB_LENGTH 5000
#define MAX_LIB_MOD_LENGTH 7000
#define MAX_SIMPLE_SYM_LENGTH 300
#define MAX_SIMPLE_SYM_MOD_LENGTH 500
#define MAX_SIMPLE_LIB_LENGTH 300
#define MAX_SIMPLE_LIB_MOD_LENGTH 500
#define MAX_LINE_LENGTH 20000
#define MAX_EVENT_NAME_LENGTH 150
#define MAX_MODULE_NAME_LENGTH 250
#define MAX_VALUE_STRING_LENGTH 250
#define MAX_ARCH_NAME_LENGTH 20
#define MAX_CMASK_STR_LENGTH 5
#define MAX_INV_STR_LENGTH 5
#define MAX_SP_STR_LENGTH 50
97 
// Size of the chunk buffer used when draining a command's output.
#define PIPE_BUFFER_LENGTH 1000

/// Runs a shell command through popen() and captures everything it writes to
/// stdout; the captured text is then exposed as an input string stream.
///
/// If the pipe cannot be opened the constructor prints a message and
/// terminates the process with exit(1).
class PipeReader {
public:
  /// @param cmd  command line handed verbatim to popen(cmd, "r").
  PipeReader( const char* cmd ) {
    pipe = popen( cmd, "r" );
    if ( !pipe ) {
      printf( "Cannot open pipe. Exiting...\n" );
      exit( 1 );
    }
    char        buffer[PIPE_BUFFER_LENGTH];
    std::string result;
    // Stop on the first failed fgets(): that covers both end-of-file and read
    // errors. Polling feof() alone would loop forever after a read error.
    while ( fgets( buffer, sizeof( buffer ), pipe ) != NULL ) { result += buffer; }
    iss = std::make_unique<std::istringstream>( result, std::istringstream::in );
  }

  // The object owns the FILE* and closes it exactly once; forbid copies.
  PipeReader( const PipeReader& )            = delete;
  PipeReader& operator=( const PipeReader& ) = delete;

  ~PipeReader( void ) { pclose( pipe ); }

  /// Stream over the complete captured output of the command.
  std::istringstream& output( void ) { return *iss; }

private:
  FILE*                               pipe;
  std::unique_ptr<std::istringstream> iss;
};
126 
// skipWhitespaces()
// const char *srcbuffer   : source string
// const char **destbuffer : receives the position after the skipped run
// Skips a run of one or more whitespace characters at the start of srcbuffer.
// Returns true and stores the first non-whitespace position in *destbuffer.
// Returns false, leaving *destbuffer untouched, when srcbuffer does not start
// with whitespace (this includes the empty string).
bool skipWhitespaces( const char* srcbuffer, const char** destbuffer ) {
  // isspace() requires a value representable as unsigned char (or EOF);
  // passing a negative plain char is undefined behaviour.
  if ( !isspace( static_cast<unsigned char>( *srcbuffer ) ) ) { return false; }
  ++srcbuffer;
  while ( isspace( static_cast<unsigned char>( *srcbuffer ) ) ) { ++srcbuffer; }
  *destbuffer = srcbuffer;
  return true;
}
137 
// skipString()
// const char *strptr     : literal prefix to skip
// const char *srcbuffer  : source string
// const char **dstbuffer : receives the position after the prefix
// Checks whether srcbuffer starts exactly with strptr (no whitespace handling).
// On a match stores the position just past the prefix in *dstbuffer and
// returns true; otherwise returns false and leaves *dstbuffer untouched.
bool skipString( const char* strptr, const char* srcbuffer, const char** dstbuffer ) {
  const size_t prefixLength = strlen( strptr );
  if ( strncmp( srcbuffer, strptr, prefixLength ) != 0 ) { return false; }
  *dstbuffer = srcbuffer + prefixLength;
  return true;
}
150 
151 class FileInfo {
152 public:
153  typedef int Offset;
155  FileInfo( void ) : NAME( "<dynamically generated>" ) {}
156  FileInfo( const std::string& name, bool useGdb ) : NAME( name ) {
157  if ( useGdb ) { this->createOffsetMap(); }
158  }
159 
160  const char* symbolByOffset( Offset offset ) {
161  if ( m_symbolCache.empty() ) { return 0; }
162 
163  SymbolCache::iterator i = lower_bound( m_symbolCache.begin(), m_symbolCache.end(), offset, CacheItemComparator() );
164  if ( i->OFFSET == offset ) { return i->NAME.c_str(); }
165 
166  if ( i == m_symbolCache.begin() ) { return m_symbolCache.begin()->NAME.c_str(); }
167 
168  --i;
169 
170  return i->NAME.c_str();
171  }
172 
173  Offset next( Offset offset ) {
174  SymbolCache::iterator i = upper_bound( m_symbolCache.begin(), m_symbolCache.end(), offset, CacheItemComparator() );
175  if ( i == m_symbolCache.end() ) { return 0; }
176  return i->OFFSET;
177  }
178 
179 private:
180  struct CacheItem {
181  CacheItem( Offset offset, const std::string& name ) : OFFSET( offset ), NAME( name ){};
184  };
185 
188 
190  bool operator()( const CacheItem& a, const int& b ) const { return a.OFFSET < b; }
191  bool operator()( const int& a, const CacheItem& b ) const { return a < b.OFFSET; }
192  };
193 
194  void createOffsetMap( void ) {
195  std::string commandLine = "objdump -p " + NAME;
196  PipeReader objdump( commandLine.c_str() );
197  std::string oldname;
198  std::string suffix;
199  int vmbase = 0;
200  bool matched = false;
201  while ( objdump.output() ) {
202  // Checks the following regexp
203  //
204  // LOAD\\s+off\\s+(0x[0-9A-Fa-f]+)\\s+vaddr\\s+(0x[0-9A-Fa-f]+)
205  //
206  // and sets vmbase to be $2 - $1 of the first matched entry.
207 
209  std::getline( objdump.output(), line );
210 
211  if ( !objdump.output() ) break;
212  if ( line.empty() ) continue;
213  const char* lineptr = line.c_str();
214  if ( !skipWhitespaces( lineptr, &lineptr ) ) continue;
215  if ( !skipString( "LOAD", lineptr, &lineptr ) ) continue;
216  if ( !skipWhitespaces( lineptr, &lineptr ) ) continue;
217  if ( !skipString( "off", lineptr, &lineptr ) ) continue;
218  char* endptr = 0;
219  int initialBase = strtol( lineptr, &endptr, 16 );
220  if ( lineptr == endptr ) continue;
221  lineptr = endptr;
222  if ( !skipWhitespaces( lineptr, &lineptr ) ) continue;
223  if ( !skipString( "vaddr", lineptr, &lineptr ) ) continue;
224  if ( !skipWhitespaces( lineptr, &lineptr ) ) continue;
225  int finalBase = strtol( lineptr, &endptr, 16 );
226  if ( lineptr == endptr ) continue;
227  vmbase = finalBase - initialBase;
228  matched = true;
229  break;
230  }
231  if ( !matched ) {
232  fprintf( stderr, "Cannot determine VM base address for %s\n", NAME.c_str() );
233  fprintf( stderr, "Error while running `objdump -p %s`\n", NAME.c_str() );
234  exit( 1 );
235  }
236  std::string commandLine2 = "nm -t d -n " + NAME;
237  PipeReader nm( commandLine2.c_str() );
238  while ( nm.output() ) {
240  std::getline( nm.output(), line );
241  if ( !nm.output() ) break;
242  if ( line.empty() ) continue;
243  // If line does not match "^(\\d+)[ ]\\S[ ](\S+)$", exit.
244  const char* begin = line.c_str();
245  char* endptr = 0;
246  int address = strtol( begin, &endptr, 10 );
247  if ( endptr == begin ) continue;
248  if ( *endptr++ != ' ' ) continue;
249  if ( isspace( *endptr++ ) ) continue;
250  if ( *endptr++ != ' ' ) continue;
251  char* symbolName = endptr;
252  while ( *endptr && !isspace( *endptr ) ) endptr++;
253  if ( *endptr != 0 ) continue;
254  // If line starts with '.' forget about it.
255  if ( symbolName[0] == '.' ) continue;
256  // Create a new symbol with the given fileoffset.
257  // The symbol is automatically saved in the FileInfo cache by offset.
258  // If a symbol with the same offset is already there, the new one
259  // replaces the old one.
260  int offset = address - vmbase;
261  if ( m_symbolCache.size() && ( m_symbolCache.back().OFFSET == offset ) )
262  m_symbolCache.back().NAME = symbolName;
263  else
264  m_symbolCache.push_back( CacheItem( address - vmbase, symbolName ) );
265  }
266  }
267 };
268 
269 static std::map<std::string, unsigned int> modules_tot_samples;
270 static std::map<std::string, FileInfo> libsInfo;
271 static int nehalem;
272 
274 static std::vector<std::string> C_events;
275 static std::vector<std::string> S_events;
276 
277 static std::vector<std::string> core_caa_events;
278 static std::vector<std::string> nhm_caa_events;
279 static std::vector<std::string> core_caa_events_displ;
280 static std::vector<std::string> nhm_caa_events_displ;
281 
283  core_caa_events.push_back( "BRANCH_INSTRUCTIONS_RETIRED" );
284  core_caa_events.push_back( "ILD_STALL" );
285  core_caa_events.push_back( "INST_RETIRED:LOADS" );
286  core_caa_events.push_back( "INST_RETIRED:OTHER" );
287  core_caa_events.push_back( "INST_RETIRED:STORES" );
288  core_caa_events.push_back( "INSTRUCTIONS_RETIRED" );
289  core_caa_events.push_back( "LOAD_BLOCK:OVERLAP_STORE" );
290  core_caa_events.push_back( "LOAD_BLOCK:STA" );
291  core_caa_events.push_back( "LOAD_BLOCK:UNTIL_RETIRE" );
292  core_caa_events.push_back( "MEM_LOAD_RETIRED:DTLB_MISS" );
293  core_caa_events.push_back( "MEM_LOAD_RETIRED:L1D_LINE_MISS" );
294  core_caa_events.push_back( "MEM_LOAD_RETIRED:L2_LINE_MISS" );
295  core_caa_events.push_back( "MISPREDICTED_BRANCH_RETIRED" );
296  // core_caa_events.push_back("RS_UOPS_DISPATCHED");
297  // core_caa_events.push_back("RS_UOPS_DISPATCHED CMASK=1");
298  core_caa_events.push_back( "RS_UOPS_DISPATCHED CMASK=1 INV=1" );
299  core_caa_events.push_back( "SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE" );
300  core_caa_events.push_back( "UNHALTED_CORE_CYCLES" );
301  // core_caa_events.push_back("UOPS_RETIRED:ANY");
302  // core_caa_events.push_back("UOPS_RETIRED:FUSED");
303  // core_caa_events.push_back("IDLE_DURING_DIV");
304 }
305 
307  nhm_caa_events.push_back( "ARITH:CYCLES_DIV_BUSY" );
308  nhm_caa_events.push_back( "BR_INST_EXEC:ANY" );
309  nhm_caa_events.push_back( "BR_INST_EXEC:DIRECT_NEAR_CALL" );
310  nhm_caa_events.push_back( "BR_INST_EXEC:INDIRECT_NEAR_CALL" );
311  nhm_caa_events.push_back( "BR_INST_EXEC:INDIRECT_NON_CALL" );
312  nhm_caa_events.push_back( "BR_INST_EXEC:NEAR_CALLS" );
313  nhm_caa_events.push_back( "BR_INST_EXEC:NON_CALLS" );
314  nhm_caa_events.push_back( "BR_INST_EXEC:RETURN_NEAR" );
315  nhm_caa_events.push_back( "BR_INST_RETIRED:ALL_BRANCHES" );
316  nhm_caa_events.push_back( "BR_INST_RETIRED:CONDITIONAL" );
317  nhm_caa_events.push_back( "BR_INST_RETIRED:NEAR_CALL" );
318  nhm_caa_events.push_back( "BR_MISP_EXEC:ANY" );
319  nhm_caa_events.push_back( "CPU_CLK_UNHALTED:THREAD_P" );
320  nhm_caa_events.push_back( "DTLB_LOAD_MISSES:WALK_COMPLETED" );
321  nhm_caa_events.push_back( "INST_RETIRED:ANY_P" );
322  nhm_caa_events.push_back( "ITLB_MISSES:WALK_COMPLETED" );
323  nhm_caa_events.push_back( "L2_RQSTS:IFETCH_HIT" );
324  nhm_caa_events.push_back( "L2_RQSTS:IFETCH_MISS" );
325  nhm_caa_events.push_back( "MEM_INST_RETIRED:LOADS" );
326  nhm_caa_events.push_back( "MEM_INST_RETIRED:STORES" );
327  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:L2_HIT" );
328  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:L3_MISS" );
329  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:L3_UNSHARED_HIT" );
330  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM" );
331  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:LOCAL_DRAM" );
332  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM" );
333  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT" );
334  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:REMOTE_DRAM" );
335  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM" );
336  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM" );
337  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP" );
338  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD" );
339  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM" );
340  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT" );
341  nhm_caa_events.push_back( "RESOURCE_STALLS:ANY" );
342  nhm_caa_events.push_back( "SSEX_UOPS_RETIRED:PACKED_DOUBLE" );
343  nhm_caa_events.push_back( "SSEX_UOPS_RETIRED:PACKED_SINGLE" );
344  nhm_caa_events.push_back( "UOPS_DECODED:MS CMASK=1" );
345  nhm_caa_events.push_back( "UOPS_ISSUED:ANY CMASK=1 INV=1" );
346  nhm_caa_events.push_back( "ITLB_MISS_RETIRED" );
347  nhm_caa_events.push_back( "UOPS_RETIRED:ANY" );
348 }
349 
351  for ( std::vector<std::string>::const_iterator it = core_caa_events.begin(); it != core_caa_events.end(); ++it ) {
352  if ( find( C_events.begin(), C_events.end(), ( *it ) ) == C_events.end() ) {
353  fprintf( stderr, "ERROR: Cannot find event %s!!!\naborting...\n", ( *it ).c_str() );
354  return false;
355  }
356  }
357  return true;
358 }
359 
361  for ( std::vector<std::string>::const_iterator it = nhm_caa_events.begin(); it != nhm_caa_events.end(); ++it ) {
362  if ( find( C_events.begin(), C_events.end(), ( *it ) ) == C_events.end() ) {
363  fprintf( stderr, "ERROR: Cannot find event %s!!!\naborting...\n", ( *it ).c_str() );
364  return false;
365  }
366  }
367  return true;
368 }
369 
371  core_caa_events_displ.push_back( "Total Cycles" );
372  core_caa_events_displ.push_back( "Stalled Cycles" );
373  core_caa_events_displ.push_back( "% of Total Cycles" );
374  core_caa_events_displ.push_back( "Instructions Retired" );
375  core_caa_events_displ.push_back( "CPI" );
376  core_caa_events_displ.push_back( "" );
377  core_caa_events_displ.push_back( "iMargin" );
378  core_caa_events_displ.push_back( "iFactor" );
379  core_caa_events_displ.push_back( "" );
380  core_caa_events_displ.push_back( "Counted Stalled Cycles" );
381  core_caa_events_displ.push_back( "" );
382  core_caa_events_displ.push_back( "L2 Miss Impact" );
383  core_caa_events_displ.push_back( "L2 Miss % of counted Stalled Cycles" );
384  core_caa_events_displ.push_back( "" );
385  core_caa_events_displ.push_back( "L2 Hit Impact" );
386  core_caa_events_displ.push_back( "L2 Hit % of counted Stalled Cycles" );
387  core_caa_events_displ.push_back( "" );
388  core_caa_events_displ.push_back( "L1 DTLB Miss Impact" );
389  core_caa_events_displ.push_back( "L1 DTLB Miss % of counted Stalled Cycles" );
390  core_caa_events_displ.push_back( "" );
391  core_caa_events_displ.push_back( "LCP Stalls Impact" );
392  core_caa_events_displ.push_back( "LCP Stalls % of counted Stalled Cycles" );
393  core_caa_events_displ.push_back( "" );
394  core_caa_events_displ.push_back( "Store-Fwd Stalls Impact" );
395  core_caa_events_displ.push_back( "Store-Fwd Stalls % of counted Stalled Cycles" );
396  core_caa_events_displ.push_back( "" );
397  core_caa_events_displ.push_back( "Loads Blocked by Unknown Address Store Impact" );
398  core_caa_events_displ.push_back( "Loads Blocked % of Store-Fwd Stalls Cycles" );
399  core_caa_events_displ.push_back( "Loads Overlapped with Stores Impact" );
400  core_caa_events_displ.push_back( "Loads Overlapped % of Store-Fwd Stalls Cycles" );
401  core_caa_events_displ.push_back( "Loads Spanning across Cache Lines Impact" );
402  core_caa_events_displ.push_back( "Loads Spanning % of Store-Fwd Stalls Cycles" );
403  core_caa_events_displ.push_back( "" );
404  core_caa_events_displ.push_back( "Load Instructions" );
405  core_caa_events_displ.push_back( "Load % of all Instructions" );
406  core_caa_events_displ.push_back( "Store Instructions" );
407  core_caa_events_displ.push_back( "Store % of all Instructions" );
408  core_caa_events_displ.push_back( "Branch Instructions" );
409  core_caa_events_displ.push_back( "Branch % of all Instructions" );
410  core_caa_events_displ.push_back( "Packed SIMD Computational Instructions" );
411  core_caa_events_displ.push_back( "Packed SIMD % of all Instructions" );
412  core_caa_events_displ.push_back( "Other Instructions" );
413  core_caa_events_displ.push_back( "Other % of all Instructions" );
414  core_caa_events_displ.push_back( "" );
415  core_caa_events_displ.push_back( "ITLB Miss Rate in %" );
416  core_caa_events_displ.push_back( "% of Mispredicted Branches" );
417 }
418 
419 void calc_core_deriv_values( double totalCycles ) {
420  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
421  ++it ) {
422  ( it->second )["Total Cycles"] = ( it->second )["UNHALTED_CORE_CYCLES"];
423  ( it->second )["Stalled Cycles"] = ( it->second )["RS_UOPS_DISPATCHED CMASK=1 INV=1"];
424  ( it->second )["L2 Miss Impact"] = ( it->second )["MEM_LOAD_RETIRED:L2_LINE_MISS"] * CORE_L2_MISS_CYCLES;
425  ( it->second )["L2 Hit Impact"] =
426  ( ( it->second )["MEM_LOAD_RETIRED:L1D_LINE_MISS"] - ( it->second )["MEM_LOAD_RETIRED:L2_LINE_MISS"] ) *
428  ( it->second )["L1 DTLB Miss Impact"] = ( it->second )["MEM_LOAD_RETIRED:DTLB_MISS"] * CORE_L1_DTLB_MISS_CYCLES;
429  ( it->second )["LCP Stalls Impact"] = ( it->second )["ILD_STALL"] * CORE_LCP_STALL_CYCLES;
430  ( it->second )["Loads Blocked by Unknown Address Store Impact"] =
431  ( it->second )["LOAD_BLOCK:STA"] * CORE_UNKNOWN_ADDR_STORE_CYCLES;
432  ( it->second )["Loads Overlapped with Stores Impact"] =
433  ( it->second )["LOAD_BLOCK:OVERLAP_STORE"] * CORE_OVERLAPPING_CYCLES;
434  ( it->second )["Loads Spanning across Cache Lines Impact"] =
435  ( it->second )["LOAD_BLOCK:UNTIL_RETIRE"] * CORE_SPAN_ACROSS_CACHE_LINE_CYCLES;
436  ( it->second )["Store-Fwd Stalls Impact"] = ( it->second )["Loads Blocked by Unknown Address Store Impact"] +
437  ( it->second )["Loads Overlapped with Stores Impact"] +
438  ( it->second )["Loads Spanning across Cache Lines Impact"];
439  ( it->second )["Counted Stalled Cycles"] =
440  ( it->second )["L2 Miss Impact"] + ( it->second )["L2 Hit Impact"] + ( it->second )["LCP Stalls Impact"] +
441  ( it->second )["L1 DTLB Miss Impact"] + ( it->second )["Store-Fwd Stalls Impact"];
442  ( it->second )["Instructions Retired"] = ( it->second )["INSTRUCTIONS_RETIRED"];
443  ( it->second )["ITLB Miss Rate in %"] =
444  ( ( it->second )["ITLB_MISS_RETIRED"] / ( it->second )["INSTRUCTIONS_RETIRED"] ) * 100;
445  ( it->second )["Branch Instructions"] = ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"];
446  ( it->second )["Load Instructions"] = ( it->second )["INST_RETIRED:LOADS"];
447  ( it->second )["Store Instructions"] = ( it->second )["INST_RETIRED:STORES"];
448  ( it->second )["Other Instructions"] = ( it->second )["INST_RETIRED:OTHER"] -
449  ( it->second )["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] -
450  ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"];
451  ( it->second )["% of Mispredicted Branches"] =
452  ( ( it->second )["MISPREDICTED_BRANCH_RETIRED"] / ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"] ) * 100;
453  ( it->second )["Packed SIMD Computational Instructions"] =
454  ( it->second )["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"];
455  ( it->second )["Counted Instructions Retired"] =
456  ( it->second )["Branch Instructions"] + ( it->second )["Load Instructions"] +
457  ( it->second )["Store Instructions"] + ( it->second )["Other Instructions"] +
458  ( it->second )["Packed SIMD Computational Instructions"];
459  ( it->second )["CPI"] = ( it->second )["UNHALTED_CORE_CYCLES"] / ( it->second )["INSTRUCTIONS_RETIRED"];
460 
461  double localPerformanceImprovement = ( it->second )["CPI"] / EXPECTED_CPI;
462  double cyclesAfterImprovement = ( it->second )["UNHALTED_CORE_CYCLES"] / localPerformanceImprovement;
463  double totalCyclesAfterImprovement = totalCycles - ( it->second )["UNHALTED_CORE_CYCLES"] + cyclesAfterImprovement;
464  ( it->second )["iMargin"] = 100 - ( totalCyclesAfterImprovement / totalCycles ) * 100;
465 
466  ( it->second )["% of Total Cycles"] =
467  ( it->second )["RS_UOPS_DISPATCHED CMASK=1 INV=1"] * 100 / ( it->second )["UNHALTED_CORE_CYCLES"];
468  ( it->second )["L2 Miss % of counted Stalled Cycles"] =
469  ( it->second )["L2 Miss Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
470  ( it->second )["L2 Hit % of counted Stalled Cycles"] =
471  ( it->second )["L2 Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
472  ( it->second )["L1 DTLB Miss % of counted Stalled Cycles"] =
473  ( it->second )["L1 DTLB Miss Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
474  ( it->second )["LCP Stalls % of counted Stalled Cycles"] =
475  ( it->second )["LCP Stalls Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
476  ( it->second )["Store-Fwd Stalls % of counted Stalled Cycles"] =
477  ( it->second )["Store-Fwd Stalls Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
478  ( it->second )["Loads Blocked % of Store-Fwd Stalls Cycles"] =
479  ( it->second )["Loads Blocked by Unknown Address Store Impact"] * 100 /
480  ( it->second )["Store-Fwd Stalls Impact"];
481  ( it->second )["Loads Overlapped % of Store-Fwd Stalls Cycles"] =
482  ( it->second )["Loads Overlapped with Stores Impact"] * 100 / ( it->second )["Store-Fwd Stalls Impact"];
483  ( it->second )["Loads Spanning % of Store-Fwd Stalls Cycles"] =
484  ( it->second )["Loads Spanning across Cache Lines Impact"] * 100 / ( it->second )["Store-Fwd Stalls Impact"];
485 
486  ( it->second )["Load % of all Instructions"] =
487  ( it->second )["INST_RETIRED:LOADS"] * 100 / ( it->second )["Counted Instructions Retired"];
488  ( it->second )["Store % of all Instructions"] =
489  ( it->second )["INST_RETIRED:STORES"] * 100 / ( it->second )["Counted Instructions Retired"];
490  ( it->second )["Branch % of all Instructions"] =
491  ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"] * 100 / ( it->second )["Counted Instructions Retired"];
492  ( it->second )["Packed SIMD % of all Instructions"] =
493  ( it->second )["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] * 100 /
494  ( it->second )["Counted Instructions Retired"];
495  ( it->second )["Other % of all Instructions"] =
496  ( it->second )["Other Instructions"] * 100 / ( it->second )["Counted Instructions Retired"];
497  }
498 }
499 
501  nhm_caa_events_displ.push_back( "Total Cycles" );
502  nhm_caa_events_displ.push_back( "Instructions Retired" );
503  nhm_caa_events_displ.push_back( "CPI" );
504  nhm_caa_events_displ.push_back( "" );
505  nhm_caa_events_displ.push_back( "iMargin" );
506  nhm_caa_events_displ.push_back( "iFactor" );
507  nhm_caa_events_displ.push_back( "" );
508  nhm_caa_events_displ.push_back( "Stalled Cycles" );
509  nhm_caa_events_displ.push_back( "% of Total Cycles" );
510  nhm_caa_events_displ.push_back( "Total Counted Stalled Cycles" );
511  nhm_caa_events_displ.push_back( "" );
512  nhm_caa_events_displ.push_back( "Instruction Starvation % of Total Cycles" );
513  nhm_caa_events_displ.push_back( "# of Instructions per Call" );
514  nhm_caa_events_displ.push_back( "% of Total Cycles spent handling FP exceptions" );
515  nhm_caa_events_displ.push_back( "" );
516  nhm_caa_events_displ.push_back( "Counted Stalled Cycles due to Load Ops" );
517  nhm_caa_events_displ.push_back( "" );
518  nhm_caa_events_displ.push_back( "L2 Hit Impact" );
519  nhm_caa_events_displ.push_back( "L2 Hit % of Load Stalls" );
520  nhm_caa_events_displ.push_back( "" );
521  nhm_caa_events_displ.push_back( "L3 Unshared Hit Impact" );
522  nhm_caa_events_displ.push_back( "L3 Unshared Hit % of Load Stalls" );
523  nhm_caa_events_displ.push_back( "" );
524  nhm_caa_events_displ.push_back( "L2 Other Core Hit Impact" );
525  nhm_caa_events_displ.push_back( "L2 Other Core Hit % of Load Stalls" );
526  nhm_caa_events_displ.push_back( "" );
527  nhm_caa_events_displ.push_back( "L2 Other Core Hit Modified Impact" );
528  nhm_caa_events_displ.push_back( "L2 Other Core Hit Modified % of Load Stalls" );
529  nhm_caa_events_displ.push_back( "" );
530  nhm_caa_events_displ.push_back( "L3 Miss -> Local DRAM Hit Impact" );
531  nhm_caa_events_displ.push_back( "L3 Miss -> Remote DRAM Hit Impact" );
532  nhm_caa_events_displ.push_back( "L3 Miss -> Remote Cache Hit Impact" );
533  nhm_caa_events_displ.push_back( "L3 Miss -> Total Impact" );
534  nhm_caa_events_displ.push_back( "L3 Miss % of Load Stalls" );
535  nhm_caa_events_displ.push_back( "" );
536  nhm_caa_events_displ.push_back( "L1 DTLB Miss Impact" );
537  nhm_caa_events_displ.push_back( "L1 DTLB Miss % of Load Stalls" );
538  nhm_caa_events_displ.push_back( "" );
539  nhm_caa_events_displ.push_back( "Cycles spent during DIV & SQRT Ops" );
540  nhm_caa_events_displ.push_back( "DIV & SQRT Ops % of counted Stalled Cycles" );
541  nhm_caa_events_displ.push_back( "" );
542  nhm_caa_events_displ.push_back( "Total L2 IFETCH misses" );
543  nhm_caa_events_displ.push_back( "% of L2 IFETCH misses" );
544  nhm_caa_events_displ.push_back( "" );
545  nhm_caa_events_displ.push_back( "% of IFETCHes served by Local DRAM" );
546  nhm_caa_events_displ.push_back( "% of IFETCHes served by L3 (Modified)" );
547  nhm_caa_events_displ.push_back( "% of IFETCHes served by L3 (Clean Snoop)" );
548  nhm_caa_events_displ.push_back( "% of IFETCHes served by Remote L2" );
549  nhm_caa_events_displ.push_back( "% of IFETCHes served by Remote DRAM" );
550  nhm_caa_events_displ.push_back( "% of IFETCHes served by L3 (No Snoop)" );
551  nhm_caa_events_displ.push_back( "" );
552  nhm_caa_events_displ.push_back( "Total L2 IFETCH miss Impact" );
553  nhm_caa_events_displ.push_back( "" );
554  nhm_caa_events_displ.push_back( "Cycles IFETCH served by Local DRAM" );
555  nhm_caa_events_displ.push_back( "Local DRAM IFECTHes % Impact" );
556  nhm_caa_events_displ.push_back( "" );
557  nhm_caa_events_displ.push_back( "Cycles IFETCH served by L3 (Modified)" );
558  nhm_caa_events_displ.push_back( "L3 (Modified) IFECTHes % Impact" );
559  nhm_caa_events_displ.push_back( "" );
560  nhm_caa_events_displ.push_back( "Cycles IFETCH served by L3 (Clean Snoop)" );
561  nhm_caa_events_displ.push_back( "L3 (Clean Snoop) IFECTHes % Impact" );
562  nhm_caa_events_displ.push_back( "" );
563  nhm_caa_events_displ.push_back( "Cycles IFETCH served by Remote L2" );
564  nhm_caa_events_displ.push_back( "Remote L2 IFECTHes % Impact" );
565  nhm_caa_events_displ.push_back( "" );
566  nhm_caa_events_displ.push_back( "Cycles IFETCH served by Remote DRAM" );
567  nhm_caa_events_displ.push_back( "Remote DRAM IFECTHes % Impact" );
568  nhm_caa_events_displ.push_back( "" );
569  nhm_caa_events_displ.push_back( "Cycles IFETCH served by L3 (No Snoop)" );
570  nhm_caa_events_displ.push_back( "L3 (No Snoop) IFECTHes % Impact" );
571  nhm_caa_events_displ.push_back( "" );
572  nhm_caa_events_displ.push_back( "Total Branch Instructions Executed" );
573  nhm_caa_events_displ.push_back( "% of Mispredicted Branches" );
574  nhm_caa_events_displ.push_back( "" );
575  nhm_caa_events_displ.push_back( "Direct Near Calls % of Total Branches Executed" );
576  nhm_caa_events_displ.push_back( "Indirect Near Calls % of Total Branches Executed" );
577  nhm_caa_events_displ.push_back( "Indirect Near Non-Calls % of Total Branches Executed" );
578  nhm_caa_events_displ.push_back( "All Near Calls % of Total Branches Executed" );
579  nhm_caa_events_displ.push_back( "All Non Calls % of Total Branches Executed" );
580  nhm_caa_events_displ.push_back( "All Returns % of Total Branches Executed" );
581  nhm_caa_events_displ.push_back( "" );
582  nhm_caa_events_displ.push_back( "Total Branch Instructions Retired" );
583  nhm_caa_events_displ.push_back( "Conditionals % of Total Branches Retired" );
584  nhm_caa_events_displ.push_back( "Near Calls % of Total Branches Retired" );
585  nhm_caa_events_displ.push_back( "" );
586  nhm_caa_events_displ.push_back( "L1 ITLB Miss Impact" );
587  nhm_caa_events_displ.push_back( "ITLB Miss Rate in %" );
588  nhm_caa_events_displ.push_back( "" );
589  nhm_caa_events_displ.push_back( "Branch Instructions" );
590  nhm_caa_events_displ.push_back( "Branch % of all Instructions" );
591  nhm_caa_events_displ.push_back( "" );
592  nhm_caa_events_displ.push_back( "Load Instructions" );
593  nhm_caa_events_displ.push_back( "Load % of all Instructions" );
594  nhm_caa_events_displ.push_back( "" );
595  nhm_caa_events_displ.push_back( "Store Instructions" );
596  nhm_caa_events_displ.push_back( "Store % of all Instructions" );
597  nhm_caa_events_displ.push_back( "" );
598  nhm_caa_events_displ.push_back( "Other Instructions" );
599  nhm_caa_events_displ.push_back( "Other % of all Instructions" );
600  nhm_caa_events_displ.push_back( "" );
601  nhm_caa_events_displ.push_back( "Packed UOPS Retired" );
602  nhm_caa_events_displ.push_back( "Packed % of all UOPS Retired" );
603 }
604 
605 void calc_nhm_deriv_values( double totalCycles ) {
606  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
607  ++it ) {
608  ( it->second )["Total Cycles"] = ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
609 
610  ( it->second )["L2 Hit Impact"] = ( it->second )["MEM_LOAD_RETIRED:L2_HIT"] * I7_L2_HIT_CYCLES;
611  ( it->second )["L3 Unshared Hit Impact"] =
612  ( it->second )["MEM_LOAD_RETIRED:L3_UNSHARED_HIT"] * I7_L3_UNSHARED_HIT_CYCLES;
613  if ( ( it->second )["MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] >
614  ( it->second )["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] ) {
615  ( it->second )["L2 Other Core Hit Impact"] = ( ( it->second )["MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] -
616  ( it->second )["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] ) *
618  } else {
619  ( it->second )["L2 Other Core Hit Impact"] = 0.0;
620  }
621  ( it->second )["L2 Other Core Hit Modified Impact"] =
622  ( it->second )["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] * I7_OTHER_CORE_L2_HITM_CYCLES;
623  ( it->second )["L3 Miss -> Local DRAM Hit Impact"] =
624  ( it->second )["MEM_UNCORE_RETIRED:LOCAL_DRAM"] * I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES;
625  ( it->second )["L3 Miss -> Remote DRAM Hit Impact"] =
626  ( it->second )["MEM_UNCORE_RETIRED:REMOTE_DRAM"] * I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES;
627  ( it->second )["L3 Miss -> Remote Cache Hit Impact"] =
628  ( it->second )["MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT"] * I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES;
629  ( it->second )["L3 Miss -> Total Impact"] = ( it->second )["L3 Miss -> Local DRAM Hit Impact"] +
630  ( it->second )["L3 Miss -> Remote DRAM Hit Impact"] +
631  ( it->second )["L3 Miss -> Remote Cache Hit Impact"];
632  ( it->second )["L1 DTLB Miss Impact"] =
633  ( it->second )["DTLB_LOAD_MISSES:WALK_COMPLETED"] * I7_L1_DTLB_WALK_COMPLETED_CYCLES;
634  ( it->second )["Counted Stalled Cycles due to Load Ops"] =
635  ( it->second )["L3 Miss -> Total Impact"] + ( it->second )["L2 Hit Impact"] +
636  ( it->second )["L1 DTLB Miss Impact"] + ( it->second )["L3 Unshared Hit Impact"] +
637  ( it->second )["L2 Other Core Hit Modified Impact"] + ( it->second )["L2 Other Core Hit Impact"];
638  ( it->second )["Cycles spent during DIV & SQRT Ops"] = ( it->second )["ARITH:CYCLES_DIV_BUSY"];
639  ( it->second )["Total Counted Stalled Cycles"] =
640  ( it->second )["Counted Stalled Cycles due to Load Ops"] + ( it->second )["Cycles spent during DIV & SQRT Ops"];
641  ( it->second )["Stalled Cycles"] =
642  ( it->second )["Total Counted Stalled Cycles"]; // TO BE FIXED when UOPS_EXECUTED:0x3f is fixed!!
643  ( it->second )["% of Total Cycles"] =
644  ( it->second )["Stalled Cycles"] * 100 / ( it->second )["CPU_CLK_UNHALTED:THREAD_P"]; // TO BE FIXED!! see above
645  ( it->second )["L3 Miss % of Load Stalls"] =
646  ( it->second )["L3 Miss -> Total Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
647  ( it->second )["L2 Hit % of Load Stalls"] =
648  ( it->second )["L2 Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
649  ( it->second )["L1 DTLB Miss % of Load Stalls"] =
650  ( it->second )["L1 DTLB Miss Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
651  ( it->second )["L3 Unshared Hit % of Load Stalls"] =
652  ( it->second )["L3 Unshared Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
653  ( it->second )["L2 Other Core Hit % of Load Stalls"] =
654  ( it->second )["L2 Other Core Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
655  ( it->second )["L2 Other Core Hit Modified % of Load Stalls"] =
656  ( it->second )["L2 Other Core Hit Modified Impact"] * 100 /
657  ( it->second )["Counted Stalled Cycles due to Load Ops"];
658  ( it->second )["DIV & SQRT Ops % of counted Stalled Cycles"] =
659  ( it->second )["Cycles spent during DIV & SQRT Ops"] * 100 / ( it->second )["Total Counted Stalled Cycles"];
660 
661  ( it->second )["Cycles IFETCH served by Local DRAM"] =
662  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT;
663  ( it->second )["Cycles IFETCH served by L3 (Modified)"] =
664  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * I7_IFETCH_L2_MISS_L3_HITM;
665  ( it->second )["Cycles IFETCH served by L3 (Clean Snoop)"] =
666  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * I7_IFETCH_L2_MISS_L3_HIT_SNOOP;
667  ( it->second )["Cycles IFETCH served by Remote L2"] =
668  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD;
669  ( it->second )["Cycles IFETCH served by Remote DRAM"] =
670  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT;
671  ( it->second )["Cycles IFETCH served by L3 (No Snoop)"] =
672  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP;
673  ( it->second )["Total L2 IFETCH miss Impact"] =
674  ( it->second )["Cycles IFETCH served by Local DRAM"] + ( it->second )["Cycles IFETCH served by L3 (Modified)"] +
675  ( it->second )["Cycles IFETCH served by L3 (Clean Snoop)"] +
676  ( it->second )["Cycles IFETCH served by Remote L2"] + ( it->second )["Cycles IFETCH served by Remote DRAM"] +
677  ( it->second )["Cycles IFETCH served by L3 (No Snoop)"];
678  ( it->second )["Local DRAM IFECTHes % Impact"] =
679  ( it->second )["Cycles IFETCH served by Local DRAM"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
680  ( it->second )["L3 (Modified) IFECTHes % Impact"] =
681  ( it->second )["Cycles IFETCH served by L3 (Modified)"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
682  ( it->second )["L3 (Clean Snoop) IFECTHes % Impact"] = ( it->second )["Cycles IFETCH served by L3 (Clean Snoop)"] *
683  100 / ( it->second )["Total L2 IFETCH miss Impact"];
684  ( it->second )["Remote L2 IFECTHes % Impact"] =
685  ( it->second )["Cycles IFETCH served by Remote L2"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
686  ( it->second )["Remote DRAM IFECTHes % Impact"] =
687  ( it->second )["Cycles IFETCH served by Remote DRAM"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
688  ( it->second )["L3 (No Snoop) IFECTHes % Impact"] =
689  ( it->second )["Cycles IFETCH served by L3 (No Snoop)"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
690  ( it->second )["Total L2 IFETCH misses"] = ( it->second )["L2_RQSTS:IFETCH_MISS"];
691  ( it->second )["% of IFETCHes served by Local DRAM"] =
692  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
693  ( it->second )["% of IFETCHes served by L3 (Modified)"] =
694  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
695  ( it->second )["% of IFETCHes served by L3 (Clean Snoop)"] =
696  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * 100 /
697  ( it->second )["L2_RQSTS:IFETCH_MISS"];
698  ( it->second )["% of IFETCHes served by Remote L2"] =
699  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * 100 /
700  ( it->second )["L2_RQSTS:IFETCH_MISS"];
701  ( it->second )["% of IFETCHes served by Remote DRAM"] =
702  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
703  ( it->second )["% of IFETCHes served by L3 (No Snoop)"] =
704  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
705  ( it->second )["% of L2 IFETCH misses"] =
706  ( it->second )["L2_RQSTS:IFETCH_MISS"] * 100 /
707  ( ( it->second )["L2_RQSTS:IFETCH_MISS"] + ( it->second )["L2_RQSTS:IFETCH_HIT"] );
708  ( it->second )["L1 ITLB Miss Impact"] =
709  ( it->second )["ITLB_MISSES:WALK_COMPLETED"] * I7_L1_ITLB_WALK_COMPLETED_CYCLES;
710 
711  ( it->second )["Total Branch Instructions Executed"] = ( it->second )["BR_INST_EXEC:ANY"];
712  ( it->second )["% of Mispredicted Branches"] =
713  ( it->second )["BR_MISP_EXEC:ANY"] * 100 / ( it->second )["BR_INST_EXEC:ANY"];
714  ( it->second )["Direct Near Calls % of Total Branches Executed"] =
715  ( it->second )["BR_INST_EXEC:DIRECT_NEAR_CALL"] * 100 / ( it->second )["Total Branch Instructions Executed"];
716  ( it->second )["Indirect Near Calls % of Total Branches Executed"] =
717  ( it->second )["BR_INST_EXEC:INDIRECT_NEAR_CALL"] * 100 / ( it->second )["Total Branch Instructions Executed"];
718  ( it->second )["Indirect Near Non-Calls % of Total Branches Executed"] =
719  ( it->second )["BR_INST_EXEC:INDIRECT_NON_CALL"] * 100 / ( it->second )["Total Branch Instructions Executed"];
720  ( it->second )["All Near Calls % of Total Branches Executed"] =
721  ( it->second )["BR_INST_EXEC:NEAR_CALLS"] * 100 / ( it->second )["Total Branch Instructions Executed"];
722  ( it->second )["All Non Calls % of Total Branches Executed"] =
723  ( it->second )["BR_INST_EXEC:NON_CALLS"] * 100 / ( it->second )["Total Branch Instructions Executed"];
724  ( it->second )["All Returns % of Total Branches Executed"] =
725  ( it->second )["BR_INST_EXEC:RETURN_NEAR"] * 100 / ( it->second )["Total Branch Instructions Executed"];
726  ( it->second )["Total Branch Instructions Retired"] = ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"];
727  ( it->second )["Conditionals % of Total Branches Retired"] =
728  ( it->second )["BR_INST_RETIRED:CONDITIONAL"] * 100 / ( it->second )["Total Branch Instructions Retired"];
729  ( it->second )["Near Calls % of Total Branches Retired"] =
730  ( it->second )["BR_INST_RETIRED:NEAR_CALL"] * 100 / ( it->second )["Total Branch Instructions Retired"];
731 
732  ( it->second )["Instruction Starvation % of Total Cycles"] =
733  ( ( it->second )["UOPS_ISSUED:ANY CMASK=1 INV=1"] - ( it->second )["RESOURCE_STALLS:ANY"] ) * 100 /
734  ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
735  ( it->second )["% of Total Cycles spent handling FP exceptions"] =
736  ( it->second )["UOPS_DECODED:MS CMASK=1"] * 100 / ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
737  ( it->second )["# of Instructions per Call"] =
738  ( it->second )["INST_RETIRED:ANY_P"] / ( it->second )["BR_INST_EXEC:NEAR_CALLS"];
739 
740  ( it->second )["Instructions Retired"] = ( it->second )["INST_RETIRED:ANY_P"];
741  ( it->second )["ITLB Miss Rate in %"] =
742  ( ( it->second )["ITLB_MISS_RETIRED"] / ( it->second )["INST_RETIRED:ANY_P"] ) * 100;
743 
744  ( it->second )["Branch Instructions"] = ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"];
745  ( it->second )["Load Instructions"] = ( it->second )["MEM_INST_RETIRED:LOADS"];
746  ( it->second )["Store Instructions"] = ( it->second )["MEM_INST_RETIRED:STORES"];
747  ( it->second )["Other Instructions"] =
748  ( it->second )["Instructions Retired"] - ( it->second )["MEM_INST_RETIRED:LOADS"] -
749  ( it->second )["MEM_INST_RETIRED:STORES"] - ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"];
750  ( it->second )["Packed UOPS Retired"] =
751  ( it->second )["SSEX_UOPS_RETIRED:PACKED_DOUBLE"] + ( it->second )["SSEX_UOPS_RETIRED:PACKED_SINGLE"];
752  ( it->second )["CPI"] = ( it->second )["CPU_CLK_UNHALTED:THREAD_P"] / ( it->second )["INST_RETIRED:ANY_P"];
753 
754  double localPerformanceImprovement = ( it->second )["CPI"] / EXPECTED_CPI;
755  double cyclesAfterImprovement = ( it->second )["CPU_CLK_UNHALTED:THREAD_P"] / localPerformanceImprovement;
756  double totalCyclesAfterImprovement =
757  totalCycles - ( it->second )["CPU_CLK_UNHALTED:THREAD_P"] + cyclesAfterImprovement;
758  ( it->second )["iMargin"] = 100 - ( totalCyclesAfterImprovement / totalCycles ) * 100;
759 
760  ( it->second )["Load % of all Instructions"] =
761  ( it->second )["MEM_INST_RETIRED:LOADS"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
762  ( it->second )["Store % of all Instructions"] =
763  ( it->second )["MEM_INST_RETIRED:STORES"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
764  ( it->second )["Branch % of all Instructions"] =
765  ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
766  ( it->second )["Other % of all Instructions"] =
767  ( it->second )["Other Instructions"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
768 
769  ( it->second )["Packed % of all UOPS Retired"] =
770  ( it->second )["Packed UOPS Retired"] * 100 / ( it->second )["UOPS_RETIRED:ANY"];
771  }
772 }
773 
// S_module class defining the objects containing sampling results for each module.
// An object stores the metadata handed to init() (module name, architecture, event name,
// counter mask, invert mask, sampling period), the total number of samples given to
// set_total(), and a map from a sample index string to the sample count accumulated for it.
class S_module {
private:
  std::map<std::string, unsigned int> samples; // sample index -> accumulated sample count
  unsigned int                        total_num_samples = 0;
  std::string                         module_name;
  std::string                         arch;
  std::string                         event;
  unsigned int                        cmask = 0;
  unsigned int                        inv   = 0;
  unsigned int                        sp    = 0; // sampling period

public:
  S_module() = default;

  // Resets the object to its freshly constructed state (no samples, empty strings, zero counters).
  void clear() {
    samples.clear();
    total_num_samples = 0;
    module_name.clear();
    arch.clear();
    event.clear();
    cmask = 0;
    inv   = 0;
    sp    = 0;
  }

  // Stores the descriptive data of the module; samples and the total are left untouched.
  void init( const char* name, const char* architecture, const char* event_name, unsigned int c_mask,
             unsigned int inv_mask, unsigned int smpl_period ) {
    module_name = name;
    arch        = architecture;
    event       = event_name;
    cmask       = c_mask;
    inv         = inv_mask;
    sp          = smpl_period;
  }

  void set_total( unsigned int total ) { total_num_samples = total; }

  unsigned int get_smpl_period() const { return sp; }
  unsigned int get_inv_mask() const { return inv; }
  unsigned int get_c_mask() const { return cmask; }
  std::string  get_arch() const { return arch; }
  std::string  get_event() const { return event; }

  // Adds `value` to the count stored under `index` (a missing entry starts from 0).
  void add_sample( const char* index, unsigned int value ) { samples[index] += value; }

  // Extracts the entry with the largest count: its key is copied into `index`, its count into
  // `value`, and the entry is removed from the map. Returns false when no entries are left.
  // The key is copied with strcpy, so `index` must be large enough for the longest stored key
  // plus the terminating NUL.
  bool get_max( char* index, unsigned int& value ) {
    auto max_pos = std::max_element( samples.begin(), samples.end(),
                                     []( const auto& lhs, const auto& rhs ) { return lhs.second < rhs.second; } );
    if ( max_pos == samples.end() ) return false;
    strcpy( index, max_pos->first.c_str() );
    value = max_pos->second;
    samples.erase( max_pos );
    return true;
  }

  std::string  get_module_name() const { return module_name; }
  unsigned int get_total_num_samples() const { return total_num_samples; }
};
831 
// void html_special_chars()
// const char *s : source string (NUL terminated)
// char *s_mod : destination buffer; receives the escaped, NUL terminated copy of s
// replaces the special HTML characters '<', '>', '&' and '"' with "&lt;", "&gt;", "&amp;" and "&quot;"
// so that the text can be used inside HTML code; every other character is copied unchanged.
// The longest replacement is 6 characters, so s_mod must provide up to 6 * strlen(s) + 1 bytes.
// s and s_mod must not overlap.
void html_special_chars( const char* s, char* s_mod ) {
  // A single write cursor avoids re-scanning the destination for every appended character.
  char* out = s_mod;
  for ( ; *s != '\0'; ++s ) {
    const char* replacement = nullptr;
    switch ( *s ) {
    case '<':
      replacement = "&lt;";
      break;
    case '>':
      replacement = "&gt;";
      break;
    case '&':
      replacement = "&amp;";
      break;
    case '"':
      replacement = "&quot;";
      break;
    default:
      break;
    }
    if ( replacement != nullptr ) {
      const size_t length = strlen( replacement );
      memcpy( out, replacement, length );
      out += length;
    } else {
      *out++ = *s;
    }
  }
  *out = '\0';
}
863 
// func_name()
// const char *demangled_symbol : string corresponding to the demangled symbol found by the read_file() function
// parses the argument and returns just the function name without arguments or return types
//
// NOTE: although the parameter is declared const, the buffer is modified in place (NUL characters are
// written to cut off argument lists and template arguments) and the returned pointer points into that
// same buffer. Callers must therefore pass a writable, NUL terminated buffer they own.
//
// - If the text contains "operator", the result starts at "operator" and ends right after the first
//   matching operator token from the table below (no cut is made if no token matches).
// - Otherwise the text is cut at the '(' matching the last ')'; a trailing "<...>" template argument
//   list is cut as well, and the result is the run of identifier characters ([A-Za-z0-9_~]) in front.
// - If there is no ')' at all, the input is returned unchanged.
const char* func_name( const char* demangled_symbol ) {
  char* operator_begin = const_cast<char*>( strstr( demangled_symbol, "operator" ) );
  if ( operator_begin != nullptr ) {
    // The order matters: longer tokens must be tried before their prefixes (e.g. ">>=" before ">>").
    static const char* const operator_tokens[] = {
        "delete[]", "delete", "new[]", "new", ">>=", "<<=", "->*", "<<", ">>", ">=", "<=", "==", "!=", "|=",
        "&=",       "^=",     "%=",    "/=",  "*=",  "-=",  "+=",  "&&", "||", "[]", "()", "++", "--", "->",
        "<",        ">",      "~",     "!",   "+",   "-",   "*",   "/",  "%",  "^",  "&",  "|",  ",",  "=" };

    char* token_begin = operator_begin + 8; // 8 == strlen( "operator" )
    while ( *token_begin == ' ' ) ++token_begin;
    for ( const char* token : operator_tokens ) {
      const size_t token_length = strlen( token );
      if ( strncmp( token_begin, token, token_length ) == 0 ) {
        token_begin[token_length] = '\0';
        break;
      }
    }
    return operator_begin;
  }

  // Locate the '(' that matches the last ')' by scanning backwards, never going past the buffer start.
  char* open_paren = const_cast<char*>( strrchr( demangled_symbol, ')' ) );
  if ( open_paren == nullptr ) return demangled_symbol;
  int depth = 1;
  while ( depth > 0 && open_paren != demangled_symbol ) {
    const char c = *( --open_paren );
    if ( c == ')' ) {
      ++depth;
    } else if ( c == '(' ) {
      --depth;
    }
  }
  *open_paren = '\0'; // drop the argument list and anything after it
  if ( open_paren == demangled_symbol ) return demangled_symbol; // nothing in front of '(': empty name

  // name_begin starts on the last character in front of '(' and is moved back to the start of the name.
  char* name_begin = open_paren - 1;
  if ( *name_begin == '>' ) {
    // Skip the template argument list "<...>" and cut it off.
    depth = 1;
    while ( depth > 0 && name_begin != demangled_symbol ) {
      const char c = *( --name_begin );
      if ( c == '>' ) {
        ++depth;
      } else if ( c == '<' ) {
        --depth;
      }
    }
    *name_begin = '\0';
  }

  // isalnum() requires a value representable as unsigned char, hence the cast.
  const auto is_name_char = []( char c ) {
    return isalnum( static_cast<unsigned char>( c ) ) || c == '_' || c == '~';
  };
  // Extend the name to the left while identifier characters precede it, stopping at the buffer start.
  while ( name_begin != demangled_symbol && is_name_char( name_begin[-1] ) ) --name_begin;
  return name_begin;
}
1038 
1039 // put_module()
1040 // S_module *cur_module : pointer to the current module object to be written out in to HTML file
1041 // const char *event : name of architectural event being analysed
1042 // const char *dir : directory where sampling results input files are located
1043 // creates or updates the HTML output file using information contained inside the module object given as a parameter
1044 void put_S_module( S_module* cur_module, const char* dir ) {
1045  char module_name[MAX_MODULE_NAME_LENGTH];
1046  bzero( module_name, MAX_MODULE_NAME_LENGTH );
1047  strcpy( module_name, ( cur_module->get_module_name() ).c_str() );
1048  char module_filename[MAX_FILENAME_LENGTH];
1049  bzero( module_filename, MAX_FILENAME_LENGTH );
1050  strcpy( module_filename, dir );
1051  strcat( module_filename, "/HTML/" );
1052  strcat( module_filename, module_name );
1053  strcat( module_filename, ".html" );
1054  char event[MAX_EVENT_NAME_LENGTH];
1055  bzero( event, MAX_EVENT_NAME_LENGTH );
1056  strcpy( event, ( cur_module->get_event() ).c_str() );
1057  std::map<std::string, unsigned int>::iterator result = modules_tot_samples.find( cur_module->get_module_name() );
1058  FILE* module_file;
1059  if ( result == modules_tot_samples.end() ) // not found
1060  {
1061  if ( ( !strcmp( event, "UNHALTED_CORE_CYCLES" ) && !nehalem ) ||
1062  ( !strcmp( event, "CPU_CLK_UNHALTED:THREAD_P" ) && nehalem ) ) {
1063  modules_tot_samples.insert(
1065  } else {
1066  modules_tot_samples.insert( std::pair<std::string, unsigned int>( cur_module->get_module_name(), 0 ) );
1067  }
1068  module_file = fopen( module_filename, "w" );
1069  if ( module_file == NULL ) {
1070  fprintf( stderr, "ERROR: Cannot create file %s!!!\naborting...\n", module_filename );
1071  exit( 1 );
1072  }
1073  fprintf( module_file, "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" "
1074  "\"http://www.w3.org/TR/html4/loose.dtd\">\n" );
1075  fprintf( module_file, "<html>\n" );
1076  fprintf( module_file, "<head>\n" );
1077  fprintf( module_file, "<title>\n" );
1078  fprintf( module_file, "%s\n", module_name );
1079  fprintf( module_file, "</title>\n" );
1080  fprintf( module_file, "</head>\n" );
1081  fprintf( module_file, "<body>\n" );
1082  fprintf( module_file, "<h2>%s</h2><br/>Events Sampled:<br/>\n", module_name );
1083  fprintf( module_file, "<ul>\n" );
1084  for ( std::vector<std::string>::const_iterator it = S_events.begin(); it != S_events.end(); ++it ) {
1085  fprintf( module_file, "<li><a href=\"#%s\">%s</a></li>\n", it->c_str(), it->c_str() );
1086  }
1087  fprintf( module_file, "</ul>\n" );
1088  } // if(result == modules_tot_samples.end()) //not found
1089  else {
1090  if ( ( !strcmp( event, "UNHALTED_CORE_CYCLES" ) && !nehalem ) ||
1091  ( !strcmp( event, "CPU_CLK_UNHALTED:THREAD_P" ) && nehalem ) ) {
1092  modules_tot_samples[cur_module->get_module_name()] = cur_module->get_total_num_samples();
1093  }
1094  module_file = fopen( module_filename, "a" );
1095  } // else:: if(result != modules_tot_samples.end()) //found!!
1096  char event_str[MAX_EVENT_NAME_LENGTH];
1097  bzero( event_str, MAX_EVENT_NAME_LENGTH );
1098  strcpy( event_str, event );
1099  if ( cur_module->get_c_mask() > 0 ) {
1100  sprintf( event_str + strlen( event_str ), " CMASK=%u", cur_module->get_c_mask() );
1101  }
1102  if ( cur_module->get_inv_mask() > 0 ) {
1103  sprintf( event_str + strlen( event_str ), " INV=%u", cur_module->get_inv_mask() );
1104  }
1105  fprintf( module_file, "<a name=\"%s\"><a>\n", event_str );
1106  fprintf( module_file, "<table cellpadding=\"5\">\n" );
1107  fprintf( module_file, "<tr bgcolor=\"#EEEEEE\">\n" );
1108  fprintf( module_file,
1109  "<th colspan=\"6\" align=\"left\">%s -- cmask: %u -- invmask: %u -- Total Samples: %u -- "
1110  "Sampling Period: %u</th>\n",
1111  event, cur_module->get_c_mask(), cur_module->get_inv_mask(), cur_module->get_total_num_samples(),
1112  cur_module->get_smpl_period() );
1113  fprintf( module_file, "</tr>\n" );
1114  fprintf( module_file, "<tr bgcolor=\"#EEEEEE\">\n" );
1115  fprintf( module_file, "<th align=\"left\">Samples</th>\n" );
1116  fprintf( module_file, "<th align=\"left\">Percentage</th>\n" );
1117  fprintf( module_file, "<th align=\"left\">Symbol Name</th>\n" );
1118  fprintf( module_file, "<th align=\"left\">Library Name</th>\n" );
1119  fprintf( module_file, "<th align=\"left\">Complete Signature</th>\n" );
1120  fprintf( module_file, "<th align=\"left\">Library Pathname</th>\n" );
1121  fprintf( module_file, "</tr>\n" );
1122  for ( int j = 0; j < 20; j++ ) {
1123  char sym[MAX_SYM_LENGTH];
1124  char sym_mod[MAX_SYM_MOD_LENGTH];
1125  char lib[MAX_LIB_LENGTH];
1126  char lib_mod[MAX_LIB_MOD_LENGTH];
1127  char simple_sym[MAX_SIMPLE_SYM_LENGTH];
1128  char simple_sym_mod[MAX_SIMPLE_SYM_MOD_LENGTH];
1129  char simple_lib[MAX_SIMPLE_LIB_LENGTH];
1130  char simple_lib_mod[MAX_SIMPLE_LIB_MOD_LENGTH];
1131 
1132  bzero( sym, MAX_SYM_LENGTH );
1133  bzero( sym_mod, MAX_SYM_MOD_LENGTH );
1134  bzero( lib, MAX_LIB_LENGTH );
1135  bzero( lib_mod, MAX_LIB_MOD_LENGTH );
1136  bzero( simple_sym, MAX_SIMPLE_SYM_LENGTH );
1137  bzero( simple_sym_mod, MAX_SIMPLE_SYM_MOD_LENGTH );
1138  bzero( simple_lib, MAX_SIMPLE_LIB_LENGTH );
1139  bzero( simple_lib_mod, MAX_SIMPLE_LIB_MOD_LENGTH );
1140 
1142  bzero( index, MAX_SAMPLE_INDEX_LENGTH );
1143  unsigned int value;
1144  bool res = cur_module->get_max( index, value );
1145  if ( !res ) break;
1146  char* sym_end = strchr( index, '%' );
1147  if ( sym_end == NULL ) // error
1148  {
1149  fprintf( stderr, "ERROR: Invalid sym and lib name! : %s\naborting...\n", index );
1150  exit( 1 );
1151  }
1152  memcpy( sym, index, strlen( index ) - strlen( sym_end ) );
1153  strcpy( lib, sym_end + 1 );
1154  char temp[MAX_SYM_LENGTH];
1155  bzero( temp, MAX_SYM_LENGTH );
1156  strcpy( temp, sym );
1157  strcpy( simple_sym, ( func_name( temp ) ) );
1158  if ( strrchr( lib, '/' ) != NULL && *( strrchr( lib, '/' ) + 1 ) != '\0' ) {
1159  strcpy( simple_lib, strrchr( lib, '/' ) + 1 );
1160  } else {
1161  strcpy( simple_lib, lib );
1162  }
1163  if ( j % 2 != 0 ) {
1164  fprintf( module_file, "<tr bgcolor=\"#FFFFCC\">\n" );
1165  } else {
1166  fprintf( module_file, "<tr bgcolor=\"#CCFFCC\">\n" );
1167  }
1168  fprintf( module_file, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%u</td>\n", value );
1169  fprintf( module_file, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%f%%</td>\n",
1170  ( ( (double)( value ) ) / ( (double)( cur_module->get_total_num_samples() ) ) ) * 100 );
1171  html_special_chars( simple_sym, simple_sym_mod );
1172  html_special_chars( simple_lib, simple_lib_mod );
1173  html_special_chars( sym, sym_mod );
1174  html_special_chars( lib, lib_mod );
1175  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n", simple_sym_mod );
1176  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n", simple_lib_mod );
1177  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n", sym_mod );
1178  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n</tr>\n", lib_mod );
1179  }
1180  fprintf( module_file, "</table><br/><br/>\n" );
1181  int res = fclose( module_file );
1182  if ( res ) {
1183  fprintf( stderr, "ERROR: Cannot close file %s!!!\naborting...\n", module_filename );
1184  exit( 1 );
1185  }
1186  return;
1187 }
1188 
1189 // read_S_file()
1190 // const char *dir : directory where sampling results input files are located
1191 // const char *filename : name of the current file to analyse
1192 // reads content of a gzipped sampling result file, finds names of symbols inside libraries using their offsets,
1193 // demangles them to make them human-readable, creates the module objects (with their sampling values),
1194 // and calls the put_module() function to create (or update) the corresponding HTML output file
1195 // returns 0 on success
1196 int read_S_file( const char* dir, const char* filename ) {
1197  char line[MAX_LINE_LENGTH];
1198  char event[MAX_EVENT_NAME_LENGTH];
1199  char arch[MAX_ARCH_NAME_LENGTH];
1200  unsigned int cmask;
1201  unsigned int inv;
1202  unsigned int sp;
1203  char cur_module_name[MAX_MODULE_NAME_LENGTH];
1204  bzero( line, MAX_LINE_LENGTH );
1205  bzero( event, MAX_EVENT_NAME_LENGTH );
1206  bzero( cur_module_name, MAX_MODULE_NAME_LENGTH );
1207  bzero( arch, MAX_ARCH_NAME_LENGTH );
1208 
1209  S_module* cur_module = new S_module();
1210  unsigned int module_num = 0;
1211 
1212  char path_name[MAX_FILENAME_LENGTH];
1213  bzero( path_name, MAX_FILENAME_LENGTH );
1214  strcpy( path_name, dir );
1215  strcat( path_name, "/" );
1216  strcat( path_name, filename );
1217  gzFile res_file = gzopen( path_name, "rb" );
1218 
1219  if ( res_file != NULL ) {
1220  bzero( line, MAX_LINE_LENGTH );
1221  gzgets( res_file, line, MAX_LINE_LENGTH );
1222  if ( line[strlen( line ) - 1] == '\n' ) line[strlen( line ) - 1] = '\0';
1223  bzero( event, MAX_EVENT_NAME_LENGTH );
1224  sscanf( line, "%s %s %u %u %u", arch, event, &cmask, &inv, &sp );
1225  if ( !strcmp( arch, "NHM" ) )
1226  nehalem = true;
1227  else
1228  nehalem = false;
1229  bzero( line, MAX_LINE_LENGTH );
1230  while ( gzgets( res_file, line, MAX_LINE_LENGTH ) != Z_NULL ) {
1231  if ( line[strlen( line ) - 1] == '\n' ) line[strlen( line ) - 1] = '\0';
1232  if ( strchr( line, ' ' ) == NULL ) // module
1233  {
1234  if ( module_num > 0 ) {
1235  put_S_module( cur_module, dir );
1236  cur_module->clear();
1237  }
1238  module_num++;
1239  char* end_sym = strchr( line, '%' );
1240  if ( end_sym == NULL ) // error
1241  {
1242  fprintf( stderr, "ERROR: Invalid module name. \nLINE: %s\naborting...\n", line );
1243  exit( 1 );
1244  }
1245  bzero( cur_module_name, MAX_MODULE_NAME_LENGTH );
1246  memcpy( cur_module_name, line, strlen( line ) - strlen( end_sym ) );
1247  cur_module->init( cur_module_name, arch, event, cmask, inv, sp );
1248  cur_module->set_total( atoi( end_sym + 1 ) );
1249  } // module
1250  else // symbol, libName, libOffset, value
1251  {
1252  unsigned int value = 0, libOffset = 0;
1253  char symbol[MAX_SYM_LENGTH];
1254  char libName[MAX_LIB_LENGTH];
1255  char final_sym[MAX_SYM_MOD_LENGTH];
1256  char final_lib[MAX_LIB_MOD_LENGTH];
1257  bzero( symbol, MAX_SYM_LENGTH );
1258  bzero( libName, MAX_LIB_LENGTH );
1259  bzero( final_sym, MAX_SYM_MOD_LENGTH );
1260  bzero( final_lib, MAX_LIB_MOD_LENGTH );
1261 
1262  sscanf( line, "%s %s %u %u", symbol, libName, &libOffset, &value );
1263  char realPathName_s[FILENAME_MAX];
1264  bzero( realPathName_s, FILENAME_MAX );
1265  char* realPathName = realpath( libName, realPathName_s );
1266  if ( realPathName != NULL && strlen( realPathName ) > 0 ) {
1268  result = libsInfo.find( realPathName );
1269  if ( result == libsInfo.end() ) { libsInfo[realPathName] = FileInfo( realPathName, true ); }
1270  const char* temp_sym = libsInfo[realPathName].symbolByOffset( libOffset );
1271  if ( temp_sym != NULL && strlen( temp_sym ) > 0 ) {
1272  int status;
1273  char* demangled_symbol = abi::__cxa_demangle( temp_sym, NULL, NULL, &status );
1274  if ( status == 0 ) {
1275  strcpy( final_sym, demangled_symbol );
1276  free( demangled_symbol );
1277  } else {
1278  strcpy( final_sym, temp_sym );
1279  }
1280  } else {
1281  strcpy( final_sym, "???" );
1282  }
1283  strcpy( final_lib, realPathName );
1284  } else {
1285  strcpy( final_sym, symbol );
1286  strcpy( final_lib, libName );
1287  }
1288  char index[MAX_LINE_LENGTH];
1289  bzero( index, MAX_LINE_LENGTH );
1290  strcpy( index, final_sym );
1291  strcat( index, "%" );
1292  strcat( index, final_lib );
1293  cur_module->add_sample( index, value );
1294  } // symbol, libName, libOffset, value
1295  bzero( line, MAX_LINE_LENGTH );
1296  } // while(gzgets(res_file, line, MAX_LINE_LENGTH)!=Z_NULL)
1297  put_S_module( cur_module, dir ); // last module!
1298  cur_module->clear();
1299  gzclose( res_file );
1300  } // if(res_file != NULL)
1301  else {
1302  fprintf( stderr, "ERROR: Unable to open input file: %s\naborting...\n", filename );
1303  exit( 1 );
1304  }
1305  delete cur_module; // delete it!
1306  return 0;
1307 }
1308 
1309 int read_S_events( const char* dir, const char* filename ) {
1310  char event[MAX_EVENT_NAME_LENGTH];
1311  char arch[MAX_ARCH_NAME_LENGTH];
1312  char line[MAX_LINE_LENGTH];
1313  char cmask_str[MAX_CMASK_STR_LENGTH];
1314  char inv_str[MAX_INV_STR_LENGTH];
1315  char sp_str[MAX_SP_STR_LENGTH];
1316  bzero( line, MAX_LINE_LENGTH );
1317  bzero( event, MAX_EVENT_NAME_LENGTH );
1318  bzero( arch, MAX_ARCH_NAME_LENGTH );
1319  bzero( cmask_str, MAX_CMASK_STR_LENGTH );
1320  bzero( inv_str, MAX_INV_STR_LENGTH );
1321  bzero( sp_str, MAX_SP_STR_LENGTH );
1322  char path_name[MAX_FILENAME_LENGTH];
1323  bzero( path_name, MAX_FILENAME_LENGTH );
1324  strcpy( path_name, dir );
1325  strcat( path_name, "/" );
1326  strcat( path_name, filename );
1327  gzFile res_file = gzopen( path_name, "rb" );
1328  if ( res_file != NULL ) {
1329  bzero( line, MAX_LINE_LENGTH );
1330  gzgets( res_file, line, MAX_LINE_LENGTH );
1331  if ( line[strlen( line ) - 1] == '\n' ) line[strlen( line ) - 1] = '\0';
1332  bzero( event, MAX_EVENT_NAME_LENGTH );
1333  sscanf( line, "%s %s %s %s %s\n", arch, event, cmask_str, inv_str, sp_str );
1334  std::string event_str( event );
1335  if ( atoi( cmask_str ) > 0 ) {
1336  event_str += " CMASK=";
1337  event_str += cmask_str;
1338  }
1339  if ( atoi( inv_str ) > 0 ) {
1340  event_str += " INV=";
1341  event_str += inv_str;
1342  }
1343  S_events.push_back( event_str );
1344  } // if(res_file != NULL)
1345  else {
1346  fprintf( stderr, "ERROR: Unable to open input file: %s\naborting...\n", filename );
1347  exit( 1 );
1348  }
1349  gzclose( res_file );
1350  return 0;
1351 }
1352 
1353 // finalize_html_pages()
1354 // const char *dir : directory contating sampling result files
1355 // puts footers in module HTML pages and creates index file
1356 int finalize_S_html_pages( const char* dir ) {
1357  for ( std::map<std::string, unsigned int>::const_iterator i = modules_tot_samples.begin();
1358  i != modules_tot_samples.end(); i++ ) {
1359  char module_filename[MAX_FILENAME_LENGTH];
1360  strcpy( module_filename, dir );
1361  strcat( module_filename, "/HTML/" );
1362  strcat( module_filename, ( i->first ).c_str() );
1363  strcat( module_filename, ".html" );
1364  FILE* module_file = fopen( module_filename, "a" );
1365  if ( module_file == NULL ) {
1366  fprintf( stderr, "ERROR: Unable to append to file: %s\naborting...\n", module_filename );
1367  exit( 1 );
1368  }
1369  fprintf( module_file, "</body>\n</html>\n" );
1370  if ( fclose( module_file ) ) {
1371  fprintf( stderr, "ERROR: Cannot close file %s!!!\naborting...\n", module_filename );
1372  exit( 1 );
1373  }
1374  }
1375  return 0;
1376 }
1377 
1378 // read_file()
1379 // const char *filename : input file to analyse
1380 // analyses the event file and updates the list of modules with counter information found in the file
1381 // returns the number of modules found in the file
1382 int read_C_file( const char* dir, const char* filename ) {
1383  char event[MAX_EVENT_NAME_LENGTH];
1384  char arch[MAX_ARCH_NAME_LENGTH];
1385  char line[MAX_LINE_LENGTH];
1386  char cmask_str[MAX_CMASK_STR_LENGTH];
1387  char inv_str[MAX_INV_STR_LENGTH];
1388  char sp_str[MAX_SP_STR_LENGTH];
1389  char cur_module_name[MAX_MODULE_NAME_LENGTH];
1390  bzero( line, MAX_LINE_LENGTH );
1391  bzero( event, MAX_EVENT_NAME_LENGTH );
1392  bzero( cur_module_name, MAX_MODULE_NAME_LENGTH );
1393  bzero( arch, MAX_ARCH_NAME_LENGTH );
1394  bzero( line, MAX_LINE_LENGTH );
1395  bzero( cmask_str, MAX_CMASK_STR_LENGTH );
1396  bzero( inv_str, MAX_INV_STR_LENGTH );
1397  bzero( sp_str, MAX_SP_STR_LENGTH );
1398  int number_of_modules = 0;
1399  long cur_sum = 0;
1400  int no_of_values = 0;
1401  char path_name[MAX_FILENAME_LENGTH];
1402  bzero( path_name, MAX_FILENAME_LENGTH );
1403  strcpy( path_name, dir );
1404  strcat( path_name, "/" );
1405  strcat( path_name, filename );
1406  FILE* fp = fopen( path_name, "r" );
1407  int stat = fscanf( fp, "%s %s %s %s %s\n", arch, event, cmask_str, inv_str, sp_str );
1408  if ( stat != 5 ) {
1409  std::cerr << "ERROR: failed to parse " << path_name << std::endl;
1410  exit( 1 );
1411  }
1412  if ( !strcmp( arch, "NHM" ) )
1413  nehalem = true;
1414  else
1415  nehalem = false;
1416  std::string event_str( event );
1417  if ( atoi( cmask_str ) > 0 ) {
1418  event_str += " CMASK=";
1419  event_str += cmask_str;
1420  }
1421  if ( atoi( inv_str ) > 0 ) {
1422  event_str += " INV=";
1423  event_str += inv_str;
1424  }
1425  C_events.push_back( event_str );
1426  while ( fscanf( fp, "%s\n", line ) != EOF ) {
1427  if ( isalpha( line[0] ) ) // module
1428  {
1429  if ( number_of_modules > 0 ) {
1430  C_modules[cur_module_name][event_str] = (double)cur_sum / no_of_values;
1431  cur_sum = 0;
1432  no_of_values = 0;
1433  }
1434  strcpy( cur_module_name, line );
1435  number_of_modules++;
1436  } else if ( isdigit( line[0] ) ) // value
1437  {
1438  cur_sum += strtol( line, NULL, 10 );
1439  no_of_values++;
1440  }
1441  }
1442  C_modules[cur_module_name][event_str] = (double)cur_sum / no_of_values; // last module
1443  fclose( fp );
1444  return number_of_modules;
1445 }
1446 
// put_C_header()
// FILE *fp                          : output stream of the HTML index page
// std::vector<std::string> &columns : column titles; an empty title yields a blank white separator cell
// Writes the HTML prologue (doctype, head, style, page title) followed by the
// opening of the sortable table and its header row.
void put_C_header( FILE* fp, std::vector<std::string>& columns ) {
  // everything up to and including the first header cell is constant text
  static const char prologue[] =
      "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\n"
      "<html>\n"
      "<head>\n"
      "<title>\n"
      "Analysis Result\n"
      "</title>\n"
      "<script src=\"sorttable.js\"></script>\n"
      "<style>\ntable.sortable thead "
      "{\nbackground-color:#eee;\ncolor:#666666;\nfont-weight:bold;\ncursor:default;\nfont-family:courier;\n}"
      "\n</style>\n"
      "</head>\n"
      "<body link=\"black\">\n"
      "<h1>RESULTS:</h1><br/>Click for detailed symbol view...<p/>\n"
      "<table class=\"sortable\" cellpadding=\"5\">\n"
      "<tr>\n"
      "<th>MODULE NAME</th>\n";
  fputs( prologue, fp );

  for ( const std::string& title : columns ) {
    if ( title.c_str()[0] == '\0' ) {
      fputs( "<th bgcolor=\"#FFFFFF\">&nbsp;</th>\n", fp );
      continue;
    }
    fprintf( fp, "<th>%s</th>\n", title.c_str() );
  }
  fputs( "</tr>\n", fp );
}
1475 
1476 void put_C_modules( FILE* fp, std::vector<std::string>& columns ) {
1477  int index = 0;
1478  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1479  ++it ) {
1480  if ( index % 2 )
1481  fprintf( fp, "<tr bgcolor=\"#FFFFCC\">\n" );
1482  else
1483  fprintf( fp, "<tr bgcolor=\"#CCFFCC\">\n" );
1484  fprintf( fp, "<td style=\"font-family:monospace;font-size:large;color:Black\"><a href=\"%s.html\">%s</a></td>\n",
1485  ( it->first ).c_str(), ( it->first ).c_str() );
1486  for ( std::vector<std::string>::const_iterator jt = columns.begin(); jt != columns.end(); ++jt ) {
1487  if ( strlen( jt->c_str() ) == 0 ) {
1488  fprintf( fp, "<td bgcolor=\"#FFFFFF\">&nbsp;</td>" );
1489  } else {
1490  if ( ( it->second ).find( *jt ) == ( it->second ).end() ) {
1491  fprintf( stderr, "ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", ( *jt ).c_str() );
1492  exit( 1 );
1493  }
1494  fprintf( fp, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\" align=\"right\">%.2f</td>\n",
1495  ( it->second )[*jt] );
1496  }
1497  }
1498  fprintf( fp, "</tr>\n" );
1499  index++;
1500  }
1501 }
1502 
// put_C_footer()
// FILE *fp : output stream of the HTML index page
// Closes the table, body and html elements opened by put_C_header().
void put_C_footer( FILE* fp ) {
  fputs( "</table>\n</body>\n</html>\n", fp );
}
1507 
// put_C_header_csv()
// FILE *fp                          : output stream of the CSV file
// std::vector<std::string> &columns : column titles; empty titles are skipped
// Writes the CSV header line: "MODULE NAME" followed by every non-empty column title.
void put_C_header_csv( FILE* fp, std::vector<std::string>& columns ) {
  fputs( "MODULE NAME", fp );
  for ( const std::string& title : columns ) {
    if ( title.c_str()[0] == '\0' ) continue; // separator columns exist only in the HTML layout
    fprintf( fp, ",%s", title.c_str() );
  }
  fputc( '\n', fp );
}
1518 
1519 void put_C_modules_csv( FILE* fp, std::vector<std::string>& columns ) {
1520  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1521  ++it ) {
1522  fprintf( fp, "%s", ( it->first ).c_str() );
1523  for ( std::vector<std::string>::const_iterator jt = columns.begin(); jt != columns.end(); ++jt ) {
1524  if ( strlen( jt->c_str() ) == 0 ) {
1525  } else {
1526  if ( ( it->second ).find( *jt ) == ( it->second ).end() ) {
1527  fprintf( stderr, "ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", ( *jt ).c_str() );
1528  exit( 1 );
1529  }
1530  fprintf( fp, ",%.2f", ( it->second )[*jt] );
1531  }
1532  }
1533  fprintf( fp, "\n" );
1534  }
1535 }
1536 
1537 // normalize()
1538 // struct C_module *mod : pointer to the head of the list of modules
1539 // int counter : event selected (see C_module class for which event corresponds to which number)
1540 // int number_of_modules : length of the list
1541 // double value : value to be normalized
1542 // double normalizeTo : value to which the value above should be normalized
1543 // returns the normalized value
1544 double normalize( std::string field, double value, double normalizeTo ) {
1545  double max = 0;
1546  double counter_value;
1547  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1548  ++it ) {
1549  counter_value = ( it->second )[field];
1550  if ( max < counter_value ) max = counter_value;
1551  }
1552  if ( value > 0 && max > 0 && normalizeTo > 0 ) {
1553  return 1. * value / max * normalizeTo;
1554  } else
1555  return 0;
1556 }
1557 
1558 // calc_post_deriv_values()
1559 // struct C_module *mod : pointer to the head of the list of modules
1560 // double totalCycles : total cycles spent by all the modules
1561 // int number_of_modules : length of the list
1562 // calculates the iFactor of each module
1564  if ( nehalem ) {
1565  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1566  ++it ) {
1567  double simdnorm =
1568  1. - normalize( "Packed % of all UOPS Retired", ( it->second )["Packed % of all UOPS Retired"], 1 );
1569  double misspnorm = normalize( "% of Mispredicted Branches", ( it->second )["% of Mispredicted Branches"], 1 );
1570  double stallnorm = normalize( "Stalled Cycles", ( it->second )["Stalled Cycles"], 1 );
1571  ( it->second )["iFactor"] = stallnorm * ( simdnorm + misspnorm + stallnorm );
1572  }
1573  } else {
1574  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1575  ++it ) {
1576  double simdnorm =
1577  1. - normalize( "Packed SIMD % of all Instructions", ( it->second )["Packed SIMD % of all Instructions"], 1 );
1578  double misspnorm = normalize( "% of Mispredicted Branches", ( it->second )["% of Mispredicted Branches"], 1 );
1579  double stallnorm = normalize( "Stalled Cycles", ( it->second )["Stalled Cycles"], 1 );
1580  ( it->second )["iFactor"] = stallnorm * ( simdnorm + misspnorm + stallnorm );
1581  }
1582  }
1583 }
1584 
1585 // getTotalCycles()
1586 // struct C_module *mod : pointer to the head of the list of modules
1587 // int number_of_modules : length of the list
1588 // returns the number of total cycles spent by all the modules
1589 double getTotalCycles() {
1590  double sum = 0;
1591  if ( nehalem ) {
1592  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1593  ++it ) {
1594  sum += ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
1595  }
1596  } else {
1597  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1598  ++it ) {
1599  sum += ( it->second )["UNHALTED_CORE_CYCLES"];
1600  }
1601  }
1602  return sum;
1603 }
1604 
1605 // main()
1606 // takes as argument the directory containing results
1607 // and produces the HTML directory inside of it containing browsable statistics
1608 int main( int argc, char* argv[] ) {
1609  if ( argc < 2 || argc > 4 ) {
1610  printf( "\n\nUsage: %s DIRECTORY [--caa] [--csv]\n\n", argv[0] );
1611  exit( 1 );
1612  }
1613 
1614  bool caa = false;
1615  bool csv = false;
1616  for ( int i = 2; i < argc; i++ ) {
1617  if ( !strcmp( argv[i], "--caa" ) ) caa = true;
1618  if ( !strcmp( argv[i], "--csv" ) ) csv = true;
1619  }
1620 
1621  char dir[MAX_FILENAME_LENGTH];
1622  strcpy( dir, argv[1] );
1623  if ( !csv ) {
1624  strcat( dir, "/HTML" );
1625  int res = mkdir( dir, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH );
1626  if ( res != 0 ) {
1627  fprintf( stderr, "ERROR: Cannot create directory %s\naborting...\n", dir );
1628  exit( 1 );
1629  }
1630  }
1631 
1632  DIR* dp;
1633  struct dirent* dirp;
1634  int num_of_modules = 0;
1635  if ( ( dp = opendir( argv[1] ) ) == NULL ) {
1636  printf( "Error(%d) opening %s\n", errno, argv[1] );
1637  return errno;
1638  }
1639  while ( ( dirp = readdir( dp ) ) != NULL ) {
1640  if ( strstr( dirp->d_name, "_S_" ) != NULL && strstr( dirp->d_name, ".txt.gz" ) != NULL && !csv ) {
1641  if ( read_S_events( argv[1], dirp->d_name ) ) {
1642  fprintf( stderr, "ERROR: Cannot read file %s\naborting...\n", dirp->d_name );
1643  exit( 1 );
1644  }
1645  }
1646  }
1647  closedir( dp );
1648  sort( S_events.begin(), S_events.end() );
1649  if ( ( dp = opendir( argv[1] ) ) == NULL ) {
1650  printf( "Error(%d) opening %s\n", errno, argv[1] );
1651  return errno;
1652  }
1653  while ( ( dirp = readdir( dp ) ) != NULL ) {
1654  if ( strstr( dirp->d_name, "_S_" ) != NULL && strstr( dirp->d_name, ".txt.gz" ) != NULL && !csv ) {
1655  if ( read_S_file( argv[1], dirp->d_name ) ) {
1656  fprintf( stderr, "ERROR: Cannot read file %s\naborting...\n", dirp->d_name );
1657  exit( 1 );
1658  }
1659  } else if ( strstr( dirp->d_name, "_C_" ) != NULL && strstr( dirp->d_name, ".txt" ) != NULL ) {
1660  int res = read_C_file( argv[1], dirp->d_name );
1661  if ( res > num_of_modules ) { num_of_modules = res; }
1662  }
1663  }
1664  closedir( dp );
1665 
1666  if ( !csv ) {
1667  if ( finalize_S_html_pages( argv[1] ) ) {
1668  fprintf( stderr, "ERROR: Cannot finalize HTML pages!!!\naborting...\n" );
1669  exit( 1 );
1670  }
1671  }
1672 
1673  char filepath[MAX_FILENAME_LENGTH];
1674  bzero( filepath, MAX_FILENAME_LENGTH );
1675  if ( !csv )
1676  sprintf( filepath, "%s/HTML/index.html", argv[1] );
1677  else
1678  sprintf( filepath, "%s/results.csv", argv[1] );
1679  FILE* fp = fopen( filepath, "w" );
1680  if ( fp == NULL ) {
1681  fprintf( stderr, "ERROR: Cannot create file index.html!!!\naborting...\n" );
1682  exit( 1 );
1683  }
1684 
1685  if ( caa ) {
1686  double totalCycles;
1687  if ( !nehalem ) {
1689  if ( !check_for_core_caa_events() ) {
1690  fprintf( stderr, "(core) ERROR: One or more events for CAA missing!\naborting...\n" );
1691  exit( 1 );
1692  }
1694  totalCycles = getTotalCycles();
1695  calc_core_deriv_values( totalCycles );
1697  if ( !csv ) {
1698  put_C_header( fp, core_caa_events_displ );
1699  put_C_modules( fp, core_caa_events_displ );
1700  } else {
1701  put_C_header_csv( fp, core_caa_events_displ );
1702  put_C_modules_csv( fp, core_caa_events_displ );
1703  }
1704  } else {
1706  if ( !check_for_nhm_caa_events() ) {
1707  fprintf( stderr, "(nehalem) ERROR: One or more events for CAA missing!\naborting...\n" );
1708  exit( 1 );
1709  }
1711  totalCycles = getTotalCycles();
1712  calc_nhm_deriv_values( totalCycles );
1714  if ( !csv ) {
1715  put_C_header( fp, nhm_caa_events_displ );
1716  put_C_modules( fp, nhm_caa_events_displ );
1717  } else {
1718  put_C_header_csv( fp, nhm_caa_events_displ );
1719  put_C_modules_csv( fp, nhm_caa_events_displ );
1720  }
1721  }
1722  if ( !csv ) put_C_footer( fp );
1723  fclose( fp );
1724  } else {
1725  if ( !csv ) {
1726  put_C_header( fp, C_events );
1727  put_C_modules( fp, C_events );
1728  put_C_footer( fp );
1729  } else {
1730  put_C_header_csv( fp, C_events );
1731  put_C_modules_csv( fp, C_events );
1732  }
1733  fclose( fp );
1734  }
1735  if ( !csv ) {
1736  char src[MAX_FILENAME_LENGTH];
1737  char dst[MAX_FILENAME_LENGTH];
1738  sprintf( src, "sorttable.js" );
1739  sprintf( dst, "%s/HTML/sorttable.js", argv[1] );
1740  int fd_src = open( src, O_RDONLY );
1741  if ( fd_src == -1 ) {
1742  fprintf( stderr, "ERROR: Cannot open file \"%s\"!\naborting...\n", src );
1743  exit( 1 );
1744  }
1745  int fd_dst = open( dst, O_WRONLY | O_CREAT | O_TRUNC, 0644 );
1746  if ( fd_dst == -1 ) {
1747  fprintf( stderr, "ERROR: Cannot open file \"%s\" (%s)!\naborting...\n", dst, strerror( errno ) );
1748  exit( 1 );
1749  }
1750  char c;
1751  while ( read( fd_src, &c, 1 ) ) {
1752  if ( write( fd_dst, &c, 1 ) == -1 ) {
1753  std::cerr << "ERROR: failed to write to " << dst << std::endl;
1754  exit( 1 );
1755  }
1756  }
1757  close( fd_dst );
1758  close( fd_src );
1759  }
1760  return 0;
1761 }
MAX_CMASK_STR_LENGTH
#define MAX_CMASK_STR_LENGTH
Definition: pfm_gen_analysis.cpp:94
I7_L1_ITLB_WALK_COMPLETED_CYCLES
#define I7_L1_ITLB_WALK_COMPLETED_CYCLES
Definition: pfm_gen_analysis.cpp:60
std::max_element
T max_element(T... args)
init_core_caa_events
void init_core_caa_events()
Definition: pfm_gen_analysis.cpp:282
html_special_chars
void html_special_chars(const char *s, char *s_mod)
Definition: pfm_gen_analysis.cpp:836
I7_OTHER_CORE_L2_HITM_CYCLES
#define I7_OTHER_CORE_L2_HITM_CYCLES
Definition: pfm_gen_analysis.cpp:64
S_module::get_arch
std::string get_arch()
Definition: pfm_gen_analysis.cpp:813
FileInfo::symbolByOffset
const char * symbolByOffset(Offset offset)
Definition: pfm_gen_analysis.cpp:160
MAX_SYM_MOD_LENGTH
#define MAX_SYM_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:82
std::string
STL class.
PipeReader::output
std::istringstream & output(void)
Definition: pfm_gen_analysis.cpp:120
put_C_modules
void put_C_modules(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1476
FileInfo::CacheItemComparator
Definition: pfm_gen_analysis.cpp:189
FileInfo::CacheItem::CacheItem
CacheItem(Offset offset, const std::string &name)
Definition: pfm_gen_analysis.cpp:181
S_module::inv
unsigned int inv
Definition: pfm_gen_analysis.cpp:783
plotBacklogPyRoot.argc
argc
Definition: plotBacklogPyRoot.py:173
std::pair
init_nhm_caa_events
void init_nhm_caa_events()
Definition: pfm_gen_analysis.cpp:306
I7_IFETCH_L2_MISS_L3_HITM
#define I7_IFETCH_L2_MISS_L3_HITM
Definition: pfm_gen_analysis.cpp:72
gaudirun.s
string s
Definition: gaudirun.py:346
S_module::get_event
std::string get_event()
Definition: pfm_gen_analysis.cpp:814
std::vector< CacheItem >
std::map::find
T find(T... args)
MAX_LIB_MOD_LENGTH
#define MAX_LIB_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:84
finalize_S_html_pages
int finalize_S_html_pages(const char *dir)
Definition: pfm_gen_analysis.cpp:1356
std::vector::size
T size(T... args)
CORE_UNKNOWN_ADDR_STORE_CYCLES
#define CORE_UNKNOWN_ADDR_STORE_CYCLES
Definition: pfm_gen_analysis.cpp:54
put_C_header
void put_C_header(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1447
CORE_L1_DTLB_MISS_CYCLES
#define CORE_L1_DTLB_MISS_CYCLES
Definition: pfm_gen_analysis.cpp:52
I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES
#define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:65
Gaudi::Units::nm
constexpr double nm
Definition: SystemOfUnits.h:97
std::istringstream
STL class.
FileInfo::SymbolCache
std::vector< CacheItem > SymbolCache
Definition: pfm_gen_analysis.cpp:186
FileInfo::CacheItem::OFFSET
Offset OFFSET
Definition: pfm_gen_analysis.cpp:181
gaudirun.c
c
Definition: gaudirun.py:525
std::vector::back
T back(T... args)
MAX_EVENT_NAME_LENGTH
#define MAX_EVENT_NAME_LENGTH
Definition: pfm_gen_analysis.cpp:90
S_module::get_total_num_samples
unsigned int get_total_num_samples()
Definition: pfm_gen_analysis.cpp:829
std::map::clear
T clear(T... args)
I7_L2_HIT_CYCLES
#define I7_L2_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:61
EXPECTED_CPI
#define EXPECTED_CPI
Definition: pfm_gen_analysis.cpp:77
std::vector::push_back
T push_back(T... args)
S_module::get_max
bool get_max(char *index, unsigned int &value)
Definition: pfm_gen_analysis.cpp:819
compareOutputFiles.sp
sp
Definition: compareOutputFiles.py:506
I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT
#define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT
Definition: pfm_gen_analysis.cpp:69
I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD
#define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD
Definition: pfm_gen_analysis.cpp:73
fixtures.stderr
Generator[bytes, None, None] stderr(subprocess.CompletedProcess completed_process)
Definition: fixtures.py:147
read_C_file
int read_C_file(const char *dir, const char *filename)
Definition: pfm_gen_analysis.cpp:1382
S_module::sp
unsigned int sp
Definition: pfm_gen_analysis.cpp:784
S_module::clear
void clear()
Definition: pfm_gen_analysis.cpp:788
CORE_L2_HIT_CYCLES
#define CORE_L2_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:51
FileInfo::FileInfo
FileInfo(void)
Definition: pfm_gen_analysis.cpp:155
Gaudi::Utils::begin
AttribStringParser::Iterator begin(const AttribStringParser &parser)
Definition: AttribStringParser.h:136
read_S_file
int read_S_file(const char *dir, const char *filename)
Definition: pfm_gen_analysis.cpp:1196
calc_nhm_deriv_values
void calc_nhm_deriv_values(double totalCycles)
Definition: pfm_gen_analysis.cpp:605
S_module::get_c_mask
unsigned int get_c_mask()
Definition: pfm_gen_analysis.cpp:812
PipeReader::pipe
FILE * pipe
Definition: pfm_gen_analysis.cpp:123
read_S_events
int read_S_events(const char *dir, const char *filename)
Definition: pfm_gen_analysis.cpp:1309
MAX_SIMPLE_LIB_MOD_LENGTH
#define MAX_SIMPLE_LIB_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:88
S_module::module_name
std::string module_name
Definition: pfm_gen_analysis.cpp:779
MAX_LIB_LENGTH
#define MAX_LIB_LENGTH
Definition: pfm_gen_analysis.cpp:83
FileInfo::createOffsetMap
void createOffsetMap(void)
Definition: pfm_gen_analysis.cpp:194
PIPE_BUFFER_LENGTH
#define PIPE_BUFFER_LENGTH
Definition: pfm_gen_analysis.cpp:98
I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP
#define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP
Definition: pfm_gen_analysis.cpp:70
MAX_SIMPLE_SYM_LENGTH
#define MAX_SIMPLE_SYM_LENGTH
Definition: pfm_gen_analysis.cpp:85
ProduceConsume.j
j
Definition: ProduceConsume.py:104
std::cerr
I7_L1_DTLB_WALK_COMPLETED_CYCLES
#define I7_L1_DTLB_WALK_COMPLETED_CYCLES
Definition: pfm_gen_analysis.cpp:59
std::string::c_str
T c_str(T... args)
S_module::get_module_name
std::string get_module_name()
Definition: pfm_gen_analysis.cpp:828
I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT
#define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT
Definition: pfm_gen_analysis.cpp:68
func_name
const char * func_name(const char *demangled_symbol)
Definition: pfm_gen_analysis.cpp:867
std::map::erase
T erase(T... args)
PipeReader::iss
std::unique_ptr< std::istringstream > iss
Definition: pfm_gen_analysis.cpp:124
getTotalCycles
double getTotalCycles()
Definition: pfm_gen_analysis.cpp:1589
FileInfo::CacheItem::NAME
std::string NAME
Definition: pfm_gen_analysis.cpp:183
PipeReader
Definition: pfm_gen_analysis.cpp:100
FileInfo::next
Offset next(Offset offset)
Definition: pfm_gen_analysis.cpp:173
S_module::samples
std::map< std::string, unsigned int > samples
Definition: pfm_gen_analysis.cpp:777
CORE_LCP_STALL_CYCLES
#define CORE_LCP_STALL_CYCLES
Definition: pfm_gen_analysis.cpp:53
FileInfo::FileInfo
FileInfo(const std::string &name, bool useGdb)
Definition: pfm_gen_analysis.cpp:156
check_for_nhm_caa_events
bool check_for_nhm_caa_events()
Definition: pfm_gen_analysis.cpp:360
CORE_OVERLAPPING_CYCLES
#define CORE_OVERLAPPING_CYCLES
Definition: pfm_gen_analysis.cpp:55
std::map< std::string, unsigned int >
S_module::get_inv_mask
unsigned int get_inv_mask()
Definition: pfm_gen_analysis.cpp:811
FileInfo::NAME
std::string NAME
Definition: pfm_gen_analysis.cpp:154
put_C_footer
void put_C_footer(FILE *fp)
Definition: pfm_gen_analysis.cpp:1503
put_C_modules_csv
void put_C_modules_csv(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1519
cpluginsvc.n
n
Definition: cpluginsvc.py:234
MAX_MODULE_NAME_LENGTH
#define MAX_MODULE_NAME_LENGTH
Definition: pfm_gen_analysis.cpp:91
FileInfo::CacheItemComparator::operator()
bool operator()(const CacheItem &a, const int &b) const
Definition: pfm_gen_analysis.cpp:190
init_core_caa_events_displ
void init_core_caa_events_displ()
Definition: pfm_gen_analysis.cpp:370
S_module::add_sample
void add_sample(const char *index, unsigned int value)
Definition: pfm_gen_analysis.cpp:815
S_module::event
std::string event
Definition: pfm_gen_analysis.cpp:781
CORE_SPAN_ACROSS_CACHE_LINE_CYCLES
#define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES
Definition: pfm_gen_analysis.cpp:56
PipeReader::PipeReader
PipeReader(const char *cmd)
Definition: pfm_gen_analysis.cpp:102
MAX_SYM_LENGTH
#define MAX_SYM_LENGTH
Definition: pfm_gen_analysis.cpp:81
hivetimeline.read
def read(f, regex=".*", skipevents=0)
Definition: hivetimeline.py:32
FileInfo::CacheItem
Definition: pfm_gen_analysis.cpp:180
skipWhitespaces
bool skipWhitespaces(const char *srcbuffer, const char **destbuffer)
Definition: pfm_gen_analysis.cpp:131
ConditionsStallTest.name
name
Definition: ConditionsStallTest.py:77
std::endl
T endl(T... args)
put_C_header_csv
void put_C_header_csv(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1508
FileInfo::Offset
int Offset
Definition: pfm_gen_analysis.cpp:153
S_module::get_smpl_period
unsigned int get_smpl_period()
Definition: pfm_gen_analysis.cpp:810
std::vector::begin
T begin(T... args)
std::getline
T getline(T... args)
std::map::insert
T insert(T... args)
I7_OTHER_CORE_L2_HIT_CYCLES
#define I7_OTHER_CORE_L2_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:63
CORE_L2_MISS_CYCLES
#define CORE_L2_MISS_CYCLES
Definition: pfm_gen_analysis.cpp:50
MAX_SIMPLE_LIB_LENGTH
#define MAX_SIMPLE_LIB_LENGTH
Definition: pfm_gen_analysis.cpp:87
S_module::arch
std::string arch
Definition: pfm_gen_analysis.cpp:780
S_module::cmask
unsigned int cmask
Definition: pfm_gen_analysis.cpp:782
MAX_LINE_LENGTH
#define MAX_LINE_LENGTH
Definition: pfm_gen_analysis.cpp:89
FileInfo
Definition: pfm_gen_analysis.cpp:151
MAX_SAMPLE_INDEX_LENGTH
#define MAX_SAMPLE_INDEX_LENGTH
Definition: pfm_gen_analysis.cpp:80
I7_IFETCH_L2_MISS_L3_HIT_SNOOP
#define I7_IFETCH_L2_MISS_L3_HIT_SNOOP
Definition: pfm_gen_analysis.cpp:71
ReadAndWriteWhiteBoard.dst
dst
Definition: ReadAndWriteWhiteBoard.py:33
calc_post_deriv_values
void calc_post_deriv_values()
Definition: pfm_gen_analysis.cpp:1563
std::vector::empty
T empty(T... args)
plotSpeedupsPyRoot.line
line
Definition: plotSpeedupsPyRoot.py:198
put_S_module
void put_S_module(S_module *cur_module, const char *dir)
Definition: pfm_gen_analysis.cpp:1044
main
int main(int argc, char *argv[])
Definition: pfm_gen_analysis.cpp:1608
MAX_INV_STR_LENGTH
#define MAX_INV_STR_LENGTH
Definition: pfm_gen_analysis.cpp:95
check_for_core_caa_events
bool check_for_core_caa_events()
Definition: pfm_gen_analysis.cpp:350
std::vector::end
T end(T... args)
normalize
double normalize(std::string field, double value, double normalizeTo)
Definition: pfm_gen_analysis.cpp:1544
I7_L3_UNSHARED_HIT_CYCLES
#define I7_L3_UNSHARED_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:62
S_module::S_module
S_module()
Definition: pfm_gen_analysis.cpp:787
skipString
bool skipString(const char *strptr, const char *srcbuffer, const char **dstbuffer)
Definition: pfm_gen_analysis.cpp:145
FileInfo::CacheItemComparator::operator()
bool operator()(const int &a, const CacheItem &b) const
Definition: pfm_gen_analysis.cpp:191
std::unique_ptr< std::istringstream >
graphanalysis.filename
filename
Definition: graphanalysis.py:130
PipeReader::~PipeReader
~PipeReader(void)
Definition: pfm_gen_analysis.cpp:118
S_module::init
void init(const char *name, const char *architecture, const char *event_name, unsigned int c_mask, unsigned int inv_mask, unsigned int smpl_period)
Definition: pfm_gen_analysis.cpp:797
S_module::total_num_samples
unsigned int total_num_samples
Definition: pfm_gen_analysis.cpp:778
MAX_FILENAME_LENGTH
#define MAX_FILENAME_LENGTH
Definition: pfm_gen_analysis.cpp:79
I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES
#define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:67
init_nhm_caa_events_displ
void init_nhm_caa_events_displ()
Definition: pfm_gen_analysis.cpp:500
calc_core_deriv_values
void calc_core_deriv_values(double totalCycles)
Definition: pfm_gen_analysis.cpp:419
I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES
#define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:66
S_module::set_total
void set_total(unsigned int total)
Definition: pfm_gen_analysis.cpp:806
Gaudi::ParticleProperties::index
size_t index(const Gaudi::ParticleProperty *property, const Gaudi::Interfaces::IParticlePropertySvc *service)
helper utility for mapping of Gaudi::ParticleProperty object into non-negative integral sequential id...
Definition: IParticlePropertySvc.cpp:39
MAX_SIMPLE_SYM_MOD_LENGTH
#define MAX_SIMPLE_SYM_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:86
gaudirun.argv
list argv
Definition: gaudirun.py:327
MAX_SP_STR_LENGTH
#define MAX_SP_STR_LENGTH
Definition: pfm_gen_analysis.cpp:96
FileInfo::m_symbolCache
SymbolCache m_symbolCache
Definition: pfm_gen_analysis.cpp:187
S_module
Definition: pfm_gen_analysis.cpp:775
MAX_ARCH_NAME_LENGTH
#define MAX_ARCH_NAME_LENGTH
Definition: pfm_gen_analysis.cpp:93