The Gaudi Framework  master (37c0b60a)
pfm_gen_analysis.cpp
Go to the documentation of this file.
1 /***********************************************************************************\
2 * (c) Copyright 1998-2024 CERN for the benefit of the LHCb and ATLAS collaborations *
3 * *
4 * This software is distributed under the terms of the Apache version 2 licence, *
5 * copied verbatim in the file "LICENSE". *
6 * *
7 * In applying this licence, CERN does not waive the privileges and immunities *
8 * granted to it by virtue of its status as an Intergovernmental Organization *
9 * or submit itself to any jurisdiction. *
10 \***********************************************************************************/
11 /*
12 Name: pfm_analysis.cpp
13 Author: Daniele Francesco Kruse
14 E-mail: daniele.francesco.kruse@cern.ch
15 Version: 0.9 (16/02/2010)
16 
17 This code is responsible for analysing results generated by the PerfmonService of CMSSW.
18 It takes 42 files as input (21 simple text files and 21 gzipped text files) and
19 produces a HTML directory containing the results of the analysis (both counting and sampling).
20 
21 compile linking zlib: g++ -Wall -lz pfm_analysis.cpp
22 */
23 
24 #include <ctype.h>
25 #include <cxxabi.h>
26 #include <fcntl.h>
27 #include <math.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <sys/stat.h>
33 #include <sys/types.h>
34 #include <unistd.h>
35 #include <zlib.h>
36 
37 #include <algorithm>
38 #include <format>
39 #include <iostream>
40 #include <list>
41 #include <map>
42 #include <memory>
43 #include <sstream>
44 #include <string>
45 #include <vector>
46 
47 #include <dirent.h>
48 #include <errno.h>
49 
50 // Core
51 #define CORE_L2_MISS_CYCLES 200
52 #define CORE_L2_HIT_CYCLES 14.5
53 #define CORE_L1_DTLB_MISS_CYCLES 10
54 #define CORE_LCP_STALL_CYCLES 6
55 #define CORE_UNKNOWN_ADDR_STORE_CYCLES 5
56 #define CORE_OVERLAPPING_CYCLES 6
57 #define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES 20
58 
59 // Nehalem
60 #define I7_L1_DTLB_WALK_COMPLETED_CYCLES 35
61 #define I7_L1_ITLB_WALK_COMPLETED_CYCLES 35
62 #define I7_L2_HIT_CYCLES 6
63 #define I7_L3_UNSHARED_HIT_CYCLES 35
64 #define I7_OTHER_CORE_L2_HIT_CYCLES 60
65 #define I7_OTHER_CORE_L2_HITM_CYCLES 75
66 #define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES 225 // average of 200 (not modified) and 225-250 (modified)
67 #define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES 360 // average of 350 (not modified) and 370 (modified)
68 #define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES 180
69 #define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT 200
70 #define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT 350
71 #define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP 35
72 #define I7_IFETCH_L2_MISS_L3_HIT_SNOOP 60
73 #define I7_IFETCH_L2_MISS_L3_HITM 75
74 #define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD 180
75 
76 #define MAX_MODULES 1000
77 
78 #define EXPECTED_CPI 0.25
79 
80 #define MAX_FILENAME_LENGTH 1024
81 #define MAX_SAMPLE_INDEX_LENGTH 10000
82 #define MAX_SYM_LENGTH 15000
83 #define MAX_SYM_MOD_LENGTH 20000
84 #define MAX_LIB_LENGTH 5000
85 #define MAX_LIB_MOD_LENGTH 7000
86 #define MAX_SIMPLE_SYM_LENGTH 300
87 #define MAX_SIMPLE_SYM_MOD_LENGTH 500
88 #define MAX_SIMPLE_LIB_LENGTH 300
89 #define MAX_SIMPLE_LIB_MOD_LENGTH 500
90 #define MAX_LINE_LENGTH 20000
91 #define MAX_EVENT_NAME_LENGTH 150
92 #define MAX_MODULE_NAME_LENGTH 250
93 #define MAX_VALUE_STRING_LENGTH 250
94 #define MAX_ARCH_NAME_LENGTH 20
95 #define MAX_CMASK_STR_LENGTH 5
96 #define MAX_INV_STR_LENGTH 5
97 #define MAX_SP_STR_LENGTH 50
98 
99 #define PIPE_BUFFER_LENGTH 1000
100 
101 class PipeReader {
102 public:
103  PipeReader( const char* cmd ) {
104  pipe = popen( cmd, "r" );
105  if ( !pipe ) {
106  printf( "Cannot open pipe. Exiting...\n" );
107  exit( 1 );
108  }
109  char buffer[PIPE_BUFFER_LENGTH];
110  bzero( buffer, PIPE_BUFFER_LENGTH );
111  std::string result = "";
112  while ( !feof( pipe ) ) {
113  if ( fgets( buffer, PIPE_BUFFER_LENGTH, pipe ) != NULL ) { result += buffer; }
114  bzero( buffer, PIPE_BUFFER_LENGTH );
115  }
116  iss = std::make_unique<std::istringstream>( result, std::istringstream::in );
117  }
118 
119  ~PipeReader( void ) { pclose( pipe ); }
120 
121  std::istringstream& output( void ) { return *iss; }
122 
123 private:
124  FILE* pipe;
126 };
127 
128 // skipWhitespaces()
129 // const char *srcbuffer : source string
130 // const char **dstbuffer : destination string
131 // Skips white spaces
132 bool skipWhitespaces( const char* srcbuffer, const char** destbuffer ) {
133  if ( !isspace( *srcbuffer++ ) ) { return false; }
134  while ( isspace( *srcbuffer ) ) { srcbuffer++; }
135  *destbuffer = srcbuffer;
136  return true;
137 }
138 
139 // skipString()
140 // const char *strptr : substring to skip
141 // const char *srcbuffer : source string
142 // const char **dstbuffer : destination string
143 // Skips strings of the form '\\s+strptr\\s+' starting from buffer.
144 // Returns a pointer to the first char which does not match the above regexp,
145 // or 0 in case the regexp is not matched.
146 bool skipString( const char* strptr, const char* srcbuffer, const char** dstbuffer ) {
147  if ( strncmp( srcbuffer, strptr, strlen( strptr ) ) ) { return false; }
148  *dstbuffer = srcbuffer + strlen( strptr );
149  return true;
150 }
151 
152 class FileInfo {
153 public:
154  typedef int Offset;
156  FileInfo( void ) : NAME( "<dynamically generated>" ) {}
157  FileInfo( const std::string& name, bool useGdb ) : NAME( name ) {
158  if ( useGdb ) { this->createOffsetMap(); }
159  }
160 
161  const char* symbolByOffset( Offset offset ) {
162  if ( m_symbolCache.empty() ) { return 0; }
163 
164  SymbolCache::iterator i = lower_bound( m_symbolCache.begin(), m_symbolCache.end(), offset, CacheItemComparator() );
165  if ( i->OFFSET == offset ) { return i->NAME.c_str(); }
166 
167  if ( i == m_symbolCache.begin() ) { return m_symbolCache.begin()->NAME.c_str(); }
168 
169  --i;
170 
171  return i->NAME.c_str();
172  }
173 
174  Offset next( Offset offset ) {
175  SymbolCache::iterator i = upper_bound( m_symbolCache.begin(), m_symbolCache.end(), offset, CacheItemComparator() );
176  if ( i == m_symbolCache.end() ) { return 0; }
177  return i->OFFSET;
178  }
179 
180 private:
181  struct CacheItem {
182  CacheItem( Offset offset, const std::string& name ) : OFFSET( offset ), NAME( name ){};
185  };
186 
189 
191  bool operator()( const CacheItem& a, const int& b ) const { return a.OFFSET < b; }
192  bool operator()( const int& a, const CacheItem& b ) const { return a < b.OFFSET; }
193  };
194 
195  void createOffsetMap( void ) {
196  std::string commandLine = "objdump -p " + NAME;
197  PipeReader objdump( commandLine.c_str() );
198  std::string oldname;
199  std::string suffix;
200  int vmbase = 0;
201  bool matched = false;
202  while ( objdump.output() ) {
203  // Checks the following regexp
204  //
205  // LOAD\\s+off\\s+(0x[0-9A-Fa-f]+)\\s+vaddr\\s+(0x[0-9A-Fa-f]+)
206  //
207  // and sets vmbase to be $2 - $1 of the first matched entry.
208 
210  std::getline( objdump.output(), line );
211 
212  if ( !objdump.output() ) break;
213  if ( line.empty() ) continue;
214  const char* lineptr = line.c_str();
215  if ( !skipWhitespaces( lineptr, &lineptr ) ) continue;
216  if ( !skipString( "LOAD", lineptr, &lineptr ) ) continue;
217  if ( !skipWhitespaces( lineptr, &lineptr ) ) continue;
218  if ( !skipString( "off", lineptr, &lineptr ) ) continue;
219  char* endptr = 0;
220  int initialBase = strtol( lineptr, &endptr, 16 );
221  if ( lineptr == endptr ) continue;
222  lineptr = endptr;
223  if ( !skipWhitespaces( lineptr, &lineptr ) ) continue;
224  if ( !skipString( "vaddr", lineptr, &lineptr ) ) continue;
225  if ( !skipWhitespaces( lineptr, &lineptr ) ) continue;
226  int finalBase = strtol( lineptr, &endptr, 16 );
227  if ( lineptr == endptr ) continue;
228  vmbase = finalBase - initialBase;
229  matched = true;
230  break;
231  }
232  if ( !matched ) {
233  fprintf( stderr, "Cannot determine VM base address for %s\n", NAME.c_str() );
234  fprintf( stderr, "Error while running `objdump -p %s`\n", NAME.c_str() );
235  exit( 1 );
236  }
237  std::string commandLine2 = "nm -t d -n " + NAME;
238  PipeReader nm( commandLine2.c_str() );
239  while ( nm.output() ) {
241  std::getline( nm.output(), line );
242  if ( !nm.output() ) break;
243  if ( line.empty() ) continue;
244  // If line does not match "^(\\d+)[ ]\\S[ ](\S+)$", exit.
245  const char* begin = line.c_str();
246  char* endptr = 0;
247  int address = strtol( begin, &endptr, 10 );
248  if ( endptr == begin ) continue;
249  if ( *endptr++ != ' ' ) continue;
250  if ( isspace( *endptr++ ) ) continue;
251  if ( *endptr++ != ' ' ) continue;
252  char* symbolName = endptr;
253  while ( *endptr && !isspace( *endptr ) ) endptr++;
254  if ( *endptr != 0 ) continue;
255  // If line starts with '.' forget about it.
256  if ( symbolName[0] == '.' ) continue;
257  // Create a new symbol with the given fileoffset.
258  // The symbol is automatically saved in the FileInfo cache by offset.
259  // If a symbol with the same offset is already there, the new one
260  // replaces the old one.
261  int offset = address - vmbase;
262  if ( m_symbolCache.size() && ( m_symbolCache.back().OFFSET == offset ) )
263  m_symbolCache.back().NAME = symbolName;
264  else
265  m_symbolCache.push_back( CacheItem( address - vmbase, symbolName ) );
266  }
267  }
268 };
269 
270 static std::map<std::string, unsigned int> modules_tot_samples;
271 static std::map<std::string, FileInfo> libsInfo;
272 static int nehalem;
273 
275 static std::vector<std::string> C_events;
276 static std::vector<std::string> S_events;
277 
278 static std::vector<std::string> core_caa_events;
279 static std::vector<std::string> nhm_caa_events;
280 static std::vector<std::string> core_caa_events_displ;
281 static std::vector<std::string> nhm_caa_events_displ;
282 
284  core_caa_events.push_back( "BRANCH_INSTRUCTIONS_RETIRED" );
285  core_caa_events.push_back( "ILD_STALL" );
286  core_caa_events.push_back( "INST_RETIRED:LOADS" );
287  core_caa_events.push_back( "INST_RETIRED:OTHER" );
288  core_caa_events.push_back( "INST_RETIRED:STORES" );
289  core_caa_events.push_back( "INSTRUCTIONS_RETIRED" );
290  core_caa_events.push_back( "LOAD_BLOCK:OVERLAP_STORE" );
291  core_caa_events.push_back( "LOAD_BLOCK:STA" );
292  core_caa_events.push_back( "LOAD_BLOCK:UNTIL_RETIRE" );
293  core_caa_events.push_back( "MEM_LOAD_RETIRED:DTLB_MISS" );
294  core_caa_events.push_back( "MEM_LOAD_RETIRED:L1D_LINE_MISS" );
295  core_caa_events.push_back( "MEM_LOAD_RETIRED:L2_LINE_MISS" );
296  core_caa_events.push_back( "MISPREDICTED_BRANCH_RETIRED" );
297  // core_caa_events.push_back("RS_UOPS_DISPATCHED");
298  // core_caa_events.push_back("RS_UOPS_DISPATCHED CMASK=1");
299  core_caa_events.push_back( "RS_UOPS_DISPATCHED CMASK=1 INV=1" );
300  core_caa_events.push_back( "SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE" );
301  core_caa_events.push_back( "UNHALTED_CORE_CYCLES" );
302  // core_caa_events.push_back("UOPS_RETIRED:ANY");
303  // core_caa_events.push_back("UOPS_RETIRED:FUSED");
304  // core_caa_events.push_back("IDLE_DURING_DIV");
305 }
306 
308  nhm_caa_events.push_back( "ARITH:CYCLES_DIV_BUSY" );
309  nhm_caa_events.push_back( "BR_INST_EXEC:ANY" );
310  nhm_caa_events.push_back( "BR_INST_EXEC:DIRECT_NEAR_CALL" );
311  nhm_caa_events.push_back( "BR_INST_EXEC:INDIRECT_NEAR_CALL" );
312  nhm_caa_events.push_back( "BR_INST_EXEC:INDIRECT_NON_CALL" );
313  nhm_caa_events.push_back( "BR_INST_EXEC:NEAR_CALLS" );
314  nhm_caa_events.push_back( "BR_INST_EXEC:NON_CALLS" );
315  nhm_caa_events.push_back( "BR_INST_EXEC:RETURN_NEAR" );
316  nhm_caa_events.push_back( "BR_INST_RETIRED:ALL_BRANCHES" );
317  nhm_caa_events.push_back( "BR_INST_RETIRED:CONDITIONAL" );
318  nhm_caa_events.push_back( "BR_INST_RETIRED:NEAR_CALL" );
319  nhm_caa_events.push_back( "BR_MISP_EXEC:ANY" );
320  nhm_caa_events.push_back( "CPU_CLK_UNHALTED:THREAD_P" );
321  nhm_caa_events.push_back( "DTLB_LOAD_MISSES:WALK_COMPLETED" );
322  nhm_caa_events.push_back( "INST_RETIRED:ANY_P" );
323  nhm_caa_events.push_back( "ITLB_MISSES:WALK_COMPLETED" );
324  nhm_caa_events.push_back( "L2_RQSTS:IFETCH_HIT" );
325  nhm_caa_events.push_back( "L2_RQSTS:IFETCH_MISS" );
326  nhm_caa_events.push_back( "MEM_INST_RETIRED:LOADS" );
327  nhm_caa_events.push_back( "MEM_INST_RETIRED:STORES" );
328  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:L2_HIT" );
329  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:L3_MISS" );
330  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:L3_UNSHARED_HIT" );
331  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM" );
332  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:LOCAL_DRAM" );
333  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM" );
334  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT" );
335  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:REMOTE_DRAM" );
336  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM" );
337  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM" );
338  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP" );
339  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD" );
340  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM" );
341  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT" );
342  nhm_caa_events.push_back( "RESOURCE_STALLS:ANY" );
343  nhm_caa_events.push_back( "SSEX_UOPS_RETIRED:PACKED_DOUBLE" );
344  nhm_caa_events.push_back( "SSEX_UOPS_RETIRED:PACKED_SINGLE" );
345  nhm_caa_events.push_back( "UOPS_DECODED:MS CMASK=1" );
346  nhm_caa_events.push_back( "UOPS_ISSUED:ANY CMASK=1 INV=1" );
347  nhm_caa_events.push_back( "ITLB_MISS_RETIRED" );
348  nhm_caa_events.push_back( "UOPS_RETIRED:ANY" );
349 }
350 
352  for ( std::vector<std::string>::const_iterator it = core_caa_events.begin(); it != core_caa_events.end(); ++it ) {
353  if ( find( C_events.begin(), C_events.end(), ( *it ) ) == C_events.end() ) {
354  fprintf( stderr, "ERROR: Cannot find event %s!!!\naborting...\n", ( *it ).c_str() );
355  return false;
356  }
357  }
358  return true;
359 }
360 
362  for ( std::vector<std::string>::const_iterator it = nhm_caa_events.begin(); it != nhm_caa_events.end(); ++it ) {
363  if ( find( C_events.begin(), C_events.end(), ( *it ) ) == C_events.end() ) {
364  fprintf( stderr, "ERROR: Cannot find event %s!!!\naborting...\n", ( *it ).c_str() );
365  return false;
366  }
367  }
368  return true;
369 }
370 
372  core_caa_events_displ.push_back( "Total Cycles" );
373  core_caa_events_displ.push_back( "Stalled Cycles" );
374  core_caa_events_displ.push_back( "% of Total Cycles" );
375  core_caa_events_displ.push_back( "Instructions Retired" );
376  core_caa_events_displ.push_back( "CPI" );
377  core_caa_events_displ.push_back( "" );
378  core_caa_events_displ.push_back( "iMargin" );
379  core_caa_events_displ.push_back( "iFactor" );
380  core_caa_events_displ.push_back( "" );
381  core_caa_events_displ.push_back( "Counted Stalled Cycles" );
382  core_caa_events_displ.push_back( "" );
383  core_caa_events_displ.push_back( "L2 Miss Impact" );
384  core_caa_events_displ.push_back( "L2 Miss % of counted Stalled Cycles" );
385  core_caa_events_displ.push_back( "" );
386  core_caa_events_displ.push_back( "L2 Hit Impact" );
387  core_caa_events_displ.push_back( "L2 Hit % of counted Stalled Cycles" );
388  core_caa_events_displ.push_back( "" );
389  core_caa_events_displ.push_back( "L1 DTLB Miss Impact" );
390  core_caa_events_displ.push_back( "L1 DTLB Miss % of counted Stalled Cycles" );
391  core_caa_events_displ.push_back( "" );
392  core_caa_events_displ.push_back( "LCP Stalls Impact" );
393  core_caa_events_displ.push_back( "LCP Stalls % of counted Stalled Cycles" );
394  core_caa_events_displ.push_back( "" );
395  core_caa_events_displ.push_back( "Store-Fwd Stalls Impact" );
396  core_caa_events_displ.push_back( "Store-Fwd Stalls % of counted Stalled Cycles" );
397  core_caa_events_displ.push_back( "" );
398  core_caa_events_displ.push_back( "Loads Blocked by Unknown Address Store Impact" );
399  core_caa_events_displ.push_back( "Loads Blocked % of Store-Fwd Stalls Cycles" );
400  core_caa_events_displ.push_back( "Loads Overlapped with Stores Impact" );
401  core_caa_events_displ.push_back( "Loads Overlapped % of Store-Fwd Stalls Cycles" );
402  core_caa_events_displ.push_back( "Loads Spanning across Cache Lines Impact" );
403  core_caa_events_displ.push_back( "Loads Spanning % of Store-Fwd Stalls Cycles" );
404  core_caa_events_displ.push_back( "" );
405  core_caa_events_displ.push_back( "Load Instructions" );
406  core_caa_events_displ.push_back( "Load % of all Instructions" );
407  core_caa_events_displ.push_back( "Store Instructions" );
408  core_caa_events_displ.push_back( "Store % of all Instructions" );
409  core_caa_events_displ.push_back( "Branch Instructions" );
410  core_caa_events_displ.push_back( "Branch % of all Instructions" );
411  core_caa_events_displ.push_back( "Packed SIMD Computational Instructions" );
412  core_caa_events_displ.push_back( "Packed SIMD % of all Instructions" );
413  core_caa_events_displ.push_back( "Other Instructions" );
414  core_caa_events_displ.push_back( "Other % of all Instructions" );
415  core_caa_events_displ.push_back( "" );
416  core_caa_events_displ.push_back( "ITLB Miss Rate in %" );
417  core_caa_events_displ.push_back( "% of Mispredicted Branches" );
418 }
419 
420 void calc_core_deriv_values( double totalCycles ) {
421  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
422  ++it ) {
423  ( it->second )["Total Cycles"] = ( it->second )["UNHALTED_CORE_CYCLES"];
424  ( it->second )["Stalled Cycles"] = ( it->second )["RS_UOPS_DISPATCHED CMASK=1 INV=1"];
425  ( it->second )["L2 Miss Impact"] = ( it->second )["MEM_LOAD_RETIRED:L2_LINE_MISS"] * CORE_L2_MISS_CYCLES;
426  ( it->second )["L2 Hit Impact"] =
427  ( ( it->second )["MEM_LOAD_RETIRED:L1D_LINE_MISS"] - ( it->second )["MEM_LOAD_RETIRED:L2_LINE_MISS"] ) *
429  ( it->second )["L1 DTLB Miss Impact"] = ( it->second )["MEM_LOAD_RETIRED:DTLB_MISS"] * CORE_L1_DTLB_MISS_CYCLES;
430  ( it->second )["LCP Stalls Impact"] = ( it->second )["ILD_STALL"] * CORE_LCP_STALL_CYCLES;
431  ( it->second )["Loads Blocked by Unknown Address Store Impact"] =
432  ( it->second )["LOAD_BLOCK:STA"] * CORE_UNKNOWN_ADDR_STORE_CYCLES;
433  ( it->second )["Loads Overlapped with Stores Impact"] =
434  ( it->second )["LOAD_BLOCK:OVERLAP_STORE"] * CORE_OVERLAPPING_CYCLES;
435  ( it->second )["Loads Spanning across Cache Lines Impact"] =
436  ( it->second )["LOAD_BLOCK:UNTIL_RETIRE"] * CORE_SPAN_ACROSS_CACHE_LINE_CYCLES;
437  ( it->second )["Store-Fwd Stalls Impact"] = ( it->second )["Loads Blocked by Unknown Address Store Impact"] +
438  ( it->second )["Loads Overlapped with Stores Impact"] +
439  ( it->second )["Loads Spanning across Cache Lines Impact"];
440  ( it->second )["Counted Stalled Cycles"] =
441  ( it->second )["L2 Miss Impact"] + ( it->second )["L2 Hit Impact"] + ( it->second )["LCP Stalls Impact"] +
442  ( it->second )["L1 DTLB Miss Impact"] + ( it->second )["Store-Fwd Stalls Impact"];
443  ( it->second )["Instructions Retired"] = ( it->second )["INSTRUCTIONS_RETIRED"];
444  ( it->second )["ITLB Miss Rate in %"] =
445  ( ( it->second )["ITLB_MISS_RETIRED"] / ( it->second )["INSTRUCTIONS_RETIRED"] ) * 100;
446  ( it->second )["Branch Instructions"] = ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"];
447  ( it->second )["Load Instructions"] = ( it->second )["INST_RETIRED:LOADS"];
448  ( it->second )["Store Instructions"] = ( it->second )["INST_RETIRED:STORES"];
449  ( it->second )["Other Instructions"] = ( it->second )["INST_RETIRED:OTHER"] -
450  ( it->second )["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] -
451  ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"];
452  ( it->second )["% of Mispredicted Branches"] =
453  ( ( it->second )["MISPREDICTED_BRANCH_RETIRED"] / ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"] ) * 100;
454  ( it->second )["Packed SIMD Computational Instructions"] =
455  ( it->second )["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"];
456  ( it->second )["Counted Instructions Retired"] =
457  ( it->second )["Branch Instructions"] + ( it->second )["Load Instructions"] +
458  ( it->second )["Store Instructions"] + ( it->second )["Other Instructions"] +
459  ( it->second )["Packed SIMD Computational Instructions"];
460  ( it->second )["CPI"] = ( it->second )["UNHALTED_CORE_CYCLES"] / ( it->second )["INSTRUCTIONS_RETIRED"];
461 
462  double localPerformanceImprovement = ( it->second )["CPI"] / EXPECTED_CPI;
463  double cyclesAfterImprovement = ( it->second )["UNHALTED_CORE_CYCLES"] / localPerformanceImprovement;
464  double totalCyclesAfterImprovement = totalCycles - ( it->second )["UNHALTED_CORE_CYCLES"] + cyclesAfterImprovement;
465  ( it->second )["iMargin"] = 100 - ( totalCyclesAfterImprovement / totalCycles ) * 100;
466 
467  ( it->second )["% of Total Cycles"] =
468  ( it->second )["RS_UOPS_DISPATCHED CMASK=1 INV=1"] * 100 / ( it->second )["UNHALTED_CORE_CYCLES"];
469  ( it->second )["L2 Miss % of counted Stalled Cycles"] =
470  ( it->second )["L2 Miss Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
471  ( it->second )["L2 Hit % of counted Stalled Cycles"] =
472  ( it->second )["L2 Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
473  ( it->second )["L1 DTLB Miss % of counted Stalled Cycles"] =
474  ( it->second )["L1 DTLB Miss Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
475  ( it->second )["LCP Stalls % of counted Stalled Cycles"] =
476  ( it->second )["LCP Stalls Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
477  ( it->second )["Store-Fwd Stalls % of counted Stalled Cycles"] =
478  ( it->second )["Store-Fwd Stalls Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
479  ( it->second )["Loads Blocked % of Store-Fwd Stalls Cycles"] =
480  ( it->second )["Loads Blocked by Unknown Address Store Impact"] * 100 /
481  ( it->second )["Store-Fwd Stalls Impact"];
482  ( it->second )["Loads Overlapped % of Store-Fwd Stalls Cycles"] =
483  ( it->second )["Loads Overlapped with Stores Impact"] * 100 / ( it->second )["Store-Fwd Stalls Impact"];
484  ( it->second )["Loads Spanning % of Store-Fwd Stalls Cycles"] =
485  ( it->second )["Loads Spanning across Cache Lines Impact"] * 100 / ( it->second )["Store-Fwd Stalls Impact"];
486 
487  ( it->second )["Load % of all Instructions"] =
488  ( it->second )["INST_RETIRED:LOADS"] * 100 / ( it->second )["Counted Instructions Retired"];
489  ( it->second )["Store % of all Instructions"] =
490  ( it->second )["INST_RETIRED:STORES"] * 100 / ( it->second )["Counted Instructions Retired"];
491  ( it->second )["Branch % of all Instructions"] =
492  ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"] * 100 / ( it->second )["Counted Instructions Retired"];
493  ( it->second )["Packed SIMD % of all Instructions"] =
494  ( it->second )["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] * 100 /
495  ( it->second )["Counted Instructions Retired"];
496  ( it->second )["Other % of all Instructions"] =
497  ( it->second )["Other Instructions"] * 100 / ( it->second )["Counted Instructions Retired"];
498  }
499 }
500 
502  nhm_caa_events_displ.push_back( "Total Cycles" );
503  nhm_caa_events_displ.push_back( "Instructions Retired" );
504  nhm_caa_events_displ.push_back( "CPI" );
505  nhm_caa_events_displ.push_back( "" );
506  nhm_caa_events_displ.push_back( "iMargin" );
507  nhm_caa_events_displ.push_back( "iFactor" );
508  nhm_caa_events_displ.push_back( "" );
509  nhm_caa_events_displ.push_back( "Stalled Cycles" );
510  nhm_caa_events_displ.push_back( "% of Total Cycles" );
511  nhm_caa_events_displ.push_back( "Total Counted Stalled Cycles" );
512  nhm_caa_events_displ.push_back( "" );
513  nhm_caa_events_displ.push_back( "Instruction Starvation % of Total Cycles" );
514  nhm_caa_events_displ.push_back( "# of Instructions per Call" );
515  nhm_caa_events_displ.push_back( "% of Total Cycles spent handling FP exceptions" );
516  nhm_caa_events_displ.push_back( "" );
517  nhm_caa_events_displ.push_back( "Counted Stalled Cycles due to Load Ops" );
518  nhm_caa_events_displ.push_back( "" );
519  nhm_caa_events_displ.push_back( "L2 Hit Impact" );
520  nhm_caa_events_displ.push_back( "L2 Hit % of Load Stalls" );
521  nhm_caa_events_displ.push_back( "" );
522  nhm_caa_events_displ.push_back( "L3 Unshared Hit Impact" );
523  nhm_caa_events_displ.push_back( "L3 Unshared Hit % of Load Stalls" );
524  nhm_caa_events_displ.push_back( "" );
525  nhm_caa_events_displ.push_back( "L2 Other Core Hit Impact" );
526  nhm_caa_events_displ.push_back( "L2 Other Core Hit % of Load Stalls" );
527  nhm_caa_events_displ.push_back( "" );
528  nhm_caa_events_displ.push_back( "L2 Other Core Hit Modified Impact" );
529  nhm_caa_events_displ.push_back( "L2 Other Core Hit Modified % of Load Stalls" );
530  nhm_caa_events_displ.push_back( "" );
531  nhm_caa_events_displ.push_back( "L3 Miss -> Local DRAM Hit Impact" );
532  nhm_caa_events_displ.push_back( "L3 Miss -> Remote DRAM Hit Impact" );
533  nhm_caa_events_displ.push_back( "L3 Miss -> Remote Cache Hit Impact" );
534  nhm_caa_events_displ.push_back( "L3 Miss -> Total Impact" );
535  nhm_caa_events_displ.push_back( "L3 Miss % of Load Stalls" );
536  nhm_caa_events_displ.push_back( "" );
537  nhm_caa_events_displ.push_back( "L1 DTLB Miss Impact" );
538  nhm_caa_events_displ.push_back( "L1 DTLB Miss % of Load Stalls" );
539  nhm_caa_events_displ.push_back( "" );
540  nhm_caa_events_displ.push_back( "Cycles spent during DIV & SQRT Ops" );
541  nhm_caa_events_displ.push_back( "DIV & SQRT Ops % of counted Stalled Cycles" );
542  nhm_caa_events_displ.push_back( "" );
543  nhm_caa_events_displ.push_back( "Total L2 IFETCH misses" );
544  nhm_caa_events_displ.push_back( "% of L2 IFETCH misses" );
545  nhm_caa_events_displ.push_back( "" );
546  nhm_caa_events_displ.push_back( "% of IFETCHes served by Local DRAM" );
547  nhm_caa_events_displ.push_back( "% of IFETCHes served by L3 (Modified)" );
548  nhm_caa_events_displ.push_back( "% of IFETCHes served by L3 (Clean Snoop)" );
549  nhm_caa_events_displ.push_back( "% of IFETCHes served by Remote L2" );
550  nhm_caa_events_displ.push_back( "% of IFETCHes served by Remote DRAM" );
551  nhm_caa_events_displ.push_back( "% of IFETCHes served by L3 (No Snoop)" );
552  nhm_caa_events_displ.push_back( "" );
553  nhm_caa_events_displ.push_back( "Total L2 IFETCH miss Impact" );
554  nhm_caa_events_displ.push_back( "" );
555  nhm_caa_events_displ.push_back( "Cycles IFETCH served by Local DRAM" );
556  nhm_caa_events_displ.push_back( "Local DRAM IFECTHes % Impact" );
557  nhm_caa_events_displ.push_back( "" );
558  nhm_caa_events_displ.push_back( "Cycles IFETCH served by L3 (Modified)" );
559  nhm_caa_events_displ.push_back( "L3 (Modified) IFECTHes % Impact" );
560  nhm_caa_events_displ.push_back( "" );
561  nhm_caa_events_displ.push_back( "Cycles IFETCH served by L3 (Clean Snoop)" );
562  nhm_caa_events_displ.push_back( "L3 (Clean Snoop) IFECTHes % Impact" );
563  nhm_caa_events_displ.push_back( "" );
564  nhm_caa_events_displ.push_back( "Cycles IFETCH served by Remote L2" );
565  nhm_caa_events_displ.push_back( "Remote L2 IFECTHes % Impact" );
566  nhm_caa_events_displ.push_back( "" );
567  nhm_caa_events_displ.push_back( "Cycles IFETCH served by Remote DRAM" );
568  nhm_caa_events_displ.push_back( "Remote DRAM IFECTHes % Impact" );
569  nhm_caa_events_displ.push_back( "" );
570  nhm_caa_events_displ.push_back( "Cycles IFETCH served by L3 (No Snoop)" );
571  nhm_caa_events_displ.push_back( "L3 (No Snoop) IFECTHes % Impact" );
572  nhm_caa_events_displ.push_back( "" );
573  nhm_caa_events_displ.push_back( "Total Branch Instructions Executed" );
574  nhm_caa_events_displ.push_back( "% of Mispredicted Branches" );
575  nhm_caa_events_displ.push_back( "" );
576  nhm_caa_events_displ.push_back( "Direct Near Calls % of Total Branches Executed" );
577  nhm_caa_events_displ.push_back( "Indirect Near Calls % of Total Branches Executed" );
578  nhm_caa_events_displ.push_back( "Indirect Near Non-Calls % of Total Branches Executed" );
579  nhm_caa_events_displ.push_back( "All Near Calls % of Total Branches Executed" );
580  nhm_caa_events_displ.push_back( "All Non Calls % of Total Branches Executed" );
581  nhm_caa_events_displ.push_back( "All Returns % of Total Branches Executed" );
582  nhm_caa_events_displ.push_back( "" );
583  nhm_caa_events_displ.push_back( "Total Branch Instructions Retired" );
584  nhm_caa_events_displ.push_back( "Conditionals % of Total Branches Retired" );
585  nhm_caa_events_displ.push_back( "Near Calls % of Total Branches Retired" );
586  nhm_caa_events_displ.push_back( "" );
587  nhm_caa_events_displ.push_back( "L1 ITLB Miss Impact" );
588  nhm_caa_events_displ.push_back( "ITLB Miss Rate in %" );
589  nhm_caa_events_displ.push_back( "" );
590  nhm_caa_events_displ.push_back( "Branch Instructions" );
591  nhm_caa_events_displ.push_back( "Branch % of all Instructions" );
592  nhm_caa_events_displ.push_back( "" );
593  nhm_caa_events_displ.push_back( "Load Instructions" );
594  nhm_caa_events_displ.push_back( "Load % of all Instructions" );
595  nhm_caa_events_displ.push_back( "" );
596  nhm_caa_events_displ.push_back( "Store Instructions" );
597  nhm_caa_events_displ.push_back( "Store % of all Instructions" );
598  nhm_caa_events_displ.push_back( "" );
599  nhm_caa_events_displ.push_back( "Other Instructions" );
600  nhm_caa_events_displ.push_back( "Other % of all Instructions" );
601  nhm_caa_events_displ.push_back( "" );
602  nhm_caa_events_displ.push_back( "Packed UOPS Retired" );
603  nhm_caa_events_displ.push_back( "Packed % of all UOPS Retired" );
604 }
605 
606 void calc_nhm_deriv_values( double totalCycles ) {
607  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
608  ++it ) {
609  ( it->second )["Total Cycles"] = ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
610 
611  ( it->second )["L2 Hit Impact"] = ( it->second )["MEM_LOAD_RETIRED:L2_HIT"] * I7_L2_HIT_CYCLES;
612  ( it->second )["L3 Unshared Hit Impact"] =
613  ( it->second )["MEM_LOAD_RETIRED:L3_UNSHARED_HIT"] * I7_L3_UNSHARED_HIT_CYCLES;
614  if ( ( it->second )["MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] >
615  ( it->second )["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] ) {
616  ( it->second )["L2 Other Core Hit Impact"] = ( ( it->second )["MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] -
617  ( it->second )["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] ) *
619  } else {
620  ( it->second )["L2 Other Core Hit Impact"] = 0.0;
621  }
622  ( it->second )["L2 Other Core Hit Modified Impact"] =
623  ( it->second )["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] * I7_OTHER_CORE_L2_HITM_CYCLES;
624  ( it->second )["L3 Miss -> Local DRAM Hit Impact"] =
625  ( it->second )["MEM_UNCORE_RETIRED:LOCAL_DRAM"] * I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES;
626  ( it->second )["L3 Miss -> Remote DRAM Hit Impact"] =
627  ( it->second )["MEM_UNCORE_RETIRED:REMOTE_DRAM"] * I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES;
628  ( it->second )["L3 Miss -> Remote Cache Hit Impact"] =
629  ( it->second )["MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT"] * I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES;
630  ( it->second )["L3 Miss -> Total Impact"] = ( it->second )["L3 Miss -> Local DRAM Hit Impact"] +
631  ( it->second )["L3 Miss -> Remote DRAM Hit Impact"] +
632  ( it->second )["L3 Miss -> Remote Cache Hit Impact"];
633  ( it->second )["L1 DTLB Miss Impact"] =
634  ( it->second )["DTLB_LOAD_MISSES:WALK_COMPLETED"] * I7_L1_DTLB_WALK_COMPLETED_CYCLES;
635  ( it->second )["Counted Stalled Cycles due to Load Ops"] =
636  ( it->second )["L3 Miss -> Total Impact"] + ( it->second )["L2 Hit Impact"] +
637  ( it->second )["L1 DTLB Miss Impact"] + ( it->second )["L3 Unshared Hit Impact"] +
638  ( it->second )["L2 Other Core Hit Modified Impact"] + ( it->second )["L2 Other Core Hit Impact"];
639  ( it->second )["Cycles spent during DIV & SQRT Ops"] = ( it->second )["ARITH:CYCLES_DIV_BUSY"];
640  ( it->second )["Total Counted Stalled Cycles"] =
641  ( it->second )["Counted Stalled Cycles due to Load Ops"] + ( it->second )["Cycles spent during DIV & SQRT Ops"];
642  ( it->second )["Stalled Cycles"] =
643  ( it->second )["Total Counted Stalled Cycles"]; // TO BE FIXED when UOPS_EXECUTED:0x3f is fixed!!
644  ( it->second )["% of Total Cycles"] =
645  ( it->second )["Stalled Cycles"] * 100 / ( it->second )["CPU_CLK_UNHALTED:THREAD_P"]; // TO BE FIXED!! see above
646  ( it->second )["L3 Miss % of Load Stalls"] =
647  ( it->second )["L3 Miss -> Total Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
648  ( it->second )["L2 Hit % of Load Stalls"] =
649  ( it->second )["L2 Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
650  ( it->second )["L1 DTLB Miss % of Load Stalls"] =
651  ( it->second )["L1 DTLB Miss Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
652  ( it->second )["L3 Unshared Hit % of Load Stalls"] =
653  ( it->second )["L3 Unshared Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
654  ( it->second )["L2 Other Core Hit % of Load Stalls"] =
655  ( it->second )["L2 Other Core Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
656  ( it->second )["L2 Other Core Hit Modified % of Load Stalls"] =
657  ( it->second )["L2 Other Core Hit Modified Impact"] * 100 /
658  ( it->second )["Counted Stalled Cycles due to Load Ops"];
659  ( it->second )["DIV & SQRT Ops % of counted Stalled Cycles"] =
660  ( it->second )["Cycles spent during DIV & SQRT Ops"] * 100 / ( it->second )["Total Counted Stalled Cycles"];
661 
662  ( it->second )["Cycles IFETCH served by Local DRAM"] =
663  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT;
664  ( it->second )["Cycles IFETCH served by L3 (Modified)"] =
665  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * I7_IFETCH_L2_MISS_L3_HITM;
666  ( it->second )["Cycles IFETCH served by L3 (Clean Snoop)"] =
667  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * I7_IFETCH_L2_MISS_L3_HIT_SNOOP;
668  ( it->second )["Cycles IFETCH served by Remote L2"] =
669  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD;
670  ( it->second )["Cycles IFETCH served by Remote DRAM"] =
671  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT;
672  ( it->second )["Cycles IFETCH served by L3 (No Snoop)"] =
673  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP;
674  ( it->second )["Total L2 IFETCH miss Impact"] =
675  ( it->second )["Cycles IFETCH served by Local DRAM"] + ( it->second )["Cycles IFETCH served by L3 (Modified)"] +
676  ( it->second )["Cycles IFETCH served by L3 (Clean Snoop)"] +
677  ( it->second )["Cycles IFETCH served by Remote L2"] + ( it->second )["Cycles IFETCH served by Remote DRAM"] +
678  ( it->second )["Cycles IFETCH served by L3 (No Snoop)"];
679  ( it->second )["Local DRAM IFECTHes % Impact"] =
680  ( it->second )["Cycles IFETCH served by Local DRAM"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
681  ( it->second )["L3 (Modified) IFECTHes % Impact"] =
682  ( it->second )["Cycles IFETCH served by L3 (Modified)"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
683  ( it->second )["L3 (Clean Snoop) IFECTHes % Impact"] = ( it->second )["Cycles IFETCH served by L3 (Clean Snoop)"] *
684  100 / ( it->second )["Total L2 IFETCH miss Impact"];
685  ( it->second )["Remote L2 IFECTHes % Impact"] =
686  ( it->second )["Cycles IFETCH served by Remote L2"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
687  ( it->second )["Remote DRAM IFECTHes % Impact"] =
688  ( it->second )["Cycles IFETCH served by Remote DRAM"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
689  ( it->second )["L3 (No Snoop) IFECTHes % Impact"] =
690  ( it->second )["Cycles IFETCH served by L3 (No Snoop)"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
691  ( it->second )["Total L2 IFETCH misses"] = ( it->second )["L2_RQSTS:IFETCH_MISS"];
692  ( it->second )["% of IFETCHes served by Local DRAM"] =
693  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
694  ( it->second )["% of IFETCHes served by L3 (Modified)"] =
695  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
696  ( it->second )["% of IFETCHes served by L3 (Clean Snoop)"] =
697  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * 100 /
698  ( it->second )["L2_RQSTS:IFETCH_MISS"];
699  ( it->second )["% of IFETCHes served by Remote L2"] =
700  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * 100 /
701  ( it->second )["L2_RQSTS:IFETCH_MISS"];
702  ( it->second )["% of IFETCHes served by Remote DRAM"] =
703  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
704  ( it->second )["% of IFETCHes served by L3 (No Snoop)"] =
705  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
706  ( it->second )["% of L2 IFETCH misses"] =
707  ( it->second )["L2_RQSTS:IFETCH_MISS"] * 100 /
708  ( ( it->second )["L2_RQSTS:IFETCH_MISS"] + ( it->second )["L2_RQSTS:IFETCH_HIT"] );
709  ( it->second )["L1 ITLB Miss Impact"] =
710  ( it->second )["ITLB_MISSES:WALK_COMPLETED"] * I7_L1_ITLB_WALK_COMPLETED_CYCLES;
711 
712  ( it->second )["Total Branch Instructions Executed"] = ( it->second )["BR_INST_EXEC:ANY"];
713  ( it->second )["% of Mispredicted Branches"] =
714  ( it->second )["BR_MISP_EXEC:ANY"] * 100 / ( it->second )["BR_INST_EXEC:ANY"];
715  ( it->second )["Direct Near Calls % of Total Branches Executed"] =
716  ( it->second )["BR_INST_EXEC:DIRECT_NEAR_CALL"] * 100 / ( it->second )["Total Branch Instructions Executed"];
717  ( it->second )["Indirect Near Calls % of Total Branches Executed"] =
718  ( it->second )["BR_INST_EXEC:INDIRECT_NEAR_CALL"] * 100 / ( it->second )["Total Branch Instructions Executed"];
719  ( it->second )["Indirect Near Non-Calls % of Total Branches Executed"] =
720  ( it->second )["BR_INST_EXEC:INDIRECT_NON_CALL"] * 100 / ( it->second )["Total Branch Instructions Executed"];
721  ( it->second )["All Near Calls % of Total Branches Executed"] =
722  ( it->second )["BR_INST_EXEC:NEAR_CALLS"] * 100 / ( it->second )["Total Branch Instructions Executed"];
723  ( it->second )["All Non Calls % of Total Branches Executed"] =
724  ( it->second )["BR_INST_EXEC:NON_CALLS"] * 100 / ( it->second )["Total Branch Instructions Executed"];
725  ( it->second )["All Returns % of Total Branches Executed"] =
726  ( it->second )["BR_INST_EXEC:RETURN_NEAR"] * 100 / ( it->second )["Total Branch Instructions Executed"];
727  ( it->second )["Total Branch Instructions Retired"] = ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"];
728  ( it->second )["Conditionals % of Total Branches Retired"] =
729  ( it->second )["BR_INST_RETIRED:CONDITIONAL"] * 100 / ( it->second )["Total Branch Instructions Retired"];
730  ( it->second )["Near Calls % of Total Branches Retired"] =
731  ( it->second )["BR_INST_RETIRED:NEAR_CALL"] * 100 / ( it->second )["Total Branch Instructions Retired"];
732 
733  ( it->second )["Instruction Starvation % of Total Cycles"] =
734  ( ( it->second )["UOPS_ISSUED:ANY CMASK=1 INV=1"] - ( it->second )["RESOURCE_STALLS:ANY"] ) * 100 /
735  ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
736  ( it->second )["% of Total Cycles spent handling FP exceptions"] =
737  ( it->second )["UOPS_DECODED:MS CMASK=1"] * 100 / ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
738  ( it->second )["# of Instructions per Call"] =
739  ( it->second )["INST_RETIRED:ANY_P"] / ( it->second )["BR_INST_EXEC:NEAR_CALLS"];
740 
741  ( it->second )["Instructions Retired"] = ( it->second )["INST_RETIRED:ANY_P"];
742  ( it->second )["ITLB Miss Rate in %"] =
743  ( ( it->second )["ITLB_MISS_RETIRED"] / ( it->second )["INST_RETIRED:ANY_P"] ) * 100;
744 
745  ( it->second )["Branch Instructions"] = ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"];
746  ( it->second )["Load Instructions"] = ( it->second )["MEM_INST_RETIRED:LOADS"];
747  ( it->second )["Store Instructions"] = ( it->second )["MEM_INST_RETIRED:STORES"];
748  ( it->second )["Other Instructions"] =
749  ( it->second )["Instructions Retired"] - ( it->second )["MEM_INST_RETIRED:LOADS"] -
750  ( it->second )["MEM_INST_RETIRED:STORES"] - ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"];
751  ( it->second )["Packed UOPS Retired"] =
752  ( it->second )["SSEX_UOPS_RETIRED:PACKED_DOUBLE"] + ( it->second )["SSEX_UOPS_RETIRED:PACKED_SINGLE"];
753  ( it->second )["CPI"] = ( it->second )["CPU_CLK_UNHALTED:THREAD_P"] / ( it->second )["INST_RETIRED:ANY_P"];
754 
755  double localPerformanceImprovement = ( it->second )["CPI"] / EXPECTED_CPI;
756  double cyclesAfterImprovement = ( it->second )["CPU_CLK_UNHALTED:THREAD_P"] / localPerformanceImprovement;
757  double totalCyclesAfterImprovement =
758  totalCycles - ( it->second )["CPU_CLK_UNHALTED:THREAD_P"] + cyclesAfterImprovement;
759  ( it->second )["iMargin"] = 100 - ( totalCyclesAfterImprovement / totalCycles ) * 100;
760 
761  ( it->second )["Load % of all Instructions"] =
762  ( it->second )["MEM_INST_RETIRED:LOADS"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
763  ( it->second )["Store % of all Instructions"] =
764  ( it->second )["MEM_INST_RETIRED:STORES"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
765  ( it->second )["Branch % of all Instructions"] =
766  ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
767  ( it->second )["Other % of all Instructions"] =
768  ( it->second )["Other Instructions"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
769 
770  ( it->second )["Packed % of all UOPS Retired"] =
771  ( it->second )["Packed UOPS Retired"] * 100 / ( it->second )["UOPS_RETIRED:ANY"];
772  }
773 }
774 
775 // S_module class defining the objects containing sampling results for each module
776 class S_module {
777 private:
779  unsigned int total_num_samples;
783  unsigned int cmask;
784  unsigned int inv;
785  unsigned int sp;
786 
787 public:
788  S_module() { clear(); }
789  void clear() {
790  samples.clear();
791  total_num_samples = 0;
792  sp = 0;
793  module_name = "";
794  cmask = 0;
795  inv = 0;
796  sp = 0;
797  }
798  void init( const char* name, const char* architecture, const char* event_name, unsigned int c_mask,
799  unsigned int inv_mask, unsigned int smpl_period ) {
800  module_name = name;
801  arch = architecture;
802  event = event_name;
803  cmask = c_mask;
804  inv = inv_mask;
805  sp = smpl_period;
806  }
807  void set_total( unsigned int total ) {
808  total_num_samples = total;
809  return;
810  }
811  unsigned int get_smpl_period() { return sp; }
812  unsigned int get_inv_mask() { return inv; }
813  unsigned int get_c_mask() { return cmask; }
814  const std::string& get_arch() { return arch; }
815  const std::string& get_event() { return event; }
816  void add_sample( const char* index, unsigned int value ) {
817  samples[index] += value;
818  return;
819  }
820  bool get_max( char* index, unsigned int& value ) {
821  auto max_pos = std::max_element( samples.begin(), samples.end(),
822  []( const auto& lhs, const auto& rhs ) { return lhs.second < rhs.second; } );
823  if ( max_pos == samples.end() ) return false;
824  strcpy( index, ( max_pos->first ).c_str() );
825  value = max_pos->second;
826  samples.erase( max_pos );
827  return true;
828  }
830  unsigned int get_total_num_samples() { return total_num_samples; }
831 };
832 
833 // void html_special_chars()
834 // const char *s : source string
835 // char *s_mod : destination string
836 // replaces special HTML characters with correctly escaped sequences to be used inside HTML code
837 void html_special_chars( const char* s, char* s_mod ) {
838  int n = strlen( s );
839  *s_mod = '\0';
840  for ( int i = 0; i < n; i++ ) {
841  switch ( s[i] ) {
842  case '<':
843  strcat( s_mod, "&lt;" );
844  break;
845  case '>':
846  strcat( s_mod, "&gt;" );
847  break;
848  case '&':
849  strcat( s_mod, "&amp;" );
850  break;
851  case '"':
852  strcat( s_mod, "&quot;" );
853  break;
854  default:
855  char to_app[2];
856  to_app[0] = s[i];
857  to_app[1] = '\0';
858  strcat( s_mod, to_app );
859  break;
860  }
861  }
862  return;
863 }
864 
865 // func_name()
866 // const char *demangled_symbol : string corresponding to the demangled symbol found by the read_file() function
867 // parses the argument and returns just the function name without arguments or return types
868 const char* func_name( const char* demangled_symbol ) {
869  char* operator_string_begin = const_cast<char*>( strstr( demangled_symbol, "operator" ) );
870  if ( operator_string_begin != NULL ) {
871  char* operator_string_end = operator_string_begin + 8;
872  while ( *operator_string_end == ' ' ) operator_string_end++;
873  if ( strstr( operator_string_end, "delete[]" ) == operator_string_end ) {
874  operator_string_end += 8;
875  *operator_string_end = '\0';
876  } else if ( strstr( operator_string_end, "delete" ) == operator_string_end ) {
877  operator_string_end += 6;
878  *operator_string_end = '\0';
879  } else if ( strstr( operator_string_end, "new[]" ) == operator_string_end ) {
880  operator_string_end += 5;
881  *operator_string_end = '\0';
882  } else if ( strstr( operator_string_end, "new" ) == operator_string_end ) {
883  operator_string_end += 3;
884  *operator_string_end = '\0';
885  } else if ( strstr( operator_string_end, ">>=" ) == operator_string_end ) {
886  operator_string_end += 3;
887  *operator_string_end = '\0';
888  } else if ( strstr( operator_string_end, "<<=" ) == operator_string_end ) {
889  operator_string_end += 3;
890  *operator_string_end = '\0';
891  } else if ( strstr( operator_string_end, "->*" ) == operator_string_end ) {
892  operator_string_end += 3;
893  *operator_string_end = '\0';
894  } else if ( strstr( operator_string_end, "<<" ) == operator_string_end ) {
895  operator_string_end += 2;
896  *operator_string_end = '\0';
897  } else if ( strstr( operator_string_end, ">>" ) == operator_string_end ) {
898  operator_string_end += 2;
899  *operator_string_end = '\0';
900  } else if ( strstr( operator_string_end, ">=" ) == operator_string_end ) {
901  operator_string_end += 2;
902  *operator_string_end = '\0';
903  } else if ( strstr( operator_string_end, "<=" ) == operator_string_end ) {
904  operator_string_end += 2;
905  *operator_string_end = '\0';
906  } else if ( strstr( operator_string_end, "==" ) == operator_string_end ) {
907  operator_string_end += 2;
908  *operator_string_end = '\0';
909  } else if ( strstr( operator_string_end, "!=" ) == operator_string_end ) {
910  operator_string_end += 2;
911  *operator_string_end = '\0';
912  } else if ( strstr( operator_string_end, "|=" ) == operator_string_end ) {
913  operator_string_end += 2;
914  *operator_string_end = '\0';
915  } else if ( strstr( operator_string_end, "&=" ) == operator_string_end ) {
916  operator_string_end += 2;
917  *operator_string_end = '\0';
918  } else if ( strstr( operator_string_end, "^=" ) == operator_string_end ) {
919  operator_string_end += 2;
920  *operator_string_end = '\0';
921  } else if ( strstr( operator_string_end, "%=" ) == operator_string_end ) {
922  operator_string_end += 2;
923  *operator_string_end = '\0';
924  } else if ( strstr( operator_string_end, "/=" ) == operator_string_end ) {
925  operator_string_end += 2;
926  *operator_string_end = '\0';
927  } else if ( strstr( operator_string_end, "*=" ) == operator_string_end ) {
928  operator_string_end += 2;
929  *operator_string_end = '\0';
930  } else if ( strstr( operator_string_end, "-=" ) == operator_string_end ) {
931  operator_string_end += 2;
932  *operator_string_end = '\0';
933  } else if ( strstr( operator_string_end, "+=" ) == operator_string_end ) {
934  operator_string_end += 2;
935  *operator_string_end = '\0';
936  } else if ( strstr( operator_string_end, "&&" ) == operator_string_end ) {
937  operator_string_end += 2;
938  *operator_string_end = '\0';
939  } else if ( strstr( operator_string_end, "||" ) == operator_string_end ) {
940  operator_string_end += 2;
941  *operator_string_end = '\0';
942  } else if ( strstr( operator_string_end, "[]" ) == operator_string_end ) {
943  operator_string_end += 2;
944  *operator_string_end = '\0';
945  } else if ( strstr( operator_string_end, "()" ) == operator_string_end ) {
946  operator_string_end += 2;
947  *operator_string_end = '\0';
948  } else if ( strstr( operator_string_end, "++" ) == operator_string_end ) {
949  operator_string_end += 2;
950  *operator_string_end = '\0';
951  } else if ( strstr( operator_string_end, "--" ) == operator_string_end ) {
952  operator_string_end += 2;
953  *operator_string_end = '\0';
954  } else if ( strstr( operator_string_end, "->" ) == operator_string_end ) {
955  operator_string_end += 2;
956  *operator_string_end = '\0';
957  } else if ( strstr( operator_string_end, "<" ) == operator_string_end ) {
958  operator_string_end += 1;
959  *operator_string_end = '\0';
960  } else if ( strstr( operator_string_end, ">" ) == operator_string_end ) {
961  operator_string_end += 1;
962  *operator_string_end = '\0';
963  } else if ( strstr( operator_string_end, "~" ) == operator_string_end ) {
964  operator_string_end += 1;
965  *operator_string_end = '\0';
966  } else if ( strstr( operator_string_end, "!" ) == operator_string_end ) {
967  operator_string_end += 1;
968  *operator_string_end = '\0';
969  } else if ( strstr( operator_string_end, "+" ) == operator_string_end ) {
970  operator_string_end += 1;
971  *operator_string_end = '\0';
972  } else if ( strstr( operator_string_end, "-" ) == operator_string_end ) {
973  operator_string_end += 1;
974  *operator_string_end = '\0';
975  } else if ( strstr( operator_string_end, "*" ) == operator_string_end ) {
976  operator_string_end += 1;
977  *operator_string_end = '\0';
978  } else if ( strstr( operator_string_end, "/" ) == operator_string_end ) {
979  operator_string_end += 1;
980  *operator_string_end = '\0';
981  } else if ( strstr( operator_string_end, "%" ) == operator_string_end ) {
982  operator_string_end += 1;
983  *operator_string_end = '\0';
984  } else if ( strstr( operator_string_end, "^" ) == operator_string_end ) {
985  operator_string_end += 1;
986  *operator_string_end = '\0';
987  } else if ( strstr( operator_string_end, "&" ) == operator_string_end ) {
988  operator_string_end += 1;
989  *operator_string_end = '\0';
990  } else if ( strstr( operator_string_end, "|" ) == operator_string_end ) {
991  operator_string_end += 1;
992  *operator_string_end = '\0';
993  } else if ( strstr( operator_string_end, "," ) == operator_string_end ) {
994  operator_string_end += 1;
995  *operator_string_end = '\0';
996  } else if ( strstr( operator_string_end, "=" ) == operator_string_end ) {
997  operator_string_end += 1;
998  *operator_string_end = '\0';
999  }
1000  return operator_string_begin;
1001  }
1002  char* end_of_demangled_name = const_cast<char*>( strrchr( demangled_symbol, ')' ) );
1003  if ( end_of_demangled_name != NULL ) {
1004  int pars = 1;
1005  char c;
1006  while ( pars > 0 && end_of_demangled_name != demangled_symbol ) {
1007  c = *( --end_of_demangled_name );
1008  if ( c == ')' ) {
1009  pars++;
1010  } else if ( c == '(' ) {
1011  pars--;
1012  }
1013  }
1014  } else {
1015  return demangled_symbol;
1016  }
1017  char* end_of_func_name = end_of_demangled_name;
1018  if ( end_of_func_name != NULL ) {
1019  *end_of_func_name = '\0';
1020  char c = *( --end_of_func_name );
1021  if ( c == '>' ) {
1022  int pars = 1;
1023  while ( pars > 0 && end_of_func_name != demangled_symbol ) {
1024  c = *( --end_of_func_name );
1025  if ( c == '>' ) {
1026  pars++;
1027  } else if ( c == '<' ) {
1028  pars--;
1029  }
1030  }
1031  *end_of_func_name = '\0';
1032  }
1033  c = *( --end_of_func_name );
1034  while ( isalnum( c ) || c == '_' || c == '~' ) { c = *( --end_of_func_name ); }
1035  return ++end_of_func_name;
1036  }
1037  return demangled_symbol;
1038 }
1039 
1040 // put_module()
1041 // S_module *cur_module : pointer to the current module object to be written out in to HTML file
1042 // const char *event : name of architectural event being analysed
1043 // const char *dir : directory where sampling results input files are located
1044 // creates or updates the HTML output file using information contained inside the module object given as a parameter
1045 void put_S_module( S_module* cur_module, const char* dir ) {
1046  char module_name[MAX_MODULE_NAME_LENGTH];
1047  bzero( module_name, MAX_MODULE_NAME_LENGTH );
1048  strcpy( module_name, ( cur_module->get_module_name() ).c_str() );
1049  char module_filename[MAX_FILENAME_LENGTH];
1050  bzero( module_filename, MAX_FILENAME_LENGTH );
1051  strcpy( module_filename, dir );
1052  strcat( module_filename, "/HTML/" );
1053  strcat( module_filename, module_name );
1054  strcat( module_filename, ".html" );
1055  char event[MAX_EVENT_NAME_LENGTH];
1056  bzero( event, MAX_EVENT_NAME_LENGTH );
1057  strcpy( event, ( cur_module->get_event() ).c_str() );
1058  std::map<std::string, unsigned int>::iterator result = modules_tot_samples.find( cur_module->get_module_name() );
1059  FILE* module_file;
1060  if ( result == modules_tot_samples.end() ) // not found
1061  {
1062  if ( ( !strcmp( event, "UNHALTED_CORE_CYCLES" ) && !nehalem ) ||
1063  ( !strcmp( event, "CPU_CLK_UNHALTED:THREAD_P" ) && nehalem ) ) {
1064  modules_tot_samples.insert(
1066  } else {
1067  modules_tot_samples.insert( std::pair<std::string, unsigned int>( cur_module->get_module_name(), 0 ) );
1068  }
1069  module_file = fopen( module_filename, "w" );
1070  if ( module_file == NULL ) {
1071  fprintf( stderr, "ERROR: Cannot create file %s!!!\naborting...\n", module_filename );
1072  exit( 1 );
1073  }
1074  fprintf( module_file, "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" "
1075  "\"http://www.w3.org/TR/html4/loose.dtd\">\n" );
1076  fprintf( module_file, "<html>\n" );
1077  fprintf( module_file, "<head>\n" );
1078  fprintf( module_file, "<title>\n" );
1079  fprintf( module_file, "%s\n", module_name );
1080  fprintf( module_file, "</title>\n" );
1081  fprintf( module_file, "</head>\n" );
1082  fprintf( module_file, "<body>\n" );
1083  fprintf( module_file, "<h2>%s</h2><br/>Events Sampled:<br/>\n", module_name );
1084  fprintf( module_file, "<ul>\n" );
1085  for ( std::vector<std::string>::const_iterator it = S_events.begin(); it != S_events.end(); ++it ) {
1086  fprintf( module_file, "<li><a href=\"#%s\">%s</a></li>\n", it->c_str(), it->c_str() );
1087  }
1088  fprintf( module_file, "</ul>\n" );
1089  } // if(result == modules_tot_samples.end()) //not found
1090  else {
1091  if ( ( !strcmp( event, "UNHALTED_CORE_CYCLES" ) && !nehalem ) ||
1092  ( !strcmp( event, "CPU_CLK_UNHALTED:THREAD_P" ) && nehalem ) ) {
1093  modules_tot_samples[cur_module->get_module_name()] = cur_module->get_total_num_samples();
1094  }
1095  module_file = fopen( module_filename, "a" );
1096  } // else:: if(result != modules_tot_samples.end()) //found!!
1097  char event_str[MAX_EVENT_NAME_LENGTH];
1098  bzero( event_str, MAX_EVENT_NAME_LENGTH );
1099  strcpy( event_str, event );
1100  if ( cur_module->get_c_mask() > 0 ) {
1101  sprintf( event_str + strlen( event_str ), " CMASK=%u", cur_module->get_c_mask() );
1102  }
1103  if ( cur_module->get_inv_mask() > 0 ) {
1104  sprintf( event_str + strlen( event_str ), " INV=%u", cur_module->get_inv_mask() );
1105  }
1106  fprintf( module_file, "<a name=\"%s\"><a>\n", event_str );
1107  fprintf( module_file, "<table cellpadding=\"5\">\n" );
1108  fprintf( module_file, "<tr bgcolor=\"#EEEEEE\">\n" );
1109  fprintf( module_file,
1110  "<th colspan=\"6\" align=\"left\">%s -- cmask: %u -- invmask: %u -- Total Samples: %u -- "
1111  "Sampling Period: %u</th>\n",
1112  event, cur_module->get_c_mask(), cur_module->get_inv_mask(), cur_module->get_total_num_samples(),
1113  cur_module->get_smpl_period() );
1114  fprintf( module_file, "</tr>\n" );
1115  fprintf( module_file, "<tr bgcolor=\"#EEEEEE\">\n" );
1116  fprintf( module_file, "<th align=\"left\">Samples</th>\n" );
1117  fprintf( module_file, "<th align=\"left\">Percentage</th>\n" );
1118  fprintf( module_file, "<th align=\"left\">Symbol Name</th>\n" );
1119  fprintf( module_file, "<th align=\"left\">Library Name</th>\n" );
1120  fprintf( module_file, "<th align=\"left\">Complete Signature</th>\n" );
1121  fprintf( module_file, "<th align=\"left\">Library Pathname</th>\n" );
1122  fprintf( module_file, "</tr>\n" );
1123  for ( int j = 0; j < 20; j++ ) {
1124  char sym[MAX_SYM_LENGTH];
1125  char sym_mod[MAX_SYM_MOD_LENGTH];
1126  char lib[MAX_LIB_LENGTH];
1127  char lib_mod[MAX_LIB_MOD_LENGTH];
1128  char simple_sym[MAX_SIMPLE_SYM_LENGTH];
1129  char simple_sym_mod[MAX_SIMPLE_SYM_MOD_LENGTH];
1130  char simple_lib[MAX_SIMPLE_LIB_LENGTH];
1131  char simple_lib_mod[MAX_SIMPLE_LIB_MOD_LENGTH];
1132 
1133  bzero( sym, MAX_SYM_LENGTH );
1134  bzero( sym_mod, MAX_SYM_MOD_LENGTH );
1135  bzero( lib, MAX_LIB_LENGTH );
1136  bzero( lib_mod, MAX_LIB_MOD_LENGTH );
1137  bzero( simple_sym, MAX_SIMPLE_SYM_LENGTH );
1138  bzero( simple_sym_mod, MAX_SIMPLE_SYM_MOD_LENGTH );
1139  bzero( simple_lib, MAX_SIMPLE_LIB_LENGTH );
1140  bzero( simple_lib_mod, MAX_SIMPLE_LIB_MOD_LENGTH );
1141 
1143  bzero( index, MAX_SAMPLE_INDEX_LENGTH );
1144  unsigned int value;
1145  bool res = cur_module->get_max( index, value );
1146  if ( !res ) break;
1147  char* sym_end = strchr( index, '%' );
1148  if ( sym_end == NULL ) // error
1149  {
1150  fprintf( stderr, "ERROR: Invalid sym and lib name! : %s\naborting...\n", index );
1151  exit( 1 );
1152  }
1153  memcpy( sym, index, strlen( index ) - strlen( sym_end ) );
1154  strcpy( lib, sym_end + 1 );
1155  char temp[MAX_SYM_LENGTH];
1156  bzero( temp, MAX_SYM_LENGTH );
1157  strcpy( temp, sym );
1158  strcpy( simple_sym, ( func_name( temp ) ) );
1159  if ( strrchr( lib, '/' ) != NULL && *( strrchr( lib, '/' ) + 1 ) != '\0' ) {
1160  strcpy( simple_lib, strrchr( lib, '/' ) + 1 );
1161  } else {
1162  strcpy( simple_lib, lib );
1163  }
1164  if ( j % 2 != 0 ) {
1165  fprintf( module_file, "<tr bgcolor=\"#FFFFCC\">\n" );
1166  } else {
1167  fprintf( module_file, "<tr bgcolor=\"#CCFFCC\">\n" );
1168  }
1169  fprintf( module_file, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%u</td>\n", value );
1170  fprintf( module_file, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%f%%</td>\n",
1171  ( ( (double)( value ) ) / ( (double)( cur_module->get_total_num_samples() ) ) ) * 100 );
1172  html_special_chars( simple_sym, simple_sym_mod );
1173  html_special_chars( simple_lib, simple_lib_mod );
1174  html_special_chars( sym, sym_mod );
1175  html_special_chars( lib, lib_mod );
1176  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n", simple_sym_mod );
1177  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n", simple_lib_mod );
1178  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n", sym_mod );
1179  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n</tr>\n", lib_mod );
1180  }
1181  fprintf( module_file, "</table><br/><br/>\n" );
1182  int res = fclose( module_file );
1183  if ( res ) {
1184  fprintf( stderr, "ERROR: Cannot close file %s!!!\naborting...\n", module_filename );
1185  exit( 1 );
1186  }
1187  return;
1188 }
1189 
1190 // read_S_file()
1191 // const char *dir : directory where sampling results input files are located
1192 // const char *filename : name of the current file to analyse
1193 // reads content of a gzipped sampling result file, finds names of symbols inside libraries using their offsets,
1194 // demangles them to make them human-readable, creates the module objects (with their sampling values),
1195 // and calls the put_module() function to create (or update) the corresponding HTML output file
1196 // returns 0 on success
1197 int read_S_file( const char* dir, const char* filename ) {
1198  char line[MAX_LINE_LENGTH];
1199  char event[MAX_EVENT_NAME_LENGTH];
1200  char arch[MAX_ARCH_NAME_LENGTH];
1201  unsigned int cmask;
1202  unsigned int inv;
1203  unsigned int sp;
1204  char cur_module_name[MAX_MODULE_NAME_LENGTH];
1205  bzero( line, MAX_LINE_LENGTH );
1206  bzero( event, MAX_EVENT_NAME_LENGTH );
1207  bzero( cur_module_name, MAX_MODULE_NAME_LENGTH );
1208  bzero( arch, MAX_ARCH_NAME_LENGTH );
1209 
1210  S_module* cur_module = new S_module();
1211  unsigned int module_num = 0;
1212 
1213  char path_name[MAX_FILENAME_LENGTH];
1214  bzero( path_name, MAX_FILENAME_LENGTH );
1215  strcpy( path_name, dir );
1216  strcat( path_name, "/" );
1217  strcat( path_name, filename );
1218  gzFile res_file = gzopen( path_name, "rb" );
1219 
1220  if ( res_file != NULL ) {
1221  bzero( line, MAX_LINE_LENGTH );
1222  gzgets( res_file, line, MAX_LINE_LENGTH );
1223  if ( line[strlen( line ) - 1] == '\n' ) line[strlen( line ) - 1] = '\0';
1224  bzero( event, MAX_EVENT_NAME_LENGTH );
1225  const std::string fmt = std::format( "%{:}s %{:}s %u %u %u", MAX_ARCH_NAME_LENGTH - 1, MAX_EVENT_NAME_LENGTH - 1 );
1226  sscanf( line, fmt.c_str(), arch, event, &cmask, &inv, &sp );
1227  if ( !strcmp( arch, "NHM" ) )
1228  nehalem = true;
1229  else
1230  nehalem = false;
1231  bzero( line, MAX_LINE_LENGTH );
1232  while ( gzgets( res_file, line, MAX_LINE_LENGTH ) != Z_NULL ) {
1233  if ( line[strlen( line ) - 1] == '\n' ) line[strlen( line ) - 1] = '\0';
1234  if ( strchr( line, ' ' ) == NULL ) // module
1235  {
1236  if ( module_num > 0 ) {
1237  put_S_module( cur_module, dir );
1238  cur_module->clear();
1239  }
1240  module_num++;
1241  char* end_sym = strchr( line, '%' );
1242  if ( end_sym == NULL ) // error
1243  {
1244  fprintf( stderr, "ERROR: Invalid module name. \nLINE: %s\naborting...\n", line );
1245  exit( 1 );
1246  }
1247  bzero( cur_module_name, MAX_MODULE_NAME_LENGTH );
1248  memcpy( cur_module_name, line, strlen( line ) - strlen( end_sym ) );
1249  cur_module->init( cur_module_name, arch, event, cmask, inv, sp );
1250  cur_module->set_total( atoi( end_sym + 1 ) );
1251  } // module
1252  else // symbol, libName, libOffset, value
1253  {
1254  unsigned int value = 0, libOffset = 0;
1255  char symbol[MAX_SYM_LENGTH];
1256  char libName[MAX_LIB_LENGTH];
1257  char final_sym[MAX_SYM_MOD_LENGTH];
1258  char final_lib[MAX_LIB_MOD_LENGTH];
1259  bzero( symbol, MAX_SYM_LENGTH );
1260  bzero( libName, MAX_LIB_LENGTH );
1261  bzero( final_sym, MAX_SYM_MOD_LENGTH );
1262  bzero( final_lib, MAX_LIB_MOD_LENGTH );
1263 
1264  const std::string fmt = std::format( "%{:}s %{:}s %u %u", MAX_SYM_LENGTH - 1, MAX_LIB_LENGTH - 1 );
1265  sscanf( line, fmt.c_str(), symbol, libName, &libOffset, &value );
1266  char realPathName_s[FILENAME_MAX];
1267  bzero( realPathName_s, FILENAME_MAX );
1268  char* realPathName = realpath( libName, realPathName_s );
1269  if ( realPathName != NULL && strlen( realPathName ) > 0 ) {
1271  result = libsInfo.find( realPathName );
1272  if ( result == libsInfo.end() ) { libsInfo[realPathName] = FileInfo( realPathName, true ); }
1273  const char* temp_sym = libsInfo[realPathName].symbolByOffset( libOffset );
1274  if ( temp_sym != NULL && strlen( temp_sym ) > 0 ) {
1275  int status;
1276  char* demangled_symbol = abi::__cxa_demangle( temp_sym, NULL, NULL, &status );
1277  if ( status == 0 ) {
1278  strcpy( final_sym, demangled_symbol );
1279  free( demangled_symbol );
1280  } else {
1281  strcpy( final_sym, temp_sym );
1282  }
1283  } else {
1284  strcpy( final_sym, "???" );
1285  }
1286  strcpy( final_lib, realPathName );
1287  } else {
1288  strcpy( final_sym, symbol );
1289  strcpy( final_lib, libName );
1290  }
1291  char index[MAX_LINE_LENGTH];
1292  bzero( index, MAX_LINE_LENGTH );
1293  strcpy( index, final_sym );
1294  strcat( index, "%" );
1295  strcat( index, final_lib );
1296  cur_module->add_sample( index, value );
1297  } // symbol, libName, libOffset, value
1298  bzero( line, MAX_LINE_LENGTH );
1299  } // while(gzgets(res_file, line, MAX_LINE_LENGTH)!=Z_NULL)
1300  put_S_module( cur_module, dir ); // last module!
1301  cur_module->clear();
1302  gzclose( res_file );
1303  } // if(res_file != NULL)
1304  else {
1305  fprintf( stderr, "ERROR: Unable to open input file: %s\naborting...\n", filename );
1306  exit( 1 );
1307  }
1308  delete cur_module; // delete it!
1309  return 0;
1310 }
1311 
1312 int read_S_events( const char* dir, const char* filename ) {
1313  char event[MAX_EVENT_NAME_LENGTH];
1314  char arch[MAX_ARCH_NAME_LENGTH];
1315  char line[MAX_LINE_LENGTH];
1316  char cmask_str[MAX_CMASK_STR_LENGTH];
1317  char inv_str[MAX_INV_STR_LENGTH];
1318  char sp_str[MAX_SP_STR_LENGTH];
1319  bzero( line, MAX_LINE_LENGTH );
1320  bzero( event, MAX_EVENT_NAME_LENGTH );
1321  bzero( arch, MAX_ARCH_NAME_LENGTH );
1322  bzero( cmask_str, MAX_CMASK_STR_LENGTH );
1323  bzero( inv_str, MAX_INV_STR_LENGTH );
1324  bzero( sp_str, MAX_SP_STR_LENGTH );
1325  char path_name[MAX_FILENAME_LENGTH];
1326  bzero( path_name, MAX_FILENAME_LENGTH );
1327  strcpy( path_name, dir );
1328  strcat( path_name, "/" );
1329  strcat( path_name, filename );
1330  gzFile res_file = gzopen( path_name, "rb" );
1331  if ( res_file != NULL ) {
1332  bzero( line, MAX_LINE_LENGTH );
1333  gzgets( res_file, line, MAX_LINE_LENGTH );
1334  if ( line[strlen( line ) - 1] == '\n' ) line[strlen( line ) - 1] = '\0';
1335  bzero( event, MAX_EVENT_NAME_LENGTH );
1336  const std::string fmt =
1337  std::format( "%{:}s %{:}s %{:}s %{:}s %{:}s\n", MAX_ARCH_NAME_LENGTH - 1, MAX_EVENT_NAME_LENGTH - 1,
1339  sscanf( line, fmt.c_str(), arch, event, cmask_str, inv_str, sp_str );
1340  std::string event_str( event );
1341  if ( atoi( cmask_str ) > 0 ) {
1342  event_str += " CMASK=";
1343  event_str += cmask_str;
1344  }
1345  if ( atoi( inv_str ) > 0 ) {
1346  event_str += " INV=";
1347  event_str += inv_str;
1348  }
1349  S_events.push_back( event_str );
1350  } // if(res_file != NULL)
1351  else {
1352  fprintf( stderr, "ERROR: Unable to open input file: %s\naborting...\n", filename );
1353  exit( 1 );
1354  }
1355  gzclose( res_file );
1356  return 0;
1357 }
1358 
1359 // finalize_html_pages()
1360 // const char *dir : directory contating sampling result files
1361 // puts footers in module HTML pages and creates index file
1362 int finalize_S_html_pages( const char* dir ) {
1363  for ( std::map<std::string, unsigned int>::const_iterator i = modules_tot_samples.begin();
1364  i != modules_tot_samples.end(); ++i ) {
1365  char module_filename[MAX_FILENAME_LENGTH];
1366  strcpy( module_filename, dir );
1367  strcat( module_filename, "/HTML/" );
1368  strcat( module_filename, ( i->first ).c_str() );
1369  strcat( module_filename, ".html" );
1370  FILE* module_file = fopen( module_filename, "a" );
1371  if ( module_file == NULL ) {
1372  fprintf( stderr, "ERROR: Unable to append to file: %s\naborting...\n", module_filename );
1373  exit( 1 );
1374  }
1375  fprintf( module_file, "</body>\n</html>\n" );
1376  if ( fclose( module_file ) ) {
1377  fprintf( stderr, "ERROR: Cannot close file %s!!!\naborting...\n", module_filename );
1378  exit( 1 );
1379  }
1380  }
1381  return 0;
1382 }
1383 
1384 // read_file()
1385 // const char *filename : input file to analyse
1386 // analyses the event file and updates the list of modules with counter information found in the file
1387 // returns the number of modules found in the file
1388 int read_C_file( const char* dir, const char* filename ) {
1389  char event[MAX_EVENT_NAME_LENGTH];
1390  char arch[MAX_ARCH_NAME_LENGTH];
1391  char line[MAX_LINE_LENGTH];
1392  char cmask_str[MAX_CMASK_STR_LENGTH];
1393  char inv_str[MAX_INV_STR_LENGTH];
1394  char sp_str[MAX_SP_STR_LENGTH];
1395  char cur_module_name[MAX_MODULE_NAME_LENGTH];
1396  bzero( line, MAX_LINE_LENGTH );
1397  bzero( event, MAX_EVENT_NAME_LENGTH );
1398  bzero( cur_module_name, MAX_MODULE_NAME_LENGTH );
1399  bzero( arch, MAX_ARCH_NAME_LENGTH );
1400  bzero( line, MAX_LINE_LENGTH );
1401  bzero( cmask_str, MAX_CMASK_STR_LENGTH );
1402  bzero( inv_str, MAX_INV_STR_LENGTH );
1403  bzero( sp_str, MAX_SP_STR_LENGTH );
1404  int number_of_modules = 0;
1405  long cur_sum = 0;
1406  int no_of_values = 0;
1407  char path_name[MAX_FILENAME_LENGTH];
1408  bzero( path_name, MAX_FILENAME_LENGTH );
1409  strcpy( path_name, dir );
1410  strcat( path_name, "/" );
1411  strcat( path_name, filename );
1412  FILE* fp = fopen( path_name, "r" );
1413  const std::string fmt1 =
1414  std::format( "%{:}s %{:}s %{:}s %{:}s %{:}s\n", MAX_ARCH_NAME_LENGTH - 1, MAX_EVENT_NAME_LENGTH - 1,
1416  int stat = fscanf( fp, fmt1.c_str(), arch, event, cmask_str, inv_str, sp_str );
1417  if ( stat != 5 ) {
1418  std::cerr << "ERROR: failed to parse " << path_name << std::endl;
1419  exit( 1 );
1420  }
1421  if ( !strcmp( arch, "NHM" ) )
1422  nehalem = true;
1423  else
1424  nehalem = false;
1425  std::string event_str( event );
1426  if ( atoi( cmask_str ) > 0 ) {
1427  event_str += " CMASK=";
1428  event_str += cmask_str;
1429  }
1430  if ( atoi( inv_str ) > 0 ) {
1431  event_str += " INV=";
1432  event_str += inv_str;
1433  }
1434  C_events.push_back( event_str );
1435  const std::string fmt2 = std::format( "%{:}s\n", MAX_LINE_LENGTH - 1 );
1436  while ( fscanf( fp, fmt2.c_str(), line ) != EOF ) {
1437  if ( isalpha( line[0] ) ) // module
1438  {
1439  if ( number_of_modules > 0 ) {
1440  C_modules[cur_module_name][event_str] = (double)cur_sum / no_of_values;
1441  cur_sum = 0;
1442  no_of_values = 0;
1443  }
1444  strcpy( cur_module_name, line );
1445  number_of_modules++;
1446  } else if ( isdigit( line[0] ) ) // value
1447  {
1448  cur_sum += strtol( line, NULL, 10 );
1449  no_of_values++;
1450  }
1451  }
1452  C_modules[cur_module_name][event_str] = (double)cur_sum / no_of_values; // last module
1453  fclose( fp );
1454  return number_of_modules;
1455 }
1456 
1457 void put_C_header( FILE* fp, std::vector<std::string>& columns ) {
1458  fprintf(
1459  fp,
1460  "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\n" );
1461  fprintf( fp, "<html>\n" );
1462  fprintf( fp, "<head>\n" );
1463  fprintf( fp, "<title>\n" );
1464  fprintf( fp, "Analysis Result\n" );
1465  fprintf( fp, "</title>\n" );
1466  fprintf( fp, "<script src=\"sorttable.js\"></script>\n" );
1467  fprintf( fp, "<style>\ntable.sortable thead "
1468  "{\nbackground-color:#eee;\ncolor:#666666;\nfont-weight:bold;\ncursor:default;\nfont-family:courier;\n}"
1469  "\n</style>\n" );
1470  fprintf( fp, "</head>\n" );
1471  fprintf( fp, "<body link=\"black\">\n" );
1472  fprintf( fp, "<h1>RESULTS:</h1><br/>Click for detailed symbol view...<p/>\n" );
1473  fprintf( fp, "<table class=\"sortable\" cellpadding=\"5\">\n" );
1474  fprintf( fp, "<tr>\n" );
1475  fprintf( fp, "<th>MODULE NAME</th>\n" );
1476  for ( std::vector<std::string>::const_iterator it = columns.begin(); it != columns.end(); ++it ) {
1477  if ( strlen( it->c_str() ) == 0 )
1478  fprintf( fp, "<th bgcolor=\"#FFFFFF\">&nbsp;</th>\n" );
1479  else
1480  fprintf( fp, "<th>%s</th>\n", ( *it ).c_str() );
1481  }
1482  fprintf( fp, "</tr>\n" );
1483  return;
1484 }
1485 
1486 void put_C_modules( FILE* fp, std::vector<std::string>& columns ) {
1487  int index = 0;
1488  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1489  ++it ) {
1490  if ( index % 2 )
1491  fprintf( fp, "<tr bgcolor=\"#FFFFCC\">\n" );
1492  else
1493  fprintf( fp, "<tr bgcolor=\"#CCFFCC\">\n" );
1494  fprintf( fp, "<td style=\"font-family:monospace;font-size:large;color:Black\"><a href=\"%s.html\">%s</a></td>\n",
1495  ( it->first ).c_str(), ( it->first ).c_str() );
1496  for ( std::vector<std::string>::const_iterator jt = columns.begin(); jt != columns.end(); ++jt ) {
1497  if ( strlen( jt->c_str() ) == 0 ) {
1498  fprintf( fp, "<td bgcolor=\"#FFFFFF\">&nbsp;</td>" );
1499  } else {
1500  if ( ( it->second ).find( *jt ) == ( it->second ).end() ) {
1501  fprintf( stderr, "ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", ( *jt ).c_str() );
1502  exit( 1 );
1503  }
1504  fprintf( fp, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\" align=\"right\">%.2f</td>\n",
1505  ( it->second )[*jt] );
1506  }
1507  }
1508  fprintf( fp, "</tr>\n" );
1509  index++;
1510  }
1511 }
1512 
1513 void put_C_footer( FILE* fp ) {
1514  fprintf( fp, "</table>\n</body>\n</html>\n" );
1515  return;
1516 }
1517 
1518 void put_C_header_csv( FILE* fp, std::vector<std::string>& columns ) {
1519  fprintf( fp, "MODULE NAME" );
1520  for ( std::vector<std::string>::const_iterator it = columns.begin(); it != columns.end(); ++it ) {
1521  if ( strlen( it->c_str() ) == 0 ) {
1522  } else
1523  fprintf( fp, ",%s", ( *it ).c_str() );
1524  }
1525  fprintf( fp, "\n" );
1526  return;
1527 }
1528 
1529 void put_C_modules_csv( FILE* fp, std::vector<std::string>& columns ) {
1530  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1531  ++it ) {
1532  fprintf( fp, "%s", ( it->first ).c_str() );
1533  for ( std::vector<std::string>::const_iterator jt = columns.begin(); jt != columns.end(); ++jt ) {
1534  if ( strlen( jt->c_str() ) == 0 ) {
1535  } else {
1536  if ( ( it->second ).find( *jt ) == ( it->second ).end() ) {
1537  fprintf( stderr, "ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", ( *jt ).c_str() );
1538  exit( 1 );
1539  }
1540  fprintf( fp, ",%.2f", ( it->second )[*jt] );
1541  }
1542  }
1543  fprintf( fp, "\n" );
1544  }
1545 }
1546 
1547 // normalize()
1548 // struct C_module *mod : pointer to the head of the list of modules
1549 // int counter : event selected (see C_module class for which event corresponds to which number)
1550 // int number_of_modules : length of the list
1551 // double value : value to be normalized
1552 // double normalizeTo : value to which the value above should be normalized
1553 // returns the normalized value
1554 double normalize( const std::string& field, double value, double normalizeTo ) {
1555  double max = 0;
1556  double counter_value;
1557  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1558  ++it ) {
1559  counter_value = ( it->second )[field];
1560  if ( max < counter_value ) max = counter_value;
1561  }
1562  if ( value > 0 && max > 0 && normalizeTo > 0 ) {
1563  return 1. * value / max * normalizeTo;
1564  } else
1565  return 0;
1566 }
1567 
1568 // calc_post_deriv_values()
1569 // struct C_module *mod : pointer to the head of the list of modules
1570 // double totalCycles : total cycles spent by all the modules
1571 // int number_of_modules : length of the list
1572 // calculates the iFactor of each module
1574  if ( nehalem ) {
1575  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1576  ++it ) {
1577  double simdnorm =
1578  1. - normalize( "Packed % of all UOPS Retired", ( it->second )["Packed % of all UOPS Retired"], 1 );
1579  double misspnorm = normalize( "% of Mispredicted Branches", ( it->second )["% of Mispredicted Branches"], 1 );
1580  double stallnorm = normalize( "Stalled Cycles", ( it->second )["Stalled Cycles"], 1 );
1581  ( it->second )["iFactor"] = stallnorm * ( simdnorm + misspnorm + stallnorm );
1582  }
1583  } else {
1584  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1585  ++it ) {
1586  double simdnorm =
1587  1. - normalize( "Packed SIMD % of all Instructions", ( it->second )["Packed SIMD % of all Instructions"], 1 );
1588  double misspnorm = normalize( "% of Mispredicted Branches", ( it->second )["% of Mispredicted Branches"], 1 );
1589  double stallnorm = normalize( "Stalled Cycles", ( it->second )["Stalled Cycles"], 1 );
1590  ( it->second )["iFactor"] = stallnorm * ( simdnorm + misspnorm + stallnorm );
1591  }
1592  }
1593 }
1594 
1595 // getTotalCycles()
1596 // struct C_module *mod : pointer to the head of the list of modules
1597 // int number_of_modules : length of the list
1598 // returns the number of total cycles spent by all the modules
1599 double getTotalCycles() {
1600  double sum = 0;
1601  if ( nehalem ) {
1602  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1603  ++it ) {
1604  sum += ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
1605  }
1606  } else {
1607  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1608  ++it ) {
1609  sum += ( it->second )["UNHALTED_CORE_CYCLES"];
1610  }
1611  }
1612  return sum;
1613 }
1614 
1615 // main()
1616 // takes as argument the directory containing results
1617 // and produces the HTML directory inside of it containing browsable statistics
1618 int main( int argc, char* argv[] ) {
1619  if ( argc < 2 || argc > 4 ) {
1620  printf( "\n\nUsage: %s DIRECTORY [--caa] [--csv]\n\n", argv[0] );
1621  exit( 1 );
1622  }
1623 
1624  bool caa = false;
1625  bool csv = false;
1626  for ( int i = 2; i < argc; i++ ) {
1627  if ( !strcmp( argv[i], "--caa" ) ) caa = true;
1628  if ( !strcmp( argv[i], "--csv" ) ) csv = true;
1629  }
1630 
1631  char dir[MAX_FILENAME_LENGTH];
1632  strcpy( dir, argv[1] );
1633  if ( !csv ) {
1634  strcat( dir, "/HTML" );
1635  int res = mkdir( dir, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH );
1636  if ( res != 0 ) {
1637  fprintf( stderr, "ERROR: Cannot create directory %s\naborting...\n", dir );
1638  exit( 1 );
1639  }
1640  }
1641 
1642  DIR* dp;
1643  struct dirent* dirp;
1644  int num_of_modules = 0;
1645  if ( ( dp = opendir( argv[1] ) ) == NULL ) {
1646  printf( "Error(%d) opening %s\n", errno, argv[1] );
1647  return errno;
1648  }
1649  while ( ( dirp = readdir( dp ) ) != NULL ) {
1650  if ( strstr( dirp->d_name, "_S_" ) != NULL && strstr( dirp->d_name, ".txt.gz" ) != NULL && !csv ) {
1651  if ( read_S_events( argv[1], dirp->d_name ) ) {
1652  fprintf( stderr, "ERROR: Cannot read file %s\naborting...\n", dirp->d_name );
1653  exit( 1 );
1654  }
1655  }
1656  }
1657  closedir( dp );
1658  sort( S_events.begin(), S_events.end() );
1659  if ( ( dp = opendir( argv[1] ) ) == NULL ) {
1660  printf( "Error(%d) opening %s\n", errno, argv[1] );
1661  return errno;
1662  }
1663  while ( ( dirp = readdir( dp ) ) != NULL ) {
1664  if ( strstr( dirp->d_name, "_S_" ) != NULL && strstr( dirp->d_name, ".txt.gz" ) != NULL && !csv ) {
1665  if ( read_S_file( argv[1], dirp->d_name ) ) {
1666  fprintf( stderr, "ERROR: Cannot read file %s\naborting...\n", dirp->d_name );
1667  exit( 1 );
1668  }
1669  } else if ( strstr( dirp->d_name, "_C_" ) != NULL && strstr( dirp->d_name, ".txt" ) != NULL ) {
1670  int res = read_C_file( argv[1], dirp->d_name );
1671  if ( res > num_of_modules ) { num_of_modules = res; }
1672  }
1673  }
1674  closedir( dp );
1675 
1676  if ( !csv ) {
1677  if ( finalize_S_html_pages( argv[1] ) ) {
1678  fprintf( stderr, "ERROR: Cannot finalize HTML pages!!!\naborting...\n" );
1679  exit( 1 );
1680  }
1681  }
1682 
1683  char filepath[MAX_FILENAME_LENGTH];
1684  bzero( filepath, MAX_FILENAME_LENGTH );
1685  if ( !csv )
1686  sprintf( filepath, "%s/HTML/index.html", argv[1] );
1687  else
1688  sprintf( filepath, "%s/results.csv", argv[1] );
1689  FILE* fp = fopen( filepath, "w" );
1690  if ( fp == NULL ) {
1691  fprintf( stderr, "ERROR: Cannot create file index.html!!!\naborting...\n" );
1692  exit( 1 );
1693  }
1694 
1695  if ( caa ) {
1696  double totalCycles;
1697  if ( !nehalem ) {
1699  if ( !check_for_core_caa_events() ) {
1700  fprintf( stderr, "(core) ERROR: One or more events for CAA missing!\naborting...\n" );
1701  exit( 1 );
1702  }
1704  totalCycles = getTotalCycles();
1705  calc_core_deriv_values( totalCycles );
1707  if ( !csv ) {
1708  put_C_header( fp, core_caa_events_displ );
1709  put_C_modules( fp, core_caa_events_displ );
1710  } else {
1711  put_C_header_csv( fp, core_caa_events_displ );
1712  put_C_modules_csv( fp, core_caa_events_displ );
1713  }
1714  } else {
1716  if ( !check_for_nhm_caa_events() ) {
1717  fprintf( stderr, "(nehalem) ERROR: One or more events for CAA missing!\naborting...\n" );
1718  exit( 1 );
1719  }
1721  totalCycles = getTotalCycles();
1722  calc_nhm_deriv_values( totalCycles );
1724  if ( !csv ) {
1725  put_C_header( fp, nhm_caa_events_displ );
1726  put_C_modules( fp, nhm_caa_events_displ );
1727  } else {
1728  put_C_header_csv( fp, nhm_caa_events_displ );
1729  put_C_modules_csv( fp, nhm_caa_events_displ );
1730  }
1731  }
1732  if ( !csv ) put_C_footer( fp );
1733  fclose( fp );
1734  } else {
1735  if ( !csv ) {
1736  put_C_header( fp, C_events );
1737  put_C_modules( fp, C_events );
1738  put_C_footer( fp );
1739  } else {
1740  put_C_header_csv( fp, C_events );
1741  put_C_modules_csv( fp, C_events );
1742  }
1743  fclose( fp );
1744  }
1745  if ( !csv ) {
1746  char src[MAX_FILENAME_LENGTH];
1747  char dst[MAX_FILENAME_LENGTH];
1748  sprintf( src, "sorttable.js" );
1749  sprintf( dst, "%s/HTML/sorttable.js", argv[1] );
1750  int fd_src = open( src, O_RDONLY );
1751  if ( fd_src == -1 ) {
1752  fprintf( stderr, "ERROR: Cannot open file \"%s\"!\naborting...\n", src );
1753  exit( 1 );
1754  }
1755  int fd_dst = open( dst, O_WRONLY | O_CREAT | O_TRUNC, 0644 );
1756  if ( fd_dst == -1 ) {
1757  fprintf( stderr, "ERROR: Cannot open file \"%s\" (%s)!\naborting...\n", dst, strerror( errno ) );
1758  exit( 1 );
1759  }
1760  char c;
1761  while ( read( fd_src, &c, 1 ) ) {
1762  if ( write( fd_dst, &c, 1 ) == -1 ) {
1763  std::cerr << "ERROR: failed to write to " << dst << std::endl;
1764  exit( 1 );
1765  }
1766  }
1767  close( fd_dst );
1768  close( fd_src );
1769  }
1770  return 0;
1771 }
MAX_CMASK_STR_LENGTH
#define MAX_CMASK_STR_LENGTH
Definition: pfm_gen_analysis.cpp:95
I7_L1_ITLB_WALK_COMPLETED_CYCLES
#define I7_L1_ITLB_WALK_COMPLETED_CYCLES
Definition: pfm_gen_analysis.cpp:61
std::max_element
T max_element(T... args)
init_core_caa_events
void init_core_caa_events()
Definition: pfm_gen_analysis.cpp:283
html_special_chars
void html_special_chars(const char *s, char *s_mod)
Definition: pfm_gen_analysis.cpp:837
I7_OTHER_CORE_L2_HITM_CYCLES
#define I7_OTHER_CORE_L2_HITM_CYCLES
Definition: pfm_gen_analysis.cpp:65
FileInfo::symbolByOffset
const char * symbolByOffset(Offset offset)
Definition: pfm_gen_analysis.cpp:161
MAX_SYM_MOD_LENGTH
#define MAX_SYM_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:83
std::string
STL class.
PipeReader::output
std::istringstream & output(void)
Definition: pfm_gen_analysis.cpp:121
put_C_modules
void put_C_modules(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1486
FileInfo::CacheItemComparator
Definition: pfm_gen_analysis.cpp:190
FileInfo::CacheItem::CacheItem
CacheItem(Offset offset, const std::string &name)
Definition: pfm_gen_analysis.cpp:182
S_module::inv
unsigned int inv
Definition: pfm_gen_analysis.cpp:784
plotBacklogPyRoot.argc
argc
Definition: plotBacklogPyRoot.py:173
std::pair
init_nhm_caa_events
void init_nhm_caa_events()
Definition: pfm_gen_analysis.cpp:307
I7_IFETCH_L2_MISS_L3_HITM
#define I7_IFETCH_L2_MISS_L3_HITM
Definition: pfm_gen_analysis.cpp:73
gaudirun.s
string s
Definition: gaudirun.py:346
std::vector< CacheItem >
std::map::find
T find(T... args)
MAX_LIB_MOD_LENGTH
#define MAX_LIB_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:85
finalize_S_html_pages
int finalize_S_html_pages(const char *dir)
Definition: pfm_gen_analysis.cpp:1362
std::vector::size
T size(T... args)
CORE_UNKNOWN_ADDR_STORE_CYCLES
#define CORE_UNKNOWN_ADDR_STORE_CYCLES
Definition: pfm_gen_analysis.cpp:55
put_C_header
void put_C_header(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1457
CORE_L1_DTLB_MISS_CYCLES
#define CORE_L1_DTLB_MISS_CYCLES
Definition: pfm_gen_analysis.cpp:53
I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES
#define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:66
Gaudi::Units::nm
constexpr double nm
Definition: SystemOfUnits.h:97
std::istringstream
STL class.
FileInfo::SymbolCache
std::vector< CacheItem > SymbolCache
Definition: pfm_gen_analysis.cpp:187
S_module::get_event
const std::string & get_event()
Definition: pfm_gen_analysis.cpp:815
FileInfo::CacheItem::OFFSET
Offset OFFSET
Definition: pfm_gen_analysis.cpp:182
gaudirun.c
c
Definition: gaudirun.py:525
std::vector::back
T back(T... args)
MAX_EVENT_NAME_LENGTH
#define MAX_EVENT_NAME_LENGTH
Definition: pfm_gen_analysis.cpp:91
S_module::get_total_num_samples
unsigned int get_total_num_samples()
Definition: pfm_gen_analysis.cpp:830
S_module::get_arch
const std::string & get_arch()
Definition: pfm_gen_analysis.cpp:814
std::map::clear
T clear(T... args)
I7_L2_HIT_CYCLES
#define I7_L2_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:62
EXPECTED_CPI
#define EXPECTED_CPI
Definition: pfm_gen_analysis.cpp:78
std::vector::push_back
T push_back(T... args)
S_module::get_max
bool get_max(char *index, unsigned int &value)
Definition: pfm_gen_analysis.cpp:820
compareOutputFiles.sp
sp
Definition: compareOutputFiles.py:506
I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT
#define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT
Definition: pfm_gen_analysis.cpp:70
I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD
#define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD
Definition: pfm_gen_analysis.cpp:74
fixtures.stderr
Generator[bytes, None, None] stderr(subprocess.CompletedProcess completed_process)
Definition: fixtures.py:147
read_C_file
int read_C_file(const char *dir, const char *filename)
Definition: pfm_gen_analysis.cpp:1388
S_module::sp
unsigned int sp
Definition: pfm_gen_analysis.cpp:785
normalize
double normalize(const std::string &field, double value, double normalizeTo)
Definition: pfm_gen_analysis.cpp:1554
S_module::clear
void clear()
Definition: pfm_gen_analysis.cpp:789
CORE_L2_HIT_CYCLES
#define CORE_L2_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:52
FileInfo::FileInfo
FileInfo(void)
Definition: pfm_gen_analysis.cpp:156
Gaudi::Utils::begin
AttribStringParser::Iterator begin(const AttribStringParser &parser)
Definition: AttribStringParser.h:136
read_S_file
int read_S_file(const char *dir, const char *filename)
Definition: pfm_gen_analysis.cpp:1197
calc_nhm_deriv_values
void calc_nhm_deriv_values(double totalCycles)
Definition: pfm_gen_analysis.cpp:606
S_module::get_c_mask
unsigned int get_c_mask()
Definition: pfm_gen_analysis.cpp:813
PipeReader::pipe
FILE * pipe
Definition: pfm_gen_analysis.cpp:124
read_S_events
int read_S_events(const char *dir, const char *filename)
Definition: pfm_gen_analysis.cpp:1312
MAX_SIMPLE_LIB_MOD_LENGTH
#define MAX_SIMPLE_LIB_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:89
S_module::module_name
std::string module_name
Definition: pfm_gen_analysis.cpp:780
MAX_LIB_LENGTH
#define MAX_LIB_LENGTH
Definition: pfm_gen_analysis.cpp:84
FileInfo::createOffsetMap
void createOffsetMap(void)
Definition: pfm_gen_analysis.cpp:195
PIPE_BUFFER_LENGTH
#define PIPE_BUFFER_LENGTH
Definition: pfm_gen_analysis.cpp:99
I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP
#define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP
Definition: pfm_gen_analysis.cpp:71
MAX_SIMPLE_SYM_LENGTH
#define MAX_SIMPLE_SYM_LENGTH
Definition: pfm_gen_analysis.cpp:86
ProduceConsume.j
j
Definition: ProduceConsume.py:104
std::cerr
I7_L1_DTLB_WALK_COMPLETED_CYCLES
#define I7_L1_DTLB_WALK_COMPLETED_CYCLES
Definition: pfm_gen_analysis.cpp:60
std::string::c_str
T c_str(T... args)
I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT
#define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT
Definition: pfm_gen_analysis.cpp:69
func_name
const char * func_name(const char *demangled_symbol)
Definition: pfm_gen_analysis.cpp:868
std::map::erase
T erase(T... args)
PipeReader::iss
std::unique_ptr< std::istringstream > iss
Definition: pfm_gen_analysis.cpp:125
getTotalCycles
double getTotalCycles()
Definition: pfm_gen_analysis.cpp:1599
FileInfo::CacheItem::NAME
std::string NAME
Definition: pfm_gen_analysis.cpp:184
format
GAUDI_API std::string format(const char *,...)
MsgStream format utility "a la sprintf(...)".
Definition: MsgStream.cpp:119
PipeReader
Definition: pfm_gen_analysis.cpp:101
FileInfo::next
Offset next(Offset offset)
Definition: pfm_gen_analysis.cpp:174
S_module::samples
std::map< std::string, unsigned int > samples
Definition: pfm_gen_analysis.cpp:778
CORE_LCP_STALL_CYCLES
#define CORE_LCP_STALL_CYCLES
Definition: pfm_gen_analysis.cpp:54
FileInfo::FileInfo
FileInfo(const std::string &name, bool useGdb)
Definition: pfm_gen_analysis.cpp:157
check_for_nhm_caa_events
bool check_for_nhm_caa_events()
Definition: pfm_gen_analysis.cpp:361
CORE_OVERLAPPING_CYCLES
#define CORE_OVERLAPPING_CYCLES
Definition: pfm_gen_analysis.cpp:56
std::map< std::string, unsigned int >
S_module::get_inv_mask
unsigned int get_inv_mask()
Definition: pfm_gen_analysis.cpp:812
FileInfo::NAME
std::string NAME
Definition: pfm_gen_analysis.cpp:155
put_C_footer
void put_C_footer(FILE *fp)
Definition: pfm_gen_analysis.cpp:1513
put_C_modules_csv
void put_C_modules_csv(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1529
cpluginsvc.n
n
Definition: cpluginsvc.py:234
MAX_MODULE_NAME_LENGTH
#define MAX_MODULE_NAME_LENGTH
Definition: pfm_gen_analysis.cpp:92
S_module::get_module_name
const std::string & get_module_name()
Definition: pfm_gen_analysis.cpp:829
FileInfo::CacheItemComparator::operator()
bool operator()(const CacheItem &a, const int &b) const
Definition: pfm_gen_analysis.cpp:191
init_core_caa_events_displ
void init_core_caa_events_displ()
Definition: pfm_gen_analysis.cpp:371
S_module::add_sample
void add_sample(const char *index, unsigned int value)
Definition: pfm_gen_analysis.cpp:816
S_module::event
std::string event
Definition: pfm_gen_analysis.cpp:782
CORE_SPAN_ACROSS_CACHE_LINE_CYCLES
#define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES
Definition: pfm_gen_analysis.cpp:57
PipeReader::PipeReader
PipeReader(const char *cmd)
Definition: pfm_gen_analysis.cpp:103
MAX_SYM_LENGTH
#define MAX_SYM_LENGTH
Definition: pfm_gen_analysis.cpp:82
hivetimeline.read
def read(f, regex=".*", skipevents=0)
Definition: hivetimeline.py:32
FileInfo::CacheItem
Definition: pfm_gen_analysis.cpp:181
skipWhitespaces
bool skipWhitespaces(const char *srcbuffer, const char **destbuffer)
Definition: pfm_gen_analysis.cpp:132
ConditionsStallTest.name
name
Definition: ConditionsStallTest.py:77
std::endl
T endl(T... args)
put_C_header_csv
void put_C_header_csv(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1518
FileInfo::Offset
int Offset
Definition: pfm_gen_analysis.cpp:154
S_module::get_smpl_period
unsigned int get_smpl_period()
Definition: pfm_gen_analysis.cpp:811
std::vector::begin
T begin(T... args)
std::getline
T getline(T... args)
std::map::insert
T insert(T... args)
I7_OTHER_CORE_L2_HIT_CYCLES
#define I7_OTHER_CORE_L2_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:64
CORE_L2_MISS_CYCLES
#define CORE_L2_MISS_CYCLES
Definition: pfm_gen_analysis.cpp:51
fmt
MAX_SIMPLE_LIB_LENGTH
#define MAX_SIMPLE_LIB_LENGTH
Definition: pfm_gen_analysis.cpp:88
S_module::arch
std::string arch
Definition: pfm_gen_analysis.cpp:781
S_module::cmask
unsigned int cmask
Definition: pfm_gen_analysis.cpp:783
MAX_LINE_LENGTH
#define MAX_LINE_LENGTH
Definition: pfm_gen_analysis.cpp:90
FileInfo
Definition: pfm_gen_analysis.cpp:152
MAX_SAMPLE_INDEX_LENGTH
#define MAX_SAMPLE_INDEX_LENGTH
Definition: pfm_gen_analysis.cpp:81
I7_IFETCH_L2_MISS_L3_HIT_SNOOP
#define I7_IFETCH_L2_MISS_L3_HIT_SNOOP
Definition: pfm_gen_analysis.cpp:72
ReadAndWriteWhiteBoard.dst
dst
Definition: ReadAndWriteWhiteBoard.py:33
calc_post_deriv_values
void calc_post_deriv_values()
Definition: pfm_gen_analysis.cpp:1573
std::vector::empty
T empty(T... args)
plotSpeedupsPyRoot.line
line
Definition: plotSpeedupsPyRoot.py:198
put_S_module
void put_S_module(S_module *cur_module, const char *dir)
Definition: pfm_gen_analysis.cpp:1045
main
int main(int argc, char *argv[])
Definition: pfm_gen_analysis.cpp:1618
MAX_INV_STR_LENGTH
#define MAX_INV_STR_LENGTH
Definition: pfm_gen_analysis.cpp:96
check_for_core_caa_events
bool check_for_core_caa_events()
Definition: pfm_gen_analysis.cpp:351
std::vector::end
T end(T... args)
I7_L3_UNSHARED_HIT_CYCLES
#define I7_L3_UNSHARED_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:63
S_module::S_module
S_module()
Definition: pfm_gen_analysis.cpp:788
skipString
bool skipString(const char *strptr, const char *srcbuffer, const char **dstbuffer)
Definition: pfm_gen_analysis.cpp:146
FileInfo::CacheItemComparator::operator()
bool operator()(const int &a, const CacheItem &b) const
Definition: pfm_gen_analysis.cpp:192
std::unique_ptr< std::istringstream >
graphanalysis.filename
filename
Definition: graphanalysis.py:130
PipeReader::~PipeReader
~PipeReader(void)
Definition: pfm_gen_analysis.cpp:119
S_module::init
void init(const char *name, const char *architecture, const char *event_name, unsigned int c_mask, unsigned int inv_mask, unsigned int smpl_period)
Definition: pfm_gen_analysis.cpp:798
S_module::total_num_samples
unsigned int total_num_samples
Definition: pfm_gen_analysis.cpp:779
MAX_FILENAME_LENGTH
#define MAX_FILENAME_LENGTH
Definition: pfm_gen_analysis.cpp:80
I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES
#define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:68
init_nhm_caa_events_displ
void init_nhm_caa_events_displ()
Definition: pfm_gen_analysis.cpp:501
calc_core_deriv_values
void calc_core_deriv_values(double totalCycles)
Definition: pfm_gen_analysis.cpp:420
I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES
#define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:67
S_module::set_total
void set_total(unsigned int total)
Definition: pfm_gen_analysis.cpp:807
Gaudi::ParticleProperties::index
size_t index(const Gaudi::ParticleProperty *property, const Gaudi::Interfaces::IParticlePropertySvc *service)
helper utility for mapping of Gaudi::ParticleProperty object into non-negative integral sequential id...
Definition: IParticlePropertySvc.cpp:39
MAX_SIMPLE_SYM_MOD_LENGTH
#define MAX_SIMPLE_SYM_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:87
gaudirun.argv
list argv
Definition: gaudirun.py:327
MAX_SP_STR_LENGTH
#define MAX_SP_STR_LENGTH
Definition: pfm_gen_analysis.cpp:97
FileInfo::m_symbolCache
SymbolCache m_symbolCache
Definition: pfm_gen_analysis.cpp:188
S_module
Definition: pfm_gen_analysis.cpp:776
MAX_ARCH_NAME_LENGTH
#define MAX_ARCH_NAME_LENGTH
Definition: pfm_gen_analysis.cpp:94