The Gaudi Framework  v39r1 (adb068b2)
pfm_gen_analysis.cpp File Reference
#include <ctype.h>
#include <cxxabi.h>
#include <fcntl.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <zlib.h>
#include <algorithm>
#include <iostream>
#include <list>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
#include <dirent.h>
#include <errno.h>
Include dependency graph for pfm_gen_analysis.cpp:

Go to the source code of this file.

Classes

class  PipeReader
 
class  FileInfo
 
struct  FileInfo::CacheItem
 
struct  FileInfo::CacheItemComparator
 
class  S_module
 

Macros

#define CORE_L2_MISS_CYCLES   200
 
#define CORE_L2_HIT_CYCLES   14.5
 
#define CORE_L1_DTLB_MISS_CYCLES   10
 
#define CORE_LCP_STALL_CYCLES   6
 
#define CORE_UNKNOWN_ADDR_STORE_CYCLES   5
 
#define CORE_OVERLAPPING_CYCLES   6
 
#define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES   20
 
#define I7_L1_DTLB_WALK_COMPLETED_CYCLES   35
 
#define I7_L1_ITLB_WALK_COMPLETED_CYCLES   35
 
#define I7_L2_HIT_CYCLES   6
 
#define I7_L3_UNSHARED_HIT_CYCLES   35
 
#define I7_OTHER_CORE_L2_HIT_CYCLES   60
 
#define I7_OTHER_CORE_L2_HITM_CYCLES   75
 
#define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES   225
 
#define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES   360
 
#define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES   180
 
#define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT   200
 
#define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT   350
 
#define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP   35
 
#define I7_IFETCH_L2_MISS_L3_HIT_SNOOP   60
 
#define I7_IFETCH_L2_MISS_L3_HITM   75
 
#define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD   180
 
#define MAX_MODULES   1000
 
#define EXPECTED_CPI   0.25
 
#define MAX_FILENAME_LENGTH   1024
 
#define MAX_SAMPLE_INDEX_LENGTH   10000
 
#define MAX_SYM_LENGTH   15000
 
#define MAX_SYM_MOD_LENGTH   20000
 
#define MAX_LIB_LENGTH   5000
 
#define MAX_LIB_MOD_LENGTH   7000
 
#define MAX_SIMPLE_SYM_LENGTH   300
 
#define MAX_SIMPLE_SYM_MOD_LENGTH   500
 
#define MAX_SIMPLE_LIB_LENGTH   300
 
#define MAX_SIMPLE_LIB_MOD_LENGTH   500
 
#define MAX_LINE_LENGTH   20000
 
#define MAX_EVENT_NAME_LENGTH   150
 
#define MAX_MODULE_NAME_LENGTH   250
 
#define MAX_VALUE_STRING_LENGTH   250
 
#define MAX_ARCH_NAME_LENGTH   20
 
#define MAX_CMASK_STR_LENGTH   5
 
#define MAX_INV_STR_LENGTH   5
 
#define MAX_SP_STR_LENGTH   50
 
#define PIPE_BUFFER_LENGTH   1000
 

Functions

bool skipWhitespaces (const char *srcbuffer, const char **destbuffer)
 
bool skipString (const char *strptr, const char *srcbuffer, const char **dstbuffer)
 
void init_core_caa_events ()
 
void init_nhm_caa_events ()
 
bool check_for_core_caa_events ()
 
bool check_for_nhm_caa_events ()
 
void init_core_caa_events_displ ()
 
void calc_core_deriv_values (double totalCycles)
 
void init_nhm_caa_events_displ ()
 
void calc_nhm_deriv_values (double totalCycles)
 
void html_special_chars (const char *s, char *s_mod)
 
const char * func_name (const char *demangled_symbol)
 
void put_S_module (S_module *cur_module, const char *dir)
 
int read_S_file (const char *dir, const char *filename)
 
int read_S_events (const char *dir, const char *filename)
 
int finalize_S_html_pages (const char *dir)
 
int read_C_file (const char *dir, const char *filename)
 
void put_C_header (FILE *fp, std::vector< std::string > &columns)
 
void put_C_modules (FILE *fp, std::vector< std::string > &columns)
 
void put_C_footer (FILE *fp)
 
void put_C_header_csv (FILE *fp, std::vector< std::string > &columns)
 
void put_C_modules_csv (FILE *fp, std::vector< std::string > &columns)
 
double normalize (std::string field, double value, double normalizeTo)
 
void calc_post_deriv_values ()
 
double getTotalCycles ()
 
int main (int argc, char *argv[])
 

Macro Definition Documentation

◆ CORE_L1_DTLB_MISS_CYCLES

#define CORE_L1_DTLB_MISS_CYCLES   10

Definition at line 53 of file pfm_gen_analysis.cpp.

◆ CORE_L2_HIT_CYCLES

#define CORE_L2_HIT_CYCLES   14.5

Definition at line 52 of file pfm_gen_analysis.cpp.

◆ CORE_L2_MISS_CYCLES

#define CORE_L2_MISS_CYCLES   200

Definition at line 51 of file pfm_gen_analysis.cpp.

◆ CORE_LCP_STALL_CYCLES

#define CORE_LCP_STALL_CYCLES   6

Definition at line 54 of file pfm_gen_analysis.cpp.

◆ CORE_OVERLAPPING_CYCLES

#define CORE_OVERLAPPING_CYCLES   6

Definition at line 56 of file pfm_gen_analysis.cpp.

◆ CORE_SPAN_ACROSS_CACHE_LINE_CYCLES

#define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES   20

Definition at line 57 of file pfm_gen_analysis.cpp.

◆ CORE_UNKNOWN_ADDR_STORE_CYCLES

#define CORE_UNKNOWN_ADDR_STORE_CYCLES   5

Definition at line 55 of file pfm_gen_analysis.cpp.

◆ EXPECTED_CPI

#define EXPECTED_CPI   0.25

Definition at line 78 of file pfm_gen_analysis.cpp.

◆ I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP

#define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP   35

Definition at line 71 of file pfm_gen_analysis.cpp.

◆ I7_IFETCH_L2_MISS_L3_HIT_SNOOP

#define I7_IFETCH_L2_MISS_L3_HIT_SNOOP   60

Definition at line 72 of file pfm_gen_analysis.cpp.

◆ I7_IFETCH_L2_MISS_L3_HITM

#define I7_IFETCH_L2_MISS_L3_HITM   75

Definition at line 73 of file pfm_gen_analysis.cpp.

◆ I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT

#define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT   200

Definition at line 69 of file pfm_gen_analysis.cpp.

◆ I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD

#define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD   180

Definition at line 74 of file pfm_gen_analysis.cpp.

◆ I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT

#define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT   350

Definition at line 70 of file pfm_gen_analysis.cpp.

◆ I7_L1_DTLB_WALK_COMPLETED_CYCLES

#define I7_L1_DTLB_WALK_COMPLETED_CYCLES   35

Definition at line 60 of file pfm_gen_analysis.cpp.

◆ I7_L1_ITLB_WALK_COMPLETED_CYCLES

#define I7_L1_ITLB_WALK_COMPLETED_CYCLES   35

Definition at line 61 of file pfm_gen_analysis.cpp.

◆ I7_L2_HIT_CYCLES

#define I7_L2_HIT_CYCLES   6

Definition at line 62 of file pfm_gen_analysis.cpp.

◆ I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES

#define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES   225

Definition at line 66 of file pfm_gen_analysis.cpp.

◆ I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES

#define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES   180

Definition at line 68 of file pfm_gen_analysis.cpp.

◆ I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES

#define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES   360

Definition at line 67 of file pfm_gen_analysis.cpp.

◆ I7_L3_UNSHARED_HIT_CYCLES

#define I7_L3_UNSHARED_HIT_CYCLES   35

Definition at line 63 of file pfm_gen_analysis.cpp.

◆ I7_OTHER_CORE_L2_HIT_CYCLES

#define I7_OTHER_CORE_L2_HIT_CYCLES   60

Definition at line 64 of file pfm_gen_analysis.cpp.

◆ I7_OTHER_CORE_L2_HITM_CYCLES

#define I7_OTHER_CORE_L2_HITM_CYCLES   75

Definition at line 65 of file pfm_gen_analysis.cpp.

◆ MAX_ARCH_NAME_LENGTH

#define MAX_ARCH_NAME_LENGTH   20

Definition at line 94 of file pfm_gen_analysis.cpp.

◆ MAX_CMASK_STR_LENGTH

#define MAX_CMASK_STR_LENGTH   5

Definition at line 95 of file pfm_gen_analysis.cpp.

◆ MAX_EVENT_NAME_LENGTH

#define MAX_EVENT_NAME_LENGTH   150

Definition at line 91 of file pfm_gen_analysis.cpp.

◆ MAX_FILENAME_LENGTH

#define MAX_FILENAME_LENGTH   1024

Definition at line 80 of file pfm_gen_analysis.cpp.

◆ MAX_INV_STR_LENGTH

#define MAX_INV_STR_LENGTH   5

Definition at line 96 of file pfm_gen_analysis.cpp.

◆ MAX_LIB_LENGTH

#define MAX_LIB_LENGTH   5000

Definition at line 84 of file pfm_gen_analysis.cpp.

◆ MAX_LIB_MOD_LENGTH

#define MAX_LIB_MOD_LENGTH   7000

Definition at line 85 of file pfm_gen_analysis.cpp.

◆ MAX_LINE_LENGTH

#define MAX_LINE_LENGTH   20000

Definition at line 90 of file pfm_gen_analysis.cpp.

◆ MAX_MODULE_NAME_LENGTH

#define MAX_MODULE_NAME_LENGTH   250

Definition at line 92 of file pfm_gen_analysis.cpp.

◆ MAX_MODULES

#define MAX_MODULES   1000

Definition at line 76 of file pfm_gen_analysis.cpp.

◆ MAX_SAMPLE_INDEX_LENGTH

#define MAX_SAMPLE_INDEX_LENGTH   10000

Definition at line 81 of file pfm_gen_analysis.cpp.

◆ MAX_SIMPLE_LIB_LENGTH

#define MAX_SIMPLE_LIB_LENGTH   300

Definition at line 88 of file pfm_gen_analysis.cpp.

◆ MAX_SIMPLE_LIB_MOD_LENGTH

#define MAX_SIMPLE_LIB_MOD_LENGTH   500

Definition at line 89 of file pfm_gen_analysis.cpp.

◆ MAX_SIMPLE_SYM_LENGTH

#define MAX_SIMPLE_SYM_LENGTH   300

Definition at line 86 of file pfm_gen_analysis.cpp.

◆ MAX_SIMPLE_SYM_MOD_LENGTH

#define MAX_SIMPLE_SYM_MOD_LENGTH   500

Definition at line 87 of file pfm_gen_analysis.cpp.

◆ MAX_SP_STR_LENGTH

#define MAX_SP_STR_LENGTH   50

Definition at line 97 of file pfm_gen_analysis.cpp.

◆ MAX_SYM_LENGTH

#define MAX_SYM_LENGTH   15000

Definition at line 82 of file pfm_gen_analysis.cpp.

◆ MAX_SYM_MOD_LENGTH

#define MAX_SYM_MOD_LENGTH   20000

Definition at line 83 of file pfm_gen_analysis.cpp.

◆ MAX_VALUE_STRING_LENGTH

#define MAX_VALUE_STRING_LENGTH   250

Definition at line 93 of file pfm_gen_analysis.cpp.

◆ PIPE_BUFFER_LENGTH

#define PIPE_BUFFER_LENGTH   1000

Definition at line 99 of file pfm_gen_analysis.cpp.

Function Documentation

◆ calc_core_deriv_values()

void calc_core_deriv_values ( double  totalCycles)

Definition at line 419 of file pfm_gen_analysis.cpp.

419  {
420  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
421  ++it ) {
422  ( it->second )["Total Cycles"] = ( it->second )["UNHALTED_CORE_CYCLES"];
423  ( it->second )["Stalled Cycles"] = ( it->second )["RS_UOPS_DISPATCHED CMASK=1 INV=1"];
424  ( it->second )["L2 Miss Impact"] = ( it->second )["MEM_LOAD_RETIRED:L2_LINE_MISS"] * CORE_L2_MISS_CYCLES;
425  ( it->second )["L2 Hit Impact"] =
426  ( ( it->second )["MEM_LOAD_RETIRED:L1D_LINE_MISS"] - ( it->second )["MEM_LOAD_RETIRED:L2_LINE_MISS"] ) *
427  CORE_L2_HIT_CYCLES;
428  ( it->second )["L1 DTLB Miss Impact"] = ( it->second )["MEM_LOAD_RETIRED:DTLB_MISS"] * CORE_L1_DTLB_MISS_CYCLES;
429  ( it->second )["LCP Stalls Impact"] = ( it->second )["ILD_STALL"] * CORE_LCP_STALL_CYCLES;
430  ( it->second )["Loads Blocked by Unknown Address Store Impact"] =
431  ( it->second )["LOAD_BLOCK:STA"] * CORE_UNKNOWN_ADDR_STORE_CYCLES;
432  ( it->second )["Loads Overlapped with Stores Impact"] =
433  ( it->second )["LOAD_BLOCK:OVERLAP_STORE"] * CORE_OVERLAPPING_CYCLES;
434  ( it->second )["Loads Spanning across Cache Lines Impact"] =
435  ( it->second )["LOAD_BLOCK:UNTIL_RETIRE"] * CORE_SPAN_ACROSS_CACHE_LINE_CYCLES;
436  ( it->second )["Store-Fwd Stalls Impact"] = ( it->second )["Loads Blocked by Unknown Address Store Impact"] +
437  ( it->second )["Loads Overlapped with Stores Impact"] +
438  ( it->second )["Loads Spanning across Cache Lines Impact"];
439  ( it->second )["Counted Stalled Cycles"] =
440  ( it->second )["L2 Miss Impact"] + ( it->second )["L2 Hit Impact"] + ( it->second )["LCP Stalls Impact"] +
441  ( it->second )["L1 DTLB Miss Impact"] + ( it->second )["Store-Fwd Stalls Impact"];
442  ( it->second )["Instructions Retired"] = ( it->second )["INSTRUCTIONS_RETIRED"];
443  ( it->second )["ITLB Miss Rate in %"] =
444  ( ( it->second )["ITLB_MISS_RETIRED"] / ( it->second )["INSTRUCTIONS_RETIRED"] ) * 100;
445  ( it->second )["Branch Instructions"] = ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"];
446  ( it->second )["Load Instructions"] = ( it->second )["INST_RETIRED:LOADS"];
447  ( it->second )["Store Instructions"] = ( it->second )["INST_RETIRED:STORES"];
448  ( it->second )["Other Instructions"] = ( it->second )["INST_RETIRED:OTHER"] -
449  ( it->second )["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] -
450  ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"];
451  ( it->second )["% of Mispredicted Branches"] =
452  ( ( it->second )["MISPREDICTED_BRANCH_RETIRED"] / ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"] ) * 100;
453  ( it->second )["Packed SIMD Computational Instructions"] =
454  ( it->second )["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"];
455  ( it->second )["Counted Instructions Retired"] =
456  ( it->second )["Branch Instructions"] + ( it->second )["Load Instructions"] +
457  ( it->second )["Store Instructions"] + ( it->second )["Other Instructions"] +
458  ( it->second )["Packed SIMD Computational Instructions"];
459  ( it->second )["CPI"] = ( it->second )["UNHALTED_CORE_CYCLES"] / ( it->second )["INSTRUCTIONS_RETIRED"];
460 
461  double localPerformanceImprovement = ( it->second )["CPI"] / EXPECTED_CPI;
462  double cyclesAfterImprovement = ( it->second )["UNHALTED_CORE_CYCLES"] / localPerformanceImprovement;
463  double totalCyclesAfterImprovement = totalCycles - ( it->second )["UNHALTED_CORE_CYCLES"] + cyclesAfterImprovement;
464  ( it->second )["iMargin"] = 100 - ( totalCyclesAfterImprovement / totalCycles ) * 100;
465 
466  ( it->second )["% of Total Cycles"] =
467  ( it->second )["RS_UOPS_DISPATCHED CMASK=1 INV=1"] * 100 / ( it->second )["UNHALTED_CORE_CYCLES"];
468  ( it->second )["L2 Miss % of counted Stalled Cycles"] =
469  ( it->second )["L2 Miss Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
470  ( it->second )["L2 Hit % of counted Stalled Cycles"] =
471  ( it->second )["L2 Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
472  ( it->second )["L1 DTLB Miss % of counted Stalled Cycles"] =
473  ( it->second )["L1 DTLB Miss Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
474  ( it->second )["LCP Stalls % of counted Stalled Cycles"] =
475  ( it->second )["LCP Stalls Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
476  ( it->second )["Store-Fwd Stalls % of counted Stalled Cycles"] =
477  ( it->second )["Store-Fwd Stalls Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
478  ( it->second )["Loads Blocked % of Store-Fwd Stalls Cycles"] =
479  ( it->second )["Loads Blocked by Unknown Address Store Impact"] * 100 /
480  ( it->second )["Store-Fwd Stalls Impact"];
481  ( it->second )["Loads Overlapped % of Store-Fwd Stalls Cycles"] =
482  ( it->second )["Loads Overlapped with Stores Impact"] * 100 / ( it->second )["Store-Fwd Stalls Impact"];
483  ( it->second )["Loads Spanning % of Store-Fwd Stalls Cycles"] =
484  ( it->second )["Loads Spanning across Cache Lines Impact"] * 100 / ( it->second )["Store-Fwd Stalls Impact"];
485 
486  ( it->second )["Load % of all Instructions"] =
487  ( it->second )["INST_RETIRED:LOADS"] * 100 / ( it->second )["Counted Instructions Retired"];
488  ( it->second )["Store % of all Instructions"] =
489  ( it->second )["INST_RETIRED:STORES"] * 100 / ( it->second )["Counted Instructions Retired"];
490  ( it->second )["Branch % of all Instructions"] =
491  ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"] * 100 / ( it->second )["Counted Instructions Retired"];
492  ( it->second )["Packed SIMD % of all Instructions"] =
493  ( it->second )["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] * 100 /
494  ( it->second )["Counted Instructions Retired"];
495  ( it->second )["Other % of all Instructions"] =
496  ( it->second )["Other Instructions"] * 100 / ( it->second )["Counted Instructions Retired"];
497  }
498 }

◆ calc_nhm_deriv_values()

void calc_nhm_deriv_values ( double  totalCycles)

Definition at line 605 of file pfm_gen_analysis.cpp.

605  {
606  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
607  ++it ) {
608  ( it->second )["Total Cycles"] = ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
609 
610  ( it->second )["L2 Hit Impact"] = ( it->second )["MEM_LOAD_RETIRED:L2_HIT"] * I7_L2_HIT_CYCLES;
611  ( it->second )["L3 Unshared Hit Impact"] =
612  ( it->second )["MEM_LOAD_RETIRED:L3_UNSHARED_HIT"] * I7_L3_UNSHARED_HIT_CYCLES;
613  if ( ( it->second )["MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] >
614  ( it->second )["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] ) {
615  ( it->second )["L2 Other Core Hit Impact"] = ( ( it->second )["MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] -
616  ( it->second )["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] ) *
617  I7_OTHER_CORE_L2_HIT_CYCLES;
618  } else {
619  ( it->second )["L2 Other Core Hit Impact"] = 0.0;
620  }
621  ( it->second )["L2 Other Core Hit Modified Impact"] =
622  ( it->second )["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] * I7_OTHER_CORE_L2_HITM_CYCLES;
623  ( it->second )["L3 Miss -> Local DRAM Hit Impact"] =
624  ( it->second )["MEM_UNCORE_RETIRED:LOCAL_DRAM"] * I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES;
625  ( it->second )["L3 Miss -> Remote DRAM Hit Impact"] =
626  ( it->second )["MEM_UNCORE_RETIRED:REMOTE_DRAM"] * I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES;
627  ( it->second )["L3 Miss -> Remote Cache Hit Impact"] =
628  ( it->second )["MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT"] * I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES;
629  ( it->second )["L3 Miss -> Total Impact"] = ( it->second )["L3 Miss -> Local DRAM Hit Impact"] +
630  ( it->second )["L3 Miss -> Remote DRAM Hit Impact"] +
631  ( it->second )["L3 Miss -> Remote Cache Hit Impact"];
632  ( it->second )["L1 DTLB Miss Impact"] =
633  ( it->second )["DTLB_LOAD_MISSES:WALK_COMPLETED"] * I7_L1_DTLB_WALK_COMPLETED_CYCLES;
634  ( it->second )["Counted Stalled Cycles due to Load Ops"] =
635  ( it->second )["L3 Miss -> Total Impact"] + ( it->second )["L2 Hit Impact"] +
636  ( it->second )["L1 DTLB Miss Impact"] + ( it->second )["L3 Unshared Hit Impact"] +
637  ( it->second )["L2 Other Core Hit Modified Impact"] + ( it->second )["L2 Other Core Hit Impact"];
638  ( it->second )["Cycles spent during DIV & SQRT Ops"] = ( it->second )["ARITH:CYCLES_DIV_BUSY"];
639  ( it->second )["Total Counted Stalled Cycles"] =
640  ( it->second )["Counted Stalled Cycles due to Load Ops"] + ( it->second )["Cycles spent during DIV & SQRT Ops"];
641  ( it->second )["Stalled Cycles"] =
642  ( it->second )["Total Counted Stalled Cycles"]; // TO BE FIXED when UOPS_EXECUTED:0x3f is fixed!!
643  ( it->second )["% of Total Cycles"] =
644  ( it->second )["Stalled Cycles"] * 100 / ( it->second )["CPU_CLK_UNHALTED:THREAD_P"]; // TO BE FIXED!! see above
645  ( it->second )["L3 Miss % of Load Stalls"] =
646  ( it->second )["L3 Miss -> Total Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
647  ( it->second )["L2 Hit % of Load Stalls"] =
648  ( it->second )["L2 Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
649  ( it->second )["L1 DTLB Miss % of Load Stalls"] =
650  ( it->second )["L1 DTLB Miss Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
651  ( it->second )["L3 Unshared Hit % of Load Stalls"] =
652  ( it->second )["L3 Unshared Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
653  ( it->second )["L2 Other Core Hit % of Load Stalls"] =
654  ( it->second )["L2 Other Core Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
655  ( it->second )["L2 Other Core Hit Modified % of Load Stalls"] =
656  ( it->second )["L2 Other Core Hit Modified Impact"] * 100 /
657  ( it->second )["Counted Stalled Cycles due to Load Ops"];
658  ( it->second )["DIV & SQRT Ops % of counted Stalled Cycles"] =
659  ( it->second )["Cycles spent during DIV & SQRT Ops"] * 100 / ( it->second )["Total Counted Stalled Cycles"];
660 
661  ( it->second )["Cycles IFETCH served by Local DRAM"] =
662  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT;
663  ( it->second )["Cycles IFETCH served by L3 (Modified)"] =
664  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * I7_IFETCH_L2_MISS_L3_HITM;
665  ( it->second )["Cycles IFETCH served by L3 (Clean Snoop)"] =
666  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * I7_IFETCH_L2_MISS_L3_HIT_SNOOP;
667  ( it->second )["Cycles IFETCH served by Remote L2"] =
668  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD;
669  ( it->second )["Cycles IFETCH served by Remote DRAM"] =
670  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT;
671  ( it->second )["Cycles IFETCH served by L3 (No Snoop)"] =
672  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP;
673  ( it->second )["Total L2 IFETCH miss Impact"] =
674  ( it->second )["Cycles IFETCH served by Local DRAM"] + ( it->second )["Cycles IFETCH served by L3 (Modified)"] +
675  ( it->second )["Cycles IFETCH served by L3 (Clean Snoop)"] +
676  ( it->second )["Cycles IFETCH served by Remote L2"] + ( it->second )["Cycles IFETCH served by Remote DRAM"] +
677  ( it->second )["Cycles IFETCH served by L3 (No Snoop)"];
678  ( it->second )["Local DRAM IFECTHes % Impact"] =
679  ( it->second )["Cycles IFETCH served by Local DRAM"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
680  ( it->second )["L3 (Modified) IFECTHes % Impact"] =
681  ( it->second )["Cycles IFETCH served by L3 (Modified)"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
682  ( it->second )["L3 (Clean Snoop) IFECTHes % Impact"] = ( it->second )["Cycles IFETCH served by L3 (Clean Snoop)"] *
683  100 / ( it->second )["Total L2 IFETCH miss Impact"];
684  ( it->second )["Remote L2 IFECTHes % Impact"] =
685  ( it->second )["Cycles IFETCH served by Remote L2"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
686  ( it->second )["Remote DRAM IFECTHes % Impact"] =
687  ( it->second )["Cycles IFETCH served by Remote DRAM"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
688  ( it->second )["L3 (No Snoop) IFECTHes % Impact"] =
689  ( it->second )["Cycles IFETCH served by L3 (No Snoop)"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
690  ( it->second )["Total L2 IFETCH misses"] = ( it->second )["L2_RQSTS:IFETCH_MISS"];
691  ( it->second )["% of IFETCHes served by Local DRAM"] =
692  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
693  ( it->second )["% of IFETCHes served by L3 (Modified)"] =
694  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
695  ( it->second )["% of IFETCHes served by L3 (Clean Snoop)"] =
696  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * 100 /
697  ( it->second )["L2_RQSTS:IFETCH_MISS"];
698  ( it->second )["% of IFETCHes served by Remote L2"] =
699  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * 100 /
700  ( it->second )["L2_RQSTS:IFETCH_MISS"];
701  ( it->second )["% of IFETCHes served by Remote DRAM"] =
702  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
703  ( it->second )["% of IFETCHes served by L3 (No Snoop)"] =
704  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
705  ( it->second )["% of L2 IFETCH misses"] =
706  ( it->second )["L2_RQSTS:IFETCH_MISS"] * 100 /
707  ( ( it->second )["L2_RQSTS:IFETCH_MISS"] + ( it->second )["L2_RQSTS:IFETCH_HIT"] );
708  ( it->second )["L1 ITLB Miss Impact"] =
709  ( it->second )["ITLB_MISSES:WALK_COMPLETED"] * I7_L1_ITLB_WALK_COMPLETED_CYCLES;
710 
711  ( it->second )["Total Branch Instructions Executed"] = ( it->second )["BR_INST_EXEC:ANY"];
712  ( it->second )["% of Mispredicted Branches"] =
713  ( it->second )["BR_MISP_EXEC:ANY"] * 100 / ( it->second )["BR_INST_EXEC:ANY"];
714  ( it->second )["Direct Near Calls % of Total Branches Executed"] =
715  ( it->second )["BR_INST_EXEC:DIRECT_NEAR_CALL"] * 100 / ( it->second )["Total Branch Instructions Executed"];
716  ( it->second )["Indirect Near Calls % of Total Branches Executed"] =
717  ( it->second )["BR_INST_EXEC:INDIRECT_NEAR_CALL"] * 100 / ( it->second )["Total Branch Instructions Executed"];
718  ( it->second )["Indirect Near Non-Calls % of Total Branches Executed"] =
719  ( it->second )["BR_INST_EXEC:INDIRECT_NON_CALL"] * 100 / ( it->second )["Total Branch Instructions Executed"];
720  ( it->second )["All Near Calls % of Total Branches Executed"] =
721  ( it->second )["BR_INST_EXEC:NEAR_CALLS"] * 100 / ( it->second )["Total Branch Instructions Executed"];
722  ( it->second )["All Non Calls % of Total Branches Executed"] =
723  ( it->second )["BR_INST_EXEC:NON_CALLS"] * 100 / ( it->second )["Total Branch Instructions Executed"];
724  ( it->second )["All Returns % of Total Branches Executed"] =
725  ( it->second )["BR_INST_EXEC:RETURN_NEAR"] * 100 / ( it->second )["Total Branch Instructions Executed"];
726  ( it->second )["Total Branch Instructions Retired"] = ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"];
727  ( it->second )["Conditionals % of Total Branches Retired"] =
728  ( it->second )["BR_INST_RETIRED:CONDITIONAL"] * 100 / ( it->second )["Total Branch Instructions Retired"];
729  ( it->second )["Near Calls % of Total Branches Retired"] =
730  ( it->second )["BR_INST_RETIRED:NEAR_CALL"] * 100 / ( it->second )["Total Branch Instructions Retired"];
731 
732  ( it->second )["Instruction Starvation % of Total Cycles"] =
733  ( ( it->second )["UOPS_ISSUED:ANY CMASK=1 INV=1"] - ( it->second )["RESOURCE_STALLS:ANY"] ) * 100 /
734  ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
735  ( it->second )["% of Total Cycles spent handling FP exceptions"] =
736  ( it->second )["UOPS_DECODED:MS CMASK=1"] * 100 / ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
737  ( it->second )["# of Instructions per Call"] =
738  ( it->second )["INST_RETIRED:ANY_P"] / ( it->second )["BR_INST_EXEC:NEAR_CALLS"];
739 
740  ( it->second )["Instructions Retired"] = ( it->second )["INST_RETIRED:ANY_P"];
741  ( it->second )["ITLB Miss Rate in %"] =
742  ( ( it->second )["ITLB_MISS_RETIRED"] / ( it->second )["INST_RETIRED:ANY_P"] ) * 100;
743 
744  ( it->second )["Branch Instructions"] = ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"];
745  ( it->second )["Load Instructions"] = ( it->second )["MEM_INST_RETIRED:LOADS"];
746  ( it->second )["Store Instructions"] = ( it->second )["MEM_INST_RETIRED:STORES"];
747  ( it->second )["Other Instructions"] =
748  ( it->second )["Instructions Retired"] - ( it->second )["MEM_INST_RETIRED:LOADS"] -
749  ( it->second )["MEM_INST_RETIRED:STORES"] - ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"];
750  ( it->second )["Packed UOPS Retired"] =
751  ( it->second )["SSEX_UOPS_RETIRED:PACKED_DOUBLE"] + ( it->second )["SSEX_UOPS_RETIRED:PACKED_SINGLE"];
752  ( it->second )["CPI"] = ( it->second )["CPU_CLK_UNHALTED:THREAD_P"] / ( it->second )["INST_RETIRED:ANY_P"];
753 
754  double localPerformanceImprovement = ( it->second )["CPI"] / EXPECTED_CPI;
755  double cyclesAfterImprovement = ( it->second )["CPU_CLK_UNHALTED:THREAD_P"] / localPerformanceImprovement;
756  double totalCyclesAfterImprovement =
757  totalCycles - ( it->second )["CPU_CLK_UNHALTED:THREAD_P"] + cyclesAfterImprovement;
758  ( it->second )["iMargin"] = 100 - ( totalCyclesAfterImprovement / totalCycles ) * 100;
759 
760  ( it->second )["Load % of all Instructions"] =
761  ( it->second )["MEM_INST_RETIRED:LOADS"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
762  ( it->second )["Store % of all Instructions"] =
763  ( it->second )["MEM_INST_RETIRED:STORES"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
764  ( it->second )["Branch % of all Instructions"] =
765  ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
766  ( it->second )["Other % of all Instructions"] =
767  ( it->second )["Other Instructions"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
768 
769  ( it->second )["Packed % of all UOPS Retired"] =
770  ( it->second )["Packed UOPS Retired"] * 100 / ( it->second )["UOPS_RETIRED:ANY"];
771  }
772 }

◆ calc_post_deriv_values()

void calc_post_deriv_values ( )

Definition at line 1563 of file pfm_gen_analysis.cpp.

1563  {
1564  if ( nehalem ) {
1565  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1566  ++it ) {
1567  double simdnorm =
1568  1. - normalize( "Packed % of all UOPS Retired", ( it->second )["Packed % of all UOPS Retired"], 1 );
1569  double misspnorm = normalize( "% of Mispredicted Branches", ( it->second )["% of Mispredicted Branches"], 1 );
1570  double stallnorm = normalize( "Stalled Cycles", ( it->second )["Stalled Cycles"], 1 );
1571  ( it->second )["iFactor"] = stallnorm * ( simdnorm + misspnorm + stallnorm );
1572  }
1573  } else {
1574  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1575  ++it ) {
1576  double simdnorm =
1577  1. - normalize( "Packed SIMD % of all Instructions", ( it->second )["Packed SIMD % of all Instructions"], 1 );
1578  double misspnorm = normalize( "% of Mispredicted Branches", ( it->second )["% of Mispredicted Branches"], 1 );
1579  double stallnorm = normalize( "Stalled Cycles", ( it->second )["Stalled Cycles"], 1 );
1580  ( it->second )["iFactor"] = stallnorm * ( simdnorm + misspnorm + stallnorm );
1581  }
1582  }
1583 }

◆ check_for_core_caa_events()

bool check_for_core_caa_events ( )

Definition at line 350 of file pfm_gen_analysis.cpp.

350  {
351  for ( std::vector<std::string>::const_iterator it = core_caa_events.begin(); it != core_caa_events.end(); ++it ) {
352  if ( find( C_events.begin(), C_events.end(), ( *it ) ) == C_events.end() ) {
353  fprintf( stderr, "ERROR: Cannot find event %s!!!\naborting...\n", ( *it ).c_str() );
354  return false;
355  }
356  }
357  return true;
358 }

◆ check_for_nhm_caa_events()

bool check_for_nhm_caa_events ( )

Definition at line 360 of file pfm_gen_analysis.cpp.

360  {
361  for ( std::vector<std::string>::const_iterator it = nhm_caa_events.begin(); it != nhm_caa_events.end(); ++it ) {
362  if ( find( C_events.begin(), C_events.end(), ( *it ) ) == C_events.end() ) {
363  fprintf( stderr, "ERROR: Cannot find event %s!!!\naborting...\n", ( *it ).c_str() );
364  return false;
365  }
366  }
367  return true;
368 }

◆ finalize_S_html_pages()

int finalize_S_html_pages ( const char *  dir)

Definition at line 1356 of file pfm_gen_analysis.cpp.

1356  {
1357  for ( std::map<std::string, unsigned int>::const_iterator i = modules_tot_samples.begin();
1358  i != modules_tot_samples.end(); i++ ) {
1359  char module_filename[MAX_FILENAME_LENGTH];
1360  strcpy( module_filename, dir );
1361  strcat( module_filename, "/HTML/" );
1362  strcat( module_filename, ( i->first ).c_str() );
1363  strcat( module_filename, ".html" );
1364  FILE* module_file = fopen( module_filename, "a" );
1365  if ( module_file == NULL ) {
1366  fprintf( stderr, "ERROR: Unable to append to file: %s\naborting...\n", module_filename );
1367  exit( 1 );
1368  }
1369  fprintf( module_file, "</body>\n</html>\n" );
1370  if ( fclose( module_file ) ) {
1371  fprintf( stderr, "ERROR: Cannot close file %s!!!\naborting...\n", module_filename );
1372  exit( 1 );
1373  }
1374  }
1375  return 0;
1376 }

◆ func_name()

const char* func_name ( const char *  demangled_symbol)

Definition at line 867 of file pfm_gen_analysis.cpp.

/// Extracts the bare function name from a demangled C++ symbol.
///
/// The buffer behind @p demangled_symbol is modified in place (NUL terminators
/// are written into it) even though the parameter is declared const, so the
/// caller must pass writable storage. The returned pointer refers into that
/// same buffer.
///
/// Behaviour:
///  - If the text "operator" occurs anywhere in the symbol, the result starts
///    there and is cut right after the operator token (e.g. "operator<<=").
///    When no known token follows (e.g. a conversion operator), the rest of
///    the symbol is returned untouched.
///  - Otherwise the parameter list that ends at the last ')' is removed, a
///    trailing template argument list "<...>" is removed, and the result is
///    the trailing run of [A-Za-z0-9_~] characters (the last character before
///    a plain parameter list is accepted without being checked).
///  - A symbol without ')' is returned unchanged.
///
/// All backward scans stop at the beginning of the buffer; previously they
/// could read the bytes in front of it (e.g. for "foo()" or "()").
///
/// @param demangled_symbol  NUL-terminated, writable demangled symbol
/// @return pointer to the function name inside the given buffer
const char* func_name( const char* demangled_symbol ) {
  char* const first = const_cast<char*>( demangled_symbol );

  char* const operator_string_begin = strstr( first, "operator" );
  if ( operator_string_begin != NULL ) {
    // Order matters: longer tokens must be tried before their prefixes.
    static const char* const operator_tokens[] = {
        "delete[]", "delete", "new[]", "new", ">>=", "<<=", "->*", "<<", ">>", ">=", "<=", "==", "!=", "|=",
        "&=",       "^=",     "%=",    "/=",  "*=",  "-=",  "+=",  "&&", "||", "[]", "()", "++", "--", "->",
        "<",        ">",      "~",     "!",   "+",   "-",   "*",   "/",  "%",  "^",  "&",  "|",  ",",  "=" };

    char* token_start = operator_string_begin + 8; // strlen( "operator" )
    while ( *token_start == ' ' ) ++token_start;
    for ( const char* token : operator_tokens ) {
      const size_t length = strlen( token );
      if ( strncmp( token_start, token, length ) == 0 ) {
        token_start[length] = '\0'; // cut everything after the operator token
        break;
      }
    }
    return operator_string_begin;
  }

  char* cursor = strrchr( first, ')' );
  if ( cursor == NULL ) return demangled_symbol;

  // Walk back from the last ')' to its matching '(' (or to the buffer start).
  int depth = 1;
  while ( depth > 0 && cursor != first ) {
    const char c = *( --cursor );
    if ( c == ')' ) {
      ++depth;
    } else if ( c == '(' ) {
      --depth;
    }
  }
  *cursor = '\0'; // drop the parameter list
  if ( cursor == first ) return demangled_symbol; // nothing precedes the parameter list

  --cursor; // last character in front of the parameter list
  if ( *cursor == '>' ) {
    // Skip a trailing template argument list, honouring nested '<' '>'.
    depth = 1;
    while ( depth > 0 && cursor != first ) {
      const char c = *( --cursor );
      if ( c == '>' ) {
        ++depth;
      } else if ( c == '<' ) {
        --depth;
      }
    }
    *cursor = '\0';
  }
  // In the template case this steps onto the last name character, otherwise it
  // steps over the (unchecked) last name character - same as the historic code.
  if ( cursor == first ) return demangled_symbol;
  --cursor;

  // isalnum() requires a value representable as unsigned char.
  const auto is_name_char = []( char c ) {
    return isalnum( static_cast<unsigned char>( c ) ) || c == '_' || c == '~';
  };
  while ( is_name_char( *cursor ) ) {
    if ( cursor == first ) return demangled_symbol; // the name starts at the buffer start
    --cursor;
  }
  return cursor + 1;
}

◆ getTotalCycles()

double getTotalCycles ( )

Definition at line 1589 of file pfm_gen_analysis.cpp.

1589  {
1590  double sum = 0;
1591  if ( nehalem ) {
1592  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1593  ++it ) {
1594  sum += ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
1595  }
1596  } else {
1597  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1598  ++it ) {
1599  sum += ( it->second )["UNHALTED_CORE_CYCLES"];
1600  }
1601  }
1602  return sum;
1603 }

◆ html_special_chars()

void html_special_chars ( const char *  s,
char *  s_mod 
)

Definition at line 836 of file pfm_gen_analysis.cpp.

/// Copies @p s to @p s_mod, replacing the HTML special characters
/// '<', '>', '&' and '"' by "&lt;", "&gt;", "&amp;" and "&quot;".
///
/// The output is written in a single pass with a moving write cursor; the
/// previous implementation appended with strcat() for every character, which
/// rescanned the output each time (quadratic in the input length).
///
/// @param s      NUL-terminated input string
/// @param s_mod  output buffer, always NUL-terminated on return. No size is
///               passed in, so the caller must provide room for the worst case
///               of 6 * strlen( s ) + 1 bytes (every character being '"').
void html_special_chars( const char* s, char* s_mod ) {
  char* out = s_mod;
  for ( const char* in = s; *in != '\0'; ++in ) {
    const char* entity = NULL;
    switch ( *in ) {
    case '<':
      entity = "&lt;";
      break;
    case '>':
      entity = "&gt;";
      break;
    case '&':
      entity = "&amp;";
      break;
    case '"':
      entity = "&quot;";
      break;
    default:
      break;
    }
    if ( entity != NULL ) {
      const size_t length = strlen( entity );
      memcpy( out, entity, length );
      out += length;
    } else {
      *out++ = *in;
    }
  }
  *out = '\0';
}

◆ init_core_caa_events()

void init_core_caa_events ( )

Definition at line 282 of file pfm_gen_analysis.cpp.

282  {
283  core_caa_events.push_back( "BRANCH_INSTRUCTIONS_RETIRED" );
284  core_caa_events.push_back( "ILD_STALL" );
285  core_caa_events.push_back( "INST_RETIRED:LOADS" );
286  core_caa_events.push_back( "INST_RETIRED:OTHER" );
287  core_caa_events.push_back( "INST_RETIRED:STORES" );
288  core_caa_events.push_back( "INSTRUCTIONS_RETIRED" );
289  core_caa_events.push_back( "LOAD_BLOCK:OVERLAP_STORE" );
290  core_caa_events.push_back( "LOAD_BLOCK:STA" );
291  core_caa_events.push_back( "LOAD_BLOCK:UNTIL_RETIRE" );
292  core_caa_events.push_back( "MEM_LOAD_RETIRED:DTLB_MISS" );
293  core_caa_events.push_back( "MEM_LOAD_RETIRED:L1D_LINE_MISS" );
294  core_caa_events.push_back( "MEM_LOAD_RETIRED:L2_LINE_MISS" );
295  core_caa_events.push_back( "MISPREDICTED_BRANCH_RETIRED" );
296  // core_caa_events.push_back("RS_UOPS_DISPATCHED");
297  // core_caa_events.push_back("RS_UOPS_DISPATCHED CMASK=1");
298  core_caa_events.push_back( "RS_UOPS_DISPATCHED CMASK=1 INV=1" );
299  core_caa_events.push_back( "SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE" );
300  core_caa_events.push_back( "UNHALTED_CORE_CYCLES" );
301  // core_caa_events.push_back("UOPS_RETIRED:ANY");
302  // core_caa_events.push_back("UOPS_RETIRED:FUSED");
303  // core_caa_events.push_back("IDLE_DURING_DIV");
304 }

◆ init_core_caa_events_displ()

void init_core_caa_events_displ ( )

Definition at line 370 of file pfm_gen_analysis.cpp.

370  {
371  core_caa_events_displ.push_back( "Total Cycles" );
372  core_caa_events_displ.push_back( "Stalled Cycles" );
373  core_caa_events_displ.push_back( "% of Total Cycles" );
374  core_caa_events_displ.push_back( "Instructions Retired" );
375  core_caa_events_displ.push_back( "CPI" );
376  core_caa_events_displ.push_back( "" );
377  core_caa_events_displ.push_back( "iMargin" );
378  core_caa_events_displ.push_back( "iFactor" );
379  core_caa_events_displ.push_back( "" );
380  core_caa_events_displ.push_back( "Counted Stalled Cycles" );
381  core_caa_events_displ.push_back( "" );
382  core_caa_events_displ.push_back( "L2 Miss Impact" );
383  core_caa_events_displ.push_back( "L2 Miss % of counted Stalled Cycles" );
384  core_caa_events_displ.push_back( "" );
385  core_caa_events_displ.push_back( "L2 Hit Impact" );
386  core_caa_events_displ.push_back( "L2 Hit % of counted Stalled Cycles" );
387  core_caa_events_displ.push_back( "" );
388  core_caa_events_displ.push_back( "L1 DTLB Miss Impact" );
389  core_caa_events_displ.push_back( "L1 DTLB Miss % of counted Stalled Cycles" );
390  core_caa_events_displ.push_back( "" );
391  core_caa_events_displ.push_back( "LCP Stalls Impact" );
392  core_caa_events_displ.push_back( "LCP Stalls % of counted Stalled Cycles" );
393  core_caa_events_displ.push_back( "" );
394  core_caa_events_displ.push_back( "Store-Fwd Stalls Impact" );
395  core_caa_events_displ.push_back( "Store-Fwd Stalls % of counted Stalled Cycles" );
396  core_caa_events_displ.push_back( "" );
397  core_caa_events_displ.push_back( "Loads Blocked by Unknown Address Store Impact" );
398  core_caa_events_displ.push_back( "Loads Blocked % of Store-Fwd Stalls Cycles" );
399  core_caa_events_displ.push_back( "Loads Overlapped with Stores Impact" );
400  core_caa_events_displ.push_back( "Loads Overlapped % of Store-Fwd Stalls Cycles" );
401  core_caa_events_displ.push_back( "Loads Spanning across Cache Lines Impact" );
402  core_caa_events_displ.push_back( "Loads Spanning % of Store-Fwd Stalls Cycles" );
403  core_caa_events_displ.push_back( "" );
404  core_caa_events_displ.push_back( "Load Instructions" );
405  core_caa_events_displ.push_back( "Load % of all Instructions" );
406  core_caa_events_displ.push_back( "Store Instructions" );
407  core_caa_events_displ.push_back( "Store % of all Instructions" );
408  core_caa_events_displ.push_back( "Branch Instructions" );
409  core_caa_events_displ.push_back( "Branch % of all Instructions" );
410  core_caa_events_displ.push_back( "Packed SIMD Computational Instructions" );
411  core_caa_events_displ.push_back( "Packed SIMD % of all Instructions" );
412  core_caa_events_displ.push_back( "Other Instructions" );
413  core_caa_events_displ.push_back( "Other % of all Instructions" );
414  core_caa_events_displ.push_back( "" );
415  core_caa_events_displ.push_back( "ITLB Miss Rate in %" );
416  core_caa_events_displ.push_back( "% of Mispredicted Branches" );
417 }

◆ init_nhm_caa_events()

void init_nhm_caa_events ( )

Definition at line 306 of file pfm_gen_analysis.cpp.

306  {
307  nhm_caa_events.push_back( "ARITH:CYCLES_DIV_BUSY" );
308  nhm_caa_events.push_back( "BR_INST_EXEC:ANY" );
309  nhm_caa_events.push_back( "BR_INST_EXEC:DIRECT_NEAR_CALL" );
310  nhm_caa_events.push_back( "BR_INST_EXEC:INDIRECT_NEAR_CALL" );
311  nhm_caa_events.push_back( "BR_INST_EXEC:INDIRECT_NON_CALL" );
312  nhm_caa_events.push_back( "BR_INST_EXEC:NEAR_CALLS" );
313  nhm_caa_events.push_back( "BR_INST_EXEC:NON_CALLS" );
314  nhm_caa_events.push_back( "BR_INST_EXEC:RETURN_NEAR" );
315  nhm_caa_events.push_back( "BR_INST_RETIRED:ALL_BRANCHES" );
316  nhm_caa_events.push_back( "BR_INST_RETIRED:CONDITIONAL" );
317  nhm_caa_events.push_back( "BR_INST_RETIRED:NEAR_CALL" );
318  nhm_caa_events.push_back( "BR_MISP_EXEC:ANY" );
319  nhm_caa_events.push_back( "CPU_CLK_UNHALTED:THREAD_P" );
320  nhm_caa_events.push_back( "DTLB_LOAD_MISSES:WALK_COMPLETED" );
321  nhm_caa_events.push_back( "INST_RETIRED:ANY_P" );
322  nhm_caa_events.push_back( "ITLB_MISSES:WALK_COMPLETED" );
323  nhm_caa_events.push_back( "L2_RQSTS:IFETCH_HIT" );
324  nhm_caa_events.push_back( "L2_RQSTS:IFETCH_MISS" );
325  nhm_caa_events.push_back( "MEM_INST_RETIRED:LOADS" );
326  nhm_caa_events.push_back( "MEM_INST_RETIRED:STORES" );
327  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:L2_HIT" );
328  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:L3_MISS" );
329  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:L3_UNSHARED_HIT" );
330  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM" );
331  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:LOCAL_DRAM" );
332  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM" );
333  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT" );
334  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:REMOTE_DRAM" );
335  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM" );
336  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM" );
337  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP" );
338  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD" );
339  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM" );
340  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT" );
341  nhm_caa_events.push_back( "RESOURCE_STALLS:ANY" );
342  nhm_caa_events.push_back( "SSEX_UOPS_RETIRED:PACKED_DOUBLE" );
343  nhm_caa_events.push_back( "SSEX_UOPS_RETIRED:PACKED_SINGLE" );
344  nhm_caa_events.push_back( "UOPS_DECODED:MS CMASK=1" );
345  nhm_caa_events.push_back( "UOPS_ISSUED:ANY CMASK=1 INV=1" );
346  nhm_caa_events.push_back( "ITLB_MISS_RETIRED" );
347  nhm_caa_events.push_back( "UOPS_RETIRED:ANY" );
348 }

◆ init_nhm_caa_events_displ()

void init_nhm_caa_events_displ ( )

Definition at line 500 of file pfm_gen_analysis.cpp.

500  {
501  nhm_caa_events_displ.push_back( "Total Cycles" );
502  nhm_caa_events_displ.push_back( "Instructions Retired" );
503  nhm_caa_events_displ.push_back( "CPI" );
504  nhm_caa_events_displ.push_back( "" );
505  nhm_caa_events_displ.push_back( "iMargin" );
506  nhm_caa_events_displ.push_back( "iFactor" );
507  nhm_caa_events_displ.push_back( "" );
508  nhm_caa_events_displ.push_back( "Stalled Cycles" );
509  nhm_caa_events_displ.push_back( "% of Total Cycles" );
510  nhm_caa_events_displ.push_back( "Total Counted Stalled Cycles" );
511  nhm_caa_events_displ.push_back( "" );
512  nhm_caa_events_displ.push_back( "Instruction Starvation % of Total Cycles" );
513  nhm_caa_events_displ.push_back( "# of Instructions per Call" );
514  nhm_caa_events_displ.push_back( "% of Total Cycles spent handling FP exceptions" );
515  nhm_caa_events_displ.push_back( "" );
516  nhm_caa_events_displ.push_back( "Counted Stalled Cycles due to Load Ops" );
517  nhm_caa_events_displ.push_back( "" );
518  nhm_caa_events_displ.push_back( "L2 Hit Impact" );
519  nhm_caa_events_displ.push_back( "L2 Hit % of Load Stalls" );
520  nhm_caa_events_displ.push_back( "" );
521  nhm_caa_events_displ.push_back( "L3 Unshared Hit Impact" );
522  nhm_caa_events_displ.push_back( "L3 Unshared Hit % of Load Stalls" );
523  nhm_caa_events_displ.push_back( "" );
524  nhm_caa_events_displ.push_back( "L2 Other Core Hit Impact" );
525  nhm_caa_events_displ.push_back( "L2 Other Core Hit % of Load Stalls" );
526  nhm_caa_events_displ.push_back( "" );
527  nhm_caa_events_displ.push_back( "L2 Other Core Hit Modified Impact" );
528  nhm_caa_events_displ.push_back( "L2 Other Core Hit Modified % of Load Stalls" );
529  nhm_caa_events_displ.push_back( "" );
530  nhm_caa_events_displ.push_back( "L3 Miss -> Local DRAM Hit Impact" );
531  nhm_caa_events_displ.push_back( "L3 Miss -> Remote DRAM Hit Impact" );
532  nhm_caa_events_displ.push_back( "L3 Miss -> Remote Cache Hit Impact" );
533  nhm_caa_events_displ.push_back( "L3 Miss -> Total Impact" );
534  nhm_caa_events_displ.push_back( "L3 Miss % of Load Stalls" );
535  nhm_caa_events_displ.push_back( "" );
536  nhm_caa_events_displ.push_back( "L1 DTLB Miss Impact" );
537  nhm_caa_events_displ.push_back( "L1 DTLB Miss % of Load Stalls" );
538  nhm_caa_events_displ.push_back( "" );
539  nhm_caa_events_displ.push_back( "Cycles spent during DIV & SQRT Ops" );
540  nhm_caa_events_displ.push_back( "DIV & SQRT Ops % of counted Stalled Cycles" );
541  nhm_caa_events_displ.push_back( "" );
542  nhm_caa_events_displ.push_back( "Total L2 IFETCH misses" );
543  nhm_caa_events_displ.push_back( "% of L2 IFETCH misses" );
544  nhm_caa_events_displ.push_back( "" );
545  nhm_caa_events_displ.push_back( "% of IFETCHes served by Local DRAM" );
546  nhm_caa_events_displ.push_back( "% of IFETCHes served by L3 (Modified)" );
547  nhm_caa_events_displ.push_back( "% of IFETCHes served by L3 (Clean Snoop)" );
548  nhm_caa_events_displ.push_back( "% of IFETCHes served by Remote L2" );
549  nhm_caa_events_displ.push_back( "% of IFETCHes served by Remote DRAM" );
550  nhm_caa_events_displ.push_back( "% of IFETCHes served by L3 (No Snoop)" );
551  nhm_caa_events_displ.push_back( "" );
552  nhm_caa_events_displ.push_back( "Total L2 IFETCH miss Impact" );
553  nhm_caa_events_displ.push_back( "" );
554  nhm_caa_events_displ.push_back( "Cycles IFETCH served by Local DRAM" );
555  nhm_caa_events_displ.push_back( "Local DRAM IFECTHes % Impact" );
556  nhm_caa_events_displ.push_back( "" );
557  nhm_caa_events_displ.push_back( "Cycles IFETCH served by L3 (Modified)" );
558  nhm_caa_events_displ.push_back( "L3 (Modified) IFECTHes % Impact" );
559  nhm_caa_events_displ.push_back( "" );
560  nhm_caa_events_displ.push_back( "Cycles IFETCH served by L3 (Clean Snoop)" );
561  nhm_caa_events_displ.push_back( "L3 (Clean Snoop) IFECTHes % Impact" );
562  nhm_caa_events_displ.push_back( "" );
563  nhm_caa_events_displ.push_back( "Cycles IFETCH served by Remote L2" );
564  nhm_caa_events_displ.push_back( "Remote L2 IFECTHes % Impact" );
565  nhm_caa_events_displ.push_back( "" );
566  nhm_caa_events_displ.push_back( "Cycles IFETCH served by Remote DRAM" );
567  nhm_caa_events_displ.push_back( "Remote DRAM IFECTHes % Impact" );
568  nhm_caa_events_displ.push_back( "" );
569  nhm_caa_events_displ.push_back( "Cycles IFETCH served by L3 (No Snoop)" );
570  nhm_caa_events_displ.push_back( "L3 (No Snoop) IFECTHes % Impact" );
571  nhm_caa_events_displ.push_back( "" );
572  nhm_caa_events_displ.push_back( "Total Branch Instructions Executed" );
573  nhm_caa_events_displ.push_back( "% of Mispredicted Branches" );
574  nhm_caa_events_displ.push_back( "" );
575  nhm_caa_events_displ.push_back( "Direct Near Calls % of Total Branches Executed" );
576  nhm_caa_events_displ.push_back( "Indirect Near Calls % of Total Branches Executed" );
577  nhm_caa_events_displ.push_back( "Indirect Near Non-Calls % of Total Branches Executed" );
578  nhm_caa_events_displ.push_back( "All Near Calls % of Total Branches Executed" );
579  nhm_caa_events_displ.push_back( "All Non Calls % of Total Branches Executed" );
580  nhm_caa_events_displ.push_back( "All Returns % of Total Branches Executed" );
581  nhm_caa_events_displ.push_back( "" );
582  nhm_caa_events_displ.push_back( "Total Branch Instructions Retired" );
583  nhm_caa_events_displ.push_back( "Conditionals % of Total Branches Retired" );
584  nhm_caa_events_displ.push_back( "Near Calls % of Total Branches Retired" );
585  nhm_caa_events_displ.push_back( "" );
586  nhm_caa_events_displ.push_back( "L1 ITLB Miss Impact" );
587  nhm_caa_events_displ.push_back( "ITLB Miss Rate in %" );
588  nhm_caa_events_displ.push_back( "" );
589  nhm_caa_events_displ.push_back( "Branch Instructions" );
590  nhm_caa_events_displ.push_back( "Branch % of all Instructions" );
591  nhm_caa_events_displ.push_back( "" );
592  nhm_caa_events_displ.push_back( "Load Instructions" );
593  nhm_caa_events_displ.push_back( "Load % of all Instructions" );
594  nhm_caa_events_displ.push_back( "" );
595  nhm_caa_events_displ.push_back( "Store Instructions" );
596  nhm_caa_events_displ.push_back( "Store % of all Instructions" );
597  nhm_caa_events_displ.push_back( "" );
598  nhm_caa_events_displ.push_back( "Other Instructions" );
599  nhm_caa_events_displ.push_back( "Other % of all Instructions" );
600  nhm_caa_events_displ.push_back( "" );
601  nhm_caa_events_displ.push_back( "Packed UOPS Retired" );
602  nhm_caa_events_displ.push_back( "Packed % of all UOPS Retired" );
603 }

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 1608 of file pfm_gen_analysis.cpp.

// Program entry point: post-processes the result files found in DIRECTORY (argv[1]).
// Usage (as printed below): <program> DIRECTORY [--caa] [--csv]
//   --caa  write the derived CAA columns instead of the raw C_events columns
//   --csv  write DIRECTORY/results.csv instead of the HTML report under DIRECTORY/HTML
// Returns 0 on success. Most failures call exit( 1 ); a failed opendir() returns errno.
// NOTE(review): this listing jumps over source lines 1688, 1693, 1696, 1705, 1710 and 1713, so
// some statements of the CAA branch are not visible here - consult the full file before editing it.
1608  {
1609  if ( argc < 2 || argc > 4 ) {
1610  printf( "\n\nUsage: %s DIRECTORY [--caa] [--csv]\n\n", argv[0] );
1611  exit( 1 );
1612  }
1613 
// Option flags may appear in any order after DIRECTORY; unknown arguments are ignored.
1614  bool caa = false;
1615  bool csv = false;
1616  for ( int i = 2; i < argc; i++ ) {
1617  if ( !strcmp( argv[i], "--caa" ) ) caa = true;
1618  if ( !strcmp( argv[i], "--csv" ) ) csv = true;
1619  }
1620 
// NOTE(review): argv[1] is copied without a length check against MAX_FILENAME_LENGTH.
1621  char dir[MAX_FILENAME_LENGTH];
1622  strcpy( dir, argv[1] );
1623  if ( !csv ) {
// HTML mode: create DIRECTORY/HTML. Any mkdir() failure aborts.
// NOTE(review): presumably this includes an already existing HTML directory - confirm that is intended.
1624  strcat( dir, "/HTML" );
1625  int res = mkdir( dir, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH );
1626  if ( res != 0 ) {
1627  fprintf( stderr, "ERROR: Cannot create directory %s\naborting...\n", dir );
1628  exit( 1 );
1629  }
1630  }
1631 
1632  DIR* dp;
1633  struct dirent* dirp;
1634  int num_of_modules = 0;
// NOTE(review): printf() runs before errno is returned and may overwrite it.
1635  if ( ( dp = opendir( argv[1] ) ) == NULL ) {
1636  printf( "Error(%d) opening %s\n", errno, argv[1] );
1637  return errno;
1638  }
// First pass (HTML mode only): every entry whose name contains "_S_" and ".txt.gz"
// is handed to read_S_events(); S_events is sorted afterwards.
1639  while ( ( dirp = readdir( dp ) ) != NULL ) {
1640  if ( strstr( dirp->d_name, "_S_" ) != NULL && strstr( dirp->d_name, ".txt.gz" ) != NULL && !csv ) {
1641  if ( read_S_events( argv[1], dirp->d_name ) ) {
1642  fprintf( stderr, "ERROR: Cannot read file %s\naborting...\n", dirp->d_name );
1643  exit( 1 );
1644  }
1645  }
1646  }
1647  closedir( dp );
1648  sort( S_events.begin(), S_events.end() );
1649  if ( ( dp = opendir( argv[1] ) ) == NULL ) {
1650  printf( "Error(%d) opening %s\n", errno, argv[1] );
1651  return errno;
1652  }
// Second pass: the "_S_" files go to read_S_file() (HTML mode only); entries containing
// "_C_" and ".txt" go to read_C_file(), whose largest return value is kept in num_of_modules
// (not used again in the visible part of this function).
1653  while ( ( dirp = readdir( dp ) ) != NULL ) {
1654  if ( strstr( dirp->d_name, "_S_" ) != NULL && strstr( dirp->d_name, ".txt.gz" ) != NULL && !csv ) {
1655  if ( read_S_file( argv[1], dirp->d_name ) ) {
1656  fprintf( stderr, "ERROR: Cannot read file %s\naborting...\n", dirp->d_name );
1657  exit( 1 );
1658  }
1659  } else if ( strstr( dirp->d_name, "_C_" ) != NULL && strstr( dirp->d_name, ".txt" ) != NULL ) {
1660  int res = read_C_file( argv[1], dirp->d_name );
1661  if ( res > num_of_modules ) { num_of_modules = res; }
1662  }
1663  }
1664  closedir( dp );
1665 
1666  if ( !csv ) {
1667  if ( finalize_S_html_pages( argv[1] ) ) {
1668  fprintf( stderr, "ERROR: Cannot finalize HTML pages!!!\naborting...\n" );
1669  exit( 1 );
1670  }
1671  }
1672 
// Summary output: DIRECTORY/HTML/index.html in HTML mode, DIRECTORY/results.csv in CSV mode.
// NOTE(review): sprintf() into filepath is not bounded by MAX_FILENAME_LENGTH, and the
// error message below names index.html even in CSV mode.
1673  char filepath[MAX_FILENAME_LENGTH];
1674  bzero( filepath, MAX_FILENAME_LENGTH );
1675  if ( !csv )
1676  sprintf( filepath, "%s/HTML/index.html", argv[1] );
1677  else
1678  sprintf( filepath, "%s/results.csv", argv[1] );
1679  FILE* fp = fopen( filepath, "w" );
1680  if ( fp == NULL ) {
1681  fprintf( stderr, "ERROR: Cannot create file index.html!!!\naborting...\n" );
1682  exit( 1 );
1683  }
1684 
1685  if ( caa ) {
// CAA mode: the global flag nehalem selects the Core or the Nehalem event set and column labels.
1686  double totalCycles;
1687  if ( !nehalem ) {
1689  if ( !check_for_core_caa_events() ) {
1690  fprintf( stderr, "(core) ERROR: One or more events for CAA missing!\naborting...\n" );
1691  exit( 1 );
1692  }
1694  totalCycles = getTotalCycles();
1695  calc_core_deriv_values( totalCycles );
1697  if ( !csv ) {
1698  put_C_header( fp, core_caa_events_displ );
1699  put_C_modules( fp, core_caa_events_displ );
1700  } else {
1701  put_C_header_csv( fp, core_caa_events_displ );
1702  put_C_modules_csv( fp, core_caa_events_displ );
1703  }
1704  } else {
1706  if ( !check_for_nhm_caa_events() ) {
1707  fprintf( stderr, "(nehalem) ERROR: One or more events for CAA missing!\naborting...\n" );
1708  exit( 1 );
1709  }
1711  totalCycles = getTotalCycles();
1712  calc_nhm_deriv_values( totalCycles );
1714  if ( !csv ) {
1715  put_C_header( fp, nhm_caa_events_displ );
1716  put_C_modules( fp, nhm_caa_events_displ );
1717  } else {
1718  put_C_header_csv( fp, nhm_caa_events_displ );
1719  put_C_modules_csv( fp, nhm_caa_events_displ );
1720  }
1721  }
1722  if ( !csv ) put_C_footer( fp );
1723  fclose( fp );
1724  } else {
// Plain mode: one column per entry of C_events.
1725  if ( !csv ) {
1726  put_C_header( fp, C_events );
1727  put_C_modules( fp, C_events );
1728  put_C_footer( fp );
1729  } else {
1730  put_C_header_csv( fp, C_events );
1731  put_C_modules_csv( fp, C_events );
1732  }
1733  fclose( fp );
1734  }
1735  if ( !csv ) {
// HTML mode: copy "sorttable.js" (opened relative to the current working directory)
// to DIRECTORY/HTML/sorttable.js, one byte per read()/write() call.
1736  char src[MAX_FILENAME_LENGTH];
1737  char dst[MAX_FILENAME_LENGTH];
1738  sprintf( src, "sorttable.js" );
1739  sprintf( dst, "%s/HTML/sorttable.js", argv[1] );
1740  int fd_src = open( src, O_RDONLY );
1741  if ( fd_src == -1 ) {
1742  fprintf( stderr, "ERROR: Cannot open file \"%s\"!\naborting...\n", src );
1743  exit( 1 );
1744  }
1745  int fd_dst = open( dst, O_WRONLY | O_CREAT | O_TRUNC, 0644 );
1746  if ( fd_dst == -1 ) {
1747  fprintf( stderr, "ERROR: Cannot open file \"%s\" (%s)!\naborting...\n", dst, strerror( errno ) );
1748  exit( 1 );
1749  }
// NOTE(review): the loop stops only when read() returns 0; an error return of -1 is
// non-zero and would keep the loop running - verify and handle explicitly.
1750  char c;
1751  while ( read( fd_src, &c, 1 ) ) {
1752  if ( write( fd_dst, &c, 1 ) == -1 ) {
1753  std::cerr << "ERROR: failed to write to " << dst << std::endl;
1754  exit( 1 );
1755  }
1756  }
1757  close( fd_dst );
1758  close( fd_src );
1759  }
1760  return 0;
1761 }

◆ normalize()

double normalize ( std::string  field,
double  value,
double  normalizeTo 
)

Definition at line 1544 of file pfm_gen_analysis.cpp.

1544  {
1545  double max = 0;
1546  double counter_value;
1547  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1548  ++it ) {
1549  counter_value = ( it->second )[field];
1550  if ( max < counter_value ) max = counter_value;
1551  }
1552  if ( value > 0 && max > 0 && normalizeTo > 0 ) {
1553  return 1. * value / max * normalizeTo;
1554  } else
1555  return 0;
1556 }

◆ put_C_footer()

void put_C_footer ( FILE *  fp)

Definition at line 1503 of file pfm_gen_analysis.cpp.

/// Writes the closing tags of the HTML summary page (table, body, html).
///
/// @param fp  open output stream
void put_C_footer( FILE* fp ) { fputs( "</table>\n</body>\n</html>\n", fp ); }

◆ put_C_header()

void put_C_header ( FILE *  fp,
std::vector< std::string > &  columns 
)

Definition at line 1447 of file pfm_gen_analysis.cpp.

/// Writes the beginning of the HTML summary page: document head, page title,
/// the opening of the sortable table and its header row.
///
/// The header row starts with "MODULE NAME" followed by one <th> per entry of
/// @p columns; an empty entry becomes a blank white spacer cell.
///
/// @param fp       open output stream
/// @param columns  column labels, in display order (not modified)
void put_C_header( FILE* fp, std::vector<std::string>& columns ) {
  static const char* const preamble[] = {
      "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\n",
      "<html>\n",
      "<head>\n",
      "<title>\n",
      "Analysis Result\n",
      "</title>\n",
      "<script src=\"sorttable.js\"></script>\n",
      "<style>\ntable.sortable thead "
      "{\nbackground-color:#eee;\ncolor:#666666;\nfont-weight:bold;\ncursor:default;\nfont-family:courier;\n}"
      "\n</style>\n",
      "</head>\n",
      "<body link=\"black\">\n",
      "<h1>RESULTS:</h1><br/>Click for detailed symbol view...<p/>\n",
      "<table class=\"sortable\" cellpadding=\"5\">\n",
      "<tr>\n",
      "<th>MODULE NAME</th>\n",
  };
  for ( const char* chunk : preamble ) { fputs( chunk, fp ); }

  for ( const std::string& column : columns ) {
    const char* label = column.c_str();
    if ( label[0] == '\0' ) {
      fputs( "<th bgcolor=\"#FFFFFF\">&nbsp;</th>\n", fp );
    } else {
      fprintf( fp, "<th>%s</th>\n", label );
    }
  }
  fputs( "</tr>\n", fp );
}

◆ put_C_header_csv()

void put_C_header_csv ( FILE *  fp,
std::vector< std::string > &  columns 
)

Definition at line 1508 of file pfm_gen_analysis.cpp.

/// Writes the CSV header line: "MODULE NAME" followed by every non-empty
/// entry of @p columns, comma separated, terminated by a newline.
///
/// @param fp       open output stream
/// @param columns  column labels, in output order; empty entries are skipped
void put_C_header_csv( FILE* fp, std::vector<std::string>& columns ) {
  fputs( "MODULE NAME", fp );
  for ( const std::string& column : columns ) {
    const char* label = column.c_str();
    if ( label[0] == '\0' ) continue; // spacer column: nothing to emit in CSV
    fprintf( fp, ",%s", label );
  }
  fputs( "\n", fp );
}

◆ put_C_modules()

void put_C_modules ( FILE *  fp,
std::vector< std::string > &  columns 
)

Definition at line 1476 of file pfm_gen_analysis.cpp.

1476  {
1477  int index = 0;
1478  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1479  ++it ) {
1480  if ( index % 2 )
1481  fprintf( fp, "<tr bgcolor=\"#FFFFCC\">\n" );
1482  else
1483  fprintf( fp, "<tr bgcolor=\"#CCFFCC\">\n" );
1484  fprintf( fp, "<td style=\"font-family:monospace;font-size:large;color:Black\"><a href=\"%s.html\">%s</a></td>\n",
1485  ( it->first ).c_str(), ( it->first ).c_str() );
1486  for ( std::vector<std::string>::const_iterator jt = columns.begin(); jt != columns.end(); ++jt ) {
1487  if ( strlen( jt->c_str() ) == 0 ) {
1488  fprintf( fp, "<td bgcolor=\"#FFFFFF\">&nbsp;</td>" );
1489  } else {
1490  if ( ( it->second ).find( *jt ) == ( it->second ).end() ) {
1491  fprintf( stderr, "ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", ( *jt ).c_str() );
1492  exit( 1 );
1493  }
1494  fprintf( fp, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\" align=\"right\">%.2f</td>\n",
1495  ( it->second )[*jt] );
1496  }
1497  }
1498  fprintf( fp, "</tr>\n" );
1499  index++;
1500  }
1501 }

◆ put_C_modules_csv()

void put_C_modules_csv ( FILE *  fp,
std::vector< std::string > &  columns 
)

Definition at line 1519 of file pfm_gen_analysis.cpp.

1519  {
1520  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1521  ++it ) {
1522  fprintf( fp, "%s", ( it->first ).c_str() );
1523  for ( std::vector<std::string>::const_iterator jt = columns.begin(); jt != columns.end(); ++jt ) {
1524  if ( strlen( jt->c_str() ) == 0 ) {
1525  } else {
1526  if ( ( it->second ).find( *jt ) == ( it->second ).end() ) {
1527  fprintf( stderr, "ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", ( *jt ).c_str() );
1528  exit( 1 );
1529  }
1530  fprintf( fp, ",%.2f", ( it->second )[*jt] );
1531  }
1532  }
1533  fprintf( fp, "\n" );
1534  }
1535 }

◆ put_S_module()

void put_S_module ( S_module cur_module,
const char *  dir 
)

Definition at line 1044 of file pfm_gen_analysis.cpp.

// Writes one HTML table for cur_module's event into "<dir>/HTML/<module name>.html".
//
// The first time a module name is seen (it is not yet a key of
// modules_tot_samples) the file is created and an HTML preamble plus a list of
// links to all S_events is written; on later calls the file is opened in
// append mode. The table holds at most 20 rows, one per entry handed out by
// cur_module->get_max(), and stops early when get_max() returns false.
//
// Parameters:
//   cur_module - module whose samples are reported
//   dir        - base output directory; the file goes into its "HTML" subdirectory
//
// Calls exit(1) when the file cannot be created, when an entry returned by
// get_max() contains no '%' separator, or when fclose() fails.
//
// NOTE(review): this listing is missing source lines 1064 and 1141, so the
// insert() call started on line 1063 is cut off and the declaration of `index`
// is not shown.
1044  {
1045  char module_name[MAX_MODULE_NAME_LENGTH];
1046  bzero( module_name, MAX_MODULE_NAME_LENGTH );
1047  strcpy( module_name, ( cur_module->get_module_name() ).c_str() );
// NOTE(review): the strcpy/strcat calls below do not check the destination sizes.
1048  char module_filename[MAX_FILENAME_LENGTH];
1049  bzero( module_filename, MAX_FILENAME_LENGTH );
1050  strcpy( module_filename, dir );
1051  strcat( module_filename, "/HTML/" );
1052  strcat( module_filename, module_name );
1053  strcat( module_filename, ".html" );
1054  char event[MAX_EVENT_NAME_LENGTH];
1055  bzero( event, MAX_EVENT_NAME_LENGTH );
1056  strcpy( event, ( cur_module->get_event() ).c_str() );
1057  std::map<std::string, unsigned int>::iterator result = modules_tot_samples.find( cur_module->get_module_name() );
1058  FILE* module_file;
1059  if ( result == modules_tot_samples.end() ) // not found
1060  {
// The event name that selects this branch depends on the `nehalem` flag.
1061  if ( ( !strcmp( event, "UNHALTED_CORE_CYCLES" ) && !nehalem ) ||
1062  ( !strcmp( event, "CPU_CLK_UNHALTED:THREAD_P" ) && nehalem ) ) {
// NOTE(review): the argument of this insert() (source line 1064) is missing from the listing.
1063  modules_tot_samples.insert(
1065  } else {
1066  modules_tot_samples.insert( std::pair<std::string, unsigned int>( cur_module->get_module_name(), 0 ) );
1067  }
1068  module_file = fopen( module_filename, "w" );
1069  if ( module_file == NULL ) {
1070  fprintf( stderr, "ERROR: Cannot create file %s!!!\naborting...\n", module_filename );
1071  exit( 1 );
1072  }
1073  fprintf( module_file, "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" "
1074  "\"http://www.w3.org/TR/html4/loose.dtd\">\n" );
1075  fprintf( module_file, "<html>\n" );
1076  fprintf( module_file, "<head>\n" );
1077  fprintf( module_file, "<title>\n" );
1078  fprintf( module_file, "%s\n", module_name );
1079  fprintf( module_file, "</title>\n" );
1080  fprintf( module_file, "</head>\n" );
1081  fprintf( module_file, "<body>\n" );
1082  fprintf( module_file, "<h2>%s</h2><br/>Events Sampled:<br/>\n", module_name );
1083  fprintf( module_file, "<ul>\n" );
1084  for ( std::vector<std::string>::const_iterator it = S_events.begin(); it != S_events.end(); ++it ) {
1085  fprintf( module_file, "<li><a href=\"#%s\">%s</a></li>\n", it->c_str(), it->c_str() );
1086  }
1087  fprintf( module_file, "</ul>\n" );
1088  } // if(result == modules_tot_samples.end()) //not found
1089  else {
1090  if ( ( !strcmp( event, "UNHALTED_CORE_CYCLES" ) && !nehalem ) ||
1091  ( !strcmp( event, "CPU_CLK_UNHALTED:THREAD_P" ) && nehalem ) ) {
1092  modules_tot_samples[cur_module->get_module_name()] = cur_module->get_total_num_samples();
1093  }
// NOTE(review): unlike the "w" case above, the result of this fopen() is not checked before use.
1094  module_file = fopen( module_filename, "a" );
1095  } // else:: if(result != modules_tot_samples.end()) //found!!
// event_str is the anchor name: the event followed by " CMASK=<n>" and/or " INV=<n>" when those masks are > 0.
1096  char event_str[MAX_EVENT_NAME_LENGTH];
1097  bzero( event_str, MAX_EVENT_NAME_LENGTH );
1098  strcpy( event_str, event );
1099  if ( cur_module->get_c_mask() > 0 ) {
1100  sprintf( event_str + strlen( event_str ), " CMASK=%u", cur_module->get_c_mask() );
1101  }
1102  if ( cur_module->get_inv_mask() > 0 ) {
1103  sprintf( event_str + strlen( event_str ), " INV=%u", cur_module->get_inv_mask() );
1104  }
// NOTE(review): the second tag is "<a>", not "</a>", so the anchor is never closed - confirm whether that is intended.
1105  fprintf( module_file, "<a name=\"%s\"><a>\n", event_str );
1106  fprintf( module_file, "<table cellpadding=\"5\">\n" );
1107  fprintf( module_file, "<tr bgcolor=\"#EEEEEE\">\n" );
1108  fprintf( module_file,
1109  "<th colspan=\"6\" align=\"left\">%s -- cmask: %u -- invmask: %u -- Total Samples: %u -- "
1110  "Sampling Period: %u</th>\n",
1111  event, cur_module->get_c_mask(), cur_module->get_inv_mask(), cur_module->get_total_num_samples(),
1112  cur_module->get_smpl_period() );
1113  fprintf( module_file, "</tr>\n" );
1114  fprintf( module_file, "<tr bgcolor=\"#EEEEEE\">\n" );
1115  fprintf( module_file, "<th align=\"left\">Samples</th>\n" );
1116  fprintf( module_file, "<th align=\"left\">Percentage</th>\n" );
1117  fprintf( module_file, "<th align=\"left\">Symbol Name</th>\n" );
1118  fprintf( module_file, "<th align=\"left\">Library Name</th>\n" );
1119  fprintf( module_file, "<th align=\"left\">Complete Signature</th>\n" );
1120  fprintf( module_file, "<th align=\"left\">Library Pathname</th>\n" );
1121  fprintf( module_file, "</tr>\n" );
// At most 20 rows are written; the loop ends early when get_max() returns false.
1122  for ( int j = 0; j < 20; j++ ) {
1123  char sym[MAX_SYM_LENGTH];
1124  char sym_mod[MAX_SYM_MOD_LENGTH];
1125  char lib[MAX_LIB_LENGTH];
1126  char lib_mod[MAX_LIB_MOD_LENGTH];
1127  char simple_sym[MAX_SIMPLE_SYM_LENGTH];
1128  char simple_sym_mod[MAX_SIMPLE_SYM_MOD_LENGTH];
1129  char simple_lib[MAX_SIMPLE_LIB_LENGTH];
1130  char simple_lib_mod[MAX_SIMPLE_LIB_MOD_LENGTH];
1131 
1132  bzero( sym, MAX_SYM_LENGTH );
1133  bzero( sym_mod, MAX_SYM_MOD_LENGTH );
1134  bzero( lib, MAX_LIB_LENGTH );
1135  bzero( lib_mod, MAX_LIB_MOD_LENGTH );
1136  bzero( simple_sym, MAX_SIMPLE_SYM_LENGTH );
1137  bzero( simple_sym_mod, MAX_SIMPLE_SYM_MOD_LENGTH );
1138  bzero( simple_lib, MAX_SIMPLE_LIB_LENGTH );
1139  bzero( simple_lib_mod, MAX_SIMPLE_LIB_MOD_LENGTH );
1140 
// NOTE(review): source line 1141 is missing from the listing; presumably it declares the `index` buffer - confirm.
1142  bzero( index, MAX_SAMPLE_INDEX_LENGTH );
1143  unsigned int value;
1144  bool res = cur_module->get_max( index, value );
1145  if ( !res ) break;
// `index` has the form "<symbol>%<library>"; it is split at the first '%'.
1146  char* sym_end = strchr( index, '%' );
1147  if ( sym_end == NULL ) // error
1148  {
1149  fprintf( stderr, "ERROR: Invalid sym and lib name! : %s\naborting...\n", index );
1150  exit( 1 );
1151  }
1152  memcpy( sym, index, strlen( index ) - strlen( sym_end ) );
1153  strcpy( lib, sym_end + 1 );
// func_name() receives a copy of the symbol (temp) rather than sym itself.
1154  char temp[MAX_SYM_LENGTH];
1155  bzero( temp, MAX_SYM_LENGTH );
1156  strcpy( temp, sym );
1157  strcpy( simple_sym, ( func_name( temp ) ) );
// simple_lib is the part of lib after the last '/', or all of lib when there is no '/' or nothing follows it.
1158  if ( strrchr( lib, '/' ) != NULL && *( strrchr( lib, '/' ) + 1 ) != '\0' ) {
1159  strcpy( simple_lib, strrchr( lib, '/' ) + 1 );
1160  } else {
1161  strcpy( simple_lib, lib );
1162  }
1163  if ( j % 2 != 0 ) {
1164  fprintf( module_file, "<tr bgcolor=\"#FFFFCC\">\n" );
1165  } else {
1166  fprintf( module_file, "<tr bgcolor=\"#CCFFCC\">\n" );
1167  }
1168  fprintf( module_file, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%u</td>\n", value );
// Percentage of this entry relative to the module's total number of samples.
1169  fprintf( module_file, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%f%%</td>\n",
1170  ( ( (double)( value ) ) / ( (double)( cur_module->get_total_num_samples() ) ) ) * 100 );
// Each string goes through html_special_chars() into its *_mod buffer before being printed.
1171  html_special_chars( simple_sym, simple_sym_mod );
1172  html_special_chars( simple_lib, simple_lib_mod );
1173  html_special_chars( sym, sym_mod );
1174  html_special_chars( lib, lib_mod );
1175  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n", simple_sym_mod );
1176  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n", simple_lib_mod );
1177  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n", sym_mod );
1178  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n</tr>\n", lib_mod );
1179  }
1180  fprintf( module_file, "</table><br/><br/>\n" );
1181  int res = fclose( module_file );
1182  if ( res ) {
1183  fprintf( stderr, "ERROR: Cannot close file %s!!!\naborting...\n", module_filename );
1184  exit( 1 );
1185  }
1186  return;
1187 }

◆ read_C_file()

int read_C_file ( const char *  dir,
const char *  filename 
)

Definition at line 1382 of file pfm_gen_analysis.cpp.

1382  {
1383  char event[MAX_EVENT_NAME_LENGTH];
1384  char arch[MAX_ARCH_NAME_LENGTH];
1385  char line[MAX_LINE_LENGTH];
1386  char cmask_str[MAX_CMASK_STR_LENGTH];
1387  char inv_str[MAX_INV_STR_LENGTH];
1388  char sp_str[MAX_SP_STR_LENGTH];
1389  char cur_module_name[MAX_MODULE_NAME_LENGTH];
1390  bzero( line, MAX_LINE_LENGTH );
1391  bzero( event, MAX_EVENT_NAME_LENGTH );
1392  bzero( cur_module_name, MAX_MODULE_NAME_LENGTH );
1393  bzero( arch, MAX_ARCH_NAME_LENGTH );
1394  bzero( line, MAX_LINE_LENGTH );
1395  bzero( cmask_str, MAX_CMASK_STR_LENGTH );
1396  bzero( inv_str, MAX_INV_STR_LENGTH );
1397  bzero( sp_str, MAX_SP_STR_LENGTH );
1398  int number_of_modules = 0;
1399  long cur_sum = 0;
1400  int no_of_values = 0;
1401  char path_name[MAX_FILENAME_LENGTH];
1402  bzero( path_name, MAX_FILENAME_LENGTH );
1403  strcpy( path_name, dir );
1404  strcat( path_name, "/" );
1405  strcat( path_name, filename );
1406  FILE* fp = fopen( path_name, "r" );
1407  int stat = fscanf( fp, "%s %s %s %s %s\n", arch, event, cmask_str, inv_str, sp_str );
1408  if ( stat != 5 ) {
1409  std::cerr << "ERROR: failed to parse " << path_name << std::endl;
1410  exit( 1 );
1411  }
1412  if ( !strcmp( arch, "NHM" ) )
1413  nehalem = true;
1414  else
1415  nehalem = false;
1416  std::string event_str( event );
1417  if ( atoi( cmask_str ) > 0 ) {
1418  event_str += " CMASK=";
1419  event_str += cmask_str;
1420  }
1421  if ( atoi( inv_str ) > 0 ) {
1422  event_str += " INV=";
1423  event_str += inv_str;
1424  }
1425  C_events.push_back( event_str );
1426  while ( fscanf( fp, "%s\n", line ) != EOF ) {
1427  if ( isalpha( line[0] ) ) // module
1428  {
1429  if ( number_of_modules > 0 ) {
1430  C_modules[cur_module_name][event_str] = (double)cur_sum / no_of_values;
1431  cur_sum = 0;
1432  no_of_values = 0;
1433  }
1434  strcpy( cur_module_name, line );
1435  number_of_modules++;
1436  } else if ( isdigit( line[0] ) ) // value
1437  {
1438  cur_sum += strtol( line, NULL, 10 );
1439  no_of_values++;
1440  }
1441  }
1442  C_modules[cur_module_name][event_str] = (double)cur_sum / no_of_values; // last module
1443  fclose( fp );
1444  return number_of_modules;
1445 }

◆ read_S_events()

int read_S_events ( const char *  dir,
const char *  filename 
)

Definition at line 1309 of file pfm_gen_analysis.cpp.

1309  {
1310  char event[MAX_EVENT_NAME_LENGTH];
1311  char arch[MAX_ARCH_NAME_LENGTH];
1312  char line[MAX_LINE_LENGTH];
1313  char cmask_str[MAX_CMASK_STR_LENGTH];
1314  char inv_str[MAX_INV_STR_LENGTH];
1315  char sp_str[MAX_SP_STR_LENGTH];
1316  bzero( line, MAX_LINE_LENGTH );
1317  bzero( event, MAX_EVENT_NAME_LENGTH );
1318  bzero( arch, MAX_ARCH_NAME_LENGTH );
1319  bzero( cmask_str, MAX_CMASK_STR_LENGTH );
1320  bzero( inv_str, MAX_INV_STR_LENGTH );
1321  bzero( sp_str, MAX_SP_STR_LENGTH );
1322  char path_name[MAX_FILENAME_LENGTH];
1323  bzero( path_name, MAX_FILENAME_LENGTH );
1324  strcpy( path_name, dir );
1325  strcat( path_name, "/" );
1326  strcat( path_name, filename );
1327  gzFile res_file = gzopen( path_name, "rb" );
1328  if ( res_file != NULL ) {
1329  bzero( line, MAX_LINE_LENGTH );
1330  gzgets( res_file, line, MAX_LINE_LENGTH );
1331  if ( line[strlen( line ) - 1] == '\n' ) line[strlen( line ) - 1] = '\0';
1332  bzero( event, MAX_EVENT_NAME_LENGTH );
1333  sscanf( line, "%s %s %s %s %s\n", arch, event, cmask_str, inv_str, sp_str );
1334  std::string event_str( event );
1335  if ( atoi( cmask_str ) > 0 ) {
1336  event_str += " CMASK=";
1337  event_str += cmask_str;
1338  }
1339  if ( atoi( inv_str ) > 0 ) {
1340  event_str += " INV=";
1341  event_str += inv_str;
1342  }
1343  S_events.push_back( event_str );
1344  } // if(res_file != NULL)
1345  else {
1346  fprintf( stderr, "ERROR: Unable to open input file: %s\naborting...\n", filename );
1347  exit( 1 );
1348  }
1349  gzclose( res_file );
1350  return 0;
1351 }

◆ read_S_file()

int read_S_file ( const char *  dir,
const char *  filename 
)

Definition at line 1196 of file pfm_gen_analysis.cpp.

// Reads one gzip-compressed "S" result file and writes an HTML section for
// every module it contains by calling put_S_module().
//
// The first line is a header with architecture, event name, cmask, inv mask
// and sampling period; it also sets the global `nehalem` flag (true when the
// architecture is "NHM"). Each following line is either
//   - a module line (contains no space) of the form "<name>%<total>", or
//   - a sample line "<symbol> <libName> <libOffset> <value>".
// Samples are added to the current module under the key
// "<symbol>%<library>"; the module is flushed with put_S_module() when the next
// module line is reached and once more after the last line.
//
// Parameters:
//   dir      - directory containing the file; also passed on to put_S_module()
//   filename - name of the file inside dir
//
// Returns 0. Calls exit(1) when the file cannot be opened or when a module
// line contains no '%'.
//
// NOTE(review): this listing is missing source line 1267, which presumably
// declares the iterator `result` used on line 1268 - confirm.
1196  {
1197  char line[MAX_LINE_LENGTH];
1198  char event[MAX_EVENT_NAME_LENGTH];
1199  char arch[MAX_ARCH_NAME_LENGTH];
1200  unsigned int cmask;
1201  unsigned int inv;
1202  unsigned int sp;
1203  char cur_module_name[MAX_MODULE_NAME_LENGTH];
1204  bzero( line, MAX_LINE_LENGTH );
1205  bzero( event, MAX_EVENT_NAME_LENGTH );
1206  bzero( cur_module_name, MAX_MODULE_NAME_LENGTH );
1207  bzero( arch, MAX_ARCH_NAME_LENGTH );
1208 
1209  S_module* cur_module = new S_module();
1210  unsigned int module_num = 0;
1211 
// NOTE(review): the strcpy/strcat calls below do not check the destination size.
1212  char path_name[MAX_FILENAME_LENGTH];
1213  bzero( path_name, MAX_FILENAME_LENGTH );
1214  strcpy( path_name, dir );
1215  strcat( path_name, "/" );
1216  strcat( path_name, filename );
1217  gzFile res_file = gzopen( path_name, "rb" );
1218 
1219  if ( res_file != NULL ) {
1220  bzero( line, MAX_LINE_LENGTH );
// NOTE(review): the gzgets() result is not checked; if nothing was read, strlen( line ) is 0 and
// line[strlen( line ) - 1] on the next line indexes outside the buffer.
1221  gzgets( res_file, line, MAX_LINE_LENGTH );
1222  if ( line[strlen( line ) - 1] == '\n' ) line[strlen( line ) - 1] = '\0';
1223  bzero( event, MAX_EVENT_NAME_LENGTH );
// NOTE(review): the sscanf() result is not checked, so cmask, inv and sp stay uninitialised on a short header.
1224  sscanf( line, "%s %s %u %u %u", arch, event, &cmask, &inv, &sp );
1225  if ( !strcmp( arch, "NHM" ) )
1226  nehalem = true;
1227  else
1228  nehalem = false;
1229  bzero( line, MAX_LINE_LENGTH );
1230  while ( gzgets( res_file, line, MAX_LINE_LENGTH ) != Z_NULL ) {
1231  if ( line[strlen( line ) - 1] == '\n' ) line[strlen( line ) - 1] = '\0';
// A line without any space is treated as a module line.
1232  if ( strchr( line, ' ' ) == NULL ) // module
1233  {
// Flush the previous module before starting a new one.
1234  if ( module_num > 0 ) {
1235  put_S_module( cur_module, dir );
1236  cur_module->clear();
1237  }
1238  module_num++;
1239  char* end_sym = strchr( line, '%' );
1240  if ( end_sym == NULL ) // error
1241  {
1242  fprintf( stderr, "ERROR: Invalid module name. \nLINE: %s\naborting...\n", line );
1243  exit( 1 );
1244  }
// The module name is the text before the first '%'; the number after it is the module's total.
1245  bzero( cur_module_name, MAX_MODULE_NAME_LENGTH );
1246  memcpy( cur_module_name, line, strlen( line ) - strlen( end_sym ) );
1247  cur_module->init( cur_module_name, arch, event, cmask, inv, sp );
1248  cur_module->set_total( atoi( end_sym + 1 ) );
1249  } // module
1250  else // symbol, libName, libOffset, value
1251  {
1252  unsigned int value = 0, libOffset = 0;
1253  char symbol[MAX_SYM_LENGTH];
1254  char libName[MAX_LIB_LENGTH];
1255  char final_sym[MAX_SYM_MOD_LENGTH];
1256  char final_lib[MAX_LIB_MOD_LENGTH];
1257  bzero( symbol, MAX_SYM_LENGTH );
1258  bzero( libName, MAX_LIB_LENGTH );
1259  bzero( final_sym, MAX_SYM_MOD_LENGTH );
1260  bzero( final_lib, MAX_LIB_MOD_LENGTH );
1261 
1262  sscanf( line, "%s %s %u %u", symbol, libName, &libOffset, &value );
// If libName resolves to a real path, the symbol is looked up again by offset in that
// library; otherwise the symbol and library names from the line are used unchanged.
1263  char realPathName_s[FILENAME_MAX];
1264  bzero( realPathName_s, FILENAME_MAX );
1265  char* realPathName = realpath( libName, realPathName_s );
1266  if ( realPathName != NULL && strlen( realPathName ) > 0 ) {
// NOTE(review): source line 1267 (declaration of `result`) is missing from the listing.
1268  result = libsInfo.find( realPathName );
// libsInfo keeps one FileInfo per resolved library path; it is created on first use.
1269  if ( result == libsInfo.end() ) { libsInfo[realPathName] = FileInfo( realPathName, true ); }
1270  const char* temp_sym = libsInfo[realPathName].symbolByOffset( libOffset );
1271  if ( temp_sym != NULL && strlen( temp_sym ) > 0 ) {
1272  int status;
// Use the demangled name when demangling succeeds (status == 0), the raw symbol otherwise.
1273  char* demangled_symbol = abi::__cxa_demangle( temp_sym, NULL, NULL, &status );
1274  if ( status == 0 ) {
1275  strcpy( final_sym, demangled_symbol );
1276  free( demangled_symbol );
1277  } else {
1278  strcpy( final_sym, temp_sym );
1279  }
1280  } else {
// No symbol was found for this offset.
1281  strcpy( final_sym, "???" );
1282  }
1283  strcpy( final_lib, realPathName );
1284  } else {
1285  strcpy( final_sym, symbol );
1286  strcpy( final_lib, libName );
1287  }
// The sample key is "<symbol>%<library>"; put_S_module() splits it again at the first '%'.
1288  char index[MAX_LINE_LENGTH];
1289  bzero( index, MAX_LINE_LENGTH );
1290  strcpy( index, final_sym );
1291  strcat( index, "%" );
1292  strcat( index, final_lib );
1293  cur_module->add_sample( index, value );
1294  } // symbol, libName, libOffset, value
1295  bzero( line, MAX_LINE_LENGTH );
1296  } // while(gzgets(res_file, line, MAX_LINE_LENGTH)!=Z_NULL)
1297  put_S_module( cur_module, dir ); // last module!
1298  cur_module->clear();
1299  gzclose( res_file );
1300  } // if(res_file != NULL)
1301  else {
1302  fprintf( stderr, "ERROR: Unable to open input file: %s\naborting...\n", filename );
1303  exit( 1 );
1304  }
1305  delete cur_module; // delete it!
1306  return 0;
1307 }

◆ skipString()

bool skipString ( const char *  strptr,
const char *  srcbuffer,
const char **  dstbuffer 
)

Definition at line 145 of file pfm_gen_analysis.cpp.

/// Check whether @p srcbuffer begins with the string @p strptr and, if so,
/// step over it.
///
/// @param strptr     prefix to look for
/// @param srcbuffer  text to examine
/// @param dstbuffer  on success receives the position just past the prefix;
///                   left untouched on failure
/// @return true when the prefix matched, false otherwise
bool skipString( const char* strptr, const char* srcbuffer, const char** dstbuffer ) {
  const size_t prefixLength = strlen( strptr );
  const bool   matches      = ( strncmp( srcbuffer, strptr, prefixLength ) == 0 );
  if ( matches ) { *dstbuffer = srcbuffer + prefixLength; }
  return matches;
}

◆ skipWhitespaces()

bool skipWhitespaces ( const char *  srcbuffer,
const char **  destbuffer 
)

Definition at line 131 of file pfm_gen_analysis.cpp.

/// Step over a run of one or more whitespace characters at the start of
/// @p srcbuffer.
///
/// Characters are converted to unsigned char before being classified,
/// because passing a negative plain-char value (any byte >= 0x80 on
/// platforms with signed char) to isspace() is undefined behaviour.
///
/// @param srcbuffer   text to examine
/// @param destbuffer  on success receives the position of the first
///                    non-whitespace character; left untouched on failure
/// @return true when at least one whitespace character was skipped,
///         false when @p srcbuffer does not start with whitespace
bool skipWhitespaces( const char* srcbuffer, const char** destbuffer ) {
  if ( !isspace( static_cast<unsigned char>( *srcbuffer ) ) ) { return false; }
  ++srcbuffer;
  while ( isspace( static_cast<unsigned char>( *srcbuffer ) ) ) { ++srcbuffer; }
  *destbuffer = srcbuffer;
  return true;
}
std::strcpy
T strcpy(T... args)
MAX_CMASK_STR_LENGTH
#define MAX_CMASK_STR_LENGTH
Definition: pfm_gen_analysis.cpp:94
I7_L1_ITLB_WALK_COMPLETED_CYCLES
#define I7_L1_ITLB_WALK_COMPLETED_CYCLES
Definition: pfm_gen_analysis.cpp:60
std::strtol
T strtol(T... args)
init_core_caa_events
void init_core_caa_events()
Definition: pfm_gen_analysis.cpp:282
std::isdigit
T isdigit(T... args)
html_special_chars
void html_special_chars(const char *s, char *s_mod)
Definition: pfm_gen_analysis.cpp:836
std::strcmp
T strcmp(T... args)
I7_OTHER_CORE_L2_HITM_CYCLES
#define I7_OTHER_CORE_L2_HITM_CYCLES
Definition: pfm_gen_analysis.cpp:64
std::strrchr
T strrchr(T... args)
std::strlen
T strlen(T... args)
MAX_SYM_MOD_LENGTH
#define MAX_SYM_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:82
std::string
STL class.
put_C_modules
void put_C_modules(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1476
plotBacklogPyRoot.argc
argc
Definition: plotBacklogPyRoot.py:173
std::pair
init_nhm_caa_events
void init_nhm_caa_events()
Definition: pfm_gen_analysis.cpp:306
I7_IFETCH_L2_MISS_L3_HITM
#define I7_IFETCH_L2_MISS_L3_HITM
Definition: pfm_gen_analysis.cpp:72
gaudirun.s
string s
Definition: gaudirun.py:346
S_module::get_event
std::string get_event()
Definition: pfm_gen_analysis.cpp:814
std::vector
STL class.
std::find
T find(T... args)
MAX_LIB_MOD_LENGTH
#define MAX_LIB_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:84
finalize_S_html_pages
int finalize_S_html_pages(const char *dir)
Definition: pfm_gen_analysis.cpp:1356
CORE_UNKNOWN_ADDR_STORE_CYCLES
#define CORE_UNKNOWN_ADDR_STORE_CYCLES
Definition: pfm_gen_analysis.cpp:54
std::strcat
T strcat(T... args)
put_C_header
void put_C_header(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1447
CORE_L1_DTLB_MISS_CYCLES
#define CORE_L1_DTLB_MISS_CYCLES
Definition: pfm_gen_analysis.cpp:52
I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES
#define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:65
std::isalnum
T isalnum(T... args)
std::fscanf
T fscanf(T... args)
gaudirun.c
c
Definition: gaudirun.py:525
MAX_EVENT_NAME_LENGTH
#define MAX_EVENT_NAME_LENGTH
Definition: pfm_gen_analysis.cpp:90
std::strerror
T strerror(T... args)
S_module::get_total_num_samples
unsigned int get_total_num_samples()
Definition: pfm_gen_analysis.cpp:829
std::sort
T sort(T... args)
I7_L2_HIT_CYCLES
#define I7_L2_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:61
EXPECTED_CPI
#define EXPECTED_CPI
Definition: pfm_gen_analysis.cpp:77
std::vector::push_back
T push_back(T... args)
S_module::get_max
bool get_max(char *index, unsigned int &value)
Definition: pfm_gen_analysis.cpp:819
compareOutputFiles.sp
sp
Definition: compareOutputFiles.py:506
I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT
#define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT
Definition: pfm_gen_analysis.cpp:69
I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD
#define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD
Definition: pfm_gen_analysis.cpp:73
fixtures.stderr
Generator[bytes, None, None] stderr(subprocess.CompletedProcess completed_process)
Definition: fixtures.py:147
read_C_file
int read_C_file(const char *dir, const char *filename)
Definition: pfm_gen_analysis.cpp:1382
S_module::clear
void clear()
Definition: pfm_gen_analysis.cpp:788
CORE_L2_HIT_CYCLES
#define CORE_L2_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:51
read_S_file
int read_S_file(const char *dir, const char *filename)
Definition: pfm_gen_analysis.cpp:1196
calc_nhm_deriv_values
void calc_nhm_deriv_values(double totalCycles)
Definition: pfm_gen_analysis.cpp:605
S_module::get_c_mask
unsigned int get_c_mask()
Definition: pfm_gen_analysis.cpp:812
std::strchr
T strchr(T... args)
read_S_events
int read_S_events(const char *dir, const char *filename)
Definition: pfm_gen_analysis.cpp:1309
std::fprintf
T fprintf(T... args)
MAX_SIMPLE_LIB_MOD_LENGTH
#define MAX_SIMPLE_LIB_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:88
std::fclose
T fclose(T... args)
MAX_LIB_LENGTH
#define MAX_LIB_LENGTH
Definition: pfm_gen_analysis.cpp:83
I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP
#define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP
Definition: pfm_gen_analysis.cpp:70
MAX_SIMPLE_SYM_LENGTH
#define MAX_SIMPLE_SYM_LENGTH
Definition: pfm_gen_analysis.cpp:85
ProduceConsume.j
j
Definition: ProduceConsume.py:104
std::cerr
I7_L1_DTLB_WALK_COMPLETED_CYCLES
#define I7_L1_DTLB_WALK_COMPLETED_CYCLES
Definition: pfm_gen_analysis.cpp:59
std::isalpha
T isalpha(T... args)
S_module::get_module_name
std::string get_module_name()
Definition: pfm_gen_analysis.cpp:828
std::atoi
T atoi(T... args)
I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT
#define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT
Definition: pfm_gen_analysis.cpp:68
func_name
const char * func_name(const char *demangled_symbol)
Definition: pfm_gen_analysis.cpp:867
std::fopen
T fopen(T... args)
getTotalCycles
double getTotalCycles()
Definition: pfm_gen_analysis.cpp:1589
CORE_LCP_STALL_CYCLES
#define CORE_LCP_STALL_CYCLES
Definition: pfm_gen_analysis.cpp:53
check_for_nhm_caa_events
bool check_for_nhm_caa_events()
Definition: pfm_gen_analysis.cpp:360
CORE_OVERLAPPING_CYCLES
#define CORE_OVERLAPPING_CYCLES
Definition: pfm_gen_analysis.cpp:55
std::map
STL class.
S_module::get_inv_mask
unsigned int get_inv_mask()
Definition: pfm_gen_analysis.cpp:811
put_C_footer
void put_C_footer(FILE *fp)
Definition: pfm_gen_analysis.cpp:1503
put_C_modules_csv
void put_C_modules_csv(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1519
cpluginsvc.n
n
Definition: cpluginsvc.py:234
MAX_MODULE_NAME_LENGTH
#define MAX_MODULE_NAME_LENGTH
Definition: pfm_gen_analysis.cpp:91
init_core_caa_events_displ
void init_core_caa_events_displ()
Definition: pfm_gen_analysis.cpp:370
S_module::add_sample
void add_sample(const char *index, unsigned int value)
Definition: pfm_gen_analysis.cpp:815
CORE_SPAN_ACROSS_CACHE_LINE_CYCLES
#define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES
Definition: pfm_gen_analysis.cpp:56
std::strstr
T strstr(T... args)
MAX_SYM_LENGTH
#define MAX_SYM_LENGTH
Definition: pfm_gen_analysis.cpp:81
hivetimeline.read
def read(f, regex=".*", skipevents=0)
Definition: hivetimeline.py:32
std::strncmp
T strncmp(T... args)
std::endl
T endl(T... args)
put_C_header_csv
void put_C_header_csv(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1508
S_module::get_smpl_period
unsigned int get_smpl_period()
Definition: pfm_gen_analysis.cpp:810
std::vector::begin
T begin(T... args)
std::map::insert
T insert(T... args)
I7_OTHER_CORE_L2_HIT_CYCLES
#define I7_OTHER_CORE_L2_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:63
CORE_L2_MISS_CYCLES
#define CORE_L2_MISS_CYCLES
Definition: pfm_gen_analysis.cpp:50
MAX_SIMPLE_LIB_LENGTH
#define MAX_SIMPLE_LIB_LENGTH
Definition: pfm_gen_analysis.cpp:87
MAX_LINE_LENGTH
#define MAX_LINE_LENGTH
Definition: pfm_gen_analysis.cpp:89
FileInfo
Definition: pfm_gen_analysis.cpp:151
MAX_SAMPLE_INDEX_LENGTH
#define MAX_SAMPLE_INDEX_LENGTH
Definition: pfm_gen_analysis.cpp:80
I7_IFETCH_L2_MISS_L3_HIT_SNOOP
#define I7_IFETCH_L2_MISS_L3_HIT_SNOOP
Definition: pfm_gen_analysis.cpp:71
ReadAndWriteWhiteBoard.dst
dst
Definition: ReadAndWriteWhiteBoard.py:33
std::isspace
T isspace(T... args)
calc_post_deriv_values
void calc_post_deriv_values()
Definition: pfm_gen_analysis.cpp:1563
std::free
T free(T... args)
plotSpeedupsPyRoot.line
line
Definition: plotSpeedupsPyRoot.py:198
put_S_module
void put_S_module(S_module *cur_module, const char *dir)
Definition: pfm_gen_analysis.cpp:1044
std::memcpy
T memcpy(T... args)
MAX_INV_STR_LENGTH
#define MAX_INV_STR_LENGTH
Definition: pfm_gen_analysis.cpp:95
check_for_core_caa_events
bool check_for_core_caa_events()
Definition: pfm_gen_analysis.cpp:350
std::vector::end
T end(T... args)
normalize
double normalize(std::string field, double value, double normalizeTo)
Definition: pfm_gen_analysis.cpp:1544
std::max
T max(T... args)
I7_L3_UNSHARED_HIT_CYCLES
#define I7_L3_UNSHARED_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:62
graphanalysis.filename
filename
Definition: graphanalysis.py:130
S_module::init
void init(const char *name, const char *architecture, const char *event_name, unsigned int c_mask, unsigned int inv_mask, unsigned int smpl_period)
Definition: pfm_gen_analysis.cpp:797
MAX_FILENAME_LENGTH
#define MAX_FILENAME_LENGTH
Definition: pfm_gen_analysis.cpp:79
I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES
#define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:67
init_nhm_caa_events_displ
void init_nhm_caa_events_displ()
Definition: pfm_gen_analysis.cpp:500
calc_core_deriv_values
void calc_core_deriv_values(double totalCycles)
Definition: pfm_gen_analysis.cpp:419
I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES
#define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:66
S_module::set_total
void set_total(unsigned int total)
Definition: pfm_gen_analysis.cpp:806
std::exit
T exit(T... args)
Gaudi::ParticleProperties::index
size_t index(const Gaudi::ParticleProperty *property, const Gaudi::Interfaces::IParticlePropertySvc *service)
helper utility for mapping of Gaudi::ParticleProperty object into non-negative integral sequential id...
Definition: IParticlePropertySvc.cpp:39
MAX_SIMPLE_SYM_MOD_LENGTH
#define MAX_SIMPLE_SYM_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:86
gaudirun.argv
list argv
Definition: gaudirun.py:327
MAX_SP_STR_LENGTH
#define MAX_SP_STR_LENGTH
Definition: pfm_gen_analysis.cpp:96
S_module
Definition: pfm_gen_analysis.cpp:775
MAX_ARCH_NAME_LENGTH
#define MAX_ARCH_NAME_LENGTH
Definition: pfm_gen_analysis.cpp:93