The Gaudi Framework  v36r1 (3e2fb5a8)
pfm_gen_analysis.cpp File Reference
#include <ctype.h>
#include <cxxabi.h>
#include <fcntl.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <zlib.h>
#include <algorithm>
#include <iostream>
#include <list>
#include <map>
#include <sstream>
#include <string>
#include <vector>
#include <dirent.h>
#include <errno.h>
Include dependency graph for pfm_gen_analysis.cpp:

Go to the source code of this file.

Classes

class  PipeReader
 
class  FileInfo
 
struct  FileInfo::CacheItem
 
struct  FileInfo::CacheItemComparator
 
class  S_module
 

Macros

#define CORE_L2_MISS_CYCLES   200
 
#define CORE_L2_HIT_CYCLES   14.5
 
#define CORE_L1_DTLB_MISS_CYCLES   10
 
#define CORE_LCP_STALL_CYCLES   6
 
#define CORE_UNKNOWN_ADDR_STORE_CYCLES   5
 
#define CORE_OVERLAPPING_CYCLES   6
 
#define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES   20
 
#define I7_L1_DTLB_WALK_COMPLETED_CYCLES   35
 
#define I7_L1_ITLB_WALK_COMPLETED_CYCLES   35
 
#define I7_L2_HIT_CYCLES   6
 
#define I7_L3_UNSHARED_HIT_CYCLES   35
 
#define I7_OTHER_CORE_L2_HIT_CYCLES   60
 
#define I7_OTHER_CORE_L2_HITM_CYCLES   75
 
#define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES   225
 
#define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES   360
 
#define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES   180
 
#define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT   200
 
#define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT   350
 
#define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP   35
 
#define I7_IFETCH_L2_MISS_L3_HIT_SNOOP   60
 
#define I7_IFETCH_L2_MISS_L3_HITM   75
 
#define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD   180
 
#define MAX_MODULES   1000
 
#define EXPECTED_CPI   0.25
 
#define MAX_FILENAME_LENGTH   1024
 
#define MAX_SAMPLE_INDEX_LENGTH   10000
 
#define MAX_SYM_LENGTH   15000
 
#define MAX_SYM_MOD_LENGTH   20000
 
#define MAX_LIB_LENGTH   5000
 
#define MAX_LIB_MOD_LENGTH   7000
 
#define MAX_SIMPLE_SYM_LENGTH   300
 
#define MAX_SIMPLE_SYM_MOD_LENGTH   500
 
#define MAX_SIMPLE_LIB_LENGTH   300
 
#define MAX_SIMPLE_LIB_MOD_LENGTH   500
 
#define MAX_LINE_LENGTH   20000
 
#define MAX_EVENT_NAME_LENGTH   150
 
#define MAX_MODULE_NAME_LENGTH   250
 
#define MAX_VALUE_STRING_LENGTH   250
 
#define MAX_ARCH_NAME_LENGTH   20
 
#define MAX_CMASK_STR_LENGTH   5
 
#define MAX_INV_STR_LENGTH   5
 
#define MAX_SP_STR_LENGTH   50
 
#define PIPE_BUFFER_LENGTH   1000
 

Functions

bool skipWhitespaces (const char *srcbuffer, const char **destbuffer)
 
bool skipString (const char *strptr, const char *srcbuffer, const char **dstbuffer)
 
void init_core_caa_events ()
 
void init_nhm_caa_events ()
 
bool check_for_core_caa_events ()
 
bool check_for_nhm_caa_events ()
 
void init_core_caa_events_displ ()
 
void calc_core_deriv_values (double totalCycles)
 
void init_nhm_caa_events_displ ()
 
void calc_nhm_deriv_values (double totalCycles)
 
void html_special_chars (const char *s, char *s_mod)
 
const char * func_name (const char *demangled_symbol)
 
void put_S_module (S_module *cur_module, const char *dir)
 
int read_S_file (const char *dir, const char *filename)
 
int read_S_events (const char *dir, const char *filename)
 
int finalize_S_html_pages (const char *dir)
 
int read_C_file (const char *dir, const char *filename)
 
void put_C_header (FILE *fp, std::vector< std::string > &columns)
 
void put_C_modules (FILE *fp, std::vector< std::string > &columns)
 
void put_C_footer (FILE *fp)
 
void put_C_header_csv (FILE *fp, std::vector< std::string > &columns)
 
void put_C_modules_csv (FILE *fp, std::vector< std::string > &columns)
 
double normalize (std::string field, double value, double normalizeTo)
 
void calc_post_deriv_values ()
 
double getTotalCycles ()
 
int main (int argc, char *argv[])
 

Macro Definition Documentation

◆ CORE_L1_DTLB_MISS_CYCLES

#define CORE_L1_DTLB_MISS_CYCLES   10

Definition at line 52 of file pfm_gen_analysis.cpp.

◆ CORE_L2_HIT_CYCLES

#define CORE_L2_HIT_CYCLES   14.5

Definition at line 51 of file pfm_gen_analysis.cpp.

◆ CORE_L2_MISS_CYCLES

#define CORE_L2_MISS_CYCLES   200

Definition at line 50 of file pfm_gen_analysis.cpp.

◆ CORE_LCP_STALL_CYCLES

#define CORE_LCP_STALL_CYCLES   6

Definition at line 53 of file pfm_gen_analysis.cpp.

◆ CORE_OVERLAPPING_CYCLES

#define CORE_OVERLAPPING_CYCLES   6

Definition at line 55 of file pfm_gen_analysis.cpp.

◆ CORE_SPAN_ACROSS_CACHE_LINE_CYCLES

#define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES   20

Definition at line 56 of file pfm_gen_analysis.cpp.

◆ CORE_UNKNOWN_ADDR_STORE_CYCLES

#define CORE_UNKNOWN_ADDR_STORE_CYCLES   5

Definition at line 54 of file pfm_gen_analysis.cpp.

◆ EXPECTED_CPI

#define EXPECTED_CPI   0.25

Definition at line 77 of file pfm_gen_analysis.cpp.

◆ I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP

#define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP   35

Definition at line 70 of file pfm_gen_analysis.cpp.

◆ I7_IFETCH_L2_MISS_L3_HIT_SNOOP

#define I7_IFETCH_L2_MISS_L3_HIT_SNOOP   60

Definition at line 71 of file pfm_gen_analysis.cpp.

◆ I7_IFETCH_L2_MISS_L3_HITM

#define I7_IFETCH_L2_MISS_L3_HITM   75

Definition at line 72 of file pfm_gen_analysis.cpp.

◆ I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT

#define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT   200

Definition at line 68 of file pfm_gen_analysis.cpp.

◆ I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD

#define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD   180

Definition at line 73 of file pfm_gen_analysis.cpp.

◆ I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT

#define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT   350

Definition at line 69 of file pfm_gen_analysis.cpp.

◆ I7_L1_DTLB_WALK_COMPLETED_CYCLES

#define I7_L1_DTLB_WALK_COMPLETED_CYCLES   35

Definition at line 59 of file pfm_gen_analysis.cpp.

◆ I7_L1_ITLB_WALK_COMPLETED_CYCLES

#define I7_L1_ITLB_WALK_COMPLETED_CYCLES   35

Definition at line 60 of file pfm_gen_analysis.cpp.

◆ I7_L2_HIT_CYCLES

#define I7_L2_HIT_CYCLES   6

Definition at line 61 of file pfm_gen_analysis.cpp.

◆ I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES

#define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES   225

Definition at line 65 of file pfm_gen_analysis.cpp.

◆ I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES

#define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES   180

Definition at line 67 of file pfm_gen_analysis.cpp.

◆ I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES

#define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES   360

Definition at line 66 of file pfm_gen_analysis.cpp.

◆ I7_L3_UNSHARED_HIT_CYCLES

#define I7_L3_UNSHARED_HIT_CYCLES   35

Definition at line 62 of file pfm_gen_analysis.cpp.

◆ I7_OTHER_CORE_L2_HIT_CYCLES

#define I7_OTHER_CORE_L2_HIT_CYCLES   60

Definition at line 63 of file pfm_gen_analysis.cpp.

◆ I7_OTHER_CORE_L2_HITM_CYCLES

#define I7_OTHER_CORE_L2_HITM_CYCLES   75

Definition at line 64 of file pfm_gen_analysis.cpp.

◆ MAX_ARCH_NAME_LENGTH

#define MAX_ARCH_NAME_LENGTH   20

Definition at line 93 of file pfm_gen_analysis.cpp.

◆ MAX_CMASK_STR_LENGTH

#define MAX_CMASK_STR_LENGTH   5

Definition at line 94 of file pfm_gen_analysis.cpp.

◆ MAX_EVENT_NAME_LENGTH

#define MAX_EVENT_NAME_LENGTH   150

Definition at line 90 of file pfm_gen_analysis.cpp.

◆ MAX_FILENAME_LENGTH

#define MAX_FILENAME_LENGTH   1024

Definition at line 79 of file pfm_gen_analysis.cpp.

◆ MAX_INV_STR_LENGTH

#define MAX_INV_STR_LENGTH   5

Definition at line 95 of file pfm_gen_analysis.cpp.

◆ MAX_LIB_LENGTH

#define MAX_LIB_LENGTH   5000

Definition at line 83 of file pfm_gen_analysis.cpp.

◆ MAX_LIB_MOD_LENGTH

#define MAX_LIB_MOD_LENGTH   7000

Definition at line 84 of file pfm_gen_analysis.cpp.

◆ MAX_LINE_LENGTH

#define MAX_LINE_LENGTH   20000

Definition at line 89 of file pfm_gen_analysis.cpp.

◆ MAX_MODULE_NAME_LENGTH

#define MAX_MODULE_NAME_LENGTH   250

Definition at line 91 of file pfm_gen_analysis.cpp.

◆ MAX_MODULES

#define MAX_MODULES   1000

Definition at line 75 of file pfm_gen_analysis.cpp.

◆ MAX_SAMPLE_INDEX_LENGTH

#define MAX_SAMPLE_INDEX_LENGTH   10000

Definition at line 80 of file pfm_gen_analysis.cpp.

◆ MAX_SIMPLE_LIB_LENGTH

#define MAX_SIMPLE_LIB_LENGTH   300

Definition at line 87 of file pfm_gen_analysis.cpp.

◆ MAX_SIMPLE_LIB_MOD_LENGTH

#define MAX_SIMPLE_LIB_MOD_LENGTH   500

Definition at line 88 of file pfm_gen_analysis.cpp.

◆ MAX_SIMPLE_SYM_LENGTH

#define MAX_SIMPLE_SYM_LENGTH   300

Definition at line 85 of file pfm_gen_analysis.cpp.

◆ MAX_SIMPLE_SYM_MOD_LENGTH

#define MAX_SIMPLE_SYM_MOD_LENGTH   500

Definition at line 86 of file pfm_gen_analysis.cpp.

◆ MAX_SP_STR_LENGTH

#define MAX_SP_STR_LENGTH   50

Definition at line 96 of file pfm_gen_analysis.cpp.

◆ MAX_SYM_LENGTH

#define MAX_SYM_LENGTH   15000

Definition at line 81 of file pfm_gen_analysis.cpp.

◆ MAX_SYM_MOD_LENGTH

#define MAX_SYM_MOD_LENGTH   20000

Definition at line 82 of file pfm_gen_analysis.cpp.

◆ MAX_VALUE_STRING_LENGTH

#define MAX_VALUE_STRING_LENGTH   250

Definition at line 92 of file pfm_gen_analysis.cpp.

◆ PIPE_BUFFER_LENGTH

#define PIPE_BUFFER_LENGTH   1000

Definition at line 98 of file pfm_gen_analysis.cpp.

Function Documentation

◆ calc_core_deriv_values()

void calc_core_deriv_values ( double  totalCycles)

Definition at line 421 of file pfm_gen_analysis.cpp.

421  {
422  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
423  ++it ) {
424  ( it->second )["Total Cycles"] = ( it->second )["UNHALTED_CORE_CYCLES"];
425  ( it->second )["Stalled Cycles"] = ( it->second )["RS_UOPS_DISPATCHED CMASK=1 INV=1"];
426  ( it->second )["L2 Miss Impact"] = ( it->second )["MEM_LOAD_RETIRED:L2_LINE_MISS"] * CORE_L2_MISS_CYCLES;
427  ( it->second )["L2 Hit Impact"] =
428  ( ( it->second )["MEM_LOAD_RETIRED:L1D_LINE_MISS"] - ( it->second )["MEM_LOAD_RETIRED:L2_LINE_MISS"] ) *
429  CORE_L2_HIT_CYCLES;
430  ( it->second )["L1 DTLB Miss Impact"] = ( it->second )["MEM_LOAD_RETIRED:DTLB_MISS"] * CORE_L1_DTLB_MISS_CYCLES;
431  ( it->second )["LCP Stalls Impact"] = ( it->second )["ILD_STALL"] * CORE_LCP_STALL_CYCLES;
432  ( it->second )["Loads Blocked by Unknown Address Store Impact"] =
433  ( it->second )["LOAD_BLOCK:STA"] * CORE_UNKNOWN_ADDR_STORE_CYCLES;
434  ( it->second )["Loads Overlapped with Stores Impact"] =
435  ( it->second )["LOAD_BLOCK:OVERLAP_STORE"] * CORE_OVERLAPPING_CYCLES;
436  ( it->second )["Loads Spanning across Cache Lines Impact"] =
437  ( it->second )["LOAD_BLOCK:UNTIL_RETIRE"] * CORE_SPAN_ACROSS_CACHE_LINE_CYCLES;
438  ( it->second )["Store-Fwd Stalls Impact"] = ( it->second )["Loads Blocked by Unknown Address Store Impact"] +
439  ( it->second )["Loads Overlapped with Stores Impact"] +
440  ( it->second )["Loads Spanning across Cache Lines Impact"];
441  ( it->second )["Counted Stalled Cycles"] =
442  ( it->second )["L2 Miss Impact"] + ( it->second )["L2 Hit Impact"] + ( it->second )["LCP Stalls Impact"] +
443  ( it->second )["L1 DTLB Miss Impact"] + ( it->second )["Store-Fwd Stalls Impact"];
444  ( it->second )["Instructions Retired"] = ( it->second )["INSTRUCTIONS_RETIRED"];
445  ( it->second )["ITLB Miss Rate in %"] =
446  ( ( it->second )["ITLB_MISS_RETIRED"] / ( it->second )["INSTRUCTIONS_RETIRED"] ) * 100;
447  ( it->second )["Branch Instructions"] = ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"];
448  ( it->second )["Load Instructions"] = ( it->second )["INST_RETIRED:LOADS"];
449  ( it->second )["Store Instructions"] = ( it->second )["INST_RETIRED:STORES"];
450  ( it->second )["Other Instructions"] = ( it->second )["INST_RETIRED:OTHER"] -
451  ( it->second )["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] -
452  ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"];
453  ( it->second )["% of Mispredicted Branches"] =
454  ( ( it->second )["MISPREDICTED_BRANCH_RETIRED"] / ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"] ) * 100;
455  ( it->second )["Packed SIMD Computational Instructions"] =
456  ( it->second )["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"];
457  ( it->second )["Counted Instructions Retired"] =
458  ( it->second )["Branch Instructions"] + ( it->second )["Load Instructions"] +
459  ( it->second )["Store Instructions"] + ( it->second )["Other Instructions"] +
460  ( it->second )["Packed SIMD Computational Instructions"];
461  ( it->second )["CPI"] = ( it->second )["UNHALTED_CORE_CYCLES"] / ( it->second )["INSTRUCTIONS_RETIRED"];
462 
463  double localPerformanceImprovement = ( it->second )["CPI"] / EXPECTED_CPI;
464  double cyclesAfterImprovement = ( it->second )["UNHALTED_CORE_CYCLES"] / localPerformanceImprovement;
465  double totalCyclesAfterImprovement = totalCycles - ( it->second )["UNHALTED_CORE_CYCLES"] + cyclesAfterImprovement;
466  ( it->second )["iMargin"] = 100 - ( totalCyclesAfterImprovement / totalCycles ) * 100;
467 
468  ( it->second )["% of Total Cycles"] =
469  ( it->second )["RS_UOPS_DISPATCHED CMASK=1 INV=1"] * 100 / ( it->second )["UNHALTED_CORE_CYCLES"];
470  ( it->second )["L2 Miss % of counted Stalled Cycles"] =
471  ( it->second )["L2 Miss Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
472  ( it->second )["L2 Hit % of counted Stalled Cycles"] =
473  ( it->second )["L2 Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
474  ( it->second )["L1 DTLB Miss % of counted Stalled Cycles"] =
475  ( it->second )["L1 DTLB Miss Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
476  ( it->second )["LCP Stalls % of counted Stalled Cycles"] =
477  ( it->second )["LCP Stalls Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
478  ( it->second )["Store-Fwd Stalls % of counted Stalled Cycles"] =
479  ( it->second )["Store-Fwd Stalls Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
480  ( it->second )["Loads Blocked % of Store-Fwd Stalls Cycles"] =
481  ( it->second )["Loads Blocked by Unknown Address Store Impact"] * 100 /
482  ( it->second )["Store-Fwd Stalls Impact"];
483  ( it->second )["Loads Overlapped % of Store-Fwd Stalls Cycles"] =
484  ( it->second )["Loads Overlapped with Stores Impact"] * 100 / ( it->second )["Store-Fwd Stalls Impact"];
485  ( it->second )["Loads Spanning % of Store-Fwd Stalls Cycles"] =
486  ( it->second )["Loads Spanning across Cache Lines Impact"] * 100 / ( it->second )["Store-Fwd Stalls Impact"];
487 
488  ( it->second )["Load % of all Instructions"] =
489  ( it->second )["INST_RETIRED:LOADS"] * 100 / ( it->second )["Counted Instructions Retired"];
490  ( it->second )["Store % of all Instructions"] =
491  ( it->second )["INST_RETIRED:STORES"] * 100 / ( it->second )["Counted Instructions Retired"];
492  ( it->second )["Branch % of all Instructions"] =
493  ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"] * 100 / ( it->second )["Counted Instructions Retired"];
494  ( it->second )["Packed SIMD % of all Instructions"] =
495  ( it->second )["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] * 100 /
496  ( it->second )["Counted Instructions Retired"];
497  ( it->second )["Other % of all Instructions"] =
498  ( it->second )["Other Instructions"] * 100 / ( it->second )["Counted Instructions Retired"];
499  }
500 }

◆ calc_nhm_deriv_values()

void calc_nhm_deriv_values ( double  totalCycles)

Definition at line 607 of file pfm_gen_analysis.cpp.

607  {
608  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
609  ++it ) {
610  ( it->second )["Total Cycles"] = ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
611 
612  ( it->second )["L2 Hit Impact"] = ( it->second )["MEM_LOAD_RETIRED:L2_HIT"] * I7_L2_HIT_CYCLES;
613  ( it->second )["L3 Unshared Hit Impact"] =
614  ( it->second )["MEM_LOAD_RETIRED:L3_UNSHARED_HIT"] * I7_L3_UNSHARED_HIT_CYCLES;
615  if ( ( it->second )["MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] >
616  ( it->second )["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] ) {
617  ( it->second )["L2 Other Core Hit Impact"] = ( ( it->second )["MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] -
618  ( it->second )["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] ) *
619  I7_OTHER_CORE_L2_HIT_CYCLES;
620  } else {
621  ( it->second )["L2 Other Core Hit Impact"] = 0.0;
622  }
623  ( it->second )["L2 Other Core Hit Modified Impact"] =
624  ( it->second )["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] * I7_OTHER_CORE_L2_HITM_CYCLES;
625  ( it->second )["L3 Miss -> Local DRAM Hit Impact"] =
626  ( it->second )["MEM_UNCORE_RETIRED:LOCAL_DRAM"] * I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES;
627  ( it->second )["L3 Miss -> Remote DRAM Hit Impact"] =
628  ( it->second )["MEM_UNCORE_RETIRED:REMOTE_DRAM"] * I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES;
629  ( it->second )["L3 Miss -> Remote Cache Hit Impact"] =
630  ( it->second )["MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT"] * I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES;
631  ( it->second )["L3 Miss -> Total Impact"] = ( it->second )["L3 Miss -> Local DRAM Hit Impact"] +
632  ( it->second )["L3 Miss -> Remote DRAM Hit Impact"] +
633  ( it->second )["L3 Miss -> Remote Cache Hit Impact"];
634  ( it->second )["L1 DTLB Miss Impact"] =
635  ( it->second )["DTLB_LOAD_MISSES:WALK_COMPLETED"] * I7_L1_DTLB_WALK_COMPLETED_CYCLES;
636  ( it->second )["Counted Stalled Cycles due to Load Ops"] =
637  ( it->second )["L3 Miss -> Total Impact"] + ( it->second )["L2 Hit Impact"] +
638  ( it->second )["L1 DTLB Miss Impact"] + ( it->second )["L3 Unshared Hit Impact"] +
639  ( it->second )["L2 Other Core Hit Modified Impact"] + ( it->second )["L2 Other Core Hit Impact"];
640  ( it->second )["Cycles spent during DIV & SQRT Ops"] = ( it->second )["ARITH:CYCLES_DIV_BUSY"];
641  ( it->second )["Total Counted Stalled Cycles"] =
642  ( it->second )["Counted Stalled Cycles due to Load Ops"] + ( it->second )["Cycles spent during DIV & SQRT Ops"];
643  ( it->second )["Stalled Cycles"] =
644  ( it->second )["Total Counted Stalled Cycles"]; // TO BE FIXED when UOPS_EXECUTED:0x3f is fixed!!
645  ( it->second )["% of Total Cycles"] =
646  ( it->second )["Stalled Cycles"] * 100 / ( it->second )["CPU_CLK_UNHALTED:THREAD_P"]; // TO BE FIXED!! see above
647  ( it->second )["L3 Miss % of Load Stalls"] =
648  ( it->second )["L3 Miss -> Total Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
649  ( it->second )["L2 Hit % of Load Stalls"] =
650  ( it->second )["L2 Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
651  ( it->second )["L1 DTLB Miss % of Load Stalls"] =
652  ( it->second )["L1 DTLB Miss Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
653  ( it->second )["L3 Unshared Hit % of Load Stalls"] =
654  ( it->second )["L3 Unshared Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
655  ( it->second )["L2 Other Core Hit % of Load Stalls"] =
656  ( it->second )["L2 Other Core Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
657  ( it->second )["L2 Other Core Hit Modified % of Load Stalls"] =
658  ( it->second )["L2 Other Core Hit Modified Impact"] * 100 /
659  ( it->second )["Counted Stalled Cycles due to Load Ops"];
660  ( it->second )["DIV & SQRT Ops % of counted Stalled Cycles"] =
661  ( it->second )["Cycles spent during DIV & SQRT Ops"] * 100 / ( it->second )["Total Counted Stalled Cycles"];
662 
663  ( it->second )["Cycles IFETCH served by Local DRAM"] =
664  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT;
665  ( it->second )["Cycles IFETCH served by L3 (Modified)"] =
666  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * I7_IFETCH_L2_MISS_L3_HITM;
667  ( it->second )["Cycles IFETCH served by L3 (Clean Snoop)"] =
668  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * I7_IFETCH_L2_MISS_L3_HIT_SNOOP;
669  ( it->second )["Cycles IFETCH served by Remote L2"] =
670  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD;
671  ( it->second )["Cycles IFETCH served by Remote DRAM"] =
672  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT;
673  ( it->second )["Cycles IFETCH served by L3 (No Snoop)"] =
674  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP;
675  ( it->second )["Total L2 IFETCH miss Impact"] =
676  ( it->second )["Cycles IFETCH served by Local DRAM"] + ( it->second )["Cycles IFETCH served by L3 (Modified)"] +
677  ( it->second )["Cycles IFETCH served by L3 (Clean Snoop)"] +
678  ( it->second )["Cycles IFETCH served by Remote L2"] + ( it->second )["Cycles IFETCH served by Remote DRAM"] +
679  ( it->second )["Cycles IFETCH served by L3 (No Snoop)"];
680  ( it->second )["Local DRAM IFECTHes % Impact"] =
681  ( it->second )["Cycles IFETCH served by Local DRAM"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
682  ( it->second )["L3 (Modified) IFECTHes % Impact"] =
683  ( it->second )["Cycles IFETCH served by L3 (Modified)"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
684  ( it->second )["L3 (Clean Snoop) IFECTHes % Impact"] = ( it->second )["Cycles IFETCH served by L3 (Clean Snoop)"] *
685  100 / ( it->second )["Total L2 IFETCH miss Impact"];
686  ( it->second )["Remote L2 IFECTHes % Impact"] =
687  ( it->second )["Cycles IFETCH served by Remote L2"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
688  ( it->second )["Remote DRAM IFECTHes % Impact"] =
689  ( it->second )["Cycles IFETCH served by Remote DRAM"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
690  ( it->second )["L3 (No Snoop) IFECTHes % Impact"] =
691  ( it->second )["Cycles IFETCH served by L3 (No Snoop)"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
692  ( it->second )["Total L2 IFETCH misses"] = ( it->second )["L2_RQSTS:IFETCH_MISS"];
693  ( it->second )["% of IFETCHes served by Local DRAM"] =
694  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
695  ( it->second )["% of IFETCHes served by L3 (Modified)"] =
696  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
697  ( it->second )["% of IFETCHes served by L3 (Clean Snoop)"] =
698  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * 100 /
699  ( it->second )["L2_RQSTS:IFETCH_MISS"];
700  ( it->second )["% of IFETCHes served by Remote L2"] =
701  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * 100 /
702  ( it->second )["L2_RQSTS:IFETCH_MISS"];
703  ( it->second )["% of IFETCHes served by Remote DRAM"] =
704  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
705  ( it->second )["% of IFETCHes served by L3 (No Snoop)"] =
706  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
707  ( it->second )["% of L2 IFETCH misses"] =
708  ( it->second )["L2_RQSTS:IFETCH_MISS"] * 100 /
709  ( ( it->second )["L2_RQSTS:IFETCH_MISS"] + ( it->second )["L2_RQSTS:IFETCH_HIT"] );
710  ( it->second )["L1 ITLB Miss Impact"] =
711  ( it->second )["ITLB_MISSES:WALK_COMPLETED"] * I7_L1_ITLB_WALK_COMPLETED_CYCLES;
712 
713  ( it->second )["Total Branch Instructions Executed"] = ( it->second )["BR_INST_EXEC:ANY"];
714  ( it->second )["% of Mispredicted Branches"] =
715  ( it->second )["BR_MISP_EXEC:ANY"] * 100 / ( it->second )["BR_INST_EXEC:ANY"];
716  ( it->second )["Direct Near Calls % of Total Branches Executed"] =
717  ( it->second )["BR_INST_EXEC:DIRECT_NEAR_CALL"] * 100 / ( it->second )["Total Branch Instructions Executed"];
718  ( it->second )["Indirect Near Calls % of Total Branches Executed"] =
719  ( it->second )["BR_INST_EXEC:INDIRECT_NEAR_CALL"] * 100 / ( it->second )["Total Branch Instructions Executed"];
720  ( it->second )["Indirect Near Non-Calls % of Total Branches Executed"] =
721  ( it->second )["BR_INST_EXEC:INDIRECT_NON_CALL"] * 100 / ( it->second )["Total Branch Instructions Executed"];
722  ( it->second )["All Near Calls % of Total Branches Executed"] =
723  ( it->second )["BR_INST_EXEC:NEAR_CALLS"] * 100 / ( it->second )["Total Branch Instructions Executed"];
724  ( it->second )["All Non Calls % of Total Branches Executed"] =
725  ( it->second )["BR_INST_EXEC:NON_CALLS"] * 100 / ( it->second )["Total Branch Instructions Executed"];
726  ( it->second )["All Returns % of Total Branches Executed"] =
727  ( it->second )["BR_INST_EXEC:RETURN_NEAR"] * 100 / ( it->second )["Total Branch Instructions Executed"];
728  ( it->second )["Total Branch Instructions Retired"] = ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"];
729  ( it->second )["Conditionals % of Total Branches Retired"] =
730  ( it->second )["BR_INST_RETIRED:CONDITIONAL"] * 100 / ( it->second )["Total Branch Instructions Retired"];
731  ( it->second )["Near Calls % of Total Branches Retired"] =
732  ( it->second )["BR_INST_RETIRED:NEAR_CALL"] * 100 / ( it->second )["Total Branch Instructions Retired"];
733 
734  ( it->second )["Instruction Starvation % of Total Cycles"] =
735  ( ( it->second )["UOPS_ISSUED:ANY CMASK=1 INV=1"] - ( it->second )["RESOURCE_STALLS:ANY"] ) * 100 /
736  ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
737  ( it->second )["% of Total Cycles spent handling FP exceptions"] =
738  ( it->second )["UOPS_DECODED:MS CMASK=1"] * 100 / ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
739  ( it->second )["# of Instructions per Call"] =
740  ( it->second )["INST_RETIRED:ANY_P"] / ( it->second )["BR_INST_EXEC:NEAR_CALLS"];
741 
742  ( it->second )["Instructions Retired"] = ( it->second )["INST_RETIRED:ANY_P"];
743  ( it->second )["ITLB Miss Rate in %"] =
744  ( ( it->second )["ITLB_MISS_RETIRED"] / ( it->second )["INST_RETIRED:ANY_P"] ) * 100;
745 
746  ( it->second )["Branch Instructions"] = ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"];
747  ( it->second )["Load Instructions"] = ( it->second )["MEM_INST_RETIRED:LOADS"];
748  ( it->second )["Store Instructions"] = ( it->second )["MEM_INST_RETIRED:STORES"];
749  ( it->second )["Other Instructions"] =
750  ( it->second )["Instructions Retired"] - ( it->second )["MEM_INST_RETIRED:LOADS"] -
751  ( it->second )["MEM_INST_RETIRED:STORES"] - ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"];
752  ( it->second )["Packed UOPS Retired"] =
753  ( it->second )["SSEX_UOPS_RETIRED:PACKED_DOUBLE"] + ( it->second )["SSEX_UOPS_RETIRED:PACKED_SINGLE"];
754  ( it->second )["CPI"] = ( it->second )["CPU_CLK_UNHALTED:THREAD_P"] / ( it->second )["INST_RETIRED:ANY_P"];
755 
756  double localPerformanceImprovement = ( it->second )["CPI"] / EXPECTED_CPI;
757  double cyclesAfterImprovement = ( it->second )["CPU_CLK_UNHALTED:THREAD_P"] / localPerformanceImprovement;
758  double totalCyclesAfterImprovement =
759  totalCycles - ( it->second )["CPU_CLK_UNHALTED:THREAD_P"] + cyclesAfterImprovement;
760  ( it->second )["iMargin"] = 100 - ( totalCyclesAfterImprovement / totalCycles ) * 100;
761 
762  ( it->second )["Load % of all Instructions"] =
763  ( it->second )["MEM_INST_RETIRED:LOADS"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
764  ( it->second )["Store % of all Instructions"] =
765  ( it->second )["MEM_INST_RETIRED:STORES"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
766  ( it->second )["Branch % of all Instructions"] =
767  ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
768  ( it->second )["Other % of all Instructions"] =
769  ( it->second )["Other Instructions"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
770 
771  ( it->second )["Packed % of all UOPS Retired"] =
772  ( it->second )["Packed UOPS Retired"] * 100 / ( it->second )["UOPS_RETIRED:ANY"];
773  }
774 }

◆ calc_post_deriv_values()

void calc_post_deriv_values ( )

Definition at line 1565 of file pfm_gen_analysis.cpp.

1565  {
1566  if ( nehalem ) {
1567  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1568  ++it ) {
1569  double simdnorm =
1570  1. - normalize( "Packed % of all UOPS Retired", ( it->second )["Packed % of all UOPS Retired"], 1 );
1571  double misspnorm = normalize( "% of Mispredicted Branches", ( it->second )["% of Mispredicted Branches"], 1 );
1572  double stallnorm = normalize( "Stalled Cycles", ( it->second )["Stalled Cycles"], 1 );
1573  ( it->second )["iFactor"] = stallnorm * ( simdnorm + misspnorm + stallnorm );
1574  }
1575  } else {
1576  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1577  ++it ) {
1578  double simdnorm =
1579  1. - normalize( "Packed SIMD % of all Instructions", ( it->second )["Packed SIMD % of all Instructions"], 1 );
1580  double misspnorm = normalize( "% of Mispredicted Branches", ( it->second )["% of Mispredicted Branches"], 1 );
1581  double stallnorm = normalize( "Stalled Cycles", ( it->second )["Stalled Cycles"], 1 );
1582  ( it->second )["iFactor"] = stallnorm * ( simdnorm + misspnorm + stallnorm );
1583  }
1584  }
1585 }

◆ check_for_core_caa_events()

bool check_for_core_caa_events ( )

Definition at line 352 of file pfm_gen_analysis.cpp.

352  {
353  for ( std::vector<std::string>::const_iterator it = core_caa_events.begin(); it != core_caa_events.end(); ++it ) {
354  if ( find( C_events.begin(), C_events.end(), ( *it ) ) == C_events.end() ) {
355  fprintf( stderr, "ERROR: Cannot find event %s!!!\naborting...\n", ( *it ).c_str() );
356  return false;
357  }
358  }
359  return true;
360 }

◆ check_for_nhm_caa_events()

bool check_for_nhm_caa_events ( )

Definition at line 362 of file pfm_gen_analysis.cpp.

362  {
363  for ( std::vector<std::string>::const_iterator it = nhm_caa_events.begin(); it != nhm_caa_events.end(); ++it ) {
364  if ( find( C_events.begin(), C_events.end(), ( *it ) ) == C_events.end() ) {
365  fprintf( stderr, "ERROR: Cannot find event %s!!!\naborting...\n", ( *it ).c_str() );
366  return false;
367  }
368  }
369  return true;
370 }

◆ finalize_S_html_pages()

int finalize_S_html_pages ( const char *  dir)

Definition at line 1358 of file pfm_gen_analysis.cpp.

1358  {
1359  for ( std::map<std::string, unsigned int>::const_iterator i = modules_tot_samples.begin();
1360  i != modules_tot_samples.end(); i++ ) {
1361  char module_filename[MAX_FILENAME_LENGTH];
1362  strcpy( module_filename, dir );
1363  strcat( module_filename, "/HTML/" );
1364  strcat( module_filename, ( i->first ).c_str() );
1365  strcat( module_filename, ".html" );
1366  FILE* module_file = fopen( module_filename, "a" );
1367  if ( module_file == NULL ) {
1368  fprintf( stderr, "ERROR: Unable to append to file: %s\naborting...\n", module_filename );
1369  exit( 1 );
1370  }
1371  fprintf( module_file, "</body>\n</html>\n" );
1372  if ( fclose( module_file ) ) {
1373  fprintf( stderr, "ERROR: Cannot close file %s!!!\naborting...\n", module_filename );
1374  exit( 1 );
1375  }
1376  }
1377  return 0;
1378 }

◆ func_name()

/// True for the characters func_name() accepts while scanning backwards over
/// a plain function name (letters, digits, '_' and '~').
static bool is_func_name_char( char c ) {
  // <ctype.h> classifiers require a value representable as unsigned char
  return isalnum( static_cast<unsigned char>( c ) ) || c == '_' || c == '~';
}

/// Extract the bare function name from a demangled C++ symbol.
///
/// The result points INTO the buffer passed as `demangled_symbol`, and the
/// buffer is modified in place: a '\0' is written after the extracted name.
/// The caller must therefore pass writable storage even though the parameter
/// is declared `const char*`.
///
/// Behaviour:
///  - If the text contains "operator", the result starts at that word and is
///    cut right after the operator token that follows it (e.g. "operator<<",
///    "operator new[]"). If no known token follows, nothing is cut.
///  - Otherwise the argument list closed by the LAST ')' is removed, a
///    trailing template argument list "<...>" is removed as well, and the
///    identifier immediately before it is returned.
///  - If the text contains no ')', it is returned unchanged.
///
/// All backward scans are bounded by the start of the buffer, so inputs such
/// as "f(int)" or a text starting with '(' never read before the buffer.
///
/// @param demangled_symbol  NUL-terminated, writable demangled symbol.
/// @return pointer to the NUL-terminated function name inside the buffer.
const char* func_name( const char* demangled_symbol ) {
  char* const begin = const_cast<char*>( demangled_symbol );

  char* operator_string_begin = strstr( begin, "operator" );
  if ( operator_string_begin != NULL ) {
    // Order matters: longer tokens must be tried before their prefixes.
    static const char* const operator_tokens[] = {
        "delete[]", "delete", "new[]", "new", ">>=", "<<=", "->*", "<<", ">>", ">=", "<=", "==", "!=", "|=",
        "&=",       "^=",     "%=",    "/=",  "*=",  "-=",  "+=",  "&&", "||", "[]", "()", "++", "--", "->",
        "<",        ">",      "~",     "!",   "+",   "-",   "*",   "/",  "%",  "^",  "&",  "|",  ",",  "=" };

    char* operator_string_end = operator_string_begin + 8; // strlen( "operator" )
    while ( *operator_string_end == ' ' ) ++operator_string_end;
    for ( const char* token : operator_tokens ) {
      const size_t token_length = strlen( token );
      if ( strncmp( operator_string_end, token, token_length ) == 0 ) {
        operator_string_end[token_length] = '\0';
        break;
      }
    }
    return operator_string_begin;
  }

  char* cursor = strrchr( begin, ')' );
  if ( cursor == NULL ) return demangled_symbol;

  // Walk back to the '(' matching the last ')'.
  int depth = 1;
  while ( depth > 0 && cursor != begin ) {
    const char c = *( --cursor );
    if ( c == ')' ) {
      ++depth;
    } else if ( c == '(' ) {
      --depth;
    }
  }
  *cursor = '\0'; // drop the argument list
  if ( cursor == begin ) return cursor;

  --cursor;
  if ( *cursor == '>' ) {
    // Walk back to the '<' matching the trailing '>' and drop the template arguments.
    depth = 1;
    while ( depth > 0 && cursor != begin ) {
      const char c = *( --cursor );
      if ( c == '>' ) {
        ++depth;
      } else if ( c == '<' ) {
        --depth;
      }
    }
    *cursor = '\0';
  }
  if ( cursor == begin ) return cursor;

  // Step over one character without classifying it (either the '\0' that
  // replaced '<' or the last character of the name), then walk left while
  // the characters still belong to the identifier.
  --cursor;
  while ( is_func_name_char( *cursor ) ) {
    if ( cursor == begin ) return cursor;
    --cursor;
  }
  return cursor + 1;
}

◆ getTotalCycles()

double getTotalCycles ( )

Definition at line 1591 of file pfm_gen_analysis.cpp.

1591  {
1592  double sum = 0;
1593  if ( nehalem ) {
1594  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1595  ++it ) {
1596  sum += ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
1597  }
1598  } else {
1599  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1600  ++it ) {
1601  sum += ( it->second )["UNHALTED_CORE_CYCLES"];
1602  }
1603  }
1604  return sum;
1605 }

◆ html_special_chars()

/// Copy `s` into `s_mod`, replacing the HTML special characters
/// `<`, `>`, `&` and `"` by `&lt;`, `&gt;`, `&amp;` and `&quot;`.
///
/// The output is written through a moving cursor in a single pass instead of
/// calling strcat() per character, which rescanned the output from its start
/// every time.
///
/// @param s      NUL-terminated input text.
/// @param s_mod  destination buffer; always NUL-terminated on return. The
///               function cannot check its size: the caller must provide room
///               for up to 6 bytes per input character plus the terminator.
///               `s` and `s_mod` must not overlap.
void html_special_chars( const char* s, char* s_mod ) {
  char* out = s_mod;
  for ( ; *s != '\0'; ++s ) {
    const char* entity = nullptr;
    switch ( *s ) {
    case '<':
      entity = "&lt;";
      break;
    case '>':
      entity = "&gt;";
      break;
    case '&':
      entity = "&amp;";
      break;
    case '"':
      entity = "&quot;";
      break;
    default:
      break;
    }
    if ( entity != nullptr ) {
      const size_t entity_length = strlen( entity );
      memcpy( out, entity, entity_length );
      out += entity_length;
    } else {
      *out++ = *s;
    }
  }
  *out = '\0';
}

◆ init_core_caa_events()

void init_core_caa_events ( )

Definition at line 284 of file pfm_gen_analysis.cpp.

284  {
285  core_caa_events.push_back( "BRANCH_INSTRUCTIONS_RETIRED" );
286  core_caa_events.push_back( "ILD_STALL" );
287  core_caa_events.push_back( "INST_RETIRED:LOADS" );
288  core_caa_events.push_back( "INST_RETIRED:OTHER" );
289  core_caa_events.push_back( "INST_RETIRED:STORES" );
290  core_caa_events.push_back( "INSTRUCTIONS_RETIRED" );
291  core_caa_events.push_back( "LOAD_BLOCK:OVERLAP_STORE" );
292  core_caa_events.push_back( "LOAD_BLOCK:STA" );
293  core_caa_events.push_back( "LOAD_BLOCK:UNTIL_RETIRE" );
294  core_caa_events.push_back( "MEM_LOAD_RETIRED:DTLB_MISS" );
295  core_caa_events.push_back( "MEM_LOAD_RETIRED:L1D_LINE_MISS" );
296  core_caa_events.push_back( "MEM_LOAD_RETIRED:L2_LINE_MISS" );
297  core_caa_events.push_back( "MISPREDICTED_BRANCH_RETIRED" );
298  // core_caa_events.push_back("RS_UOPS_DISPATCHED");
299  // core_caa_events.push_back("RS_UOPS_DISPATCHED CMASK=1");
300  core_caa_events.push_back( "RS_UOPS_DISPATCHED CMASK=1 INV=1" );
301  core_caa_events.push_back( "SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE" );
302  core_caa_events.push_back( "UNHALTED_CORE_CYCLES" );
303  // core_caa_events.push_back("UOPS_RETIRED:ANY");
304  // core_caa_events.push_back("UOPS_RETIRED:FUSED");
305  // core_caa_events.push_back("IDLE_DURING_DIV");
306 }

◆ init_core_caa_events_displ()

void init_core_caa_events_displ ( )

Definition at line 372 of file pfm_gen_analysis.cpp.

372  {
373  core_caa_events_displ.push_back( "Total Cycles" );
374  core_caa_events_displ.push_back( "Stalled Cycles" );
375  core_caa_events_displ.push_back( "% of Total Cycles" );
376  core_caa_events_displ.push_back( "Instructions Retired" );
377  core_caa_events_displ.push_back( "CPI" );
378  core_caa_events_displ.push_back( "" );
379  core_caa_events_displ.push_back( "iMargin" );
380  core_caa_events_displ.push_back( "iFactor" );
381  core_caa_events_displ.push_back( "" );
382  core_caa_events_displ.push_back( "Counted Stalled Cycles" );
383  core_caa_events_displ.push_back( "" );
384  core_caa_events_displ.push_back( "L2 Miss Impact" );
385  core_caa_events_displ.push_back( "L2 Miss % of counted Stalled Cycles" );
386  core_caa_events_displ.push_back( "" );
387  core_caa_events_displ.push_back( "L2 Hit Impact" );
388  core_caa_events_displ.push_back( "L2 Hit % of counted Stalled Cycles" );
389  core_caa_events_displ.push_back( "" );
390  core_caa_events_displ.push_back( "L1 DTLB Miss Impact" );
391  core_caa_events_displ.push_back( "L1 DTLB Miss % of counted Stalled Cycles" );
392  core_caa_events_displ.push_back( "" );
393  core_caa_events_displ.push_back( "LCP Stalls Impact" );
394  core_caa_events_displ.push_back( "LCP Stalls % of counted Stalled Cycles" );
395  core_caa_events_displ.push_back( "" );
396  core_caa_events_displ.push_back( "Store-Fwd Stalls Impact" );
397  core_caa_events_displ.push_back( "Store-Fwd Stalls % of counted Stalled Cycles" );
398  core_caa_events_displ.push_back( "" );
399  core_caa_events_displ.push_back( "Loads Blocked by Unknown Address Store Impact" );
400  core_caa_events_displ.push_back( "Loads Blocked % of Store-Fwd Stalls Cycles" );
401  core_caa_events_displ.push_back( "Loads Overlapped with Stores Impact" );
402  core_caa_events_displ.push_back( "Loads Overlapped % of Store-Fwd Stalls Cycles" );
403  core_caa_events_displ.push_back( "Loads Spanning across Cache Lines Impact" );
404  core_caa_events_displ.push_back( "Loads Spanning % of Store-Fwd Stalls Cycles" );
405  core_caa_events_displ.push_back( "" );
406  core_caa_events_displ.push_back( "Load Instructions" );
407  core_caa_events_displ.push_back( "Load % of all Instructions" );
408  core_caa_events_displ.push_back( "Store Instructions" );
409  core_caa_events_displ.push_back( "Store % of all Instructions" );
410  core_caa_events_displ.push_back( "Branch Instructions" );
411  core_caa_events_displ.push_back( "Branch % of all Instructions" );
412  core_caa_events_displ.push_back( "Packed SIMD Computational Instructions" );
413  core_caa_events_displ.push_back( "Packed SIMD % of all Instructions" );
414  core_caa_events_displ.push_back( "Other Instructions" );
415  core_caa_events_displ.push_back( "Other % of all Instructions" );
416  core_caa_events_displ.push_back( "" );
417  core_caa_events_displ.push_back( "ITLB Miss Rate in %" );
418  core_caa_events_displ.push_back( "% of Mispredicted Branches" );
419 }

◆ init_nhm_caa_events()

void init_nhm_caa_events ( )

Definition at line 308 of file pfm_gen_analysis.cpp.

308  {
309  nhm_caa_events.push_back( "ARITH:CYCLES_DIV_BUSY" );
310  nhm_caa_events.push_back( "BR_INST_EXEC:ANY" );
311  nhm_caa_events.push_back( "BR_INST_EXEC:DIRECT_NEAR_CALL" );
312  nhm_caa_events.push_back( "BR_INST_EXEC:INDIRECT_NEAR_CALL" );
313  nhm_caa_events.push_back( "BR_INST_EXEC:INDIRECT_NON_CALL" );
314  nhm_caa_events.push_back( "BR_INST_EXEC:NEAR_CALLS" );
315  nhm_caa_events.push_back( "BR_INST_EXEC:NON_CALLS" );
316  nhm_caa_events.push_back( "BR_INST_EXEC:RETURN_NEAR" );
317  nhm_caa_events.push_back( "BR_INST_RETIRED:ALL_BRANCHES" );
318  nhm_caa_events.push_back( "BR_INST_RETIRED:CONDITIONAL" );
319  nhm_caa_events.push_back( "BR_INST_RETIRED:NEAR_CALL" );
320  nhm_caa_events.push_back( "BR_MISP_EXEC:ANY" );
321  nhm_caa_events.push_back( "CPU_CLK_UNHALTED:THREAD_P" );
322  nhm_caa_events.push_back( "DTLB_LOAD_MISSES:WALK_COMPLETED" );
323  nhm_caa_events.push_back( "INST_RETIRED:ANY_P" );
324  nhm_caa_events.push_back( "ITLB_MISSES:WALK_COMPLETED" );
325  nhm_caa_events.push_back( "L2_RQSTS:IFETCH_HIT" );
326  nhm_caa_events.push_back( "L2_RQSTS:IFETCH_MISS" );
327  nhm_caa_events.push_back( "MEM_INST_RETIRED:LOADS" );
328  nhm_caa_events.push_back( "MEM_INST_RETIRED:STORES" );
329  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:L2_HIT" );
330  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:L3_MISS" );
331  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:L3_UNSHARED_HIT" );
332  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM" );
333  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:LOCAL_DRAM" );
334  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM" );
335  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT" );
336  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:REMOTE_DRAM" );
337  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM" );
338  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM" );
339  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP" );
340  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD" );
341  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM" );
342  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT" );
343  nhm_caa_events.push_back( "RESOURCE_STALLS:ANY" );
344  nhm_caa_events.push_back( "SSEX_UOPS_RETIRED:PACKED_DOUBLE" );
345  nhm_caa_events.push_back( "SSEX_UOPS_RETIRED:PACKED_SINGLE" );
346  nhm_caa_events.push_back( "UOPS_DECODED:MS CMASK=1" );
347  nhm_caa_events.push_back( "UOPS_ISSUED:ANY CMASK=1 INV=1" );
348  nhm_caa_events.push_back( "ITLB_MISS_RETIRED" );
349  nhm_caa_events.push_back( "UOPS_RETIRED:ANY" );
350 }

◆ init_nhm_caa_events_displ()

void init_nhm_caa_events_displ ( )

Definition at line 502 of file pfm_gen_analysis.cpp.

502  {
503  nhm_caa_events_displ.push_back( "Total Cycles" );
504  nhm_caa_events_displ.push_back( "Instructions Retired" );
505  nhm_caa_events_displ.push_back( "CPI" );
506  nhm_caa_events_displ.push_back( "" );
507  nhm_caa_events_displ.push_back( "iMargin" );
508  nhm_caa_events_displ.push_back( "iFactor" );
509  nhm_caa_events_displ.push_back( "" );
510  nhm_caa_events_displ.push_back( "Stalled Cycles" );
511  nhm_caa_events_displ.push_back( "% of Total Cycles" );
512  nhm_caa_events_displ.push_back( "Total Counted Stalled Cycles" );
513  nhm_caa_events_displ.push_back( "" );
514  nhm_caa_events_displ.push_back( "Instruction Starvation % of Total Cycles" );
515  nhm_caa_events_displ.push_back( "# of Instructions per Call" );
516  nhm_caa_events_displ.push_back( "% of Total Cycles spent handling FP exceptions" );
517  nhm_caa_events_displ.push_back( "" );
518  nhm_caa_events_displ.push_back( "Counted Stalled Cycles due to Load Ops" );
519  nhm_caa_events_displ.push_back( "" );
520  nhm_caa_events_displ.push_back( "L2 Hit Impact" );
521  nhm_caa_events_displ.push_back( "L2 Hit % of Load Stalls" );
522  nhm_caa_events_displ.push_back( "" );
523  nhm_caa_events_displ.push_back( "L3 Unshared Hit Impact" );
524  nhm_caa_events_displ.push_back( "L3 Unshared Hit % of Load Stalls" );
525  nhm_caa_events_displ.push_back( "" );
526  nhm_caa_events_displ.push_back( "L2 Other Core Hit Impact" );
527  nhm_caa_events_displ.push_back( "L2 Other Core Hit % of Load Stalls" );
528  nhm_caa_events_displ.push_back( "" );
529  nhm_caa_events_displ.push_back( "L2 Other Core Hit Modified Impact" );
530  nhm_caa_events_displ.push_back( "L2 Other Core Hit Modified % of Load Stalls" );
531  nhm_caa_events_displ.push_back( "" );
532  nhm_caa_events_displ.push_back( "L3 Miss -> Local DRAM Hit Impact" );
533  nhm_caa_events_displ.push_back( "L3 Miss -> Remote DRAM Hit Impact" );
534  nhm_caa_events_displ.push_back( "L3 Miss -> Remote Cache Hit Impact" );
535  nhm_caa_events_displ.push_back( "L3 Miss -> Total Impact" );
536  nhm_caa_events_displ.push_back( "L3 Miss % of Load Stalls" );
537  nhm_caa_events_displ.push_back( "" );
538  nhm_caa_events_displ.push_back( "L1 DTLB Miss Impact" );
539  nhm_caa_events_displ.push_back( "L1 DTLB Miss % of Load Stalls" );
540  nhm_caa_events_displ.push_back( "" );
541  nhm_caa_events_displ.push_back( "Cycles spent during DIV & SQRT Ops" );
542  nhm_caa_events_displ.push_back( "DIV & SQRT Ops % of counted Stalled Cycles" );
543  nhm_caa_events_displ.push_back( "" );
544  nhm_caa_events_displ.push_back( "Total L2 IFETCH misses" );
545  nhm_caa_events_displ.push_back( "% of L2 IFETCH misses" );
546  nhm_caa_events_displ.push_back( "" );
547  nhm_caa_events_displ.push_back( "% of IFETCHes served by Local DRAM" );
548  nhm_caa_events_displ.push_back( "% of IFETCHes served by L3 (Modified)" );
549  nhm_caa_events_displ.push_back( "% of IFETCHes served by L3 (Clean Snoop)" );
550  nhm_caa_events_displ.push_back( "% of IFETCHes served by Remote L2" );
551  nhm_caa_events_displ.push_back( "% of IFETCHes served by Remote DRAM" );
552  nhm_caa_events_displ.push_back( "% of IFETCHes served by L3 (No Snoop)" );
553  nhm_caa_events_displ.push_back( "" );
554  nhm_caa_events_displ.push_back( "Total L2 IFETCH miss Impact" );
555  nhm_caa_events_displ.push_back( "" );
556  nhm_caa_events_displ.push_back( "Cycles IFETCH served by Local DRAM" );
557  nhm_caa_events_displ.push_back( "Local DRAM IFECTHes % Impact" );
558  nhm_caa_events_displ.push_back( "" );
559  nhm_caa_events_displ.push_back( "Cycles IFETCH served by L3 (Modified)" );
560  nhm_caa_events_displ.push_back( "L3 (Modified) IFECTHes % Impact" );
561  nhm_caa_events_displ.push_back( "" );
562  nhm_caa_events_displ.push_back( "Cycles IFETCH served by L3 (Clean Snoop)" );
563  nhm_caa_events_displ.push_back( "L3 (Clean Snoop) IFECTHes % Impact" );
564  nhm_caa_events_displ.push_back( "" );
565  nhm_caa_events_displ.push_back( "Cycles IFETCH served by Remote L2" );
566  nhm_caa_events_displ.push_back( "Remote L2 IFECTHes % Impact" );
567  nhm_caa_events_displ.push_back( "" );
568  nhm_caa_events_displ.push_back( "Cycles IFETCH served by Remote DRAM" );
569  nhm_caa_events_displ.push_back( "Remote DRAM IFECTHes % Impact" );
570  nhm_caa_events_displ.push_back( "" );
571  nhm_caa_events_displ.push_back( "Cycles IFETCH served by L3 (No Snoop)" );
572  nhm_caa_events_displ.push_back( "L3 (No Snoop) IFECTHes % Impact" );
573  nhm_caa_events_displ.push_back( "" );
574  nhm_caa_events_displ.push_back( "Total Branch Instructions Executed" );
575  nhm_caa_events_displ.push_back( "% of Mispredicted Branches" );
576  nhm_caa_events_displ.push_back( "" );
577  nhm_caa_events_displ.push_back( "Direct Near Calls % of Total Branches Executed" );
578  nhm_caa_events_displ.push_back( "Indirect Near Calls % of Total Branches Executed" );
579  nhm_caa_events_displ.push_back( "Indirect Near Non-Calls % of Total Branches Executed" );
580  nhm_caa_events_displ.push_back( "All Near Calls % of Total Branches Executed" );
581  nhm_caa_events_displ.push_back( "All Non Calls % of Total Branches Executed" );
582  nhm_caa_events_displ.push_back( "All Returns % of Total Branches Executed" );
583  nhm_caa_events_displ.push_back( "" );
584  nhm_caa_events_displ.push_back( "Total Branch Instructions Retired" );
585  nhm_caa_events_displ.push_back( "Conditionals % of Total Branches Retired" );
586  nhm_caa_events_displ.push_back( "Near Calls % of Total Branches Retired" );
587  nhm_caa_events_displ.push_back( "" );
588  nhm_caa_events_displ.push_back( "L1 ITLB Miss Impact" );
589  nhm_caa_events_displ.push_back( "ITLB Miss Rate in %" );
590  nhm_caa_events_displ.push_back( "" );
591  nhm_caa_events_displ.push_back( "Branch Instructions" );
592  nhm_caa_events_displ.push_back( "Branch % of all Instructions" );
593  nhm_caa_events_displ.push_back( "" );
594  nhm_caa_events_displ.push_back( "Load Instructions" );
595  nhm_caa_events_displ.push_back( "Load % of all Instructions" );
596  nhm_caa_events_displ.push_back( "" );
597  nhm_caa_events_displ.push_back( "Store Instructions" );
598  nhm_caa_events_displ.push_back( "Store % of all Instructions" );
599  nhm_caa_events_displ.push_back( "" );
600  nhm_caa_events_displ.push_back( "Other Instructions" );
601  nhm_caa_events_displ.push_back( "Other % of all Instructions" );
602  nhm_caa_events_displ.push_back( "" );
603  nhm_caa_events_displ.push_back( "Packed UOPS Retired" );
604  nhm_caa_events_displ.push_back( "Packed % of all UOPS Retired" );
605 }

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 1610 of file pfm_gen_analysis.cpp.

1610  {
1611  if ( argc < 2 || argc > 4 ) {
1612  printf( "\n\nUsage: %s DIRECTORY [--caa] [--csv]\n\n", argv[0] );
1613  exit( 1 );
1614  }
1615 
1616  bool caa = false;
1617  bool csv = false;
1618  for ( int i = 2; i < argc; i++ ) {
1619  if ( !strcmp( argv[i], "--caa" ) ) caa = true;
1620  if ( !strcmp( argv[i], "--csv" ) ) csv = true;
1621  }
1622 
1623  char dir[MAX_FILENAME_LENGTH];
1624  strcpy( dir, argv[1] );
1625  if ( !csv ) {
1626  strcat( dir, "/HTML" );
1627  int res = mkdir( dir, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH );
1628  if ( res != 0 ) {
1629  fprintf( stderr, "ERROR: Cannot create directory %s\naborting...\n", dir );
1630  exit( 1 );
1631  }
1632  }
1633 
1634  DIR* dp;
1635  struct dirent* dirp;
1636  int num_of_modules = 0;
1637  if ( ( dp = opendir( argv[1] ) ) == NULL ) {
1638  printf( "Error(%d) opening %s\n", errno, argv[1] );
1639  return errno;
1640  }
1641  while ( ( dirp = readdir( dp ) ) != NULL ) {
1642  if ( strstr( dirp->d_name, "_S_" ) != NULL && strstr( dirp->d_name, ".txt.gz" ) != NULL && !csv ) {
1643  if ( read_S_events( argv[1], dirp->d_name ) ) {
1644  fprintf( stderr, "ERROR: Cannot read file %s\naborting...\n", dirp->d_name );
1645  exit( 1 );
1646  }
1647  }
1648  }
1649  closedir( dp );
1650  sort( S_events.begin(), S_events.end() );
1651  if ( ( dp = opendir( argv[1] ) ) == NULL ) {
1652  printf( "Error(%d) opening %s\n", errno, argv[1] );
1653  return errno;
1654  }
1655  while ( ( dirp = readdir( dp ) ) != NULL ) {
1656  if ( strstr( dirp->d_name, "_S_" ) != NULL && strstr( dirp->d_name, ".txt.gz" ) != NULL && !csv ) {
1657  if ( read_S_file( argv[1], dirp->d_name ) ) {
1658  fprintf( stderr, "ERROR: Cannot read file %s\naborting...\n", dirp->d_name );
1659  exit( 1 );
1660  }
1661  } else if ( strstr( dirp->d_name, "_C_" ) != NULL && strstr( dirp->d_name, ".txt" ) != NULL ) {
1662  int res = read_C_file( argv[1], dirp->d_name );
1663  if ( res > num_of_modules ) { num_of_modules = res; }
1664  }
1665  }
1666  closedir( dp );
1667 
1668  if ( !csv ) {
1669  if ( finalize_S_html_pages( argv[1] ) ) {
1670  fprintf( stderr, "ERROR: Cannot finalize HTML pages!!!\naborting...\n" );
1671  exit( 1 );
1672  }
1673  }
1674 
1675  char filepath[MAX_FILENAME_LENGTH];
1676  bzero( filepath, MAX_FILENAME_LENGTH );
1677  if ( !csv )
1678  sprintf( filepath, "%s/HTML/index.html", argv[1] );
1679  else
1680  sprintf( filepath, "%s/results.csv", argv[1] );
1681  FILE* fp = fopen( filepath, "w" );
1682  if ( fp == NULL ) {
1683  fprintf( stderr, "ERROR: Cannot create file index.html!!!\naborting...\n" );
1684  exit( 1 );
1685  }
1686 
1687  if ( caa ) {
1688  double totalCycles;
1689  if ( !nehalem ) {
1691  if ( !check_for_core_caa_events() ) {
1692  fprintf( stderr, "(core) ERROR: One or more events for CAA missing!\naborting...\n" );
1693  exit( 1 );
1694  }
1696  totalCycles = getTotalCycles();
1697  calc_core_deriv_values( totalCycles );
1699  if ( !csv ) {
1700  put_C_header( fp, core_caa_events_displ );
1701  put_C_modules( fp, core_caa_events_displ );
1702  } else {
1703  put_C_header_csv( fp, core_caa_events_displ );
1704  put_C_modules_csv( fp, core_caa_events_displ );
1705  }
1706  } else {
1708  if ( !check_for_nhm_caa_events() ) {
1709  fprintf( stderr, "(nehalem) ERROR: One or more events for CAA missing!\naborting...\n" );
1710  exit( 1 );
1711  }
1713  totalCycles = getTotalCycles();
1714  calc_nhm_deriv_values( totalCycles );
1716  if ( !csv ) {
1717  put_C_header( fp, nhm_caa_events_displ );
1718  put_C_modules( fp, nhm_caa_events_displ );
1719  } else {
1720  put_C_header_csv( fp, nhm_caa_events_displ );
1721  put_C_modules_csv( fp, nhm_caa_events_displ );
1722  }
1723  }
1724  if ( !csv ) put_C_footer( fp );
1725  fclose( fp );
1726  } else {
1727  if ( !csv ) {
1728  put_C_header( fp, C_events );
1729  put_C_modules( fp, C_events );
1730  put_C_footer( fp );
1731  } else {
1732  put_C_header_csv( fp, C_events );
1733  put_C_modules_csv( fp, C_events );
1734  }
1735  fclose( fp );
1736  }
1737  if ( !csv ) {
1738  char src[MAX_FILENAME_LENGTH];
1739  char dst[MAX_FILENAME_LENGTH];
1740  sprintf( src, "sorttable.js" );
1741  sprintf( dst, "%s/HTML/sorttable.js", argv[1] );
1742  int fd_src = open( src, O_RDONLY );
1743  if ( fd_src == -1 ) {
1744  fprintf( stderr, "ERROR: Cannot open file \"%s\"!\naborting...\n", src );
1745  exit( 1 );
1746  }
1747  int fd_dst = open( dst, O_WRONLY | O_CREAT | O_TRUNC, 0644 );
1748  if ( fd_dst == -1 ) {
1749  fprintf( stderr, "ERROR: Cannot open file \"%s\" (%s)!\naborting...\n", dst, strerror( errno ) );
1750  exit( 1 );
1751  }
1752  char c;
1753  while ( read( fd_src, &c, 1 ) ) {
1754  if ( write( fd_dst, &c, 1 ) == -1 ) {
1755  std::cerr << "ERROR: failed to write to " << dst << std::endl;
1756  exit( 1 );
1757  }
1758  }
1759  close( fd_dst );
1760  close( fd_src );
1761  }
1762  return 0;
1763 }

◆ normalize()

double normalize ( std::string  field,
double  value,
double  normalizeTo 
)

Definition at line 1546 of file pfm_gen_analysis.cpp.

1546  {
1547  double max = 0;
1548  double counter_value;
1549  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1550  ++it ) {
1551  counter_value = ( it->second )[field];
1552  if ( max < counter_value ) max = counter_value;
1553  }
1554  if ( value > 0 && max > 0 && normalizeTo > 0 ) {
1555  return 1. * value / max * normalizeTo;
1556  } else
1557  return 0;
1558 }

◆ put_C_footer()

/// Write the closing tags of the results page (table, body, html) to `fp`.
///
/// @param fp  open output stream.
void put_C_footer( FILE* fp ) {
  static const char closing_tags[] = "</table>\n</body>\n</html>\n";
  fputs( closing_tags, fp );
}

◆ put_C_header()

/// Write the beginning of the HTML results page to `fp`: doctype, head
/// (title, sorttable.js reference, table style), the opening of the body and
/// of the sortable table, and the table's header row.
///
/// The header row starts with "MODULE NAME" followed by one `<th>` per entry
/// of `columns`; an empty entry produces a white spacer cell. Titles are
/// written as they are, without HTML escaping.
///
/// @param fp       open output stream.
/// @param columns  column titles, in display order.
void put_C_header( FILE* fp, std::vector<std::string>& columns ) {
  static const char* const preamble[] = {
      "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\n",
      "<html>\n",
      "<head>\n",
      "<title>\n",
      "Analysis Result\n",
      "</title>\n",
      "<script src=\"sorttable.js\"></script>\n",
      "<style>\ntable.sortable thead "
      "{\nbackground-color:#eee;\ncolor:#666666;\nfont-weight:bold;\ncursor:default;\nfont-family:courier;\n}"
      "\n</style>\n",
      "</head>\n",
      "<body link=\"black\">\n",
      "<h1>RESULTS:</h1><br/>Click for detailed symbol view...<p/>\n",
      "<table class=\"sortable\" cellpadding=\"5\">\n",
      "<tr>\n",
      "<th>MODULE NAME</th>\n",
  };
  for ( const char* chunk : preamble ) { fputs( chunk, fp ); }

  for ( const std::string& title : columns ) {
    const bool is_spacer = ( *title.c_str() == '\0' );
    if ( is_spacer ) {
      fputs( "<th bgcolor=\"#FFFFFF\">&nbsp;</th>\n", fp );
    } else {
      fprintf( fp, "<th>%s</th>\n", title.c_str() );
    }
  }
  fputs( "</tr>\n", fp );
}

◆ put_C_header_csv()

/// Write the CSV header line to `fp`: "MODULE NAME" followed by
/// ",<title>" for every non-empty entry of `columns`, then a newline.
/// Empty entries (spacers in the HTML output) are skipped.
///
/// @param fp       open output stream.
/// @param columns  column titles, in output order.
void put_C_header_csv( FILE* fp, std::vector<std::string>& columns ) {
  fputs( "MODULE NAME", fp );
  for ( const std::string& title : columns ) {
    if ( *title.c_str() == '\0' ) continue; // spacer column: nothing to emit
    fprintf( fp, ",%s", title.c_str() );
  }
  fputs( "\n", fp );
}

◆ put_C_modules()

void put_C_modules ( FILE *  fp,
std::vector< std::string > &  columns 
)

Definition at line 1478 of file pfm_gen_analysis.cpp.

1478  {
1479  int index = 0;
1480  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1481  ++it ) {
1482  if ( index % 2 )
1483  fprintf( fp, "<tr bgcolor=\"#FFFFCC\">\n" );
1484  else
1485  fprintf( fp, "<tr bgcolor=\"#CCFFCC\">\n" );
1486  fprintf( fp, "<td style=\"font-family:monospace;font-size:large;color:Black\"><a href=\"%s.html\">%s</a></td>\n",
1487  ( it->first ).c_str(), ( it->first ).c_str() );
1488  for ( std::vector<std::string>::const_iterator jt = columns.begin(); jt != columns.end(); ++jt ) {
1489  if ( strlen( jt->c_str() ) == 0 ) {
1490  fprintf( fp, "<td bgcolor=\"#FFFFFF\">&nbsp;</td>" );
1491  } else {
1492  if ( ( it->second ).find( *jt ) == ( it->second ).end() ) {
1493  fprintf( stderr, "ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", ( *jt ).c_str() );
1494  exit( 1 );
1495  }
1496  fprintf( fp, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\" align=\"right\">%.2f</td>\n",
1497  ( it->second )[*jt] );
1498  }
1499  }
1500  fprintf( fp, "</tr>\n" );
1501  index++;
1502  }
1503 }

◆ put_C_modules_csv()

void put_C_modules_csv ( FILE *  fp,
std::vector< std::string > &  columns 
)

Definition at line 1521 of file pfm_gen_analysis.cpp.

1521  {
1522  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1523  ++it ) {
1524  fprintf( fp, "%s", ( it->first ).c_str() );
1525  for ( std::vector<std::string>::const_iterator jt = columns.begin(); jt != columns.end(); ++jt ) {
1526  if ( strlen( jt->c_str() ) == 0 ) {
1527  } else {
1528  if ( ( it->second ).find( *jt ) == ( it->second ).end() ) {
1529  fprintf( stderr, "ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", ( *jt ).c_str() );
1530  exit( 1 );
1531  }
1532  fprintf( fp, ",%.2f", ( it->second )[*jt] );
1533  }
1534  }
1535  fprintf( fp, "\n" );
1536  }
1537 }

◆ put_S_module()

void put_S_module ( S_module cur_module,
const char *  dir 
)

Definition at line 1046 of file pfm_gen_analysis.cpp.

1046  {
1047  char module_name[MAX_MODULE_NAME_LENGTH];
1048  bzero( module_name, MAX_MODULE_NAME_LENGTH );
1049  strcpy( module_name, ( cur_module->get_module_name() ).c_str() );
1050  char module_filename[MAX_FILENAME_LENGTH];
1051  bzero( module_filename, MAX_FILENAME_LENGTH );
1052  strcpy( module_filename, dir );
1053  strcat( module_filename, "/HTML/" );
1054  strcat( module_filename, module_name );
1055  strcat( module_filename, ".html" );
1056  char event[MAX_EVENT_NAME_LENGTH];
1057  bzero( event, MAX_EVENT_NAME_LENGTH );
1058  strcpy( event, ( cur_module->get_event() ).c_str() );
1059  std::map<std::string, unsigned int>::iterator result = modules_tot_samples.find( cur_module->get_module_name() );
1060  FILE* module_file;
1061  if ( result == modules_tot_samples.end() ) // not found
1062  {
1063  if ( ( !strcmp( event, "UNHALTED_CORE_CYCLES" ) && !nehalem ) ||
1064  ( !strcmp( event, "CPU_CLK_UNHALTED:THREAD_P" ) && nehalem ) ) {
1065  modules_tot_samples.insert(
1067  } else {
1068  modules_tot_samples.insert( std::pair<std::string, unsigned int>( cur_module->get_module_name(), 0 ) );
1069  }
1070  module_file = fopen( module_filename, "w" );
1071  if ( module_file == NULL ) {
1072  fprintf( stderr, "ERROR: Cannot create file %s!!!\naborting...\n", module_filename );
1073  exit( 1 );
1074  }
1075  fprintf( module_file, "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" "
1076  "\"http://www.w3.org/TR/html4/loose.dtd\">\n" );
1077  fprintf( module_file, "<html>\n" );
1078  fprintf( module_file, "<head>\n" );
1079  fprintf( module_file, "<title>\n" );
1080  fprintf( module_file, "%s\n", module_name );
1081  fprintf( module_file, "</title>\n" );
1082  fprintf( module_file, "</head>\n" );
1083  fprintf( module_file, "<body>\n" );
1084  fprintf( module_file, "<h2>%s</h2><br/>Events Sampled:<br/>\n", module_name );
1085  fprintf( module_file, "<ul>\n" );
1086  for ( std::vector<std::string>::const_iterator it = S_events.begin(); it != S_events.end(); ++it ) {
1087  fprintf( module_file, "<li><a href=\"#%s\">%s</a></li>\n", it->c_str(), it->c_str() );
1088  }
1089  fprintf( module_file, "</ul>\n" );
1090  } // if(result == modules_tot_samples.end()) //not found
1091  else {
1092  if ( ( !strcmp( event, "UNHALTED_CORE_CYCLES" ) && !nehalem ) ||
1093  ( !strcmp( event, "CPU_CLK_UNHALTED:THREAD_P" ) && nehalem ) ) {
1094  modules_tot_samples[cur_module->get_module_name()] = cur_module->get_total_num_samples();
1095  }
1096  module_file = fopen( module_filename, "a" );
1097  } // else:: if(result != modules_tot_samples.end()) //found!!
1098  char event_str[MAX_EVENT_NAME_LENGTH];
1099  bzero( event_str, MAX_EVENT_NAME_LENGTH );
1100  strcpy( event_str, event );
1101  if ( cur_module->get_c_mask() > 0 ) {
1102  sprintf( event_str + strlen( event_str ), " CMASK=%d", cur_module->get_c_mask() );
1103  }
1104  if ( cur_module->get_inv_mask() > 0 ) {
1105  sprintf( event_str + strlen( event_str ), " INV=%d", cur_module->get_inv_mask() );
1106  }
1107  fprintf( module_file, "<a name=\"%s\"><a>\n", event_str );
1108  fprintf( module_file, "<table cellpadding=\"5\">\n" );
1109  fprintf( module_file, "<tr bgcolor=\"#EEEEEE\">\n" );
1110  fprintf( module_file,
1111  "<th colspan=\"6\" align=\"left\">%s -- cmask: %u -- invmask: %u -- Total Samples: %u -- "
1112  "Sampling Period: %d</th>\n",
1113  event, cur_module->get_c_mask(), cur_module->get_inv_mask(), cur_module->get_total_num_samples(),
1114  cur_module->get_smpl_period() );
1115  fprintf( module_file, "</tr>\n" );
1116  fprintf( module_file, "<tr bgcolor=\"#EEEEEE\">\n" );
1117  fprintf( module_file, "<th align=\"left\">Samples</th>\n" );
1118  fprintf( module_file, "<th align=\"left\">Percentage</th>\n" );
1119  fprintf( module_file, "<th align=\"left\">Symbol Name</th>\n" );
1120  fprintf( module_file, "<th align=\"left\">Library Name</th>\n" );
1121  fprintf( module_file, "<th align=\"left\">Complete Signature</th>\n" );
1122  fprintf( module_file, "<th align=\"left\">Library Pathname</th>\n" );
1123  fprintf( module_file, "</tr>\n" );
1124  for ( int j = 0; j < 20; j++ ) {
1125  char sym[MAX_SYM_LENGTH];
1126  char sym_mod[MAX_SYM_MOD_LENGTH];
1127  char lib[MAX_LIB_LENGTH];
1128  char lib_mod[MAX_LIB_MOD_LENGTH];
1129  char simple_sym[MAX_SIMPLE_SYM_LENGTH];
1130  char simple_sym_mod[MAX_SIMPLE_SYM_MOD_LENGTH];
1131  char simple_lib[MAX_SIMPLE_LIB_LENGTH];
1132  char simple_lib_mod[MAX_SIMPLE_LIB_MOD_LENGTH];
1133 
1134  bzero( sym, MAX_SYM_LENGTH );
1135  bzero( sym_mod, MAX_SYM_MOD_LENGTH );
1136  bzero( lib, MAX_LIB_LENGTH );
1137  bzero( lib_mod, MAX_LIB_MOD_LENGTH );
1138  bzero( simple_sym, MAX_SIMPLE_SYM_LENGTH );
1139  bzero( simple_sym_mod, MAX_SIMPLE_SYM_MOD_LENGTH );
1140  bzero( simple_lib, MAX_SIMPLE_LIB_LENGTH );
1141  bzero( simple_lib_mod, MAX_SIMPLE_LIB_MOD_LENGTH );
1142 
1143  char index[MAX_SAMPLE_INDEX_LENGTH];
1144  bzero( index, MAX_SAMPLE_INDEX_LENGTH );
1145  unsigned int value;
1146  bool res = cur_module->get_max( index, value );
1147  if ( !res ) break;
1148  char* sym_end = strchr( index, '%' );
1149  if ( sym_end == NULL ) // error
1150  {
1151  fprintf( stderr, "ERROR: Invalid sym and lib name! : %s\naborting...\n", index );
1152  exit( 1 );
1153  }
1154  memcpy( sym, index, strlen( index ) - strlen( sym_end ) );
1155  strcpy( lib, sym_end + 1 );
1156  char temp[MAX_SYM_LENGTH];
1157  bzero( temp, MAX_SYM_LENGTH );
1158  strcpy( temp, sym );
1159  strcpy( simple_sym, ( func_name( temp ) ) );
1160  if ( strrchr( lib, '/' ) != NULL && *( strrchr( lib, '/' ) + 1 ) != '\0' ) {
1161  strcpy( simple_lib, strrchr( lib, '/' ) + 1 );
1162  } else {
1163  strcpy( simple_lib, lib );
1164  }
1165  if ( j % 2 != 0 ) {
1166  fprintf( module_file, "<tr bgcolor=\"#FFFFCC\">\n" );
1167  } else {
1168  fprintf( module_file, "<tr bgcolor=\"#CCFFCC\">\n" );
1169  }
1170  fprintf( module_file, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%u</td>\n", value );
1171  fprintf( module_file, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%f%%</td>\n",
1172  ( ( (double)( value ) ) / ( (double)( cur_module->get_total_num_samples() ) ) ) * 100 );
1173  html_special_chars( simple_sym, simple_sym_mod );
1174  html_special_chars( simple_lib, simple_lib_mod );
1175  html_special_chars( sym, sym_mod );
1176  html_special_chars( lib, lib_mod );
1177  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n", simple_sym_mod );
1178  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n", simple_lib_mod );
1179  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n", sym_mod );
1180  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n</tr>\n", lib_mod );
1181  }
1182  fprintf( module_file, "</table><br/><br/>\n" );
1183  int res = fclose( module_file );
1184  if ( res ) {
1185  fprintf( stderr, "ERROR: Cannot close file %s!!!\naborting...\n", module_filename );
1186  exit( 1 );
1187  }
1188  return;
1189 }

◆ read_C_file()

int read_C_file ( const char *  dir,
const char *  filename 
)

Definition at line 1384 of file pfm_gen_analysis.cpp.

1384  {
1385  char event[MAX_EVENT_NAME_LENGTH];
1386  char arch[MAX_ARCH_NAME_LENGTH];
1387  char line[MAX_LINE_LENGTH];
1388  char cmask_str[MAX_CMASK_STR_LENGTH];
1389  char inv_str[MAX_INV_STR_LENGTH];
1390  char sp_str[MAX_SP_STR_LENGTH];
1391  char cur_module_name[MAX_MODULE_NAME_LENGTH];
1392  bzero( line, MAX_LINE_LENGTH );
1393  bzero( event, MAX_EVENT_NAME_LENGTH );
1394  bzero( cur_module_name, MAX_MODULE_NAME_LENGTH );
1395  bzero( arch, MAX_ARCH_NAME_LENGTH );
1396  bzero( line, MAX_LINE_LENGTH );
1397  bzero( cmask_str, MAX_CMASK_STR_LENGTH );
1398  bzero( inv_str, MAX_INV_STR_LENGTH );
1399  bzero( sp_str, MAX_SP_STR_LENGTH );
1400  int number_of_modules = 0;
1401  long cur_sum = 0;
1402  int no_of_values = 0;
1403  char path_name[MAX_FILENAME_LENGTH];
1404  bzero( path_name, MAX_FILENAME_LENGTH );
1405  strcpy( path_name, dir );
1406  strcat( path_name, "/" );
1407  strcat( path_name, filename );
1408  FILE* fp = fopen( path_name, "r" );
1409  int stat = fscanf( fp, "%s %s %s %s %s\n", arch, event, cmask_str, inv_str, sp_str );
1410  if ( stat != 5 ) {
1411  std::cerr << "ERROR: failed to parse " << path_name << std::endl;
1412  exit( 1 );
1413  }
1414  if ( !strcmp( arch, "NHM" ) )
1415  nehalem = true;
1416  else
1417  nehalem = false;
1418  std::string event_str( event );
1419  if ( atoi( cmask_str ) > 0 ) {
1420  event_str += " CMASK=";
1421  event_str += cmask_str;
1422  }
1423  if ( atoi( inv_str ) > 0 ) {
1424  event_str += " INV=";
1425  event_str += inv_str;
1426  }
1427  C_events.push_back( event_str );
1428  while ( fscanf( fp, "%s\n", line ) != EOF ) {
1429  if ( isalpha( line[0] ) ) // module
1430  {
1431  if ( number_of_modules > 0 ) {
1432  C_modules[cur_module_name][event_str] = (double)cur_sum / no_of_values;
1433  cur_sum = 0;
1434  no_of_values = 0;
1435  }
1436  strcpy( cur_module_name, line );
1437  number_of_modules++;
1438  } else if ( isdigit( line[0] ) ) // value
1439  {
1440  cur_sum += strtol( line, NULL, 10 );
1441  no_of_values++;
1442  }
1443  }
1444  C_modules[cur_module_name][event_str] = (double)cur_sum / no_of_values; // last module
1445  fclose( fp );
1446  return number_of_modules;
1447 }

◆ read_S_events()

int read_S_events ( const char *  dir,
const char *  filename 
)

Definition at line 1311 of file pfm_gen_analysis.cpp.

1311  {
1312  char event[MAX_EVENT_NAME_LENGTH];
1313  char arch[MAX_ARCH_NAME_LENGTH];
1314  char line[MAX_LINE_LENGTH];
1315  char cmask_str[MAX_CMASK_STR_LENGTH];
1316  char inv_str[MAX_INV_STR_LENGTH];
1317  char sp_str[MAX_SP_STR_LENGTH];
1318  bzero( line, MAX_LINE_LENGTH );
1319  bzero( event, MAX_EVENT_NAME_LENGTH );
1320  bzero( arch, MAX_ARCH_NAME_LENGTH );
1321  bzero( cmask_str, MAX_CMASK_STR_LENGTH );
1322  bzero( inv_str, MAX_INV_STR_LENGTH );
1323  bzero( sp_str, MAX_SP_STR_LENGTH );
1324  char path_name[MAX_FILENAME_LENGTH];
1325  bzero( path_name, MAX_FILENAME_LENGTH );
1326  strcpy( path_name, dir );
1327  strcat( path_name, "/" );
1328  strcat( path_name, filename );
1329  gzFile res_file = gzopen( path_name, "rb" );
1330  if ( res_file != NULL ) {
1331  bzero( line, MAX_LINE_LENGTH );
1332  gzgets( res_file, line, MAX_LINE_LENGTH );
1333  if ( line[strlen( line ) - 1] == '\n' ) line[strlen( line ) - 1] = '\0';
1334  bzero( event, MAX_EVENT_NAME_LENGTH );
1335  sscanf( line, "%s %s %s %s %s\n", arch, event, cmask_str, inv_str, sp_str );
1336  std::string event_str( event );
1337  if ( atoi( cmask_str ) > 0 ) {
1338  event_str += " CMASK=";
1339  event_str += cmask_str;
1340  }
1341  if ( atoi( inv_str ) > 0 ) {
1342  event_str += " INV=";
1343  event_str += inv_str;
1344  }
1345  S_events.push_back( event_str );
1346  } // if(res_file != NULL)
1347  else {
1348  fprintf( stderr, "ERROR: Unable to open input file: %s\naborting...\n", filename );
1349  exit( 1 );
1350  }
1351  gzclose( res_file );
1352  return 0;
1353 }

◆ read_S_file()

int read_S_file ( const char *  dir,
const char *  filename 
)

Definition at line 1198 of file pfm_gen_analysis.cpp.

1198  {
1199  char line[MAX_LINE_LENGTH];
1200  char event[MAX_EVENT_NAME_LENGTH];
1201  char arch[MAX_ARCH_NAME_LENGTH];
1202  unsigned int cmask;
1203  unsigned int inv;
1204  unsigned int sp;
1205  char cur_module_name[MAX_MODULE_NAME_LENGTH];
1206  bzero( line, MAX_LINE_LENGTH );
1207  bzero( event, MAX_EVENT_NAME_LENGTH );
1208  bzero( cur_module_name, MAX_MODULE_NAME_LENGTH );
1209  bzero( arch, MAX_ARCH_NAME_LENGTH );
1210 
1211  S_module* cur_module = new S_module();
1212  unsigned int module_num = 0;
1213 
1214  char path_name[MAX_FILENAME_LENGTH];
1215  bzero( path_name, MAX_FILENAME_LENGTH );
1216  strcpy( path_name, dir );
1217  strcat( path_name, "/" );
1218  strcat( path_name, filename );
1219  gzFile res_file = gzopen( path_name, "rb" );
1220 
1221  if ( res_file != NULL ) {
1222  bzero( line, MAX_LINE_LENGTH );
1223  gzgets( res_file, line, MAX_LINE_LENGTH );
1224  if ( line[strlen( line ) - 1] == '\n' ) line[strlen( line ) - 1] = '\0';
1225  bzero( event, MAX_EVENT_NAME_LENGTH );
1226  sscanf( line, "%s %s %u %u %u", arch, event, &cmask, &inv, &sp );
1227  if ( !strcmp( arch, "NHM" ) )
1228  nehalem = true;
1229  else
1230  nehalem = false;
1231  bzero( line, MAX_LINE_LENGTH );
1232  while ( gzgets( res_file, line, MAX_LINE_LENGTH ) != Z_NULL ) {
1233  if ( line[strlen( line ) - 1] == '\n' ) line[strlen( line ) - 1] = '\0';
1234  if ( strchr( line, ' ' ) == NULL ) // module
1235  {
1236  if ( module_num > 0 ) {
1237  put_S_module( cur_module, dir );
1238  cur_module->clear();
1239  }
1240  module_num++;
1241  char* end_sym = strchr( line, '%' );
1242  if ( end_sym == NULL ) // error
1243  {
1244  fprintf( stderr, "ERROR: Invalid module name. \nLINE: %s\naborting...\n", line );
1245  exit( 1 );
1246  }
1247  bzero( cur_module_name, MAX_MODULE_NAME_LENGTH );
1248  memcpy( cur_module_name, line, strlen( line ) - strlen( end_sym ) );
1249  cur_module->init( cur_module_name, arch, event, cmask, inv, sp );
1250  cur_module->set_total( atoi( end_sym + 1 ) );
1251  } // module
1252  else // symbol, libName, libOffset, value
1253  {
1254  unsigned int value = 0, libOffset = 0;
1255  char symbol[MAX_SYM_LENGTH];
1256  char libName[MAX_LIB_LENGTH];
1257  char final_sym[MAX_SYM_MOD_LENGTH];
1258  char final_lib[MAX_LIB_MOD_LENGTH];
1259  bzero( symbol, MAX_SYM_LENGTH );
1260  bzero( libName, MAX_LIB_LENGTH );
1261  bzero( final_sym, MAX_SYM_MOD_LENGTH );
1262  bzero( final_lib, MAX_LIB_MOD_LENGTH );
1263 
1264  sscanf( line, "%s %s %u %u", symbol, libName, &libOffset, &value );
1265  char realPathName_s[FILENAME_MAX];
1266  bzero( realPathName_s, FILENAME_MAX );
1267  char* realPathName = realpath( libName, realPathName_s );
1268  if ( realPathName != NULL && strlen( realPathName ) > 0 ) {
1270  result = libsInfo.find( realPathName );
1271  if ( result == libsInfo.end() ) { libsInfo[realPathName] = FileInfo( realPathName, true ); }
1272  const char* temp_sym = libsInfo[realPathName].symbolByOffset( libOffset );
1273  if ( temp_sym != NULL && strlen( temp_sym ) > 0 ) {
1274  int status;
1275  char* demangled_symbol = abi::__cxa_demangle( temp_sym, NULL, NULL, &status );
1276  if ( status == 0 ) {
1277  strcpy( final_sym, demangled_symbol );
1278  free( demangled_symbol );
1279  } else {
1280  strcpy( final_sym, temp_sym );
1281  }
1282  } else {
1283  strcpy( final_sym, "???" );
1284  }
1285  strcpy( final_lib, realPathName );
1286  } else {
1287  strcpy( final_sym, symbol );
1288  strcpy( final_lib, libName );
1289  }
1290  char index[MAX_LINE_LENGTH];
1291  bzero( index, MAX_LINE_LENGTH );
1292  strcpy( index, final_sym );
1293  strcat( index, "%" );
1294  strcat( index, final_lib );
1295  cur_module->add_sample( index, value );
1296  } // symbol, libName, libOffset, value
1297  bzero( line, MAX_LINE_LENGTH );
1298  } // while(gzgets(res_file, line, MAX_LINE_LENGTH)!=Z_NULL)
1299  put_S_module( cur_module, dir ); // last module!
1300  cur_module->clear();
1301  gzclose( res_file );
1302  } // if(res_file != NULL)
1303  else {
1304  fprintf( stderr, "ERROR: Unable to open input file: %s\naborting...\n", filename );
1305  exit( 1 );
1306  }
1307  delete cur_module; // delete it!
1308  return 0;
1309 }

◆ skipString()

/**
 * Consume a literal prefix from a character buffer.
 *
 * If @p srcbuffer begins with the string @p strptr, *@p dstbuffer is set to
 * the first character after that prefix and true is returned. Otherwise
 * false is returned and *@p dstbuffer is left untouched.
 *
 * Originally defined at line 147 of pfm_gen_analysis.cpp.
 *
 * @param strptr    NUL-terminated prefix to match
 * @param srcbuffer NUL-terminated text to inspect
 * @param dstbuffer receives the position just past the prefix on success
 */
bool skipString( const char* strptr, const char* srcbuffer, const char** dstbuffer ) {
  const size_t prefix_len = strlen( strptr );
  const bool   matches    = ( strncmp( srcbuffer, strptr, prefix_len ) == 0 );
  if ( matches ) { *dstbuffer = srcbuffer + prefix_len; }
  return matches;
}

◆ skipWhitespaces()

/**
 * Consume a run of one or more whitespace characters.
 *
 * If @p srcbuffer starts with at least one whitespace character (as decided
 * by isspace), *@p destbuffer is set to the first non-whitespace character
 * after the run and true is returned. Otherwise false is returned and
 * *@p destbuffer is left untouched.
 *
 * Characters are converted to unsigned char before being handed to
 * isspace(): passing a negative plain char (any byte >= 0x80 where char is
 * signed) is undefined behaviour.
 *
 * Originally defined at line 133 of pfm_gen_analysis.cpp.
 *
 * @param srcbuffer  NUL-terminated text to inspect
 * @param destbuffer receives the position just past the whitespace on success
 */
bool skipWhitespaces( const char* srcbuffer, const char** destbuffer ) {
  if ( !isspace( static_cast<unsigned char>( *srcbuffer ) ) ) { return false; }
  ++srcbuffer;
  while ( isspace( static_cast<unsigned char>( *srcbuffer ) ) ) { ++srcbuffer; }
  *destbuffer = srcbuffer;
  return true;
}
std::strcpy
T strcpy(T... args)
MAX_CMASK_STR_LENGTH
#define MAX_CMASK_STR_LENGTH
Definition: pfm_gen_analysis.cpp:93
I7_L1_ITLB_WALK_COMPLETED_CYCLES
#define I7_L1_ITLB_WALK_COMPLETED_CYCLES
Definition: pfm_gen_analysis.cpp:59
std::strtol
T strtol(T... args)
init_core_caa_events
void init_core_caa_events()
Definition: pfm_gen_analysis.cpp:284
std::isdigit
T isdigit(T... args)
html_special_chars
void html_special_chars(const char *s, char *s_mod)
Definition: pfm_gen_analysis.cpp:838
std::strcmp
T strcmp(T... args)
I7_OTHER_CORE_L2_HITM_CYCLES
#define I7_OTHER_CORE_L2_HITM_CYCLES
Definition: pfm_gen_analysis.cpp:63
std::strrchr
T strrchr(T... args)
std::strlen
T strlen(T... args)
MAX_SYM_MOD_LENGTH
#define MAX_SYM_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:81
std::string
STL class.
put_C_modules
void put_C_modules(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1478
plotBacklogPyRoot.argc
argc
Definition: plotBacklogPyRoot.py:153
std::pair
init_nhm_caa_events
void init_nhm_caa_events()
Definition: pfm_gen_analysis.cpp:308
I7_IFETCH_L2_MISS_L3_HITM
#define I7_IFETCH_L2_MISS_L3_HITM
Definition: pfm_gen_analysis.cpp:71
gaudirun.s
string s
Definition: gaudirun.py:328
S_module::get_event
std::string get_event()
Definition: pfm_gen_analysis.cpp:816
std::vector
STL class.
std::find
T find(T... args)
MAX_LIB_MOD_LENGTH
#define MAX_LIB_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:83
finalize_S_html_pages
int finalize_S_html_pages(const char *dir)
Definition: pfm_gen_analysis.cpp:1358
CORE_UNKNOWN_ADDR_STORE_CYCLES
#define CORE_UNKNOWN_ADDR_STORE_CYCLES
Definition: pfm_gen_analysis.cpp:53
std::strcat
T strcat(T... args)
put_C_header
void put_C_header(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1449
CORE_L1_DTLB_MISS_CYCLES
#define CORE_L1_DTLB_MISS_CYCLES
Definition: pfm_gen_analysis.cpp:51
hivetimeline.read
def read(f, regex='.*', skipevents=0)
Definition: hivetimeline.py:33
I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES
#define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:64
std::isalnum
T isalnum(T... args)
std::fscanf
T fscanf(T... args)
gaudirun.c
c
Definition: gaudirun.py:509
max
EventIDBase max(const EventIDBase &lhs, const EventIDBase &rhs)
Definition: EventIDBase.h:225
MAX_EVENT_NAME_LENGTH
#define MAX_EVENT_NAME_LENGTH
Definition: pfm_gen_analysis.cpp:89
std::strerror
T strerror(T... args)
S_module::get_total_num_samples
unsigned int get_total_num_samples()
Definition: pfm_gen_analysis.cpp:831
std::sort
T sort(T... args)
graphanalysis.filename
string filename
Definition: graphanalysis.py:130
I7_L2_HIT_CYCLES
#define I7_L2_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:60
EXPECTED_CPI
#define EXPECTED_CPI
Definition: pfm_gen_analysis.cpp:76
std::vector::push_back
T push_back(T... args)
S_module::get_max
bool get_max(char *index, unsigned int &value)
Definition: pfm_gen_analysis.cpp:821
compareOutputFiles.sp
sp
Definition: compareOutputFiles.py:506
I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT
#define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT
Definition: pfm_gen_analysis.cpp:68
I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD
#define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD
Definition: pfm_gen_analysis.cpp:72
read_C_file
int read_C_file(const char *dir, const char *filename)
Definition: pfm_gen_analysis.cpp:1384
S_module::clear
void clear()
Definition: pfm_gen_analysis.cpp:790
CORE_L2_HIT_CYCLES
#define CORE_L2_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:50
read_S_file
int read_S_file(const char *dir, const char *filename)
Definition: pfm_gen_analysis.cpp:1198
calc_nhm_deriv_values
void calc_nhm_deriv_values(double totalCycles)
Definition: pfm_gen_analysis.cpp:607
S_module::get_c_mask
unsigned int get_c_mask()
Definition: pfm_gen_analysis.cpp:814
std::strchr
T strchr(T... args)
read_S_events
int read_S_events(const char *dir, const char *filename)
Definition: pfm_gen_analysis.cpp:1311
std::fprintf
T fprintf(T... args)
MAX_SIMPLE_LIB_MOD_LENGTH
#define MAX_SIMPLE_LIB_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:87
std::fclose
T fclose(T... args)
MAX_LIB_LENGTH
#define MAX_LIB_LENGTH
Definition: pfm_gen_analysis.cpp:82
I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP
#define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP
Definition: pfm_gen_analysis.cpp:69
MAX_SIMPLE_SYM_LENGTH
#define MAX_SIMPLE_SYM_LENGTH
Definition: pfm_gen_analysis.cpp:84
std::cerr
I7_L1_DTLB_WALK_COMPLETED_CYCLES
#define I7_L1_DTLB_WALK_COMPLETED_CYCLES
Definition: pfm_gen_analysis.cpp:58
std::isalpha
T isalpha(T... args)
S_module::get_module_name
std::string get_module_name()
Definition: pfm_gen_analysis.cpp:830
std::atoi
T atoi(T... args)
Prepare.dst
dst
Definition: Prepare.py:22
I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT
#define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT
Definition: pfm_gen_analysis.cpp:67
func_name
const char * func_name(const char *demangled_symbol)
Definition: pfm_gen_analysis.cpp:869
std::fopen
T fopen(T... args)
getTotalCycles
double getTotalCycles()
Definition: pfm_gen_analysis.cpp:1591
CORE_LCP_STALL_CYCLES
#define CORE_LCP_STALL_CYCLES
Definition: pfm_gen_analysis.cpp:52
check_for_nhm_caa_events
bool check_for_nhm_caa_events()
Definition: pfm_gen_analysis.cpp:362
CORE_OVERLAPPING_CYCLES
#define CORE_OVERLAPPING_CYCLES
Definition: pfm_gen_analysis.cpp:54
std::map
STL class.
S_module::get_inv_mask
unsigned int get_inv_mask()
Definition: pfm_gen_analysis.cpp:813
put_C_footer
void put_C_footer(FILE *fp)
Definition: pfm_gen_analysis.cpp:1505
put_C_modules_csv
void put_C_modules_csv(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1521
GaudiPluginService.cpluginsvc.n
n
Definition: cpluginsvc.py:221
MAX_MODULE_NAME_LENGTH
#define MAX_MODULE_NAME_LENGTH
Definition: pfm_gen_analysis.cpp:90
init_core_caa_events_displ
void init_core_caa_events_displ()
Definition: pfm_gen_analysis.cpp:372
S_module::add_sample
void add_sample(const char *index, unsigned int value)
Definition: pfm_gen_analysis.cpp:817
CORE_SPAN_ACROSS_CACHE_LINE_CYCLES
#define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES
Definition: pfm_gen_analysis.cpp:55
std::strstr
T strstr(T... args)
MAX_SYM_LENGTH
#define MAX_SYM_LENGTH
Definition: pfm_gen_analysis.cpp:80
std::strncmp
T strncmp(T... args)
std::endl
T endl(T... args)
put_C_header_csv
void put_C_header_csv(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1510
S_module::get_smpl_period
unsigned int get_smpl_period()
Definition: pfm_gen_analysis.cpp:812
std::vector::begin
T begin(T... args)
std::map::insert
T insert(T... args)
I7_OTHER_CORE_L2_HIT_CYCLES
#define I7_OTHER_CORE_L2_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:62
CORE_L2_MISS_CYCLES
#define CORE_L2_MISS_CYCLES
Definition: pfm_gen_analysis.cpp:49
MAX_SIMPLE_LIB_LENGTH
#define MAX_SIMPLE_LIB_LENGTH
Definition: pfm_gen_analysis.cpp:86
MAX_LINE_LENGTH
#define MAX_LINE_LENGTH
Definition: pfm_gen_analysis.cpp:88
FileInfo
Definition: pfm_gen_analysis.cpp:153
MAX_SAMPLE_INDEX_LENGTH
#define MAX_SAMPLE_INDEX_LENGTH
Definition: pfm_gen_analysis.cpp:79
I7_IFETCH_L2_MISS_L3_HIT_SNOOP
#define I7_IFETCH_L2_MISS_L3_HIT_SNOOP
Definition: pfm_gen_analysis.cpp:70
std::isspace
T isspace(T... args)
calc_post_deriv_values
void calc_post_deriv_values()
Definition: pfm_gen_analysis.cpp:1565
std::free
T free(T... args)
plotSpeedupsPyRoot.line
line
Definition: plotSpeedupsPyRoot.py:181
put_S_module
void put_S_module(S_module *cur_module, const char *dir)
Definition: pfm_gen_analysis.cpp:1046
std::memcpy
T memcpy(T... args)
MAX_INV_STR_LENGTH
#define MAX_INV_STR_LENGTH
Definition: pfm_gen_analysis.cpp:94
check_for_core_caa_events
bool check_for_core_caa_events()
Definition: pfm_gen_analysis.cpp:352
std::vector::end
T end(T... args)
normalize
double normalize(std::string field, double value, double normalizeTo)
Definition: pfm_gen_analysis.cpp:1546
I7_L3_UNSHARED_HIT_CYCLES
#define I7_L3_UNSHARED_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:61
S_module::init
void init(const char *name, const char *architecture, const char *event_name, unsigned int c_mask, unsigned int inv_mask, unsigned int smpl_period)
Definition: pfm_gen_analysis.cpp:799
MAX_FILENAME_LENGTH
#define MAX_FILENAME_LENGTH
Definition: pfm_gen_analysis.cpp:78
I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES
#define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:66
init_nhm_caa_events_displ
void init_nhm_caa_events_displ()
Definition: pfm_gen_analysis.cpp:502
calc_core_deriv_values
void calc_core_deriv_values(double totalCycles)
Definition: pfm_gen_analysis.cpp:421
I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES
#define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:65
S_module::set_total
void set_total(unsigned int total)
Definition: pfm_gen_analysis.cpp:808
std::exit
T exit(T... args)
MAX_SIMPLE_SYM_MOD_LENGTH
#define MAX_SIMPLE_SYM_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:85
gaudirun.argv
list argv
Definition: gaudirun.py:310
MAX_SP_STR_LENGTH
#define MAX_SP_STR_LENGTH
Definition: pfm_gen_analysis.cpp:95
S_module
Definition: pfm_gen_analysis.cpp:777
MAX_ARCH_NAME_LENGTH
#define MAX_ARCH_NAME_LENGTH
Definition: pfm_gen_analysis.cpp:92