The Gaudi Framework  master (37c0b60a)
pfm_gen_analysis.cpp File Reference
#include <ctype.h>
#include <cxxabi.h>
#include <fcntl.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <zlib.h>
#include <algorithm>
#include <format>
#include <iostream>
#include <list>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
#include <dirent.h>
#include <errno.h>
Include dependency graph for pfm_gen_analysis.cpp:

Go to the source code of this file.

Classes

class  PipeReader
 
class  FileInfo
 
struct  FileInfo::CacheItem
 
struct  FileInfo::CacheItemComparator
 
class  S_module
 

Macros

#define CORE_L2_MISS_CYCLES   200
 
#define CORE_L2_HIT_CYCLES   14.5
 
#define CORE_L1_DTLB_MISS_CYCLES   10
 
#define CORE_LCP_STALL_CYCLES   6
 
#define CORE_UNKNOWN_ADDR_STORE_CYCLES   5
 
#define CORE_OVERLAPPING_CYCLES   6
 
#define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES   20
 
#define I7_L1_DTLB_WALK_COMPLETED_CYCLES   35
 
#define I7_L1_ITLB_WALK_COMPLETED_CYCLES   35
 
#define I7_L2_HIT_CYCLES   6
 
#define I7_L3_UNSHARED_HIT_CYCLES   35
 
#define I7_OTHER_CORE_L2_HIT_CYCLES   60
 
#define I7_OTHER_CORE_L2_HITM_CYCLES   75
 
#define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES   225
 
#define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES   360
 
#define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES   180
 
#define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT   200
 
#define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT   350
 
#define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP   35
 
#define I7_IFETCH_L2_MISS_L3_HIT_SNOOP   60
 
#define I7_IFETCH_L2_MISS_L3_HITM   75
 
#define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD   180
 
#define MAX_MODULES   1000
 
#define EXPECTED_CPI   0.25
 
#define MAX_FILENAME_LENGTH   1024
 
#define MAX_SAMPLE_INDEX_LENGTH   10000
 
#define MAX_SYM_LENGTH   15000
 
#define MAX_SYM_MOD_LENGTH   20000
 
#define MAX_LIB_LENGTH   5000
 
#define MAX_LIB_MOD_LENGTH   7000
 
#define MAX_SIMPLE_SYM_LENGTH   300
 
#define MAX_SIMPLE_SYM_MOD_LENGTH   500
 
#define MAX_SIMPLE_LIB_LENGTH   300
 
#define MAX_SIMPLE_LIB_MOD_LENGTH   500
 
#define MAX_LINE_LENGTH   20000
 
#define MAX_EVENT_NAME_LENGTH   150
 
#define MAX_MODULE_NAME_LENGTH   250
 
#define MAX_VALUE_STRING_LENGTH   250
 
#define MAX_ARCH_NAME_LENGTH   20
 
#define MAX_CMASK_STR_LENGTH   5
 
#define MAX_INV_STR_LENGTH   5
 
#define MAX_SP_STR_LENGTH   50
 
#define PIPE_BUFFER_LENGTH   1000
 

Functions

bool skipWhitespaces (const char *srcbuffer, const char **destbuffer)
 
bool skipString (const char *strptr, const char *srcbuffer, const char **dstbuffer)
 
void init_core_caa_events ()
 
void init_nhm_caa_events ()
 
bool check_for_core_caa_events ()
 
bool check_for_nhm_caa_events ()
 
void init_core_caa_events_displ ()
 
void calc_core_deriv_values (double totalCycles)
 
void init_nhm_caa_events_displ ()
 
void calc_nhm_deriv_values (double totalCycles)
 
void html_special_chars (const char *s, char *s_mod)
 
const char * func_name (const char *demangled_symbol)
 
void put_S_module (S_module *cur_module, const char *dir)
 
int read_S_file (const char *dir, const char *filename)
 
int read_S_events (const char *dir, const char *filename)
 
int finalize_S_html_pages (const char *dir)
 
int read_C_file (const char *dir, const char *filename)
 
void put_C_header (FILE *fp, std::vector< std::string > &columns)
 
void put_C_modules (FILE *fp, std::vector< std::string > &columns)
 
void put_C_footer (FILE *fp)
 
void put_C_header_csv (FILE *fp, std::vector< std::string > &columns)
 
void put_C_modules_csv (FILE *fp, std::vector< std::string > &columns)
 
double normalize (const std::string &field, double value, double normalizeTo)
 
void calc_post_deriv_values ()
 
double getTotalCycles ()
 
int main (int argc, char *argv[])
 

Macro Definition Documentation

◆ CORE_L1_DTLB_MISS_CYCLES

#define CORE_L1_DTLB_MISS_CYCLES   10

Definition at line 54 of file pfm_gen_analysis.cpp.

◆ CORE_L2_HIT_CYCLES

#define CORE_L2_HIT_CYCLES   14.5

Definition at line 53 of file pfm_gen_analysis.cpp.

◆ CORE_L2_MISS_CYCLES

#define CORE_L2_MISS_CYCLES   200

Definition at line 52 of file pfm_gen_analysis.cpp.

◆ CORE_LCP_STALL_CYCLES

#define CORE_LCP_STALL_CYCLES   6

Definition at line 55 of file pfm_gen_analysis.cpp.

◆ CORE_OVERLAPPING_CYCLES

#define CORE_OVERLAPPING_CYCLES   6

Definition at line 57 of file pfm_gen_analysis.cpp.

◆ CORE_SPAN_ACROSS_CACHE_LINE_CYCLES

#define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES   20

Definition at line 58 of file pfm_gen_analysis.cpp.

◆ CORE_UNKNOWN_ADDR_STORE_CYCLES

#define CORE_UNKNOWN_ADDR_STORE_CYCLES   5

Definition at line 56 of file pfm_gen_analysis.cpp.

◆ EXPECTED_CPI

#define EXPECTED_CPI   0.25

Definition at line 79 of file pfm_gen_analysis.cpp.

◆ I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP

#define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP   35

Definition at line 72 of file pfm_gen_analysis.cpp.

◆ I7_IFETCH_L2_MISS_L3_HIT_SNOOP

#define I7_IFETCH_L2_MISS_L3_HIT_SNOOP   60

Definition at line 73 of file pfm_gen_analysis.cpp.

◆ I7_IFETCH_L2_MISS_L3_HITM

#define I7_IFETCH_L2_MISS_L3_HITM   75

Definition at line 74 of file pfm_gen_analysis.cpp.

◆ I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT

#define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT   200

Definition at line 70 of file pfm_gen_analysis.cpp.

◆ I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD

#define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD   180

Definition at line 75 of file pfm_gen_analysis.cpp.

◆ I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT

#define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT   350

Definition at line 71 of file pfm_gen_analysis.cpp.

◆ I7_L1_DTLB_WALK_COMPLETED_CYCLES

#define I7_L1_DTLB_WALK_COMPLETED_CYCLES   35

Definition at line 61 of file pfm_gen_analysis.cpp.

◆ I7_L1_ITLB_WALK_COMPLETED_CYCLES

#define I7_L1_ITLB_WALK_COMPLETED_CYCLES   35

Definition at line 62 of file pfm_gen_analysis.cpp.

◆ I7_L2_HIT_CYCLES

#define I7_L2_HIT_CYCLES   6

Definition at line 63 of file pfm_gen_analysis.cpp.

◆ I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES

#define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES   225

Definition at line 67 of file pfm_gen_analysis.cpp.

◆ I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES

#define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES   180

Definition at line 69 of file pfm_gen_analysis.cpp.

◆ I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES

#define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES   360

Definition at line 68 of file pfm_gen_analysis.cpp.

◆ I7_L3_UNSHARED_HIT_CYCLES

#define I7_L3_UNSHARED_HIT_CYCLES   35

Definition at line 64 of file pfm_gen_analysis.cpp.

◆ I7_OTHER_CORE_L2_HIT_CYCLES

#define I7_OTHER_CORE_L2_HIT_CYCLES   60

Definition at line 65 of file pfm_gen_analysis.cpp.

◆ I7_OTHER_CORE_L2_HITM_CYCLES

#define I7_OTHER_CORE_L2_HITM_CYCLES   75

Definition at line 66 of file pfm_gen_analysis.cpp.

◆ MAX_ARCH_NAME_LENGTH

#define MAX_ARCH_NAME_LENGTH   20

Definition at line 95 of file pfm_gen_analysis.cpp.

◆ MAX_CMASK_STR_LENGTH

#define MAX_CMASK_STR_LENGTH   5

Definition at line 96 of file pfm_gen_analysis.cpp.

◆ MAX_EVENT_NAME_LENGTH

#define MAX_EVENT_NAME_LENGTH   150

Definition at line 92 of file pfm_gen_analysis.cpp.

◆ MAX_FILENAME_LENGTH

#define MAX_FILENAME_LENGTH   1024

Definition at line 81 of file pfm_gen_analysis.cpp.

◆ MAX_INV_STR_LENGTH

#define MAX_INV_STR_LENGTH   5

Definition at line 97 of file pfm_gen_analysis.cpp.

◆ MAX_LIB_LENGTH

#define MAX_LIB_LENGTH   5000

Definition at line 85 of file pfm_gen_analysis.cpp.

◆ MAX_LIB_MOD_LENGTH

#define MAX_LIB_MOD_LENGTH   7000

Definition at line 86 of file pfm_gen_analysis.cpp.

◆ MAX_LINE_LENGTH

#define MAX_LINE_LENGTH   20000

Definition at line 91 of file pfm_gen_analysis.cpp.

◆ MAX_MODULE_NAME_LENGTH

#define MAX_MODULE_NAME_LENGTH   250

Definition at line 93 of file pfm_gen_analysis.cpp.

◆ MAX_MODULES

#define MAX_MODULES   1000

Definition at line 77 of file pfm_gen_analysis.cpp.

◆ MAX_SAMPLE_INDEX_LENGTH

#define MAX_SAMPLE_INDEX_LENGTH   10000

Definition at line 82 of file pfm_gen_analysis.cpp.

◆ MAX_SIMPLE_LIB_LENGTH

#define MAX_SIMPLE_LIB_LENGTH   300

Definition at line 89 of file pfm_gen_analysis.cpp.

◆ MAX_SIMPLE_LIB_MOD_LENGTH

#define MAX_SIMPLE_LIB_MOD_LENGTH   500

Definition at line 90 of file pfm_gen_analysis.cpp.

◆ MAX_SIMPLE_SYM_LENGTH

#define MAX_SIMPLE_SYM_LENGTH   300

Definition at line 87 of file pfm_gen_analysis.cpp.

◆ MAX_SIMPLE_SYM_MOD_LENGTH

#define MAX_SIMPLE_SYM_MOD_LENGTH   500

Definition at line 88 of file pfm_gen_analysis.cpp.

◆ MAX_SP_STR_LENGTH

#define MAX_SP_STR_LENGTH   50

Definition at line 98 of file pfm_gen_analysis.cpp.

◆ MAX_SYM_LENGTH

#define MAX_SYM_LENGTH   15000

Definition at line 83 of file pfm_gen_analysis.cpp.

◆ MAX_SYM_MOD_LENGTH

#define MAX_SYM_MOD_LENGTH   20000

Definition at line 84 of file pfm_gen_analysis.cpp.

◆ MAX_VALUE_STRING_LENGTH

#define MAX_VALUE_STRING_LENGTH   250

Definition at line 94 of file pfm_gen_analysis.cpp.

◆ PIPE_BUFFER_LENGTH

#define PIPE_BUFFER_LENGTH   1000

Definition at line 100 of file pfm_gen_analysis.cpp.

Function Documentation

◆ calc_core_deriv_values()

void calc_core_deriv_values ( double  totalCycles)

Definition at line 420 of file pfm_gen_analysis.cpp.

420  {
421  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
422  ++it ) {
423  ( it->second )["Total Cycles"] = ( it->second )["UNHALTED_CORE_CYCLES"];
424  ( it->second )["Stalled Cycles"] = ( it->second )["RS_UOPS_DISPATCHED CMASK=1 INV=1"];
425  ( it->second )["L2 Miss Impact"] = ( it->second )["MEM_LOAD_RETIRED:L2_LINE_MISS"] * CORE_L2_MISS_CYCLES;
426  ( it->second )["L2 Hit Impact"] =
427  ( ( it->second )["MEM_LOAD_RETIRED:L1D_LINE_MISS"] - ( it->second )["MEM_LOAD_RETIRED:L2_LINE_MISS"] ) *
428  CORE_L2_HIT_CYCLES;
429  ( it->second )["L1 DTLB Miss Impact"] = ( it->second )["MEM_LOAD_RETIRED:DTLB_MISS"] * CORE_L1_DTLB_MISS_CYCLES;
430  ( it->second )["LCP Stalls Impact"] = ( it->second )["ILD_STALL"] * CORE_LCP_STALL_CYCLES;
431  ( it->second )["Loads Blocked by Unknown Address Store Impact"] =
432  ( it->second )["LOAD_BLOCK:STA"] * CORE_UNKNOWN_ADDR_STORE_CYCLES;
433  ( it->second )["Loads Overlapped with Stores Impact"] =
434  ( it->second )["LOAD_BLOCK:OVERLAP_STORE"] * CORE_OVERLAPPING_CYCLES;
435  ( it->second )["Loads Spanning across Cache Lines Impact"] =
436  ( it->second )["LOAD_BLOCK:UNTIL_RETIRE"] * CORE_SPAN_ACROSS_CACHE_LINE_CYCLES;
437  ( it->second )["Store-Fwd Stalls Impact"] = ( it->second )["Loads Blocked by Unknown Address Store Impact"] +
438  ( it->second )["Loads Overlapped with Stores Impact"] +
439  ( it->second )["Loads Spanning across Cache Lines Impact"];
440  ( it->second )["Counted Stalled Cycles"] =
441  ( it->second )["L2 Miss Impact"] + ( it->second )["L2 Hit Impact"] + ( it->second )["LCP Stalls Impact"] +
442  ( it->second )["L1 DTLB Miss Impact"] + ( it->second )["Store-Fwd Stalls Impact"];
443  ( it->second )["Instructions Retired"] = ( it->second )["INSTRUCTIONS_RETIRED"];
444  ( it->second )["ITLB Miss Rate in %"] =
445  ( ( it->second )["ITLB_MISS_RETIRED"] / ( it->second )["INSTRUCTIONS_RETIRED"] ) * 100;
446  ( it->second )["Branch Instructions"] = ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"];
447  ( it->second )["Load Instructions"] = ( it->second )["INST_RETIRED:LOADS"];
448  ( it->second )["Store Instructions"] = ( it->second )["INST_RETIRED:STORES"];
449  ( it->second )["Other Instructions"] = ( it->second )["INST_RETIRED:OTHER"] -
450  ( it->second )["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] -
451  ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"];
452  ( it->second )["% of Mispredicted Branches"] =
453  ( ( it->second )["MISPREDICTED_BRANCH_RETIRED"] / ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"] ) * 100;
454  ( it->second )["Packed SIMD Computational Instructions"] =
455  ( it->second )["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"];
456  ( it->second )["Counted Instructions Retired"] =
457  ( it->second )["Branch Instructions"] + ( it->second )["Load Instructions"] +
458  ( it->second )["Store Instructions"] + ( it->second )["Other Instructions"] +
459  ( it->second )["Packed SIMD Computational Instructions"];
460  ( it->second )["CPI"] = ( it->second )["UNHALTED_CORE_CYCLES"] / ( it->second )["INSTRUCTIONS_RETIRED"];
461 
462  double localPerformanceImprovement = ( it->second )["CPI"] / EXPECTED_CPI;
463  double cyclesAfterImprovement = ( it->second )["UNHALTED_CORE_CYCLES"] / localPerformanceImprovement;
464  double totalCyclesAfterImprovement = totalCycles - ( it->second )["UNHALTED_CORE_CYCLES"] + cyclesAfterImprovement;
465  ( it->second )["iMargin"] = 100 - ( totalCyclesAfterImprovement / totalCycles ) * 100;
466 
467  ( it->second )["% of Total Cycles"] =
468  ( it->second )["RS_UOPS_DISPATCHED CMASK=1 INV=1"] * 100 / ( it->second )["UNHALTED_CORE_CYCLES"];
469  ( it->second )["L2 Miss % of counted Stalled Cycles"] =
470  ( it->second )["L2 Miss Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
471  ( it->second )["L2 Hit % of counted Stalled Cycles"] =
472  ( it->second )["L2 Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
473  ( it->second )["L1 DTLB Miss % of counted Stalled Cycles"] =
474  ( it->second )["L1 DTLB Miss Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
475  ( it->second )["LCP Stalls % of counted Stalled Cycles"] =
476  ( it->second )["LCP Stalls Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
477  ( it->second )["Store-Fwd Stalls % of counted Stalled Cycles"] =
478  ( it->second )["Store-Fwd Stalls Impact"] * 100 / ( it->second )["Counted Stalled Cycles"];
479  ( it->second )["Loads Blocked % of Store-Fwd Stalls Cycles"] =
480  ( it->second )["Loads Blocked by Unknown Address Store Impact"] * 100 /
481  ( it->second )["Store-Fwd Stalls Impact"];
482  ( it->second )["Loads Overlapped % of Store-Fwd Stalls Cycles"] =
483  ( it->second )["Loads Overlapped with Stores Impact"] * 100 / ( it->second )["Store-Fwd Stalls Impact"];
484  ( it->second )["Loads Spanning % of Store-Fwd Stalls Cycles"] =
485  ( it->second )["Loads Spanning across Cache Lines Impact"] * 100 / ( it->second )["Store-Fwd Stalls Impact"];
486 
487  ( it->second )["Load % of all Instructions"] =
488  ( it->second )["INST_RETIRED:LOADS"] * 100 / ( it->second )["Counted Instructions Retired"];
489  ( it->second )["Store % of all Instructions"] =
490  ( it->second )["INST_RETIRED:STORES"] * 100 / ( it->second )["Counted Instructions Retired"];
491  ( it->second )["Branch % of all Instructions"] =
492  ( it->second )["BRANCH_INSTRUCTIONS_RETIRED"] * 100 / ( it->second )["Counted Instructions Retired"];
493  ( it->second )["Packed SIMD % of all Instructions"] =
494  ( it->second )["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] * 100 /
495  ( it->second )["Counted Instructions Retired"];
496  ( it->second )["Other % of all Instructions"] =
497  ( it->second )["Other Instructions"] * 100 / ( it->second )["Counted Instructions Retired"];
498  }
499 }

◆ calc_nhm_deriv_values()

void calc_nhm_deriv_values ( double  totalCycles)

Definition at line 606 of file pfm_gen_analysis.cpp.

606  {
607  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
608  ++it ) {
609  ( it->second )["Total Cycles"] = ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
610 
611  ( it->second )["L2 Hit Impact"] = ( it->second )["MEM_LOAD_RETIRED:L2_HIT"] * I7_L2_HIT_CYCLES;
612  ( it->second )["L3 Unshared Hit Impact"] =
613  ( it->second )["MEM_LOAD_RETIRED:L3_UNSHARED_HIT"] * I7_L3_UNSHARED_HIT_CYCLES;
614  if ( ( it->second )["MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] >
615  ( it->second )["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] ) {
616  ( it->second )["L2 Other Core Hit Impact"] = ( ( it->second )["MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] -
617  ( it->second )["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] ) *
618  I7_OTHER_CORE_L2_HIT_CYCLES;
619  } else {
620  ( it->second )["L2 Other Core Hit Impact"] = 0.0;
621  }
622  ( it->second )["L2 Other Core Hit Modified Impact"] =
623  ( it->second )["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] * I7_OTHER_CORE_L2_HITM_CYCLES;
624  ( it->second )["L3 Miss -> Local DRAM Hit Impact"] =
625  ( it->second )["MEM_UNCORE_RETIRED:LOCAL_DRAM"] * I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES;
626  ( it->second )["L3 Miss -> Remote DRAM Hit Impact"] =
627  ( it->second )["MEM_UNCORE_RETIRED:REMOTE_DRAM"] * I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES;
628  ( it->second )["L3 Miss -> Remote Cache Hit Impact"] =
629  ( it->second )["MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT"] * I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES;
630  ( it->second )["L3 Miss -> Total Impact"] = ( it->second )["L3 Miss -> Local DRAM Hit Impact"] +
631  ( it->second )["L3 Miss -> Remote DRAM Hit Impact"] +
632  ( it->second )["L3 Miss -> Remote Cache Hit Impact"];
633  ( it->second )["L1 DTLB Miss Impact"] =
634  ( it->second )["DTLB_LOAD_MISSES:WALK_COMPLETED"] * I7_L1_DTLB_WALK_COMPLETED_CYCLES;
635  ( it->second )["Counted Stalled Cycles due to Load Ops"] =
636  ( it->second )["L3 Miss -> Total Impact"] + ( it->second )["L2 Hit Impact"] +
637  ( it->second )["L1 DTLB Miss Impact"] + ( it->second )["L3 Unshared Hit Impact"] +
638  ( it->second )["L2 Other Core Hit Modified Impact"] + ( it->second )["L2 Other Core Hit Impact"];
639  ( it->second )["Cycles spent during DIV & SQRT Ops"] = ( it->second )["ARITH:CYCLES_DIV_BUSY"];
640  ( it->second )["Total Counted Stalled Cycles"] =
641  ( it->second )["Counted Stalled Cycles due to Load Ops"] + ( it->second )["Cycles spent during DIV & SQRT Ops"];
642  ( it->second )["Stalled Cycles"] =
643  ( it->second )["Total Counted Stalled Cycles"]; // TO BE FIXED when UOPS_EXECUTED:0x3f is fixed!!
644  ( it->second )["% of Total Cycles"] =
645  ( it->second )["Stalled Cycles"] * 100 / ( it->second )["CPU_CLK_UNHALTED:THREAD_P"]; // TO BE FIXED!! see above
646  ( it->second )["L3 Miss % of Load Stalls"] =
647  ( it->second )["L3 Miss -> Total Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
648  ( it->second )["L2 Hit % of Load Stalls"] =
649  ( it->second )["L2 Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
650  ( it->second )["L1 DTLB Miss % of Load Stalls"] =
651  ( it->second )["L1 DTLB Miss Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
652  ( it->second )["L3 Unshared Hit % of Load Stalls"] =
653  ( it->second )["L3 Unshared Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
654  ( it->second )["L2 Other Core Hit % of Load Stalls"] =
655  ( it->second )["L2 Other Core Hit Impact"] * 100 / ( it->second )["Counted Stalled Cycles due to Load Ops"];
656  ( it->second )["L2 Other Core Hit Modified % of Load Stalls"] =
657  ( it->second )["L2 Other Core Hit Modified Impact"] * 100 /
658  ( it->second )["Counted Stalled Cycles due to Load Ops"];
659  ( it->second )["DIV & SQRT Ops % of counted Stalled Cycles"] =
660  ( it->second )["Cycles spent during DIV & SQRT Ops"] * 100 / ( it->second )["Total Counted Stalled Cycles"];
661 
662  ( it->second )["Cycles IFETCH served by Local DRAM"] =
663  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT;
664  ( it->second )["Cycles IFETCH served by L3 (Modified)"] =
665  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * I7_IFETCH_L2_MISS_L3_HITM;
666  ( it->second )["Cycles IFETCH served by L3 (Clean Snoop)"] =
667  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * I7_IFETCH_L2_MISS_L3_HIT_SNOOP;
668  ( it->second )["Cycles IFETCH served by Remote L2"] =
669  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD;
670  ( it->second )["Cycles IFETCH served by Remote DRAM"] =
671  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT;
672  ( it->second )["Cycles IFETCH served by L3 (No Snoop)"] =
673  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP;
674  ( it->second )["Total L2 IFETCH miss Impact"] =
675  ( it->second )["Cycles IFETCH served by Local DRAM"] + ( it->second )["Cycles IFETCH served by L3 (Modified)"] +
676  ( it->second )["Cycles IFETCH served by L3 (Clean Snoop)"] +
677  ( it->second )["Cycles IFETCH served by Remote L2"] + ( it->second )["Cycles IFETCH served by Remote DRAM"] +
678  ( it->second )["Cycles IFETCH served by L3 (No Snoop)"];
679  ( it->second )["Local DRAM IFECTHes % Impact"] =
680  ( it->second )["Cycles IFETCH served by Local DRAM"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
681  ( it->second )["L3 (Modified) IFECTHes % Impact"] =
682  ( it->second )["Cycles IFETCH served by L3 (Modified)"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
683  ( it->second )["L3 (Clean Snoop) IFECTHes % Impact"] = ( it->second )["Cycles IFETCH served by L3 (Clean Snoop)"] *
684  100 / ( it->second )["Total L2 IFETCH miss Impact"];
685  ( it->second )["Remote L2 IFECTHes % Impact"] =
686  ( it->second )["Cycles IFETCH served by Remote L2"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
687  ( it->second )["Remote DRAM IFECTHes % Impact"] =
688  ( it->second )["Cycles IFETCH served by Remote DRAM"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
689  ( it->second )["L3 (No Snoop) IFECTHes % Impact"] =
690  ( it->second )["Cycles IFETCH served by L3 (No Snoop)"] * 100 / ( it->second )["Total L2 IFETCH miss Impact"];
691  ( it->second )["Total L2 IFETCH misses"] = ( it->second )["L2_RQSTS:IFETCH_MISS"];
692  ( it->second )["% of IFETCHes served by Local DRAM"] =
693  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
694  ( it->second )["% of IFETCHes served by L3 (Modified)"] =
695  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
696  ( it->second )["% of IFETCHes served by L3 (Clean Snoop)"] =
697  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * 100 /
698  ( it->second )["L2_RQSTS:IFETCH_MISS"];
699  ( it->second )["% of IFETCHes served by Remote L2"] =
700  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * 100 /
701  ( it->second )["L2_RQSTS:IFETCH_MISS"];
702  ( it->second )["% of IFETCHes served by Remote DRAM"] =
703  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
704  ( it->second )["% of IFETCHes served by L3 (No Snoop)"] =
705  ( it->second )["OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * 100 / ( it->second )["L2_RQSTS:IFETCH_MISS"];
706  ( it->second )["% of L2 IFETCH misses"] =
707  ( it->second )["L2_RQSTS:IFETCH_MISS"] * 100 /
708  ( ( it->second )["L2_RQSTS:IFETCH_MISS"] + ( it->second )["L2_RQSTS:IFETCH_HIT"] );
709  ( it->second )["L1 ITLB Miss Impact"] =
710  ( it->second )["ITLB_MISSES:WALK_COMPLETED"] * I7_L1_ITLB_WALK_COMPLETED_CYCLES;
711 
712  ( it->second )["Total Branch Instructions Executed"] = ( it->second )["BR_INST_EXEC:ANY"];
713  ( it->second )["% of Mispredicted Branches"] =
714  ( it->second )["BR_MISP_EXEC:ANY"] * 100 / ( it->second )["BR_INST_EXEC:ANY"];
715  ( it->second )["Direct Near Calls % of Total Branches Executed"] =
716  ( it->second )["BR_INST_EXEC:DIRECT_NEAR_CALL"] * 100 / ( it->second )["Total Branch Instructions Executed"];
717  ( it->second )["Indirect Near Calls % of Total Branches Executed"] =
718  ( it->second )["BR_INST_EXEC:INDIRECT_NEAR_CALL"] * 100 / ( it->second )["Total Branch Instructions Executed"];
719  ( it->second )["Indirect Near Non-Calls % of Total Branches Executed"] =
720  ( it->second )["BR_INST_EXEC:INDIRECT_NON_CALL"] * 100 / ( it->second )["Total Branch Instructions Executed"];
721  ( it->second )["All Near Calls % of Total Branches Executed"] =
722  ( it->second )["BR_INST_EXEC:NEAR_CALLS"] * 100 / ( it->second )["Total Branch Instructions Executed"];
723  ( it->second )["All Non Calls % of Total Branches Executed"] =
724  ( it->second )["BR_INST_EXEC:NON_CALLS"] * 100 / ( it->second )["Total Branch Instructions Executed"];
725  ( it->second )["All Returns % of Total Branches Executed"] =
726  ( it->second )["BR_INST_EXEC:RETURN_NEAR"] * 100 / ( it->second )["Total Branch Instructions Executed"];
727  ( it->second )["Total Branch Instructions Retired"] = ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"];
728  ( it->second )["Conditionals % of Total Branches Retired"] =
729  ( it->second )["BR_INST_RETIRED:CONDITIONAL"] * 100 / ( it->second )["Total Branch Instructions Retired"];
730  ( it->second )["Near Calls % of Total Branches Retired"] =
731  ( it->second )["BR_INST_RETIRED:NEAR_CALL"] * 100 / ( it->second )["Total Branch Instructions Retired"];
732 
733  ( it->second )["Instruction Starvation % of Total Cycles"] =
734  ( ( it->second )["UOPS_ISSUED:ANY CMASK=1 INV=1"] - ( it->second )["RESOURCE_STALLS:ANY"] ) * 100 /
735  ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
736  ( it->second )["% of Total Cycles spent handling FP exceptions"] =
737  ( it->second )["UOPS_DECODED:MS CMASK=1"] * 100 / ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
738  ( it->second )["# of Instructions per Call"] =
739  ( it->second )["INST_RETIRED:ANY_P"] / ( it->second )["BR_INST_EXEC:NEAR_CALLS"];
740 
741  ( it->second )["Instructions Retired"] = ( it->second )["INST_RETIRED:ANY_P"];
742  ( it->second )["ITLB Miss Rate in %"] =
743  ( ( it->second )["ITLB_MISS_RETIRED"] / ( it->second )["INST_RETIRED:ANY_P"] ) * 100;
744 
745  ( it->second )["Branch Instructions"] = ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"];
746  ( it->second )["Load Instructions"] = ( it->second )["MEM_INST_RETIRED:LOADS"];
747  ( it->second )["Store Instructions"] = ( it->second )["MEM_INST_RETIRED:STORES"];
748  ( it->second )["Other Instructions"] =
749  ( it->second )["Instructions Retired"] - ( it->second )["MEM_INST_RETIRED:LOADS"] -
750  ( it->second )["MEM_INST_RETIRED:STORES"] - ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"];
751  ( it->second )["Packed UOPS Retired"] =
752  ( it->second )["SSEX_UOPS_RETIRED:PACKED_DOUBLE"] + ( it->second )["SSEX_UOPS_RETIRED:PACKED_SINGLE"];
753  ( it->second )["CPI"] = ( it->second )["CPU_CLK_UNHALTED:THREAD_P"] / ( it->second )["INST_RETIRED:ANY_P"];
754 
755  double localPerformanceImprovement = ( it->second )["CPI"] / EXPECTED_CPI;
756  double cyclesAfterImprovement = ( it->second )["CPU_CLK_UNHALTED:THREAD_P"] / localPerformanceImprovement;
757  double totalCyclesAfterImprovement =
758  totalCycles - ( it->second )["CPU_CLK_UNHALTED:THREAD_P"] + cyclesAfterImprovement;
759  ( it->second )["iMargin"] = 100 - ( totalCyclesAfterImprovement / totalCycles ) * 100;
760 
761  ( it->second )["Load % of all Instructions"] =
762  ( it->second )["MEM_INST_RETIRED:LOADS"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
763  ( it->second )["Store % of all Instructions"] =
764  ( it->second )["MEM_INST_RETIRED:STORES"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
765  ( it->second )["Branch % of all Instructions"] =
766  ( it->second )["BR_INST_RETIRED:ALL_BRANCHES"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
767  ( it->second )["Other % of all Instructions"] =
768  ( it->second )["Other Instructions"] * 100 / ( it->second )["INST_RETIRED:ANY_P"];
769 
770  ( it->second )["Packed % of all UOPS Retired"] =
771  ( it->second )["Packed UOPS Retired"] * 100 / ( it->second )["UOPS_RETIRED:ANY"];
772  }
773 }

◆ calc_post_deriv_values()

void calc_post_deriv_values ( )

Definition at line 1573 of file pfm_gen_analysis.cpp.

// Stores an "iFactor" entry for every module in C_modules, computed as
//   stallnorm * ( simdnorm + misspnorm + stallnorm ).
// The two branches are identical except for the field used for simdnorm:
// "Packed % of all UOPS Retired" when `nehalem` is set, otherwise
// "Packed SIMD % of all Instructions".
// NOTE(review): normalize() is defined elsewhere; it is presumably expected to
// return a value scaled to the third argument (1 here) -- confirm.
1573  {
1574  if ( nehalem ) {
1575  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1576  ++it ) {
// simdnorm is one minus the normalized packed share.
1577  double simdnorm =
1578  1. - normalize( "Packed % of all UOPS Retired", ( it->second )["Packed % of all UOPS Retired"], 1 );
1579  double misspnorm = normalize( "% of Mispredicted Branches", ( it->second )["% of Mispredicted Branches"], 1 );
1580  double stallnorm = normalize( "Stalled Cycles", ( it->second )["Stalled Cycles"], 1 );
// stallnorm appears both as the outer factor and inside the sum.
1581  ( it->second )["iFactor"] = stallnorm * ( simdnorm + misspnorm + stallnorm );
1582  }
1583  } else {
1584  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1585  ++it ) {
// Same computation as the nehalem branch, but keyed on the packed SIMD instruction share.
1586  double simdnorm =
1587  1. - normalize( "Packed SIMD % of all Instructions", ( it->second )["Packed SIMD % of all Instructions"], 1 );
1588  double misspnorm = normalize( "% of Mispredicted Branches", ( it->second )["% of Mispredicted Branches"], 1 );
1589  double stallnorm = normalize( "Stalled Cycles", ( it->second )["Stalled Cycles"], 1 );
1590  ( it->second )["iFactor"] = stallnorm * ( simdnorm + misspnorm + stallnorm );
1591  }
1592  }
1593 }

◆ check_for_core_caa_events()

bool check_for_core_caa_events ( )

Definition at line 351 of file pfm_gen_analysis.cpp.

// Checks that every event name in core_caa_events is also present in C_events.
// Returns true when all are found. On the first missing event, prints an error
// naming that event to stderr and returns false (later events are not checked).
351  {
352  for ( std::vector<std::string>::const_iterator it = core_caa_events.begin(); it != core_caa_events.end(); ++it ) {
// find() returning C_events.end() means the required event is absent.
353  if ( find( C_events.begin(), C_events.end(), ( *it ) ) == C_events.end() ) {
354  fprintf( stderr, "ERROR: Cannot find event %s!!!\naborting...\n", ( *it ).c_str() );
// The message says "aborting", but this function only returns false.
355  return false;
356  }
357  }
358  return true;
359 }

◆ check_for_nhm_caa_events()

bool check_for_nhm_caa_events ( )

Definition at line 361 of file pfm_gen_analysis.cpp.

// Checks that every event name in nhm_caa_events is also present in C_events.
// Returns true when all are found. On the first missing event, prints an error
// naming that event to stderr and returns false (later events are not checked).
361  {
362  for ( std::vector<std::string>::const_iterator it = nhm_caa_events.begin(); it != nhm_caa_events.end(); ++it ) {
// find() returning C_events.end() means the required event is absent.
363  if ( find( C_events.begin(), C_events.end(), ( *it ) ) == C_events.end() ) {
364  fprintf( stderr, "ERROR: Cannot find event %s!!!\naborting...\n", ( *it ).c_str() );
// The message says "aborting", but this function only returns false.
365  return false;
366  }
367  }
368  return true;
369 }

◆ finalize_S_html_pages()

int finalize_S_html_pages ( const char *  dir)

Definition at line 1362 of file pfm_gen_analysis.cpp.

// Appends the closing "</body>\n</html>\n" markup to one HTML file per key of
// modules_tot_samples; the file path is "<dir>/HTML/<key>.html".
// Terminates the process with exit( 1 ) if a file cannot be opened for
// appending or cannot be closed. Returns 0 once every file has been processed.
1362  {
1363  for ( std::map<std::string, unsigned int>::const_iterator i = modules_tot_samples.begin();
1364  i != modules_tot_samples.end(); ++i ) {
// NOTE(review): the path is assembled with strcpy/strcat into a fixed
// MAX_FILENAME_LENGTH-byte stack buffer with no length check; a long `dir`
// or module name would write past the end of module_filename.
1365  char module_filename[MAX_FILENAME_LENGTH];
1366  strcpy( module_filename, dir );
1367  strcat( module_filename, "/HTML/" );
1368  strcat( module_filename, ( i->first ).c_str() );
1369  strcat( module_filename, ".html" );
// Mode "a": the text is added at the end of the file.
1370  FILE* module_file = fopen( module_filename, "a" );
1371  if ( module_file == NULL ) {
1372  fprintf( stderr, "ERROR: Unable to append to file: %s\naborting...\n", module_filename );
1373  exit( 1 );
1374  }
// NOTE(review): the return value of this fprintf is not checked.
1375  fprintf( module_file, "</body>\n</html>\n" );
// A non-zero fclose() result is treated as fatal.
1376  if ( fclose( module_file ) ) {
1377  fprintf( stderr, "ERROR: Cannot close file %s!!!\naborting...\n", module_filename );
1378  exit( 1 );
1379  }
1380  }
1381  return 0;
1382 }

◆ func_name()

const char* func_name ( const char *  demangled_symbol)

Definition at line 868 of file pfm_gen_analysis.cpp.

// True for characters that can be part of a C++ identifier.
static bool func_name_is_identifier_char( char c ) {
  return isalnum( static_cast<unsigned char>( c ) ) || c == '_';
}

// Find the keyword "operator" in `symbol`, skipping occurrences that are only
// a fragment of a longer identifier (e.g. the tail of "Cooperator").
// Returns a pointer to the keyword, or nullptr when there is none.
static char* func_name_find_operator( char* symbol ) {
  const size_t keyword_length = 8; // strlen( "operator" )
  for ( char* hit = const_cast<char*>( strstr( symbol, "operator" ) ); hit != nullptr;
        hit       = const_cast<char*>( strstr( hit + 1, "operator" ) ) ) {
    const bool starts_word = ( hit == symbol ) || !func_name_is_identifier_char( hit[-1] );
    const bool ends_word   = !func_name_is_identifier_char( hit[keyword_length] );
    if ( starts_word && ends_word ) return hit;
  }
  return nullptr;
}

// Extract the bare function name from a demangled C++ symbol.
//
// NOTE: although the parameter is const-qualified, the buffer is edited in
// place: string terminators are written into it to cut off the argument list
// and template arguments. The returned pointer points into that same buffer.
//
// Behaviour:
//  - If the symbol contains the keyword "operator", the result starts at that
//    keyword and is cut right after the operator token (e.g. "operator+=",
//    "operator new[]"). When the text after the keyword is none of the known
//    tokens (e.g. a conversion operator) the rest of the symbol is returned
//    unmodified from the keyword on.
//  - Otherwise the last ')' is located, the matching '(' is found by counting
//    parentheses backwards, and the symbol is cut there. A trailing
//    "<...>" template argument list is cut as well. The result is the run of
//    identifier characters (letters, digits, '_' and '~') that precedes the cut.
//  - If there is no ')' at all, or nothing precedes the opening parenthesis
//    (e.g. "(anonymous namespace)::x"), the symbol is returned untouched.
//
// demangled_symbol: NUL-terminated, writable symbol text.
//
// Returns a pointer to the function name inside the given buffer.
const char* func_name( const char* demangled_symbol ) {
  char* const symbol = const_cast<char*>( demangled_symbol );

  if ( char* const operator_begin = func_name_find_operator( symbol ) ) {
    // Ordered so that every token comes before any of its own prefixes:
    // the first match is therefore also the longest one.
    static const char* const operator_tokens[] = {
        "delete[]", "delete", "new[]", "new", "<=>", ">>=", "<<=", "->*", "<<", ">>", ">=", "<=", "==", "!=", "|=",
        "&=",       "^=",     "%=",    "/=",  "*=",  "-=",  "+=",  "&&",  "||", "[]", "()", "++", "--", "->", "<",
        ">",        "~",      "!",     "+",   "-",   "*",   "/",   "%",   "^",  "&",  "|",  ",",  "=" };
    char* cursor = operator_begin + 8; // first character after "operator"
    while ( *cursor == ' ' ) ++cursor;
    for ( const char* token : operator_tokens ) {
      const size_t length = strlen( token );
      if ( strncmp( cursor, token, length ) == 0 ) {
        cursor[length] = '\0';
        break;
      }
    }
    return operator_begin;
  }

  char* cursor = const_cast<char*>( strrchr( symbol, ')' ) );
  if ( cursor == nullptr ) return demangled_symbol;

  // Walk backwards to the '(' that opens the argument list.
  int depth = 1;
  while ( depth > 0 && cursor != symbol ) {
    const char c = *( --cursor );
    if ( c == ')' ) {
      ++depth;
    } else if ( c == '(' ) {
      --depth;
    }
  }
  // Nothing in front of the parenthesis: there is no name to extract, and
  // stepping further back would leave the buffer.
  if ( cursor == symbol ) return demangled_symbol;

  *cursor = '\0'; // drop the argument list
  --cursor;       // last character in front of the argument list
  if ( *cursor == '>' ) {
    // Drop the template argument list "<...>" in the same way.
    depth = 1;
    while ( depth > 0 && cursor != symbol ) {
      const char c = *( --cursor );
      if ( c == '>' ) {
        ++depth;
      } else if ( c == '<' ) {
        --depth;
      }
    }
    *cursor = '\0';
  }

  // `cursor` is either the last character of the name or the terminator that
  // replaced '<'. Extend the name to the left over identifier characters,
  // never stepping in front of the buffer.
  char* name_begin = cursor;
  while ( name_begin != symbol && ( func_name_is_identifier_char( name_begin[-1] ) || name_begin[-1] == '~' ) ) {
    --name_begin;
  }
  return name_begin;
}

◆ getTotalCycles()

double getTotalCycles ( )

Definition at line 1599 of file pfm_gen_analysis.cpp.

1599  {
1600  double sum = 0;
1601  if ( nehalem ) {
1602  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1603  ++it ) {
1604  sum += ( it->second )["CPU_CLK_UNHALTED:THREAD_P"];
1605  }
1606  } else {
1607  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1608  ++it ) {
1609  sum += ( it->second )["UNHALTED_CORE_CYCLES"];
1610  }
1611  }
1612  return sum;
1613 }

◆ html_special_chars()

void html_special_chars ( const char *  s,
char *  s_mod 
)

Definition at line 837 of file pfm_gen_analysis.cpp.

// Copy `s` to `s_mod`, replacing the characters that are special in HTML:
//   '<' -> "&lt;"   '>' -> "&gt;"   '&' -> "&amp;"   '"' -> "&quot;"
// All other characters are copied unchanged and the result is NUL-terminated.
//
// The output is written through a moving cursor in one pass over the input;
// the previous strcat-per-character approach re-scanned the whole output for
// every input character.
//
// s:     NUL-terminated input text.
// s_mod: destination buffer. No size is passed, so the caller must provide
//        enough room: the longest replacement is 6 characters, i.e. at most
//        6 * strlen( s ) + 1 bytes are written.
void html_special_chars( const char* s, char* s_mod ) {
  char* out = s_mod;
  for ( const char* in = s; *in != '\0'; ++in ) {
    const char* entity = nullptr;
    switch ( *in ) {
    case '<':
      entity = "&lt;";
      break;
    case '>':
      entity = "&gt;";
      break;
    case '&':
      entity = "&amp;";
      break;
    case '"':
      entity = "&quot;";
      break;
    default:
      break;
    }
    if ( entity == nullptr ) {
      *out++ = *in;
    } else {
      const size_t length = strlen( entity );
      memcpy( out, entity, length );
      out += length;
    }
  }
  *out = '\0';
}

◆ init_core_caa_events()

void init_core_caa_events ( )

Definition at line 283 of file pfm_gen_analysis.cpp.

283  {
284  core_caa_events.push_back( "BRANCH_INSTRUCTIONS_RETIRED" );
285  core_caa_events.push_back( "ILD_STALL" );
286  core_caa_events.push_back( "INST_RETIRED:LOADS" );
287  core_caa_events.push_back( "INST_RETIRED:OTHER" );
288  core_caa_events.push_back( "INST_RETIRED:STORES" );
289  core_caa_events.push_back( "INSTRUCTIONS_RETIRED" );
290  core_caa_events.push_back( "LOAD_BLOCK:OVERLAP_STORE" );
291  core_caa_events.push_back( "LOAD_BLOCK:STA" );
292  core_caa_events.push_back( "LOAD_BLOCK:UNTIL_RETIRE" );
293  core_caa_events.push_back( "MEM_LOAD_RETIRED:DTLB_MISS" );
294  core_caa_events.push_back( "MEM_LOAD_RETIRED:L1D_LINE_MISS" );
295  core_caa_events.push_back( "MEM_LOAD_RETIRED:L2_LINE_MISS" );
296  core_caa_events.push_back( "MISPREDICTED_BRANCH_RETIRED" );
297  // core_caa_events.push_back("RS_UOPS_DISPATCHED");
298  // core_caa_events.push_back("RS_UOPS_DISPATCHED CMASK=1");
299  core_caa_events.push_back( "RS_UOPS_DISPATCHED CMASK=1 INV=1" );
300  core_caa_events.push_back( "SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE" );
301  core_caa_events.push_back( "UNHALTED_CORE_CYCLES" );
302  // core_caa_events.push_back("UOPS_RETIRED:ANY");
303  // core_caa_events.push_back("UOPS_RETIRED:FUSED");
304  // core_caa_events.push_back("IDLE_DURING_DIV");
305 }

◆ init_core_caa_events_displ()

void init_core_caa_events_displ ( )

Definition at line 371 of file pfm_gen_analysis.cpp.

371  {
372  core_caa_events_displ.push_back( "Total Cycles" );
373  core_caa_events_displ.push_back( "Stalled Cycles" );
374  core_caa_events_displ.push_back( "% of Total Cycles" );
375  core_caa_events_displ.push_back( "Instructions Retired" );
376  core_caa_events_displ.push_back( "CPI" );
377  core_caa_events_displ.push_back( "" );
378  core_caa_events_displ.push_back( "iMargin" );
379  core_caa_events_displ.push_back( "iFactor" );
380  core_caa_events_displ.push_back( "" );
381  core_caa_events_displ.push_back( "Counted Stalled Cycles" );
382  core_caa_events_displ.push_back( "" );
383  core_caa_events_displ.push_back( "L2 Miss Impact" );
384  core_caa_events_displ.push_back( "L2 Miss % of counted Stalled Cycles" );
385  core_caa_events_displ.push_back( "" );
386  core_caa_events_displ.push_back( "L2 Hit Impact" );
387  core_caa_events_displ.push_back( "L2 Hit % of counted Stalled Cycles" );
388  core_caa_events_displ.push_back( "" );
389  core_caa_events_displ.push_back( "L1 DTLB Miss Impact" );
390  core_caa_events_displ.push_back( "L1 DTLB Miss % of counted Stalled Cycles" );
391  core_caa_events_displ.push_back( "" );
392  core_caa_events_displ.push_back( "LCP Stalls Impact" );
393  core_caa_events_displ.push_back( "LCP Stalls % of counted Stalled Cycles" );
394  core_caa_events_displ.push_back( "" );
395  core_caa_events_displ.push_back( "Store-Fwd Stalls Impact" );
396  core_caa_events_displ.push_back( "Store-Fwd Stalls % of counted Stalled Cycles" );
397  core_caa_events_displ.push_back( "" );
398  core_caa_events_displ.push_back( "Loads Blocked by Unknown Address Store Impact" );
399  core_caa_events_displ.push_back( "Loads Blocked % of Store-Fwd Stalls Cycles" );
400  core_caa_events_displ.push_back( "Loads Overlapped with Stores Impact" );
401  core_caa_events_displ.push_back( "Loads Overlapped % of Store-Fwd Stalls Cycles" );
402  core_caa_events_displ.push_back( "Loads Spanning across Cache Lines Impact" );
403  core_caa_events_displ.push_back( "Loads Spanning % of Store-Fwd Stalls Cycles" );
404  core_caa_events_displ.push_back( "" );
405  core_caa_events_displ.push_back( "Load Instructions" );
406  core_caa_events_displ.push_back( "Load % of all Instructions" );
407  core_caa_events_displ.push_back( "Store Instructions" );
408  core_caa_events_displ.push_back( "Store % of all Instructions" );
409  core_caa_events_displ.push_back( "Branch Instructions" );
410  core_caa_events_displ.push_back( "Branch % of all Instructions" );
411  core_caa_events_displ.push_back( "Packed SIMD Computational Instructions" );
412  core_caa_events_displ.push_back( "Packed SIMD % of all Instructions" );
413  core_caa_events_displ.push_back( "Other Instructions" );
414  core_caa_events_displ.push_back( "Other % of all Instructions" );
415  core_caa_events_displ.push_back( "" );
416  core_caa_events_displ.push_back( "ITLB Miss Rate in %" );
417  core_caa_events_displ.push_back( "% of Mispredicted Branches" );
418 }

◆ init_nhm_caa_events()

void init_nhm_caa_events ( )

Definition at line 307 of file pfm_gen_analysis.cpp.

307  {
308  nhm_caa_events.push_back( "ARITH:CYCLES_DIV_BUSY" );
309  nhm_caa_events.push_back( "BR_INST_EXEC:ANY" );
310  nhm_caa_events.push_back( "BR_INST_EXEC:DIRECT_NEAR_CALL" );
311  nhm_caa_events.push_back( "BR_INST_EXEC:INDIRECT_NEAR_CALL" );
312  nhm_caa_events.push_back( "BR_INST_EXEC:INDIRECT_NON_CALL" );
313  nhm_caa_events.push_back( "BR_INST_EXEC:NEAR_CALLS" );
314  nhm_caa_events.push_back( "BR_INST_EXEC:NON_CALLS" );
315  nhm_caa_events.push_back( "BR_INST_EXEC:RETURN_NEAR" );
316  nhm_caa_events.push_back( "BR_INST_RETIRED:ALL_BRANCHES" );
317  nhm_caa_events.push_back( "BR_INST_RETIRED:CONDITIONAL" );
318  nhm_caa_events.push_back( "BR_INST_RETIRED:NEAR_CALL" );
319  nhm_caa_events.push_back( "BR_MISP_EXEC:ANY" );
320  nhm_caa_events.push_back( "CPU_CLK_UNHALTED:THREAD_P" );
321  nhm_caa_events.push_back( "DTLB_LOAD_MISSES:WALK_COMPLETED" );
322  nhm_caa_events.push_back( "INST_RETIRED:ANY_P" );
323  nhm_caa_events.push_back( "ITLB_MISSES:WALK_COMPLETED" );
324  nhm_caa_events.push_back( "L2_RQSTS:IFETCH_HIT" );
325  nhm_caa_events.push_back( "L2_RQSTS:IFETCH_MISS" );
326  nhm_caa_events.push_back( "MEM_INST_RETIRED:LOADS" );
327  nhm_caa_events.push_back( "MEM_INST_RETIRED:STORES" );
328  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:L2_HIT" );
329  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:L3_MISS" );
330  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:L3_UNSHARED_HIT" );
331  nhm_caa_events.push_back( "MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM" );
332  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:LOCAL_DRAM" );
333  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM" );
334  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT" );
335  nhm_caa_events.push_back( "MEM_UNCORE_RETIRED:REMOTE_DRAM" );
336  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM" );
337  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM" );
338  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP" );
339  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD" );
340  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM" );
341  nhm_caa_events.push_back( "OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT" );
342  nhm_caa_events.push_back( "RESOURCE_STALLS:ANY" );
343  nhm_caa_events.push_back( "SSEX_UOPS_RETIRED:PACKED_DOUBLE" );
344  nhm_caa_events.push_back( "SSEX_UOPS_RETIRED:PACKED_SINGLE" );
345  nhm_caa_events.push_back( "UOPS_DECODED:MS CMASK=1" );
346  nhm_caa_events.push_back( "UOPS_ISSUED:ANY CMASK=1 INV=1" );
347  nhm_caa_events.push_back( "ITLB_MISS_RETIRED" );
348  nhm_caa_events.push_back( "UOPS_RETIRED:ANY" );
349 }

◆ init_nhm_caa_events_displ()

void init_nhm_caa_events_displ ( )

Definition at line 501 of file pfm_gen_analysis.cpp.

501  {
502  nhm_caa_events_displ.push_back( "Total Cycles" );
503  nhm_caa_events_displ.push_back( "Instructions Retired" );
504  nhm_caa_events_displ.push_back( "CPI" );
505  nhm_caa_events_displ.push_back( "" );
506  nhm_caa_events_displ.push_back( "iMargin" );
507  nhm_caa_events_displ.push_back( "iFactor" );
508  nhm_caa_events_displ.push_back( "" );
509  nhm_caa_events_displ.push_back( "Stalled Cycles" );
510  nhm_caa_events_displ.push_back( "% of Total Cycles" );
511  nhm_caa_events_displ.push_back( "Total Counted Stalled Cycles" );
512  nhm_caa_events_displ.push_back( "" );
513  nhm_caa_events_displ.push_back( "Instruction Starvation % of Total Cycles" );
514  nhm_caa_events_displ.push_back( "# of Instructions per Call" );
515  nhm_caa_events_displ.push_back( "% of Total Cycles spent handling FP exceptions" );
516  nhm_caa_events_displ.push_back( "" );
517  nhm_caa_events_displ.push_back( "Counted Stalled Cycles due to Load Ops" );
518  nhm_caa_events_displ.push_back( "" );
519  nhm_caa_events_displ.push_back( "L2 Hit Impact" );
520  nhm_caa_events_displ.push_back( "L2 Hit % of Load Stalls" );
521  nhm_caa_events_displ.push_back( "" );
522  nhm_caa_events_displ.push_back( "L3 Unshared Hit Impact" );
523  nhm_caa_events_displ.push_back( "L3 Unshared Hit % of Load Stalls" );
524  nhm_caa_events_displ.push_back( "" );
525  nhm_caa_events_displ.push_back( "L2 Other Core Hit Impact" );
526  nhm_caa_events_displ.push_back( "L2 Other Core Hit % of Load Stalls" );
527  nhm_caa_events_displ.push_back( "" );
528  nhm_caa_events_displ.push_back( "L2 Other Core Hit Modified Impact" );
529  nhm_caa_events_displ.push_back( "L2 Other Core Hit Modified % of Load Stalls" );
530  nhm_caa_events_displ.push_back( "" );
531  nhm_caa_events_displ.push_back( "L3 Miss -> Local DRAM Hit Impact" );
532  nhm_caa_events_displ.push_back( "L3 Miss -> Remote DRAM Hit Impact" );
533  nhm_caa_events_displ.push_back( "L3 Miss -> Remote Cache Hit Impact" );
534  nhm_caa_events_displ.push_back( "L3 Miss -> Total Impact" );
535  nhm_caa_events_displ.push_back( "L3 Miss % of Load Stalls" );
536  nhm_caa_events_displ.push_back( "" );
537  nhm_caa_events_displ.push_back( "L1 DTLB Miss Impact" );
538  nhm_caa_events_displ.push_back( "L1 DTLB Miss % of Load Stalls" );
539  nhm_caa_events_displ.push_back( "" );
540  nhm_caa_events_displ.push_back( "Cycles spent during DIV & SQRT Ops" );
541  nhm_caa_events_displ.push_back( "DIV & SQRT Ops % of counted Stalled Cycles" );
542  nhm_caa_events_displ.push_back( "" );
543  nhm_caa_events_displ.push_back( "Total L2 IFETCH misses" );
544  nhm_caa_events_displ.push_back( "% of L2 IFETCH misses" );
545  nhm_caa_events_displ.push_back( "" );
546  nhm_caa_events_displ.push_back( "% of IFETCHes served by Local DRAM" );
547  nhm_caa_events_displ.push_back( "% of IFETCHes served by L3 (Modified)" );
548  nhm_caa_events_displ.push_back( "% of IFETCHes served by L3 (Clean Snoop)" );
549  nhm_caa_events_displ.push_back( "% of IFETCHes served by Remote L2" );
550  nhm_caa_events_displ.push_back( "% of IFETCHes served by Remote DRAM" );
551  nhm_caa_events_displ.push_back( "% of IFETCHes served by L3 (No Snoop)" );
552  nhm_caa_events_displ.push_back( "" );
553  nhm_caa_events_displ.push_back( "Total L2 IFETCH miss Impact" );
554  nhm_caa_events_displ.push_back( "" );
555  nhm_caa_events_displ.push_back( "Cycles IFETCH served by Local DRAM" );
556  nhm_caa_events_displ.push_back( "Local DRAM IFECTHes % Impact" );
557  nhm_caa_events_displ.push_back( "" );
558  nhm_caa_events_displ.push_back( "Cycles IFETCH served by L3 (Modified)" );
559  nhm_caa_events_displ.push_back( "L3 (Modified) IFECTHes % Impact" );
560  nhm_caa_events_displ.push_back( "" );
561  nhm_caa_events_displ.push_back( "Cycles IFETCH served by L3 (Clean Snoop)" );
562  nhm_caa_events_displ.push_back( "L3 (Clean Snoop) IFECTHes % Impact" );
563  nhm_caa_events_displ.push_back( "" );
564  nhm_caa_events_displ.push_back( "Cycles IFETCH served by Remote L2" );
565  nhm_caa_events_displ.push_back( "Remote L2 IFECTHes % Impact" );
566  nhm_caa_events_displ.push_back( "" );
567  nhm_caa_events_displ.push_back( "Cycles IFETCH served by Remote DRAM" );
568  nhm_caa_events_displ.push_back( "Remote DRAM IFECTHes % Impact" );
569  nhm_caa_events_displ.push_back( "" );
570  nhm_caa_events_displ.push_back( "Cycles IFETCH served by L3 (No Snoop)" );
571  nhm_caa_events_displ.push_back( "L3 (No Snoop) IFECTHes % Impact" );
572  nhm_caa_events_displ.push_back( "" );
573  nhm_caa_events_displ.push_back( "Total Branch Instructions Executed" );
574  nhm_caa_events_displ.push_back( "% of Mispredicted Branches" );
575  nhm_caa_events_displ.push_back( "" );
576  nhm_caa_events_displ.push_back( "Direct Near Calls % of Total Branches Executed" );
577  nhm_caa_events_displ.push_back( "Indirect Near Calls % of Total Branches Executed" );
578  nhm_caa_events_displ.push_back( "Indirect Near Non-Calls % of Total Branches Executed" );
579  nhm_caa_events_displ.push_back( "All Near Calls % of Total Branches Executed" );
580  nhm_caa_events_displ.push_back( "All Non Calls % of Total Branches Executed" );
581  nhm_caa_events_displ.push_back( "All Returns % of Total Branches Executed" );
582  nhm_caa_events_displ.push_back( "" );
583  nhm_caa_events_displ.push_back( "Total Branch Instructions Retired" );
584  nhm_caa_events_displ.push_back( "Conditionals % of Total Branches Retired" );
585  nhm_caa_events_displ.push_back( "Near Calls % of Total Branches Retired" );
586  nhm_caa_events_displ.push_back( "" );
587  nhm_caa_events_displ.push_back( "L1 ITLB Miss Impact" );
588  nhm_caa_events_displ.push_back( "ITLB Miss Rate in %" );
589  nhm_caa_events_displ.push_back( "" );
590  nhm_caa_events_displ.push_back( "Branch Instructions" );
591  nhm_caa_events_displ.push_back( "Branch % of all Instructions" );
592  nhm_caa_events_displ.push_back( "" );
593  nhm_caa_events_displ.push_back( "Load Instructions" );
594  nhm_caa_events_displ.push_back( "Load % of all Instructions" );
595  nhm_caa_events_displ.push_back( "" );
596  nhm_caa_events_displ.push_back( "Store Instructions" );
597  nhm_caa_events_displ.push_back( "Store % of all Instructions" );
598  nhm_caa_events_displ.push_back( "" );
599  nhm_caa_events_displ.push_back( "Other Instructions" );
600  nhm_caa_events_displ.push_back( "Other % of all Instructions" );
601  nhm_caa_events_displ.push_back( "" );
602  nhm_caa_events_displ.push_back( "Packed UOPS Retired" );
603  nhm_caa_events_displ.push_back( "Packed % of all UOPS Retired" );
604 }

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 1618 of file pfm_gen_analysis.cpp.

1618  {
1619  if ( argc < 2 || argc > 4 ) {
1620  printf( "\n\nUsage: %s DIRECTORY [--caa] [--csv]\n\n", argv[0] );
1621  exit( 1 );
1622  }
1623 
1624  bool caa = false;
1625  bool csv = false;
1626  for ( int i = 2; i < argc; i++ ) {
1627  if ( !strcmp( argv[i], "--caa" ) ) caa = true;
1628  if ( !strcmp( argv[i], "--csv" ) ) csv = true;
1629  }
1630 
1631  char dir[MAX_FILENAME_LENGTH];
1632  strcpy( dir, argv[1] );
1633  if ( !csv ) {
1634  strcat( dir, "/HTML" );
1635  int res = mkdir( dir, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH );
1636  if ( res != 0 ) {
1637  fprintf( stderr, "ERROR: Cannot create directory %s\naborting...\n", dir );
1638  exit( 1 );
1639  }
1640  }
1641 
1642  DIR* dp;
1643  struct dirent* dirp;
1644  int num_of_modules = 0;
1645  if ( ( dp = opendir( argv[1] ) ) == NULL ) {
1646  printf( "Error(%d) opening %s\n", errno, argv[1] );
1647  return errno;
1648  }
1649  while ( ( dirp = readdir( dp ) ) != NULL ) {
1650  if ( strstr( dirp->d_name, "_S_" ) != NULL && strstr( dirp->d_name, ".txt.gz" ) != NULL && !csv ) {
1651  if ( read_S_events( argv[1], dirp->d_name ) ) {
1652  fprintf( stderr, "ERROR: Cannot read file %s\naborting...\n", dirp->d_name );
1653  exit( 1 );
1654  }
1655  }
1656  }
1657  closedir( dp );
1658  sort( S_events.begin(), S_events.end() );
1659  if ( ( dp = opendir( argv[1] ) ) == NULL ) {
1660  printf( "Error(%d) opening %s\n", errno, argv[1] );
1661  return errno;
1662  }
1663  while ( ( dirp = readdir( dp ) ) != NULL ) {
1664  if ( strstr( dirp->d_name, "_S_" ) != NULL && strstr( dirp->d_name, ".txt.gz" ) != NULL && !csv ) {
1665  if ( read_S_file( argv[1], dirp->d_name ) ) {
1666  fprintf( stderr, "ERROR: Cannot read file %s\naborting...\n", dirp->d_name );
1667  exit( 1 );
1668  }
1669  } else if ( strstr( dirp->d_name, "_C_" ) != NULL && strstr( dirp->d_name, ".txt" ) != NULL ) {
1670  int res = read_C_file( argv[1], dirp->d_name );
1671  if ( res > num_of_modules ) { num_of_modules = res; }
1672  }
1673  }
1674  closedir( dp );
1675 
1676  if ( !csv ) {
1677  if ( finalize_S_html_pages( argv[1] ) ) {
1678  fprintf( stderr, "ERROR: Cannot finalize HTML pages!!!\naborting...\n" );
1679  exit( 1 );
1680  }
1681  }
1682 
1683  char filepath[MAX_FILENAME_LENGTH];
1684  bzero( filepath, MAX_FILENAME_LENGTH );
1685  if ( !csv )
1686  sprintf( filepath, "%s/HTML/index.html", argv[1] );
1687  else
1688  sprintf( filepath, "%s/results.csv", argv[1] );
1689  FILE* fp = fopen( filepath, "w" );
1690  if ( fp == NULL ) {
1691  fprintf( stderr, "ERROR: Cannot create file index.html!!!\naborting...\n" );
1692  exit( 1 );
1693  }
1694 
1695  if ( caa ) {
1696  double totalCycles;
1697  if ( !nehalem ) {
1699  if ( !check_for_core_caa_events() ) {
1700  fprintf( stderr, "(core) ERROR: One or more events for CAA missing!\naborting...\n" );
1701  exit( 1 );
1702  }
1704  totalCycles = getTotalCycles();
1705  calc_core_deriv_values( totalCycles );
1707  if ( !csv ) {
1708  put_C_header( fp, core_caa_events_displ );
1709  put_C_modules( fp, core_caa_events_displ );
1710  } else {
1711  put_C_header_csv( fp, core_caa_events_displ );
1712  put_C_modules_csv( fp, core_caa_events_displ );
1713  }
1714  } else {
1716  if ( !check_for_nhm_caa_events() ) {
1717  fprintf( stderr, "(nehalem) ERROR: One or more events for CAA missing!\naborting...\n" );
1718  exit( 1 );
1719  }
1721  totalCycles = getTotalCycles();
1722  calc_nhm_deriv_values( totalCycles );
1724  if ( !csv ) {
1725  put_C_header( fp, nhm_caa_events_displ );
1726  put_C_modules( fp, nhm_caa_events_displ );
1727  } else {
1728  put_C_header_csv( fp, nhm_caa_events_displ );
1729  put_C_modules_csv( fp, nhm_caa_events_displ );
1730  }
1731  }
1732  if ( !csv ) put_C_footer( fp );
1733  fclose( fp );
1734  } else {
1735  if ( !csv ) {
1736  put_C_header( fp, C_events );
1737  put_C_modules( fp, C_events );
1738  put_C_footer( fp );
1739  } else {
1740  put_C_header_csv( fp, C_events );
1741  put_C_modules_csv( fp, C_events );
1742  }
1743  fclose( fp );
1744  }
1745  if ( !csv ) {
1746  char src[MAX_FILENAME_LENGTH];
1747  char dst[MAX_FILENAME_LENGTH];
1748  sprintf( src, "sorttable.js" );
1749  sprintf( dst, "%s/HTML/sorttable.js", argv[1] );
1750  int fd_src = open( src, O_RDONLY );
1751  if ( fd_src == -1 ) {
1752  fprintf( stderr, "ERROR: Cannot open file \"%s\"!\naborting...\n", src );
1753  exit( 1 );
1754  }
1755  int fd_dst = open( dst, O_WRONLY | O_CREAT | O_TRUNC, 0644 );
1756  if ( fd_dst == -1 ) {
1757  fprintf( stderr, "ERROR: Cannot open file \"%s\" (%s)!\naborting...\n", dst, strerror( errno ) );
1758  exit( 1 );
1759  }
1760  char c;
1761  while ( read( fd_src, &c, 1 ) ) {
1762  if ( write( fd_dst, &c, 1 ) == -1 ) {
1763  std::cerr << "ERROR: failed to write to " << dst << std::endl;
1764  exit( 1 );
1765  }
1766  }
1767  close( fd_dst );
1768  close( fd_src );
1769  }
1770  return 0;
1771 }

◆ normalize()

double normalize ( const std::string field,
double  value,
double  normalizeTo 
)

Definition at line 1554 of file pfm_gen_analysis.cpp.

1554  {
1555  double max = 0;
1556  double counter_value;
1557  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1558  ++it ) {
1559  counter_value = ( it->second )[field];
1560  if ( max < counter_value ) max = counter_value;
1561  }
1562  if ( value > 0 && max > 0 && normalizeTo > 0 ) {
1563  return 1. * value / max * normalizeTo;
1564  } else
1565  return 0;
1566 }

◆ put_C_footer()

void put_C_footer ( FILE *  fp)

Definition at line 1513 of file pfm_gen_analysis.cpp.

// Write the closing tags of the summary page (table, body, html) to `fp`.
void put_C_footer( FILE* fp ) {
  fputs( "</table>\n</body>\n</html>\n", fp );
}

◆ put_C_header()

void put_C_header ( FILE *  fp,
std::vector< std::string > &  columns 
)

Definition at line 1457 of file pfm_gen_analysis.cpp.

// Write the beginning of the HTML summary page to `fp`: document head, the
// opening of the sortable table and its header row.
//
// fp:      destination stream.
// columns: column titles; the row starts with "MODULE NAME" followed by one
//          <th> per entry. An empty title produces a blank white cell.
void put_C_header( FILE* fp, std::vector<std::string>& columns ) {
  // Fixed part of the page, up to and including the first header cell.
  static const char* const preamble =
      "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\n"
      "<html>\n"
      "<head>\n"
      "<title>\n"
      "Analysis Result\n"
      "</title>\n"
      "<script src=\"sorttable.js\"></script>\n"
      "<style>\ntable.sortable thead "
      "{\nbackground-color:#eee;\ncolor:#666666;\nfont-weight:bold;\ncursor:default;\nfont-family:courier;\n}"
      "\n</style>\n"
      "</head>\n"
      "<body link=\"black\">\n"
      "<h1>RESULTS:</h1><br/>Click for detailed symbol view...<p/>\n"
      "<table class=\"sortable\" cellpadding=\"5\">\n"
      "<tr>\n"
      "<th>MODULE NAME</th>\n";
  fputs( preamble, fp );

  for ( const std::string& title : columns ) {
    const char* const text = title.c_str();
    if ( text[0] == '\0' ) {
      fputs( "<th bgcolor=\"#FFFFFF\">&nbsp;</th>\n", fp );
    } else {
      fprintf( fp, "<th>%s</th>\n", text );
    }
  }
  fputs( "</tr>\n", fp );
}

◆ put_C_header_csv()

void put_C_header_csv ( FILE *  fp,
std::vector< std::string > &  columns 
)

Definition at line 1518 of file pfm_gen_analysis.cpp.

// Write the CSV header line to `fp`: "MODULE NAME" followed by every
// non-empty column title, comma separated, and a newline.
//
// fp:      destination stream.
// columns: column titles; empty entries are skipped.
void put_C_header_csv( FILE* fp, std::vector<std::string>& columns ) {
  fputs( "MODULE NAME", fp );
  for ( const std::string& title : columns ) {
    const char* const text = title.c_str();
    if ( text[0] == '\0' ) continue;
    fprintf( fp, ",%s", text );
  }
  fputs( "\n", fp );
}

◆ put_C_modules()

void put_C_modules ( FILE *  fp,
std::vector< std::string > &  columns 
)

Definition at line 1486 of file pfm_gen_analysis.cpp.

1486  {
1487  int index = 0;
1488  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1489  ++it ) {
1490  if ( index % 2 )
1491  fprintf( fp, "<tr bgcolor=\"#FFFFCC\">\n" );
1492  else
1493  fprintf( fp, "<tr bgcolor=\"#CCFFCC\">\n" );
1494  fprintf( fp, "<td style=\"font-family:monospace;font-size:large;color:Black\"><a href=\"%s.html\">%s</a></td>\n",
1495  ( it->first ).c_str(), ( it->first ).c_str() );
1496  for ( std::vector<std::string>::const_iterator jt = columns.begin(); jt != columns.end(); ++jt ) {
1497  if ( strlen( jt->c_str() ) == 0 ) {
1498  fprintf( fp, "<td bgcolor=\"#FFFFFF\">&nbsp;</td>" );
1499  } else {
1500  if ( ( it->second ).find( *jt ) == ( it->second ).end() ) {
1501  fprintf( stderr, "ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", ( *jt ).c_str() );
1502  exit( 1 );
1503  }
1504  fprintf( fp, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\" align=\"right\">%.2f</td>\n",
1505  ( it->second )[*jt] );
1506  }
1507  }
1508  fprintf( fp, "</tr>\n" );
1509  index++;
1510  }
1511 }

◆ put_C_modules_csv()

void put_C_modules_csv ( FILE *  fp,
std::vector< std::string > &  columns 
)

Definition at line 1529 of file pfm_gen_analysis.cpp.

1529  {
1530  for ( std::map<std::string, std::map<std::string, double>>::iterator it = C_modules.begin(); it != C_modules.end();
1531  ++it ) {
1532  fprintf( fp, "%s", ( it->first ).c_str() );
1533  for ( std::vector<std::string>::const_iterator jt = columns.begin(); jt != columns.end(); ++jt ) {
1534  if ( strlen( jt->c_str() ) == 0 ) {
1535  } else {
1536  if ( ( it->second ).find( *jt ) == ( it->second ).end() ) {
1537  fprintf( stderr, "ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", ( *jt ).c_str() );
1538  exit( 1 );
1539  }
1540  fprintf( fp, ",%.2f", ( it->second )[*jt] );
1541  }
1542  }
1543  fprintf( fp, "\n" );
1544  }
1545 }

◆ put_S_module()

void put_S_module ( S_module *  cur_module,
const char *  dir 
)

Definition at line 1045 of file pfm_gen_analysis.cpp.

1045  {
1046  char module_name[MAX_MODULE_NAME_LENGTH];
1047  bzero( module_name, MAX_MODULE_NAME_LENGTH );
1048  strcpy( module_name, ( cur_module->get_module_name() ).c_str() );
1049  char module_filename[MAX_FILENAME_LENGTH];
1050  bzero( module_filename, MAX_FILENAME_LENGTH );
1051  strcpy( module_filename, dir );
1052  strcat( module_filename, "/HTML/" );
1053  strcat( module_filename, module_name );
1054  strcat( module_filename, ".html" );
1055  char event[MAX_EVENT_NAME_LENGTH];
1056  bzero( event, MAX_EVENT_NAME_LENGTH );
1057  strcpy( event, ( cur_module->get_event() ).c_str() );
1058  std::map<std::string, unsigned int>::iterator result = modules_tot_samples.find( cur_module->get_module_name() );
1059  FILE* module_file;
1060  if ( result == modules_tot_samples.end() ) // not found
1061  {
1062  if ( ( !strcmp( event, "UNHALTED_CORE_CYCLES" ) && !nehalem ) ||
1063  ( !strcmp( event, "CPU_CLK_UNHALTED:THREAD_P" ) && nehalem ) ) {
1064  modules_tot_samples.insert(
1066  } else {
1067  modules_tot_samples.insert( std::pair<std::string, unsigned int>( cur_module->get_module_name(), 0 ) );
1068  }
1069  module_file = fopen( module_filename, "w" );
1070  if ( module_file == NULL ) {
1071  fprintf( stderr, "ERROR: Cannot create file %s!!!\naborting...\n", module_filename );
1072  exit( 1 );
1073  }
1074  fprintf( module_file, "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" "
1075  "\"http://www.w3.org/TR/html4/loose.dtd\">\n" );
1076  fprintf( module_file, "<html>\n" );
1077  fprintf( module_file, "<head>\n" );
1078  fprintf( module_file, "<title>\n" );
1079  fprintf( module_file, "%s\n", module_name );
1080  fprintf( module_file, "</title>\n" );
1081  fprintf( module_file, "</head>\n" );
1082  fprintf( module_file, "<body>\n" );
1083  fprintf( module_file, "<h2>%s</h2><br/>Events Sampled:<br/>\n", module_name );
1084  fprintf( module_file, "<ul>\n" );
1085  for ( std::vector<std::string>::const_iterator it = S_events.begin(); it != S_events.end(); ++it ) {
1086  fprintf( module_file, "<li><a href=\"#%s\">%s</a></li>\n", it->c_str(), it->c_str() );
1087  }
1088  fprintf( module_file, "</ul>\n" );
1089  } // if(result == modules_tot_samples.end()) //not found
1090  else {
1091  if ( ( !strcmp( event, "UNHALTED_CORE_CYCLES" ) && !nehalem ) ||
1092  ( !strcmp( event, "CPU_CLK_UNHALTED:THREAD_P" ) && nehalem ) ) {
1093  modules_tot_samples[cur_module->get_module_name()] = cur_module->get_total_num_samples();
1094  }
1095  module_file = fopen( module_filename, "a" );
1096  } // else:: if(result != modules_tot_samples.end()) //found!!
1097  char event_str[MAX_EVENT_NAME_LENGTH];
1098  bzero( event_str, MAX_EVENT_NAME_LENGTH );
1099  strcpy( event_str, event );
1100  if ( cur_module->get_c_mask() > 0 ) {
1101  sprintf( event_str + strlen( event_str ), " CMASK=%u", cur_module->get_c_mask() );
1102  }
1103  if ( cur_module->get_inv_mask() > 0 ) {
1104  sprintf( event_str + strlen( event_str ), " INV=%u", cur_module->get_inv_mask() );
1105  }
1106  fprintf( module_file, "<a name=\"%s\"><a>\n", event_str );
1107  fprintf( module_file, "<table cellpadding=\"5\">\n" );
1108  fprintf( module_file, "<tr bgcolor=\"#EEEEEE\">\n" );
1109  fprintf( module_file,
1110  "<th colspan=\"6\" align=\"left\">%s -- cmask: %u -- invmask: %u -- Total Samples: %u -- "
1111  "Sampling Period: %u</th>\n",
1112  event, cur_module->get_c_mask(), cur_module->get_inv_mask(), cur_module->get_total_num_samples(),
1113  cur_module->get_smpl_period() );
1114  fprintf( module_file, "</tr>\n" );
1115  fprintf( module_file, "<tr bgcolor=\"#EEEEEE\">\n" );
1116  fprintf( module_file, "<th align=\"left\">Samples</th>\n" );
1117  fprintf( module_file, "<th align=\"left\">Percentage</th>\n" );
1118  fprintf( module_file, "<th align=\"left\">Symbol Name</th>\n" );
1119  fprintf( module_file, "<th align=\"left\">Library Name</th>\n" );
1120  fprintf( module_file, "<th align=\"left\">Complete Signature</th>\n" );
1121  fprintf( module_file, "<th align=\"left\">Library Pathname</th>\n" );
1122  fprintf( module_file, "</tr>\n" );
1123  for ( int j = 0; j < 20; j++ ) {
1124  char sym[MAX_SYM_LENGTH];
1125  char sym_mod[MAX_SYM_MOD_LENGTH];
1126  char lib[MAX_LIB_LENGTH];
1127  char lib_mod[MAX_LIB_MOD_LENGTH];
1128  char simple_sym[MAX_SIMPLE_SYM_LENGTH];
1129  char simple_sym_mod[MAX_SIMPLE_SYM_MOD_LENGTH];
1130  char simple_lib[MAX_SIMPLE_LIB_LENGTH];
1131  char simple_lib_mod[MAX_SIMPLE_LIB_MOD_LENGTH];
1132 
1133  bzero( sym, MAX_SYM_LENGTH );
1134  bzero( sym_mod, MAX_SYM_MOD_LENGTH );
1135  bzero( lib, MAX_LIB_LENGTH );
1136  bzero( lib_mod, MAX_LIB_MOD_LENGTH );
1137  bzero( simple_sym, MAX_SIMPLE_SYM_LENGTH );
1138  bzero( simple_sym_mod, MAX_SIMPLE_SYM_MOD_LENGTH );
1139  bzero( simple_lib, MAX_SIMPLE_LIB_LENGTH );
1140  bzero( simple_lib_mod, MAX_SIMPLE_LIB_MOD_LENGTH );
1141 
1143  bzero( index, MAX_SAMPLE_INDEX_LENGTH );
1144  unsigned int value;
1145  bool res = cur_module->get_max( index, value );
1146  if ( !res ) break;
1147  char* sym_end = strchr( index, '%' );
1148  if ( sym_end == NULL ) // error
1149  {
1150  fprintf( stderr, "ERROR: Invalid sym and lib name! : %s\naborting...\n", index );
1151  exit( 1 );
1152  }
1153  memcpy( sym, index, strlen( index ) - strlen( sym_end ) );
1154  strcpy( lib, sym_end + 1 );
1155  char temp[MAX_SYM_LENGTH];
1156  bzero( temp, MAX_SYM_LENGTH );
1157  strcpy( temp, sym );
1158  strcpy( simple_sym, ( func_name( temp ) ) );
1159  if ( strrchr( lib, '/' ) != NULL && *( strrchr( lib, '/' ) + 1 ) != '\0' ) {
1160  strcpy( simple_lib, strrchr( lib, '/' ) + 1 );
1161  } else {
1162  strcpy( simple_lib, lib );
1163  }
1164  if ( j % 2 != 0 ) {
1165  fprintf( module_file, "<tr bgcolor=\"#FFFFCC\">\n" );
1166  } else {
1167  fprintf( module_file, "<tr bgcolor=\"#CCFFCC\">\n" );
1168  }
1169  fprintf( module_file, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%u</td>\n", value );
1170  fprintf( module_file, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%f%%</td>\n",
1171  ( ( (double)( value ) ) / ( (double)( cur_module->get_total_num_samples() ) ) ) * 100 );
1172  html_special_chars( simple_sym, simple_sym_mod );
1173  html_special_chars( simple_lib, simple_lib_mod );
1174  html_special_chars( sym, sym_mod );
1175  html_special_chars( lib, lib_mod );
1176  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n", simple_sym_mod );
1177  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n", simple_lib_mod );
1178  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n", sym_mod );
1179  fprintf( module_file, "<td style=\"font-family:courier;\">%s</td>\n</tr>\n", lib_mod );
1180  }
1181  fprintf( module_file, "</table><br/><br/>\n" );
1182  int res = fclose( module_file );
1183  if ( res ) {
1184  fprintf( stderr, "ERROR: Cannot close file %s!!!\naborting...\n", module_filename );
1185  exit( 1 );
1186  }
1187  return;
1188 }

◆ read_C_file()

int read_C_file ( const char *  dir,
const char *  filename 
)

Definition at line 1388 of file pfm_gen_analysis.cpp.

/**
 * Reads one counting-mode result file and stores per-module averages.
 *
 * The first line holds five whitespace-separated fields: architecture,
 * event name, cmask, inv and sampling period. `nehalem` is set according to
 * whether the architecture is "NHM". The event key is the event name with
 * " CMASK=<n>" / " INV=<n>" appended for non-zero masks; it is appended to
 * C_events.
 *
 * Every following token starting with a letter begins a new module, every
 * token starting with a digit is a value of the current module. For each
 * module the arithmetic mean of its values is stored in
 * C_modules[module][event key]; a module without values is stored as 0.0
 * rather than the NaN that 0/0 would give.
 *
 * Fatal conditions (message on stderr, then exit(1)): the file cannot be
 * opened, or the header does not provide all five fields.
 *
 * @param dir      Directory containing the file.
 * @param filename File name inside @p dir.
 * @return Number of modules found in the file.
 */
int read_C_file( const char* dir, const char* filename ) {
  char event[MAX_EVENT_NAME_LENGTH]    = {};
  char arch[MAX_ARCH_NAME_LENGTH]      = {};
  char line[MAX_LINE_LENGTH]           = {};
  char cmask_str[MAX_CMASK_STR_LENGTH] = {};
  char inv_str[MAX_INV_STR_LENGTH]     = {};
  char sp_str[MAX_SP_STR_LENGTH]       = {};

  const std::string path_name = std::string( dir ) + "/" + filename;
  FILE*             fp        = fopen( path_name.c_str(), "r" );
  if ( fp == nullptr ) {
    std::cerr << "ERROR: Unable to open input file: " << path_name << "\naborting..." << std::endl;
    exit( 1 );
  }

  // Build "%<w>s ..." so that fscanf can never overrun the fixed-size buffers.
  const std::string fmt1 =
      std::format( "%{:}s %{:}s %{:}s %{:}s %{:}s\n", MAX_ARCH_NAME_LENGTH - 1, MAX_EVENT_NAME_LENGTH - 1,
                   MAX_CMASK_STR_LENGTH - 1, MAX_INV_STR_LENGTH - 1, MAX_SP_STR_LENGTH - 1 );
  const int stat = fscanf( fp, fmt1.c_str(), arch, event, cmask_str, inv_str, sp_str );
  if ( stat != 5 ) {
    std::cerr << "ERROR: failed to parse " << path_name << std::endl;
    exit( 1 );
  }
  nehalem = ( strcmp( arch, "NHM" ) == 0 );

  std::string event_str( event );
  if ( atoi( cmask_str ) > 0 ) {
    event_str += " CMASK=";
    event_str += cmask_str;
  }
  if ( atoi( inv_str ) > 0 ) {
    event_str += " INV=";
    event_str += inv_str;
  }
  C_events.push_back( event_str );

  int         number_of_modules = 0;
  long        cur_sum           = 0;
  int         no_of_values      = 0;
  std::string cur_module_name;

  // Stores the mean of the values collected for the current module.
  const auto store_average = [&]() {
    C_modules[cur_module_name][event_str] =
        no_of_values > 0 ? static_cast<double>( cur_sum ) / no_of_values : 0.0;
  };

  const std::string fmt2 = std::format( "%{:}s\n", MAX_LINE_LENGTH - 1 );
  while ( fscanf( fp, fmt2.c_str(), line ) != EOF ) {
    // <ctype.h> classifiers need a value representable as unsigned char.
    const unsigned char first = static_cast<unsigned char>( line[0] );
    if ( isalpha( first ) ) { // module
      if ( number_of_modules > 0 ) {
        store_average();
        cur_sum      = 0;
        no_of_values = 0;
      }
      cur_module_name = line;
      number_of_modules++;
    } else if ( isdigit( first ) ) { // value
      cur_sum += strtol( line, nullptr, 10 );
      no_of_values++;
    }
  }
  // Last module; nothing is stored when the file contained no module at all.
  if ( number_of_modules > 0 ) { store_average(); }

  fclose( fp );
  return number_of_modules;
}

◆ read_S_events()

int read_S_events ( const char *  dir,
const char *  filename 
)

Definition at line 1312 of file pfm_gen_analysis.cpp.

/**
 * Reads the header line of a gzip-compressed sampling result file and
 * records its event in S_events.
 *
 * The header holds five whitespace-separated fields: architecture, event
 * name, cmask, inv and sampling period. The recorded string is the event
 * name with " CMASK=<n>" / " INV=<n>" appended for non-zero masks. If the
 * header cannot be read or parsed, the fields that were not parsed stay
 * empty and whatever event string results (possibly empty) is still
 * recorded.
 *
 * If the file cannot be opened, a message is printed to stderr and the
 * process exits with status 1.
 *
 * @param dir      Directory containing the file.
 * @param filename File name inside @p dir.
 * @return Always 0.
 */
int read_S_events( const char* dir, const char* filename ) {
  const std::string path_name = std::string( dir ) + "/" + filename;
  gzFile            res_file  = gzopen( path_name.c_str(), "rb" );
  if ( res_file == NULL ) {
    fprintf( stderr, "ERROR: Unable to open input file: %s\naborting...\n", filename );
    exit( 1 );
  }

  char event[MAX_EVENT_NAME_LENGTH]    = {};
  char arch[MAX_ARCH_NAME_LENGTH]      = {};
  char line[MAX_LINE_LENGTH]           = {};
  char cmask_str[MAX_CMASK_STR_LENGTH] = {};
  char inv_str[MAX_INV_STR_LENGTH]     = {};
  char sp_str[MAX_SP_STR_LENGTH]       = {};

  if ( gzgets( res_file, line, MAX_LINE_LENGTH ) == Z_NULL ) {
    line[0] = '\0'; // nothing could be read: treat it as an empty header
  }
  // Strip the trailing newline; the length check keeps an empty line from indexing line[-1].
  const size_t line_len = strlen( line );
  if ( line_len > 0 && line[line_len - 1] == '\n' ) { line[line_len - 1] = '\0'; }

  // Build "%<w>s ..." so that sscanf can never overrun the fixed-size buffers.
  const std::string fmt =
      std::format( "%{:}s %{:}s %{:}s %{:}s %{:}s\n", MAX_ARCH_NAME_LENGTH - 1, MAX_EVENT_NAME_LENGTH - 1,
                   MAX_CMASK_STR_LENGTH - 1, MAX_INV_STR_LENGTH - 1, MAX_SP_STR_LENGTH - 1 );
  sscanf( line, fmt.c_str(), arch, event, cmask_str, inv_str, sp_str );

  std::string event_str( event );
  if ( atoi( cmask_str ) > 0 ) {
    event_str += " CMASK=";
    event_str += cmask_str;
  }
  if ( atoi( inv_str ) > 0 ) {
    event_str += " INV=";
    event_str += inv_str;
  }
  S_events.push_back( event_str );

  gzclose( res_file );
  return 0;
}

◆ read_S_file()

int read_S_file ( const char *  dir,
const char *  filename 
)

Definition at line 1197 of file pfm_gen_analysis.cpp.

/**
 * Reads one gzip-compressed sampling result file and writes the HTML report
 * of every module it contains through put_S_module().
 *
 * File layout, as parsed here:
 *  - header line: "<arch> <event> <cmask> <inv> <sampling period>";
 *    `nehalem` is set according to whether <arch> is "NHM";
 *  - a line without any space starts a module: "<module name>%<total>";
 *  - any other line is a sample: "<symbol> <library> <offset> <count>".
 *
 * For a sample whose library path can be resolved with realpath(), the
 * symbol is looked up by offset in the cached FileInfo for that library
 * (libsInfo) and demangled when possible; "???" is used if the lookup gives
 * nothing. Otherwise the symbol and library names from the file are used
 * unchanged. Samples are added to the current module under the key
 * "<symbol>%<library>".
 *
 * Fatal conditions (message on stderr, then exit(1)): the file cannot be
 * opened, or a module line lacks the '%' separator.
 *
 * @param dir      Directory containing the file; also the report base dir.
 * @param filename File name inside @p dir.
 * @return Always 0.
 */
int read_S_file( const char* dir, const char* filename ) {
  const std::string path_name = std::string( dir ) + "/" + filename;
  gzFile            res_file  = gzopen( path_name.c_str(), "rb" );
  if ( res_file == NULL ) {
    fprintf( stderr, "ERROR: Unable to open input file: %s\naborting...\n", filename );
    exit( 1 );
  }

  // Removes one trailing '\n'; safe for empty strings.
  const auto strip_newline = []( char* text ) {
    const size_t len = strlen( text );
    if ( len > 0 && text[len - 1] == '\n' ) { text[len - 1] = '\0'; }
  };

  char         line[MAX_LINE_LENGTH]        = {};
  char         event[MAX_EVENT_NAME_LENGTH] = {};
  char         arch[MAX_ARCH_NAME_LENGTH]   = {};
  unsigned int cmask                        = 0; // stay 0 if the header does not provide them
  unsigned int inv                          = 0;
  unsigned int sp                           = 0;

  // Header line.
  if ( gzgets( res_file, line, MAX_LINE_LENGTH ) == Z_NULL ) { line[0] = '\0'; }
  strip_newline( line );
  const std::string header_fmt =
      std::format( "%{:}s %{:}s %u %u %u", MAX_ARCH_NAME_LENGTH - 1, MAX_EVENT_NAME_LENGTH - 1 );
  sscanf( line, header_fmt.c_str(), arch, event, &cmask, &inv, &sp );
  nehalem = ( strcmp( arch, "NHM" ) == 0 );

  // Width-limited format for sample lines, built once for the whole file.
  const std::string sample_fmt = std::format( "%{:}s %{:}s %u %u", MAX_SYM_LENGTH - 1, MAX_LIB_LENGTH - 1 );

  auto         cur_module = std::make_unique<S_module>();
  unsigned int module_num = 0;

  while ( gzgets( res_file, line, MAX_LINE_LENGTH ) != Z_NULL ) {
    strip_newline( line );

    if ( strchr( line, ' ' ) == NULL ) { // module
      if ( module_num > 0 ) {
        // Flush the module collected so far before starting the next one.
        put_S_module( cur_module.get(), dir );
        cur_module->clear();
      }
      module_num++;
      const char* end_sym = strchr( line, '%' );
      if ( end_sym == NULL ) {
        fprintf( stderr, "ERROR: Invalid module name. \nLINE: %s\naborting...\n", line );
        exit( 1 );
      }
      const std::string cur_module_name( line, end_sym );
      cur_module->init( cur_module_name.c_str(), arch, event, cmask, inv, sp );
      cur_module->set_total( atoi( end_sym + 1 ) );
      continue;
    }

    // symbol, libName, libOffset, value
    unsigned int value = 0, libOffset = 0;
    char         symbol[MAX_SYM_LENGTH]  = {};
    char         libName[MAX_LIB_LENGTH] = {};
    sscanf( line, sample_fmt.c_str(), symbol, libName, &libOffset, &value );

    std::string final_sym;
    std::string final_lib;
    char        realPathName_s[FILENAME_MAX] = {};
    const char* realPathName                 = realpath( libName, realPathName_s );
    if ( realPathName != NULL && realPathName[0] != '\0' ) {
      if ( libsInfo.find( realPathName ) == libsInfo.end() ) {
        libsInfo[realPathName] = FileInfo( realPathName, true );
      }
      const char* temp_sym = libsInfo[realPathName].symbolByOffset( libOffset );
      if ( temp_sym != NULL && temp_sym[0] != '\0' ) {
        int   status           = 0;
        char* demangled_symbol = abi::__cxa_demangle( temp_sym, NULL, NULL, &status );
        if ( status == 0 && demangled_symbol != NULL ) {
          final_sym = demangled_symbol;
        } else {
          final_sym = temp_sym; // not a mangled C++ name: keep it as found
        }
        free( demangled_symbol ); // free(NULL) is a no-op
      } else {
        final_sym = "???";
      }
      final_lib = realPathName;
    } else {
      final_sym = symbol;
      final_lib = libName;
    }

    const std::string index = final_sym + "%" + final_lib;
    cur_module->add_sample( index.c_str(), value );
  }

  // Last module; skipped when the file never declared one.
  if ( module_num > 0 ) {
    put_S_module( cur_module.get(), dir );
    cur_module->clear();
  }
  gzclose( res_file );
  return 0;
}

◆ skipString()

bool skipString ( const char *  strptr,
const char *  srcbuffer,
const char **  dstbuffer 
)

Definition at line 146 of file pfm_gen_analysis.cpp.

/**
 * Checks whether @p srcbuffer starts with @p strptr and, if so, steps over it.
 *
 * @param strptr    Prefix to look for (NUL-terminated).
 * @param srcbuffer Text to inspect (NUL-terminated).
 * @param dstbuffer Receives the position right after the prefix on success;
 *                  left untouched on failure.
 * @return true if the prefix matched, false otherwise.
 */
bool skipString( const char* strptr, const char* srcbuffer, const char** dstbuffer ) {
  const size_t prefix_len = strlen( strptr ); // measured once, used for compare and advance
  if ( strncmp( srcbuffer, strptr, prefix_len ) != 0 ) { return false; }
  *dstbuffer = srcbuffer + prefix_len;
  return true;
}

◆ skipWhitespaces()

bool skipWhitespaces ( const char *  srcbuffer,
const char **  destbuffer 
)

Definition at line 132 of file pfm_gen_analysis.cpp.

/**
 * Steps over a run of one or more whitespace characters.
 *
 * @param srcbuffer  Text to inspect (NUL-terminated).
 * @param destbuffer Receives the position of the first non-whitespace
 *                   character on success; left untouched on failure.
 * @return true if @p srcbuffer started with at least one whitespace
 *         character, false otherwise.
 */
bool skipWhitespaces( const char* srcbuffer, const char** destbuffer ) {
  // isspace() is only defined for EOF and unsigned char values, so plain
  // (possibly negative) chars are converted first.
  if ( !isspace( static_cast<unsigned char>( *srcbuffer ) ) ) { return false; }
  do { ++srcbuffer; } while ( isspace( static_cast<unsigned char>( *srcbuffer ) ) );
  *destbuffer = srcbuffer;
  return true;
}
137 }
std::strcpy
T strcpy(T... args)
MAX_CMASK_STR_LENGTH
#define MAX_CMASK_STR_LENGTH
Definition: pfm_gen_analysis.cpp:95
I7_L1_ITLB_WALK_COMPLETED_CYCLES
#define I7_L1_ITLB_WALK_COMPLETED_CYCLES
Definition: pfm_gen_analysis.cpp:61
std::strtol
T strtol(T... args)
init_core_caa_events
void init_core_caa_events()
Definition: pfm_gen_analysis.cpp:283
std::isdigit
T isdigit(T... args)
html_special_chars
void html_special_chars(const char *s, char *s_mod)
Definition: pfm_gen_analysis.cpp:837
std::strcmp
T strcmp(T... args)
I7_OTHER_CORE_L2_HITM_CYCLES
#define I7_OTHER_CORE_L2_HITM_CYCLES
Definition: pfm_gen_analysis.cpp:65
std::strrchr
T strrchr(T... args)
std::strlen
T strlen(T... args)
MAX_SYM_MOD_LENGTH
#define MAX_SYM_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:83
std::string
STL class.
put_C_modules
void put_C_modules(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1486
plotBacklogPyRoot.argc
argc
Definition: plotBacklogPyRoot.py:173
std::pair
init_nhm_caa_events
void init_nhm_caa_events()
Definition: pfm_gen_analysis.cpp:307
I7_IFETCH_L2_MISS_L3_HITM
#define I7_IFETCH_L2_MISS_L3_HITM
Definition: pfm_gen_analysis.cpp:73
gaudirun.s
string s
Definition: gaudirun.py:346
std::vector
STL class.
std::find
T find(T... args)
MAX_LIB_MOD_LENGTH
#define MAX_LIB_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:85
finalize_S_html_pages
int finalize_S_html_pages(const char *dir)
Definition: pfm_gen_analysis.cpp:1362
CORE_UNKNOWN_ADDR_STORE_CYCLES
#define CORE_UNKNOWN_ADDR_STORE_CYCLES
Definition: pfm_gen_analysis.cpp:55
std::strcat
T strcat(T... args)
put_C_header
void put_C_header(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1457
CORE_L1_DTLB_MISS_CYCLES
#define CORE_L1_DTLB_MISS_CYCLES
Definition: pfm_gen_analysis.cpp:53
I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES
#define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:66
S_module::get_event
const std::string & get_event()
Definition: pfm_gen_analysis.cpp:815
std::isalnum
T isalnum(T... args)
std::fscanf
T fscanf(T... args)
gaudirun.c
c
Definition: gaudirun.py:525
MAX_EVENT_NAME_LENGTH
#define MAX_EVENT_NAME_LENGTH
Definition: pfm_gen_analysis.cpp:91
std::strerror
T strerror(T... args)
S_module::get_total_num_samples
unsigned int get_total_num_samples()
Definition: pfm_gen_analysis.cpp:830
std::sort
T sort(T... args)
I7_L2_HIT_CYCLES
#define I7_L2_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:62
EXPECTED_CPI
#define EXPECTED_CPI
Definition: pfm_gen_analysis.cpp:78
std::vector::push_back
T push_back(T... args)
S_module::get_max
bool get_max(char *index, unsigned int &value)
Definition: pfm_gen_analysis.cpp:820
compareOutputFiles.sp
sp
Definition: compareOutputFiles.py:506
I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT
#define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT
Definition: pfm_gen_analysis.cpp:70
I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD
#define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD
Definition: pfm_gen_analysis.cpp:74
fixtures.stderr
Generator[bytes, None, None] stderr(subprocess.CompletedProcess completed_process)
Definition: fixtures.py:147
read_C_file
int read_C_file(const char *dir, const char *filename)
Definition: pfm_gen_analysis.cpp:1388
normalize
double normalize(const std::string &field, double value, double normalizeTo)
Definition: pfm_gen_analysis.cpp:1554
S_module::clear
void clear()
Definition: pfm_gen_analysis.cpp:789
CORE_L2_HIT_CYCLES
#define CORE_L2_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:52
read_S_file
int read_S_file(const char *dir, const char *filename)
Definition: pfm_gen_analysis.cpp:1197
calc_nhm_deriv_values
void calc_nhm_deriv_values(double totalCycles)
Definition: pfm_gen_analysis.cpp:606
S_module::get_c_mask
unsigned int get_c_mask()
Definition: pfm_gen_analysis.cpp:813
std::strchr
T strchr(T... args)
read_S_events
int read_S_events(const char *dir, const char *filename)
Definition: pfm_gen_analysis.cpp:1312
std::fprintf
T fprintf(T... args)
MAX_SIMPLE_LIB_MOD_LENGTH
#define MAX_SIMPLE_LIB_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:89
std::fclose
T fclose(T... args)
MAX_LIB_LENGTH
#define MAX_LIB_LENGTH
Definition: pfm_gen_analysis.cpp:84
I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP
#define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP
Definition: pfm_gen_analysis.cpp:71
MAX_SIMPLE_SYM_LENGTH
#define MAX_SIMPLE_SYM_LENGTH
Definition: pfm_gen_analysis.cpp:86
ProduceConsume.j
j
Definition: ProduceConsume.py:104
std::cerr
I7_L1_DTLB_WALK_COMPLETED_CYCLES
#define I7_L1_DTLB_WALK_COMPLETED_CYCLES
Definition: pfm_gen_analysis.cpp:60
std::isalpha
T isalpha(T... args)
std::string::c_str
T c_str(T... args)
std::atoi
T atoi(T... args)
I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT
#define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT
Definition: pfm_gen_analysis.cpp:69
func_name
const char * func_name(const char *demangled_symbol)
Definition: pfm_gen_analysis.cpp:868
std::fopen
T fopen(T... args)
getTotalCycles
double getTotalCycles()
Definition: pfm_gen_analysis.cpp:1599
format
GAUDI_API std::string format(const char *,...)
MsgStream format utility "a la sprintf(...)".
Definition: MsgStream.cpp:119
CORE_LCP_STALL_CYCLES
#define CORE_LCP_STALL_CYCLES
Definition: pfm_gen_analysis.cpp:54
check_for_nhm_caa_events
bool check_for_nhm_caa_events()
Definition: pfm_gen_analysis.cpp:361
CORE_OVERLAPPING_CYCLES
#define CORE_OVERLAPPING_CYCLES
Definition: pfm_gen_analysis.cpp:56
std::map
STL class.
S_module::get_inv_mask
unsigned int get_inv_mask()
Definition: pfm_gen_analysis.cpp:812
put_C_footer
void put_C_footer(FILE *fp)
Definition: pfm_gen_analysis.cpp:1513
put_C_modules_csv
void put_C_modules_csv(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1529
cpluginsvc.n
n
Definition: cpluginsvc.py:234
MAX_MODULE_NAME_LENGTH
#define MAX_MODULE_NAME_LENGTH
Definition: pfm_gen_analysis.cpp:92
S_module::get_module_name
const std::string & get_module_name()
Definition: pfm_gen_analysis.cpp:829
init_core_caa_events_displ
void init_core_caa_events_displ()
Definition: pfm_gen_analysis.cpp:371
S_module::add_sample
void add_sample(const char *index, unsigned int value)
Definition: pfm_gen_analysis.cpp:816
CORE_SPAN_ACROSS_CACHE_LINE_CYCLES
#define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES
Definition: pfm_gen_analysis.cpp:57
std::strstr
T strstr(T... args)
MAX_SYM_LENGTH
#define MAX_SYM_LENGTH
Definition: pfm_gen_analysis.cpp:82
hivetimeline.read
def read(f, regex=".*", skipevents=0)
Definition: hivetimeline.py:32
std::strncmp
T strncmp(T... args)
std::endl
T endl(T... args)
put_C_header_csv
void put_C_header_csv(FILE *fp, std::vector< std::string > &columns)
Definition: pfm_gen_analysis.cpp:1518
S_module::get_smpl_period
unsigned int get_smpl_period()
Definition: pfm_gen_analysis.cpp:811
std::vector::begin
T begin(T... args)
std::map::insert
T insert(T... args)
I7_OTHER_CORE_L2_HIT_CYCLES
#define I7_OTHER_CORE_L2_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:64
CORE_L2_MISS_CYCLES
#define CORE_L2_MISS_CYCLES
Definition: pfm_gen_analysis.cpp:51
fmt
MAX_SIMPLE_LIB_LENGTH
#define MAX_SIMPLE_LIB_LENGTH
Definition: pfm_gen_analysis.cpp:88
MAX_LINE_LENGTH
#define MAX_LINE_LENGTH
Definition: pfm_gen_analysis.cpp:90
FileInfo
Definition: pfm_gen_analysis.cpp:152
MAX_SAMPLE_INDEX_LENGTH
#define MAX_SAMPLE_INDEX_LENGTH
Definition: pfm_gen_analysis.cpp:81
I7_IFETCH_L2_MISS_L3_HIT_SNOOP
#define I7_IFETCH_L2_MISS_L3_HIT_SNOOP
Definition: pfm_gen_analysis.cpp:72
ReadAndWriteWhiteBoard.dst
dst
Definition: ReadAndWriteWhiteBoard.py:33
std::isspace
T isspace(T... args)
calc_post_deriv_values
void calc_post_deriv_values()
Definition: pfm_gen_analysis.cpp:1573
std::free
T free(T... args)
plotSpeedupsPyRoot.line
line
Definition: plotSpeedupsPyRoot.py:198
put_S_module
void put_S_module(S_module *cur_module, const char *dir)
Definition: pfm_gen_analysis.cpp:1045
std::memcpy
T memcpy(T... args)
MAX_INV_STR_LENGTH
#define MAX_INV_STR_LENGTH
Definition: pfm_gen_analysis.cpp:96
check_for_core_caa_events
bool check_for_core_caa_events()
Definition: pfm_gen_analysis.cpp:351
std::vector::end
T end(T... args)
std::max
T max(T... args)
I7_L3_UNSHARED_HIT_CYCLES
#define I7_L3_UNSHARED_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:63
graphanalysis.filename
filename
Definition: graphanalysis.py:130
S_module::init
void init(const char *name, const char *architecture, const char *event_name, unsigned int c_mask, unsigned int inv_mask, unsigned int smpl_period)
Definition: pfm_gen_analysis.cpp:798
MAX_FILENAME_LENGTH
#define MAX_FILENAME_LENGTH
Definition: pfm_gen_analysis.cpp:80
I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES
#define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:68
init_nhm_caa_events_displ
void init_nhm_caa_events_displ()
Definition: pfm_gen_analysis.cpp:501
calc_core_deriv_values
void calc_core_deriv_values(double totalCycles)
Definition: pfm_gen_analysis.cpp:420
I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES
#define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES
Definition: pfm_gen_analysis.cpp:67
S_module::set_total
void set_total(unsigned int total)
Definition: pfm_gen_analysis.cpp:807
std::exit
T exit(T... args)
Gaudi::ParticleProperties::index
size_t index(const Gaudi::ParticleProperty *property, const Gaudi::Interfaces::IParticlePropertySvc *service)
helper utility for mapping of Gaudi::ParticleProperty object into non-negative integral sequential id...
Definition: IParticlePropertySvc.cpp:39
MAX_SIMPLE_SYM_MOD_LENGTH
#define MAX_SIMPLE_SYM_MOD_LENGTH
Definition: pfm_gen_analysis.cpp:87
gaudirun.argv
list argv
Definition: gaudirun.py:327
MAX_SP_STR_LENGTH
#define MAX_SP_STR_LENGTH
Definition: pfm_gen_analysis.cpp:97
S_module
Definition: pfm_gen_analysis.cpp:776
MAX_ARCH_NAME_LENGTH
#define MAX_ARCH_NAME_LENGTH
Definition: pfm_gen_analysis.cpp:94