33 #include <sys/types.h>
// Per-event cycle costs for the Core-architecture metrics. Each constant is a
// multiplier applied to a raw event count to obtain an "... Impact" value
// (e.g. CORE_L2_MISS_CYCLES scales MEM_LOAD_RETIRED:L2_LINE_MISS into
// "L2 Miss Impact").
51 #define CORE_L2_MISS_CYCLES 200
52 #define CORE_L2_HIT_CYCLES 14.5
53 #define CORE_L1_DTLB_MISS_CYCLES 10
54 #define CORE_LCP_STALL_CYCLES 6
55 #define CORE_UNKNOWN_ADDR_STORE_CYCLES 5
56 #define CORE_OVERLAPPING_CYCLES 6
57 #define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES 20
// Per-event cycle costs for the metrics computed from the nhm_caa_events
// counters (e.g. I7_L2_HIT_CYCLES scales MEM_LOAD_RETIRED:L2_HIT into
// "L2 Hit Impact").
60 #define I7_L1_DTLB_WALK_COMPLETED_CYCLES 35
61 #define I7_L1_ITLB_WALK_COMPLETED_CYCLES 35
62 #define I7_L2_HIT_CYCLES 6
63 #define I7_L3_UNSHARED_HIT_CYCLES 35
64 #define I7_OTHER_CORE_L2_HIT_CYCLES 60
65 #define I7_OTHER_CORE_L2_HITM_CYCLES 75
66 #define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES 225 // average of 200 (not modified) and 225-250 (modified)
67 #define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES 360 // average of 350 (not modified) and 370 (modified)
68 #define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES 180
// Costs for instruction fetches; presumably the multipliers behind the
// "Cycles IFETCH served by ..." metrics (the lines using them are not visible
// here -- confirm).
69 #define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT 200
70 #define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT 350
71 #define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP 35
72 #define I7_IFETCH_L2_MISS_L3_HIT_SNOOP 60
73 #define I7_IFETCH_L2_MISS_L3_HITM 75
74 #define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD 180
// Upper bound on the number of modules (usage not visible in this part of the file).
76 #define MAX_MODULES 1000
// Reference cycles-per-instruction value: the measured CPI is divided by it to
// obtain the improvement factor used for the "iMargin" metric.
78 #define EXPECTED_CPI 0.25
// Length limits; presumably capacities of fixed-size character buffers
// (their uses are not visible in this part of the file -- confirm).
80 #define MAX_FILENAME_LENGTH 1024
81 #define MAX_SAMPLE_INDEX_LENGTH 10000
82 #define MAX_SYM_LENGTH 15000
83 #define MAX_SYM_MOD_LENGTH 20000
84 #define MAX_LIB_LENGTH 5000
85 #define MAX_LIB_MOD_LENGTH 7000
86 #define MAX_SIMPLE_SYM_LENGTH 300
87 #define MAX_SIMPLE_SYM_MOD_LENGTH 500
88 #define MAX_SIMPLE_LIB_LENGTH 300
89 #define MAX_SIMPLE_LIB_MOD_LENGTH 500
90 #define MAX_LINE_LENGTH 20000
91 #define MAX_EVENT_NAME_LENGTH 150
92 #define MAX_MODULE_NAME_LENGTH 250
93 #define MAX_VALUE_STRING_LENGTH 250
94 #define MAX_ARCH_NAME_LENGTH 20
95 #define MAX_CMASK_STR_LENGTH 5
96 #define MAX_INV_STR_LENGTH 5
97 #define MAX_SP_STR_LENGTH 50
// Presumably the chunk size used when reading from a pipe -- confirm at the point of use.
99 #define PIPE_BUFFER_LENGTH 1000
// Run `cmd` through popen() and keep the read end of the resulting pipe.
104 pipe = popen( cmd,
"r" );
// Message reported when the pipe is unusable (the guarding condition is not
// visible in this part of the file).
106 printf(
"Cannot open pipe. Exiting...\n" );
// Read until end-of-file is flagged on the pipe.
// NOTE(review): feof() only becomes true after a read has already hit EOF, so
// the loop body (not visible here) must check the result of its read call to
// avoid processing the last chunk twice -- confirm.
112 while ( !feof(
pipe ) ) {
// Expose `result` through an input string stream owned by `iss`.
116 iss = std::make_unique<std::istringstream>( result, std::istringstream::in );
// The character at the current position must be whitespace; srcbuffer is
// advanced past it in either case, and the function gives up with false when
// it is not whitespace.
133 if ( !isspace( *srcbuffer++ ) ) {
return false; }
// Consume any additional whitespace that follows.
134 while ( isspace( *srcbuffer ) ) { srcbuffer++; }
// Hand back the position of the first non-whitespace character.
135 *destbuffer = srcbuffer;
// Checks that srcbuffer begins with the literal text strptr and, if so, steps
// over it.
//   strptr    - NUL-terminated prefix that is expected at srcbuffer
//   srcbuffer - NUL-terminated text to examine
//   dstbuffer - receives the position just past the prefix on a match;
//               left untouched when the prefix does not match
// Returns false when the prefix does not match (the return statement of the
// matching path is not visible in this part of the file).
146 bool skipString(
const char* strptr,
const char* srcbuffer,
const char** dstbuffer ) {
// strncmp() yields non-zero when the first strlen(strptr) characters differ.
147 if ( strncmp( srcbuffer, strptr, strlen( strptr ) ) ) {
return false; }
148 *dstbuffer = srcbuffer + strlen( strptr );
165 if ( i->OFFSET == offset ) {
return i->NAME.c_str(); }
171 return i->NAME.c_str();
// Scan the output of `objdump -p` for a line of the shape
//   LOAD ... off <hex> ... vaddr <hex>
// and derive vmbase from it. Lines that do not match are skipped.
// (Statements between the visible checks -- reading the line, skipping
// separators -- are not shown in this part of the file.)
201 bool matched =
false;
202 while ( objdump.
output() ) {
// Stop once the stream has gone bad.
212 if ( !objdump.
output() )
break;
213 if (
line.empty() )
continue;
// lineptr is a cursor that the skip helpers move forward through the line.
214 const char* lineptr =
line.c_str();
216 if ( !
skipString(
"LOAD", lineptr, &lineptr ) )
continue;
218 if ( !
skipString(
"off", lineptr, &lineptr ) )
continue;
// File offset of the segment, parsed as hexadecimal.
// NOTE(review): strtol() returns long; storing it in int truncates values
// that do not fit -- confirm this is acceptable for the binaries analysed.
220 int initialBase = strtol( lineptr, &endptr, 16 );
// No digits consumed: not the line we are looking for.
221 if ( lineptr == endptr )
continue;
224 if ( !
skipString(
"vaddr", lineptr, &lineptr ) )
continue;
// Virtual address of the segment, parsed as hexadecimal.
226 int finalBase = strtol( lineptr, &endptr, 16 );
227 if ( lineptr == endptr )
continue;
// vmbase is the difference between the virtual address and the file offset.
228 vmbase = finalBase - initialBase;
// Diagnostics written to stderr (the condition guarding them is not visible here).
233 fprintf(
stderr,
"Cannot determine VM base address for %s\n",
NAME.
c_str() );
234 fprintf(
stderr,
"Error while running `objdump -p %s`\n",
NAME.
c_str() );
// Parse the symbol table printed by nm. A line is accepted only when it has
// the shape
//   <address> <one non-space character> <symbol name without whitespace>
// Every other line is skipped. (Statements that read the line and set up
// `begin`/`endptr` are not shown in this part of the file.)
239 while (
nm.output() ) {
// Stop once the stream has gone bad.
242 if ( !
nm.output() )
break;
243 if (
line.empty() )
continue;
// nm prints symbol values in hexadecimal by default, and the objdump values
// that make up vmbase are parsed with base 16 as well, so the address must be
// read as hexadecimal. With base 10 any address containing a-f stops early and
// the line is rejected by the separator check below, while all-digit addresses
// are misinterpreted.
247 int address = strtol(
begin, &endptr, 16 );
// No digits consumed: the line does not start with an address.
248 if ( endptr ==
begin )
continue;
// Exactly one blank must separate the address from the next field.
249 if ( *endptr++ !=
' ' )
continue;
// The next field is a single character that must not be whitespace.
250 if ( isspace( *endptr++ ) )
continue;
// ... followed by exactly one blank.
251 if ( *endptr++ !=
' ' )
continue;
// The rest of the line is the symbol name; it must run to the end of the
// string without any embedded whitespace.
252 char* symbolName = endptr;
253 while ( *endptr && !isspace( *endptr ) ) endptr++;
254 if ( *endptr != 0 )
continue;
// Names starting with '.' are ignored.
256 if ( symbolName[0] ==
'.' )
continue;
// Offset of the symbol relative to the base computed from the objdump output.
261 int offset = address - vmbase;
// Raw event names appended to core_caa_events, one push_back per event. Several
// of these names are read back by key in the Core metric computations
// (e.g. "UNHALTED_CORE_CYCLES", "INSTRUCTIONS_RETIRED").
284 core_caa_events.
push_back(
"BRANCH_INSTRUCTIONS_RETIRED" );
285 core_caa_events.
push_back(
"ILD_STALL" );
286 core_caa_events.
push_back(
"INST_RETIRED:LOADS" );
287 core_caa_events.
push_back(
"INST_RETIRED:OTHER" );
288 core_caa_events.
push_back(
"INST_RETIRED:STORES" );
289 core_caa_events.
push_back(
"INSTRUCTIONS_RETIRED" );
290 core_caa_events.
push_back(
"LOAD_BLOCK:OVERLAP_STORE" );
291 core_caa_events.
push_back(
"LOAD_BLOCK:STA" );
292 core_caa_events.
push_back(
"LOAD_BLOCK:UNTIL_RETIRE" );
293 core_caa_events.
push_back(
"MEM_LOAD_RETIRED:DTLB_MISS" );
294 core_caa_events.
push_back(
"MEM_LOAD_RETIRED:L1D_LINE_MISS" );
295 core_caa_events.
push_back(
"MEM_LOAD_RETIRED:L2_LINE_MISS" );
296 core_caa_events.
push_back(
"MISPREDICTED_BRANCH_RETIRED" );
299 core_caa_events.
push_back(
"RS_UOPS_DISPATCHED CMASK=1 INV=1" );
300 core_caa_events.
push_back(
"SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE" );
301 core_caa_events.
push_back(
"UNHALTED_CORE_CYCLES" );
// Raw event names appended to nhm_caa_events, one push_back per event. Several
// of these names are read back by key in the metric computations that use
// CPU_CLK_UNHALTED:THREAD_P as the cycle count. Names may carry modifiers in
// the same string (e.g. "UOPS_DECODED:MS CMASK=1").
308 nhm_caa_events.
push_back(
"ARITH:CYCLES_DIV_BUSY" );
309 nhm_caa_events.
push_back(
"BR_INST_EXEC:ANY" );
310 nhm_caa_events.
push_back(
"BR_INST_EXEC:DIRECT_NEAR_CALL" );
311 nhm_caa_events.
push_back(
"BR_INST_EXEC:INDIRECT_NEAR_CALL" );
312 nhm_caa_events.
push_back(
"BR_INST_EXEC:INDIRECT_NON_CALL" );
313 nhm_caa_events.
push_back(
"BR_INST_EXEC:NEAR_CALLS" );
314 nhm_caa_events.
push_back(
"BR_INST_EXEC:NON_CALLS" );
315 nhm_caa_events.
push_back(
"BR_INST_EXEC:RETURN_NEAR" );
316 nhm_caa_events.
push_back(
"BR_INST_RETIRED:ALL_BRANCHES" );
317 nhm_caa_events.
push_back(
"BR_INST_RETIRED:CONDITIONAL" );
318 nhm_caa_events.
push_back(
"BR_INST_RETIRED:NEAR_CALL" );
319 nhm_caa_events.
push_back(
"BR_MISP_EXEC:ANY" );
320 nhm_caa_events.
push_back(
"CPU_CLK_UNHALTED:THREAD_P" );
321 nhm_caa_events.
push_back(
"DTLB_LOAD_MISSES:WALK_COMPLETED" );
322 nhm_caa_events.
push_back(
"INST_RETIRED:ANY_P" );
323 nhm_caa_events.
push_back(
"ITLB_MISSES:WALK_COMPLETED" );
324 nhm_caa_events.
push_back(
"L2_RQSTS:IFETCH_HIT" );
325 nhm_caa_events.
push_back(
"L2_RQSTS:IFETCH_MISS" );
326 nhm_caa_events.
push_back(
"MEM_INST_RETIRED:LOADS" );
327 nhm_caa_events.
push_back(
"MEM_INST_RETIRED:STORES" );
328 nhm_caa_events.
push_back(
"MEM_LOAD_RETIRED:L2_HIT" );
329 nhm_caa_events.
push_back(
"MEM_LOAD_RETIRED:L3_MISS" );
330 nhm_caa_events.
push_back(
"MEM_LOAD_RETIRED:L3_UNSHARED_HIT" );
331 nhm_caa_events.
push_back(
"MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM" );
332 nhm_caa_events.
push_back(
"MEM_UNCORE_RETIRED:LOCAL_DRAM" );
333 nhm_caa_events.
push_back(
"MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM" );
334 nhm_caa_events.
push_back(
"MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT" );
335 nhm_caa_events.
push_back(
"MEM_UNCORE_RETIRED:REMOTE_DRAM" );
336 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM" );
337 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM" );
338 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP" );
339 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD" );
340 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM" );
341 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT" );
342 nhm_caa_events.
push_back(
"RESOURCE_STALLS:ANY" );
343 nhm_caa_events.
push_back(
"SSEX_UOPS_RETIRED:PACKED_DOUBLE" );
344 nhm_caa_events.
push_back(
"SSEX_UOPS_RETIRED:PACKED_SINGLE" );
345 nhm_caa_events.
push_back(
"UOPS_DECODED:MS CMASK=1" );
346 nhm_caa_events.
push_back(
"UOPS_ISSUED:ANY CMASK=1 INV=1" );
347 nhm_caa_events.
push_back(
"ITLB_MISS_RETIRED" );
348 nhm_caa_events.
push_back(
"UOPS_RETIRED:ANY" );
// Linear search for the current event name (*it) in C_events; the branch is
// taken when the name is absent.
353 if ( find( C_events.
begin(), C_events.
end(), ( *it ) ) == C_events.
end() ) {
// Report the missing event on stderr (what happens afterwards is not visible
// in this part of the file).
354 fprintf(
stderr,
"ERROR: Cannot find event %s!!!\naborting...\n", ( *it ).c_str() );
// Same presence check and diagnostic, repeated at a second location
// (presumably for the other event list -- confirm).
363 if ( find( C_events.
begin(), C_events.
end(), ( *it ) ) == C_events.
end() ) {
364 fprintf(
stderr,
"ERROR: Cannot find event %s!!!\naborting...\n", ( *it ).c_str() );
// Metric names appended to core_caa_events_displ. The strings match the keys
// under which the Core metric computations store their results
// (e.g. "Total Cycles", "CPI", "iMargin"); the vector keeps them in push_back order.
372 core_caa_events_displ.
push_back(
"Total Cycles" );
373 core_caa_events_displ.
push_back(
"Stalled Cycles" );
374 core_caa_events_displ.
push_back(
"% of Total Cycles" );
375 core_caa_events_displ.
push_back(
"Instructions Retired" );
376 core_caa_events_displ.
push_back(
"CPI" );
378 core_caa_events_displ.
push_back(
"iMargin" );
379 core_caa_events_displ.
push_back(
"iFactor" );
381 core_caa_events_displ.
push_back(
"Counted Stalled Cycles" );
383 core_caa_events_displ.
push_back(
"L2 Miss Impact" );
384 core_caa_events_displ.
push_back(
"L2 Miss % of counted Stalled Cycles" );
386 core_caa_events_displ.
push_back(
"L2 Hit Impact" );
387 core_caa_events_displ.
push_back(
"L2 Hit % of counted Stalled Cycles" );
389 core_caa_events_displ.
push_back(
"L1 DTLB Miss Impact" );
390 core_caa_events_displ.
push_back(
"L1 DTLB Miss % of counted Stalled Cycles" );
392 core_caa_events_displ.
push_back(
"LCP Stalls Impact" );
393 core_caa_events_displ.
push_back(
"LCP Stalls % of counted Stalled Cycles" );
395 core_caa_events_displ.
push_back(
"Store-Fwd Stalls Impact" );
396 core_caa_events_displ.
push_back(
"Store-Fwd Stalls % of counted Stalled Cycles" );
398 core_caa_events_displ.
push_back(
"Loads Blocked by Unknown Address Store Impact" );
399 core_caa_events_displ.
push_back(
"Loads Blocked % of Store-Fwd Stalls Cycles" );
400 core_caa_events_displ.
push_back(
"Loads Overlapped with Stores Impact" );
401 core_caa_events_displ.
push_back(
"Loads Overlapped % of Store-Fwd Stalls Cycles" );
402 core_caa_events_displ.
push_back(
"Loads Spanning across Cache Lines Impact" );
403 core_caa_events_displ.
push_back(
"Loads Spanning % of Store-Fwd Stalls Cycles" );
405 core_caa_events_displ.
push_back(
"Load Instructions" );
406 core_caa_events_displ.
push_back(
"Load % of all Instructions" );
407 core_caa_events_displ.
push_back(
"Store Instructions" );
408 core_caa_events_displ.
push_back(
"Store % of all Instructions" );
409 core_caa_events_displ.
push_back(
"Branch Instructions" );
410 core_caa_events_displ.
push_back(
"Branch % of all Instructions" );
411 core_caa_events_displ.
push_back(
"Packed SIMD Computational Instructions" );
412 core_caa_events_displ.
push_back(
"Packed SIMD % of all Instructions" );
413 core_caa_events_displ.
push_back(
"Other Instructions" );
414 core_caa_events_displ.
push_back(
"Other % of all Instructions" );
416 core_caa_events_displ.
push_back(
"ITLB Miss Rate in %" );
417 core_caa_events_displ.
push_back(
"% of Mispredicted Branches" );
// Derived Core metrics. Each statement reads raw event counts from it->second
// by event name and stores the result back into it->second under a metric name.
// NOTE(review): it->second is indexed with operator[]; if it is a std::map-like
// container, reading a key that was never filled silently inserts a default
// value -- confirm the container type and that all events are present.
423 ( it->second )[
"Total Cycles"] = ( it->second )[
"UNHALTED_CORE_CYCLES"];
424 ( it->second )[
"Stalled Cycles"] = ( it->second )[
"RS_UOPS_DISPATCHED CMASK=1 INV=1"];
// Impact values are event counts multiplied by a fixed per-event cycle cost.
425 ( it->second )[
"L2 Miss Impact"] = ( it->second )[
"MEM_LOAD_RETIRED:L2_LINE_MISS"] *
CORE_L2_MISS_CYCLES;
426 ( it->second )[
"L2 Hit Impact"] =
427 ( ( it->second )[
"MEM_LOAD_RETIRED:L1D_LINE_MISS"] - ( it->second )[
"MEM_LOAD_RETIRED:L2_LINE_MISS"] ) *
431 ( it->second )[
"Loads Blocked by Unknown Address Store Impact"] =
433 ( it->second )[
"Loads Overlapped with Stores Impact"] =
435 ( it->second )[
"Loads Spanning across Cache Lines Impact"] =
437 ( it->second )[
"Store-Fwd Stalls Impact"] = ( it->second )[
"Loads Blocked by Unknown Address Store Impact"] +
438 ( it->second )[
"Loads Overlapped with Stores Impact"] +
439 ( it->second )[
"Loads Spanning across Cache Lines Impact"];
440 ( it->second )[
"Counted Stalled Cycles"] =
441 ( it->second )[
"L2 Miss Impact"] + ( it->second )[
"L2 Hit Impact"] + ( it->second )[
"LCP Stalls Impact"] +
442 ( it->second )[
"L1 DTLB Miss Impact"] + ( it->second )[
"Store-Fwd Stalls Impact"];
443 ( it->second )[
"Instructions Retired"] = ( it->second )[
"INSTRUCTIONS_RETIRED"];
444 ( it->second )[
"ITLB Miss Rate in %"] =
445 ( ( it->second )[
"ITLB_MISS_RETIRED"] / ( it->second )[
"INSTRUCTIONS_RETIRED"] ) * 100;
446 ( it->second )[
"Branch Instructions"] = ( it->second )[
"BRANCH_INSTRUCTIONS_RETIRED"];
447 ( it->second )[
"Load Instructions"] = ( it->second )[
"INST_RETIRED:LOADS"];
448 ( it->second )[
"Store Instructions"] = ( it->second )[
"INST_RETIRED:STORES"];
449 ( it->second )[
"Other Instructions"] = ( it->second )[
"INST_RETIRED:OTHER"] -
450 ( it->second )[
"SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] -
451 ( it->second )[
"BRANCH_INSTRUCTIONS_RETIRED"];
452 ( it->second )[
"% of Mispredicted Branches"] =
453 ( ( it->second )[
"MISPREDICTED_BRANCH_RETIRED"] / ( it->second )[
"BRANCH_INSTRUCTIONS_RETIRED"] ) * 100;
454 ( it->second )[
"Packed SIMD Computational Instructions"] =
455 ( it->second )[
"SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"];
456 ( it->second )[
"Counted Instructions Retired"] =
457 ( it->second )[
"Branch Instructions"] + ( it->second )[
"Load Instructions"] +
458 ( it->second )[
"Store Instructions"] + ( it->second )[
"Other Instructions"] +
459 ( it->second )[
"Packed SIMD Computational Instructions"];
460 ( it->second )[
"CPI"] = ( it->second )[
"UNHALTED_CORE_CYCLES"] / ( it->second )[
"INSTRUCTIONS_RETIRED"];
// iMargin: percentage by which totalCycles would shrink if this entry ran at
// EXPECTED_CPI instead of its measured CPI.
//   improvement factor    = CPI / EXPECTED_CPI
//   cycles after          = measured cycles / improvement factor
//   total after           = totalCycles - measured cycles + cycles after
//   iMargin               = 100 - (total after / totalCycles) * 100
// (totalCycles is defined outside the visible part of the file.)
462 double localPerformanceImprovement = ( it->second )[
"CPI"] /
EXPECTED_CPI;
463 double cyclesAfterImprovement = ( it->second )[
"UNHALTED_CORE_CYCLES"] / localPerformanceImprovement;
464 double totalCyclesAfterImprovement = totalCycles - ( it->second )[
"UNHALTED_CORE_CYCLES"] + cyclesAfterImprovement;
465 ( it->second )[
"iMargin"] = 100 - ( totalCyclesAfterImprovement / totalCycles ) * 100;
467 ( it->second )[
"% of Total Cycles"] =
468 ( it->second )[
"RS_UOPS_DISPATCHED CMASK=1 INV=1"] * 100 / ( it->second )[
"UNHALTED_CORE_CYCLES"];
469 ( it->second )[
"L2 Miss % of counted Stalled Cycles"] =
470 ( it->second )[
"L2 Miss Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles"];
471 ( it->second )[
"L2 Hit % of counted Stalled Cycles"] =
472 ( it->second )[
"L2 Hit Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles"];
473 ( it->second )[
"L1 DTLB Miss % of counted Stalled Cycles"] =
474 ( it->second )[
"L1 DTLB Miss Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles"];
475 ( it->second )[
"LCP Stalls % of counted Stalled Cycles"] =
476 ( it->second )[
"LCP Stalls Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles"];
477 ( it->second )[
"Store-Fwd Stalls % of counted Stalled Cycles"] =
478 ( it->second )[
"Store-Fwd Stalls Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles"];
479 ( it->second )[
"Loads Blocked % of Store-Fwd Stalls Cycles"] =
480 ( it->second )[
"Loads Blocked by Unknown Address Store Impact"] * 100 /
481 ( it->second )[
"Store-Fwd Stalls Impact"];
482 ( it->second )[
"Loads Overlapped % of Store-Fwd Stalls Cycles"] =
483 ( it->second )[
"Loads Overlapped with Stores Impact"] * 100 / ( it->second )[
"Store-Fwd Stalls Impact"];
484 ( it->second )[
"Loads Spanning % of Store-Fwd Stalls Cycles"] =
485 ( it->second )[
"Loads Spanning across Cache Lines Impact"] * 100 / ( it->second )[
"Store-Fwd Stalls Impact"];
487 ( it->second )[
"Load % of all Instructions"] =
488 ( it->second )[
"INST_RETIRED:LOADS"] * 100 / ( it->second )[
"Counted Instructions Retired"];
489 ( it->second )[
"Store % of all Instructions"] =
490 ( it->second )[
"INST_RETIRED:STORES"] * 100 / ( it->second )[
"Counted Instructions Retired"];
491 ( it->second )[
"Branch % of all Instructions"] =
492 ( it->second )[
"BRANCH_INSTRUCTIONS_RETIRED"] * 100 / ( it->second )[
"Counted Instructions Retired"];
493 ( it->second )[
"Packed SIMD % of all Instructions"] =
494 ( it->second )[
"SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] * 100 /
495 ( it->second )[
"Counted Instructions Retired"];
496 ( it->second )[
"Other % of all Instructions"] =
497 ( it->second )[
"Other Instructions"] * 100 / ( it->second )[
"Counted Instructions Retired"];
// Metric names appended to nhm_caa_events_displ. The strings match the keys
// under which the corresponding metric computations store their results
// (e.g. "Total Cycles", "L2 Hit Impact"); the vector keeps them in push_back order.
502 nhm_caa_events_displ.
push_back(
"Total Cycles" );
503 nhm_caa_events_displ.
push_back(
"Instructions Retired" );
506 nhm_caa_events_displ.
push_back(
"iMargin" );
507 nhm_caa_events_displ.
push_back(
"iFactor" );
509 nhm_caa_events_displ.
push_back(
"Stalled Cycles" );
510 nhm_caa_events_displ.
push_back(
"% of Total Cycles" );
511 nhm_caa_events_displ.
push_back(
"Total Counted Stalled Cycles" );
513 nhm_caa_events_displ.
push_back(
"Instruction Starvation % of Total Cycles" );
514 nhm_caa_events_displ.
push_back(
"# of Instructions per Call" );
515 nhm_caa_events_displ.
push_back(
"% of Total Cycles spent handling FP exceptions" );
517 nhm_caa_events_displ.
push_back(
"Counted Stalled Cycles due to Load Ops" );
519 nhm_caa_events_displ.
push_back(
"L2 Hit Impact" );
520 nhm_caa_events_displ.
push_back(
"L2 Hit % of Load Stalls" );
522 nhm_caa_events_displ.
push_back(
"L3 Unshared Hit Impact" );
523 nhm_caa_events_displ.
push_back(
"L3 Unshared Hit % of Load Stalls" );
525 nhm_caa_events_displ.
push_back(
"L2 Other Core Hit Impact" );
526 nhm_caa_events_displ.
push_back(
"L2 Other Core Hit % of Load Stalls" );
528 nhm_caa_events_displ.
push_back(
"L2 Other Core Hit Modified Impact" );
529 nhm_caa_events_displ.
push_back(
"L2 Other Core Hit Modified % of Load Stalls" );
531 nhm_caa_events_displ.
push_back(
"L3 Miss -> Local DRAM Hit Impact" );
532 nhm_caa_events_displ.
push_back(
"L3 Miss -> Remote DRAM Hit Impact" );
533 nhm_caa_events_displ.
push_back(
"L3 Miss -> Remote Cache Hit Impact" );
534 nhm_caa_events_displ.
push_back(
"L3 Miss -> Total Impact" );
535 nhm_caa_events_displ.
push_back(
"L3 Miss % of Load Stalls" );
537 nhm_caa_events_displ.
push_back(
"L1 DTLB Miss Impact" );
538 nhm_caa_events_displ.
push_back(
"L1 DTLB Miss % of Load Stalls" );
540 nhm_caa_events_displ.
push_back(
"Cycles spent during DIV & SQRT Ops" );
541 nhm_caa_events_displ.
push_back(
"DIV & SQRT Ops % of counted Stalled Cycles" );
543 nhm_caa_events_displ.
push_back(
"Total L2 IFETCH misses" );
544 nhm_caa_events_displ.
push_back(
"% of L2 IFETCH misses" );
546 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by Local DRAM" );
547 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by L3 (Modified)" );
548 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by L3 (Clean Snoop)" );
549 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by Remote L2" );
550 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by Remote DRAM" );
551 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by L3 (No Snoop)" );
553 nhm_caa_events_displ.
push_back(
"Total L2 IFETCH miss Impact" );
555 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by Local DRAM" );
556 nhm_caa_events_displ.
push_back(
"Local DRAM IFECTHes % Impact" );
558 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by L3 (Modified)" );
559 nhm_caa_events_displ.
push_back(
"L3 (Modified) IFECTHes % Impact" );
561 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by L3 (Clean Snoop)" );
562 nhm_caa_events_displ.
push_back(
"L3 (Clean Snoop) IFECTHes % Impact" );
564 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by Remote L2" );
565 nhm_caa_events_displ.
push_back(
"Remote L2 IFECTHes % Impact" );
567 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by Remote DRAM" );
568 nhm_caa_events_displ.
push_back(
"Remote DRAM IFECTHes % Impact" );
570 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by L3 (No Snoop)" );
571 nhm_caa_events_displ.
push_back(
"L3 (No Snoop) IFECTHes % Impact" );
573 nhm_caa_events_displ.
push_back(
"Total Branch Instructions Executed" );
574 nhm_caa_events_displ.
push_back(
"% of Mispredicted Branches" );
576 nhm_caa_events_displ.
push_back(
"Direct Near Calls % of Total Branches Executed" );
577 nhm_caa_events_displ.
push_back(
"Indirect Near Calls % of Total Branches Executed" );
578 nhm_caa_events_displ.
push_back(
"Indirect Near Non-Calls % of Total Branches Executed" );
579 nhm_caa_events_displ.
push_back(
"All Near Calls % of Total Branches Executed" );
580 nhm_caa_events_displ.
push_back(
"All Non Calls % of Total Branches Executed" );
581 nhm_caa_events_displ.
push_back(
"All Returns % of Total Branches Executed" );
583 nhm_caa_events_displ.
push_back(
"Total Branch Instructions Retired" );
584 nhm_caa_events_displ.
push_back(
"Conditionals % of Total Branches Retired" );
585 nhm_caa_events_displ.
push_back(
"Near Calls % of Total Branches Retired" );
587 nhm_caa_events_displ.
push_back(
"L1 ITLB Miss Impact" );
588 nhm_caa_events_displ.
push_back(
"ITLB Miss Rate in %" );
590 nhm_caa_events_displ.
push_back(
"Branch Instructions" );
591 nhm_caa_events_displ.
push_back(
"Branch % of all Instructions" );
593 nhm_caa_events_displ.
push_back(
"Load Instructions" );
594 nhm_caa_events_displ.
push_back(
"Load % of all Instructions" );
596 nhm_caa_events_displ.
push_back(
"Store Instructions" );
597 nhm_caa_events_displ.
push_back(
"Store % of all Instructions" );
599 nhm_caa_events_displ.
push_back(
"Other Instructions" );
600 nhm_caa_events_displ.
push_back(
"Other % of all Instructions" );
602 nhm_caa_events_displ.
push_back(
"Packed UOPS Retired" );
603 nhm_caa_events_displ.
push_back(
"Packed % of all UOPS Retired" );
// Derived metrics for the nhm_caa_events counters. Each statement reads raw
// event counts from it->second by event name and stores the result back into
// it->second under a metric name.
// NOTE(review): several later metrics divide by counts or sums that can be
// zero for an entry (e.g. "L2_RQSTS:IFETCH_MISS"); presumably the values are
// floating point so this yields inf/NaN and not a trap -- confirm.
609 ( it->second )[
"Total Cycles"] = ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"];
// Impact values are event counts multiplied by a fixed per-event cycle cost.
611 ( it->second )[
"L2 Hit Impact"] = ( it->second )[
"MEM_LOAD_RETIRED:L2_HIT"] *
I7_L2_HIT_CYCLES;
612 ( it->second )[
"L3 Unshared Hit Impact"] =
// "L2 Other Core Hit Impact" is based on the HIT_HITM count minus the HITM
// count. The difference is only used when it is positive, so the metric never
// becomes negative.
614 if ( ( it->second )[
"MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] >
615 ( it->second )[
"MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] ) {
// The multiplier applied to the difference is on a line not visible here.
616 ( it->second )[
"L2 Other Core Hit Impact"] = ( ( it->second )[
"MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] -
617 ( it->second )[
"MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] ) *
// Value used otherwise (presumably the else branch -- the branch keyword is
// not visible here).
620 ( it->second )[
"L2 Other Core Hit Impact"] = 0.0;
622 ( it->second )[
"L2 Other Core Hit Modified Impact"] =
624 ( it->second )[
"L3 Miss -> Local DRAM Hit Impact"] =
626 ( it->second )[
"L3 Miss -> Remote DRAM Hit Impact"] =
628 ( it->second )[
"L3 Miss -> Remote Cache Hit Impact"] =
630 ( it->second )[
"L3 Miss -> Total Impact"] = ( it->second )[
"L3 Miss -> Local DRAM Hit Impact"] +
631 ( it->second )[
"L3 Miss -> Remote DRAM Hit Impact"] +
632 ( it->second )[
"L3 Miss -> Remote Cache Hit Impact"];
633 ( it->second )[
"L1 DTLB Miss Impact"] =
635 ( it->second )[
"Counted Stalled Cycles due to Load Ops"] =
636 ( it->second )[
"L3 Miss -> Total Impact"] + ( it->second )[
"L2 Hit Impact"] +
637 ( it->second )[
"L1 DTLB Miss Impact"] + ( it->second )[
"L3 Unshared Hit Impact"] +
638 ( it->second )[
"L2 Other Core Hit Modified Impact"] + ( it->second )[
"L2 Other Core Hit Impact"];
639 ( it->second )[
"Cycles spent during DIV & SQRT Ops"] = ( it->second )[
"ARITH:CYCLES_DIV_BUSY"];
640 ( it->second )[
"Total Counted Stalled Cycles"] =
641 ( it->second )[
"Counted Stalled Cycles due to Load Ops"] + ( it->second )[
"Cycles spent during DIV & SQRT Ops"];
642 ( it->second )[
"Stalled Cycles"] =
643 ( it->second )[
"Total Counted Stalled Cycles"];
644 ( it->second )[
"% of Total Cycles"] =
645 ( it->second )[
"Stalled Cycles"] * 100 / ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"];
646 ( it->second )[
"L3 Miss % of Load Stalls"] =
647 ( it->second )[
"L3 Miss -> Total Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles due to Load Ops"];
648 ( it->second )[
"L2 Hit % of Load Stalls"] =
649 ( it->second )[
"L2 Hit Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles due to Load Ops"];
650 ( it->second )[
"L1 DTLB Miss % of Load Stalls"] =
651 ( it->second )[
"L1 DTLB Miss Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles due to Load Ops"];
652 ( it->second )[
"L3 Unshared Hit % of Load Stalls"] =
653 ( it->second )[
"L3 Unshared Hit Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles due to Load Ops"];
654 ( it->second )[
"L2 Other Core Hit % of Load Stalls"] =
655 ( it->second )[
"L2 Other Core Hit Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles due to Load Ops"];
656 ( it->second )[
"L2 Other Core Hit Modified % of Load Stalls"] =
657 ( it->second )[
"L2 Other Core Hit Modified Impact"] * 100 /
658 ( it->second )[
"Counted Stalled Cycles due to Load Ops"];
659 ( it->second )[
"DIV & SQRT Ops % of counted Stalled Cycles"] =
660 ( it->second )[
"Cycles spent during DIV & SQRT Ops"] * 100 / ( it->second )[
"Total Counted Stalled Cycles"];
662 ( it->second )[
"Cycles IFETCH served by Local DRAM"] =
664 ( it->second )[
"Cycles IFETCH served by L3 (Modified)"] =
666 ( it->second )[
"Cycles IFETCH served by L3 (Clean Snoop)"] =
668 ( it->second )[
"Cycles IFETCH served by Remote L2"] =
670 ( it->second )[
"Cycles IFETCH served by Remote DRAM"] =
672 ( it->second )[
"Cycles IFETCH served by L3 (No Snoop)"] =
674 ( it->second )[
"Total L2 IFETCH miss Impact"] =
675 ( it->second )[
"Cycles IFETCH served by Local DRAM"] + ( it->second )[
"Cycles IFETCH served by L3 (Modified)"] +
676 ( it->second )[
"Cycles IFETCH served by L3 (Clean Snoop)"] +
677 ( it->second )[
"Cycles IFETCH served by Remote L2"] + ( it->second )[
"Cycles IFETCH served by Remote DRAM"] +
678 ( it->second )[
"Cycles IFETCH served by L3 (No Snoop)"];
679 ( it->second )[
"Local DRAM IFECTHes % Impact"] =
680 ( it->second )[
"Cycles IFETCH served by Local DRAM"] * 100 / ( it->second )[
"Total L2 IFETCH miss Impact"];
681 ( it->second )[
"L3 (Modified) IFECTHes % Impact"] =
682 ( it->second )[
"Cycles IFETCH served by L3 (Modified)"] * 100 / ( it->second )[
"Total L2 IFETCH miss Impact"];
683 ( it->second )[
"L3 (Clean Snoop) IFECTHes % Impact"] = ( it->second )[
"Cycles IFETCH served by L3 (Clean Snoop)"] *
684 100 / ( it->second )[
"Total L2 IFETCH miss Impact"];
685 ( it->second )[
"Remote L2 IFECTHes % Impact"] =
686 ( it->second )[
"Cycles IFETCH served by Remote L2"] * 100 / ( it->second )[
"Total L2 IFETCH miss Impact"];
687 ( it->second )[
"Remote DRAM IFECTHes % Impact"] =
688 ( it->second )[
"Cycles IFETCH served by Remote DRAM"] * 100 / ( it->second )[
"Total L2 IFETCH miss Impact"];
689 ( it->second )[
"L3 (No Snoop) IFECTHes % Impact"] =
690 ( it->second )[
"Cycles IFETCH served by L3 (No Snoop)"] * 100 / ( it->second )[
"Total L2 IFETCH miss Impact"];
691 ( it->second )[
"Total L2 IFETCH misses"] = ( it->second )[
"L2_RQSTS:IFETCH_MISS"];
692 ( it->second )[
"% of IFETCHes served by Local DRAM"] =
693 ( it->second )[
"OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * 100 / ( it->second )[
"L2_RQSTS:IFETCH_MISS"];
694 ( it->second )[
"% of IFETCHes served by L3 (Modified)"] =
695 ( it->second )[
"OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * 100 / ( it->second )[
"L2_RQSTS:IFETCH_MISS"];
696 ( it->second )[
"% of IFETCHes served by L3 (Clean Snoop)"] =
697 ( it->second )[
"OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * 100 /
698 ( it->second )[
"L2_RQSTS:IFETCH_MISS"];
699 ( it->second )[
"% of IFETCHes served by Remote L2"] =
700 ( it->second )[
"OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * 100 /
701 ( it->second )[
"L2_RQSTS:IFETCH_MISS"];
702 ( it->second )[
"% of IFETCHes served by Remote DRAM"] =
703 ( it->second )[
"OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * 100 / ( it->second )[
"L2_RQSTS:IFETCH_MISS"];
704 ( it->second )[
"% of IFETCHes served by L3 (No Snoop)"] =
705 ( it->second )[
"OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * 100 / ( it->second )[
"L2_RQSTS:IFETCH_MISS"];
706 ( it->second )[
"% of L2 IFETCH misses"] =
707 ( it->second )[
"L2_RQSTS:IFETCH_MISS"] * 100 /
708 ( ( it->second )[
"L2_RQSTS:IFETCH_MISS"] + ( it->second )[
"L2_RQSTS:IFETCH_HIT"] );
709 ( it->second )[
"L1 ITLB Miss Impact"] =
712 ( it->second )[
"Total Branch Instructions Executed"] = ( it->second )[
"BR_INST_EXEC:ANY"];
713 ( it->second )[
"% of Mispredicted Branches"] =
714 ( it->second )[
"BR_MISP_EXEC:ANY"] * 100 / ( it->second )[
"BR_INST_EXEC:ANY"];
715 ( it->second )[
"Direct Near Calls % of Total Branches Executed"] =
716 ( it->second )[
"BR_INST_EXEC:DIRECT_NEAR_CALL"] * 100 / ( it->second )[
"Total Branch Instructions Executed"];
717 ( it->second )[
"Indirect Near Calls % of Total Branches Executed"] =
718 ( it->second )[
"BR_INST_EXEC:INDIRECT_NEAR_CALL"] * 100 / ( it->second )[
"Total Branch Instructions Executed"];
719 ( it->second )[
"Indirect Near Non-Calls % of Total Branches Executed"] =
720 ( it->second )[
"BR_INST_EXEC:INDIRECT_NON_CALL"] * 100 / ( it->second )[
"Total Branch Instructions Executed"];
721 ( it->second )[
"All Near Calls % of Total Branches Executed"] =
722 ( it->second )[
"BR_INST_EXEC:NEAR_CALLS"] * 100 / ( it->second )[
"Total Branch Instructions Executed"];
723 ( it->second )[
"All Non Calls % of Total Branches Executed"] =
724 ( it->second )[
"BR_INST_EXEC:NON_CALLS"] * 100 / ( it->second )[
"Total Branch Instructions Executed"];
725 ( it->second )[
"All Returns % of Total Branches Executed"] =
726 ( it->second )[
"BR_INST_EXEC:RETURN_NEAR"] * 100 / ( it->second )[
"Total Branch Instructions Executed"];
727 ( it->second )[
"Total Branch Instructions Retired"] = ( it->second )[
"BR_INST_RETIRED:ALL_BRANCHES"];
728 ( it->second )[
"Conditionals % of Total Branches Retired"] =
729 ( it->second )[
"BR_INST_RETIRED:CONDITIONAL"] * 100 / ( it->second )[
"Total Branch Instructions Retired"];
730 ( it->second )[
"Near Calls % of Total Branches Retired"] =
731 ( it->second )[
"BR_INST_RETIRED:NEAR_CALL"] * 100 / ( it->second )[
"Total Branch Instructions Retired"];
733 ( it->second )[
"Instruction Starvation % of Total Cycles"] =
734 ( ( it->second )[
"UOPS_ISSUED:ANY CMASK=1 INV=1"] - ( it->second )[
"RESOURCE_STALLS:ANY"] ) * 100 /
735 ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"];
736 ( it->second )[
"% of Total Cycles spent handling FP exceptions"] =
737 ( it->second )[
"UOPS_DECODED:MS CMASK=1"] * 100 / ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"];
738 ( it->second )[
"# of Instructions per Call"] =
739 ( it->second )[
"INST_RETIRED:ANY_P"] / ( it->second )[
"BR_INST_EXEC:NEAR_CALLS"];
741 ( it->second )[
"Instructions Retired"] = ( it->second )[
"INST_RETIRED:ANY_P"];
742 ( it->second )[
"ITLB Miss Rate in %"] =
743 ( ( it->second )[
"ITLB_MISS_RETIRED"] / ( it->second )[
"INST_RETIRED:ANY_P"] ) * 100;
745 ( it->second )[
"Branch Instructions"] = ( it->second )[
"BR_INST_RETIRED:ALL_BRANCHES"];
746 ( it->second )[
"Load Instructions"] = ( it->second )[
"MEM_INST_RETIRED:LOADS"];
747 ( it->second )[
"Store Instructions"] = ( it->second )[
"MEM_INST_RETIRED:STORES"];
748 ( it->second )[
"Other Instructions"] =
749 ( it->second )[
"Instructions Retired"] - ( it->second )[
"MEM_INST_RETIRED:LOADS"] -
750 ( it->second )[
"MEM_INST_RETIRED:STORES"] - ( it->second )[
"BR_INST_RETIRED:ALL_BRANCHES"];
751 ( it->second )[
"Packed UOPS Retired"] =
752 ( it->second )[
"SSEX_UOPS_RETIRED:PACKED_DOUBLE"] + ( it->second )[
"SSEX_UOPS_RETIRED:PACKED_SINGLE"];
753 ( it->second )[
"CPI"] = ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"] / ( it->second )[
"INST_RETIRED:ANY_P"];
// iMargin: percentage by which totalCycles would shrink if this entry ran at
// EXPECTED_CPI instead of its measured CPI.
//   improvement factor    = CPI / EXPECTED_CPI
//   cycles after          = measured cycles / improvement factor
//   total after           = totalCycles - measured cycles + cycles after
//   iMargin               = 100 - (total after / totalCycles) * 100
// (totalCycles is defined outside the visible part of the file.)
755 double localPerformanceImprovement = ( it->second )[
"CPI"] /
EXPECTED_CPI;
756 double cyclesAfterImprovement = ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"] / localPerformanceImprovement;
757 double totalCyclesAfterImprovement =
758 totalCycles - ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"] + cyclesAfterImprovement;
759 ( it->second )[
"iMargin"] = 100 - ( totalCyclesAfterImprovement / totalCycles ) * 100;
761 ( it->second )[
"Load % of all Instructions"] =
762 ( it->second )[
"MEM_INST_RETIRED:LOADS"] * 100 / ( it->second )[
"INST_RETIRED:ANY_P"];
763 ( it->second )[
"Store % of all Instructions"] =
764 ( it->second )[
"MEM_INST_RETIRED:STORES"] * 100 / ( it->second )[
"INST_RETIRED:ANY_P"];
765 ( it->second )[
"Branch % of all Instructions"] =
766 ( it->second )[
"BR_INST_RETIRED:ALL_BRANCHES"] * 100 / ( it->second )[
"INST_RETIRED:ANY_P"];
767 ( it->second )[
"Other % of all Instructions"] =
768 ( it->second )[
"Other Instructions"] * 100 / ( it->second )[
"INST_RETIRED:ANY_P"];
770 ( it->second )[
"Packed % of all UOPS Retired"] =
771 ( it->second )[
"Packed UOPS Retired"] * 100 / ( it->second )[
"UOPS_RETIRED:ANY"];
798 void init(
const char*
name,
const char* architecture,
const char* event_name,
unsigned int c_mask,
799 unsigned int inv_mask,
unsigned int smpl_period ) {
822 [](
const auto& lhs,
const auto& rhs ) { return lhs.second < rhs.second; } );
// Copy the key of the selected entry into `index` and its mapped value into `value`.
// NOTE(review): strcpy() performs no bounds check and the capacity of `index`
// is not visible here -- confirm it can hold the longest possible key.
824 strcpy(
index, ( max_pos->first ).c_str() );
825 value = max_pos->second;
// Walk over n positions and append text to s_mod with strcat(). The
// conditions selecting each append are on lines not visible here.
840 for (
int i = 0; i <
n; i++ ) {
843 strcat( s_mod,
"<" );
846 strcat( s_mod,
">" );
849 strcat( s_mod,
"&" );
// NOTE(review): the literal on the next statement is not a valid C++ string
// as written (three consecutive double quotes). It looks like an escaped form
// that was lost when this listing was produced; the same may apply to the
// three literals above. Restore from the original source -- do not guess.
852 strcat( s_mod,
""" );
// Append the text held in to_app (its construction is not visible here).
858 strcat( s_mod, to_app );
869 char* operator_string_begin =
const_cast<char*
>( strstr( demangled_symbol,
"operator" ) );
870 if ( operator_string_begin != NULL ) {
871 char* operator_string_end = operator_string_begin + 8;
872 while ( *operator_string_end ==
' ' ) operator_string_end++;
873 if ( strstr( operator_string_end,
"delete[]" ) == operator_string_end ) {
874 operator_string_end += 8;
875 *operator_string_end =
'\0';
876 }
else if ( strstr( operator_string_end,
"delete" ) == operator_string_end ) {
877 operator_string_end += 6;
878 *operator_string_end =
'\0';
879 }
else if ( strstr( operator_string_end,
"new[]" ) == operator_string_end ) {
880 operator_string_end += 5;
881 *operator_string_end =
'\0';
882 }
else if ( strstr( operator_string_end,
"new" ) == operator_string_end ) {
883 operator_string_end += 3;
884 *operator_string_end =
'\0';
885 }
else if ( strstr( operator_string_end,
">>=" ) == operator_string_end ) {
886 operator_string_end += 3;
887 *operator_string_end =
'\0';
888 }
else if ( strstr( operator_string_end,
"<<=" ) == operator_string_end ) {
889 operator_string_end += 3;
890 *operator_string_end =
'\0';
891 }
else if ( strstr( operator_string_end,
"->*" ) == operator_string_end ) {
892 operator_string_end += 3;
893 *operator_string_end =
'\0';
894 }
else if ( strstr( operator_string_end,
"<<" ) == operator_string_end ) {
895 operator_string_end += 2;
896 *operator_string_end =
'\0';
897 }
else if ( strstr( operator_string_end,
">>" ) == operator_string_end ) {
898 operator_string_end += 2;
899 *operator_string_end =
'\0';
900 }
else if ( strstr( operator_string_end,
">=" ) == operator_string_end ) {
901 operator_string_end += 2;
902 *operator_string_end =
'\0';
903 }
else if ( strstr( operator_string_end,
"<=" ) == operator_string_end ) {
904 operator_string_end += 2;
905 *operator_string_end =
'\0';
906 }
else if ( strstr( operator_string_end,
"==" ) == operator_string_end ) {
907 operator_string_end += 2;
908 *operator_string_end =
'\0';
909 }
else if ( strstr( operator_string_end,
"!=" ) == operator_string_end ) {
910 operator_string_end += 2;
911 *operator_string_end =
'\0';
912 }
else if ( strstr( operator_string_end,
"|=" ) == operator_string_end ) {
913 operator_string_end += 2;
914 *operator_string_end =
'\0';
915 }
else if ( strstr( operator_string_end,
"&=" ) == operator_string_end ) {
916 operator_string_end += 2;
917 *operator_string_end =
'\0';
918 }
else if ( strstr( operator_string_end,
"^=" ) == operator_string_end ) {
919 operator_string_end += 2;
920 *operator_string_end =
'\0';
921 }
else if ( strstr( operator_string_end,
"%=" ) == operator_string_end ) {
922 operator_string_end += 2;
923 *operator_string_end =
'\0';
924 }
else if ( strstr( operator_string_end,
"/=" ) == operator_string_end ) {
925 operator_string_end += 2;
926 *operator_string_end =
'\0';
927 }
else if ( strstr( operator_string_end,
"*=" ) == operator_string_end ) {
928 operator_string_end += 2;
929 *operator_string_end =
'\0';
930 }
else if ( strstr( operator_string_end,
"-=" ) == operator_string_end ) {
931 operator_string_end += 2;
932 *operator_string_end =
'\0';
933 }
else if ( strstr( operator_string_end,
"+=" ) == operator_string_end ) {
934 operator_string_end += 2;
935 *operator_string_end =
'\0';
936 }
else if ( strstr( operator_string_end,
"&&" ) == operator_string_end ) {
937 operator_string_end += 2;
938 *operator_string_end =
'\0';
939 }
else if ( strstr( operator_string_end,
"||" ) == operator_string_end ) {
940 operator_string_end += 2;
941 *operator_string_end =
'\0';
942 }
else if ( strstr( operator_string_end,
"[]" ) == operator_string_end ) {
943 operator_string_end += 2;
944 *operator_string_end =
'\0';
945 }
else if ( strstr( operator_string_end,
"()" ) == operator_string_end ) {
946 operator_string_end += 2;
947 *operator_string_end =
'\0';
948 }
else if ( strstr( operator_string_end,
"++" ) == operator_string_end ) {
949 operator_string_end += 2;
950 *operator_string_end =
'\0';
951 }
else if ( strstr( operator_string_end,
"--" ) == operator_string_end ) {
952 operator_string_end += 2;
953 *operator_string_end =
'\0';
954 }
else if ( strstr( operator_string_end,
"->" ) == operator_string_end ) {
955 operator_string_end += 2;
956 *operator_string_end =
'\0';
957 }
else if ( strstr( operator_string_end,
"<" ) == operator_string_end ) {
958 operator_string_end += 1;
959 *operator_string_end =
'\0';
960 }
else if ( strstr( operator_string_end,
">" ) == operator_string_end ) {
961 operator_string_end += 1;
962 *operator_string_end =
'\0';
963 }
else if ( strstr( operator_string_end,
"~" ) == operator_string_end ) {
964 operator_string_end += 1;
965 *operator_string_end =
'\0';
966 }
else if ( strstr( operator_string_end,
"!" ) == operator_string_end ) {
967 operator_string_end += 1;
968 *operator_string_end =
'\0';
969 }
else if ( strstr( operator_string_end,
"+" ) == operator_string_end ) {
970 operator_string_end += 1;
971 *operator_string_end =
'\0';
972 }
else if ( strstr( operator_string_end,
"-" ) == operator_string_end ) {
973 operator_string_end += 1;
974 *operator_string_end =
'\0';
975 }
else if ( strstr( operator_string_end,
"*" ) == operator_string_end ) {
976 operator_string_end += 1;
977 *operator_string_end =
'\0';
978 }
else if ( strstr( operator_string_end,
"/" ) == operator_string_end ) {
979 operator_string_end += 1;
980 *operator_string_end =
'\0';
981 }
else if ( strstr( operator_string_end,
"%" ) == operator_string_end ) {
982 operator_string_end += 1;
983 *operator_string_end =
'\0';
984 }
else if ( strstr( operator_string_end,
"^" ) == operator_string_end ) {
985 operator_string_end += 1;
986 *operator_string_end =
'\0';
987 }
else if ( strstr( operator_string_end,
"&" ) == operator_string_end ) {
988 operator_string_end += 1;
989 *operator_string_end =
'\0';
990 }
else if ( strstr( operator_string_end,
"|" ) == operator_string_end ) {
991 operator_string_end += 1;
992 *operator_string_end =
'\0';
993 }
else if ( strstr( operator_string_end,
"," ) == operator_string_end ) {
994 operator_string_end += 1;
995 *operator_string_end =
'\0';
996 }
else if ( strstr( operator_string_end,
"=" ) == operator_string_end ) {
997 operator_string_end += 1;
998 *operator_string_end =
'\0';
1000 return operator_string_begin;
1002 char* end_of_demangled_name =
const_cast<char*
>( strrchr( demangled_symbol,
')' ) );
1003 if ( end_of_demangled_name != NULL ) {
1006 while ( pars > 0 && end_of_demangled_name != demangled_symbol ) {
1007 c = *( --end_of_demangled_name );
1010 }
else if (
c ==
'(' ) {
1015 return demangled_symbol;
1017 char* end_of_func_name = end_of_demangled_name;
1018 if ( end_of_func_name != NULL ) {
1019 *end_of_func_name =
'\0';
1020 char c = *( --end_of_func_name );
1023 while ( pars > 0 && end_of_func_name != demangled_symbol ) {
1024 c = *( --end_of_func_name );
1027 }
else if (
c ==
'<' ) {
1031 *end_of_func_name =
'\0';
1033 c = *( --end_of_func_name );
1034 while ( isalnum(
c ) ||
c ==
'_' ||
c ==
'~' ) {
c = *( --end_of_func_name ); }
1035 return ++end_of_func_name;
1037 return demangled_symbol;
1051 strcpy( module_filename, dir );
1052 strcat( module_filename,
"/HTML/" );
1053 strcat( module_filename, module_name );
1054 strcat( module_filename,
".html" );
1057 strcpy( event, ( cur_module->
get_event() ).c_str() );
1060 if ( result == modules_tot_samples.
end() )
1062 if ( ( !strcmp( event,
"UNHALTED_CORE_CYCLES" ) && !nehalem ) ||
1063 ( !strcmp( event,
"CPU_CLK_UNHALTED:THREAD_P" ) && nehalem ) ) {
1064 modules_tot_samples.
insert(
1069 module_file = fopen( module_filename,
"w" );
1070 if ( module_file == NULL ) {
1071 fprintf(
stderr,
"ERROR: Cannot create file %s!!!\naborting...\n", module_filename );
1074 fprintf( module_file,
"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" "
1075 "\"http://www.w3.org/TR/html4/loose.dtd\">\n" );
1076 fprintf( module_file,
"<html>\n" );
1077 fprintf( module_file,
"<head>\n" );
1078 fprintf( module_file,
"<title>\n" );
1079 fprintf( module_file,
"%s\n", module_name );
1080 fprintf( module_file,
"</title>\n" );
1081 fprintf( module_file,
"</head>\n" );
1082 fprintf( module_file,
"<body>\n" );
1083 fprintf( module_file,
"<h2>%s</h2><br/>Events Sampled:<br/>\n", module_name );
1084 fprintf( module_file,
"<ul>\n" );
1086 fprintf( module_file,
"<li><a href=\"#%s\">%s</a></li>\n", it->c_str(), it->c_str() );
1088 fprintf( module_file,
"</ul>\n" );
1091 if ( ( !strcmp( event,
"UNHALTED_CORE_CYCLES" ) && !nehalem ) ||
1092 ( !strcmp( event,
"CPU_CLK_UNHALTED:THREAD_P" ) && nehalem ) ) {
1095 module_file = fopen( module_filename,
"a" );
1099 strcpy( event_str, event );
1101 sprintf( event_str + strlen( event_str ),
" CMASK=%u", cur_module->
get_c_mask() );
1104 sprintf( event_str + strlen( event_str ),
" INV=%u", cur_module->
get_inv_mask() );
1106 fprintf( module_file,
"<a name=\"%s\"><a>\n", event_str );
1107 fprintf( module_file,
"<table cellpadding=\"5\">\n" );
1108 fprintf( module_file,
"<tr bgcolor=\"#EEEEEE\">\n" );
1109 fprintf( module_file,
1110 "<th colspan=\"6\" align=\"left\">%s -- cmask: %u -- invmask: %u -- Total Samples: %u -- "
1111 "Sampling Period: %u</th>\n",
1114 fprintf( module_file,
"</tr>\n" );
1115 fprintf( module_file,
"<tr bgcolor=\"#EEEEEE\">\n" );
1116 fprintf( module_file,
"<th align=\"left\">Samples</th>\n" );
1117 fprintf( module_file,
"<th align=\"left\">Percentage</th>\n" );
1118 fprintf( module_file,
"<th align=\"left\">Symbol Name</th>\n" );
1119 fprintf( module_file,
"<th align=\"left\">Library Name</th>\n" );
1120 fprintf( module_file,
"<th align=\"left\">Complete Signature</th>\n" );
1121 fprintf( module_file,
"<th align=\"left\">Library Pathname</th>\n" );
1122 fprintf( module_file,
"</tr>\n" );
1123 for (
int j = 0;
j < 20;
j++ ) {
1147 char* sym_end = strchr(
index,
'%' );
1148 if ( sym_end == NULL )
1150 fprintf(
stderr,
"ERROR: Invalid sym and lib name! : %s\naborting...\n",
index );
1153 memcpy( sym,
index, strlen(
index ) - strlen( sym_end ) );
1154 strcpy( lib, sym_end + 1 );
1157 strcpy( temp, sym );
1158 strcpy( simple_sym, (
func_name( temp ) ) );
1159 if ( strrchr( lib,
'/' ) != NULL && *( strrchr( lib,
'/' ) + 1 ) !=
'\0' ) {
1160 strcpy( simple_lib, strrchr( lib,
'/' ) + 1 );
1162 strcpy( simple_lib, lib );
1165 fprintf( module_file,
"<tr bgcolor=\"#FFFFCC\">\n" );
1167 fprintf( module_file,
"<tr bgcolor=\"#CCFFCC\">\n" );
1169 fprintf( module_file,
"<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%u</td>\n", value );
1170 fprintf( module_file,
"<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%f%%</td>\n",
1176 fprintf( module_file,
"<td style=\"font-family:courier;\">%s</td>\n", simple_sym_mod );
1177 fprintf( module_file,
"<td style=\"font-family:courier;\">%s</td>\n", simple_lib_mod );
1178 fprintf( module_file,
"<td style=\"font-family:courier;\">%s</td>\n", sym_mod );
1179 fprintf( module_file,
"<td style=\"font-family:courier;\">%s</td>\n</tr>\n", lib_mod );
1181 fprintf( module_file,
"</table><br/><br/>\n" );
1182 int res = fclose( module_file );
1184 fprintf(
stderr,
"ERROR: Cannot close file %s!!!\naborting...\n", module_filename );
1211 unsigned int module_num = 0;
1215 strcpy( path_name, dir );
1216 strcat( path_name,
"/" );
1218 gzFile res_file = gzopen( path_name,
"rb" );
1220 if ( res_file != NULL ) {
1226 sscanf(
line,
fmt.c_str(), arch, event, &cmask, &inv, &
sp );
1227 if ( !strcmp( arch,
"NHM" ) )
1234 if ( strchr(
line,
' ' ) == NULL )
1236 if ( module_num > 0 ) {
1238 cur_module->
clear();
1241 char* end_sym = strchr(
line,
'%' );
1242 if ( end_sym == NULL )
1244 fprintf(
stderr,
"ERROR: Invalid module name. \nLINE: %s\naborting...\n",
line );
1248 memcpy( cur_module_name,
line, strlen(
line ) - strlen( end_sym ) );
1249 cur_module->
init( cur_module_name, arch, event, cmask, inv,
sp );
1250 cur_module->
set_total( atoi( end_sym + 1 ) );
1254 unsigned int value = 0, libOffset = 0;
1265 sscanf(
line,
fmt.c_str(), symbol, libName, &libOffset, &value );
1266 char realPathName_s[FILENAME_MAX];
1267 bzero( realPathName_s, FILENAME_MAX );
1268 char* realPathName = realpath( libName, realPathName_s );
1269 if ( realPathName != NULL && strlen( realPathName ) > 0 ) {
1271 result = libsInfo.find( realPathName );
1272 if ( result == libsInfo.end() ) { libsInfo[realPathName] =
FileInfo( realPathName,
true ); }
1273 const char* temp_sym = libsInfo[realPathName].symbolByOffset( libOffset );
1274 if ( temp_sym != NULL && strlen( temp_sym ) > 0 ) {
1276 char* demangled_symbol = abi::__cxa_demangle( temp_sym, NULL, NULL, &status );
1277 if ( status == 0 ) {
1278 strcpy( final_sym, demangled_symbol );
1279 free( demangled_symbol );
1281 strcpy( final_sym, temp_sym );
1284 strcpy( final_sym,
"???" );
1286 strcpy( final_lib, realPathName );
1288 strcpy( final_sym, symbol );
1289 strcpy( final_lib, libName );
1293 strcpy(
index, final_sym );
1294 strcat(
index,
"%" );
1295 strcat(
index, final_lib );
1301 cur_module->
clear();
1302 gzclose( res_file );
1305 fprintf(
stderr,
"ERROR: Unable to open input file: %s\naborting...\n",
filename );
1327 strcpy( path_name, dir );
1328 strcat( path_name,
"/" );
1330 gzFile res_file = gzopen( path_name,
"rb" );
1331 if ( res_file != NULL ) {
1339 sscanf(
line,
fmt.c_str(), arch, event, cmask_str, inv_str, sp_str );
1341 if ( atoi( cmask_str ) > 0 ) {
1342 event_str +=
" CMASK=";
1343 event_str += cmask_str;
1345 if ( atoi( inv_str ) > 0 ) {
1346 event_str +=
" INV=";
1347 event_str += inv_str;
1352 fprintf(
stderr,
"ERROR: Unable to open input file: %s\naborting...\n",
filename );
1355 gzclose( res_file );
1364 i != modules_tot_samples.
end(); ++i ) {
1366 strcpy( module_filename, dir );
1367 strcat( module_filename,
"/HTML/" );
1368 strcat( module_filename, ( i->first ).c_str() );
1369 strcat( module_filename,
".html" );
1370 FILE* module_file = fopen( module_filename,
"a" );
1371 if ( module_file == NULL ) {
1372 fprintf(
stderr,
"ERROR: Unable to append to file: %s\naborting...\n", module_filename );
1375 fprintf( module_file,
"</body>\n</html>\n" );
1376 if ( fclose( module_file ) ) {
1377 fprintf(
stderr,
"ERROR: Cannot close file %s!!!\naborting...\n", module_filename );
1404 int number_of_modules = 0;
1406 int no_of_values = 0;
1409 strcpy( path_name, dir );
1410 strcat( path_name,
"/" );
1412 FILE* fp = fopen( path_name,
"r" );
1416 int stat = fscanf( fp, fmt1.
c_str(), arch, event, cmask_str, inv_str, sp_str );
1421 if ( !strcmp( arch,
"NHM" ) )
1426 if ( atoi( cmask_str ) > 0 ) {
1427 event_str +=
" CMASK=";
1428 event_str += cmask_str;
1430 if ( atoi( inv_str ) > 0 ) {
1431 event_str +=
" INV=";
1432 event_str += inv_str;
1436 while ( fscanf( fp, fmt2.
c_str(),
line ) != EOF ) {
1437 if ( isalpha(
line[0] ) )
1439 if ( number_of_modules > 0 ) {
1440 C_modules[cur_module_name][event_str] = (double)cur_sum / no_of_values;
1444 strcpy( cur_module_name,
line );
1445 number_of_modules++;
1446 }
else if ( isdigit(
line[0] ) )
1448 cur_sum += strtol(
line, NULL, 10 );
1452 C_modules[cur_module_name][event_str] = (double)cur_sum / no_of_values;
1454 return number_of_modules;
1460 "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\n" );
1461 fprintf( fp,
"<html>\n" );
1462 fprintf( fp,
"<head>\n" );
1463 fprintf( fp,
"<title>\n" );
1464 fprintf( fp,
"Analysis Result\n" );
1465 fprintf( fp,
"</title>\n" );
1466 fprintf( fp,
"<script src=\"sorttable.js\"></script>\n" );
1467 fprintf( fp,
"<style>\ntable.sortable thead "
1468 "{\nbackground-color:#eee;\ncolor:#666666;\nfont-weight:bold;\ncursor:default;\nfont-family:courier;\n}"
1470 fprintf( fp,
"</head>\n" );
1471 fprintf( fp,
"<body link=\"black\">\n" );
1472 fprintf( fp,
"<h1>RESULTS:</h1><br/>Click for detailed symbol view...<p/>\n" );
1473 fprintf( fp,
"<table class=\"sortable\" cellpadding=\"5\">\n" );
1474 fprintf( fp,
"<tr>\n" );
1475 fprintf( fp,
"<th>MODULE NAME</th>\n" );
1477 if ( strlen( it->c_str() ) == 0 )
1478 fprintf( fp,
"<th bgcolor=\"#FFFFFF\"> </th>\n" );
1480 fprintf( fp,
"<th>%s</th>\n", ( *it ).c_str() );
1482 fprintf( fp,
"</tr>\n" );
1491 fprintf( fp,
"<tr bgcolor=\"#FFFFCC\">\n" );
1493 fprintf( fp,
"<tr bgcolor=\"#CCFFCC\">\n" );
1494 fprintf( fp,
"<td style=\"font-family:monospace;font-size:large;color:Black\"><a href=\"%s.html\">%s</a></td>\n",
1495 ( it->first ).c_str(), ( it->first ).c_str() );
1497 if ( strlen( jt->c_str() ) == 0 ) {
1498 fprintf( fp,
"<td bgcolor=\"#FFFFFF\"> </td>" );
1500 if ( ( it->second ).find( *jt ) == ( it->second ).end() ) {
1501 fprintf(
stderr,
"ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", ( *jt ).c_str() );
1504 fprintf( fp,
"<td style=\"font-family:monospace;font-size:large;color:DarkBlue\" align=\"right\">%.2f</td>\n",
1505 ( it->second )[*jt] );
1508 fprintf( fp,
"</tr>\n" );
1514 fprintf( fp,
"</table>\n</body>\n</html>\n" );
1519 fprintf( fp,
"MODULE NAME" );
1521 if ( strlen( it->c_str() ) == 0 ) {
1523 fprintf( fp,
",%s", ( *it ).c_str() );
1525 fprintf( fp,
"\n" );
1532 fprintf( fp,
"%s", ( it->first ).c_str() );
1534 if ( strlen( jt->c_str() ) == 0 ) {
1536 if ( ( it->second ).find( *jt ) == ( it->second ).end() ) {
1537 fprintf(
stderr,
"ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", ( *jt ).c_str() );
1540 fprintf( fp,
",%.2f", ( it->second )[*jt] );
1543 fprintf( fp,
"\n" );
1556 double counter_value;
1559 counter_value = ( it->second )[field];
1560 if ( max < counter_value ) max = counter_value;
1562 if ( value > 0 && max > 0 && normalizeTo > 0 ) {
1563 return 1. * value / max * normalizeTo;
1578 1. -
normalize(
"Packed % of all UOPS Retired", ( it->second )[
"Packed % of all UOPS Retired"], 1 );
1579 double misspnorm =
normalize(
"% of Mispredicted Branches", ( it->second )[
"% of Mispredicted Branches"], 1 );
1580 double stallnorm =
normalize(
"Stalled Cycles", ( it->second )[
"Stalled Cycles"], 1 );
1581 ( it->second )[
"iFactor"] = stallnorm * ( simdnorm + misspnorm + stallnorm );
1587 1. -
normalize(
"Packed SIMD % of all Instructions", ( it->second )[
"Packed SIMD % of all Instructions"], 1 );
1588 double misspnorm =
normalize(
"% of Mispredicted Branches", ( it->second )[
"% of Mispredicted Branches"], 1 );
1589 double stallnorm =
normalize(
"Stalled Cycles", ( it->second )[
"Stalled Cycles"], 1 );
1590 ( it->second )[
"iFactor"] = stallnorm * ( simdnorm + misspnorm + stallnorm );
1604 sum += ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"];
1609 sum += ( it->second )[
"UNHALTED_CORE_CYCLES"];
1619 if ( argc < 2 || argc > 4 ) {
1620 printf(
"\n\nUsage: %s DIRECTORY [--caa] [--csv]\n\n",
argv[0] );
1626 for (
int i = 2; i <
argc; i++ ) {
1627 if ( !strcmp(
argv[i],
"--caa" ) ) caa =
true;
1628 if ( !strcmp(
argv[i],
"--csv" ) ) csv =
true;
1632 strcpy( dir,
argv[1] );
1634 strcat( dir,
"/HTML" );
1635 int res = mkdir( dir, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH );
1637 fprintf(
stderr,
"ERROR: Cannot create directory %s\naborting...\n", dir );
1643 struct dirent* dirp;
1644 int num_of_modules = 0;
1645 if ( ( dp = opendir(
argv[1] ) ) == NULL ) {
1646 printf(
"Error(%d) opening %s\n", errno,
argv[1] );
1649 while ( ( dirp = readdir( dp ) ) != NULL ) {
1650 if ( strstr( dirp->d_name,
"_S_" ) != NULL && strstr( dirp->d_name,
".txt.gz" ) != NULL && !csv ) {
1652 fprintf(
stderr,
"ERROR: Cannot read file %s\naborting...\n", dirp->d_name );
1658 sort( S_events.
begin(), S_events.
end() );
1659 if ( ( dp = opendir(
argv[1] ) ) == NULL ) {
1660 printf(
"Error(%d) opening %s\n", errno,
argv[1] );
1663 while ( ( dirp = readdir( dp ) ) != NULL ) {
1664 if ( strstr( dirp->d_name,
"_S_" ) != NULL && strstr( dirp->d_name,
".txt.gz" ) != NULL && !csv ) {
1666 fprintf(
stderr,
"ERROR: Cannot read file %s\naborting...\n", dirp->d_name );
1669 }
else if ( strstr( dirp->d_name,
"_C_" ) != NULL && strstr( dirp->d_name,
".txt" ) != NULL ) {
1671 if ( res > num_of_modules ) { num_of_modules = res; }
1678 fprintf(
stderr,
"ERROR: Cannot finalize HTML pages!!!\naborting...\n" );
1686 sprintf( filepath,
"%s/HTML/index.html",
argv[1] );
1688 sprintf( filepath,
"%s/results.csv",
argv[1] );
1689 FILE* fp = fopen( filepath,
"w" );
1691 fprintf(
stderr,
"ERROR: Cannot create file index.html!!!\naborting...\n" );
1700 fprintf(
stderr,
"(core) ERROR: One or more events for CAA missing!\naborting...\n" );
1717 fprintf(
stderr,
"(nehalem) ERROR: One or more events for CAA missing!\naborting...\n" );
1748 sprintf( src,
"sorttable.js" );
1749 sprintf(
dst,
"%s/HTML/sorttable.js",
argv[1] );
1750 int fd_src = open( src, O_RDONLY );
1751 if ( fd_src == -1 ) {
1752 fprintf(
stderr,
"ERROR: Cannot open file \"%s\"!\naborting...\n", src );
1755 int fd_dst = open(
dst, O_WRONLY | O_CREAT | O_TRUNC, 0644 );
1756 if ( fd_dst == -1 ) {
1757 fprintf(
stderr,
"ERROR: Cannot open file \"%s\" (%s)!\naborting...\n",
dst, strerror( errno ) );
1761 while (
read( fd_src, &
c, 1 ) ) {
1762 if ( write( fd_dst, &
c, 1 ) == -1 ) {