33 #include <sys/types.h>
// Per-event cycle costs used by the Core-architecture cycle-accounting analysis.
// In the code visible further down, CORE_L2_MISS_CYCLES multiplies the
// "MEM_LOAD_RETIRED:L2_LINE_MISS" count to produce the "L2 Miss Impact" metric.
// NOTE(review): the remaining constants are presumably applied the same way to
// their matching event counts; their use sites are not visible here -- confirm.
49 #define CORE_L2_MISS_CYCLES 200
// Floating-point literal: any expression using it is evaluated in floating point.
50 #define CORE_L2_HIT_CYCLES 14.5
51 #define CORE_L1_DTLB_MISS_CYCLES 10
52 #define CORE_LCP_STALL_CYCLES 6
53 #define CORE_UNKNOWN_ADDR_STORE_CYCLES 5
54 #define CORE_OVERLAPPING_CYCLES 6
55 #define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES 20
// Per-event cycle costs used by the i7 (nhm_* event lists) cycle-accounting analysis.
// In the code visible further down, I7_L2_HIT_CYCLES multiplies the
// "MEM_LOAD_RETIRED:L2_HIT" count to produce the "L2 Hit Impact" metric.
// NOTE(review): the other I7_* constants are presumably the weights for the
// remaining "... Impact" / "Cycles IFETCH served by ..." metrics; their use
// sites are not visible here -- confirm.
58 #define I7_L1_DTLB_WALK_COMPLETED_CYCLES 35
59 #define I7_L1_ITLB_WALK_COMPLETED_CYCLES 35
60 #define I7_L2_HIT_CYCLES 6
61 #define I7_L3_UNSHARED_HIT_CYCLES 35
62 #define I7_OTHER_CORE_L2_HIT_CYCLES 60
63 #define I7_OTHER_CORE_L2_HITM_CYCLES 75
64 #define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES 225 // average of 200 (not modified) and 225-250 (modified)
65 #define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES 360 // average of 350 (not modified) and 370 (modified)
66 #define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES 180
// Instruction-fetch variants. Unlike the constants above, these names carry no
// _CYCLES suffix.
// NOTE(review): presumably also cycle costs, given the values mirror the
// data-side constants above -- confirm against the use sites.
67 #define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT 200
68 #define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT 350
69 #define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP 35
70 #define I7_IFETCH_L2_MISS_L3_HIT_SNOOP 60
71 #define I7_IFETCH_L2_MISS_L3_HITM 75
72 #define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD 180
// NOTE(review): presumably an upper bound on the number of modules handled;
// the use site is not visible here -- confirm.
74 #define MAX_MODULES 1000
// Reference CPI for the improvement-margin estimate: the code further down
// divides a measured "CPI" by this value to obtain localPerformanceImprovement,
// which then feeds the "iMargin" metric.
76 #define EXPECTED_CPI 0.25
// Length limits for names, symbols, libraries and text lines.
// NOTE(review): presumably sizes of fixed char buffers; no declaration using
// them is visible here, so whether they include the terminating NUL is
// unknown -- confirm before relying on them as string-length limits.
78 #define MAX_FILENAME_LENGTH 1024
79 #define MAX_SAMPLE_INDEX_LENGTH 10000
80 #define MAX_SYM_LENGTH 15000
81 #define MAX_SYM_MOD_LENGTH 20000
82 #define MAX_LIB_LENGTH 5000
83 #define MAX_LIB_MOD_LENGTH 7000
84 #define MAX_SIMPLE_SYM_LENGTH 300
85 #define MAX_SIMPLE_SYM_MOD_LENGTH 500
86 #define MAX_SIMPLE_LIB_LENGTH 300
87 #define MAX_SIMPLE_LIB_MOD_LENGTH 500
88 #define MAX_LINE_LENGTH 20000
89 #define MAX_EVENT_NAME_LENGTH 150
90 #define MAX_MODULE_NAME_LENGTH 250
91 #define MAX_VALUE_STRING_LENGTH 250
92 #define MAX_ARCH_NAME_LENGTH 20
93 #define MAX_CMASK_STR_LENGTH 5
94 #define MAX_INV_STR_LENGTH 5
95 #define MAX_SP_STR_LENGTH 50
// NOTE(review): presumably the chunk size used when reading the output of the
// popen()-ed command below -- confirm against the read loop.
97 #define PIPE_BUFFER_LENGTH 1000
102 pipe = popen( cmd,
"r" );
104 printf(
"Cannot open pipe. Exiting...\n" );
110 while ( !feof(
pipe ) ) {
134 if ( !isspace( *srcbuffer++ ) ) {
return false; }
135 while ( isspace( *srcbuffer ) ) { srcbuffer++; }
136 *destbuffer = srcbuffer;
147 bool skipString(
const char* strptr,
const char* srcbuffer,
const char** dstbuffer ) {
148 if ( strncmp( srcbuffer, strptr, strlen( strptr ) ) ) {
return false; }
149 *dstbuffer = srcbuffer + strlen( strptr );
166 if ( i->OFFSET == offset ) {
return i->NAME.c_str(); }
172 return i->NAME.c_str();
202 bool matched =
false;
203 while ( objdump.
output() ) {
213 if ( !objdump.
output() )
break;
214 if (
line.empty() )
continue;
215 const char* lineptr =
line.c_str();
217 if ( !
skipString(
"LOAD", lineptr, &lineptr ) )
continue;
219 if ( !
skipString(
"off", lineptr, &lineptr ) )
continue;
221 int initialBase = strtol( lineptr, &endptr, 16 );
222 if ( lineptr == endptr )
continue;
225 if ( !
skipString(
"vaddr", lineptr, &lineptr ) )
continue;
227 int finalBase = strtol( lineptr, &endptr, 16 );
228 if ( lineptr == endptr )
continue;
229 vmbase = finalBase - initialBase;
234 fprintf( stderr,
"Cannot determine VM base address for %s\n",
NAME.
c_str() );
235 fprintf( stderr,
"Error while running `objdump -p %s`\n",
NAME.
c_str() );
240 while (
nm.output() ) {
243 if ( !
nm.output() )
break;
244 if (
line.empty() )
continue;
248 int address = strtol(
begin, &endptr, 10 );
249 if ( endptr ==
begin )
continue;
250 if ( *endptr++ !=
' ' )
continue;
251 if ( isspace( *endptr++ ) )
continue;
252 if ( *endptr++ !=
' ' )
continue;
253 char* symbolName = endptr;
254 while ( *endptr && !isspace( *endptr ) ) endptr++;
255 if ( *endptr != 0 )
continue;
257 if ( symbolName[0] ==
'.' )
continue;
262 int offset = address - vmbase;
285 core_caa_events.
push_back(
"BRANCH_INSTRUCTIONS_RETIRED" );
286 core_caa_events.
push_back(
"ILD_STALL" );
287 core_caa_events.
push_back(
"INST_RETIRED:LOADS" );
288 core_caa_events.
push_back(
"INST_RETIRED:OTHER" );
289 core_caa_events.
push_back(
"INST_RETIRED:STORES" );
290 core_caa_events.
push_back(
"INSTRUCTIONS_RETIRED" );
291 core_caa_events.
push_back(
"LOAD_BLOCK:OVERLAP_STORE" );
292 core_caa_events.
push_back(
"LOAD_BLOCK:STA" );
293 core_caa_events.
push_back(
"LOAD_BLOCK:UNTIL_RETIRE" );
294 core_caa_events.
push_back(
"MEM_LOAD_RETIRED:DTLB_MISS" );
295 core_caa_events.
push_back(
"MEM_LOAD_RETIRED:L1D_LINE_MISS" );
296 core_caa_events.
push_back(
"MEM_LOAD_RETIRED:L2_LINE_MISS" );
297 core_caa_events.
push_back(
"MISPREDICTED_BRANCH_RETIRED" );
300 core_caa_events.
push_back(
"RS_UOPS_DISPATCHED CMASK=1 INV=1" );
301 core_caa_events.
push_back(
"SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE" );
302 core_caa_events.
push_back(
"UNHALTED_CORE_CYCLES" );
309 nhm_caa_events.
push_back(
"ARITH:CYCLES_DIV_BUSY" );
310 nhm_caa_events.
push_back(
"BR_INST_EXEC:ANY" );
311 nhm_caa_events.
push_back(
"BR_INST_EXEC:DIRECT_NEAR_CALL" );
312 nhm_caa_events.
push_back(
"BR_INST_EXEC:INDIRECT_NEAR_CALL" );
313 nhm_caa_events.
push_back(
"BR_INST_EXEC:INDIRECT_NON_CALL" );
314 nhm_caa_events.
push_back(
"BR_INST_EXEC:NEAR_CALLS" );
315 nhm_caa_events.
push_back(
"BR_INST_EXEC:NON_CALLS" );
316 nhm_caa_events.
push_back(
"BR_INST_EXEC:RETURN_NEAR" );
317 nhm_caa_events.
push_back(
"BR_INST_RETIRED:ALL_BRANCHES" );
318 nhm_caa_events.
push_back(
"BR_INST_RETIRED:CONDITIONAL" );
319 nhm_caa_events.
push_back(
"BR_INST_RETIRED:NEAR_CALL" );
320 nhm_caa_events.
push_back(
"BR_MISP_EXEC:ANY" );
321 nhm_caa_events.
push_back(
"CPU_CLK_UNHALTED:THREAD_P" );
322 nhm_caa_events.
push_back(
"DTLB_LOAD_MISSES:WALK_COMPLETED" );
323 nhm_caa_events.
push_back(
"INST_RETIRED:ANY_P" );
324 nhm_caa_events.
push_back(
"ITLB_MISSES:WALK_COMPLETED" );
325 nhm_caa_events.
push_back(
"L2_RQSTS:IFETCH_HIT" );
326 nhm_caa_events.
push_back(
"L2_RQSTS:IFETCH_MISS" );
327 nhm_caa_events.
push_back(
"MEM_INST_RETIRED:LOADS" );
328 nhm_caa_events.
push_back(
"MEM_INST_RETIRED:STORES" );
329 nhm_caa_events.
push_back(
"MEM_LOAD_RETIRED:L2_HIT" );
330 nhm_caa_events.
push_back(
"MEM_LOAD_RETIRED:L3_MISS" );
331 nhm_caa_events.
push_back(
"MEM_LOAD_RETIRED:L3_UNSHARED_HIT" );
332 nhm_caa_events.
push_back(
"MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM" );
333 nhm_caa_events.
push_back(
"MEM_UNCORE_RETIRED:LOCAL_DRAM" );
334 nhm_caa_events.
push_back(
"MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM" );
335 nhm_caa_events.
push_back(
"MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT" );
336 nhm_caa_events.
push_back(
"MEM_UNCORE_RETIRED:REMOTE_DRAM" );
337 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM" );
338 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM" );
339 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP" );
340 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD" );
341 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM" );
342 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT" );
343 nhm_caa_events.
push_back(
"RESOURCE_STALLS:ANY" );
344 nhm_caa_events.
push_back(
"SSEX_UOPS_RETIRED:PACKED_DOUBLE" );
345 nhm_caa_events.
push_back(
"SSEX_UOPS_RETIRED:PACKED_SINGLE" );
346 nhm_caa_events.
push_back(
"UOPS_DECODED:MS CMASK=1" );
347 nhm_caa_events.
push_back(
"UOPS_ISSUED:ANY CMASK=1 INV=1" );
348 nhm_caa_events.
push_back(
"ITLB_MISS_RETIRED" );
349 nhm_caa_events.
push_back(
"UOPS_RETIRED:ANY" );
354 if ( find( C_events.
begin(), C_events.
end(), ( *it ) ) == C_events.
end() ) {
355 fprintf( stderr,
"ERROR: Cannot find event %s!!!\naborting...\n", ( *it ).c_str() );
364 if ( find( C_events.
begin(), C_events.
end(), ( *it ) ) == C_events.
end() ) {
365 fprintf( stderr,
"ERROR: Cannot find event %s!!!\naborting...\n", ( *it ).c_str() );
373 core_caa_events_displ.
push_back(
"Total Cycles" );
374 core_caa_events_displ.
push_back(
"Stalled Cycles" );
375 core_caa_events_displ.
push_back(
"% of Total Cycles" );
376 core_caa_events_displ.
push_back(
"Instructions Retired" );
377 core_caa_events_displ.
push_back(
"CPI" );
379 core_caa_events_displ.
push_back(
"iMargin" );
380 core_caa_events_displ.
push_back(
"iFactor" );
382 core_caa_events_displ.
push_back(
"Counted Stalled Cycles" );
384 core_caa_events_displ.
push_back(
"L2 Miss Impact" );
385 core_caa_events_displ.
push_back(
"L2 Miss % of counted Stalled Cycles" );
387 core_caa_events_displ.
push_back(
"L2 Hit Impact" );
388 core_caa_events_displ.
push_back(
"L2 Hit % of counted Stalled Cycles" );
390 core_caa_events_displ.
push_back(
"L1 DTLB Miss Impact" );
391 core_caa_events_displ.
push_back(
"L1 DTLB Miss % of counted Stalled Cycles" );
393 core_caa_events_displ.
push_back(
"LCP Stalls Impact" );
394 core_caa_events_displ.
push_back(
"LCP Stalls % of counted Stalled Cycles" );
396 core_caa_events_displ.
push_back(
"Store-Fwd Stalls Impact" );
397 core_caa_events_displ.
push_back(
"Store-Fwd Stalls % of counted Stalled Cycles" );
399 core_caa_events_displ.
push_back(
"Loads Blocked by Unknown Address Store Impact" );
400 core_caa_events_displ.
push_back(
"Loads Blocked % of Store-Fwd Stalls Cycles" );
401 core_caa_events_displ.
push_back(
"Loads Overlapped with Stores Impact" );
402 core_caa_events_displ.
push_back(
"Loads Overlapped % of Store-Fwd Stalls Cycles" );
403 core_caa_events_displ.
push_back(
"Loads Spanning across Cache Lines Impact" );
404 core_caa_events_displ.
push_back(
"Loads Spanning % of Store-Fwd Stalls Cycles" );
406 core_caa_events_displ.
push_back(
"Load Instructions" );
407 core_caa_events_displ.
push_back(
"Load % of all Instructions" );
408 core_caa_events_displ.
push_back(
"Store Instructions" );
409 core_caa_events_displ.
push_back(
"Store % of all Instructions" );
410 core_caa_events_displ.
push_back(
"Branch Instructions" );
411 core_caa_events_displ.
push_back(
"Branch % of all Instructions" );
412 core_caa_events_displ.
push_back(
"Packed SIMD Computational Instructions" );
413 core_caa_events_displ.
push_back(
"Packed SIMD % of all Instructions" );
414 core_caa_events_displ.
push_back(
"Other Instructions" );
415 core_caa_events_displ.
push_back(
"Other % of all Instructions" );
417 core_caa_events_displ.
push_back(
"ITLB Miss Rate in %" );
418 core_caa_events_displ.
push_back(
"% of Mispredicted Branches" );
424 ( it->second )[
"Total Cycles"] = ( it->second )[
"UNHALTED_CORE_CYCLES"];
425 ( it->second )[
"Stalled Cycles"] = ( it->second )[
"RS_UOPS_DISPATCHED CMASK=1 INV=1"];
426 ( it->second )[
"L2 Miss Impact"] = ( it->second )[
"MEM_LOAD_RETIRED:L2_LINE_MISS"] *
CORE_L2_MISS_CYCLES;
427 ( it->second )[
"L2 Hit Impact"] =
428 ( ( it->second )[
"MEM_LOAD_RETIRED:L1D_LINE_MISS"] - ( it->second )[
"MEM_LOAD_RETIRED:L2_LINE_MISS"] ) *
432 ( it->second )[
"Loads Blocked by Unknown Address Store Impact"] =
434 ( it->second )[
"Loads Overlapped with Stores Impact"] =
436 ( it->second )[
"Loads Spanning across Cache Lines Impact"] =
438 ( it->second )[
"Store-Fwd Stalls Impact"] = ( it->second )[
"Loads Blocked by Unknown Address Store Impact"] +
439 ( it->second )[
"Loads Overlapped with Stores Impact"] +
440 ( it->second )[
"Loads Spanning across Cache Lines Impact"];
441 ( it->second )[
"Counted Stalled Cycles"] =
442 ( it->second )[
"L2 Miss Impact"] + ( it->second )[
"L2 Hit Impact"] + ( it->second )[
"LCP Stalls Impact"] +
443 ( it->second )[
"L1 DTLB Miss Impact"] + ( it->second )[
"Store-Fwd Stalls Impact"];
444 ( it->second )[
"Instructions Retired"] = ( it->second )[
"INSTRUCTIONS_RETIRED"];
445 ( it->second )[
"ITLB Miss Rate in %"] =
446 ( ( it->second )[
"ITLB_MISS_RETIRED"] / ( it->second )[
"INSTRUCTIONS_RETIRED"] ) * 100;
447 ( it->second )[
"Branch Instructions"] = ( it->second )[
"BRANCH_INSTRUCTIONS_RETIRED"];
448 ( it->second )[
"Load Instructions"] = ( it->second )[
"INST_RETIRED:LOADS"];
449 ( it->second )[
"Store Instructions"] = ( it->second )[
"INST_RETIRED:STORES"];
450 ( it->second )[
"Other Instructions"] = ( it->second )[
"INST_RETIRED:OTHER"] -
451 ( it->second )[
"SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] -
452 ( it->second )[
"BRANCH_INSTRUCTIONS_RETIRED"];
453 ( it->second )[
"% of Mispredicted Branches"] =
454 ( ( it->second )[
"MISPREDICTED_BRANCH_RETIRED"] / ( it->second )[
"BRANCH_INSTRUCTIONS_RETIRED"] ) * 100;
455 ( it->second )[
"Packed SIMD Computational Instructions"] =
456 ( it->second )[
"SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"];
457 ( it->second )[
"Counted Instructions Retired"] =
458 ( it->second )[
"Branch Instructions"] + ( it->second )[
"Load Instructions"] +
459 ( it->second )[
"Store Instructions"] + ( it->second )[
"Other Instructions"] +
460 ( it->second )[
"Packed SIMD Computational Instructions"];
461 ( it->second )[
"CPI"] = ( it->second )[
"UNHALTED_CORE_CYCLES"] / ( it->second )[
"INSTRUCTIONS_RETIRED"];
463 double localPerformanceImprovement = ( it->second )[
"CPI"] /
EXPECTED_CPI;
464 double cyclesAfterImprovement = ( it->second )[
"UNHALTED_CORE_CYCLES"] / localPerformanceImprovement;
465 double totalCyclesAfterImprovement = totalCycles - ( it->second )[
"UNHALTED_CORE_CYCLES"] + cyclesAfterImprovement;
466 ( it->second )[
"iMargin"] = 100 - ( totalCyclesAfterImprovement / totalCycles ) * 100;
468 ( it->second )[
"% of Total Cycles"] =
469 ( it->second )[
"RS_UOPS_DISPATCHED CMASK=1 INV=1"] * 100 / ( it->second )[
"UNHALTED_CORE_CYCLES"];
470 ( it->second )[
"L2 Miss % of counted Stalled Cycles"] =
471 ( it->second )[
"L2 Miss Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles"];
472 ( it->second )[
"L2 Hit % of counted Stalled Cycles"] =
473 ( it->second )[
"L2 Hit Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles"];
474 ( it->second )[
"L1 DTLB Miss % of counted Stalled Cycles"] =
475 ( it->second )[
"L1 DTLB Miss Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles"];
476 ( it->second )[
"LCP Stalls % of counted Stalled Cycles"] =
477 ( it->second )[
"LCP Stalls Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles"];
478 ( it->second )[
"Store-Fwd Stalls % of counted Stalled Cycles"] =
479 ( it->second )[
"Store-Fwd Stalls Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles"];
480 ( it->second )[
"Loads Blocked % of Store-Fwd Stalls Cycles"] =
481 ( it->second )[
"Loads Blocked by Unknown Address Store Impact"] * 100 /
482 ( it->second )[
"Store-Fwd Stalls Impact"];
483 ( it->second )[
"Loads Overlapped % of Store-Fwd Stalls Cycles"] =
484 ( it->second )[
"Loads Overlapped with Stores Impact"] * 100 / ( it->second )[
"Store-Fwd Stalls Impact"];
485 ( it->second )[
"Loads Spanning % of Store-Fwd Stalls Cycles"] =
486 ( it->second )[
"Loads Spanning across Cache Lines Impact"] * 100 / ( it->second )[
"Store-Fwd Stalls Impact"];
488 ( it->second )[
"Load % of all Instructions"] =
489 ( it->second )[
"INST_RETIRED:LOADS"] * 100 / ( it->second )[
"Counted Instructions Retired"];
490 ( it->second )[
"Store % of all Instructions"] =
491 ( it->second )[
"INST_RETIRED:STORES"] * 100 / ( it->second )[
"Counted Instructions Retired"];
492 ( it->second )[
"Branch % of all Instructions"] =
493 ( it->second )[
"BRANCH_INSTRUCTIONS_RETIRED"] * 100 / ( it->second )[
"Counted Instructions Retired"];
494 ( it->second )[
"Packed SIMD % of all Instructions"] =
495 ( it->second )[
"SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] * 100 /
496 ( it->second )[
"Counted Instructions Retired"];
497 ( it->second )[
"Other % of all Instructions"] =
498 ( it->second )[
"Other Instructions"] * 100 / ( it->second )[
"Counted Instructions Retired"];
503 nhm_caa_events_displ.
push_back(
"Total Cycles" );
504 nhm_caa_events_displ.
push_back(
"Instructions Retired" );
507 nhm_caa_events_displ.
push_back(
"iMargin" );
508 nhm_caa_events_displ.
push_back(
"iFactor" );
510 nhm_caa_events_displ.
push_back(
"Stalled Cycles" );
511 nhm_caa_events_displ.
push_back(
"% of Total Cycles" );
512 nhm_caa_events_displ.
push_back(
"Total Counted Stalled Cycles" );
514 nhm_caa_events_displ.
push_back(
"Instruction Starvation % of Total Cycles" );
515 nhm_caa_events_displ.
push_back(
"# of Instructions per Call" );
516 nhm_caa_events_displ.
push_back(
"% of Total Cycles spent handling FP exceptions" );
518 nhm_caa_events_displ.
push_back(
"Counted Stalled Cycles due to Load Ops" );
520 nhm_caa_events_displ.
push_back(
"L2 Hit Impact" );
521 nhm_caa_events_displ.
push_back(
"L2 Hit % of Load Stalls" );
523 nhm_caa_events_displ.
push_back(
"L3 Unshared Hit Impact" );
524 nhm_caa_events_displ.
push_back(
"L3 Unshared Hit % of Load Stalls" );
526 nhm_caa_events_displ.
push_back(
"L2 Other Core Hit Impact" );
527 nhm_caa_events_displ.
push_back(
"L2 Other Core Hit % of Load Stalls" );
529 nhm_caa_events_displ.
push_back(
"L2 Other Core Hit Modified Impact" );
530 nhm_caa_events_displ.
push_back(
"L2 Other Core Hit Modified % of Load Stalls" );
532 nhm_caa_events_displ.
push_back(
"L3 Miss -> Local DRAM Hit Impact" );
533 nhm_caa_events_displ.
push_back(
"L3 Miss -> Remote DRAM Hit Impact" );
534 nhm_caa_events_displ.
push_back(
"L3 Miss -> Remote Cache Hit Impact" );
535 nhm_caa_events_displ.
push_back(
"L3 Miss -> Total Impact" );
536 nhm_caa_events_displ.
push_back(
"L3 Miss % of Load Stalls" );
538 nhm_caa_events_displ.
push_back(
"L1 DTLB Miss Impact" );
539 nhm_caa_events_displ.
push_back(
"L1 DTLB Miss % of Load Stalls" );
541 nhm_caa_events_displ.
push_back(
"Cycles spent during DIV & SQRT Ops" );
542 nhm_caa_events_displ.
push_back(
"DIV & SQRT Ops % of counted Stalled Cycles" );
544 nhm_caa_events_displ.
push_back(
"Total L2 IFETCH misses" );
545 nhm_caa_events_displ.
push_back(
"% of L2 IFETCH misses" );
547 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by Local DRAM" );
548 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by L3 (Modified)" );
549 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by L3 (Clean Snoop)" );
550 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by Remote L2" );
551 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by Remote DRAM" );
552 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by L3 (No Snoop)" );
554 nhm_caa_events_displ.
push_back(
"Total L2 IFETCH miss Impact" );
556 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by Local DRAM" );
557 nhm_caa_events_displ.
push_back(
"Local DRAM IFECTHes % Impact" );
559 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by L3 (Modified)" );
560 nhm_caa_events_displ.
push_back(
"L3 (Modified) IFECTHes % Impact" );
562 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by L3 (Clean Snoop)" );
563 nhm_caa_events_displ.
push_back(
"L3 (Clean Snoop) IFECTHes % Impact" );
565 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by Remote L2" );
566 nhm_caa_events_displ.
push_back(
"Remote L2 IFECTHes % Impact" );
568 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by Remote DRAM" );
569 nhm_caa_events_displ.
push_back(
"Remote DRAM IFECTHes % Impact" );
571 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by L3 (No Snoop)" );
572 nhm_caa_events_displ.
push_back(
"L3 (No Snoop) IFECTHes % Impact" );
574 nhm_caa_events_displ.
push_back(
"Total Branch Instructions Executed" );
575 nhm_caa_events_displ.
push_back(
"% of Mispredicted Branches" );
577 nhm_caa_events_displ.
push_back(
"Direct Near Calls % of Total Branches Executed" );
578 nhm_caa_events_displ.
push_back(
"Indirect Near Calls % of Total Branches Executed" );
579 nhm_caa_events_displ.
push_back(
"Indirect Near Non-Calls % of Total Branches Executed" );
580 nhm_caa_events_displ.
push_back(
"All Near Calls % of Total Branches Executed" );
581 nhm_caa_events_displ.
push_back(
"All Non Calls % of Total Branches Executed" );
582 nhm_caa_events_displ.
push_back(
"All Returns % of Total Branches Executed" );
584 nhm_caa_events_displ.
push_back(
"Total Branch Instructions Retired" );
585 nhm_caa_events_displ.
push_back(
"Conditionals % of Total Branches Retired" );
586 nhm_caa_events_displ.
push_back(
"Near Calls % of Total Branches Retired" );
588 nhm_caa_events_displ.
push_back(
"L1 ITLB Miss Impact" );
589 nhm_caa_events_displ.
push_back(
"ITLB Miss Rate in %" );
591 nhm_caa_events_displ.
push_back(
"Branch Instructions" );
592 nhm_caa_events_displ.
push_back(
"Branch % of all Instructions" );
594 nhm_caa_events_displ.
push_back(
"Load Instructions" );
595 nhm_caa_events_displ.
push_back(
"Load % of all Instructions" );
597 nhm_caa_events_displ.
push_back(
"Store Instructions" );
598 nhm_caa_events_displ.
push_back(
"Store % of all Instructions" );
600 nhm_caa_events_displ.
push_back(
"Other Instructions" );
601 nhm_caa_events_displ.
push_back(
"Other % of all Instructions" );
603 nhm_caa_events_displ.
push_back(
"Packed UOPS Retired" );
604 nhm_caa_events_displ.
push_back(
"Packed % of all UOPS Retired" );
610 ( it->second )[
"Total Cycles"] = ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"];
612 ( it->second )[
"L2 Hit Impact"] = ( it->second )[
"MEM_LOAD_RETIRED:L2_HIT"] *
I7_L2_HIT_CYCLES;
613 ( it->second )[
"L3 Unshared Hit Impact"] =
615 if ( ( it->second )[
"MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] >
616 ( it->second )[
"MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] ) {
617 ( it->second )[
"L2 Other Core Hit Impact"] = ( ( it->second )[
"MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] -
618 ( it->second )[
"MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] ) *
621 ( it->second )[
"L2 Other Core Hit Impact"] = 0.0;
623 ( it->second )[
"L2 Other Core Hit Modified Impact"] =
625 ( it->second )[
"L3 Miss -> Local DRAM Hit Impact"] =
627 ( it->second )[
"L3 Miss -> Remote DRAM Hit Impact"] =
629 ( it->second )[
"L3 Miss -> Remote Cache Hit Impact"] =
631 ( it->second )[
"L3 Miss -> Total Impact"] = ( it->second )[
"L3 Miss -> Local DRAM Hit Impact"] +
632 ( it->second )[
"L3 Miss -> Remote DRAM Hit Impact"] +
633 ( it->second )[
"L3 Miss -> Remote Cache Hit Impact"];
634 ( it->second )[
"L1 DTLB Miss Impact"] =
636 ( it->second )[
"Counted Stalled Cycles due to Load Ops"] =
637 ( it->second )[
"L3 Miss -> Total Impact"] + ( it->second )[
"L2 Hit Impact"] +
638 ( it->second )[
"L1 DTLB Miss Impact"] + ( it->second )[
"L3 Unshared Hit Impact"] +
639 ( it->second )[
"L2 Other Core Hit Modified Impact"] + ( it->second )[
"L2 Other Core Hit Impact"];
640 ( it->second )[
"Cycles spent during DIV & SQRT Ops"] = ( it->second )[
"ARITH:CYCLES_DIV_BUSY"];
641 ( it->second )[
"Total Counted Stalled Cycles"] =
642 ( it->second )[
"Counted Stalled Cycles due to Load Ops"] + ( it->second )[
"Cycles spent during DIV & SQRT Ops"];
643 ( it->second )[
"Stalled Cycles"] =
644 ( it->second )[
"Total Counted Stalled Cycles"];
645 ( it->second )[
"% of Total Cycles"] =
646 ( it->second )[
"Stalled Cycles"] * 100 / ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"];
647 ( it->second )[
"L3 Miss % of Load Stalls"] =
648 ( it->second )[
"L3 Miss -> Total Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles due to Load Ops"];
649 ( it->second )[
"L2 Hit % of Load Stalls"] =
650 ( it->second )[
"L2 Hit Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles due to Load Ops"];
651 ( it->second )[
"L1 DTLB Miss % of Load Stalls"] =
652 ( it->second )[
"L1 DTLB Miss Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles due to Load Ops"];
653 ( it->second )[
"L3 Unshared Hit % of Load Stalls"] =
654 ( it->second )[
"L3 Unshared Hit Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles due to Load Ops"];
655 ( it->second )[
"L2 Other Core Hit % of Load Stalls"] =
656 ( it->second )[
"L2 Other Core Hit Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles due to Load Ops"];
657 ( it->second )[
"L2 Other Core Hit Modified % of Load Stalls"] =
658 ( it->second )[
"L2 Other Core Hit Modified Impact"] * 100 /
659 ( it->second )[
"Counted Stalled Cycles due to Load Ops"];
660 ( it->second )[
"DIV & SQRT Ops % of counted Stalled Cycles"] =
661 ( it->second )[
"Cycles spent during DIV & SQRT Ops"] * 100 / ( it->second )[
"Total Counted Stalled Cycles"];
663 ( it->second )[
"Cycles IFETCH served by Local DRAM"] =
665 ( it->second )[
"Cycles IFETCH served by L3 (Modified)"] =
667 ( it->second )[
"Cycles IFETCH served by L3 (Clean Snoop)"] =
669 ( it->second )[
"Cycles IFETCH served by Remote L2"] =
671 ( it->second )[
"Cycles IFETCH served by Remote DRAM"] =
673 ( it->second )[
"Cycles IFETCH served by L3 (No Snoop)"] =
675 ( it->second )[
"Total L2 IFETCH miss Impact"] =
676 ( it->second )[
"Cycles IFETCH served by Local DRAM"] + ( it->second )[
"Cycles IFETCH served by L3 (Modified)"] +
677 ( it->second )[
"Cycles IFETCH served by L3 (Clean Snoop)"] +
678 ( it->second )[
"Cycles IFETCH served by Remote L2"] + ( it->second )[
"Cycles IFETCH served by Remote DRAM"] +
679 ( it->second )[
"Cycles IFETCH served by L3 (No Snoop)"];
680 ( it->second )[
"Local DRAM IFECTHes % Impact"] =
681 ( it->second )[
"Cycles IFETCH served by Local DRAM"] * 100 / ( it->second )[
"Total L2 IFETCH miss Impact"];
682 ( it->second )[
"L3 (Modified) IFECTHes % Impact"] =
683 ( it->second )[
"Cycles IFETCH served by L3 (Modified)"] * 100 / ( it->second )[
"Total L2 IFETCH miss Impact"];
684 ( it->second )[
"L3 (Clean Snoop) IFECTHes % Impact"] = ( it->second )[
"Cycles IFETCH served by L3 (Clean Snoop)"] *
685 100 / ( it->second )[
"Total L2 IFETCH miss Impact"];
686 ( it->second )[
"Remote L2 IFECTHes % Impact"] =
687 ( it->second )[
"Cycles IFETCH served by Remote L2"] * 100 / ( it->second )[
"Total L2 IFETCH miss Impact"];
688 ( it->second )[
"Remote DRAM IFECTHes % Impact"] =
689 ( it->second )[
"Cycles IFETCH served by Remote DRAM"] * 100 / ( it->second )[
"Total L2 IFETCH miss Impact"];
690 ( it->second )[
"L3 (No Snoop) IFECTHes % Impact"] =
691 ( it->second )[
"Cycles IFETCH served by L3 (No Snoop)"] * 100 / ( it->second )[
"Total L2 IFETCH miss Impact"];
692 ( it->second )[
"Total L2 IFETCH misses"] = ( it->second )[
"L2_RQSTS:IFETCH_MISS"];
693 ( it->second )[
"% of IFETCHes served by Local DRAM"] =
694 ( it->second )[
"OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * 100 / ( it->second )[
"L2_RQSTS:IFETCH_MISS"];
695 ( it->second )[
"% of IFETCHes served by L3 (Modified)"] =
696 ( it->second )[
"OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * 100 / ( it->second )[
"L2_RQSTS:IFETCH_MISS"];
697 ( it->second )[
"% of IFETCHes served by L3 (Clean Snoop)"] =
698 ( it->second )[
"OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * 100 /
699 ( it->second )[
"L2_RQSTS:IFETCH_MISS"];
700 ( it->second )[
"% of IFETCHes served by Remote L2"] =
701 ( it->second )[
"OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * 100 /
702 ( it->second )[
"L2_RQSTS:IFETCH_MISS"];
703 ( it->second )[
"% of IFETCHes served by Remote DRAM"] =
704 ( it->second )[
"OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * 100 / ( it->second )[
"L2_RQSTS:IFETCH_MISS"];
705 ( it->second )[
"% of IFETCHes served by L3 (No Snoop)"] =
706 ( it->second )[
"OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * 100 / ( it->second )[
"L2_RQSTS:IFETCH_MISS"];
707 ( it->second )[
"% of L2 IFETCH misses"] =
708 ( it->second )[
"L2_RQSTS:IFETCH_MISS"] * 100 /
709 ( ( it->second )[
"L2_RQSTS:IFETCH_MISS"] + ( it->second )[
"L2_RQSTS:IFETCH_HIT"] );
710 ( it->second )[
"L1 ITLB Miss Impact"] =
713 ( it->second )[
"Total Branch Instructions Executed"] = ( it->second )[
"BR_INST_EXEC:ANY"];
714 ( it->second )[
"% of Mispredicted Branches"] =
715 ( it->second )[
"BR_MISP_EXEC:ANY"] * 100 / ( it->second )[
"BR_INST_EXEC:ANY"];
716 ( it->second )[
"Direct Near Calls % of Total Branches Executed"] =
717 ( it->second )[
"BR_INST_EXEC:DIRECT_NEAR_CALL"] * 100 / ( it->second )[
"Total Branch Instructions Executed"];
718 ( it->second )[
"Indirect Near Calls % of Total Branches Executed"] =
719 ( it->second )[
"BR_INST_EXEC:INDIRECT_NEAR_CALL"] * 100 / ( it->second )[
"Total Branch Instructions Executed"];
720 ( it->second )[
"Indirect Near Non-Calls % of Total Branches Executed"] =
721 ( it->second )[
"BR_INST_EXEC:INDIRECT_NON_CALL"] * 100 / ( it->second )[
"Total Branch Instructions Executed"];
722 ( it->second )[
"All Near Calls % of Total Branches Executed"] =
723 ( it->second )[
"BR_INST_EXEC:NEAR_CALLS"] * 100 / ( it->second )[
"Total Branch Instructions Executed"];
724 ( it->second )[
"All Non Calls % of Total Branches Executed"] =
725 ( it->second )[
"BR_INST_EXEC:NON_CALLS"] * 100 / ( it->second )[
"Total Branch Instructions Executed"];
726 ( it->second )[
"All Returns % of Total Branches Executed"] =
727 ( it->second )[
"BR_INST_EXEC:RETURN_NEAR"] * 100 / ( it->second )[
"Total Branch Instructions Executed"];
728 ( it->second )[
"Total Branch Instructions Retired"] = ( it->second )[
"BR_INST_RETIRED:ALL_BRANCHES"];
729 ( it->second )[
"Conditionals % of Total Branches Retired"] =
730 ( it->second )[
"BR_INST_RETIRED:CONDITIONAL"] * 100 / ( it->second )[
"Total Branch Instructions Retired"];
731 ( it->second )[
"Near Calls % of Total Branches Retired"] =
732 ( it->second )[
"BR_INST_RETIRED:NEAR_CALL"] * 100 / ( it->second )[
"Total Branch Instructions Retired"];
734 ( it->second )[
"Instruction Starvation % of Total Cycles"] =
735 ( ( it->second )[
"UOPS_ISSUED:ANY CMASK=1 INV=1"] - ( it->second )[
"RESOURCE_STALLS:ANY"] ) * 100 /
736 ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"];
737 ( it->second )[
"% of Total Cycles spent handling FP exceptions"] =
738 ( it->second )[
"UOPS_DECODED:MS CMASK=1"] * 100 / ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"];
739 ( it->second )[
"# of Instructions per Call"] =
740 ( it->second )[
"INST_RETIRED:ANY_P"] / ( it->second )[
"BR_INST_EXEC:NEAR_CALLS"];
742 ( it->second )[
"Instructions Retired"] = ( it->second )[
"INST_RETIRED:ANY_P"];
743 ( it->second )[
"ITLB Miss Rate in %"] =
744 ( ( it->second )[
"ITLB_MISS_RETIRED"] / ( it->second )[
"INST_RETIRED:ANY_P"] ) * 100;
746 ( it->second )[
"Branch Instructions"] = ( it->second )[
"BR_INST_RETIRED:ALL_BRANCHES"];
747 ( it->second )[
"Load Instructions"] = ( it->second )[
"MEM_INST_RETIRED:LOADS"];
748 ( it->second )[
"Store Instructions"] = ( it->second )[
"MEM_INST_RETIRED:STORES"];
749 ( it->second )[
"Other Instructions"] =
750 ( it->second )[
"Instructions Retired"] - ( it->second )[
"MEM_INST_RETIRED:LOADS"] -
751 ( it->second )[
"MEM_INST_RETIRED:STORES"] - ( it->second )[
"BR_INST_RETIRED:ALL_BRANCHES"];
752 ( it->second )[
"Packed UOPS Retired"] =
753 ( it->second )[
"SSEX_UOPS_RETIRED:PACKED_DOUBLE"] + ( it->second )[
"SSEX_UOPS_RETIRED:PACKED_SINGLE"];
754 ( it->second )[
"CPI"] = ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"] / ( it->second )[
"INST_RETIRED:ANY_P"];
756 double localPerformanceImprovement = ( it->second )[
"CPI"] /
EXPECTED_CPI;
757 double cyclesAfterImprovement = ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"] / localPerformanceImprovement;
758 double totalCyclesAfterImprovement =
759 totalCycles - ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"] + cyclesAfterImprovement;
760 ( it->second )[
"iMargin"] = 100 - ( totalCyclesAfterImprovement / totalCycles ) * 100;
762 ( it->second )[
"Load % of all Instructions"] =
763 ( it->second )[
"MEM_INST_RETIRED:LOADS"] * 100 / ( it->second )[
"INST_RETIRED:ANY_P"];
764 ( it->second )[
"Store % of all Instructions"] =
765 ( it->second )[
"MEM_INST_RETIRED:STORES"] * 100 / ( it->second )[
"INST_RETIRED:ANY_P"];
766 ( it->second )[
"Branch % of all Instructions"] =
767 ( it->second )[
"BR_INST_RETIRED:ALL_BRANCHES"] * 100 / ( it->second )[
"INST_RETIRED:ANY_P"];
768 ( it->second )[
"Other % of all Instructions"] =
769 ( it->second )[
"Other Instructions"] * 100 / ( it->second )[
"INST_RETIRED:ANY_P"];
771 ( it->second )[
"Packed % of all UOPS Retired"] =
772 ( it->second )[
"Packed UOPS Retired"] * 100 / ( it->second )[
"UOPS_RETIRED:ANY"];
799 void init(
const char*
name,
const char* architecture,
const char* event_name,
unsigned int c_mask,
800 unsigned int inv_mask,
unsigned int smpl_period ) {
821 bool get_max(
char* index,
unsigned int& value ) {
823 [](
const auto& lhs,
const auto& rhs ) { return lhs.second < rhs.second; } );
825 strcpy( index, ( max_pos->first ).c_str() );
826 value = max_pos->second;
841 for (
int i = 0; i <
n; i++ ) {
844 strcat( s_mod,
"<" );
847 strcat( s_mod,
">" );
850 strcat( s_mod,
"&" );
853 strcat( s_mod,
""" );
859 strcat( s_mod, to_app );
// Special case for names containing the substring operator: find its first
// occurrence, then cut the string right after the operator token that follows.
// NOTE(review): strstr matches the substring anywhere, so an identifier that
// merely contains those letters would also take this path; confirm this is acceptable.
870 char* operator_string_begin =
const_cast<char*
>( strstr( demangled_symbol,
"operator" ) );
871 if ( operator_string_begin != NULL ) {
// Skip the 8 characters of the keyword itself, then any spaces after it.
872 char* operator_string_end = operator_string_begin + 8;
873 while ( *operator_string_end ==
' ' ) operator_string_end++;
// Every branch of the chain that starts here has the same shape: strstr(s, tok) == s
// is a starts-with test; on a match the pointer advances by the token length and a
// NUL terminator is written there, truncating the input buffer in place.
// Longer tokens are tested before their own prefixes (delete[] before delete,
// the three-character shifts before the two-character ones, and so on), so the
// first branch that matches is also the longest match.
874 if ( strstr( operator_string_end,
"delete[]" ) == operator_string_end ) {
875 operator_string_end += 8;
876 *operator_string_end =
'\0';
877 }
else if ( strstr( operator_string_end,
"delete" ) == operator_string_end ) {
878 operator_string_end += 6;
879 *operator_string_end =
'\0';
880 }
else if ( strstr( operator_string_end,
"new[]" ) == operator_string_end ) {
881 operator_string_end += 5;
882 *operator_string_end =
'\0';
883 }
else if ( strstr( operator_string_end,
"new" ) == operator_string_end ) {
884 operator_string_end += 3;
885 *operator_string_end =
'\0';
886 }
else if ( strstr( operator_string_end,
">>=" ) == operator_string_end ) {
887 operator_string_end += 3;
888 *operator_string_end =
'\0';
889 }
else if ( strstr( operator_string_end,
"<<=" ) == operator_string_end ) {
890 operator_string_end += 3;
891 *operator_string_end =
'\0';
892 }
else if ( strstr( operator_string_end,
"->*" ) == operator_string_end ) {
893 operator_string_end += 3;
894 *operator_string_end =
'\0';
895 }
else if ( strstr( operator_string_end,
"<<" ) == operator_string_end ) {
896 operator_string_end += 2;
897 *operator_string_end =
'\0';
898 }
else if ( strstr( operator_string_end,
">>" ) == operator_string_end ) {
899 operator_string_end += 2;
900 *operator_string_end =
'\0';
901 }
else if ( strstr( operator_string_end,
">=" ) == operator_string_end ) {
902 operator_string_end += 2;
903 *operator_string_end =
'\0';
904 }
else if ( strstr( operator_string_end,
"<=" ) == operator_string_end ) {
905 operator_string_end += 2;
906 *operator_string_end =
'\0';
907 }
else if ( strstr( operator_string_end,
"==" ) == operator_string_end ) {
908 operator_string_end += 2;
909 *operator_string_end =
'\0';
910 }
else if ( strstr( operator_string_end,
"!=" ) == operator_string_end ) {
911 operator_string_end += 2;
912 *operator_string_end =
'\0';
913 }
else if ( strstr( operator_string_end,
"|=" ) == operator_string_end ) {
914 operator_string_end += 2;
915 *operator_string_end =
'\0';
916 }
else if ( strstr( operator_string_end,
"&=" ) == operator_string_end ) {
917 operator_string_end += 2;
918 *operator_string_end =
'\0';
919 }
else if ( strstr( operator_string_end,
"^=" ) == operator_string_end ) {
920 operator_string_end += 2;
921 *operator_string_end =
'\0';
922 }
else if ( strstr( operator_string_end,
"%=" ) == operator_string_end ) {
923 operator_string_end += 2;
924 *operator_string_end =
'\0';
925 }
else if ( strstr( operator_string_end,
"/=" ) == operator_string_end ) {
926 operator_string_end += 2;
927 *operator_string_end =
'\0';
928 }
else if ( strstr( operator_string_end,
"*=" ) == operator_string_end ) {
929 operator_string_end += 2;
930 *operator_string_end =
'\0';
931 }
else if ( strstr( operator_string_end,
"-=" ) == operator_string_end ) {
932 operator_string_end += 2;
933 *operator_string_end =
'\0';
934 }
else if ( strstr( operator_string_end,
"+=" ) == operator_string_end ) {
935 operator_string_end += 2;
936 *operator_string_end =
'\0';
937 }
else if ( strstr( operator_string_end,
"&&" ) == operator_string_end ) {
938 operator_string_end += 2;
939 *operator_string_end =
'\0';
940 }
else if ( strstr( operator_string_end,
"||" ) == operator_string_end ) {
941 operator_string_end += 2;
942 *operator_string_end =
'\0';
943 }
else if ( strstr( operator_string_end,
"[]" ) == operator_string_end ) {
944 operator_string_end += 2;
945 *operator_string_end =
'\0';
946 }
else if ( strstr( operator_string_end,
"()" ) == operator_string_end ) {
947 operator_string_end += 2;
948 *operator_string_end =
'\0';
949 }
else if ( strstr( operator_string_end,
"++" ) == operator_string_end ) {
950 operator_string_end += 2;
951 *operator_string_end =
'\0';
952 }
else if ( strstr( operator_string_end,
"--" ) == operator_string_end ) {
953 operator_string_end += 2;
954 *operator_string_end =
'\0';
955 }
else if ( strstr( operator_string_end,
"->" ) == operator_string_end ) {
956 operator_string_end += 2;
957 *operator_string_end =
'\0';
958 }
else if ( strstr( operator_string_end,
"<" ) == operator_string_end ) {
959 operator_string_end += 1;
960 *operator_string_end =
'\0';
961 }
else if ( strstr( operator_string_end,
">" ) == operator_string_end ) {
962 operator_string_end += 1;
963 *operator_string_end =
'\0';
964 }
else if ( strstr( operator_string_end,
"~" ) == operator_string_end ) {
965 operator_string_end += 1;
966 *operator_string_end =
'\0';
967 }
else if ( strstr( operator_string_end,
"!" ) == operator_string_end ) {
968 operator_string_end += 1;
969 *operator_string_end =
'\0';
970 }
else if ( strstr( operator_string_end,
"+" ) == operator_string_end ) {
971 operator_string_end += 1;
972 *operator_string_end =
'\0';
973 }
else if ( strstr( operator_string_end,
"-" ) == operator_string_end ) {
974 operator_string_end += 1;
975 *operator_string_end =
'\0';
976 }
else if ( strstr( operator_string_end,
"*" ) == operator_string_end ) {
977 operator_string_end += 1;
978 *operator_string_end =
'\0';
979 }
else if ( strstr( operator_string_end,
"/" ) == operator_string_end ) {
980 operator_string_end += 1;
981 *operator_string_end =
'\0';
982 }
else if ( strstr( operator_string_end,
"%" ) == operator_string_end ) {
983 operator_string_end += 1;
984 *operator_string_end =
'\0';
985 }
else if ( strstr( operator_string_end,
"^" ) == operator_string_end ) {
986 operator_string_end += 1;
987 *operator_string_end =
'\0';
988 }
else if ( strstr( operator_string_end,
"&" ) == operator_string_end ) {
989 operator_string_end += 1;
990 *operator_string_end =
'\0';
991 }
else if ( strstr( operator_string_end,
"|" ) == operator_string_end ) {
992 operator_string_end += 1;
993 *operator_string_end =
'\0';
994 }
else if ( strstr( operator_string_end,
"," ) == operator_string_end ) {
995 operator_string_end += 1;
996 *operator_string_end =
'\0';
997 }
else if ( strstr( operator_string_end,
"=" ) == operator_string_end ) {
998 operator_string_end += 1;
999 *operator_string_end =
'\0';
1001 return operator_string_begin;
1003 char* end_of_demangled_name =
const_cast<char*
>( strrchr( demangled_symbol,
')' ) );
1004 if ( end_of_demangled_name != NULL ) {
1007 while ( pars > 0 && end_of_demangled_name != demangled_symbol ) {
1008 c = *( --end_of_demangled_name );
1011 }
else if (
c ==
'(' ) {
1016 return demangled_symbol;
1018 char* end_of_func_name = end_of_demangled_name;
1019 if ( end_of_func_name != NULL ) {
1020 *end_of_func_name =
'\0';
1021 char c = *( --end_of_func_name );
1024 while ( pars > 0 && end_of_func_name != demangled_symbol ) {
1025 c = *( --end_of_func_name );
1028 }
else if (
c ==
'<' ) {
1032 *end_of_func_name =
'\0';
1034 c = *( --end_of_func_name );
1035 while ( isalnum(
c ) ||
c ==
'_' ||
c ==
'~' ) {
c = *( --end_of_func_name ); }
1036 return ++end_of_func_name;
1038 return demangled_symbol;
// Build the per-module page path: <dir>/HTML/<module_name>.html
// NOTE(review): strcpy/strcat do no bounds checking and the size of module_filename
// is not visible in this excerpt; confirm it can hold dir plus module_name plus 11 more
// characters and the terminator.
1052 strcpy( module_filename, dir );
1053 strcat( module_filename,
"/HTML/" );
1054 strcat( module_filename, module_name );
1055 strcat( module_filename,
".html" );
1058 strcpy( event, ( cur_module->
get_event() ).c_str() );
1061 if ( result == modules_tot_samples.
end() )
1063 if ( ( !strcmp( event,
"UNHALTED_CORE_CYCLES" ) && !nehalem ) ||
1064 ( !strcmp( event,
"CPU_CLK_UNHALTED:THREAD_P" ) && nehalem ) ) {
1065 modules_tot_samples.
insert(
// Create (or truncate) the module page and report on stderr if that fails.
1070 module_file = fopen( module_filename,
"w" );
1071 if ( module_file == NULL ) {
1072 fprintf( stderr,
"ERROR: Cannot create file %s!!!\naborting...\n", module_filename );
// Emit the fixed page preamble: HTML 4.01 Transitional doctype, a head whose title is
// the module name, then the body opening with a heading and the start of the
// bullet list of sampled events.
// NOTE(review): module_name is written through a plain %s with no HTML escaping
// visible at this point; confirm the name cannot contain markup characters.
1075 fprintf( module_file,
"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" "
1076 "\"http://www.w3.org/TR/html4/loose.dtd\">\n" );
1077 fprintf( module_file,
"<html>\n" );
1078 fprintf( module_file,
"<head>\n" );
1079 fprintf( module_file,
"<title>\n" );
1080 fprintf( module_file,
"%s\n", module_name );
1081 fprintf( module_file,
"</title>\n" );
1082 fprintf( module_file,
"</head>\n" );
1083 fprintf( module_file,
"<body>\n" );
1084 fprintf( module_file,
"<h2>%s</h2><br/>Events Sampled:<br/>\n", module_name );
1085 fprintf( module_file,
"<ul>\n" );
1087 fprintf( module_file,
"<li><a href=\"#%s\">%s</a></li>\n", it->c_str(), it->c_str() );
1089 fprintf( module_file,
"</ul>\n" );
1092 if ( ( !strcmp( event,
"UNHALTED_CORE_CYCLES" ) && !nehalem ) ||
1093 ( !strcmp( event,
"CPU_CLK_UNHALTED:THREAD_P" ) && nehalem ) ) {
1096 module_file = fopen( module_filename,
"a" );
1100 strcpy( event_str, event );
1102 sprintf( event_str + strlen( event_str ),
" CMASK=%d", cur_module->
get_c_mask() );
1105 sprintf( event_str + strlen( event_str ),
" INV=%d", cur_module->
get_inv_mask() );
1107 fprintf( module_file,
"<a name=\"%s\"><a>\n", event_str );
1108 fprintf( module_file,
"<table cellpadding=\"5\">\n" );
1109 fprintf( module_file,
"<tr bgcolor=\"#EEEEEE\">\n" );
1110 fprintf( module_file,
1111 "<th colspan=\"6\" align=\"left\">%s -- cmask: %u -- invmask: %u -- Total Samples: %u -- "
1112 "Sampling Period: %d</th>\n",
1115 fprintf( module_file,
"</tr>\n" );
1116 fprintf( module_file,
"<tr bgcolor=\"#EEEEEE\">\n" );
1117 fprintf( module_file,
"<th align=\"left\">Samples</th>\n" );
1118 fprintf( module_file,
"<th align=\"left\">Percentage</th>\n" );
1119 fprintf( module_file,
"<th align=\"left\">Symbol Name</th>\n" );
1120 fprintf( module_file,
"<th align=\"left\">Library Name</th>\n" );
1121 fprintf( module_file,
"<th align=\"left\">Complete Signature</th>\n" );
1122 fprintf( module_file,
"<th align=\"left\">Library Pathname</th>\n" );
1123 fprintf( module_file,
"</tr>\n" );
1124 for (
int j = 0;
j < 20;
j++ ) {
1146 bool res = cur_module->
get_max( index, value );
1148 char* sym_end = strchr( index,
'%' );
1149 if ( sym_end == NULL )
1151 fprintf( stderr,
"ERROR: Invalid sym and lib name! : %s\naborting...\n", index );
1154 memcpy( sym, index, strlen( index ) - strlen( sym_end ) );
1155 strcpy( lib, sym_end + 1 );
1158 strcpy( temp, sym );
1159 strcpy( simple_sym, (
func_name( temp ) ) );
1160 if ( strrchr( lib,
'/' ) != NULL && *( strrchr( lib,
'/' ) + 1 ) !=
'\0' ) {
1161 strcpy( simple_lib, strrchr( lib,
'/' ) + 1 );
1163 strcpy( simple_lib, lib );
1166 fprintf( module_file,
"<tr bgcolor=\"#FFFFCC\">\n" );
1168 fprintf( module_file,
"<tr bgcolor=\"#CCFFCC\">\n" );
1170 fprintf( module_file,
"<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%u</td>\n", value );
1171 fprintf( module_file,
"<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%f%%</td>\n",
1177 fprintf( module_file,
"<td style=\"font-family:courier;\">%s</td>\n", simple_sym_mod );
1178 fprintf( module_file,
"<td style=\"font-family:courier;\">%s</td>\n", simple_lib_mod );
1179 fprintf( module_file,
"<td style=\"font-family:courier;\">%s</td>\n", sym_mod );
1180 fprintf( module_file,
"<td style=\"font-family:courier;\">%s</td>\n</tr>\n", lib_mod );
1182 fprintf( module_file,
"</table><br/><br/>\n" );
1183 int res = fclose( module_file );
1185 fprintf( stderr,
"ERROR: Cannot close file %s!!!\naborting...\n", module_filename );
1212 unsigned int module_num = 0;
1216 strcpy( path_name, dir );
1217 strcat( path_name,
"/" );
1219 gzFile res_file = gzopen( path_name,
"rb" );
1221 if ( res_file != NULL ) {
1226 sscanf(
line,
"%s %s %u %u %u", arch, event, &cmask, &inv, &
sp );
1227 if ( !strcmp( arch,
"NHM" ) )
1234 if ( strchr(
line,
' ' ) == NULL )
1236 if ( module_num > 0 ) {
1238 cur_module->
clear();
1241 char* end_sym = strchr(
line,
'%' );
1242 if ( end_sym == NULL )
1244 fprintf( stderr,
"ERROR: Invalid module name. \nLINE: %s\naborting...\n",
line );
1248 memcpy( cur_module_name,
line, strlen(
line ) - strlen( end_sym ) );
1249 cur_module->
init( cur_module_name, arch, event, cmask, inv,
sp );
1250 cur_module->
set_total( atoi( end_sym + 1 ) );
1254 unsigned int value = 0, libOffset = 0;
1264 sscanf(
line,
"%s %s %u %u", symbol, libName, &libOffset, &value );
// Resolve the library name read from the input line to a canonical path.
// The buffer is zeroed first; realpath returns NULL when resolution fails.
1265 char realPathName_s[FILENAME_MAX];
1266 bzero( realPathName_s, FILENAME_MAX );
1267 char* realPathName = realpath( libName, realPathName_s );
1268 if ( realPathName != NULL && strlen( realPathName ) > 0 ) {
// libsInfo acts as a per-path cache: a FileInfo is constructed only the first time
// a resolved path is seen, then reused for the symbol lookup by offset.
1270 result = libsInfo.find( realPathName );
1271 if ( result == libsInfo.end() ) { libsInfo[realPathName] =
FileInfo( realPathName,
true ); }
1272 const char* temp_sym = libsInfo[realPathName].symbolByOffset( libOffset );
1273 if ( temp_sym != NULL && strlen( temp_sym ) > 0 ) {
// Try to demangle; status 0 means success and the returned buffer is
// released with free once it has been copied into final_sym.
1275 char* demangled_symbol = abi::__cxa_demangle( temp_sym, NULL, NULL, &status );
1276 if ( status == 0 ) {
1277 strcpy( final_sym, demangled_symbol );
1278 free( demangled_symbol );
// The remaining copies are fallbacks: the raw symbol text, a placeholder, and
// the symbol/library strings parsed from the line. The else lines that select
// between them are not visible in this excerpt.
// NOTE(review): all of these are unbounded strcpy calls; the sizes of final_sym
// and final_lib are not shown here, so confirm they fit the longest inputs.
1280 strcpy( final_sym, temp_sym );
1283 strcpy( final_sym,
"???" );
1285 strcpy( final_lib, realPathName );
1287 strcpy( final_sym, symbol );
1288 strcpy( final_lib, libName );
1292 strcpy( index, final_sym );
1293 strcat( index,
"%" );
1294 strcat( index, final_lib );
1300 cur_module->
clear();
1301 gzclose( res_file );
1304 fprintf( stderr,
"ERROR: Unable to open input file: %s\naborting...\n",
filename );
1326 strcpy( path_name, dir );
1327 strcat( path_name,
"/" );
1329 gzFile res_file = gzopen( path_name,
"rb" );
1330 if ( res_file != NULL ) {
1335 sscanf(
line,
"%s %s %s %s %s\n", arch, event, cmask_str, inv_str, sp_str );
1337 if ( atoi( cmask_str ) > 0 ) {
1338 event_str +=
" CMASK=";
1339 event_str += cmask_str;
1341 if ( atoi( inv_str ) > 0 ) {
1342 event_str +=
" INV=";
1343 event_str += inv_str;
1348 fprintf( stderr,
"ERROR: Unable to open input file: %s\naborting...\n",
filename );
1351 gzclose( res_file );
1360 i != modules_tot_samples.
end(); i++ ) {
1362 strcpy( module_filename, dir );
1363 strcat( module_filename,
"/HTML/" );
1364 strcat( module_filename, ( i->first ).c_str() );
1365 strcat( module_filename,
".html" );
1366 FILE* module_file = fopen( module_filename,
"a" );
1367 if ( module_file == NULL ) {
1368 fprintf( stderr,
"ERROR: Unable to append to file: %s\naborting...\n", module_filename );
1371 fprintf( module_file,
"</body>\n</html>\n" );
1372 if ( fclose( module_file ) ) {
1373 fprintf( stderr,
"ERROR: Cannot close file %s!!!\naborting...\n", module_filename );
1400 int number_of_modules = 0;
1402 int no_of_values = 0;
1405 strcpy( path_name, dir );
1406 strcat( path_name,
"/" );
// Open the counter file and parse its header: five whitespace-separated fields
// (architecture, event name, cmask, inv, sampling period), all read as strings.
// NOTE(review): fp is handed to fscanf in the very next statement (the embedded
// original line numbers 1408 and 1409 are consecutive), so a failed fopen would pass
// NULL to fscanf. The %s conversions are also unbounded. Confirm and guard.
1408 FILE* fp = fopen( path_name,
"r" );
1409 int stat = fscanf( fp,
"%s %s %s %s %s\n", arch, event, cmask_str, inv_str, sp_str );
1414 if ( !strcmp( arch,
"NHM" ) )
// A positive cmask or inv value is appended to event_str as a suffix, so the
// same event measured with different masks gets a distinct key.
1419 if ( atoi( cmask_str ) > 0 ) {
1420 event_str +=
" CMASK=";
1421 event_str += cmask_str;
1423 if ( atoi( inv_str ) > 0 ) {
1424 event_str +=
" INV=";
1425 event_str += inv_str;
// Body: read one whitespace-delimited token at a time. A token starting with a
// letter begins a new module; a token starting with a digit is a counter value
// that is added to the running sum.
1428 while ( fscanf( fp,
"%s\n",
line ) != EOF ) {
1429 if ( isalpha(
line[0] ) )
// Before switching modules, store the average for the previous one.
// NOTE(review): the update of no_of_values is not visible in this excerpt; if a
// module has no numeric tokens this divides by zero. Confirm.
1431 if ( number_of_modules > 0 ) {
1432 C_modules[cur_module_name][event_str] = (double)cur_sum / no_of_values;
1436 strcpy( cur_module_name,
line );
1437 number_of_modules++;
1438 }
else if ( isdigit(
line[0] ) )
1440 cur_sum += strtol(
line, NULL, 10 );
// Same averaging statement as inside the loop; presumably this one flushes the
// last module after the loop ends (the closing brace is not visible here).
1444 C_modules[cur_module_name][event_str] = (double)cur_sum / no_of_values;
1446 return number_of_modules;
1452 "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\n" );
1453 fprintf( fp,
"<html>\n" );
1454 fprintf( fp,
"<head>\n" );
1455 fprintf( fp,
"<title>\n" );
1456 fprintf( fp,
"Analysis Result\n" );
1457 fprintf( fp,
"</title>\n" );
1458 fprintf( fp,
"<script src=\"sorttable.js\"></script>\n" );
1459 fprintf( fp,
"<style>\ntable.sortable thead "
1460 "{\nbackground-color:#eee;\ncolor:#666666;\nfont-weight:bold;\ncursor:default;\nfont-family:courier;\n}"
1462 fprintf( fp,
"</head>\n" );
1463 fprintf( fp,
"<body link=\"black\">\n" );
1464 fprintf( fp,
"<h1>RESULTS:</h1><br/>Click for detailed symbol view...<p/>\n" );
1465 fprintf( fp,
"<table class=\"sortable\" cellpadding=\"5\">\n" );
1466 fprintf( fp,
"<tr>\n" );
1467 fprintf( fp,
"<th>MODULE NAME</th>\n" );
1469 if ( strlen( it->c_str() ) == 0 )
1470 fprintf( fp,
"<th bgcolor=\"#FFFFFF\"> </th>\n" );
1472 fprintf( fp,
"<th>%s</th>\n", ( *it ).c_str() );
1474 fprintf( fp,
"</tr>\n" );
1483 fprintf( fp,
"<tr bgcolor=\"#FFFFCC\">\n" );
1485 fprintf( fp,
"<tr bgcolor=\"#CCFFCC\">\n" );
1486 fprintf( fp,
"<td style=\"font-family:monospace;font-size:large;color:Black\"><a href=\"%s.html\">%s</a></td>\n",
1487 ( it->first ).c_str(), ( it->first ).c_str() );
1489 if ( strlen( jt->c_str() ) == 0 ) {
1490 fprintf( fp,
"<td bgcolor=\"#FFFFFF\"> </td>" );
1492 if ( ( it->second ).find( *jt ) == ( it->second ).end() ) {
1493 fprintf( stderr,
"ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", ( *jt ).c_str() );
1496 fprintf( fp,
"<td style=\"font-family:monospace;font-size:large;color:DarkBlue\" align=\"right\">%.2f</td>\n",
1497 ( it->second )[*jt] );
1500 fprintf( fp,
"</tr>\n" );
1506 fprintf( fp,
"</table>\n</body>\n</html>\n" );
1511 fprintf( fp,
"MODULE NAME" );
1513 if ( strlen( it->c_str() ) == 0 ) {
1515 fprintf( fp,
",%s", ( *it ).c_str() );
1517 fprintf( fp,
"\n" );
1524 fprintf( fp,
"%s", ( it->first ).c_str() );
1526 if ( strlen( jt->c_str() ) == 0 ) {
1528 if ( ( it->second ).find( *jt ) == ( it->second ).end() ) {
1529 fprintf( stderr,
"ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", ( *jt ).c_str() );
1532 fprintf( fp,
",%.2f", ( it->second )[*jt] );
1535 fprintf( fp,
"\n" );
// Fragment of the normalization helper. The loop header that drives `it` is not
// visible in this excerpt.
1548 double counter_value;
// Track the largest value of the requested field seen across the entries.
1551 counter_value = ( it->second )[field];
1552 if (
max < counter_value )
max = counter_value;
// Scale value relative to that maximum, onto a range whose top is normalizeTo.
// Only done when all three quantities are strictly positive; the result for the
// other case is not visible here.
1554 if ( value > 0 &&
max > 0 && normalizeTo > 0 ) {
1555 return 1. * value /
max * normalizeTo;
1570 1. -
normalize(
"Packed % of all UOPS Retired", ( it->second )[
"Packed % of all UOPS Retired"], 1 );
1571 double misspnorm =
normalize(
"% of Mispredicted Branches", ( it->second )[
"% of Mispredicted Branches"], 1 );
1572 double stallnorm =
normalize(
"Stalled Cycles", ( it->second )[
"Stalled Cycles"], 1 );
1573 ( it->second )[
"iFactor"] = stallnorm * ( simdnorm + misspnorm + stallnorm );
1579 1. -
normalize(
"Packed SIMD % of all Instructions", ( it->second )[
"Packed SIMD % of all Instructions"], 1 );
1580 double misspnorm =
normalize(
"% of Mispredicted Branches", ( it->second )[
"% of Mispredicted Branches"], 1 );
1581 double stallnorm =
normalize(
"Stalled Cycles", ( it->second )[
"Stalled Cycles"], 1 );
1582 ( it->second )[
"iFactor"] = stallnorm * ( simdnorm + misspnorm + stallnorm );
1596 sum += ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"];
1601 sum += ( it->second )[
"UNHALTED_CORE_CYCLES"];
// Command line: one mandatory directory plus up to two optional flags.
1611 if ( argc < 2 || argc > 4 ) {
1612 printf(
"\n\nUsage: %s DIRECTORY [--caa] [--csv]\n\n",
argv[0] );
// Scan everything after the directory argument for the two known switches.
// The two tests are independent ifs; what happens to an unrecognised argument
// is not visible in this excerpt.
1618 for (
int i = 2; i <
argc; i++ ) {
1619 if ( !strcmp(
argv[i],
"--caa" ) ) caa =
true;
1620 if ( !strcmp(
argv[i],
"--csv" ) ) csv =
true;
// Output directory is <DIRECTORY>/HTML, created with rwx for user and group and
// r-x for others.
// NOTE(review): argv[1] is copied with an unbounded strcpy; the size of dir is not
// visible here, so confirm that an overlong path cannot overflow it.
1624 strcpy( dir,
argv[1] );
1626 strcat( dir,
"/HTML" );
1627 int res = mkdir( dir, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH );
1629 fprintf( stderr,
"ERROR: Cannot create directory %s\naborting...\n", dir );
1635 struct dirent* dirp;
1636 int num_of_modules = 0;
1637 if ( ( dp = opendir(
argv[1] ) ) == NULL ) {
1638 printf(
"Error(%d) opening %s\n", errno,
argv[1] );
1641 while ( ( dirp = readdir( dp ) ) != NULL ) {
1642 if ( strstr( dirp->d_name,
"_S_" ) != NULL && strstr( dirp->d_name,
".txt.gz" ) != NULL && !csv ) {
1644 fprintf( stderr,
"ERROR: Cannot read file %s\naborting...\n", dirp->d_name );
1650 sort( S_events.
begin(), S_events.
end() );
1651 if ( ( dp = opendir(
argv[1] ) ) == NULL ) {
1652 printf(
"Error(%d) opening %s\n", errno,
argv[1] );
1655 while ( ( dirp = readdir( dp ) ) != NULL ) {
1656 if ( strstr( dirp->d_name,
"_S_" ) != NULL && strstr( dirp->d_name,
".txt.gz" ) != NULL && !csv ) {
1658 fprintf( stderr,
"ERROR: Cannot read file %s\naborting...\n", dirp->d_name );
1661 }
else if ( strstr( dirp->d_name,
"_C_" ) != NULL && strstr( dirp->d_name,
".txt" ) != NULL ) {
1663 if ( res > num_of_modules ) { num_of_modules = res; }
1670 fprintf( stderr,
"ERROR: Cannot finalize HTML pages!!!\naborting...\n" );
1678 sprintf( filepath,
"%s/HTML/index.html",
argv[1] );
1680 sprintf( filepath,
"%s/results.csv",
argv[1] );
1681 FILE* fp = fopen( filepath,
"w" );
1683 fprintf( stderr,
"ERROR: Cannot create file index.html!!!\naborting...\n" );
1692 fprintf( stderr,
"(core) ERROR: One or more events for CAA missing!\naborting...\n" );
1709 fprintf( stderr,
"(nehalem) ERROR: One or more events for CAA missing!\naborting...\n" );
1740 sprintf( src,
"sorttable.js" );
1741 sprintf(
dst,
"%s/HTML/sorttable.js",
argv[1] );
1742 int fd_src = open( src, O_RDONLY );
1743 if ( fd_src == -1 ) {
1744 fprintf( stderr,
"ERROR: Cannot open file \"%s\"!\naborting...\n", src );
1747 int fd_dst = open(
dst, O_WRONLY | O_CREAT | O_TRUNC, 0644 );
1748 if ( fd_dst == -1 ) {
1749 fprintf( stderr,
"ERROR: Cannot open file \"%s\" (%s)!\naborting...\n",
dst, strerror( errno ) );
1753 while (
read( fd_src, &
c, 1 ) ) {
1754 if ( write( fd_dst, &
c, 1 ) == -1 ) {