33 #include <sys/types.h>
// Assumed cost, in CPU cycles, of one occurrence of each event in the CORE_* analysis.
// A cost is multiplied by the matching raw event count to estimate lost cycles, e.g.
// "L2 Miss Impact" = MEM_LOAD_RETIRED:L2_LINE_MISS * CORE_L2_MISS_CYCLES.
// Most uses lie outside this excerpt; presumably every constant is applied the same way.
50 #define CORE_L2_MISS_CYCLES 200
// The only non-integer cost: any product using it is a floating-point value.
51 #define CORE_L2_HIT_CYCLES 14.5
52 #define CORE_L1_DTLB_MISS_CYCLES 10
53 #define CORE_LCP_STALL_CYCLES 6
54 #define CORE_UNKNOWN_ADDR_STORE_CYCLES 5
55 #define CORE_OVERLAPPING_CYCLES 6
56 #define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES 20
// Per-event cycle costs for the I7_* analysis (the nhm_caa metrics), used the same way, e.g.
// "L2 Hit Impact" = MEM_LOAD_RETIRED:L2_HIT * I7_L2_HIT_CYCLES.
59 #define I7_L1_DTLB_WALK_COMPLETED_CYCLES 35
60 #define I7_L1_ITLB_WALK_COMPLETED_CYCLES 35
61 #define I7_L2_HIT_CYCLES 6
62 #define I7_L3_UNSHARED_HIT_CYCLES 35
63 #define I7_OTHER_CORE_L2_HIT_CYCLES 60
64 #define I7_OTHER_CORE_L2_HITM_CYCLES 75
65 #define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES 225 // average of 200 (not modified) and 225-250 (modified)
66 #define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES 360 // average of 350 (not modified) and 370 (modified)
67 #define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES 180
// Costs for instruction fetches, keyed by where the fetch was served.
// NOTE(review): presumably paired with the OFFCORE_RESPONSE_0:DMND_IFETCH:* counters; the
// statements using them are outside this excerpt - confirm.
68 #define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT 200
69 #define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT 350
70 #define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP 35
71 #define I7_IFETCH_L2_MISS_L3_HIT_SNOOP 60
72 #define I7_IFETCH_L2_MISS_L3_HITM 75
73 #define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD 180
// Upper bound on the number of modules; its use is not visible in this excerpt.
75 #define MAX_MODULES 1000
// Reference cycles-per-instruction. A measured "CPI" is divided by this value to obtain the
// improvement ratio that feeds the "iMargin" metric.
77 #define EXPECTED_CPI 0.25
// Length limits for the various strings handled by the tool (file names, symbol and library
// names, input lines, event/module names, ...).
// NOTE(review): presumably these size fixed char buffers filled with strcpy/strcat; the
// declarations are outside this excerpt, so check every user before changing a value.
79 #define MAX_FILENAME_LENGTH 1024
80 #define MAX_SAMPLE_INDEX_LENGTH 10000
81 #define MAX_SYM_LENGTH 15000
82 #define MAX_SYM_MOD_LENGTH 20000
83 #define MAX_LIB_LENGTH 5000
84 #define MAX_LIB_MOD_LENGTH 7000
85 #define MAX_SIMPLE_SYM_LENGTH 300
86 #define MAX_SIMPLE_SYM_MOD_LENGTH 500
87 #define MAX_SIMPLE_LIB_LENGTH 300
88 #define MAX_SIMPLE_LIB_MOD_LENGTH 500
89 #define MAX_LINE_LENGTH 20000
90 #define MAX_EVENT_NAME_LENGTH 150
91 #define MAX_MODULE_NAME_LENGTH 250
92 #define MAX_VALUE_STRING_LENGTH 250
93 #define MAX_ARCH_NAME_LENGTH 20
94 #define MAX_CMASK_STR_LENGTH 5
95 #define MAX_INV_STR_LENGTH 5
96 #define MAX_SP_STR_LENGTH 50
// NOTE(review): presumably the chunk size used when draining popen() output - confirm.
98 #define PIPE_BUFFER_LENGTH 1000
// Run `cmd` through popen() in "r" mode, i.e. the child's standard output is read here.
103 pipe = popen( cmd,
"r" );
// Failure message; the condition guarding it is outside this excerpt (presumably a failed
// popen). Note that it is written to stdout, not stderr.
105 printf(
"Cannot open pipe. Exiting...\n" );
// NOTE(review): feof() only turns true after a read has already hit end-of-file, so the loop
// body (not visible here) must check the result of its read call before using the buffer.
111 while ( !feof(
pipe ) ) {
// Expose the collected text (`result`) as an input string stream for line-wise parsing.
115 iss = std::make_unique<std::istringstream>( result, std::istringstream::in );
// At least one whitespace character is required at the current position. The pointer is
// advanced by the post-increment whether or not the test succeeds.
// NOTE(review): isspace() is only defined for values representable as unsigned char (or EOF);
// if the input can contain bytes >= 0x80 on a signed-char platform a cast is needed - confirm.
132 if ( !isspace( *srcbuffer++ ) ) {
return false; }
// Consume the rest of the whitespace run.
133 while ( isspace( *srcbuffer ) ) { srcbuffer++; }
// Hand the first non-whitespace position back to the caller.
134 *destbuffer = srcbuffer;
// Tests whether `srcbuffer` begins with the literal `strptr`.
//   strptr    - NUL-terminated prefix to look for.
//   srcbuffer - NUL-terminated text to examine; it is read, never modified.
//   dstbuffer - on a match receives the address of the first character after the prefix;
//               left untouched on a mismatch.
// Returns false on a mismatch. Because `srcbuffer` is only read before `*dstbuffer` is
// written, callers can pass the same pointer variable for both (the "LOAD"/"off"/"vaddr"
// call sites do exactly that).
// NOTE(review): the success return lies beyond the visible lines - presumably `return true`.
145 bool skipString(
const char* strptr,
const char* srcbuffer,
const char** dstbuffer ) {
// strncmp() yields non-zero when the first strlen(strptr) characters differ.
146 if ( strncmp( srcbuffer, strptr, strlen( strptr ) ) ) {
return false; }
147 *dstbuffer = srcbuffer + strlen( strptr );
// Exact hit: entry `i` is recorded at precisely the requested offset, so its name is returned.
164 if ( i->OFFSET == offset ) {
return i->NAME.c_str(); }
// Fallback return of entry `i`'s name. Which entry `i` designates at this point depends on
// code outside this excerpt. The returned pointer refers to storage owned by `i->NAME`.
170 return i->NAME.c_str();
// Part 1: scan the objdump output for a line of the shape
//     LOAD off <hex> vaddr <hex>
// and derive the VM base as vaddr - off. `matched` starts out false; the statement that sets
// it is outside this excerpt.
200 bool matched =
false;
201 while ( objdump.
output() ) {
// Stop once the stream has gone bad (the read itself is on a line not shown here).
211 if ( !objdump.
output() )
break;
212 if (
line.empty() )
continue;
// Cursor into the current line; the skip helpers advance it in place.
213 const char* lineptr =
line.c_str();
// Any line that does not continue with the expected keyword is ignored.
215 if ( !
skipString(
"LOAD", lineptr, &lineptr ) )
continue;
217 if ( !
skipString(
"off", lineptr, &lineptr ) )
continue;
// File offset, parsed as hexadecimal. endptr == lineptr means no digit was consumed.
// NOTE(review): strtol() returns long; storing it in an int truncates values above INT_MAX.
219 int initialBase = strtol( lineptr, &endptr, 16 );
220 if ( lineptr == endptr )
continue;
223 if ( !
skipString(
"vaddr", lineptr, &lineptr ) )
continue;
// Virtual address, parsed as hexadecimal, with the same truncation caveat as above.
225 int finalBase = strtol( lineptr, &endptr, 16 );
226 if ( lineptr == endptr )
continue;
227 vmbase = finalBase - initialBase;
// Diagnostics for the case where no usable LOAD line was found; the guarding condition is
// outside this excerpt (presumably `!matched`).
232 fprintf(
stderr,
"Cannot determine VM base address for %s\n",
NAME.
c_str() );
233 fprintf(
stderr,
"Error while running `objdump -p %s`\n",
NAME.
c_str() );
// Part 2: walk the nm output line by line to collect symbols.
238 while (
nm.output() ) {
241 if ( !
nm.output() )
break;
242 if (
line.empty() )
continue;
// Expected layout: <decimal address> ' ' <one non-space character> ' ' <symbol name>.
// Every line that deviates from it is skipped.
// NOTE(review): base 10 implies nm is asked for decimal addresses; the command line is not
// visible in this excerpt - confirm. The long -> int narrowing applies here as well.
246 int address = strtol(
begin, &endptr, 10 );
247 if ( endptr ==
begin )
continue;
248 if ( *endptr++ !=
' ' )
continue;
// The single character between the two blanks must not itself be whitespace.
249 if ( isspace( *endptr++ ) )
continue;
250 if ( *endptr++ !=
' ' )
continue;
251 char* symbolName = endptr;
// The name has to run to the end of the line: any embedded whitespace rejects the line.
252 while ( *endptr && !isspace( *endptr ) ) endptr++;
253 if ( *endptr != 0 )
continue;
// Names beginning with '.' are ignored.
255 if ( symbolName[0] ==
'.' )
continue;
// Translate the address into an offset relative to the VM base computed in part 1.
260 int offset = address - vmbase;
283 core_caa_events.
push_back(
"BRANCH_INSTRUCTIONS_RETIRED" );
284 core_caa_events.
push_back(
"ILD_STALL" );
285 core_caa_events.
push_back(
"INST_RETIRED:LOADS" );
286 core_caa_events.
push_back(
"INST_RETIRED:OTHER" );
287 core_caa_events.
push_back(
"INST_RETIRED:STORES" );
288 core_caa_events.
push_back(
"INSTRUCTIONS_RETIRED" );
289 core_caa_events.
push_back(
"LOAD_BLOCK:OVERLAP_STORE" );
290 core_caa_events.
push_back(
"LOAD_BLOCK:STA" );
291 core_caa_events.
push_back(
"LOAD_BLOCK:UNTIL_RETIRE" );
292 core_caa_events.
push_back(
"MEM_LOAD_RETIRED:DTLB_MISS" );
293 core_caa_events.
push_back(
"MEM_LOAD_RETIRED:L1D_LINE_MISS" );
294 core_caa_events.
push_back(
"MEM_LOAD_RETIRED:L2_LINE_MISS" );
295 core_caa_events.
push_back(
"MISPREDICTED_BRANCH_RETIRED" );
298 core_caa_events.
push_back(
"RS_UOPS_DISPATCHED CMASK=1 INV=1" );
299 core_caa_events.
push_back(
"SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE" );
300 core_caa_events.
push_back(
"UNHALTED_CORE_CYCLES" );
307 nhm_caa_events.
push_back(
"ARITH:CYCLES_DIV_BUSY" );
308 nhm_caa_events.
push_back(
"BR_INST_EXEC:ANY" );
309 nhm_caa_events.
push_back(
"BR_INST_EXEC:DIRECT_NEAR_CALL" );
310 nhm_caa_events.
push_back(
"BR_INST_EXEC:INDIRECT_NEAR_CALL" );
311 nhm_caa_events.
push_back(
"BR_INST_EXEC:INDIRECT_NON_CALL" );
312 nhm_caa_events.
push_back(
"BR_INST_EXEC:NEAR_CALLS" );
313 nhm_caa_events.
push_back(
"BR_INST_EXEC:NON_CALLS" );
314 nhm_caa_events.
push_back(
"BR_INST_EXEC:RETURN_NEAR" );
315 nhm_caa_events.
push_back(
"BR_INST_RETIRED:ALL_BRANCHES" );
316 nhm_caa_events.
push_back(
"BR_INST_RETIRED:CONDITIONAL" );
317 nhm_caa_events.
push_back(
"BR_INST_RETIRED:NEAR_CALL" );
318 nhm_caa_events.
push_back(
"BR_MISP_EXEC:ANY" );
319 nhm_caa_events.
push_back(
"CPU_CLK_UNHALTED:THREAD_P" );
320 nhm_caa_events.
push_back(
"DTLB_LOAD_MISSES:WALK_COMPLETED" );
321 nhm_caa_events.
push_back(
"INST_RETIRED:ANY_P" );
322 nhm_caa_events.
push_back(
"ITLB_MISSES:WALK_COMPLETED" );
323 nhm_caa_events.
push_back(
"L2_RQSTS:IFETCH_HIT" );
324 nhm_caa_events.
push_back(
"L2_RQSTS:IFETCH_MISS" );
325 nhm_caa_events.
push_back(
"MEM_INST_RETIRED:LOADS" );
326 nhm_caa_events.
push_back(
"MEM_INST_RETIRED:STORES" );
327 nhm_caa_events.
push_back(
"MEM_LOAD_RETIRED:L2_HIT" );
328 nhm_caa_events.
push_back(
"MEM_LOAD_RETIRED:L3_MISS" );
329 nhm_caa_events.
push_back(
"MEM_LOAD_RETIRED:L3_UNSHARED_HIT" );
330 nhm_caa_events.
push_back(
"MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM" );
331 nhm_caa_events.
push_back(
"MEM_UNCORE_RETIRED:LOCAL_DRAM" );
332 nhm_caa_events.
push_back(
"MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM" );
333 nhm_caa_events.
push_back(
"MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT" );
334 nhm_caa_events.
push_back(
"MEM_UNCORE_RETIRED:REMOTE_DRAM" );
335 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM" );
336 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM" );
337 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP" );
338 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD" );
339 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM" );
340 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT" );
341 nhm_caa_events.
push_back(
"RESOURCE_STALLS:ANY" );
342 nhm_caa_events.
push_back(
"SSEX_UOPS_RETIRED:PACKED_DOUBLE" );
343 nhm_caa_events.
push_back(
"SSEX_UOPS_RETIRED:PACKED_SINGLE" );
344 nhm_caa_events.
push_back(
"UOPS_DECODED:MS CMASK=1" );
345 nhm_caa_events.
push_back(
"UOPS_ISSUED:ANY CMASK=1 INV=1" );
346 nhm_caa_events.
push_back(
"ITLB_MISS_RETIRED" );
347 nhm_caa_events.
push_back(
"UOPS_RETIRED:ANY" );
// Membership test: find() returning C_events.end() means the required event `*it` is not
// present in C_events.
352 if ( find( C_events.
begin(), C_events.
end(), ( *it ) ) == C_events.
end() ) {
// Name the missing event on stderr. The message announces an abort; the statement that
// actually terminates is outside this excerpt.
353 fprintf(
stderr,
"ERROR: Cannot find event %s!!!\naborting...\n", ( *it ).c_str() );
// The same membership test and diagnostic, repeated at a second site.
362 if ( find( C_events.
begin(), C_events.
end(), ( *it ) ) == C_events.
end() ) {
363 fprintf(
stderr,
"ERROR: Cannot find event %s!!!\naborting...\n", ( *it ).c_str() );
371 core_caa_events_displ.
push_back(
"Total Cycles" );
372 core_caa_events_displ.
push_back(
"Stalled Cycles" );
373 core_caa_events_displ.
push_back(
"% of Total Cycles" );
374 core_caa_events_displ.
push_back(
"Instructions Retired" );
375 core_caa_events_displ.
push_back(
"CPI" );
377 core_caa_events_displ.
push_back(
"iMargin" );
378 core_caa_events_displ.
push_back(
"iFactor" );
380 core_caa_events_displ.
push_back(
"Counted Stalled Cycles" );
382 core_caa_events_displ.
push_back(
"L2 Miss Impact" );
383 core_caa_events_displ.
push_back(
"L2 Miss % of counted Stalled Cycles" );
385 core_caa_events_displ.
push_back(
"L2 Hit Impact" );
386 core_caa_events_displ.
push_back(
"L2 Hit % of counted Stalled Cycles" );
388 core_caa_events_displ.
push_back(
"L1 DTLB Miss Impact" );
389 core_caa_events_displ.
push_back(
"L1 DTLB Miss % of counted Stalled Cycles" );
391 core_caa_events_displ.
push_back(
"LCP Stalls Impact" );
392 core_caa_events_displ.
push_back(
"LCP Stalls % of counted Stalled Cycles" );
394 core_caa_events_displ.
push_back(
"Store-Fwd Stalls Impact" );
395 core_caa_events_displ.
push_back(
"Store-Fwd Stalls % of counted Stalled Cycles" );
397 core_caa_events_displ.
push_back(
"Loads Blocked by Unknown Address Store Impact" );
398 core_caa_events_displ.
push_back(
"Loads Blocked % of Store-Fwd Stalls Cycles" );
399 core_caa_events_displ.
push_back(
"Loads Overlapped with Stores Impact" );
400 core_caa_events_displ.
push_back(
"Loads Overlapped % of Store-Fwd Stalls Cycles" );
401 core_caa_events_displ.
push_back(
"Loads Spanning across Cache Lines Impact" );
402 core_caa_events_displ.
push_back(
"Loads Spanning % of Store-Fwd Stalls Cycles" );
404 core_caa_events_displ.
push_back(
"Load Instructions" );
405 core_caa_events_displ.
push_back(
"Load % of all Instructions" );
406 core_caa_events_displ.
push_back(
"Store Instructions" );
407 core_caa_events_displ.
push_back(
"Store % of all Instructions" );
408 core_caa_events_displ.
push_back(
"Branch Instructions" );
409 core_caa_events_displ.
push_back(
"Branch % of all Instructions" );
410 core_caa_events_displ.
push_back(
"Packed SIMD Computational Instructions" );
411 core_caa_events_displ.
push_back(
"Packed SIMD % of all Instructions" );
412 core_caa_events_displ.
push_back(
"Other Instructions" );
413 core_caa_events_displ.
push_back(
"Other % of all Instructions" );
415 core_caa_events_displ.
push_back(
"ITLB Miss Rate in %" );
416 core_caa_events_displ.
push_back(
"% of Mispredicted Branches" );
// Raw counters copied under their display names.
422 ( it->second )[
"Total Cycles"] = ( it->second )[
"UNHALTED_CORE_CYCLES"];
423 ( it->second )[
"Stalled Cycles"] = ( it->second )[
"RS_UOPS_DISPATCHED CMASK=1 INV=1"];
// Impact = number of events x assumed cycle cost per event.
424 ( it->second )[
"L2 Miss Impact"] = ( it->second )[
"MEM_LOAD_RETIRED:L2_LINE_MISS"] *
CORE_L2_MISS_CYCLES;
// L2 hits are counted as L1D line misses that did not also miss in L2. The multiplier sits on
// a line outside this excerpt (presumably CORE_L2_HIT_CYCLES).
425 ( it->second )[
"L2 Hit Impact"] =
426 ( ( it->second )[
"MEM_LOAD_RETIRED:L1D_LINE_MISS"] - ( it->second )[
"MEM_LOAD_RETIRED:L2_LINE_MISS"] ) *
// NOTE(review): the right-hand sides of the next three assignments are on lines that are not
// part of this excerpt.
430 ( it->second )[
"Loads Blocked by Unknown Address Store Impact"] =
432 ( it->second )[
"Loads Overlapped with Stores Impact"] =
434 ( it->second )[
"Loads Spanning across Cache Lines Impact"] =
436 ( it->second )[
"Store-Fwd Stalls Impact"] = ( it->second )[
"Loads Blocked by Unknown Address Store Impact"] +
437 ( it->second )[
"Loads Overlapped with Stores Impact"] +
438 ( it->second )[
"Loads Spanning across Cache Lines Impact"];
439 ( it->second )[
"Counted Stalled Cycles"] =
440 ( it->second )[
"L2 Miss Impact"] + ( it->second )[
"L2 Hit Impact"] + ( it->second )[
"LCP Stalls Impact"] +
441 ( it->second )[
"L1 DTLB Miss Impact"] + ( it->second )[
"Store-Fwd Stalls Impact"];
442 ( it->second )[
"Instructions Retired"] = ( it->second )[
"INSTRUCTIONS_RETIRED"];
443 ( it->second )[
"ITLB Miss Rate in %"] =
444 ( ( it->second )[
"ITLB_MISS_RETIRED"] / ( it->second )[
"INSTRUCTIONS_RETIRED"] ) * 100;
445 ( it->second )[
"Branch Instructions"] = ( it->second )[
"BRANCH_INSTRUCTIONS_RETIRED"];
446 ( it->second )[
"Load Instructions"] = ( it->second )[
"INST_RETIRED:LOADS"];
447 ( it->second )[
"Store Instructions"] = ( it->second )[
"INST_RETIRED:STORES"];
448 ( it->second )[
"Other Instructions"] = ( it->second )[
"INST_RETIRED:OTHER"] -
449 ( it->second )[
"SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] -
450 ( it->second )[
"BRANCH_INSTRUCTIONS_RETIRED"];
451 ( it->second )[
"% of Mispredicted Branches"] =
452 ( ( it->second )[
"MISPREDICTED_BRANCH_RETIRED"] / ( it->second )[
"BRANCH_INSTRUCTIONS_RETIRED"] ) * 100;
453 ( it->second )[
"Packed SIMD Computational Instructions"] =
454 ( it->second )[
"SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"];
455 ( it->second )[
"Counted Instructions Retired"] =
456 ( it->second )[
"Branch Instructions"] + ( it->second )[
"Load Instructions"] +
457 ( it->second )[
"Store Instructions"] + ( it->second )[
"Other Instructions"] +
458 ( it->second )[
"Packed SIMD Computational Instructions"];
459 ( it->second )[
"CPI"] = ( it->second )[
"UNHALTED_CORE_CYCLES"] / ( it->second )[
"INSTRUCTIONS_RETIRED"];
// How far this entry's CPI is from the reference EXPECTED_CPI (ratio > 1: slower than the
// reference).
461 double localPerformanceImprovement = ( it->second )[
"CPI"] /
EXPECTED_CPI;
// Cycles this entry would take if its CPI were brought down to EXPECTED_CPI.
462 double cyclesAfterImprovement = ( it->second )[
"UNHALTED_CORE_CYCLES"] / localPerformanceImprovement;
// Overall cycle count if only this entry were improved that way.
463 double totalCyclesAfterImprovement = totalCycles - ( it->second )[
"UNHALTED_CORE_CYCLES"] + cyclesAfterImprovement;
// iMargin: percentage by which the overall cycle count would shrink.
// NOTE(review): no guard against a zero CPI or zero totalCycles is visible here; with
// floating-point operands those cases produce inf/NaN rather than a crash.
464 ( it->second )[
"iMargin"] = 100 - ( totalCyclesAfterImprovement / totalCycles ) * 100;
466 ( it->second )[
"% of Total Cycles"] =
467 ( it->second )[
"RS_UOPS_DISPATCHED CMASK=1 INV=1"] * 100 / ( it->second )[
"UNHALTED_CORE_CYCLES"];
468 ( it->second )[
"L2 Miss % of counted Stalled Cycles"] =
469 ( it->second )[
"L2 Miss Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles"];
470 ( it->second )[
"L2 Hit % of counted Stalled Cycles"] =
471 ( it->second )[
"L2 Hit Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles"];
472 ( it->second )[
"L1 DTLB Miss % of counted Stalled Cycles"] =
473 ( it->second )[
"L1 DTLB Miss Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles"];
474 ( it->second )[
"LCP Stalls % of counted Stalled Cycles"] =
475 ( it->second )[
"LCP Stalls Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles"];
476 ( it->second )[
"Store-Fwd Stalls % of counted Stalled Cycles"] =
477 ( it->second )[
"Store-Fwd Stalls Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles"];
478 ( it->second )[
"Loads Blocked % of Store-Fwd Stalls Cycles"] =
479 ( it->second )[
"Loads Blocked by Unknown Address Store Impact"] * 100 /
480 ( it->second )[
"Store-Fwd Stalls Impact"];
481 ( it->second )[
"Loads Overlapped % of Store-Fwd Stalls Cycles"] =
482 ( it->second )[
"Loads Overlapped with Stores Impact"] * 100 / ( it->second )[
"Store-Fwd Stalls Impact"];
483 ( it->second )[
"Loads Spanning % of Store-Fwd Stalls Cycles"] =
484 ( it->second )[
"Loads Spanning across Cache Lines Impact"] * 100 / ( it->second )[
"Store-Fwd Stalls Impact"];
486 ( it->second )[
"Load % of all Instructions"] =
487 ( it->second )[
"INST_RETIRED:LOADS"] * 100 / ( it->second )[
"Counted Instructions Retired"];
488 ( it->second )[
"Store % of all Instructions"] =
489 ( it->second )[
"INST_RETIRED:STORES"] * 100 / ( it->second )[
"Counted Instructions Retired"];
490 ( it->second )[
"Branch % of all Instructions"] =
491 ( it->second )[
"BRANCH_INSTRUCTIONS_RETIRED"] * 100 / ( it->second )[
"Counted Instructions Retired"];
492 ( it->second )[
"Packed SIMD % of all Instructions"] =
493 ( it->second )[
"SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] * 100 /
494 ( it->second )[
"Counted Instructions Retired"];
495 ( it->second )[
"Other % of all Instructions"] =
496 ( it->second )[
"Other Instructions"] * 100 / ( it->second )[
"Counted Instructions Retired"];
501 nhm_caa_events_displ.
push_back(
"Total Cycles" );
502 nhm_caa_events_displ.
push_back(
"Instructions Retired" );
505 nhm_caa_events_displ.
push_back(
"iMargin" );
506 nhm_caa_events_displ.
push_back(
"iFactor" );
508 nhm_caa_events_displ.
push_back(
"Stalled Cycles" );
509 nhm_caa_events_displ.
push_back(
"% of Total Cycles" );
510 nhm_caa_events_displ.
push_back(
"Total Counted Stalled Cycles" );
512 nhm_caa_events_displ.
push_back(
"Instruction Starvation % of Total Cycles" );
513 nhm_caa_events_displ.
push_back(
"# of Instructions per Call" );
514 nhm_caa_events_displ.
push_back(
"% of Total Cycles spent handling FP exceptions" );
516 nhm_caa_events_displ.
push_back(
"Counted Stalled Cycles due to Load Ops" );
518 nhm_caa_events_displ.
push_back(
"L2 Hit Impact" );
519 nhm_caa_events_displ.
push_back(
"L2 Hit % of Load Stalls" );
521 nhm_caa_events_displ.
push_back(
"L3 Unshared Hit Impact" );
522 nhm_caa_events_displ.
push_back(
"L3 Unshared Hit % of Load Stalls" );
524 nhm_caa_events_displ.
push_back(
"L2 Other Core Hit Impact" );
525 nhm_caa_events_displ.
push_back(
"L2 Other Core Hit % of Load Stalls" );
527 nhm_caa_events_displ.
push_back(
"L2 Other Core Hit Modified Impact" );
528 nhm_caa_events_displ.
push_back(
"L2 Other Core Hit Modified % of Load Stalls" );
530 nhm_caa_events_displ.
push_back(
"L3 Miss -> Local DRAM Hit Impact" );
531 nhm_caa_events_displ.
push_back(
"L3 Miss -> Remote DRAM Hit Impact" );
532 nhm_caa_events_displ.
push_back(
"L3 Miss -> Remote Cache Hit Impact" );
533 nhm_caa_events_displ.
push_back(
"L3 Miss -> Total Impact" );
534 nhm_caa_events_displ.
push_back(
"L3 Miss % of Load Stalls" );
536 nhm_caa_events_displ.
push_back(
"L1 DTLB Miss Impact" );
537 nhm_caa_events_displ.
push_back(
"L1 DTLB Miss % of Load Stalls" );
539 nhm_caa_events_displ.
push_back(
"Cycles spent during DIV & SQRT Ops" );
540 nhm_caa_events_displ.
push_back(
"DIV & SQRT Ops % of counted Stalled Cycles" );
542 nhm_caa_events_displ.
push_back(
"Total L2 IFETCH misses" );
543 nhm_caa_events_displ.
push_back(
"% of L2 IFETCH misses" );
545 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by Local DRAM" );
546 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by L3 (Modified)" );
547 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by L3 (Clean Snoop)" );
548 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by Remote L2" );
549 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by Remote DRAM" );
550 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by L3 (No Snoop)" );
552 nhm_caa_events_displ.
push_back(
"Total L2 IFETCH miss Impact" );
554 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by Local DRAM" );
555 nhm_caa_events_displ.
push_back(
"Local DRAM IFECTHes % Impact" );
557 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by L3 (Modified)" );
558 nhm_caa_events_displ.
push_back(
"L3 (Modified) IFECTHes % Impact" );
560 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by L3 (Clean Snoop)" );
561 nhm_caa_events_displ.
push_back(
"L3 (Clean Snoop) IFECTHes % Impact" );
563 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by Remote L2" );
564 nhm_caa_events_displ.
push_back(
"Remote L2 IFECTHes % Impact" );
566 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by Remote DRAM" );
567 nhm_caa_events_displ.
push_back(
"Remote DRAM IFECTHes % Impact" );
569 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by L3 (No Snoop)" );
570 nhm_caa_events_displ.
push_back(
"L3 (No Snoop) IFECTHes % Impact" );
572 nhm_caa_events_displ.
push_back(
"Total Branch Instructions Executed" );
573 nhm_caa_events_displ.
push_back(
"% of Mispredicted Branches" );
575 nhm_caa_events_displ.
push_back(
"Direct Near Calls % of Total Branches Executed" );
576 nhm_caa_events_displ.
push_back(
"Indirect Near Calls % of Total Branches Executed" );
577 nhm_caa_events_displ.
push_back(
"Indirect Near Non-Calls % of Total Branches Executed" );
578 nhm_caa_events_displ.
push_back(
"All Near Calls % of Total Branches Executed" );
579 nhm_caa_events_displ.
push_back(
"All Non Calls % of Total Branches Executed" );
580 nhm_caa_events_displ.
push_back(
"All Returns % of Total Branches Executed" );
582 nhm_caa_events_displ.
push_back(
"Total Branch Instructions Retired" );
583 nhm_caa_events_displ.
push_back(
"Conditionals % of Total Branches Retired" );
584 nhm_caa_events_displ.
push_back(
"Near Calls % of Total Branches Retired" );
586 nhm_caa_events_displ.
push_back(
"L1 ITLB Miss Impact" );
587 nhm_caa_events_displ.
push_back(
"ITLB Miss Rate in %" );
589 nhm_caa_events_displ.
push_back(
"Branch Instructions" );
590 nhm_caa_events_displ.
push_back(
"Branch % of all Instructions" );
592 nhm_caa_events_displ.
push_back(
"Load Instructions" );
593 nhm_caa_events_displ.
push_back(
"Load % of all Instructions" );
595 nhm_caa_events_displ.
push_back(
"Store Instructions" );
596 nhm_caa_events_displ.
push_back(
"Store % of all Instructions" );
598 nhm_caa_events_displ.
push_back(
"Other Instructions" );
599 nhm_caa_events_displ.
push_back(
"Other % of all Instructions" );
601 nhm_caa_events_displ.
push_back(
"Packed UOPS Retired" );
602 nhm_caa_events_displ.
push_back(
"Packed % of all UOPS Retired" );
// Raw cycle counter copied under its display name.
608 ( it->second )[
"Total Cycles"] = ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"];
// Impact = number of events x assumed cycle cost per event.
610 ( it->second )[
"L2 Hit Impact"] = ( it->second )[
"MEM_LOAD_RETIRED:L2_HIT"] *
I7_L2_HIT_CYCLES;
// NOTE(review): the right-hand side of this assignment is outside this excerpt.
611 ( it->second )[
"L3 Unshared Hit Impact"] =
// Clean other-core hits are derived as (HIT_HITM - HITM). The subtraction is only done when
// it is positive; otherwise the impact is set to 0.0 (see the assignment further down).
613 if ( ( it->second )[
"MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] >
614 ( it->second )[
"MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] ) {
// The cycle cost multiplying this difference is on a line outside this excerpt.
615 ( it->second )[
"L2 Other Core Hit Impact"] = ( ( it->second )[
"MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] -
616 ( it->second )[
"MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] ) *
// Value used when the difference is not positive (the `else` itself is not visible here).
619 ( it->second )[
"L2 Other Core Hit Impact"] = 0.0;
// NOTE(review): the right-hand sides of the next four assignments are outside this excerpt.
621 ( it->second )[
"L2 Other Core Hit Modified Impact"] =
623 ( it->second )[
"L3 Miss -> Local DRAM Hit Impact"] =
625 ( it->second )[
"L3 Miss -> Remote DRAM Hit Impact"] =
627 ( it->second )[
"L3 Miss -> Remote Cache Hit Impact"] =
// Total L3-miss impact: sum of the three destinations above.
629 ( it->second )[
"L3 Miss -> Total Impact"] = ( it->second )[
"L3 Miss -> Local DRAM Hit Impact"] +
630 ( it->second )[
"L3 Miss -> Remote DRAM Hit Impact"] +
631 ( it->second )[
"L3 Miss -> Remote Cache Hit Impact"];
// NOTE(review): right-hand side outside this excerpt.
632 ( it->second )[
"L1 DTLB Miss Impact"] =
634 ( it->second )[
"Counted Stalled Cycles due to Load Ops"] =
635 ( it->second )[
"L3 Miss -> Total Impact"] + ( it->second )[
"L2 Hit Impact"] +
636 ( it->second )[
"L1 DTLB Miss Impact"] + ( it->second )[
"L3 Unshared Hit Impact"] +
637 ( it->second )[
"L2 Other Core Hit Modified Impact"] + ( it->second )[
"L2 Other Core Hit Impact"];
638 ( it->second )[
"Cycles spent during DIV & SQRT Ops"] = ( it->second )[
"ARITH:CYCLES_DIV_BUSY"];
639 ( it->second )[
"Total Counted Stalled Cycles"] =
640 ( it->second )[
"Counted Stalled Cycles due to Load Ops"] + ( it->second )[
"Cycles spent during DIV & SQRT Ops"];
641 ( it->second )[
"Stalled Cycles"] =
642 ( it->second )[
"Total Counted Stalled Cycles"];
643 ( it->second )[
"% of Total Cycles"] =
644 ( it->second )[
"Stalled Cycles"] * 100 / ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"];
645 ( it->second )[
"L3 Miss % of Load Stalls"] =
646 ( it->second )[
"L3 Miss -> Total Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles due to Load Ops"];
647 ( it->second )[
"L2 Hit % of Load Stalls"] =
648 ( it->second )[
"L2 Hit Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles due to Load Ops"];
649 ( it->second )[
"L1 DTLB Miss % of Load Stalls"] =
650 ( it->second )[
"L1 DTLB Miss Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles due to Load Ops"];
651 ( it->second )[
"L3 Unshared Hit % of Load Stalls"] =
652 ( it->second )[
"L3 Unshared Hit Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles due to Load Ops"];
653 ( it->second )[
"L2 Other Core Hit % of Load Stalls"] =
654 ( it->second )[
"L2 Other Core Hit Impact"] * 100 / ( it->second )[
"Counted Stalled Cycles due to Load Ops"];
655 ( it->second )[
"L2 Other Core Hit Modified % of Load Stalls"] =
656 ( it->second )[
"L2 Other Core Hit Modified Impact"] * 100 /
657 ( it->second )[
"Counted Stalled Cycles due to Load Ops"];
658 ( it->second )[
"DIV & SQRT Ops % of counted Stalled Cycles"] =
659 ( it->second )[
"Cycles spent during DIV & SQRT Ops"] * 100 / ( it->second )[
"Total Counted Stalled Cycles"];
661 ( it->second )[
"Cycles IFETCH served by Local DRAM"] =
663 ( it->second )[
"Cycles IFETCH served by L3 (Modified)"] =
665 ( it->second )[
"Cycles IFETCH served by L3 (Clean Snoop)"] =
667 ( it->second )[
"Cycles IFETCH served by Remote L2"] =
669 ( it->second )[
"Cycles IFETCH served by Remote DRAM"] =
671 ( it->second )[
"Cycles IFETCH served by L3 (No Snoop)"] =
673 ( it->second )[
"Total L2 IFETCH miss Impact"] =
674 ( it->second )[
"Cycles IFETCH served by Local DRAM"] + ( it->second )[
"Cycles IFETCH served by L3 (Modified)"] +
675 ( it->second )[
"Cycles IFETCH served by L3 (Clean Snoop)"] +
676 ( it->second )[
"Cycles IFETCH served by Remote L2"] + ( it->second )[
"Cycles IFETCH served by Remote DRAM"] +
677 ( it->second )[
"Cycles IFETCH served by L3 (No Snoop)"];
678 ( it->second )[
"Local DRAM IFECTHes % Impact"] =
679 ( it->second )[
"Cycles IFETCH served by Local DRAM"] * 100 / ( it->second )[
"Total L2 IFETCH miss Impact"];
680 ( it->second )[
"L3 (Modified) IFECTHes % Impact"] =
681 ( it->second )[
"Cycles IFETCH served by L3 (Modified)"] * 100 / ( it->second )[
"Total L2 IFETCH miss Impact"];
682 ( it->second )[
"L3 (Clean Snoop) IFECTHes % Impact"] = ( it->second )[
"Cycles IFETCH served by L3 (Clean Snoop)"] *
683 100 / ( it->second )[
"Total L2 IFETCH miss Impact"];
684 ( it->second )[
"Remote L2 IFECTHes % Impact"] =
685 ( it->second )[
"Cycles IFETCH served by Remote L2"] * 100 / ( it->second )[
"Total L2 IFETCH miss Impact"];
686 ( it->second )[
"Remote DRAM IFECTHes % Impact"] =
687 ( it->second )[
"Cycles IFETCH served by Remote DRAM"] * 100 / ( it->second )[
"Total L2 IFETCH miss Impact"];
688 ( it->second )[
"L3 (No Snoop) IFECTHes % Impact"] =
689 ( it->second )[
"Cycles IFETCH served by L3 (No Snoop)"] * 100 / ( it->second )[
"Total L2 IFETCH miss Impact"];
690 ( it->second )[
"Total L2 IFETCH misses"] = ( it->second )[
"L2_RQSTS:IFETCH_MISS"];
691 ( it->second )[
"% of IFETCHes served by Local DRAM"] =
692 ( it->second )[
"OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * 100 / ( it->second )[
"L2_RQSTS:IFETCH_MISS"];
693 ( it->second )[
"% of IFETCHes served by L3 (Modified)"] =
694 ( it->second )[
"OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * 100 / ( it->second )[
"L2_RQSTS:IFETCH_MISS"];
695 ( it->second )[
"% of IFETCHes served by L3 (Clean Snoop)"] =
696 ( it->second )[
"OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * 100 /
697 ( it->second )[
"L2_RQSTS:IFETCH_MISS"];
698 ( it->second )[
"% of IFETCHes served by Remote L2"] =
699 ( it->second )[
"OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * 100 /
700 ( it->second )[
"L2_RQSTS:IFETCH_MISS"];
701 ( it->second )[
"% of IFETCHes served by Remote DRAM"] =
702 ( it->second )[
"OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * 100 / ( it->second )[
"L2_RQSTS:IFETCH_MISS"];
703 ( it->second )[
"% of IFETCHes served by L3 (No Snoop)"] =
704 ( it->second )[
"OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * 100 / ( it->second )[
"L2_RQSTS:IFETCH_MISS"];
705 ( it->second )[
"% of L2 IFETCH misses"] =
706 ( it->second )[
"L2_RQSTS:IFETCH_MISS"] * 100 /
707 ( ( it->second )[
"L2_RQSTS:IFETCH_MISS"] + ( it->second )[
"L2_RQSTS:IFETCH_HIT"] );
708 ( it->second )[
"L1 ITLB Miss Impact"] =
711 ( it->second )[
"Total Branch Instructions Executed"] = ( it->second )[
"BR_INST_EXEC:ANY"];
712 ( it->second )[
"% of Mispredicted Branches"] =
713 ( it->second )[
"BR_MISP_EXEC:ANY"] * 100 / ( it->second )[
"BR_INST_EXEC:ANY"];
714 ( it->second )[
"Direct Near Calls % of Total Branches Executed"] =
715 ( it->second )[
"BR_INST_EXEC:DIRECT_NEAR_CALL"] * 100 / ( it->second )[
"Total Branch Instructions Executed"];
716 ( it->second )[
"Indirect Near Calls % of Total Branches Executed"] =
717 ( it->second )[
"BR_INST_EXEC:INDIRECT_NEAR_CALL"] * 100 / ( it->second )[
"Total Branch Instructions Executed"];
718 ( it->second )[
"Indirect Near Non-Calls % of Total Branches Executed"] =
719 ( it->second )[
"BR_INST_EXEC:INDIRECT_NON_CALL"] * 100 / ( it->second )[
"Total Branch Instructions Executed"];
720 ( it->second )[
"All Near Calls % of Total Branches Executed"] =
721 ( it->second )[
"BR_INST_EXEC:NEAR_CALLS"] * 100 / ( it->second )[
"Total Branch Instructions Executed"];
722 ( it->second )[
"All Non Calls % of Total Branches Executed"] =
723 ( it->second )[
"BR_INST_EXEC:NON_CALLS"] * 100 / ( it->second )[
"Total Branch Instructions Executed"];
724 ( it->second )[
"All Returns % of Total Branches Executed"] =
725 ( it->second )[
"BR_INST_EXEC:RETURN_NEAR"] * 100 / ( it->second )[
"Total Branch Instructions Executed"];
726 ( it->second )[
"Total Branch Instructions Retired"] = ( it->second )[
"BR_INST_RETIRED:ALL_BRANCHES"];
727 ( it->second )[
"Conditionals % of Total Branches Retired"] =
728 ( it->second )[
"BR_INST_RETIRED:CONDITIONAL"] * 100 / ( it->second )[
"Total Branch Instructions Retired"];
729 ( it->second )[
"Near Calls % of Total Branches Retired"] =
730 ( it->second )[
"BR_INST_RETIRED:NEAR_CALL"] * 100 / ( it->second )[
"Total Branch Instructions Retired"];
732 ( it->second )[
"Instruction Starvation % of Total Cycles"] =
733 ( ( it->second )[
"UOPS_ISSUED:ANY CMASK=1 INV=1"] - ( it->second )[
"RESOURCE_STALLS:ANY"] ) * 100 /
734 ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"];
735 ( it->second )[
"% of Total Cycles spent handling FP exceptions"] =
736 ( it->second )[
"UOPS_DECODED:MS CMASK=1"] * 100 / ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"];
737 ( it->second )[
"# of Instructions per Call"] =
738 ( it->second )[
"INST_RETIRED:ANY_P"] / ( it->second )[
"BR_INST_EXEC:NEAR_CALLS"];
740 ( it->second )[
"Instructions Retired"] = ( it->second )[
"INST_RETIRED:ANY_P"];
741 ( it->second )[
"ITLB Miss Rate in %"] =
742 ( ( it->second )[
"ITLB_MISS_RETIRED"] / ( it->second )[
"INST_RETIRED:ANY_P"] ) * 100;
744 ( it->second )[
"Branch Instructions"] = ( it->second )[
"BR_INST_RETIRED:ALL_BRANCHES"];
745 ( it->second )[
"Load Instructions"] = ( it->second )[
"MEM_INST_RETIRED:LOADS"];
746 ( it->second )[
"Store Instructions"] = ( it->second )[
"MEM_INST_RETIRED:STORES"];
747 ( it->second )[
"Other Instructions"] =
748 ( it->second )[
"Instructions Retired"] - ( it->second )[
"MEM_INST_RETIRED:LOADS"] -
749 ( it->second )[
"MEM_INST_RETIRED:STORES"] - ( it->second )[
"BR_INST_RETIRED:ALL_BRANCHES"];
750 ( it->second )[
"Packed UOPS Retired"] =
751 ( it->second )[
"SSEX_UOPS_RETIRED:PACKED_DOUBLE"] + ( it->second )[
"SSEX_UOPS_RETIRED:PACKED_SINGLE"];
752 ( it->second )[
"CPI"] = ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"] / ( it->second )[
"INST_RETIRED:ANY_P"];
// How far this entry's CPI is from the reference EXPECTED_CPI (ratio > 1: slower than the
// reference).
754 double localPerformanceImprovement = ( it->second )[
"CPI"] /
EXPECTED_CPI;
// Cycles this entry would take if its CPI were brought down to EXPECTED_CPI.
755 double cyclesAfterImprovement = ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"] / localPerformanceImprovement;
// Overall cycle count if only this entry were improved that way.
756 double totalCyclesAfterImprovement =
757 totalCycles - ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"] + cyclesAfterImprovement;
// iMargin: percentage by which the overall cycle count would shrink.
// NOTE(review): no guard against a zero CPI or zero totalCycles is visible here; with
// floating-point operands those cases produce inf/NaN rather than a crash.
758 ( it->second )[
"iMargin"] = 100 - ( totalCyclesAfterImprovement / totalCycles ) * 100;
760 ( it->second )[
"Load % of all Instructions"] =
761 ( it->second )[
"MEM_INST_RETIRED:LOADS"] * 100 / ( it->second )[
"INST_RETIRED:ANY_P"];
762 ( it->second )[
"Store % of all Instructions"] =
763 ( it->second )[
"MEM_INST_RETIRED:STORES"] * 100 / ( it->second )[
"INST_RETIRED:ANY_P"];
764 ( it->second )[
"Branch % of all Instructions"] =
765 ( it->second )[
"BR_INST_RETIRED:ALL_BRANCHES"] * 100 / ( it->second )[
"INST_RETIRED:ANY_P"];
766 ( it->second )[
"Other % of all Instructions"] =
767 ( it->second )[
"Other Instructions"] * 100 / ( it->second )[
"INST_RETIRED:ANY_P"];
769 ( it->second )[
"Packed % of all UOPS Retired"] =
770 ( it->second )[
"Packed UOPS Retired"] * 100 / ( it->second )[
"UOPS_RETIRED:ANY"];
797 void init(
const char*
name,
const char* architecture,
const char* event_name,
unsigned int c_mask,
798 unsigned int inv_mask,
unsigned int smpl_period ) {
821 [](
const auto& lhs,
const auto& rhs ) { return lhs.second < rhs.second; } );
823 strcpy(
index, ( max_pos->first ).c_str() );
824 value = max_pos->second;
// Loop over n positions that appends to s_mod either one of four fixed strings or
// to_app. The conditions selecting each append are outside the visible lines.
// NOTE(review): the four literals below read as the raw characters <, >, & and a bare
// double quote, and the last one is not a valid string literal as shown. Presumably the
// listing decoded HTML entities (&lt; &gt; &amp; &quot;) that the real file contains --
// confirm against the real file before touching them.
// NOTE(review): strcat is unbounded; s_mod must have room for the expanded text.
839 for (
int i = 0; i <
n; i++ ) {
842 strcat( s_mod,
"<" );
845 strcat( s_mod,
">" );
848 strcat( s_mod,
"&" );
851 strcat( s_mod,
""" );
857 strcat( s_mod, to_app );
// Reduces demangled_symbol to a bare function name and returns a pointer into the same
// buffer. The buffer is edited in place: string terminators are written through
// pointers obtained with const_cast, so the caller must pass writable storage.
//
// Step 1 -- operator names. If the text contains the word operator (found with strstr,
// so anywhere in the string), blanks after the keyword are skipped, the operator token
// that follows is recognised, the string is cut right after that token and the pointer
// to the keyword is returned. The offset 8 is the length of the word operator.
// The chain below tests longer tokens before their prefixes (delete[] before delete,
// new[] before new, >>= before >> before >, ->* before -> before -), so the order of
// the branches must be preserved.
868 char* operator_string_begin =
const_cast<char*
>( strstr( demangled_symbol,
"operator" ) );
869 if ( operator_string_begin != NULL ) {
870 char* operator_string_end = operator_string_begin + 8;
871 while ( *operator_string_end ==
' ' ) operator_string_end++;
872 if ( strstr( operator_string_end,
"delete[]" ) == operator_string_end ) {
873 operator_string_end += 8;
874 *operator_string_end =
'\0';
875 }
else if ( strstr( operator_string_end,
"delete" ) == operator_string_end ) {
876 operator_string_end += 6;
877 *operator_string_end =
'\0';
878 }
else if ( strstr( operator_string_end,
"new[]" ) == operator_string_end ) {
879 operator_string_end += 5;
880 *operator_string_end =
'\0';
881 }
else if ( strstr( operator_string_end,
"new" ) == operator_string_end ) {
882 operator_string_end += 3;
883 *operator_string_end =
'\0';
884 }
else if ( strstr( operator_string_end,
">>=" ) == operator_string_end ) {
885 operator_string_end += 3;
886 *operator_string_end =
'\0';
887 }
else if ( strstr( operator_string_end,
"<<=" ) == operator_string_end ) {
888 operator_string_end += 3;
889 *operator_string_end =
'\0';
890 }
else if ( strstr( operator_string_end,
"->*" ) == operator_string_end ) {
891 operator_string_end += 3;
892 *operator_string_end =
'\0';
893 }
else if ( strstr( operator_string_end,
"<<" ) == operator_string_end ) {
894 operator_string_end += 2;
895 *operator_string_end =
'\0';
896 }
else if ( strstr( operator_string_end,
">>" ) == operator_string_end ) {
897 operator_string_end += 2;
898 *operator_string_end =
'\0';
899 }
else if ( strstr( operator_string_end,
">=" ) == operator_string_end ) {
900 operator_string_end += 2;
901 *operator_string_end =
'\0';
902 }
else if ( strstr( operator_string_end,
"<=" ) == operator_string_end ) {
903 operator_string_end += 2;
904 *operator_string_end =
'\0';
905 }
else if ( strstr( operator_string_end,
"==" ) == operator_string_end ) {
906 operator_string_end += 2;
907 *operator_string_end =
'\0';
908 }
else if ( strstr( operator_string_end,
"!=" ) == operator_string_end ) {
909 operator_string_end += 2;
910 *operator_string_end =
'\0';
911 }
else if ( strstr( operator_string_end,
"|=" ) == operator_string_end ) {
912 operator_string_end += 2;
913 *operator_string_end =
'\0';
914 }
else if ( strstr( operator_string_end,
"&=" ) == operator_string_end ) {
915 operator_string_end += 2;
916 *operator_string_end =
'\0';
917 }
else if ( strstr( operator_string_end,
"^=" ) == operator_string_end ) {
918 operator_string_end += 2;
919 *operator_string_end =
'\0';
920 }
else if ( strstr( operator_string_end,
"%=" ) == operator_string_end ) {
921 operator_string_end += 2;
922 *operator_string_end =
'\0';
923 }
else if ( strstr( operator_string_end,
"/=" ) == operator_string_end ) {
924 operator_string_end += 2;
925 *operator_string_end =
'\0';
926 }
else if ( strstr( operator_string_end,
"*=" ) == operator_string_end ) {
927 operator_string_end += 2;
928 *operator_string_end =
'\0';
929 }
else if ( strstr( operator_string_end,
"-=" ) == operator_string_end ) {
930 operator_string_end += 2;
931 *operator_string_end =
'\0';
932 }
else if ( strstr( operator_string_end,
"+=" ) == operator_string_end ) {
933 operator_string_end += 2;
934 *operator_string_end =
'\0';
935 }
else if ( strstr( operator_string_end,
"&&" ) == operator_string_end ) {
936 operator_string_end += 2;
937 *operator_string_end =
'\0';
938 }
else if ( strstr( operator_string_end,
"||" ) == operator_string_end ) {
939 operator_string_end += 2;
940 *operator_string_end =
'\0';
941 }
else if ( strstr( operator_string_end,
"[]" ) == operator_string_end ) {
942 operator_string_end += 2;
943 *operator_string_end =
'\0';
944 }
else if ( strstr( operator_string_end,
"()" ) == operator_string_end ) {
945 operator_string_end += 2;
946 *operator_string_end =
'\0';
947 }
else if ( strstr( operator_string_end,
"++" ) == operator_string_end ) {
948 operator_string_end += 2;
949 *operator_string_end =
'\0';
950 }
else if ( strstr( operator_string_end,
"--" ) == operator_string_end ) {
951 operator_string_end += 2;
952 *operator_string_end =
'\0';
953 }
else if ( strstr( operator_string_end,
"->" ) == operator_string_end ) {
954 operator_string_end += 2;
955 *operator_string_end =
'\0';
956 }
else if ( strstr( operator_string_end,
"<" ) == operator_string_end ) {
957 operator_string_end += 1;
958 *operator_string_end =
'\0';
959 }
else if ( strstr( operator_string_end,
">" ) == operator_string_end ) {
960 operator_string_end += 1;
961 *operator_string_end =
'\0';
962 }
else if ( strstr( operator_string_end,
"~" ) == operator_string_end ) {
963 operator_string_end += 1;
964 *operator_string_end =
'\0';
965 }
else if ( strstr( operator_string_end,
"!" ) == operator_string_end ) {
966 operator_string_end += 1;
967 *operator_string_end =
'\0';
968 }
else if ( strstr( operator_string_end,
"+" ) == operator_string_end ) {
969 operator_string_end += 1;
970 *operator_string_end =
'\0';
971 }
else if ( strstr( operator_string_end,
"-" ) == operator_string_end ) {
972 operator_string_end += 1;
973 *operator_string_end =
'\0';
974 }
else if ( strstr( operator_string_end,
"*" ) == operator_string_end ) {
975 operator_string_end += 1;
976 *operator_string_end =
'\0';
977 }
else if ( strstr( operator_string_end,
"/" ) == operator_string_end ) {
978 operator_string_end += 1;
979 *operator_string_end =
'\0';
980 }
else if ( strstr( operator_string_end,
"%" ) == operator_string_end ) {
981 operator_string_end += 1;
982 *operator_string_end =
'\0';
983 }
else if ( strstr( operator_string_end,
"^" ) == operator_string_end ) {
984 operator_string_end += 1;
985 *operator_string_end =
'\0';
986 }
else if ( strstr( operator_string_end,
"&" ) == operator_string_end ) {
987 operator_string_end += 1;
988 *operator_string_end =
'\0';
989 }
else if ( strstr( operator_string_end,
"|" ) == operator_string_end ) {
990 operator_string_end += 1;
991 *operator_string_end =
'\0';
992 }
else if ( strstr( operator_string_end,
"," ) == operator_string_end ) {
993 operator_string_end += 1;
994 *operator_string_end =
'\0';
995 }
else if ( strstr( operator_string_end,
"=" ) == operator_string_end ) {
996 operator_string_end += 1;
997 *operator_string_end =
'\0';
999 return operator_string_begin;
// Step 2 -- ordinary names. Start from the last closing parenthesis in the text and walk
// backwards while pars is positive, never moving before the start of the buffer.
// NOTE(review): presumably pars counts unmatched parentheses so the walk stops at the
// opening parenthesis of the argument list; the counter updates and the declaration of
// pars are on lines missing from this listing. The return of demangled_symbol below
// looks like the path taken when no closing parenthesis exists -- confirm.
1001 char* end_of_demangled_name =
const_cast<char*
>( strrchr( demangled_symbol,
')' ) );
1002 if ( end_of_demangled_name != NULL ) {
1005 while ( pars > 0 && end_of_demangled_name != demangled_symbol ) {
1006 c = *( --end_of_demangled_name );
1009 }
else if (
c ==
'(' ) {
1014 return demangled_symbol;
// Step 3 -- cut the text at the position found above, run a second bounded backward walk
// that reacts to the character <, cut again, then step backwards over identifier
// characters (alphanumerics, underscore, tilde) and return the first character of that run.
// NOTE(review): presumably the second walk skips a trailing template argument list;
// its entry condition is not visible here.
// NOTE(review): the identifier walk has no visible lower bound against demangled_symbol,
// so a text consisting only of identifier characters before the cut may be read before
// the start of the buffer -- confirm against the real file. isalnum on a plain char with
// a negative value is also undefined behaviour.
1016 char* end_of_func_name = end_of_demangled_name;
1017 if ( end_of_func_name != NULL ) {
1018 *end_of_func_name =
'\0';
1019 char c = *( --end_of_func_name );
1022 while ( pars > 0 && end_of_func_name != demangled_symbol ) {
1023 c = *( --end_of_func_name );
1026 }
else if (
c ==
'<' ) {
1030 *end_of_func_name =
'\0';
1032 c = *( --end_of_func_name );
1033 while ( isalnum(
c ) ||
c ==
'_' ||
c ==
'~' ) {
c = *( --end_of_func_name ); }
1034 return ++end_of_func_name;
1036 return demangled_symbol;
// Writes the per-module HTML page for the event held by cur_module.
// The page path is dir + /HTML/ + module_name + .html and the event name is copied out
// of cur_module.
// NOTE(review): strcpy/strcat/sprintf in this block are unbounded; the destination
// buffers are declared outside the visible lines -- confirm their capacity.
1050 strcpy( module_filename, dir );
1051 strcat( module_filename,
"/HTML/" );
1052 strcat( module_filename, module_name );
1053 strcat( module_filename,
".html" );
1056 strcpy( event, ( cur_module->
get_event() ).c_str() );
// Path for a module that has no entry in modules_tot_samples yet. For the cycles event
// of the active architecture (UNHALTED_CORE_CYCLES when nehalem is false,
// CPU_CLK_UNHALTED:THREAD_P when it is true) an entry is inserted. The file is opened in
// write mode and receives the page preamble, the module title and a list of links, one
// per event string.
// NOTE(review): presumably result comes from a lookup of the module name; the lookup and
// the exact nesting of these conditions are not visible in this listing.
1059 if ( result == modules_tot_samples.
end() )
1061 if ( ( !strcmp( event,
"UNHALTED_CORE_CYCLES" ) && !nehalem ) ||
1062 ( !strcmp( event,
"CPU_CLK_UNHALTED:THREAD_P" ) && nehalem ) ) {
1063 modules_tot_samples.
insert(
1068 module_file = fopen( module_filename,
"w" );
1069 if ( module_file == NULL ) {
1070 fprintf(
stderr,
"ERROR: Cannot create file %s!!!\naborting...\n", module_filename );
1073 fprintf( module_file,
"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" "
1074 "\"http://www.w3.org/TR/html4/loose.dtd\">\n" );
1075 fprintf( module_file,
"<html>\n" );
1076 fprintf( module_file,
"<head>\n" );
1077 fprintf( module_file,
"<title>\n" );
1078 fprintf( module_file,
"%s\n", module_name );
1079 fprintf( module_file,
"</title>\n" );
1080 fprintf( module_file,
"</head>\n" );
1081 fprintf( module_file,
"<body>\n" );
1082 fprintf( module_file,
"<h2>%s</h2><br/>Events Sampled:<br/>\n", module_name );
1083 fprintf( module_file,
"<ul>\n" );
1085 fprintf( module_file,
"<li><a href=\"#%s\">%s</a></li>\n", it->c_str(), it->c_str() );
1087 fprintf( module_file,
"</ul>\n" );
1090 if ( ( !strcmp( event,
"UNHALTED_CORE_CYCLES" ) && !nehalem ) ||
1091 ( !strcmp( event,
"CPU_CLK_UNHALTED:THREAD_P" ) && nehalem ) ) {
// Append path: the file is reopened in append mode, the anchor label is built from the
// event name plus optional CMASK and INV suffixes, and the table header for this event
// is written.
// NOTE(review): the anchor line emits an opening a tag twice and never a closing one;
// it looks like the second tag was meant to be the closing tag -- confirm before fixing.
1094 module_file = fopen( module_filename,
"a" );
1098 strcpy( event_str, event );
1100 sprintf( event_str + strlen( event_str ),
" CMASK=%u", cur_module->
get_c_mask() );
1103 sprintf( event_str + strlen( event_str ),
" INV=%u", cur_module->
get_inv_mask() );
1105 fprintf( module_file,
"<a name=\"%s\"><a>\n", event_str );
1106 fprintf( module_file,
"<table cellpadding=\"5\">\n" );
1107 fprintf( module_file,
"<tr bgcolor=\"#EEEEEE\">\n" );
1108 fprintf( module_file,
1109 "<th colspan=\"6\" align=\"left\">%s -- cmask: %u -- invmask: %u -- Total Samples: %u -- "
1110 "Sampling Period: %u</th>\n",
1113 fprintf( module_file,
"</tr>\n" );
1114 fprintf( module_file,
"<tr bgcolor=\"#EEEEEE\">\n" );
1115 fprintf( module_file,
"<th align=\"left\">Samples</th>\n" );
1116 fprintf( module_file,
"<th align=\"left\">Percentage</th>\n" );
1117 fprintf( module_file,
"<th align=\"left\">Symbol Name</th>\n" );
1118 fprintf( module_file,
"<th align=\"left\">Library Name</th>\n" );
1119 fprintf( module_file,
"<th align=\"left\">Complete Signature</th>\n" );
1120 fprintf( module_file,
"<th align=\"left\">Library Pathname</th>\n" );
1121 fprintf( module_file,
"</tr>\n" );
// At most 20 table rows are produced.
1122 for (
int j = 0;
j < 20;
j++ ) {
// Each row key in index has the form symbol%library: the part before the first percent
// sign is copied to sym and the rest to lib. The short symbol comes from func_name on a
// scratch copy, and the short library name is the text after the last slash when there
// is one and it is not the final character.
// NOTE(review): the memcpy below does not write a terminator; presumably sym is zeroed
// on a line that is not visible -- confirm.
1146 char* sym_end = strchr(
index,
'%' );
1147 if ( sym_end == NULL )
1149 fprintf(
stderr,
"ERROR: Invalid sym and lib name! : %s\naborting...\n",
index );
1152 memcpy( sym,
index, strlen(
index ) - strlen( sym_end ) );
1153 strcpy( lib, sym_end + 1 );
1156 strcpy( temp, sym );
1157 strcpy( simple_sym, (
func_name( temp ) ) );
1158 if ( strrchr( lib,
'/' ) != NULL && *( strrchr( lib,
'/' ) + 1 ) !=
'\0' ) {
1159 strcpy( simple_lib, strrchr( lib,
'/' ) + 1 );
1161 strcpy( simple_lib, lib );
1164 fprintf( module_file,
"<tr bgcolor=\"#FFFFCC\">\n" );
1166 fprintf( module_file,
"<tr bgcolor=\"#CCFFCC\">\n" );
1168 fprintf( module_file,
"<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%u</td>\n", value );
1169 fprintf( module_file,
"<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%f%%</td>\n",
1175 fprintf( module_file,
"<td style=\"font-family:courier;\">%s</td>\n", simple_sym_mod );
1176 fprintf( module_file,
"<td style=\"font-family:courier;\">%s</td>\n", simple_lib_mod );
1177 fprintf( module_file,
"<td style=\"font-family:courier;\">%s</td>\n", sym_mod );
1178 fprintf( module_file,
"<td style=\"font-family:courier;\">%s</td>\n</tr>\n", lib_mod );
1180 fprintf( module_file,
"</table><br/><br/>\n" );
1181 int res = fclose( module_file );
1183 fprintf(
stderr,
"ERROR: Cannot close file %s!!!\naborting...\n", module_filename );
// Reads one gzip-compressed sample file and feeds its contents into cur_module.
// The path starts as dir followed by a slash and is opened with gzopen in binary read mode.
// NOTE(review): presumably the file name is appended on a line missing from this
// listing, since the error message at the end prints filename.
1210 unsigned int module_num = 0;
1214 strcpy( path_name, dir );
1215 strcat( path_name,
"/" );
1217 gzFile res_file = gzopen( path_name,
"rb" );
1219 if ( res_file != NULL ) {
// Header line: architecture, event name, counter mask, invert mask, sampling period.
// The architecture string is compared with NHM.
// NOTE(review): the %s conversions are unbounded and the sscanf result is not checked
// in the visible lines.
1224 sscanf(
line,
"%s %s %u %u %u", arch, event, &cmask, &inv, &
sp );
1225 if ( !strcmp( arch,
"NHM" ) )
// A line without a blank starts a new module, in the form name%total. When an earlier
// module exists, cur_module is cleared before being initialised again with the new name
// and the header values; the total is parsed with atoi from the text after the percent sign.
// NOTE(review): the memcpy does not write a terminator; presumably cur_module_name is
// zeroed on a line that is not visible -- confirm.
1232 if ( strchr(
line,
' ' ) == NULL )
1234 if ( module_num > 0 ) {
1236 cur_module->
clear();
1239 char* end_sym = strchr(
line,
'%' );
1240 if ( end_sym == NULL )
1242 fprintf(
stderr,
"ERROR: Invalid module name. \nLINE: %s\naborting...\n",
line );
1246 memcpy( cur_module_name,
line, strlen(
line ) - strlen( end_sym ) );
1247 cur_module->
init( cur_module_name, arch, event, cmask, inv,
sp );
1248 cur_module->
set_total( atoi( end_sym + 1 ) );
// Sample line: symbol, library name, offset inside the library, sample count.
// The library path is canonicalised with realpath. When that succeeds, a FileInfo for
// the path is created on first use and kept in libsInfo, the symbol is looked up by
// offset and demangled with abi::__cxa_demangle; if demangling reports a non-zero status
// the raw symbol is used, and the placeholder ??? is used when no symbol is found.
// When realpath fails, the symbol and library name from the line are used unchanged.
// The buffer returned by the demangler is released with free after it has been copied.
1252 unsigned int value = 0, libOffset = 0;
1262 sscanf(
line,
"%s %s %u %u", symbol, libName, &libOffset, &value );
1263 char realPathName_s[FILENAME_MAX];
1264 bzero( realPathName_s, FILENAME_MAX );
1265 char* realPathName = realpath( libName, realPathName_s );
1266 if ( realPathName != NULL && strlen( realPathName ) > 0 ) {
1268 result = libsInfo.find( realPathName );
1269 if ( result == libsInfo.end() ) { libsInfo[realPathName] =
FileInfo( realPathName,
true ); }
1270 const char* temp_sym = libsInfo[realPathName].symbolByOffset( libOffset );
1271 if ( temp_sym != NULL && strlen( temp_sym ) > 0 ) {
1273 char* demangled_symbol = abi::__cxa_demangle( temp_sym, NULL, NULL, &status );
1274 if ( status == 0 ) {
1275 strcpy( final_sym, demangled_symbol );
1276 free( demangled_symbol );
1278 strcpy( final_sym, temp_sym );
1281 strcpy( final_sym,
"???" );
1283 strcpy( final_lib, realPathName );
1285 strcpy( final_sym, symbol );
1286 strcpy( final_lib, libName );
// The key stored for the sample is symbol%library.
1290 strcpy(
index, final_sym );
1291 strcat(
index,
"%" );
1292 strcat(
index, final_lib );
// After the last line the module is cleared and the compressed file is closed.
1298 cur_module->
clear();
1299 gzclose( res_file );
1302 fprintf(
stderr,
"ERROR: Unable to open input file: %s\naborting...\n",
filename );
// Opens a gzip-compressed sample file under dir and reads only its header line.
// The header holds five whitespace-separated fields: architecture, event name, counter
// mask, invert mask and sampling period, all read as strings. The event label event_str
// gets a CMASK= suffix when the counter mask parses to a positive number and an INV=
// suffix when the invert mask does.
// NOTE(review): presumably event_str starts from the event name and is stored by the
// caller side of this function; both happen on lines missing from this listing.
// NOTE(review): the %s conversions are unbounded; buffer sizes are not visible here.
1324 strcpy( path_name, dir );
1325 strcat( path_name,
"/" );
1327 gzFile res_file = gzopen( path_name,
"rb" );
1328 if ( res_file != NULL ) {
1333 sscanf(
line,
"%s %s %s %s %s\n", arch, event, cmask_str, inv_str, sp_str );
1335 if ( atoi( cmask_str ) > 0 ) {
1336 event_str +=
" CMASK=";
1337 event_str += cmask_str;
1339 if ( atoi( inv_str ) > 0 ) {
1340 event_str +=
" INV=";
1341 event_str += inv_str;
1346 fprintf(
stderr,
"ERROR: Unable to open input file: %s\naborting...\n",
filename );
1349 gzclose( res_file );
// For every entry of modules_tot_samples: rebuild the page path
// dir + /HTML/ + key + .html, reopen the page in append mode, write the closing body
// and html tags and close the file. Failures to open or to close are reported on stderr.
// NOTE(review): what follows each error message (return value or exit) is on lines
// missing from this listing.
1358 i != modules_tot_samples.
end(); i++ ) {
1360 strcpy( module_filename, dir );
1361 strcat( module_filename,
"/HTML/" );
1362 strcat( module_filename, ( i->first ).c_str() );
1363 strcat( module_filename,
".html" );
1364 FILE* module_file = fopen( module_filename,
"a" );
1365 if ( module_file == NULL ) {
1366 fprintf(
stderr,
"ERROR: Unable to append to file: %s\naborting...\n", module_filename );
1369 fprintf( module_file,
"</body>\n</html>\n" );
1370 if ( fclose( module_file ) ) {
1371 fprintf(
stderr,
"ERROR: Cannot close file %s!!!\naborting...\n", module_filename );
// Reads a plain-text counter file under dir and stores, per module, the average of the
// values listed for the event described in the file header. Returns the number of
// module names encountered.
//
// Header: architecture, event name, counter mask, invert mask, sampling period, read as
// five strings. The architecture is compared with NHM, and the event label gets CMASK=
// and INV= suffixes when the corresponding field parses to a positive number.
// Body: a token starting with a letter is a module name and closes the previous module
// by storing cur_sum / no_of_values under C_modules[module][event label]; a token
// starting with a digit is added to cur_sum. The last module is stored after the loop.
//
// NOTE(review): the stream returned by fopen is passed to fscanf on the very next
// original line (embedded numbers 1406 and 1407), so there is no NULL check in between;
// a missing file would crash here.
// NOTE(review): the updates and resets of cur_sum and no_of_values are on lines missing
// from this listing; if no_of_values can be zero for a module the division yields NaN or
// infinity -- confirm. No fclose is visible either.
// NOTE(review): the %s conversions are unbounded.
1398 int number_of_modules = 0;
1400 int no_of_values = 0;
1403 strcpy( path_name, dir );
1404 strcat( path_name,
"/" );
1406 FILE* fp = fopen( path_name,
"r" );
1407 int stat = fscanf( fp,
"%s %s %s %s %s\n", arch, event, cmask_str, inv_str, sp_str );
1412 if ( !strcmp( arch,
"NHM" ) )
1417 if ( atoi( cmask_str ) > 0 ) {
1418 event_str +=
" CMASK=";
1419 event_str += cmask_str;
1421 if ( atoi( inv_str ) > 0 ) {
1422 event_str +=
" INV=";
1423 event_str += inv_str;
1426 while ( fscanf( fp,
"%s\n",
line ) != EOF ) {
1427 if ( isalpha(
line[0] ) )
1429 if ( number_of_modules > 0 ) {
1430 C_modules[cur_module_name][event_str] = (double)cur_sum / no_of_values;
1434 strcpy( cur_module_name,
line );
1435 number_of_modules++;
1436 }
else if ( isdigit(
line[0] ) )
1438 cur_sum += strtol(
line, NULL, 10 );
1442 C_modules[cur_module_name][event_str] = (double)cur_sum / no_of_values;
1444 return number_of_modules;
// Start of the HTML summary page written to fp: document type, title, the sorttable.js
// script reference, inline style rules for the sortable table, the page heading and the
// first header cell (MODULE NAME). The remaining header cells are produced per column
// name, with an empty name handled separately by the test at the end of this fragment.
// NOTE(review): the call that owns the first string below and part of the style block
// are on lines missing from this listing.
1450 "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\n" );
1451 fprintf( fp,
"<html>\n" );
1452 fprintf( fp,
"<head>\n" );
1453 fprintf( fp,
"<title>\n" );
1454 fprintf( fp,
"Analysis Result\n" );
1455 fprintf( fp,
"</title>\n" );
1456 fprintf( fp,
"<script src=\"sorttable.js\"></script>\n" );
1457 fprintf( fp,
"<style>\ntable.sortable thead "
1458 "{\nbackground-color:#eee;\ncolor:#666666;\nfont-weight:bold;\ncursor:default;\nfont-family:courier;\n}"
1460 fprintf( fp,
"</head>\n" );
1461 fprintf( fp,
"<body link=\"black\">\n" );
1462 fprintf( fp,
"<h1>RESULTS:</h1><br/>Click for detailed symbol view...<p/>\n" );
1463 fprintf( fp,
"<table class=\"sortable\" cellpadding=\"5\">\n" );
1464 fprintf( fp,
"<tr>\n" );
1465 fprintf( fp,
"<th>MODULE NAME</th>\n" );
1467 if ( strlen( it->c_str() ) == 0 )
1468 fprintf( fp,
"<th bgcolor=\"#FFFFFF\"> </th>\n" );
// Header cell for a named column, then the end of the header row.
1470 fprintf( fp,
"<th>%s</th>\n", ( *it ).c_str() );
1472 fprintf( fp,
"</tr>\n" );
// One table row per module: two alternative row colours, then a first cell that links
// to the page named after the module (module name + .html).
// NOTE(review): the condition choosing between the two colours is not visible here.
1481 fprintf( fp,
"<tr bgcolor=\"#FFFFCC\">\n" );
1483 fprintf( fp,
"<tr bgcolor=\"#CCFFCC\">\n" );
1484 fprintf( fp,
"<td style=\"font-family:monospace;font-size:large;color:Black\"><a href=\"%s.html\">%s</a></td>\n",
1485 ( it->first ).c_str(), ( it->first ).c_str() );
// Columns with an empty name are handled separately from value columns.
1487 if ( strlen( jt->c_str() ) == 0 ) {
1488 fprintf( fp,
"<td bgcolor=\"#FFFFFF\"> </td>" );
// Value cell: the column name must exist in the value map of the module, otherwise an
// error is reported on stderr. The value is printed right-aligned with two decimals.
// NOTE(review): what follows the error message is on lines missing from this listing.
1490 if ( ( it->second ).find( *jt ) == ( it->second ).end() ) {
1491 fprintf(
stderr,
"ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", ( *jt ).c_str() );
1494 fprintf( fp,
"<td style=\"font-family:monospace;font-size:large;color:DarkBlue\" align=\"right\">%.2f</td>\n",
1495 ( it->second )[*jt] );
1498 fprintf( fp,
"</tr>\n" );
// End of the summary page: closes the table, the body and the document.
1504 fprintf( fp,
"</table>\n</body>\n</html>\n" );
// CSV counterpart of the summary table written to fp.
// Header row: MODULE NAME followed by one comma-separated field per column name; columns
// with an empty name take a separate branch whose body is not visible in this listing.
1509 fprintf( fp,
"MODULE NAME" );
1511 if ( strlen( it->c_str() ) == 0 ) {
1513 fprintf( fp,
",%s", ( *it ).c_str() );
1515 fprintf( fp,
"\n" );
// Data rows: the module name, then each value with two decimals. A column name missing
// from the value map of the module is reported on stderr.
// NOTE(review): module and column names are written without quoting, so a name
// containing a comma would break the row -- confirm whether such names can occur.
1522 fprintf( fp,
"%s", ( it->first ).c_str() );
1524 if ( strlen( jt->c_str() ) == 0 ) {
1526 if ( ( it->second ).find( *jt ) == ( it->second ).end() ) {
1527 fprintf(
stderr,
"ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", ( *jt ).c_str() );
1530 fprintf( fp,
",%.2f", ( it->second )[*jt] );
1533 fprintf( fp,
"\n" );
// Scales value against the largest value of field seen across the iterated entries:
// the running maximum max is raised whenever an entry holds a larger counter_value, and
// the result is value / max * normalizeTo when value, max and normalizeTo are all positive.
// NOTE(review): the loop header, the initial value of max and the result returned when
// the condition fails are on lines missing from this listing.
1546 double counter_value;
1549 counter_value = ( it->second )[field];
1550 if ( max < counter_value ) max = counter_value;
1552 if ( value > 0 && max > 0 && normalizeTo > 0 ) {
1553 return 1. * value / max * normalizeTo;
// Computes the iFactor entry of a module in two variants that differ only in the
// vectorisation metric used: Packed % of all UOPS Retired in the first, Packed SIMD % of
// all Instructions in the second. Each metric is normalised to the range up to 1 with
// normalize; the vectorisation term is inverted (1 minus the normalised value), and
// iFactor = stallnorm * ( simdnorm + misspnorm + stallnorm ).
// NOTE(review): presumably the variants are selected by architecture; the selecting
// condition and the declarations that receive the first expression of each variant are
// on lines missing from this listing.
1568 1. -
normalize(
"Packed % of all UOPS Retired", ( it->second )[
"Packed % of all UOPS Retired"], 1 );
1569 double misspnorm =
normalize(
"% of Mispredicted Branches", ( it->second )[
"% of Mispredicted Branches"], 1 );
1570 double stallnorm =
normalize(
"Stalled Cycles", ( it->second )[
"Stalled Cycles"], 1 );
1571 ( it->second )[
"iFactor"] = stallnorm * ( simdnorm + misspnorm + stallnorm );
1577 1. -
normalize(
"Packed SIMD % of all Instructions", ( it->second )[
"Packed SIMD % of all Instructions"], 1 );
1578 double misspnorm =
normalize(
"% of Mispredicted Branches", ( it->second )[
"% of Mispredicted Branches"], 1 );
1579 double stallnorm =
normalize(
"Stalled Cycles", ( it->second )[
"Stalled Cycles"], 1 );
1580 ( it->second )[
"iFactor"] = stallnorm * ( simdnorm + misspnorm + stallnorm );
// Accumulates the cycle counter of each iterated entry into sum, reading either the
// CPU_CLK_UNHALTED:THREAD_P or the UNHALTED_CORE_CYCLES value.
// NOTE(review): presumably the counter name is chosen by architecture; the condition and
// the loops around these statements are on lines missing from this listing.
1594 sum += ( it->second )[
"CPU_CLK_UNHALTED:THREAD_P"];
1599 sum += ( it->second )[
"UNHALTED_CORE_CYCLES"];
1609 if ( argc < 2 || argc > 4 ) {
1610 printf(
"\n\nUsage: %s DIRECTORY [--caa] [--csv]\n\n",
argv[0] );
1616 for (
int i = 2; i <
argc; i++ ) {
1617 if ( !strcmp(
argv[i],
"--caa" ) ) caa =
true;
1618 if ( !strcmp(
argv[i],
"--csv" ) ) csv =
true;
1622 strcpy( dir,
argv[1] );
1624 strcat( dir,
"/HTML" );
1625 int res = mkdir( dir, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH );
1627 fprintf(
stderr,
"ERROR: Cannot create directory %s\naborting...\n", dir );
1633 struct dirent* dirp;
1634 int num_of_modules = 0;
1635 if ( ( dp = opendir(
argv[1] ) ) == NULL ) {
1636 printf(
"Error(%d) opening %s\n", errno,
argv[1] );
1639 while ( ( dirp = readdir( dp ) ) != NULL ) {
1640 if ( strstr( dirp->d_name,
"_S_" ) != NULL && strstr( dirp->d_name,
".txt.gz" ) != NULL && !csv ) {
1642 fprintf(
stderr,
"ERROR: Cannot read file %s\naborting...\n", dirp->d_name );
1648 sort( S_events.
begin(), S_events.
end() );
1649 if ( ( dp = opendir(
argv[1] ) ) == NULL ) {
1650 printf(
"Error(%d) opening %s\n", errno,
argv[1] );
1653 while ( ( dirp = readdir( dp ) ) != NULL ) {
1654 if ( strstr( dirp->d_name,
"_S_" ) != NULL && strstr( dirp->d_name,
".txt.gz" ) != NULL && !csv ) {
1656 fprintf(
stderr,
"ERROR: Cannot read file %s\naborting...\n", dirp->d_name );
1659 }
else if ( strstr( dirp->d_name,
"_C_" ) != NULL && strstr( dirp->d_name,
".txt" ) != NULL ) {
1661 if ( res > num_of_modules ) { num_of_modules = res; }
1668 fprintf(
stderr,
"ERROR: Cannot finalize HTML pages!!!\naborting...\n" );
1676 sprintf( filepath,
"%s/HTML/index.html",
argv[1] );
1678 sprintf( filepath,
"%s/results.csv",
argv[1] );
1679 FILE* fp = fopen( filepath,
"w" );
1681 fprintf(
stderr,
"ERROR: Cannot create file index.html!!!\naborting...\n" );
1690 fprintf(
stderr,
"(core) ERROR: One or more events for CAA missing!\naborting...\n" );
1707 fprintf(
stderr,
"(nehalem) ERROR: One or more events for CAA missing!\naborting...\n" );
1738 sprintf( src,
"sorttable.js" );
1739 sprintf(
dst,
"%s/HTML/sorttable.js",
argv[1] );
1740 int fd_src = open( src, O_RDONLY );
1741 if ( fd_src == -1 ) {
1742 fprintf(
stderr,
"ERROR: Cannot open file \"%s\"!\naborting...\n", src );
1745 int fd_dst = open(
dst, O_WRONLY | O_CREAT | O_TRUNC, 0644 );
1746 if ( fd_dst == -1 ) {
1747 fprintf(
stderr,
"ERROR: Cannot open file \"%s\" (%s)!\naborting...\n",
dst, strerror( errno ) );
1751 while (
read( fd_src, &
c, 1 ) ) {
1752 if ( write( fd_dst, &
c, 1 ) == -1 ) {