23 #include <sys/types.h> 39 #define CORE_L2_MISS_CYCLES 200 40 #define CORE_L2_HIT_CYCLES 14.5 41 #define CORE_L1_DTLB_MISS_CYCLES 10 42 #define CORE_LCP_STALL_CYCLES 6 43 #define CORE_UNKNOWN_ADDR_STORE_CYCLES 5 44 #define CORE_OVERLAPPING_CYCLES 6 45 #define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES 20 48 #define I7_L1_DTLB_WALK_COMPLETED_CYCLES 35 49 #define I7_L1_ITLB_WALK_COMPLETED_CYCLES 35 50 #define I7_L2_HIT_CYCLES 6 51 #define I7_L3_UNSHARED_HIT_CYCLES 35 52 #define I7_OTHER_CORE_L2_HIT_CYCLES 60 53 #define I7_OTHER_CORE_L2_HITM_CYCLES 75 54 #define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES 225 //average of 200 (not modified) and 225-250 (modified) 55 #define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES 360 //average of 350 (not modified) and 370 (modified) 56 #define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES 180 57 #define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT 200 58 #define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT 350 59 #define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP 35 60 #define I7_IFETCH_L2_MISS_L3_HIT_SNOOP 60 61 #define I7_IFETCH_L2_MISS_L3_HITM 75 62 #define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD 180 64 #define MAX_MODULES 1000 66 #define EXPECTED_CPI 0.25 68 #define MAX_FILENAME_LENGTH 1024 69 #define MAX_SAMPLE_INDEX_LENGTH 10000 70 #define MAX_SYM_LENGTH 15000 71 #define MAX_SYM_MOD_LENGTH 20000 72 #define MAX_LIB_LENGTH 5000 73 #define MAX_LIB_MOD_LENGTH 7000 74 #define MAX_SIMPLE_SYM_LENGTH 300 75 #define MAX_SIMPLE_SYM_MOD_LENGTH 500 76 #define MAX_SIMPLE_LIB_LENGTH 300 77 #define MAX_SIMPLE_LIB_MOD_LENGTH 500 78 #define MAX_LINE_LENGTH 20000 79 #define MAX_EVENT_NAME_LENGTH 150 80 #define MAX_MODULE_NAME_LENGTH 250 81 #define MAX_VALUE_STRING_LENGTH 250 82 #define MAX_ARCH_NAME_LENGTH 20 83 #define MAX_CMASK_STR_LENGTH 5 84 #define MAX_INV_STR_LENGTH 5 85 #define MAX_SP_STR_LENGTH 50 87 #define PIPE_BUFFER_LENGTH 1000 94 pipe = popen(cmd,
"r");
97 printf(
"Cannot open pipe. Exiting...\n");
// Fragment of a whitespace-skipping helper; its signature and the bodies of
// the branch and loop below are not visible in this listing.
// The first test reads *srcbuffer and advances the pointer as a side effect,
// so one character is consumed whether or not it is whitespace.
// NOTE(review): isspace() requires a value representable as unsigned char
// (or EOF); passing a plain char with the high bit set is undefined - consider
// casting to unsigned char.
135 if(!isspace(*srcbuffer++))
// Scans over further whitespace (loop body not visible; presumably it advances srcbuffer).
139 while(isspace(*srcbuffer))
// Hands the position reached back to the caller through the out-parameter.
143 *destbuffer = srcbuffer;
// skipString: tests whether srcbuffer begins with the literal strptr.
//   strptr    - NUL-terminated prefix to look for
//   srcbuffer - NUL-terminated text to examine
//   dstbuffer - out-parameter; set to the first character after the prefix
// The return statements are not visible in this listing. Callers in this file
// use the result as `if(!skipString(...)) continue;`, so it presumably returns
// false on mismatch and true on match - TODO confirm.
154 bool skipString(
const char *strptr,
const char *srcbuffer,
const char **dstbuffer)
// strncmp() is non-zero when the first strlen(strptr) characters differ.
156 if(strncmp(srcbuffer, strptr, strlen(strptr)))
// dstbuffer is dereferenced here without a visible null check.
160 *dstbuffer = srcbuffer + strlen(strptr);
// Default constructor: the object gets the placeholder name
// "<dynamically generated>".
169 FileInfo(
void) : NAME(
"<dynamically generated>") {}
// Populates the symbol cache (enclosing constructor/method not visible here).
174 this->createOffsetMap();
// --- Symbol lookup by offset (signature not visible) ---
// An empty cache is handled first; the value returned in that case is not visible.
180 if(m_symbolCache.empty())
// lower_bound yields the first entry that CacheItemComparator does not order
// before `offset`; this requires m_symbolCache to be sorted under that comparator.
185 SymbolCache::iterator i = lower_bound(m_symbolCache.begin(), m_symbolCache.end(), offset,
CacheItemComparator());
// NOTE(review): lower_bound returns end() when every entry orders before
// `offset`; no end() check is visible before this dereference (original lines
// 181-184 are missing from this listing) - confirm one exists.
186 if(i->OFFSET == offset)
// Exact match: return the cached name. The pointer comes from c_str() on the
// cached entry, so it stays valid only while that entry is left unmodified.
188 return i->NAME.c_str();
// No exact match and `i` is the first entry: fall back to the first name.
191 if(i == m_symbolCache.begin())
193 return m_symbolCache.begin()->NAME.c_str();
// Lines 194-197 are not visible; presumably `i` is stepped back to the entry
// preceding `offset` before this return - TODO confirm.
198 return i->NAME.c_str();
// --- Second lookup (signature not visible) ---
// upper_bound yields the first entry ordered after `offset`; the end() case is
// tested explicitly below.
203 SymbolCache::iterator i = upper_bound(m_symbolCache.begin(), m_symbolCache.end(), offset,
CacheItemComparator());
204 if(i == m_symbolCache.end())
// Fragment that fills m_symbolCache from the output of two external tools
// (presumably the body of createOffsetMap(); the signature is not visible).
// Part 1 reads `objdump -p` output (the command is named in the error message
// below) to derive vmbase. Part 2 parses symbol lines read through `nm`.
241 bool matched =
false;
242 while(objdump.output())
253 if(!objdump.output())
break;
254 if(line.
empty())
continue;
255 const char *lineptr = line.
c_str();
// Only lines of the form "LOAD ... off <hex> ... vaddr <hex>" are of interest.
// The whitespace skipping between the tokens is on lines missing from this listing.
257 if(!
skipString(
"LOAD", lineptr, &lineptr))
continue;
259 if(!
skipString(
"off", lineptr, &lineptr))
continue;
// NOTE(review): strtol() returns long; storing it in an int truncates values
// above INT_MAX (possible for 64-bit offsets/addresses).
261 int initialBase = strtol(lineptr, &endptr, 16);
// endptr == lineptr means no digits were converted.
262 if(lineptr == endptr)
continue;
265 if(!
skipString(
"vaddr", lineptr, &lineptr))
continue;
267 int finalBase = strtol(lineptr, &endptr, 16);
268 if(lineptr == endptr)
continue;
// vmbase = virtual address of the segment minus its file offset.
269 vmbase=finalBase - initialBase;
// Diagnostics; the condition that triggers them is not visible (presumably `matched` stayed false).
275 fprintf(stderr,
"Cannot determine VM base address for %s\n", NAME.
c_str());
276 fprintf(stderr,
"Error while running `objdump -p %s`\n", NAME.
c_str());
// --- Part 2: symbol table lines ---
285 if(!
nm.output())
break;
286 if(line.
empty())
continue;
// The address is parsed in base 10, so the tool must print decimal addresses.
// NOTE(review): the nm command line is not visible - confirm it requests
// decimal radix. The int narrowing noted above applies here too.
290 int address = strtol(begin, &endptr, 10);
291 if(endptr == begin)
continue;
// Expected layout after the address: one space, one non-space character
// (presumably the symbol type), one space, then the symbol name.
292 if(*endptr++ !=
' ')
continue;
293 if(isspace(*endptr++))
continue;
294 if(*endptr++ !=
' ')
continue;
295 char *symbolName = endptr;
// The name must run to the end of the line; a line with whitespace after the
// name is rejected.
296 while(*endptr && !isspace(*endptr)) endptr++;
297 if(*endptr != 0)
continue;
// Names starting with '.' are ignored.
299 if(symbolName[0] ==
'.')
continue;
// Cache offsets are relative to vmbase.
304 int offset = address-vmbase;
// When the most recent entry has the same offset its name is overwritten, so
// the later symbol wins. The branch that appends a new entry is not visible.
305 if(m_symbolCache.
size() && (m_symbolCache.
back().OFFSET == offset)) m_symbolCache.
back().NAME = symbolName;
// Event names collected in core_caa_events (some entries of the original list
// are missing from this listing).
// The same strings are used verbatim as lookup keys when the Core metrics are
// derived (e.g. "UNHALTED_CORE_CYCLES", "RS_UOPS_DISPATCHED CMASK=1 INV=1"),
// so the spelling - including the " CMASK=1 INV=1" suffix - must match exactly.
326 core_caa_events.
push_back(
"BRANCH_INSTRUCTIONS_RETIRED");
328 core_caa_events.
push_back(
"INST_RETIRED:LOADS");
329 core_caa_events.
push_back(
"INST_RETIRED:OTHER");
330 core_caa_events.
push_back(
"INST_RETIRED:STORES");
331 core_caa_events.
push_back(
"INSTRUCTIONS_RETIRED");
332 core_caa_events.
push_back(
"LOAD_BLOCK:OVERLAP_STORE");
333 core_caa_events.
push_back(
"LOAD_BLOCK:STA");
334 core_caa_events.
push_back(
"LOAD_BLOCK:UNTIL_RETIRE");
335 core_caa_events.
push_back(
"MEM_LOAD_RETIRED:DTLB_MISS");
336 core_caa_events.
push_back(
"MEM_LOAD_RETIRED:L1D_LINE_MISS");
337 core_caa_events.
push_back(
"MEM_LOAD_RETIRED:L2_LINE_MISS");
338 core_caa_events.
push_back(
"MISPREDICTED_BRANCH_RETIRED");
341 core_caa_events.
push_back(
"RS_UOPS_DISPATCHED CMASK=1 INV=1");
342 core_caa_events.
push_back(
"SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE");
343 core_caa_events.
push_back(
"UNHALTED_CORE_CYCLES");
// Event names collected in nhm_caa_events.
// The same strings are used verbatim as lookup keys when the metrics based on
// "CPU_CLK_UNHALTED:THREAD_P" are derived, so the spelling - including
// suffixes such as " CMASK=1" and " CMASK=1 INV=1" - must match exactly.
351 nhm_caa_events.
push_back(
"ARITH:CYCLES_DIV_BUSY");
352 nhm_caa_events.
push_back(
"BR_INST_EXEC:ANY");
353 nhm_caa_events.
push_back(
"BR_INST_EXEC:DIRECT_NEAR_CALL");
354 nhm_caa_events.
push_back(
"BR_INST_EXEC:INDIRECT_NEAR_CALL");
355 nhm_caa_events.
push_back(
"BR_INST_EXEC:INDIRECT_NON_CALL");
356 nhm_caa_events.
push_back(
"BR_INST_EXEC:NEAR_CALLS");
357 nhm_caa_events.
push_back(
"BR_INST_EXEC:NON_CALLS");
358 nhm_caa_events.
push_back(
"BR_INST_EXEC:RETURN_NEAR");
359 nhm_caa_events.
push_back(
"BR_INST_RETIRED:ALL_BRANCHES");
360 nhm_caa_events.
push_back(
"BR_INST_RETIRED:CONDITIONAL");
361 nhm_caa_events.
push_back(
"BR_INST_RETIRED:NEAR_CALL");
362 nhm_caa_events.
push_back(
"BR_MISP_EXEC:ANY");
363 nhm_caa_events.
push_back(
"CPU_CLK_UNHALTED:THREAD_P");
364 nhm_caa_events.
push_back(
"DTLB_LOAD_MISSES:WALK_COMPLETED");
365 nhm_caa_events.
push_back(
"INST_RETIRED:ANY_P");
366 nhm_caa_events.
push_back(
"ITLB_MISSES:WALK_COMPLETED");
367 nhm_caa_events.
push_back(
"L2_RQSTS:IFETCH_HIT");
368 nhm_caa_events.
push_back(
"L2_RQSTS:IFETCH_MISS");
369 nhm_caa_events.
push_back(
"MEM_INST_RETIRED:LOADS");
370 nhm_caa_events.
push_back(
"MEM_INST_RETIRED:STORES");
371 nhm_caa_events.
push_back(
"MEM_LOAD_RETIRED:L2_HIT");
372 nhm_caa_events.
push_back(
"MEM_LOAD_RETIRED:L3_MISS");
373 nhm_caa_events.
push_back(
"MEM_LOAD_RETIRED:L3_UNSHARED_HIT");
374 nhm_caa_events.
push_back(
"MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM");
375 nhm_caa_events.
push_back(
"MEM_UNCORE_RETIRED:LOCAL_DRAM");
376 nhm_caa_events.
push_back(
"MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM");
377 nhm_caa_events.
push_back(
"MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT");
378 nhm_caa_events.
push_back(
"MEM_UNCORE_RETIRED:REMOTE_DRAM");
379 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM");
380 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM");
381 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP");
382 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD");
383 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM");
384 nhm_caa_events.
push_back(
"OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT");
385 nhm_caa_events.
push_back(
"RESOURCE_STALLS:ANY");
386 nhm_caa_events.
push_back(
"SSEX_UOPS_RETIRED:PACKED_DOUBLE");
387 nhm_caa_events.
push_back(
"SSEX_UOPS_RETIRED:PACKED_SINGLE");
388 nhm_caa_events.
push_back(
"UOPS_DECODED:MS CMASK=1");
389 nhm_caa_events.
push_back(
"UOPS_ISSUED:ANY CMASK=1 INV=1");
390 nhm_caa_events.
push_back(
"ITLB_MISS_RETIRED");
391 nhm_caa_events.
push_back(
"UOPS_RETIRED:ANY");
// Two identical presence checks (the enclosing loops are not visible;
// presumably one iterates over each required-event list).
// find() does a linear search of C_events for the event name *it; a result of
// end() means the event is not present.
398 if(find(C_events.
begin(), C_events.
end(), (*it))==C_events.
end())
// Reports the missing event on stderr. How the program aborts afterwards is
// not visible in this listing.
400 fprintf(stderr,
"ERROR: Cannot find event %s!!!\naborting...\n", (*it).c_str());
411 if(find(C_events.
begin(), C_events.
end(), (*it))==C_events.
end())
413 fprintf(stderr,
"ERROR: Cannot find event %s!!!\naborting...\n", (*it).c_str());
// Names collected in core_caa_events_displ, in this order (presumably the
// display order of the Core report - TODO confirm against the consumer).
// Each string is also the key under which the corresponding derived metric is
// stored, so a rename here must be mirrored where the metric is computed.
// NOTE(review): "iFactor" is listed, but its assignment is not among the lines
// visible in this listing - confirm it is computed.
422 core_caa_events_displ.
push_back(
"Total Cycles");
423 core_caa_events_displ.
push_back(
"Stalled Cycles");
424 core_caa_events_displ.
push_back(
"% of Total Cycles");
425 core_caa_events_displ.
push_back(
"Instructions Retired");
428 core_caa_events_displ.
push_back(
"iMargin");
429 core_caa_events_displ.
push_back(
"iFactor");
431 core_caa_events_displ.
push_back(
"Counted Stalled Cycles");
433 core_caa_events_displ.
push_back(
"L2 Miss Impact");
434 core_caa_events_displ.
push_back(
"L2 Miss % of counted Stalled Cycles");
436 core_caa_events_displ.
push_back(
"L2 Hit Impact");
437 core_caa_events_displ.
push_back(
"L2 Hit % of counted Stalled Cycles");
439 core_caa_events_displ.
push_back(
"L1 DTLB Miss Impact");
440 core_caa_events_displ.
push_back(
"L1 DTLB Miss % of counted Stalled Cycles");
442 core_caa_events_displ.
push_back(
"LCP Stalls Impact");
443 core_caa_events_displ.
push_back(
"LCP Stalls % of counted Stalled Cycles");
445 core_caa_events_displ.
push_back(
"Store-Fwd Stalls Impact");
446 core_caa_events_displ.
push_back(
"Store-Fwd Stalls % of counted Stalled Cycles");
448 core_caa_events_displ.
push_back(
"Loads Blocked by Unknown Address Store Impact");
449 core_caa_events_displ.
push_back(
"Loads Blocked % of Store-Fwd Stalls Cycles");
450 core_caa_events_displ.
push_back(
"Loads Overlapped with Stores Impact");
451 core_caa_events_displ.
push_back(
"Loads Overlapped % of Store-Fwd Stalls Cycles");
452 core_caa_events_displ.
push_back(
"Loads Spanning across Cache Lines Impact");
453 core_caa_events_displ.
push_back(
"Loads Spanning % of Store-Fwd Stalls Cycles");
455 core_caa_events_displ.
push_back(
"Load Instructions");
456 core_caa_events_displ.
push_back(
"Load % of all Instructions");
457 core_caa_events_displ.
push_back(
"Store Instructions");
458 core_caa_events_displ.
push_back(
"Store % of all Instructions");
459 core_caa_events_displ.
push_back(
"Branch Instructions");
460 core_caa_events_displ.
push_back(
"Branch % of all Instructions");
461 core_caa_events_displ.
push_back(
"Packed SIMD Computational Instructions");
462 core_caa_events_displ.
push_back(
"Packed SIMD % of all Instructions");
463 core_caa_events_displ.
push_back(
"Other Instructions");
464 core_caa_events_displ.
push_back(
"Other % of all Instructions");
466 core_caa_events_displ.
push_back(
"ITLB Miss Rate in %");
467 core_caa_events_displ.
push_back(
"% of Mispredicted Branches");
// Derived Core metrics for the entry `it` (enclosing loop and function are not
// visible). Raw counter values are read from it->second by event name, and each
// derived value is stored back into it->second under its display name.
// NOTE(review): every access uses operator[]; if it->second is a std::map-like
// container (TODO confirm) a read of a missing key silently inserts a
// default value instead of failing.
// NOTE(review): none of the divisions below has a visible zero check. An entry
// whose denominator counter is zero would yield inf/NaN if the stored values
// are floating point - confirm the value type and whether guards exist on the
// lines missing from this listing.
474 (it->second)[
"Total Cycles"] = (it->second)[
"UNHALTED_CORE_CYCLES"];
475 (it->second)[
"Stalled Cycles"] = (it->second)[
"RS_UOPS_DISPATCHED CMASK=1 INV=1"];
// "Impact" values are event counts multiplied by a fixed per-event cycle cost.
476 (it->second)[
"L2 Miss Impact"] = (it->second)[
"MEM_LOAD_RETIRED:L2_LINE_MISS"] *
CORE_L2_MISS_CYCLES;
// L2 hits are taken as L1D line misses minus L2 line misses.
477 (it->second)[
"L2 Hit Impact"] = ((it->second)[
"MEM_LOAD_RETIRED:L1D_LINE_MISS"] - (it->second)[
"MEM_LOAD_RETIRED:L2_LINE_MISS"]) *
CORE_L2_HIT_CYCLES;
481 (it->second)[
"Loads Overlapped with Stores Impact"] = (it->second)[
"LOAD_BLOCK:OVERLAP_STORE"] *
CORE_OVERLAPPING_CYCLES;
// Sums of impacts; the assignments of some addends are on lines missing from
// this listing.
483 (it->second)[
"Store-Fwd Stalls Impact"] = (it->second)[
"Loads Blocked by Unknown Address Store Impact"] + (it->second)[
"Loads Overlapped with Stores Impact"] + (it->second)[
"Loads Spanning across Cache Lines Impact"];
484 (it->second)[
"Counted Stalled Cycles"] = (it->second)[
"L2 Miss Impact"] + (it->second)[
"L2 Hit Impact"] + (it->second)[
"LCP Stalls Impact"] + (it->second)[
"L1 DTLB Miss Impact"] + (it->second)[
"Store-Fwd Stalls Impact"];
485 (it->second)[
"Instructions Retired"] = (it->second)[
"INSTRUCTIONS_RETIRED"];
486 (it->second)[
"ITLB Miss Rate in %"] = ((it->second)[
"ITLB_MISS_RETIRED"]/(it->second)[
"INSTRUCTIONS_RETIRED"])*100;
487 (it->second)[
"Branch Instructions"] = (it->second)[
"BRANCH_INSTRUCTIONS_RETIRED"];
488 (it->second)[
"Load Instructions"] = (it->second)[
"INST_RETIRED:LOADS"];
489 (it->second)[
"Store Instructions"] = (it->second)[
"INST_RETIRED:STORES"];
// "Other" excludes packed SIMD and branch instructions from INST_RETIRED:OTHER.
490 (it->second)[
"Other Instructions"] = (it->second)[
"INST_RETIRED:OTHER"] - (it->second)[
"SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] - (it->second)[
"BRANCH_INSTRUCTIONS_RETIRED"];
491 (it->second)[
"% of Mispredicted Branches"] = ((it->second)[
"MISPREDICTED_BRANCH_RETIRED"]/(it->second)[
"BRANCH_INSTRUCTIONS_RETIRED"])*100;
492 (it->second)[
"Packed SIMD Computational Instructions"] = (it->second)[
"SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"];
// Denominator for the instruction-mix percentages: the sum of the five classes
// above, not the INSTRUCTIONS_RETIRED counter.
493 (it->second)[
"Counted Instructions Retired"] = (it->second)[
"Branch Instructions"] + (it->second)[
"Load Instructions"] + (it->second)[
"Store Instructions"] + (it->second)[
"Other Instructions"] + (it->second)[
"Packed SIMD Computational Instructions"];
494 (it->second)[
"CPI"] = (it->second)[
"UNHALTED_CORE_CYCLES"]/(it->second)[
"INSTRUCTIONS_RETIRED"];
// iMargin: percentage by which totalCycles would shrink if this entry ran at
// EXPECTED_CPI. The entry's cycles are scaled by EXPECTED_CPI/CPI, the total is
// recomputed with the scaled value, and the relative saving is stored.
496 double localPerformanceImprovement = (it->second)[
"CPI"]/
EXPECTED_CPI;
497 double cyclesAfterImprovement = (it->second)[
"UNHALTED_CORE_CYCLES"]/localPerformanceImprovement;
498 double totalCyclesAfterImprovement = totalCycles-(it->second)[
"UNHALTED_CORE_CYCLES"]+cyclesAfterImprovement;
499 (it->second)[
"iMargin"] = 100-(totalCyclesAfterImprovement/totalCycles)*100;
// Percentages relative to total cycles, counted stalled cycles, store-forward
// stall cycles and counted instructions respectively.
501 (it->second)[
"% of Total Cycles"] = (it->second)[
"RS_UOPS_DISPATCHED CMASK=1 INV=1"]*100/(it->second)[
"UNHALTED_CORE_CYCLES"];
502 (it->second)[
"L2 Miss % of counted Stalled Cycles"] =(it->second)[
"L2 Miss Impact"]*100/(it->second)[
"Counted Stalled Cycles"];
503 (it->second)[
"L2 Hit % of counted Stalled Cycles"] =(it->second)[
"L2 Hit Impact"]*100/(it->second)[
"Counted Stalled Cycles"];
504 (it->second)[
"L1 DTLB Miss % of counted Stalled Cycles"] =(it->second)[
"L1 DTLB Miss Impact"]*100/(it->second)[
"Counted Stalled Cycles"];
505 (it->second)[
"LCP Stalls % of counted Stalled Cycles"] =(it->second)[
"LCP Stalls Impact"]*100/(it->second)[
"Counted Stalled Cycles"];
506 (it->second)[
"Store-Fwd Stalls % of counted Stalled Cycles"] =(it->second)[
"Store-Fwd Stalls Impact"]*100/(it->second)[
"Counted Stalled Cycles"];
507 (it->second)[
"Loads Blocked % of Store-Fwd Stalls Cycles"] =(it->second)[
"Loads Blocked by Unknown Address Store Impact"]*100/(it->second)[
"Store-Fwd Stalls Impact"];
508 (it->second)[
"Loads Overlapped % of Store-Fwd Stalls Cycles"] =(it->second)[
"Loads Overlapped with Stores Impact"]*100/(it->second)[
"Store-Fwd Stalls Impact"];
509 (it->second)[
"Loads Spanning % of Store-Fwd Stalls Cycles"] =(it->second)[
"Loads Spanning across Cache Lines Impact"]*100/(it->second)[
"Store-Fwd Stalls Impact"];
511 (it->second)[
"Load % of all Instructions"] =(it->second)[
"INST_RETIRED:LOADS"]*100/(it->second)[
"Counted Instructions Retired"];
512 (it->second)[
"Store % of all Instructions"] =(it->second)[
"INST_RETIRED:STORES"]*100/(it->second)[
"Counted Instructions Retired"];
513 (it->second)[
"Branch % of all Instructions"] =(it->second)[
"BRANCH_INSTRUCTIONS_RETIRED"]*100/(it->second)[
"Counted Instructions Retired"];
514 (it->second)[
"Packed SIMD % of all Instructions"] =(it->second)[
"SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"]*100/(it->second)[
"Counted Instructions Retired"];
515 (it->second)[
"Other % of all Instructions"] =(it->second)[
"Other Instructions"]*100/(it->second)[
"Counted Instructions Retired"];
// Names collected in nhm_caa_events_displ, in this order (presumably the
// display order of the report - TODO confirm against the consumer).
// Each string is also the key under which the corresponding derived metric is
// stored, so a rename here must be mirrored where the metric is computed.
// NOTE(review): "IFECTHes" in several names below looks like a misspelling of
// "IFETCHes". The same spelling is used for the keys written by the metric
// computation, so both places have to be changed together.
// NOTE(review): "iFactor" is listed, but its assignment is not among the lines
// visible in this listing - confirm it is computed.
521 nhm_caa_events_displ.
push_back(
"Total Cycles");
522 nhm_caa_events_displ.
push_back(
"Instructions Retired");
525 nhm_caa_events_displ.
push_back(
"iMargin");
526 nhm_caa_events_displ.
push_back(
"iFactor");
528 nhm_caa_events_displ.
push_back(
"Stalled Cycles");
529 nhm_caa_events_displ.
push_back(
"% of Total Cycles");
530 nhm_caa_events_displ.
push_back(
"Total Counted Stalled Cycles");
532 nhm_caa_events_displ.
push_back(
"Instruction Starvation % of Total Cycles");
533 nhm_caa_events_displ.
push_back(
"# of Instructions per Call");
534 nhm_caa_events_displ.
push_back(
"% of Total Cycles spent handling FP exceptions");
536 nhm_caa_events_displ.
push_back(
"Counted Stalled Cycles due to Load Ops");
538 nhm_caa_events_displ.
push_back(
"L2 Hit Impact");
539 nhm_caa_events_displ.
push_back(
"L2 Hit % of Load Stalls");
541 nhm_caa_events_displ.
push_back(
"L3 Unshared Hit Impact");
542 nhm_caa_events_displ.
push_back(
"L3 Unshared Hit % of Load Stalls");
544 nhm_caa_events_displ.
push_back(
"L2 Other Core Hit Impact");
545 nhm_caa_events_displ.
push_back(
"L2 Other Core Hit % of Load Stalls");
547 nhm_caa_events_displ.
push_back(
"L2 Other Core Hit Modified Impact");
548 nhm_caa_events_displ.
push_back(
"L2 Other Core Hit Modified % of Load Stalls");
550 nhm_caa_events_displ.
push_back(
"L3 Miss -> Local DRAM Hit Impact");
551 nhm_caa_events_displ.
push_back(
"L3 Miss -> Remote DRAM Hit Impact");
552 nhm_caa_events_displ.
push_back(
"L3 Miss -> Remote Cache Hit Impact");
553 nhm_caa_events_displ.
push_back(
"L3 Miss -> Total Impact");
554 nhm_caa_events_displ.
push_back(
"L3 Miss % of Load Stalls");
556 nhm_caa_events_displ.
push_back(
"L1 DTLB Miss Impact");
557 nhm_caa_events_displ.
push_back(
"L1 DTLB Miss % of Load Stalls");
559 nhm_caa_events_displ.
push_back(
"Cycles spent during DIV & SQRT Ops");
560 nhm_caa_events_displ.
push_back(
"DIV & SQRT Ops % of counted Stalled Cycles");
562 nhm_caa_events_displ.
push_back(
"Total L2 IFETCH misses");
563 nhm_caa_events_displ.
push_back(
"% of L2 IFETCH misses");
565 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by Local DRAM");
566 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by L3 (Modified)");
567 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by L3 (Clean Snoop)");
568 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by Remote L2");
569 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by Remote DRAM");
570 nhm_caa_events_displ.
push_back(
"% of IFETCHes served by L3 (No Snoop)");
572 nhm_caa_events_displ.
push_back(
"Total L2 IFETCH miss Impact");
574 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by Local DRAM");
575 nhm_caa_events_displ.
push_back(
"Local DRAM IFECTHes % Impact");
577 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by L3 (Modified)");
578 nhm_caa_events_displ.
push_back(
"L3 (Modified) IFECTHes % Impact");
580 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by L3 (Clean Snoop)");
581 nhm_caa_events_displ.
push_back(
"L3 (Clean Snoop) IFECTHes % Impact");
583 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by Remote L2");
584 nhm_caa_events_displ.
push_back(
"Remote L2 IFECTHes % Impact");
586 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by Remote DRAM");
587 nhm_caa_events_displ.
push_back(
"Remote DRAM IFECTHes % Impact");
589 nhm_caa_events_displ.
push_back(
"Cycles IFETCH served by L3 (No Snoop)");
590 nhm_caa_events_displ.
push_back(
"L3 (No Snoop) IFECTHes % Impact");
592 nhm_caa_events_displ.
push_back(
"Total Branch Instructions Executed");
593 nhm_caa_events_displ.
push_back(
"% of Mispredicted Branches");
595 nhm_caa_events_displ.
push_back(
"Direct Near Calls % of Total Branches Executed");
596 nhm_caa_events_displ.
push_back(
"Indirect Near Calls % of Total Branches Executed");
597 nhm_caa_events_displ.
push_back(
"Indirect Near Non-Calls % of Total Branches Executed");
598 nhm_caa_events_displ.
push_back(
"All Near Calls % of Total Branches Executed");
599 nhm_caa_events_displ.
push_back(
"All Non Calls % of Total Branches Executed");
600 nhm_caa_events_displ.
push_back(
"All Returns % of Total Branches Executed");
602 nhm_caa_events_displ.
push_back(
"Total Branch Instructions Retired");
603 nhm_caa_events_displ.
push_back(
"Conditionals % of Total Branches Retired");
604 nhm_caa_events_displ.
push_back(
"Near Calls % of Total Branches Retired");
606 nhm_caa_events_displ.
push_back(
"L1 ITLB Miss Impact");
607 nhm_caa_events_displ.
push_back(
"ITLB Miss Rate in %");
609 nhm_caa_events_displ.
push_back(
"Branch Instructions");
610 nhm_caa_events_displ.
push_back(
"Branch % of all Instructions");
612 nhm_caa_events_displ.
push_back(
"Load Instructions");
613 nhm_caa_events_displ.
push_back(
"Load % of all Instructions");
615 nhm_caa_events_displ.
push_back(
"Store Instructions");
616 nhm_caa_events_displ.
push_back(
"Store % of all Instructions");
618 nhm_caa_events_displ.
push_back(
"Other Instructions");
619 nhm_caa_events_displ.
push_back(
"Other % of all Instructions");
621 nhm_caa_events_displ.
push_back(
"Packed UOPS Retired");
622 nhm_caa_events_displ.
push_back(
"Packed % of all UOPS Retired");
// Derived metrics for the entry `it`, based on the "CPU_CLK_UNHALTED:THREAD_P"
// event set (enclosing loop and function are not visible). Raw counter values
// are read from it->second by event name, and each derived value is stored
// back into it->second under its display name.
// NOTE(review): every access uses operator[]; if it->second is a std::map-like
// container (TODO confirm) a read of a missing key silently inserts a
// default value instead of failing.
// NOTE(review): none of the divisions below has a visible zero check. An entry
// whose denominator counter is zero would yield inf/NaN if the stored values
// are floating point - confirm the value type and whether guards exist on the
// lines missing from this listing.
629 (it->second)[
"Total Cycles"] = (it->second)[
"CPU_CLK_UNHALTED:THREAD_P"];
// "Impact" values are event counts multiplied by a fixed per-event cycle cost.
631 (it->second)[
"L2 Hit Impact"] = (it->second)[
"MEM_LOAD_RETIRED:L2_HIT"] *
I7_L2_HIT_CYCLES;
// Clean other-core hits are HIT_HITM minus HITM. The subtraction is only done
// when the difference is positive; otherwise the impact is set to 0.0.
633 if((it->second)[
"MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"]>(it->second)[
"MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"])
635 (it->second)[
"L2 Other Core Hit Impact"] = ((it->second)[
"MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] - (it->second)[
"MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"])*
I7_OTHER_CORE_L2_HIT_CYCLES;
639 (it->second)[
"L2 Other Core Hit Impact"] = 0.0;
641 (it->second)[
"L2 Other Core Hit Modified Impact"] = (it->second)[
"MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] *
I7_OTHER_CORE_L2_HITM_CYCLES;
// Sums of impacts; the assignments of some addends are on lines missing from
// this listing.
645 (it->second)[
"L3 Miss -> Total Impact"] = (it->second)[
"L3 Miss -> Local DRAM Hit Impact"] + (it->second)[
"L3 Miss -> Remote DRAM Hit Impact"] + (it->second)[
"L3 Miss -> Remote Cache Hit Impact"];
647 (it->second)[
"Counted Stalled Cycles due to Load Ops"] = (it->second)[
"L3 Miss -> Total Impact"] + (it->second)[
"L2 Hit Impact"] + (it->second)[
"L1 DTLB Miss Impact"] + (it->second)[
"L3 Unshared Hit Impact"] + (it->second)[
"L2 Other Core Hit Modified Impact"] + (it->second)[
"L2 Other Core Hit Impact"];
648 (it->second)[
"Cycles spent during DIV & SQRT Ops"] = (it->second)[
"ARITH:CYCLES_DIV_BUSY"];
649 (it->second)[
"Total Counted Stalled Cycles"] = (it->second)[
"Counted Stalled Cycles due to Load Ops"] + (it->second)[
"Cycles spent during DIV & SQRT Ops"];
// Here "Stalled Cycles" is the estimated total computed above, not a raw counter.
650 (it->second)[
"Stalled Cycles"] = (it->second)[
"Total Counted Stalled Cycles"];
651 (it->second)[
"% of Total Cycles"] = (it->second)[
"Stalled Cycles"] * 100 / (it->second)[
"CPU_CLK_UNHALTED:THREAD_P"];
// Share of each load-related impact in the load-stall estimate.
652 (it->second)[
"L3 Miss % of Load Stalls"] = (it->second)[
"L3 Miss -> Total Impact"] * 100 / (it->second)[
"Counted Stalled Cycles due to Load Ops"];
653 (it->second)[
"L2 Hit % of Load Stalls"] = (it->second)[
"L2 Hit Impact"] * 100 / (it->second)[
"Counted Stalled Cycles due to Load Ops"];
654 (it->second)[
"L1 DTLB Miss % of Load Stalls"] = (it->second)[
"L1 DTLB Miss Impact"] * 100 / (it->second)[
"Counted Stalled Cycles due to Load Ops"];
655 (it->second)[
"L3 Unshared Hit % of Load Stalls"] = (it->second)[
"L3 Unshared Hit Impact"] * 100 / (it->second)[
"Counted Stalled Cycles due to Load Ops"];
656 (it->second)[
"L2 Other Core Hit % of Load Stalls"] = (it->second)[
"L2 Other Core Hit Impact"] * 100 / (it->second)[
"Counted Stalled Cycles due to Load Ops"];
657 (it->second)[
"L2 Other Core Hit Modified % of Load Stalls"] = (it->second)[
"L2 Other Core Hit Modified Impact"] * 100 / (it->second)[
"Counted Stalled Cycles due to Load Ops"];
658 (it->second)[
"DIV & SQRT Ops % of counted Stalled Cycles"] = (it->second)[
"Cycles spent during DIV & SQRT Ops"] * 100 / (it->second)[
"Total Counted Stalled Cycles"];
// Instruction-fetch impacts: offcore response counts times a per-source cost.
661 (it->second)[
"Cycles IFETCH served by L3 (Modified)"] = (it->second)[
"OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] *
I7_IFETCH_L2_MISS_L3_HITM;
662 (it->second)[
"Cycles IFETCH served by L3 (Clean Snoop)"] = (it->second)[
"OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] *
I7_IFETCH_L2_MISS_L3_HIT_SNOOP;
666 (it->second)[
"Total L2 IFETCH miss Impact"] = (it->second)[
"Cycles IFETCH served by Local DRAM"] + (it->second)[
"Cycles IFETCH served by L3 (Modified)"] + (it->second)[
"Cycles IFETCH served by L3 (Clean Snoop)"] + (it->second)[
"Cycles IFETCH served by Remote L2"] + (it->second)[
"Cycles IFETCH served by Remote DRAM"] + (it->second)[
"Cycles IFETCH served by L3 (No Snoop)"];
// NOTE(review): the "IFECTHes" spelling in the keys below matches the display
// list; keep the two in sync if it is ever corrected.
667 (it->second)[
"Local DRAM IFECTHes % Impact"] = (it->second)[
"Cycles IFETCH served by Local DRAM"] * 100 / (it->second)[
"Total L2 IFETCH miss Impact"];
668 (it->second)[
"L3 (Modified) IFECTHes % Impact"] = (it->second)[
"Cycles IFETCH served by L3 (Modified)"] * 100 / (it->second)[
"Total L2 IFETCH miss Impact"];
669 (it->second)[
"L3 (Clean Snoop) IFECTHes % Impact"] = (it->second)[
"Cycles IFETCH served by L3 (Clean Snoop)"] * 100 / (it->second)[
"Total L2 IFETCH miss Impact"];
670 (it->second)[
"Remote L2 IFECTHes % Impact"] = (it->second)[
"Cycles IFETCH served by Remote L2"] * 100 / (it->second)[
"Total L2 IFETCH miss Impact"];
671 (it->second)[
"Remote DRAM IFECTHes % Impact"] = (it->second)[
"Cycles IFETCH served by Remote DRAM"] * 100 / (it->second)[
"Total L2 IFETCH miss Impact"];
672 (it->second)[
"L3 (No Snoop) IFECTHes % Impact"] = (it->second)[
"Cycles IFETCH served by L3 (No Snoop)"] * 100 / (it->second)[
"Total L2 IFETCH miss Impact"];
// Distribution of L2 instruction-fetch misses by the source that served them.
673 (it->second)[
"Total L2 IFETCH misses"] = (it->second)[
"L2_RQSTS:IFETCH_MISS"];
674 (it->second)[
"% of IFETCHes served by Local DRAM"] = (it->second)[
"OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * 100 / (it->second)[
"L2_RQSTS:IFETCH_MISS"];
675 (it->second)[
"% of IFETCHes served by L3 (Modified)"] = (it->second)[
"OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * 100 / (it->second)[
"L2_RQSTS:IFETCH_MISS"];
676 (it->second)[
"% of IFETCHes served by L3 (Clean Snoop)"] = (it->second)[
"OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * 100 / (it->second)[
"L2_RQSTS:IFETCH_MISS"];
677 (it->second)[
"% of IFETCHes served by Remote L2"] = (it->second)[
"OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * 100 / (it->second)[
"L2_RQSTS:IFETCH_MISS"];
678 (it->second)[
"% of IFETCHes served by Remote DRAM"] = (it->second)[
"OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * 100 / (it->second)[
"L2_RQSTS:IFETCH_MISS"];
679 (it->second)[
"% of IFETCHes served by L3 (No Snoop)"] = (it->second)[
"OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * 100 / (it->second)[
"L2_RQSTS:IFETCH_MISS"];
// Miss ratio over all L2 instruction-fetch requests (misses + hits).
680 (it->second)[
"% of L2 IFETCH misses"] = (it->second)[
"L2_RQSTS:IFETCH_MISS"] * 100 / ((it->second)[
"L2_RQSTS:IFETCH_MISS"] + (it->second)[
"L2_RQSTS:IFETCH_HIT"]);
// Branch statistics, first for executed branches, then for retired branches.
683 (it->second)[
"Total Branch Instructions Executed"] = (it->second)[
"BR_INST_EXEC:ANY"];
684 (it->second)[
"% of Mispredicted Branches"] = (it->second)[
"BR_MISP_EXEC:ANY"] * 100 / (it->second)[
"BR_INST_EXEC:ANY"];
685 (it->second)[
"Direct Near Calls % of Total Branches Executed"] = (it->second)[
"BR_INST_EXEC:DIRECT_NEAR_CALL"] * 100 / (it->second)[
"Total Branch Instructions Executed"];
686 (it->second)[
"Indirect Near Calls % of Total Branches Executed"] = (it->second)[
"BR_INST_EXEC:INDIRECT_NEAR_CALL"] * 100 / (it->second)[
"Total Branch Instructions Executed"];
687 (it->second)[
"Indirect Near Non-Calls % of Total Branches Executed"] = (it->second)[
"BR_INST_EXEC:INDIRECT_NON_CALL"] * 100 / (it->second)[
"Total Branch Instructions Executed"];
688 (it->second)[
"All Near Calls % of Total Branches Executed"] = (it->second)[
"BR_INST_EXEC:NEAR_CALLS"] * 100 / (it->second)[
"Total Branch Instructions Executed"];
689 (it->second)[
"All Non Calls % of Total Branches Executed"] = (it->second)[
"BR_INST_EXEC:NON_CALLS"] * 100 / (it->second)[
"Total Branch Instructions Executed"];
690 (it->second)[
"All Returns % of Total Branches Executed"] = (it->second)[
"BR_INST_EXEC:RETURN_NEAR"] * 100 / (it->second)[
"Total Branch Instructions Executed"];
691 (it->second)[
"Total Branch Instructions Retired"] = (it->second)[
"BR_INST_RETIRED:ALL_BRANCHES"];
692 (it->second)[
"Conditionals % of Total Branches Retired"] = (it->second)[
"BR_INST_RETIRED:CONDITIONAL"] * 100 / (it->second)[
"Total Branch Instructions Retired"];
693 (it->second)[
"Near Calls % of Total Branches Retired"] = (it->second)[
"BR_INST_RETIRED:NEAR_CALL"] * 100 / (it->second)[
"Total Branch Instructions Retired"];
// Starvation: the "UOPS_ISSUED:ANY CMASK=1 INV=1" count minus the
// RESOURCE_STALLS:ANY count, as a share of total cycles.
695 (it->second)[
"Instruction Starvation % of Total Cycles"] = ((it->second)[
"UOPS_ISSUED:ANY CMASK=1 INV=1"] - (it->second)[
"RESOURCE_STALLS:ANY"])* 100 / (it->second)[
"CPU_CLK_UNHALTED:THREAD_P"];
696 (it->second)[
"% of Total Cycles spent handling FP exceptions"] = (it->second)[
"UOPS_DECODED:MS CMASK=1"]* 100 / (it->second)[
"CPU_CLK_UNHALTED:THREAD_P"];
697 (it->second)[
"# of Instructions per Call"] = (it->second)[
"INST_RETIRED:ANY_P"] / (it->second)[
"BR_INST_EXEC:NEAR_CALLS"];
699 (it->second)[
"Instructions Retired"] = (it->second)[
"INST_RETIRED:ANY_P"];
700 (it->second)[
"ITLB Miss Rate in %"] = ((it->second)[
"ITLB_MISS_RETIRED"] / (it->second)[
"INST_RETIRED:ANY_P"]) * 100;
// Instruction mix; "Other" is what remains after loads, stores and branches.
702 (it->second)[
"Branch Instructions"] = (it->second)[
"BR_INST_RETIRED:ALL_BRANCHES"];
703 (it->second)[
"Load Instructions"] = (it->second)[
"MEM_INST_RETIRED:LOADS"];
704 (it->second)[
"Store Instructions"] = (it->second)[
"MEM_INST_RETIRED:STORES"];
705 (it->second)[
"Other Instructions"] = (it->second)[
"Instructions Retired"] - (it->second)[
"MEM_INST_RETIRED:LOADS"] - (it->second)[
"MEM_INST_RETIRED:STORES"] - (it->second)[
"BR_INST_RETIRED:ALL_BRANCHES"];
706 (it->second)[
"Packed UOPS Retired"] = (it->second)[
"SSEX_UOPS_RETIRED:PACKED_DOUBLE"] + (it->second)[
"SSEX_UOPS_RETIRED:PACKED_SINGLE"];
707 (it->second)[
"CPI"] = (it->second)[
"CPU_CLK_UNHALTED:THREAD_P"] / (it->second)[
"INST_RETIRED:ANY_P"];
// iMargin: percentage by which totalCycles would shrink if this entry ran at
// EXPECTED_CPI. The entry's cycles are scaled by EXPECTED_CPI/CPI, the total is
// recomputed with the scaled value, and the relative saving is stored.
709 double localPerformanceImprovement = (it->second)[
"CPI"]/
EXPECTED_CPI;
710 double cyclesAfterImprovement = (it->second)[
"CPU_CLK_UNHALTED:THREAD_P"]/localPerformanceImprovement;
711 double totalCyclesAfterImprovement = totalCycles-(it->second)[
"CPU_CLK_UNHALTED:THREAD_P"]+cyclesAfterImprovement;
712 (it->second)[
"iMargin"] = 100-(totalCyclesAfterImprovement/totalCycles)*100;
// Mix percentages use the INST_RETIRED:ANY_P counter as denominator.
714 (it->second)[
"Load % of all Instructions"] = (it->second)[
"MEM_INST_RETIRED:LOADS"] * 100 / (it->second)[
"INST_RETIRED:ANY_P"];
715 (it->second)[
"Store % of all Instructions"] = (it->second)[
"MEM_INST_RETIRED:STORES"] * 100 / (it->second)[
"INST_RETIRED:ANY_P"];
716 (it->second)[
"Branch % of all Instructions"] = (it->second)[
"BR_INST_RETIRED:ALL_BRANCHES"] * 100 / (it->second)[
"INST_RETIRED:ANY_P"];
717 (it->second)[
"Other % of all Instructions"] = (it->second)[
"Other Instructions"] * 100 / (it->second)[
"INST_RETIRED:ANY_P"];
719 (it->second)[
"Packed % of all UOPS Retired"] = (it->second)[
"Packed UOPS Retired"] * 100 / (it->second)[
"UOPS_RETIRED:ANY"];
744 total_num_samples = 0;
751 void init(
const char*
name,
const char* architecture,
const char* event_name,
unsigned int c_mask,
unsigned int inv_mask,
unsigned int smpl_period)
762 total_num_samples = total;
787 samples[index] += value;
// get_max: removes the entry of `samples` with the largest count and reports it.
//   index - out: receives the entry's key as a NUL-terminated string
//   value - out: receives the entry's count
// Returns false when `samples` is empty (the return on success is not visible
// in this listing). Because the reported entry is erased, repeated calls yield
// entries in non-increasing order of count.
790 bool get_max(
char *index,
unsigned int *value)
792 if(samples.
empty())
return false;
// NOTE(review): the scan starts from 0 and uses a strict '>', so an entry whose
// count is 0 never becomes the maximum. The initial value of max_pos is not
// visible - confirm it refers to a valid element when all counts are 0.
793 unsigned int cur_max = 0;
797 if(it->second > cur_max)
799 cur_max = it->second;
// NOTE(review): strcpy() has no length limit; the caller's buffer must be
// large enough for the longest key plus the terminator.
803 strcpy(index, (max_pos->first).c_str());
804 *value = max_pos->second;
805 samples.
erase(max_pos);
814 return total_num_samples;
// Fragment of a routine that builds s_mod piece by piece inside a loop of n
// iterations (signature, the per-character test and the source buffer are not
// visible in this listing).
// NOTE(review): each strcat() rescans s_mod from its start, so the total cost
// grows quadratically with the output length; no bound check on s_mod is
// visible either.
826 for (
int i=0; i <
n; i++)
// NOTE(review): as listed, the appended literals are "<", ">", "&" and a
// sequence of three double quotes, which is not a valid string literal.
// Presumably the real file appends escaped replacements (HTML/XML entities)
// that were decoded by whatever produced this listing - verify against the
// actual file before relying on these lines.
831 strcat(s_mod,
"<");
834 strcat(s_mod,
">");
837 strcat(s_mod,
"&");
840 strcat(s_mod,
""");
// Appends to_app (its origin is not visible; presumably the unmodified character).
846 strcat(s_mod, to_app);
858 char *operator_string_begin =
const_cast<char *
>(strstr(demangled_symbol,
"operator"));
859 if(operator_string_begin != NULL)
861 char *operator_string_end = operator_string_begin+8;
862 while(*operator_string_end ==
' ') operator_string_end++;
863 if(strstr(operator_string_end,
"delete[]")==operator_string_end)
865 operator_string_end+=8;
866 *operator_string_end=
'\0';
868 else if(strstr(operator_string_end,
"delete")==operator_string_end)
870 operator_string_end+=6;
871 *operator_string_end=
'\0';
873 else if(strstr(operator_string_end,
"new[]")==operator_string_end)
875 operator_string_end+=5;
876 *operator_string_end=
'\0';
878 else if(strstr(operator_string_end,
"new")==operator_string_end)
880 operator_string_end+=3;
881 *operator_string_end=
'\0';
883 else if(strstr(operator_string_end,
">>=")==operator_string_end)
885 operator_string_end+=3;
886 *operator_string_end=
'\0';
888 else if(strstr(operator_string_end,
"<<=")==operator_string_end)
890 operator_string_end+=3;
891 *operator_string_end=
'\0';
893 else if(strstr(operator_string_end,
"->*")==operator_string_end)
895 operator_string_end+=3;
896 *operator_string_end=
'\0';
898 else if(strstr(operator_string_end,
"<<")==operator_string_end)
900 operator_string_end+=2;
901 *operator_string_end=
'\0';
903 else if(strstr(operator_string_end,
">>")==operator_string_end)
905 operator_string_end+=2;
906 *operator_string_end=
'\0';
908 else if(strstr(operator_string_end,
">=")==operator_string_end)
910 operator_string_end+=2;
911 *operator_string_end=
'\0';
913 else if(strstr(operator_string_end,
"<=")==operator_string_end)
915 operator_string_end+=2;
916 *operator_string_end=
'\0';
918 else if(strstr(operator_string_end,
"==")==operator_string_end)
920 operator_string_end+=2;
921 *operator_string_end=
'\0';
923 else if(strstr(operator_string_end,
"!=")==operator_string_end)
925 operator_string_end+=2;
926 *operator_string_end=
'\0';
928 else if(strstr(operator_string_end,
"|=")==operator_string_end)
930 operator_string_end+=2;
931 *operator_string_end=
'\0';
933 else if(strstr(operator_string_end,
"&=")==operator_string_end)
935 operator_string_end+=2;
936 *operator_string_end=
'\0';
938 else if(strstr(operator_string_end,
"^=")==operator_string_end)
940 operator_string_end+=2;
941 *operator_string_end=
'\0';
943 else if(strstr(operator_string_end,
"%=")==operator_string_end)
945 operator_string_end+=2;
946 *operator_string_end=
'\0';
948 else if(strstr(operator_string_end,
"/=")==operator_string_end)
950 operator_string_end+=2;
951 *operator_string_end=
'\0';
953 else if(strstr(operator_string_end,
"*=")==operator_string_end)
955 operator_string_end+=2;
956 *operator_string_end=
'\0';
958 else if(strstr(operator_string_end,
"-=")==operator_string_end)
960 operator_string_end+=2;
961 *operator_string_end=
'\0';
963 else if(strstr(operator_string_end,
"+=")==operator_string_end)
965 operator_string_end+=2;
966 *operator_string_end=
'\0';
968 else if(strstr(operator_string_end,
"&&")==operator_string_end)
970 operator_string_end+=2;
971 *operator_string_end=
'\0';
973 else if(strstr(operator_string_end,
"||")==operator_string_end)
975 operator_string_end+=2;
976 *operator_string_end=
'\0';
978 else if(strstr(operator_string_end,
"[]")==operator_string_end)
980 operator_string_end+=2;
981 *operator_string_end=
'\0';
983 else if(strstr(operator_string_end,
"()")==operator_string_end)
985 operator_string_end+=2;
986 *operator_string_end=
'\0';
988 else if(strstr(operator_string_end,
"++")==operator_string_end)
990 operator_string_end+=2;
991 *operator_string_end=
'\0';
993 else if(strstr(operator_string_end,
"--")==operator_string_end)
995 operator_string_end+=2;
996 *operator_string_end=
'\0';
998 else if(strstr(operator_string_end,
"->")==operator_string_end)
1000 operator_string_end+=2;
1001 *operator_string_end=
'\0';
1003 else if(strstr(operator_string_end,
"<")==operator_string_end)
1005 operator_string_end+=1;
1006 *operator_string_end=
'\0';
1008 else if(strstr(operator_string_end,
">")==operator_string_end)
1010 operator_string_end+=1;
1011 *operator_string_end=
'\0';
1013 else if(strstr(operator_string_end,
"~")==operator_string_end)
1015 operator_string_end+=1;
1016 *operator_string_end=
'\0';
1018 else if(strstr(operator_string_end,
"!")==operator_string_end)
1020 operator_string_end+=1;
1021 *operator_string_end=
'\0';
1023 else if(strstr(operator_string_end,
"+")==operator_string_end)
1025 operator_string_end+=1;
1026 *operator_string_end=
'\0';
1028 else if(strstr(operator_string_end,
"-")==operator_string_end)
1030 operator_string_end+=1;
1031 *operator_string_end=
'\0';
1033 else if(strstr(operator_string_end,
"*")==operator_string_end)
1035 operator_string_end+=1;
1036 *operator_string_end=
'\0';
1038 else if(strstr(operator_string_end,
"/")==operator_string_end)
1040 operator_string_end+=1;
1041 *operator_string_end=
'\0';
1043 else if(strstr(operator_string_end,
"%")==operator_string_end)
1045 operator_string_end+=1;
1046 *operator_string_end=
'\0';
1048 else if(strstr(operator_string_end,
"^")==operator_string_end)
1050 operator_string_end+=1;
1051 *operator_string_end=
'\0';
1053 else if(strstr(operator_string_end,
"&")==operator_string_end)
1055 operator_string_end+=1;
1056 *operator_string_end=
'\0';
1058 else if(strstr(operator_string_end,
"|")==operator_string_end)
1060 operator_string_end+=1;
1061 *operator_string_end=
'\0';
1063 else if(strstr(operator_string_end,
",")==operator_string_end)
1065 operator_string_end+=1;
1066 *operator_string_end=
'\0';
1068 else if(strstr(operator_string_end,
"=")==operator_string_end)
1070 operator_string_end+=1;
1071 *operator_string_end=
'\0';
1073 return operator_string_begin;
1075 char *end_of_demangled_name =
const_cast<char *
>(strrchr(demangled_symbol,
')'));
1076 if(end_of_demangled_name != NULL)
1080 while(pars>0 && end_of_demangled_name!=demangled_symbol)
1082 c = *(--end_of_demangled_name);
1095 return demangled_symbol;
1097 char *end_of_func_name = end_of_demangled_name;
1098 if(end_of_func_name != NULL)
1100 *end_of_func_name =
'\0';
1101 char c = *(--end_of_func_name);
1105 while(pars>0 && end_of_func_name!=demangled_symbol)
1107 c = *(--end_of_func_name);
1117 *end_of_func_name =
'\0';
1119 c = *(--end_of_func_name);
1120 while(isalnum(c) || c==
'_' || c==
'~')
1122 c = *(--end_of_func_name);
1124 return ++end_of_func_name;
1126 return demangled_symbol;
1141 strcpy(module_filename, dir);
1142 strcat(module_filename,
"/HTML/");
1143 strcat(module_filename, module_name);
1144 strcat(module_filename,
".html");
1147 strcpy(event, (cur_module->
get_event()).c_str());
1150 if(result == modules_tot_samples.
end())
1152 if((!strcmp(event,
"UNHALTED_CORE_CYCLES") && !nehalem) || (!strcmp(event,
"CPU_CLK_UNHALTED:THREAD_P") && nehalem))
1160 module_file = fopen(module_filename,
"w");
1161 if(module_file == NULL)
1163 fprintf(stderr,
"ERROR: Cannot create file %s!!!\naborting...\n", module_filename);
1166 fprintf(module_file,
"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\n");
1167 fprintf(module_file,
"<html>\n");
1168 fprintf(module_file,
"<head>\n");
1169 fprintf(module_file,
"<title>\n");
1170 fprintf(module_file,
"%s\n", module_name);
1171 fprintf(module_file,
"</title>\n");
1172 fprintf(module_file,
"</head>\n");
1173 fprintf(module_file,
"<body>\n");
1174 fprintf(module_file,
"<h2>%s</h2><br/>Events Sampled:<br/>\n", module_name);
1175 fprintf(module_file,
"<ul>\n");
1178 fprintf(module_file,
"<li><a href=\"#%s\">%s</a></li>\n", it->c_str(), it->c_str());
1180 fprintf(module_file,
"</ul>\n");
1184 if((!strcmp(event,
"UNHALTED_CORE_CYCLES") && !nehalem) || (!strcmp(event,
"CPU_CLK_UNHALTED:THREAD_P") && nehalem))
1188 module_file = fopen(module_filename,
"a");
1192 strcpy(event_str, event);
1195 sprintf(event_str,
"%s CMASK=%d", event_str, cur_module->
get_c_mask());
1199 sprintf(event_str,
"%s INV=%d", event_str, cur_module->
get_inv_mask());
1201 fprintf(module_file,
"<a name=\"%s\"><a>\n", event_str);
1202 fprintf(module_file,
"<table cellpadding=\"5\">\n");
1203 fprintf(module_file,
"<tr bgcolor=\"#EEEEEE\">\n");
1205 fprintf(module_file,
"</tr>\n");
1206 fprintf(module_file,
"<tr bgcolor=\"#EEEEEE\">\n");
1207 fprintf(module_file,
"<th align=\"left\">Samples</th>\n");
1208 fprintf(module_file,
"<th align=\"left\">Percentage</th>\n");
1209 fprintf(module_file,
"<th align=\"left\">Symbol Name</th>\n");
1210 fprintf(module_file,
"<th align=\"left\">Library Name</th>\n");
1211 fprintf(module_file,
"<th align=\"left\">Complete Signature</th>\n");
1212 fprintf(module_file,
"<th align=\"left\">Library Pathname</th>\n");
1213 fprintf(module_file,
"</tr>\n");
1214 for(
int j=0; j<20; j++)
1237 bool res = cur_module->
get_max(index, &value);
1239 char *sym_end = strchr(index,
'%');
1242 fprintf(stderr,
"ERROR: Invalid sym and lib name! : %s\naborting...\n", index);
1245 strncpy(sym, index, strlen(index)-strlen(sym_end));
1246 strcpy(lib, sym_end+1);
1251 if(strrchr(lib,
'/')!=NULL && *(strrchr(lib,
'/')+1)!=
'\0')
1253 strcpy(simple_lib, strrchr(lib,
'/')+1);
1257 strcpy(simple_lib, lib);
1261 fprintf(module_file,
"<tr bgcolor=\"#FFFFCC\">\n");
1265 fprintf(module_file,
"<tr bgcolor=\"#CCFFCC\">\n");
1267 fprintf(module_file,
"<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%u</td>\n", value);
1268 fprintf(module_file,
"<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%f%%</td>\n", (((
double)(value))/((
double)(cur_module->
get_total_num_samples())))*100);
1273 fprintf(module_file,
"<td style=\"font-family:courier;\">%s</td>\n", simple_sym_mod);
1274 fprintf(module_file,
"<td style=\"font-family:courier;\">%s</td>\n", simple_lib_mod);
1275 fprintf(module_file,
"<td style=\"font-family:courier;\">%s</td>\n", sym_mod);
1276 fprintf(module_file,
"<td style=\"font-family:courier;\">%s</td>\n</tr>\n", lib_mod);
1278 fprintf(module_file,
"</table><br/><br/>\n");
1279 int res = fclose(module_file);
1282 fprintf(stderr,
"ERROR: Cannot close file %s!!!\naborting...\n", module_filename);
1310 unsigned int module_num = 0;
1314 strcpy(path_name, dir);
1315 strcat(path_name,
"/");
1316 strcat(path_name, filename);
1317 gzFile res_file = gzopen(path_name,
"rb");
1319 if(res_file != NULL)
1323 if(line[strlen(line)-1]==
'\n') line[strlen(line)-1]=
'\0';
1325 sscanf(line,
"%s %s %u %u %u", arch, event, &cmask, &inv, &sp);
1326 if(!strcmp(arch,
"NHM")) nehalem =
true;
else nehalem =
false;
1330 if(line[strlen(line)-1]==
'\n') line[strlen(line)-1]=
'\0';
1331 if(strchr(line,
' ')==NULL)
1336 cur_module->
clear();
1339 char *end_sym = strchr(line,
'%');
1342 fprintf(stderr,
"ERROR: Invalid module name. \nLINE: %s\naborting...\n", line);
1346 strncpy(cur_module_name, line, strlen(line)-strlen(end_sym));
1347 cur_module->
init(cur_module_name, arch, event, cmask, inv, sp);
1352 unsigned int value=0, libOffset=0;
1362 sscanf(line,
"%s %s %u %u", symbol, libName, &libOffset, &value);
1363 char realPathName_s[FILENAME_MAX];
1364 bzero(realPathName_s, FILENAME_MAX);
1365 char *realPathName = realpath(libName, realPathName_s);
1366 if(realPathName!=NULL && strlen(realPathName)>0)
1369 result = libsInfo.
find(realPathName);
1370 if(result == libsInfo.
end())
1372 libsInfo[realPathName] =
FileInfo(realPathName,
true);
1374 const char *temp_sym = libsInfo[realPathName].symbolByOffset(libOffset);
1375 if(temp_sym!=NULL && strlen(temp_sym)>0)
1378 char *demangled_symbol = abi::__cxa_demangle(temp_sym, NULL, NULL, &status);
1381 strcpy(final_sym, demangled_symbol);
1382 free(demangled_symbol);
1386 strcpy(final_sym, temp_sym);
1391 strcpy(final_sym,
"???");
1393 strcpy(final_lib, realPathName);
1397 strcpy(final_sym, symbol);
1398 strcpy(final_lib, libName);
1402 strcpy(index, final_sym);
1404 strcat(index, final_lib);
1410 cur_module->
clear();
1415 fprintf(stderr,
"ERROR: Unable to open input file: %s\naborting...\n", filename);
1438 strcpy(path_name, dir);
1439 strcat(path_name,
"/");
1440 strcat(path_name, filename);
1441 gzFile res_file = gzopen(path_name,
"rb");
1442 if(res_file != NULL)
1446 if(line[strlen(line)-1]==
'\n') line[strlen(line)-1]=
'\0';
1448 sscanf(line,
"%s %s %s %s %s\n", arch, event, cmask_str, inv_str, sp_str);
1450 if(atoi(cmask_str)>0)
1452 event_str +=
" CMASK=";
1453 event_str += cmask_str;
1457 event_str +=
" INV=";
1458 event_str += inv_str;
1464 fprintf(stderr,
"ERROR: Unable to open input file: %s\naborting...\n", filename);
1478 strcpy(module_filename, dir);
1479 strcat(module_filename,
"/HTML/");
1480 strcat(module_filename, (i->first).c_str());
1481 strcat(module_filename,
".html");
1482 FILE *module_file = fopen(module_filename,
"a");
1483 if(module_file == NULL)
1485 fprintf(stderr,
"ERROR: Unable to append to file: %s\naborting...\n", module_filename);
1488 fprintf(module_file,
"</body>\n</html>\n");
1489 if(fclose(module_file))
1491 fprintf(stderr,
"ERROR: Cannot close file %s!!!\naborting...\n", module_filename);
1519 int number_of_modules = 0;
1521 int no_of_values = 0;
1524 strcpy(path_name, dir);
1525 strcat(path_name,
"/");
1526 strcat(path_name, filename);
1527 FILE *fp = fopen(path_name,
"r");
1528 int stat = fscanf(fp,
"%s %s %s %s %s\n", arch, event, cmask_str, inv_str, sp_str);
1533 if(!strcmp(arch,
"NHM")) nehalem =
true;
else nehalem =
false;
1535 if(atoi(cmask_str)>0)
1537 event_str +=
" CMASK=";
1538 event_str += cmask_str;
1542 event_str +=
" INV=";
1543 event_str += inv_str;
1546 while(fscanf(fp,
"%s\n", line)!=EOF)
1548 if(isalpha(line[0]))
1550 if(number_of_modules>0)
1552 C_modules[cur_module_name][event_str]=(double)cur_sum/no_of_values;
1556 strcpy(cur_module_name, line);
1557 number_of_modules++;
1559 else if(isdigit(line[0]))
1561 cur_sum += strtol(line, NULL, 10);
1565 C_modules[cur_module_name][event_str]=(double)cur_sum/no_of_values;
1567 return number_of_modules;
1572 fprintf(fp,
"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\n");
1573 fprintf(fp,
"<html>\n");
1574 fprintf(fp,
"<head>\n");
1575 fprintf(fp,
"<title>\n");
1576 fprintf(fp,
"Analysis Result\n");
1577 fprintf(fp,
"</title>\n");
1578 fprintf(fp,
"<script src=\"sorttable.js\"></script>\n");
1579 fprintf(fp,
"<style>\ntable.sortable thead {\nbackground-color:#eee;\ncolor:#666666;\nfont-weight:bold;\ncursor:default;\nfont-family:courier;\n}\n</style>\n");
1580 fprintf(fp,
"</head>\n");
1581 fprintf(fp,
"<body link=\"black\">\n");
1582 fprintf(fp,
"<h1>RESULTS:</h1><br/>Click for detailed symbol view...<p/>\n");
1583 fprintf(fp,
"<table class=\"sortable\" cellpadding=\"5\">\n");
1584 fprintf(fp,
"<tr>\n");
1585 fprintf(fp,
"<th>MODULE NAME</th>\n");
1588 if(strlen(it->c_str())==0) fprintf(fp,
"<th bgcolor=\"#FFFFFF\"> </th>\n");
1589 else fprintf(fp,
"<th>%s</th>\n", (*it).c_str());
1591 fprintf(fp,
"</tr>\n");
1600 if(index%2) fprintf(fp,
"<tr bgcolor=\"#FFFFCC\">\n");
1601 else fprintf(fp,
"<tr bgcolor=\"#CCFFCC\">\n");
1602 fprintf(fp,
"<td style=\"font-family:monospace;font-size:large;color:Black\"><a href=\"%s.html\">%s</a></td>\n", (it->first).c_str(), (it->first).c_str());
1605 if(strlen(jt->c_str())==0)
1607 fprintf(fp,
"<td bgcolor=\"#FFFFFF\"> </td>");
1611 if((it->second).find(*jt) == (it->second).
end())
1613 fprintf(stderr,
"ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", (*jt).c_str());
1616 fprintf(fp,
"<td style=\"font-family:monospace;font-size:large;color:DarkBlue\" align=\"right\">%.2f</td>\n", (it->second)[*jt]);
1619 fprintf(fp,
"</tr>\n");
1626 fprintf(fp,
"</table>\n</body>\n</html>\n");
1632 fprintf(fp,
"MODULE NAME");
1635 if(strlen(it->c_str())==0) {}
1636 else fprintf(fp,
",%s", (*it).c_str());
1646 fprintf(fp,
"%s", (it->first).c_str()) ;
1649 if(strlen(jt->c_str())==0) {}
1652 if((it->second).find(*jt) == (it->second).
end())
1654 fprintf(stderr,
"ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", (*jt).c_str());
1657 fprintf(fp,
",%.2f", (it->second)[*jt]);
1674 double counter_value;
1677 counter_value = (it->second)[field];
1678 if(max < counter_value) max = counter_value;
1680 if(value>0 && max>0 && normalizeTo>0)
1682 return 1.*value/max*normalizeTo;
1698 double simdnorm = 1. -
normalize(
"Packed % of all UOPS Retired", (it->second)[
"Packed % of all UOPS Retired"], 1);
1699 double misspnorm =
normalize(
"% of Mispredicted Branches", (it->second)[
"% of Mispredicted Branches"], 1);
1700 double stallnorm =
normalize(
"Stalled Cycles", (it->second)[
"Stalled Cycles"], 1);
1701 (it->second)[
"iFactor"] = stallnorm*(simdnorm + misspnorm + stallnorm);
1708 double simdnorm = 1. -
normalize(
"Packed SIMD % of all Instructions", (it->second)[
"Packed SIMD % of all Instructions"], 1);
1709 double misspnorm =
normalize(
"% of Mispredicted Branches", (it->second)[
"% of Mispredicted Branches"], 1);
1710 double stallnorm =
normalize(
"Stalled Cycles", (it->second)[
"Stalled Cycles"], 1);
1711 (it->second)[
"iFactor"] = stallnorm*(simdnorm + misspnorm + stallnorm);
1727 sum += (it->second)[
"CPU_CLK_UNHALTED:THREAD_P"];
1734 sum += (it->second)[
"UNHALTED_CORE_CYCLES"];
1745 if(argc<2 || argc>4)
1747 printf(
"\n\nUsage: %s DIRECTORY [--caa] [--csv]\n\n", argv[0]);
1753 for(
int i=2; i<
argc; i++)
1755 if(!strcmp(argv[i],
"--caa")) caa =
true;
1756 if(!strcmp(argv[i],
"--csv")) csv =
true;
1760 strcpy(dir, argv[1]);
1763 strcat(dir,
"/HTML");
1764 int res = mkdir(dir, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
1767 fprintf(stderr,
"ERROR: Cannot create directory %s\naborting...\n", dir);
1773 struct dirent *dirp;
1774 int num_of_modules = 0;
1775 if((dp = opendir(argv[1]))==NULL)
1777 printf(
"Error(%d) opening %s\n", errno, argv[1]);
1780 while((dirp = readdir(dp))!=NULL)
1782 if(strstr(dirp->d_name,
"_S_")!=NULL && strstr(dirp->d_name,
".txt.gz")!=NULL && !csv)
1786 fprintf(stderr,
"ERROR: Cannot read file %s\naborting...\n", dirp->d_name);
1792 sort(S_events.
begin(), S_events.
end());
1793 if((dp = opendir(argv[1]))==NULL)
1795 printf(
"Error(%d) opening %s\n", errno, argv[1]);
1798 while((dirp = readdir(dp))!=NULL)
1800 if(strstr(dirp->d_name,
"_S_")!=NULL && strstr(dirp->d_name,
".txt.gz")!=NULL && !csv)
1804 fprintf(stderr,
"ERROR: Cannot read file %s\naborting...\n", dirp->d_name);
1808 else if(strstr(dirp->d_name,
"_C_")!=NULL && strstr(dirp->d_name,
".txt")!=NULL)
1811 if(res>num_of_modules)
1813 num_of_modules = res;
1823 fprintf(stderr,
"ERROR: Cannot finalize HTML pages!!!\naborting...\n");
1830 if(!csv) sprintf(filepath,
"%s/HTML/index.html", argv[1]);
1831 else sprintf(filepath,
"%s/results.csv", argv[1]);
1832 FILE *fp = fopen(filepath,
"w");
1835 fprintf(stderr,
"ERROR: Cannot create file index.html!!!\naborting...\n");
1847 fprintf(stderr,
"(core) ERROR: One or more events for CAA missing!\naborting...\n");
1870 fprintf(stderr,
"(nehalem) ERROR: One or more events for CAA missing!\naborting...\n");
1910 sprintf(src,
"sorttable.js");
1911 sprintf(dst,
"%s/HTML/sorttable.js", argv[1]);
1912 int fd_src = open(src, O_RDONLY);
1915 fprintf(stderr,
"ERROR: Cannot open file \"%s\"!\naborting...\n", src);
1918 int fd_dst = open(dst, O_WRONLY|O_CREAT|O_TRUNC, 0644);
1921 fprintf(stderr,
"ERROR: Cannot open file \"%s\" (%s)!\naborting...\n", dst, strerror(errno));
1925 while(
read(fd_src, &c, 1))
1927 if (
write(fd_dst, &c, 1) == -1 ) {
#define MAX_EVENT_NAME_LENGTH
std::map< std::string, unsigned int > samples
unsigned int get_inv_mask()
#define I7_L3_UNSHARED_HIT_CYCLES
const char * func_name(const char *demangled_symbol)
const char * symbolByOffset(Offset offset)
bool skipString(const char *strptr, const char *srcbuffer, const char **dstbuffer)
#define MAX_SAMPLE_INDEX_LENGTH
#define CORE_UNKNOWN_ADDR_STORE_CYCLES
#define CORE_LCP_STALL_CYCLES
void put_C_footer(FILE *fp)
void put_S_module(S_module *cur_module, const char *dir)
double sum(double x, double y, double z)
#define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES
void init_core_caa_events_displ()
void put_C_modules(FILE *fp, std::vector< std::string > &columns)
bool get_max(char *index, unsigned int *value)
void init_core_caa_events()
def read(f, regex='.*', skipevents=0)
#define I7_IFETCH_L2_MISS_L3_HITM
int read_S_file(const char *dir, const char *filename)
std::vector< CacheItem > SymbolCache
#define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD
bool check_for_nhm_caa_events()
unsigned int get_smpl_period()
std::istringstream & output(void)
#define MAX_ARCH_NAME_LENGTH
void add_sample(const char *index, unsigned int value)
#define I7_OTHER_CORE_L2_HITM_CYCLES
double normalize(std::string field, double value, double normalizeTo)
#define CORE_L1_DTLB_MISS_CYCLES
FileInfo(const std::string &name, bool useGdb)
#define CORE_OVERLAPPING_CYCLES
void calc_post_deriv_values()
void calc_nhm_deriv_values(double totalCycles)
#define MAX_SIMPLE_SYM_LENGTH
void init_nhm_caa_events_displ()
auto begin(reverse_wrapper< T > &w)
#define MAX_LIB_MOD_LENGTH
int finalize_S_html_pages(const char *dir)
void put_C_header(FILE *fp, std::vector< std::string > &columns)
#define MAX_SIMPLE_LIB_MOD_LENGTH
void init_nhm_caa_events()
#define I7_L1_ITLB_WALK_COMPLETED_CYCLES
void calc_core_deriv_values(double totalCycles)
#define MAX_SP_STR_LENGTH
Offset next(Offset offset)
void init(const char *name, const char *architecture, const char *event_name, unsigned int c_mask, unsigned int inv_mask, unsigned int smpl_period)
#define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES
unsigned int total_num_samples
void set_total(unsigned int total)
#define I7_OTHER_CORE_L2_HIT_CYCLES
void html_special_chars(const char *s, char *s_mod)
#define MAX_CMASK_STR_LENGTH
#define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES
#define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES
int read_S_events(const char *dir, const char *filename)
auto end(reverse_wrapper< T > &w)
#define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP
#define I7_L1_DTLB_WALK_COMPLETED_CYCLES
bool operator()(const int &a, const CacheItem &b) const
#define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT
#define MAX_FILENAME_LENGTH
#define CORE_L2_MISS_CYCLES
void put_C_header_csv(FILE *fp, std::vector< std::string > &columns)
#define PIPE_BUFFER_LENGTH
void createOffsetMap(void)
#define MAX_SIMPLE_SYM_MOD_LENGTH
SymbolCache m_symbolCache
#define MAX_INV_STR_LENGTH
bool check_for_core_caa_events()
#define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT
PipeReader(const char *cmd)
bool operator()(const CacheItem &a, const int &b) const
void put_C_modules_csv(FILE *fp, std::vector< std::string > &columns)
unsigned int get_total_num_samples()
#define MAX_MODULE_NAME_LENGTH
#define MAX_SIMPLE_LIB_LENGTH
bool skipWhitespaces(const char *srcbuffer, const char **destbuffer)
#define I7_IFETCH_L2_MISS_L3_HIT_SNOOP
int read_C_file(const char *dir, const char *filename)
int main(int argc, char *argv[])
std::string get_module_name()
#define CORE_L2_HIT_CYCLES
#define MAX_SYM_MOD_LENGTH
unsigned int get_c_mask()