00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #include <ctype.h>
00015 #include <cxxabi.h>
00016 #include <fcntl.h>
00017 #include <math.h>
00018 #include <stdint.h>
00019 #include <stdio.h>
00020 #include <stdlib.h>
00021 #include <string.h>
00022 #include <sys/stat.h>
00023 #include <sys/types.h>
00024 #include <unistd.h>
00025 #include <zlib.h>
00026
00027 #include <algorithm>
00028 #include <list>
00029 #include <map>
00030 #include <string>
00031 #include <vector>
00032 #include <sstream>
00033
00034 #include <dirent.h>
00035 #include <errno.h>
00036
00037
// ---------------------------------------------------------------------------
// Per-event cycle weights for the Core micro-architecture analysis.
// calc_core_deriv_values() multiplies raw event counts by these weights to
// obtain the "... Impact" figures.
// ---------------------------------------------------------------------------
#define CORE_L2_MISS_CYCLES                 200
#define CORE_L2_HIT_CYCLES                  14.5
#define CORE_L1_DTLB_MISS_CYCLES            10
#define CORE_LCP_STALL_CYCLES               6
#define CORE_UNKNOWN_ADDR_STORE_CYCLES      5
#define CORE_OVERLAPPING_CYCLES             6
#define CORE_SPAN_ACROSS_CACHE_LINE_CYCLES  20

// ---------------------------------------------------------------------------
// Per-event cycle weights for the Nehalem (i7) analysis.
// calc_nhm_deriv_values() multiplies raw event counts by these weights.
// ---------------------------------------------------------------------------
#define I7_L1_DTLB_WALK_COMPLETED_CYCLES    35
#define I7_L1_ITLB_WALK_COMPLETED_CYCLES    35
#define I7_L2_HIT_CYCLES                    6
#define I7_L3_UNSHARED_HIT_CYCLES           35
#define I7_OTHER_CORE_L2_HIT_CYCLES         60
#define I7_OTHER_CORE_L2_HITM_CYCLES        75
#define I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES    225 // average of 200 (not modified) and 225-250 (modified)
#define I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES   360 // average of 350 (not modified) and 370 (modified)
#define I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES  180

// Instruction-fetch variants of the Nehalem weights.
#define I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT    200
#define I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT   350
#define I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP   35
#define I7_IFETCH_L2_MISS_L3_HIT_SNOOP      60
#define I7_IFETCH_L2_MISS_L3_HITM           75
#define I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD  180

// Upper bound on the number of modules.
// NOTE(review): the code using this limit is outside this section - confirm usage.
#define MAX_MODULES 1000

// CPI value a module is compared against when calc_core_deriv_values()
// computes the "iMargin" figure.
#define EXPECTED_CPI 0.25

// ---------------------------------------------------------------------------
// Length limits for names, lines and value strings.
// NOTE(review): presumably fixed-size buffer capacities; the code using them
// is outside this section - confirm before changing any value.
// ---------------------------------------------------------------------------
#define MAX_FILENAME_LENGTH         1024
#define MAX_SAMPLE_INDEX_LENGTH     10000
#define MAX_SYM_LENGTH              15000
#define MAX_SYM_MOD_LENGTH          20000
#define MAX_LIB_LENGTH              5000
#define MAX_LIB_MOD_LENGTH          7000
#define MAX_SIMPLE_SYM_LENGTH       300
#define MAX_SIMPLE_SYM_MOD_LENGTH   500
#define MAX_SIMPLE_LIB_LENGTH       300
#define MAX_SIMPLE_LIB_MOD_LENGTH   500
#define MAX_LINE_LENGTH             20000
#define MAX_EVENT_NAME_LENGTH       150
#define MAX_MODULE_NAME_LENGTH      250
#define MAX_VALUE_STRING_LENGTH     250
#define MAX_ARCH_NAME_LENGTH        20
#define MAX_CMASK_STR_LENGTH        5
#define MAX_INV_STR_LENGTH          5
#define MAX_SP_STR_LENGTH           50

// Size of the chunk buffer PipeReader uses when draining a command's output.
#define PIPE_BUFFER_LENGTH 1000
00087
00088 class PipeReader
00089 {
00090 public:
00091 PipeReader(const char *cmd)
00092 {
00093 pipe = popen(cmd, "r");
00094 if(!pipe)
00095 {
00096 printf("Cannot open pipe. Exiting...\n");
00097 exit(1);
00098 }
00099 char buffer[PIPE_BUFFER_LENGTH];
00100 bzero(buffer, PIPE_BUFFER_LENGTH);
00101 std::string result = "";
00102 while(!feof(pipe))
00103 {
00104 if(fgets(buffer, PIPE_BUFFER_LENGTH, pipe)!=NULL)
00105 {
00106 result += buffer;
00107 }
00108 bzero(buffer, PIPE_BUFFER_LENGTH);
00109 }
00110 iss = new std::istringstream(result, std::istringstream::in);
00111 }
00112
00113 ~PipeReader(void)
00114 {
00115 pclose(pipe);
00116 delete iss;
00117 }
00118
00119 std::istringstream &output(void)
00120 {
00121 return *iss;
00122 }
00123 private:
00124 FILE* pipe;
00125 std::istringstream *iss;
00126 };
00127
00128
00129
00130
00131
/// Skips a run of one or more whitespace characters.
///
/// @param srcbuffer   NUL-terminated text to examine.
/// @param destbuffer  On success receives a pointer to the first
///                    non-whitespace character after the run; left untouched
///                    on failure.
/// @return true if `srcbuffer` started with at least one whitespace
///         character, false otherwise.
bool skipWhitespaces(const char *srcbuffer, const char **destbuffer)
{
  // isspace() is only defined for values representable as unsigned char (or
  // EOF); a plain char may be negative for bytes >= 0x80, hence the casts.
  if(!isspace(static_cast<unsigned char>(*srcbuffer)))
  {
    return false;
  }
  ++srcbuffer;
  while(isspace(static_cast<unsigned char>(*srcbuffer)))
  {
    ++srcbuffer;
  }
  *destbuffer = srcbuffer;
  return true;
}
00145
00146
00147
00148
00149
00150
00151
00152
/// Checks whether `srcbuffer` begins with the literal `strptr` and, if so,
/// steps over it.
///
/// @param strptr     NUL-terminated prefix that is expected.
/// @param srcbuffer  NUL-terminated text to examine.
/// @param dstbuffer  On success receives a pointer just past the matched
///                   prefix; left untouched on failure.
/// @return true when the prefix matched, false otherwise.
bool skipString(const char *strptr, const char *srcbuffer, const char **dstbuffer)
{
  const size_t prefixLength = strlen(strptr);
  const bool prefixMatches = (strncmp(srcbuffer, strptr, prefixLength) == 0);
  if(prefixMatches)
  {
    *dstbuffer = srcbuffer + prefixLength;
  }
  return prefixMatches;
}
00162
00163 class FileInfo
00164 {
00165 public:
00166 typedef int Offset;
00167 std::string NAME;
00168 FileInfo(void) : NAME("<dynamically generated>") {}
00169 FileInfo(const std::string &name, bool useGdb) : NAME(name)
00170 {
00171 if(useGdb)
00172 {
00173 this->createOffsetMap();
00174 }
00175 }
00176
00177 const char *symbolByOffset(Offset offset)
00178 {
00179 if(m_symbolCache.empty())
00180 {
00181 return 0;
00182 }
00183
00184 SymbolCache::iterator i = lower_bound(m_symbolCache.begin(), m_symbolCache.end(), offset, CacheItemComparator());
00185 if(i->OFFSET == offset)
00186 {
00187 return i->NAME.c_str();
00188 }
00189
00190 if(i == m_symbolCache.begin())
00191 {
00192 return m_symbolCache.begin()->NAME.c_str();
00193 }
00194
00195 --i;
00196
00197 return i->NAME.c_str();
00198 }
00199
00200 Offset next(Offset offset)
00201 {
00202 SymbolCache::iterator i = upper_bound(m_symbolCache.begin(), m_symbolCache.end(), offset, CacheItemComparator());
00203 if(i == m_symbolCache.end())
00204 {
00205 return 0;
00206 }
00207 return i->OFFSET;
00208 }
00209
00210 private:
00211 struct CacheItem
00212 {
00213 CacheItem(Offset offset, const std::string &name) : OFFSET(offset), NAME(name) {};
00214 Offset OFFSET;
00215 std::string NAME;
00216 };
00217
00218 typedef std::vector<CacheItem> SymbolCache;
00219 SymbolCache m_symbolCache;
00220
00221 struct CacheItemComparator
00222 {
00223 bool operator()(const CacheItem& a, const int &b) const
00224 {
00225 return a.OFFSET < b;
00226 }
00227 bool operator()(const int& a, const CacheItem &b) const
00228 {
00229 return a < b.OFFSET;
00230 }
00231 };
00232
00233 void createOffsetMap(void)
00234 {
00235 std::string commandLine = "objdump -p " + NAME;
00236 PipeReader objdump(commandLine.c_str());
00237 std::string oldname;
00238 std::string suffix;
00239 int vmbase = 0;
00240 bool matched = false;
00241 while(objdump.output())
00242 {
00243
00244
00245
00246
00247
00248
00249 std::string line;
00250 std::getline(objdump.output(), line);
00251
00252 if(!objdump.output()) break;
00253 if(line.empty()) continue;
00254 const char *lineptr = line.c_str();
00255 if(!skipWhitespaces(lineptr, &lineptr)) continue;
00256 if(!skipString("LOAD", lineptr, &lineptr)) continue;
00257 if(!skipWhitespaces(lineptr, &lineptr)) continue;
00258 if(!skipString("off", lineptr, &lineptr)) continue;
00259 char *endptr = 0;
00260 int initialBase = strtol(lineptr, &endptr, 16);
00261 if(lineptr == endptr) continue;
00262 lineptr = endptr;
00263 if(!skipWhitespaces(lineptr, &lineptr)) continue;
00264 if(!skipString("vaddr", lineptr, &lineptr)) continue;
00265 if(!skipWhitespaces(lineptr, &lineptr)) continue;
00266 int finalBase = strtol(lineptr, &endptr, 16);
00267 if(lineptr == endptr) continue;
00268 vmbase=finalBase - initialBase;
00269 matched = true;
00270 break;
00271 }
00272 if(!matched)
00273 {
00274 fprintf(stderr, "Cannot determine VM base address for %s\n", NAME.c_str());
00275 fprintf(stderr, "Error while running `objdump -p %s`\n", NAME.c_str());
00276 exit(1);
00277 }
00278 std::string commandLine2 = "nm -t d -n " + NAME;
00279 PipeReader nm(commandLine2.c_str());
00280 while(nm.output())
00281 {
00282 std::string line;
00283 std::getline(nm.output(), line);
00284 if(!nm.output()) break;
00285 if(line.empty()) continue;
00286
00287 const char *begin = line.c_str();
00288 char *endptr = 0;
00289 int address = strtol(begin, &endptr, 10);
00290 if(endptr == begin) continue;
00291 if(*endptr++ != ' ') continue;
00292 if(isspace(*endptr++)) continue;
00293 if(*endptr++ != ' ') continue;
00294 char *symbolName = endptr;
00295 while(*endptr && !isspace(*endptr)) endptr++;
00296 if(*endptr != 0) continue;
00297
00298 if(symbolName[0] == '.') continue;
00299
00300
00301
00302
00303 int offset = address-vmbase;
00304 if(m_symbolCache.size() && (m_symbolCache.back().OFFSET == offset)) m_symbolCache.back().NAME = symbolName;
00305 else m_symbolCache.push_back(CacheItem(address-vmbase, symbolName));
00306 }
00307 }
00308 };
00309
00310 static std::map<std::string, unsigned int> modules_tot_samples;
00311 static std::map<std::string, FileInfo> libsInfo;
00312 static int nehalem;
00313
00314 static std::map<std::string, std::map<std::string, double> > C_modules;
00315 static std::vector<std::string> C_events;
00316 static std::vector<std::string> S_events;
00317
00318 static std::vector<std::string> core_caa_events;
00319 static std::vector<std::string> nhm_caa_events;
00320 static std::vector<std::string> core_caa_events_displ;
00321 static std::vector<std::string> nhm_caa_events_displ;
00322
00323 void init_core_caa_events()
00324 {
00325 core_caa_events.push_back("BRANCH_INSTRUCTIONS_RETIRED");
00326 core_caa_events.push_back("ILD_STALL");
00327 core_caa_events.push_back("INST_RETIRED:LOADS");
00328 core_caa_events.push_back("INST_RETIRED:OTHER");
00329 core_caa_events.push_back("INST_RETIRED:STORES");
00330 core_caa_events.push_back("INSTRUCTIONS_RETIRED");
00331 core_caa_events.push_back("LOAD_BLOCK:OVERLAP_STORE");
00332 core_caa_events.push_back("LOAD_BLOCK:STA");
00333 core_caa_events.push_back("LOAD_BLOCK:UNTIL_RETIRE");
00334 core_caa_events.push_back("MEM_LOAD_RETIRED:DTLB_MISS");
00335 core_caa_events.push_back("MEM_LOAD_RETIRED:L1D_LINE_MISS");
00336 core_caa_events.push_back("MEM_LOAD_RETIRED:L2_LINE_MISS");
00337 core_caa_events.push_back("MISPREDICTED_BRANCH_RETIRED");
00338
00339
00340 core_caa_events.push_back("RS_UOPS_DISPATCHED CMASK=1 INV=1");
00341 core_caa_events.push_back("SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE");
00342 core_caa_events.push_back("UNHALTED_CORE_CYCLES");
00343
00344
00345
00346 }
00347
00348 void init_nhm_caa_events()
00349 {
00350 nhm_caa_events.push_back("ARITH:CYCLES_DIV_BUSY");
00351 nhm_caa_events.push_back("BR_INST_EXEC:ANY");
00352 nhm_caa_events.push_back("BR_INST_EXEC:DIRECT_NEAR_CALL");
00353 nhm_caa_events.push_back("BR_INST_EXEC:INDIRECT_NEAR_CALL");
00354 nhm_caa_events.push_back("BR_INST_EXEC:INDIRECT_NON_CALL");
00355 nhm_caa_events.push_back("BR_INST_EXEC:NEAR_CALLS");
00356 nhm_caa_events.push_back("BR_INST_EXEC:NON_CALLS");
00357 nhm_caa_events.push_back("BR_INST_EXEC:RETURN_NEAR");
00358 nhm_caa_events.push_back("BR_INST_RETIRED:ALL_BRANCHES");
00359 nhm_caa_events.push_back("BR_INST_RETIRED:CONDITIONAL");
00360 nhm_caa_events.push_back("BR_INST_RETIRED:NEAR_CALL");
00361 nhm_caa_events.push_back("BR_MISP_EXEC:ANY");
00362 nhm_caa_events.push_back("CPU_CLK_UNHALTED:THREAD_P");
00363 nhm_caa_events.push_back("DTLB_LOAD_MISSES:WALK_COMPLETED");
00364 nhm_caa_events.push_back("INST_RETIRED:ANY_P");
00365 nhm_caa_events.push_back("ITLB_MISSES:WALK_COMPLETED");
00366 nhm_caa_events.push_back("L2_RQSTS:IFETCH_HIT");
00367 nhm_caa_events.push_back("L2_RQSTS:IFETCH_MISS");
00368 nhm_caa_events.push_back("MEM_INST_RETIRED:LOADS");
00369 nhm_caa_events.push_back("MEM_INST_RETIRED:STORES");
00370 nhm_caa_events.push_back("MEM_LOAD_RETIRED:L2_HIT");
00371 nhm_caa_events.push_back("MEM_LOAD_RETIRED:L3_MISS");
00372 nhm_caa_events.push_back("MEM_LOAD_RETIRED:L3_UNSHARED_HIT");
00373 nhm_caa_events.push_back("MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM");
00374 nhm_caa_events.push_back("MEM_UNCORE_RETIRED:LOCAL_DRAM");
00375 nhm_caa_events.push_back("MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM");
00376 nhm_caa_events.push_back("MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT");
00377 nhm_caa_events.push_back("MEM_UNCORE_RETIRED:REMOTE_DRAM");
00378 nhm_caa_events.push_back("OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM");
00379 nhm_caa_events.push_back("OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM");
00380 nhm_caa_events.push_back("OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP");
00381 nhm_caa_events.push_back("OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD");
00382 nhm_caa_events.push_back("OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM");
00383 nhm_caa_events.push_back("OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT");
00384 nhm_caa_events.push_back("RESOURCE_STALLS:ANY");
00385 nhm_caa_events.push_back("SSEX_UOPS_RETIRED:PACKED_DOUBLE");
00386 nhm_caa_events.push_back("SSEX_UOPS_RETIRED:PACKED_SINGLE");
00387 nhm_caa_events.push_back("UOPS_DECODED:MS CMASK=1");
00388 nhm_caa_events.push_back("UOPS_ISSUED:ANY CMASK=1 INV=1");
00389 nhm_caa_events.push_back("ITLB_MISS_RETIRED");
00390 nhm_caa_events.push_back("UOPS_RETIRED:ANY");
00391 }
00392
00393 bool check_for_core_caa_events()
00394 {
00395 for(std::vector<std::string>::const_iterator it=core_caa_events.begin(); it!=core_caa_events.end(); ++it)
00396 {
00397 if(find(C_events.begin(), C_events.end(), (*it))==C_events.end())
00398 {
00399 fprintf(stderr, "ERROR: Cannot find event %s!!!\naborting...\n", (*it).c_str());
00400 return false;
00401 }
00402 }
00403 return true;
00404 }
00405
00406 bool check_for_nhm_caa_events()
00407 {
00408 for(std::vector<std::string>::const_iterator it=nhm_caa_events.begin(); it!=nhm_caa_events.end(); ++it)
00409 {
00410 if(find(C_events.begin(), C_events.end(), (*it))==C_events.end())
00411 {
00412 fprintf(stderr, "ERROR: Cannot find event %s!!!\naborting...\n", (*it).c_str());
00413 return false;
00414 }
00415 }
00416 return true;
00417 }
00418
00419 void init_core_caa_events_displ()
00420 {
00421 core_caa_events_displ.push_back("Total Cycles");
00422 core_caa_events_displ.push_back("Stalled Cycles");
00423 core_caa_events_displ.push_back("% of Total Cycles");
00424 core_caa_events_displ.push_back("Instructions Retired");
00425 core_caa_events_displ.push_back("CPI");
00426 core_caa_events_displ.push_back("");
00427 core_caa_events_displ.push_back("iMargin");
00428 core_caa_events_displ.push_back("iFactor");
00429 core_caa_events_displ.push_back("");
00430 core_caa_events_displ.push_back("Counted Stalled Cycles");
00431 core_caa_events_displ.push_back("");
00432 core_caa_events_displ.push_back("L2 Miss Impact");
00433 core_caa_events_displ.push_back("L2 Miss % of counted Stalled Cycles");
00434 core_caa_events_displ.push_back("");
00435 core_caa_events_displ.push_back("L2 Hit Impact");
00436 core_caa_events_displ.push_back("L2 Hit % of counted Stalled Cycles");
00437 core_caa_events_displ.push_back("");
00438 core_caa_events_displ.push_back("L1 DTLB Miss Impact");
00439 core_caa_events_displ.push_back("L1 DTLB Miss % of counted Stalled Cycles");
00440 core_caa_events_displ.push_back("");
00441 core_caa_events_displ.push_back("LCP Stalls Impact");
00442 core_caa_events_displ.push_back("LCP Stalls % of counted Stalled Cycles");
00443 core_caa_events_displ.push_back("");
00444 core_caa_events_displ.push_back("Store-Fwd Stalls Impact");
00445 core_caa_events_displ.push_back("Store-Fwd Stalls % of counted Stalled Cycles");
00446 core_caa_events_displ.push_back("");
00447 core_caa_events_displ.push_back("Loads Blocked by Unknown Address Store Impact");
00448 core_caa_events_displ.push_back("Loads Blocked % of Store-Fwd Stalls Cycles");
00449 core_caa_events_displ.push_back("Loads Overlapped with Stores Impact");
00450 core_caa_events_displ.push_back("Loads Overlapped % of Store-Fwd Stalls Cycles");
00451 core_caa_events_displ.push_back("Loads Spanning across Cache Lines Impact");
00452 core_caa_events_displ.push_back("Loads Spanning % of Store-Fwd Stalls Cycles");
00453 core_caa_events_displ.push_back("");
00454 core_caa_events_displ.push_back("Load Instructions");
00455 core_caa_events_displ.push_back("Load % of all Instructions");
00456 core_caa_events_displ.push_back("Store Instructions");
00457 core_caa_events_displ.push_back("Store % of all Instructions");
00458 core_caa_events_displ.push_back("Branch Instructions");
00459 core_caa_events_displ.push_back("Branch % of all Instructions");
00460 core_caa_events_displ.push_back("Packed SIMD Computational Instructions");
00461 core_caa_events_displ.push_back("Packed SIMD % of all Instructions");
00462 core_caa_events_displ.push_back("Other Instructions");
00463 core_caa_events_displ.push_back("Other % of all Instructions");
00464 core_caa_events_displ.push_back("");
00465 core_caa_events_displ.push_back("ITLB Miss Rate in %");
00466 core_caa_events_displ.push_back("% of Mispredicted Branches");
00467 }
00468
00469 void calc_core_deriv_values(double totalCycles)
00470 {
00471 for(std::map<std::string, std::map<std::string, double> >::iterator it = C_modules.begin(); it != C_modules.end(); ++it)
00472 {
00473 (it->second)["Total Cycles"] = (it->second)["UNHALTED_CORE_CYCLES"];
00474 (it->second)["Stalled Cycles"] = (it->second)["RS_UOPS_DISPATCHED CMASK=1 INV=1"];
00475 (it->second)["L2 Miss Impact"] = (it->second)["MEM_LOAD_RETIRED:L2_LINE_MISS"] * CORE_L2_MISS_CYCLES;
00476 (it->second)["L2 Hit Impact"] = ((it->second)["MEM_LOAD_RETIRED:L1D_LINE_MISS"] - (it->second)["MEM_LOAD_RETIRED:L2_LINE_MISS"]) * CORE_L2_HIT_CYCLES;
00477 (it->second)["L1 DTLB Miss Impact"] = (it->second)["MEM_LOAD_RETIRED:DTLB_MISS"] * CORE_L1_DTLB_MISS_CYCLES;
00478 (it->second)["LCP Stalls Impact"] = (it->second)["ILD_STALL"] * CORE_LCP_STALL_CYCLES;
00479 (it->second)["Loads Blocked by Unknown Address Store Impact"] = (it->second)["LOAD_BLOCK:STA"] * CORE_UNKNOWN_ADDR_STORE_CYCLES;
00480 (it->second)["Loads Overlapped with Stores Impact"] = (it->second)["LOAD_BLOCK:OVERLAP_STORE"] * CORE_OVERLAPPING_CYCLES;
00481 (it->second)["Loads Spanning across Cache Lines Impact"] = (it->second)["LOAD_BLOCK:UNTIL_RETIRE"] * CORE_SPAN_ACROSS_CACHE_LINE_CYCLES;
00482 (it->second)["Store-Fwd Stalls Impact"] = (it->second)["Loads Blocked by Unknown Address Store Impact"] + (it->second)["Loads Overlapped with Stores Impact"] + (it->second)["Loads Spanning across Cache Lines Impact"];
00483 (it->second)["Counted Stalled Cycles"] = (it->second)["L2 Miss Impact"] + (it->second)["L2 Hit Impact"] + (it->second)["LCP Stalls Impact"] + (it->second)["L1 DTLB Miss Impact"] + (it->second)["Store-Fwd Stalls Impact"];
00484 (it->second)["Instructions Retired"] = (it->second)["INSTRUCTIONS_RETIRED"];
00485 (it->second)["ITLB Miss Rate in %"] = ((it->second)["ITLB_MISS_RETIRED"]/(it->second)["INSTRUCTIONS_RETIRED"])*100;
00486 (it->second)["Branch Instructions"] = (it->second)["BRANCH_INSTRUCTIONS_RETIRED"];
00487 (it->second)["Load Instructions"] = (it->second)["INST_RETIRED:LOADS"];
00488 (it->second)["Store Instructions"] = (it->second)["INST_RETIRED:STORES"];
00489 (it->second)["Other Instructions"] = (it->second)["INST_RETIRED:OTHER"] - (it->second)["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"] - (it->second)["BRANCH_INSTRUCTIONS_RETIRED"];
00490 (it->second)["% of Mispredicted Branches"] = ((it->second)["MISPREDICTED_BRANCH_RETIRED"]/(it->second)["BRANCH_INSTRUCTIONS_RETIRED"])*100;
00491 (it->second)["Packed SIMD Computational Instructions"] = (it->second)["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"];
00492 (it->second)["Counted Instructions Retired"] = (it->second)["Branch Instructions"] + (it->second)["Load Instructions"] + (it->second)["Store Instructions"] + (it->second)["Other Instructions"] + (it->second)["Packed SIMD Computational Instructions"];
00493 (it->second)["CPI"] = (it->second)["UNHALTED_CORE_CYCLES"]/(it->second)["INSTRUCTIONS_RETIRED"];
00494
00495 double localPerformanceImprovement = (it->second)["CPI"]/EXPECTED_CPI;
00496 double cyclesAfterImprovement = (it->second)["UNHALTED_CORE_CYCLES"]/localPerformanceImprovement;
00497 double totalCyclesAfterImprovement = totalCycles-(it->second)["UNHALTED_CORE_CYCLES"]+cyclesAfterImprovement;
00498 (it->second)["iMargin"] = 100-(totalCyclesAfterImprovement/totalCycles)*100;
00499
00500 (it->second)["% of Total Cycles"] = (it->second)["RS_UOPS_DISPATCHED CMASK=1 INV=1"]*100/(it->second)["UNHALTED_CORE_CYCLES"];
00501 (it->second)["L2 Miss % of counted Stalled Cycles"] =(it->second)["L2 Miss Impact"]*100/(it->second)["Counted Stalled Cycles"];
00502 (it->second)["L2 Hit % of counted Stalled Cycles"] =(it->second)["L2 Hit Impact"]*100/(it->second)["Counted Stalled Cycles"];
00503 (it->second)["L1 DTLB Miss % of counted Stalled Cycles"] =(it->second)["L1 DTLB Miss Impact"]*100/(it->second)["Counted Stalled Cycles"];
00504 (it->second)["LCP Stalls % of counted Stalled Cycles"] =(it->second)["LCP Stalls Impact"]*100/(it->second)["Counted Stalled Cycles"];
00505 (it->second)["Store-Fwd Stalls % of counted Stalled Cycles"] =(it->second)["Store-Fwd Stalls Impact"]*100/(it->second)["Counted Stalled Cycles"];
00506 (it->second)["Loads Blocked % of Store-Fwd Stalls Cycles"] =(it->second)["Loads Blocked by Unknown Address Store Impact"]*100/(it->second)["Store-Fwd Stalls Impact"];
00507 (it->second)["Loads Overlapped % of Store-Fwd Stalls Cycles"] =(it->second)["Loads Overlapped with Stores Impact"]*100/(it->second)["Store-Fwd Stalls Impact"];
00508 (it->second)["Loads Spanning % of Store-Fwd Stalls Cycles"] =(it->second)["Loads Spanning across Cache Lines Impact"]*100/(it->second)["Store-Fwd Stalls Impact"];
00509
00510 (it->second)["Load % of all Instructions"] =(it->second)["INST_RETIRED:LOADS"]*100/(it->second)["Counted Instructions Retired"];
00511 (it->second)["Store % of all Instructions"] =(it->second)["INST_RETIRED:STORES"]*100/(it->second)["Counted Instructions Retired"];
00512 (it->second)["Branch % of all Instructions"] =(it->second)["BRANCH_INSTRUCTIONS_RETIRED"]*100/(it->second)["Counted Instructions Retired"];
00513 (it->second)["Packed SIMD % of all Instructions"] =(it->second)["SIMD_COMP_INST_RETIRED:PACKED_SINGLE:PACKED_DOUBLE"]*100/(it->second)["Counted Instructions Retired"];
00514 (it->second)["Other % of all Instructions"] =(it->second)["Other Instructions"]*100/(it->second)["Counted Instructions Retired"];
00515 }
00516 }
00517
00518 void init_nhm_caa_events_displ()
00519 {
00520 nhm_caa_events_displ.push_back("Total Cycles");
00521 nhm_caa_events_displ.push_back("Instructions Retired");
00522 nhm_caa_events_displ.push_back("CPI");
00523 nhm_caa_events_displ.push_back("");
00524 nhm_caa_events_displ.push_back("iMargin");
00525 nhm_caa_events_displ.push_back("iFactor");
00526 nhm_caa_events_displ.push_back("");
00527 nhm_caa_events_displ.push_back("Stalled Cycles");
00528 nhm_caa_events_displ.push_back("% of Total Cycles");
00529 nhm_caa_events_displ.push_back("Total Counted Stalled Cycles");
00530 nhm_caa_events_displ.push_back("");
00531 nhm_caa_events_displ.push_back("Instruction Starvation % of Total Cycles");
00532 nhm_caa_events_displ.push_back("# of Instructions per Call");
00533 nhm_caa_events_displ.push_back("% of Total Cycles spent handling FP exceptions");
00534 nhm_caa_events_displ.push_back("");
00535 nhm_caa_events_displ.push_back("Counted Stalled Cycles due to Load Ops");
00536 nhm_caa_events_displ.push_back("");
00537 nhm_caa_events_displ.push_back("L2 Hit Impact");
00538 nhm_caa_events_displ.push_back("L2 Hit % of Load Stalls");
00539 nhm_caa_events_displ.push_back("");
00540 nhm_caa_events_displ.push_back("L3 Unshared Hit Impact");
00541 nhm_caa_events_displ.push_back("L3 Unshared Hit % of Load Stalls");
00542 nhm_caa_events_displ.push_back("");
00543 nhm_caa_events_displ.push_back("L2 Other Core Hit Impact");
00544 nhm_caa_events_displ.push_back("L2 Other Core Hit % of Load Stalls");
00545 nhm_caa_events_displ.push_back("");
00546 nhm_caa_events_displ.push_back("L2 Other Core Hit Modified Impact");
00547 nhm_caa_events_displ.push_back("L2 Other Core Hit Modified % of Load Stalls");
00548 nhm_caa_events_displ.push_back("");
00549 nhm_caa_events_displ.push_back("L3 Miss -> Local DRAM Hit Impact");
00550 nhm_caa_events_displ.push_back("L3 Miss -> Remote DRAM Hit Impact");
00551 nhm_caa_events_displ.push_back("L3 Miss -> Remote Cache Hit Impact");
00552 nhm_caa_events_displ.push_back("L3 Miss -> Total Impact");
00553 nhm_caa_events_displ.push_back("L3 Miss % of Load Stalls");
00554 nhm_caa_events_displ.push_back("");
00555 nhm_caa_events_displ.push_back("L1 DTLB Miss Impact");
00556 nhm_caa_events_displ.push_back("L1 DTLB Miss % of Load Stalls");
00557 nhm_caa_events_displ.push_back("");
00558 nhm_caa_events_displ.push_back("Cycles spent during DIV & SQRT Ops");
00559 nhm_caa_events_displ.push_back("DIV & SQRT Ops % of counted Stalled Cycles");
00560 nhm_caa_events_displ.push_back("");
00561 nhm_caa_events_displ.push_back("Total L2 IFETCH misses");
00562 nhm_caa_events_displ.push_back("% of L2 IFETCH misses");
00563 nhm_caa_events_displ.push_back("");
00564 nhm_caa_events_displ.push_back("% of IFETCHes served by Local DRAM");
00565 nhm_caa_events_displ.push_back("% of IFETCHes served by L3 (Modified)");
00566 nhm_caa_events_displ.push_back("% of IFETCHes served by L3 (Clean Snoop)");
00567 nhm_caa_events_displ.push_back("% of IFETCHes served by Remote L2");
00568 nhm_caa_events_displ.push_back("% of IFETCHes served by Remote DRAM");
00569 nhm_caa_events_displ.push_back("% of IFETCHes served by L3 (No Snoop)");
00570 nhm_caa_events_displ.push_back("");
00571 nhm_caa_events_displ.push_back("Total L2 IFETCH miss Impact");
00572 nhm_caa_events_displ.push_back("");
00573 nhm_caa_events_displ.push_back("Cycles IFETCH served by Local DRAM");
00574 nhm_caa_events_displ.push_back("Local DRAM IFECTHes % Impact");
00575 nhm_caa_events_displ.push_back("");
00576 nhm_caa_events_displ.push_back("Cycles IFETCH served by L3 (Modified)");
00577 nhm_caa_events_displ.push_back("L3 (Modified) IFECTHes % Impact");
00578 nhm_caa_events_displ.push_back("");
00579 nhm_caa_events_displ.push_back("Cycles IFETCH served by L3 (Clean Snoop)");
00580 nhm_caa_events_displ.push_back("L3 (Clean Snoop) IFECTHes % Impact");
00581 nhm_caa_events_displ.push_back("");
00582 nhm_caa_events_displ.push_back("Cycles IFETCH served by Remote L2");
00583 nhm_caa_events_displ.push_back("Remote L2 IFECTHes % Impact");
00584 nhm_caa_events_displ.push_back("");
00585 nhm_caa_events_displ.push_back("Cycles IFETCH served by Remote DRAM");
00586 nhm_caa_events_displ.push_back("Remote DRAM IFECTHes % Impact");
00587 nhm_caa_events_displ.push_back("");
00588 nhm_caa_events_displ.push_back("Cycles IFETCH served by L3 (No Snoop)");
00589 nhm_caa_events_displ.push_back("L3 (No Snoop) IFECTHes % Impact");
00590 nhm_caa_events_displ.push_back("");
00591 nhm_caa_events_displ.push_back("Total Branch Instructions Executed");
00592 nhm_caa_events_displ.push_back("% of Mispredicted Branches");
00593 nhm_caa_events_displ.push_back("");
00594 nhm_caa_events_displ.push_back("Direct Near Calls % of Total Branches Executed");
00595 nhm_caa_events_displ.push_back("Indirect Near Calls % of Total Branches Executed");
00596 nhm_caa_events_displ.push_back("Indirect Near Non-Calls % of Total Branches Executed");
00597 nhm_caa_events_displ.push_back("All Near Calls % of Total Branches Executed");
00598 nhm_caa_events_displ.push_back("All Non Calls % of Total Branches Executed");
00599 nhm_caa_events_displ.push_back("All Returns % of Total Branches Executed");
00600 nhm_caa_events_displ.push_back("");
00601 nhm_caa_events_displ.push_back("Total Branch Instructions Retired");
00602 nhm_caa_events_displ.push_back("Conditionals % of Total Branches Retired");
00603 nhm_caa_events_displ.push_back("Near Calls % of Total Branches Retired");
00604 nhm_caa_events_displ.push_back("");
00605 nhm_caa_events_displ.push_back("L1 ITLB Miss Impact");
00606 nhm_caa_events_displ.push_back("ITLB Miss Rate in %");
00607 nhm_caa_events_displ.push_back("");
00608 nhm_caa_events_displ.push_back("Branch Instructions");
00609 nhm_caa_events_displ.push_back("Branch % of all Instructions");
00610 nhm_caa_events_displ.push_back("");
00611 nhm_caa_events_displ.push_back("Load Instructions");
00612 nhm_caa_events_displ.push_back("Load % of all Instructions");
00613 nhm_caa_events_displ.push_back("");
00614 nhm_caa_events_displ.push_back("Store Instructions");
00615 nhm_caa_events_displ.push_back("Store % of all Instructions");
00616 nhm_caa_events_displ.push_back("");
00617 nhm_caa_events_displ.push_back("Other Instructions");
00618 nhm_caa_events_displ.push_back("Other % of all Instructions");
00619 nhm_caa_events_displ.push_back("");
00620 nhm_caa_events_displ.push_back("Packed UOPS Retired");
00621 nhm_caa_events_displ.push_back("Packed % of all UOPS Retired");
00622 }
00623
00624 void calc_nhm_deriv_values(double totalCycles)
00625 {
00626 for(std::map<std::string, std::map<std::string, double> >::iterator it = C_modules.begin(); it != C_modules.end(); ++it)
00627 {
00628 (it->second)["Total Cycles"] = (it->second)["CPU_CLK_UNHALTED:THREAD_P"];
00629
00630 (it->second)["L2 Hit Impact"] = (it->second)["MEM_LOAD_RETIRED:L2_HIT"] * I7_L2_HIT_CYCLES;
00631 (it->second)["L3 Unshared Hit Impact"] = (it->second)["MEM_LOAD_RETIRED:L3_UNSHARED_HIT"] * I7_L3_UNSHARED_HIT_CYCLES;
00632 if((it->second)["MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"]>(it->second)["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"])
00633 {
00634 (it->second)["L2 Other Core Hit Impact"] = ((it->second)["MEM_LOAD_RETIRED:OTHER_CORE_L2_HIT_HITM"] - (it->second)["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"])* I7_OTHER_CORE_L2_HIT_CYCLES;
00635 }
00636 else
00637 {
00638 (it->second)["L2 Other Core Hit Impact"] = 0.0;
00639 }
00640 (it->second)["L2 Other Core Hit Modified Impact"] = (it->second)["MEM_UNCORE_RETIRED:OTHER_CORE_L2_HITM"] * I7_OTHER_CORE_L2_HITM_CYCLES;
00641 (it->second)["L3 Miss -> Local DRAM Hit Impact"] = (it->second)["MEM_UNCORE_RETIRED:LOCAL_DRAM"] * I7_L3_MISS_LOCAL_DRAM_HIT_CYCLES;
00642 (it->second)["L3 Miss -> Remote DRAM Hit Impact"] = (it->second)["MEM_UNCORE_RETIRED:REMOTE_DRAM"] * I7_L3_MISS_REMOTE_DRAM_HIT_CYCLES;
00643 (it->second)["L3 Miss -> Remote Cache Hit Impact"] = (it->second)["MEM_UNCORE_RETIRED:REMOTE_CACHE_LOCAL_HOME_HIT"] * I7_L3_MISS_REMOTE_CACHE_HIT_CYCLES;
00644 (it->second)["L3 Miss -> Total Impact"] = (it->second)["L3 Miss -> Local DRAM Hit Impact"] + (it->second)["L3 Miss -> Remote DRAM Hit Impact"] + (it->second)["L3 Miss -> Remote Cache Hit Impact"];
00645 (it->second)["L1 DTLB Miss Impact"] = (it->second)["DTLB_LOAD_MISSES:WALK_COMPLETED"] * I7_L1_DTLB_WALK_COMPLETED_CYCLES;
00646 (it->second)["Counted Stalled Cycles due to Load Ops"] = (it->second)["L3 Miss -> Total Impact"] + (it->second)["L2 Hit Impact"] + (it->second)["L1 DTLB Miss Impact"] + (it->second)["L3 Unshared Hit Impact"] + (it->second)["L2 Other Core Hit Modified Impact"] + (it->second)["L2 Other Core Hit Impact"];
00647 (it->second)["Cycles spent during DIV & SQRT Ops"] = (it->second)["ARITH:CYCLES_DIV_BUSY"];
00648 (it->second)["Total Counted Stalled Cycles"] = (it->second)["Counted Stalled Cycles due to Load Ops"] + (it->second)["Cycles spent during DIV & SQRT Ops"];
00649 (it->second)["Stalled Cycles"] = (it->second)["Total Counted Stalled Cycles"];
00650 (it->second)["% of Total Cycles"] = (it->second)["Stalled Cycles"] * 100 / (it->second)["CPU_CLK_UNHALTED:THREAD_P"];
00651 (it->second)["L3 Miss % of Load Stalls"] = (it->second)["L3 Miss -> Total Impact"] * 100 / (it->second)["Counted Stalled Cycles due to Load Ops"];
00652 (it->second)["L2 Hit % of Load Stalls"] = (it->second)["L2 Hit Impact"] * 100 / (it->second)["Counted Stalled Cycles due to Load Ops"];
00653 (it->second)["L1 DTLB Miss % of Load Stalls"] = (it->second)["L1 DTLB Miss Impact"] * 100 / (it->second)["Counted Stalled Cycles due to Load Ops"];
00654 (it->second)["L3 Unshared Hit % of Load Stalls"] = (it->second)["L3 Unshared Hit Impact"] * 100 / (it->second)["Counted Stalled Cycles due to Load Ops"];
00655 (it->second)["L2 Other Core Hit % of Load Stalls"] = (it->second)["L2 Other Core Hit Impact"] * 100 / (it->second)["Counted Stalled Cycles due to Load Ops"];
00656 (it->second)["L2 Other Core Hit Modified % of Load Stalls"] = (it->second)["L2 Other Core Hit Modified Impact"] * 100 / (it->second)["Counted Stalled Cycles due to Load Ops"];
00657 (it->second)["DIV & SQRT Ops % of counted Stalled Cycles"] = (it->second)["Cycles spent during DIV & SQRT Ops"] * 100 / (it->second)["Total Counted Stalled Cycles"];
00658
00659 (it->second)["Cycles IFETCH served by Local DRAM"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * I7_IFETCH_L3_MISS_LOCAL_DRAM_HIT;
00660 (it->second)["Cycles IFETCH served by L3 (Modified)"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * I7_IFETCH_L2_MISS_L3_HITM;
00661 (it->second)["Cycles IFETCH served by L3 (Clean Snoop)"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * I7_IFETCH_L2_MISS_L3_HIT_SNOOP;
00662 (it->second)["Cycles IFETCH served by Remote L2"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * I7_IFETCH_L3_MISS_REMOTE_CACHE_FWD;
00663 (it->second)["Cycles IFETCH served by Remote DRAM"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * I7_IFETCH_L3_MISS_REMOTE_DRAM_HIT;
00664 (it->second)["Cycles IFETCH served by L3 (No Snoop)"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * I7_IFETCH_L2_MISS_L3_HIT_NO_SNOOP;
00665 (it->second)["Total L2 IFETCH miss Impact"] = (it->second)["Cycles IFETCH served by Local DRAM"] + (it->second)["Cycles IFETCH served by L3 (Modified)"] + (it->second)["Cycles IFETCH served by L3 (Clean Snoop)"] + (it->second)["Cycles IFETCH served by Remote L2"] + (it->second)["Cycles IFETCH served by Remote DRAM"] + (it->second)["Cycles IFETCH served by L3 (No Snoop)"];
00666 (it->second)["Local DRAM IFECTHes % Impact"] = (it->second)["Cycles IFETCH served by Local DRAM"] * 100 / (it->second)["Total L2 IFETCH miss Impact"];
00667 (it->second)["L3 (Modified) IFECTHes % Impact"] = (it->second)["Cycles IFETCH served by L3 (Modified)"] * 100 / (it->second)["Total L2 IFETCH miss Impact"];
00668 (it->second)["L3 (Clean Snoop) IFECTHes % Impact"] = (it->second)["Cycles IFETCH served by L3 (Clean Snoop)"] * 100 / (it->second)["Total L2 IFETCH miss Impact"];
00669 (it->second)["Remote L2 IFECTHes % Impact"] = (it->second)["Cycles IFETCH served by Remote L2"] * 100 / (it->second)["Total L2 IFETCH miss Impact"];
00670 (it->second)["Remote DRAM IFECTHes % Impact"] = (it->second)["Cycles IFETCH served by Remote DRAM"] * 100 / (it->second)["Total L2 IFETCH miss Impact"];
00671 (it->second)["L3 (No Snoop) IFECTHes % Impact"] = (it->second)["Cycles IFETCH served by L3 (No Snoop)"] * 100 / (it->second)["Total L2 IFETCH miss Impact"];
00672 (it->second)["Total L2 IFETCH misses"] = (it->second)["L2_RQSTS:IFETCH_MISS"];
00673 (it->second)["% of IFETCHes served by Local DRAM"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:LOCAL_DRAM"] * 100 / (it->second)["L2_RQSTS:IFETCH_MISS"];
00674 (it->second)["% of IFETCHes served by L3 (Modified)"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HITM"] * 100 / (it->second)["L2_RQSTS:IFETCH_MISS"];
00675 (it->second)["% of IFETCHes served by L3 (Clean Snoop)"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:OTHER_CORE_HIT_SNP"] * 100 / (it->second)["L2_RQSTS:IFETCH_MISS"];
00676 (it->second)["% of IFETCHes served by Remote L2"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_CACHE_FWD"] * 100 / (it->second)["L2_RQSTS:IFETCH_MISS"];
00677 (it->second)["% of IFETCHes served by Remote DRAM"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:REMOTE_DRAM"] * 100 / (it->second)["L2_RQSTS:IFETCH_MISS"];
00678 (it->second)["% of IFETCHes served by L3 (No Snoop)"] = (it->second)["OFFCORE_RESPONSE_0:DMND_IFETCH:UNCORE_HIT"] * 100 / (it->second)["L2_RQSTS:IFETCH_MISS"];
00679 (it->second)["% of L2 IFETCH misses"] = (it->second)["L2_RQSTS:IFETCH_MISS"] * 100 / ((it->second)["L2_RQSTS:IFETCH_MISS"] + (it->second)["L2_RQSTS:IFETCH_HIT"]);
00680 (it->second)["L1 ITLB Miss Impact"] = (it->second)["ITLB_MISSES:WALK_COMPLETED"] * I7_L1_ITLB_WALK_COMPLETED_CYCLES;
00681
00682 (it->second)["Total Branch Instructions Executed"] = (it->second)["BR_INST_EXEC:ANY"];
00683 (it->second)["% of Mispredicted Branches"] = (it->second)["BR_MISP_EXEC:ANY"] * 100 / (it->second)["BR_INST_EXEC:ANY"];
00684 (it->second)["Direct Near Calls % of Total Branches Executed"] = (it->second)["BR_INST_EXEC:DIRECT_NEAR_CALL"] * 100 / (it->second)["Total Branch Instructions Executed"];
00685 (it->second)["Indirect Near Calls % of Total Branches Executed"] = (it->second)["BR_INST_EXEC:INDIRECT_NEAR_CALL"] * 100 / (it->second)["Total Branch Instructions Executed"];
00686 (it->second)["Indirect Near Non-Calls % of Total Branches Executed"] = (it->second)["BR_INST_EXEC:INDIRECT_NON_CALL"] * 100 / (it->second)["Total Branch Instructions Executed"];
00687 (it->second)["All Near Calls % of Total Branches Executed"] = (it->second)["BR_INST_EXEC:NEAR_CALLS"] * 100 / (it->second)["Total Branch Instructions Executed"];
00688 (it->second)["All Non Calls % of Total Branches Executed"] = (it->second)["BR_INST_EXEC:NON_CALLS"] * 100 / (it->second)["Total Branch Instructions Executed"];
00689 (it->second)["All Returns % of Total Branches Executed"] = (it->second)["BR_INST_EXEC:RETURN_NEAR"] * 100 / (it->second)["Total Branch Instructions Executed"];
00690 (it->second)["Total Branch Instructions Retired"] = (it->second)["BR_INST_RETIRED:ALL_BRANCHES"];
00691 (it->second)["Conditionals % of Total Branches Retired"] = (it->second)["BR_INST_RETIRED:CONDITIONAL"] * 100 / (it->second)["Total Branch Instructions Retired"];
00692 (it->second)["Near Calls % of Total Branches Retired"] = (it->second)["BR_INST_RETIRED:NEAR_CALL"] * 100 / (it->second)["Total Branch Instructions Retired"];
00693
00694 (it->second)["Instruction Starvation % of Total Cycles"] = ((it->second)["UOPS_ISSUED:ANY CMASK=1 INV=1"] - (it->second)["RESOURCE_STALLS:ANY"])* 100 / (it->second)["CPU_CLK_UNHALTED:THREAD_P"];
00695 (it->second)["% of Total Cycles spent handling FP exceptions"] = (it->second)["UOPS_DECODED:MS CMASK=1"]* 100 / (it->second)["CPU_CLK_UNHALTED:THREAD_P"];
00696 (it->second)["# of Instructions per Call"] = (it->second)["INST_RETIRED:ANY_P"] / (it->second)["BR_INST_EXEC:NEAR_CALLS"];
00697
00698 (it->second)["Instructions Retired"] = (it->second)["INST_RETIRED:ANY_P"];
00699 (it->second)["ITLB Miss Rate in %"] = ((it->second)["ITLB_MISS_RETIRED"] / (it->second)["INST_RETIRED:ANY_P"]) * 100;
00700
00701 (it->second)["Branch Instructions"] = (it->second)["BR_INST_RETIRED:ALL_BRANCHES"];
00702 (it->second)["Load Instructions"] = (it->second)["MEM_INST_RETIRED:LOADS"];
00703 (it->second)["Store Instructions"] = (it->second)["MEM_INST_RETIRED:STORES"];
00704 (it->second)["Other Instructions"] = (it->second)["Instructions Retired"] - (it->second)["MEM_INST_RETIRED:LOADS"] - (it->second)["MEM_INST_RETIRED:STORES"] - (it->second)["BR_INST_RETIRED:ALL_BRANCHES"];
00705 (it->second)["Packed UOPS Retired"] = (it->second)["SSEX_UOPS_RETIRED:PACKED_DOUBLE"] + (it->second)["SSEX_UOPS_RETIRED:PACKED_SINGLE"];
00706 (it->second)["CPI"] = (it->second)["CPU_CLK_UNHALTED:THREAD_P"] / (it->second)["INST_RETIRED:ANY_P"];
00707
00708 double localPerformanceImprovement = (it->second)["CPI"]/EXPECTED_CPI;
00709 double cyclesAfterImprovement = (it->second)["CPU_CLK_UNHALTED:THREAD_P"]/localPerformanceImprovement;
00710 double totalCyclesAfterImprovement = totalCycles-(it->second)["CPU_CLK_UNHALTED:THREAD_P"]+cyclesAfterImprovement;
00711 (it->second)["iMargin"] = 100-(totalCyclesAfterImprovement/totalCycles)*100;
00712
00713 (it->second)["Load % of all Instructions"] = (it->second)["MEM_INST_RETIRED:LOADS"] * 100 / (it->second)["INST_RETIRED:ANY_P"];
00714 (it->second)["Store % of all Instructions"] = (it->second)["MEM_INST_RETIRED:STORES"] * 100 / (it->second)["INST_RETIRED:ANY_P"];
00715 (it->second)["Branch % of all Instructions"] = (it->second)["BR_INST_RETIRED:ALL_BRANCHES"] * 100 / (it->second)["INST_RETIRED:ANY_P"];
00716 (it->second)["Other % of all Instructions"] = (it->second)["Other Instructions"] * 100 / (it->second)["INST_RETIRED:ANY_P"];
00717
00718 (it->second)["Packed % of all UOPS Retired"] = (it->second)["Packed UOPS Retired"] * 100 / (it->second)["UOPS_RETIRED:ANY"];
00719 }
00720 }
00721
00722
// Sampling results of one module for one event.
//
// Stores a map from a caller-supplied key (read_S_file() uses
// "symbol%library") to the accumulated number of samples, together with the
// event configuration passed to init(): architecture, event name, cmask,
// inv mask and sampling period.
class S_module
{
private:
    std::map<std::string, unsigned int> samples;
    unsigned int total_num_samples;
    std::string module_name;
    std::string arch;
    std::string event;
    unsigned int cmask;
    unsigned int inv;
    unsigned int sp;

public:
    S_module()
    {
        clear();
    }

    // Resets the object to the freshly constructed state. Every field is
    // reset (including arch and event) so no configuration of a previous
    // module can leak into the next one.
    void clear()
    {
        samples.clear();
        total_num_samples = 0;
        module_name.clear();
        arch.clear();
        event.clear();
        cmask = 0;
        inv = 0;
        sp = 0;
    }

    // Stores the module name and the event configuration.
    void init(const char* name, const char* architecture, const char* event_name, unsigned int c_mask, unsigned int inv_mask, unsigned int smpl_period)
    {
        module_name = name;
        arch = architecture;
        event = event_name;
        cmask = c_mask;
        inv = inv_mask;
        sp = smpl_period;
    }

    void set_total(unsigned int total)
    {
        total_num_samples = total;
    }

    unsigned int get_smpl_period() const
    {
        return sp;
    }

    unsigned int get_inv_mask() const
    {
        return inv;
    }

    unsigned int get_c_mask() const
    {
        return cmask;
    }

    std::string get_arch() const
    {
        return arch;
    }

    std::string get_event() const
    {
        return event;
    }

    // Adds `value` samples to the entry identified by `index`, creating the
    // entry (starting from 0) if it does not exist yet.
    void add_sample(const char* index, unsigned int value)
    {
        samples[index] += value;
    }

    // Removes the entry with the highest sample count and reports it.
    //
    // index: output buffer receiving the key as a NUL-terminated string; the
    //        caller must provide enough room for the longest stored key.
    // value: receives the sample count of that entry.
    // Returns false (outputs untouched) when no entries are left.
    //
    // On ties the entry that comes first in key order wins. The search starts
    // from the first entry rather than from a count of 0, so it also works
    // when every stored count is 0.
    bool get_max(char *index, unsigned int *value)
    {
        if(samples.empty()) return false;
        std::map<std::string, unsigned int>::iterator max_pos = samples.begin();
        for(std::map<std::string, unsigned int>::iterator it = samples.begin(); it != samples.end(); ++it)
        {
            if(it->second > max_pos->second)
            {
                max_pos = it;
            }
        }
        strcpy(index, (max_pos->first).c_str());
        *value = max_pos->second;
        samples.erase(max_pos);
        return true;
    }

    std::string get_module_name() const
    {
        return module_name;
    }

    unsigned int get_total_num_samples() const
    {
        return total_num_samples;
    }
};
00816
00817
00818
00819
00820
// Copies `s` into `s_mod`, replacing the characters that are special in HTML
// by their entities:  <  ->  &lt;   >  ->  &gt;   &  ->  &amp;   "  ->  &quot;
//
// s:     NUL-terminated input string.
// s_mod: output buffer, always NUL-terminated on return. The function has no
//        way to know its size: each input character can expand to at most 6
//        output characters, so 6 * strlen(s) + 1 bytes are always sufficient.
//
// The output is written through a moving pointer in a single pass instead of
// re-scanning the destination with strcat() for every character.
void html_special_chars(const char *s, char *s_mod)
{
    char *out = s_mod;
    for(const char *in = s; *in != '\0'; ++in)
    {
        const char *entity = NULL;
        switch(*in)
        {
            case '<':
                entity = "&lt;";
                break;
            case '>':
                entity = "&gt;";
                break;
            case '&':
                entity = "&amp;";
                break;
            case '"':
                entity = "&quot;";
                break;
            default:
                break;
        }
        if(entity != NULL)
        {
            const size_t entity_len = strlen(entity);
            memcpy(out, entity, entity_len);
            out += entity_len;
        }
        else
        {
            *out++ = *in;
        }
    }
    *out = '\0';
    return;
}
00851
00852
00853
00854
// Operator spellings recognised after the "operator" keyword. The order is
// significant: the first entry that prefixes the text wins, so every token
// is listed before any token that is a prefix of it ("delete[]" before
// "delete", ">>=" before ">>" before ">", ...).
static const char *const func_name_operator_tokens[] = {
    "delete[]", "delete", "new[]", "new",
    ">>=", "<<=", "->*",
    "<<", ">>", ">=", "<=", "==", "!=",
    "|=", "&=", "^=", "%=", "/=", "*=", "-=", "+=",
    "&&", "||", "[]", "()", "++", "--", "->",
    "<", ">", "~", "!", "+", "-", "*", "/", "%", "^", "&", "|", ",", "="
};

// True for the characters accepted as part of a plain function name.
static bool func_name_is_ident_char(char c)
{
    // isalnum() must not be handed a negative value, hence the cast.
    return isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '~';
}

// Walks backwards from text[close_pos] (which holds `closer`) to the
// `opener` that balances it. Stores its index in *open_pos and returns true,
// or returns false when the beginning of the text is reached first.
static bool func_name_find_opener(const char *text, size_t close_pos, char opener, char closer, size_t *open_pos)
{
    int depth = 1;
    size_t pos = close_pos;
    while(pos > 0)
    {
        --pos;
        if(text[pos] == closer)
        {
            depth++;
        }
        else if(text[pos] == opener)
        {
            if(--depth == 0)
            {
                *open_pos = pos;
                return true;
            }
        }
    }
    return false;
}

// Extracts the bare function name from a demangled C++ signature.
//
// WARNING: although the parameter is const-qualified, the buffer is modified
// in place (NUL bytes are written to cut the name out) and the returned
// pointer points INTO that buffer. Callers must pass a writable,
// NUL-terminated buffer that outlives the use of the result.
//
// Rules:
//  * If the text contains "operator", the result starts at that word and is
//    cut right after the operator token that follows it (spaces between the
//    keyword and the token are kept). If no known token follows, the rest of
//    the string is returned untouched.
//  * Otherwise the argument list closed by the last ')' is located. If the
//    character before its '(' is '>', the balanced "<...>" template argument
//    list is dropped as well. The result is the run of name characters
//    (alphanumerics, '_' and '~') that ends there. Without a template list,
//    the character immediately before '(' is always kept, whatever it is.
//  * If there is no ')' the symbol is returned unchanged. The same happens
//    when the brackets are unbalanced or nothing precedes the '(' / '<';
//    these inputs previously made the backward scans read before the start
//    of the buffer.
const char *func_name(const char *demangled_symbol)
{
    char *symbol = const_cast<char *>(demangled_symbol);

    char *operator_begin = strstr(symbol, "operator");
    if(operator_begin != NULL)
    {
        char *token = operator_begin + 8; // strlen("operator")
        while(*token == ' ') token++;
        const size_t token_count = sizeof(func_name_operator_tokens) / sizeof(func_name_operator_tokens[0]);
        for(size_t i = 0; i < token_count; i++)
        {
            const size_t token_len = strlen(func_name_operator_tokens[i]);
            if(strncmp(token, func_name_operator_tokens[i], token_len) == 0)
            {
                token[token_len] = '\0';
                break;
            }
        }
        return operator_begin;
    }

    const char *last_close = strrchr(symbol, ')');
    if(last_close == NULL)
    {
        return demangled_symbol;
    }

    size_t args_open = 0;
    if(!func_name_find_opener(symbol, static_cast<size_t>(last_close - symbol), '(', ')', &args_open) || args_open == 0)
    {
        return demangled_symbol;
    }

    // name_end: where the name is terminated; name_begin: moved backwards
    // over the name characters below.
    size_t name_end = args_open;
    size_t name_begin = args_open - 1; // this character is kept unconditionally
    if(symbol[args_open - 1] == '>')
    {
        size_t tmpl_open = 0;
        if(!func_name_find_opener(symbol, args_open - 1, '<', '>', &tmpl_open) || tmpl_open == 0)
        {
            return demangled_symbol;
        }
        name_end = tmpl_open;
        name_begin = tmpl_open;
    }
    while(name_begin > 0 && func_name_is_ident_char(symbol[name_begin - 1]))
    {
        name_begin--;
    }

    symbol[args_open] = '\0';
    symbol[name_end] = '\0';
    return symbol + name_begin;
}
01127
01128
01129
01130
01131
01132
01133 void put_S_module(S_module *cur_module, const char *dir)
01134 {
01135 char module_name[MAX_MODULE_NAME_LENGTH];
01136 bzero(module_name, MAX_MODULE_NAME_LENGTH);
01137 strcpy(module_name, (cur_module->get_module_name()).c_str());
01138 char module_filename[MAX_FILENAME_LENGTH];
01139 bzero(module_filename, MAX_FILENAME_LENGTH);
01140 strcpy(module_filename, dir);
01141 strcat(module_filename, "/HTML/");
01142 strcat(module_filename, module_name);
01143 strcat(module_filename, ".html");
01144 char event[MAX_EVENT_NAME_LENGTH];
01145 bzero(event, MAX_EVENT_NAME_LENGTH);
01146 strcpy(event, (cur_module->get_event()).c_str());
01147 std::map<std::string, unsigned int>::iterator result = modules_tot_samples.find(cur_module->get_module_name());
01148 FILE *module_file;
01149 if(result == modules_tot_samples.end())
01150 {
01151 if((!strcmp(event, "UNHALTED_CORE_CYCLES") && !nehalem) || (!strcmp(event, "CPU_CLK_UNHALTED:THREAD_P") && nehalem))
01152 {
01153 modules_tot_samples.insert(std::pair<std::string, unsigned int>(cur_module->get_module_name(), cur_module->get_total_num_samples()));
01154 }
01155 else
01156 {
01157 modules_tot_samples.insert(std::pair<std::string, unsigned int>(cur_module->get_module_name(), 0));
01158 }
01159 module_file = fopen(module_filename, "w");
01160 if(module_file == NULL)
01161 {
01162 fprintf(stderr, "ERROR: Cannot create file %s!!!\naborting...\n", module_filename);
01163 exit(1);
01164 }
01165 fprintf(module_file, "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\n");
01166 fprintf(module_file, "<html>\n");
01167 fprintf(module_file, "<head>\n");
01168 fprintf(module_file, "<title>\n");
01169 fprintf(module_file, "%s\n", module_name);
01170 fprintf(module_file, "</title>\n");
01171 fprintf(module_file, "</head>\n");
01172 fprintf(module_file, "<body>\n");
01173 fprintf(module_file, "<h2>%s</h2><br/>Events Sampled:<br/>\n", module_name);
01174 fprintf(module_file, "<ul>\n");
01175 for(std::vector<std::string>::const_iterator it = S_events.begin(); it != S_events.end(); ++it)
01176 {
01177 fprintf(module_file, "<li><a href=\"#%s\">%s</a></li>\n", it->c_str(), it->c_str());
01178 }
01179 fprintf(module_file, "</ul>\n");
01180 }
01181 else
01182 {
01183 if((!strcmp(event, "UNHALTED_CORE_CYCLES") && !nehalem) || (!strcmp(event, "CPU_CLK_UNHALTED:THREAD_P") && nehalem))
01184 {
01185 modules_tot_samples[cur_module->get_module_name()] = cur_module->get_total_num_samples();
01186 }
01187 module_file = fopen(module_filename, "a");
01188 }
01189 char event_str[MAX_EVENT_NAME_LENGTH];
01190 bzero(event_str, MAX_EVENT_NAME_LENGTH);
01191 strcpy(event_str, event);
01192 if(cur_module->get_c_mask()>0)
01193 {
01194 sprintf(event_str, "%s CMASK=%d", event_str, cur_module->get_c_mask());
01195 }
01196 if(cur_module->get_inv_mask()>0)
01197 {
01198 sprintf(event_str, "%s INV=%d", event_str, cur_module->get_inv_mask());
01199 }
01200 fprintf(module_file, "<a name=\"%s\"><a>\n", event_str);
01201 fprintf(module_file, "<table cellpadding=\"5\">\n");
01202 fprintf(module_file, "<tr bgcolor=\"#EEEEEE\">\n");
01203 fprintf(module_file, "<th colspan=\"6\" align=\"left\">%s -- cmask: %u -- invmask: %u -- Total Samples: %u -- Sampling Period: %d</th>\n", event, cur_module->get_c_mask(), cur_module->get_inv_mask(), cur_module->get_total_num_samples(), cur_module->get_smpl_period());
01204 fprintf(module_file, "</tr>\n");
01205 fprintf(module_file, "<tr bgcolor=\"#EEEEEE\">\n");
01206 fprintf(module_file, "<th align=\"left\">Samples</th>\n");
01207 fprintf(module_file, "<th align=\"left\">Percentage</th>\n");
01208 fprintf(module_file, "<th align=\"left\">Symbol Name</th>\n");
01209 fprintf(module_file, "<th align=\"left\">Library Name</th>\n");
01210 fprintf(module_file, "<th align=\"left\">Complete Signature</th>\n");
01211 fprintf(module_file, "<th align=\"left\">Library Pathname</th>\n");
01212 fprintf(module_file, "</tr>\n");
01213 for(int j=0; j<20; j++)
01214 {
01215 char sym[MAX_SYM_LENGTH];
01216 char sym_mod[MAX_SYM_MOD_LENGTH];
01217 char lib[MAX_LIB_LENGTH];
01218 char lib_mod[MAX_LIB_MOD_LENGTH];
01219 char simple_sym[MAX_SIMPLE_SYM_LENGTH];
01220 char simple_sym_mod[MAX_SIMPLE_SYM_MOD_LENGTH];
01221 char simple_lib[MAX_SIMPLE_LIB_LENGTH];
01222 char simple_lib_mod[MAX_SIMPLE_LIB_MOD_LENGTH];
01223
01224 bzero(sym, MAX_SYM_LENGTH);
01225 bzero(sym_mod, MAX_SYM_MOD_LENGTH);
01226 bzero(lib, MAX_LIB_LENGTH);
01227 bzero(lib_mod, MAX_LIB_MOD_LENGTH);
01228 bzero(simple_sym, MAX_SIMPLE_SYM_LENGTH);
01229 bzero(simple_sym_mod, MAX_SIMPLE_SYM_MOD_LENGTH);
01230 bzero(simple_lib, MAX_SIMPLE_LIB_LENGTH);
01231 bzero(simple_lib_mod, MAX_SIMPLE_LIB_MOD_LENGTH);
01232
01233 char index[MAX_SAMPLE_INDEX_LENGTH];
01234 bzero(index, MAX_SAMPLE_INDEX_LENGTH);
01235 unsigned int value;
01236 bool res = cur_module->get_max(index, &value);
01237 if(!res) break;
01238 char *sym_end = strchr(index, '%');
01239 if(sym_end==NULL)
01240 {
01241 fprintf(stderr, "ERROR: Invalid sym and lib name! : %s\naborting...\n", index);
01242 exit(1);
01243 }
01244 strncpy(sym, index, strlen(index)-strlen(sym_end));
01245 strcpy(lib, sym_end+1);
01246 char temp[MAX_SYM_LENGTH];
01247 bzero(temp, MAX_SYM_LENGTH);
01248 strcpy(temp, sym);
01249 strcpy(simple_sym, (func_name(temp)));
01250 if(strrchr(lib, '/')!=NULL && *(strrchr(lib, '/')+1)!='\0')
01251 {
01252 strcpy(simple_lib, strrchr(lib, '/')+1);
01253 }
01254 else
01255 {
01256 strcpy(simple_lib, lib);
01257 }
01258 if(j%2!=0)
01259 {
01260 fprintf(module_file, "<tr bgcolor=\"#FFFFCC\">\n");
01261 }
01262 else
01263 {
01264 fprintf(module_file, "<tr bgcolor=\"#CCFFCC\">\n");
01265 }
01266 fprintf(module_file, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%u</td>\n", value);
01267 fprintf(module_file, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\">%f%%</td>\n", (((double)(value))/((double)(cur_module->get_total_num_samples())))*100);
01268 html_special_chars(simple_sym, simple_sym_mod);
01269 html_special_chars(simple_lib, simple_lib_mod);
01270 html_special_chars(sym, sym_mod);
01271 html_special_chars(lib, lib_mod);
01272 fprintf(module_file, "<td style=\"font-family:courier;\">%s</td>\n", simple_sym_mod);
01273 fprintf(module_file, "<td style=\"font-family:courier;\">%s</td>\n", simple_lib_mod);
01274 fprintf(module_file, "<td style=\"font-family:courier;\">%s</td>\n", sym_mod);
01275 fprintf(module_file, "<td style=\"font-family:courier;\">%s</td>\n</tr>\n", lib_mod);
01276 }
01277 fprintf(module_file, "</table><br/><br/>\n");
01278 int res = fclose(module_file);
01279 if(res)
01280 {
01281 fprintf(stderr, "ERROR: Cannot close file %s!!!\naborting...\n", module_filename);
01282 exit(1);
01283 }
01284 return;
01285 }
01286
01287
01288
01289
01290
01291
01292
01293
01294 int read_S_file(const char *dir, const char *filename)
01295 {
01296 char line[MAX_LINE_LENGTH];
01297 char event[MAX_EVENT_NAME_LENGTH];
01298 char arch[MAX_ARCH_NAME_LENGTH];
01299 unsigned int cmask;
01300 unsigned int inv;
01301 unsigned int sp;
01302 char cur_module_name[MAX_MODULE_NAME_LENGTH];
01303 bzero(line, MAX_LINE_LENGTH);
01304 bzero(event, MAX_EVENT_NAME_LENGTH);
01305 bzero(cur_module_name, MAX_MODULE_NAME_LENGTH);
01306 bzero(arch, MAX_ARCH_NAME_LENGTH);
01307
01308 S_module *cur_module = new S_module();
01309 unsigned int module_num = 0;
01310
01311 char path_name[MAX_FILENAME_LENGTH];
01312 bzero(path_name, MAX_FILENAME_LENGTH);
01313 strcpy(path_name, dir);
01314 strcat(path_name, "/");
01315 strcat(path_name, filename);
01316 gzFile res_file = gzopen(path_name, "rb");
01317
01318 if(res_file != NULL)
01319 {
01320 bzero(line, MAX_LINE_LENGTH);
01321 gzgets(res_file, line, MAX_LINE_LENGTH);
01322 if(line[strlen(line)-1]=='\n') line[strlen(line)-1]='\0';
01323 bzero(event, MAX_EVENT_NAME_LENGTH);
01324 sscanf(line, "%s %s %u %u %u", arch, event, &cmask, &inv, &sp);
01325 if(!strcmp(arch, "NHM")) nehalem = true; else nehalem = false;
01326 bzero(line, MAX_LINE_LENGTH);
01327 while(gzgets(res_file, line, MAX_LINE_LENGTH)!=Z_NULL)
01328 {
01329 if(line[strlen(line)-1]=='\n') line[strlen(line)-1]='\0';
01330 if(strchr(line, ' ')==NULL)
01331 {
01332 if(module_num>0)
01333 {
01334 put_S_module(cur_module, dir);
01335 cur_module->clear();
01336 }
01337 module_num++;
01338 char *end_sym = strchr(line, '%');
01339 if(end_sym == NULL)
01340 {
01341 fprintf(stderr, "ERROR: Invalid module name. \nLINE: %s\naborting...\n", line);
01342 exit(1);
01343 }
01344 bzero(cur_module_name, MAX_MODULE_NAME_LENGTH);
01345 strncpy(cur_module_name, line, strlen(line)-strlen(end_sym));
01346 cur_module->init(cur_module_name, arch, event, cmask, inv, sp);
01347 cur_module->set_total(atoi(end_sym+1));
01348 }
01349 else
01350 {
01351 unsigned int value=0, libOffset=0;
01352 char symbol[MAX_SYM_LENGTH];
01353 char libName[MAX_LIB_LENGTH];
01354 char final_sym[MAX_SYM_MOD_LENGTH];
01355 char final_lib[MAX_LIB_MOD_LENGTH];
01356 bzero(symbol, MAX_SYM_LENGTH);
01357 bzero(libName, MAX_LIB_LENGTH);
01358 bzero(final_sym, MAX_SYM_MOD_LENGTH);
01359 bzero(final_lib, MAX_LIB_MOD_LENGTH);
01360
01361 sscanf(line, "%s %s %u %u", symbol, libName, &libOffset, &value);
01362 char realPathName_s[FILENAME_MAX];
01363 bzero(realPathName_s, FILENAME_MAX);
01364 char *realPathName = realpath(libName, realPathName_s);
01365 if(realPathName!=NULL && strlen(realPathName)>0)
01366 {
01367 std::map<std::string, FileInfo>::iterator result;
01368 result = libsInfo.find(realPathName);
01369 if(result == libsInfo.end())
01370 {
01371 libsInfo[realPathName] = FileInfo(realPathName, true);
01372 }
01373 const char *temp_sym = libsInfo[realPathName].symbolByOffset(libOffset);
01374 if(temp_sym!=NULL && strlen(temp_sym)>0)
01375 {
01376 int status;
01377 char *demangled_symbol = abi::__cxa_demangle(temp_sym, NULL, NULL, &status);
01378 if(status == 0)
01379 {
01380 strcpy(final_sym, demangled_symbol);
01381 free(demangled_symbol);
01382 }
01383 else
01384 {
01385 strcpy(final_sym, temp_sym);
01386 }
01387 }
01388 else
01389 {
01390 strcpy(final_sym, "???");
01391 }
01392 strcpy(final_lib, realPathName);
01393 }
01394 else
01395 {
01396 strcpy(final_sym, symbol);
01397 strcpy(final_lib, libName);
01398 }
01399 char index[MAX_LINE_LENGTH];
01400 bzero(index, MAX_LINE_LENGTH);
01401 strcpy(index, final_sym);
01402 strcat(index, "%");
01403 strcat(index, final_lib);
01404 cur_module->add_sample(index, value);
01405 }
01406 bzero(line, MAX_LINE_LENGTH);
01407 }
01408 put_S_module(cur_module, dir);
01409 cur_module->clear();
01410 gzclose(res_file);
01411 }
01412 else
01413 {
01414 fprintf(stderr, "ERROR: Unable to open input file: %s\naborting...\n", filename);
01415 exit(1);
01416 }
01417 delete cur_module;
01418 return 0;
01419 }
01420
01421 int read_S_events(const char *dir, const char *filename)
01422 {
01423 char event[MAX_EVENT_NAME_LENGTH];
01424 char arch[MAX_ARCH_NAME_LENGTH];
01425 char line[MAX_LINE_LENGTH];
01426 char cmask_str[MAX_CMASK_STR_LENGTH];
01427 char inv_str[MAX_INV_STR_LENGTH];
01428 char sp_str[MAX_SP_STR_LENGTH];
01429 bzero(line, MAX_LINE_LENGTH);
01430 bzero(event, MAX_EVENT_NAME_LENGTH);
01431 bzero(arch, MAX_ARCH_NAME_LENGTH);
01432 bzero(cmask_str, MAX_CMASK_STR_LENGTH);
01433 bzero(inv_str, MAX_INV_STR_LENGTH);
01434 bzero(sp_str, MAX_SP_STR_LENGTH);
01435 char path_name[MAX_FILENAME_LENGTH];
01436 bzero(path_name, MAX_FILENAME_LENGTH);
01437 strcpy(path_name, dir);
01438 strcat(path_name, "/");
01439 strcat(path_name, filename);
01440 gzFile res_file = gzopen(path_name, "rb");
01441 if(res_file != NULL)
01442 {
01443 bzero(line, MAX_LINE_LENGTH);
01444 gzgets(res_file, line, MAX_LINE_LENGTH);
01445 if(line[strlen(line)-1]=='\n') line[strlen(line)-1]='\0';
01446 bzero(event, MAX_EVENT_NAME_LENGTH);
01447 sscanf(line, "%s %s %s %s %s\n", arch, event, cmask_str, inv_str, sp_str);
01448 std::string event_str(event);
01449 if(atoi(cmask_str)>0)
01450 {
01451 event_str += " CMASK=";
01452 event_str += cmask_str;
01453 }
01454 if(atoi(inv_str)>0)
01455 {
01456 event_str += " INV=";
01457 event_str += inv_str;
01458 }
01459 S_events.push_back(event_str);
01460 }
01461 else
01462 {
01463 fprintf(stderr, "ERROR: Unable to open input file: %s\naborting...\n", filename);
01464 exit(1);
01465 }
01466 return 0;
01467 }
01468
01469
01470
01471
01472 int finalize_S_html_pages(const char *dir)
01473 {
01474 for(std::map<std::string, unsigned int>::const_iterator i = modules_tot_samples.begin(); i != modules_tot_samples.end(); i++)
01475 {
01476 char module_filename[MAX_FILENAME_LENGTH];
01477 strcpy(module_filename, dir);
01478 strcat(module_filename, "/HTML/");
01479 strcat(module_filename, (i->first).c_str());
01480 strcat(module_filename, ".html");
01481 FILE *module_file = fopen(module_filename, "a");
01482 if(module_file == NULL)
01483 {
01484 fprintf(stderr, "ERROR: Unable to append to file: %s\naborting...\n", module_filename);
01485 exit(1);
01486 }
01487 fprintf(module_file, "</body>\n</html>\n");
01488 if(fclose(module_file))
01489 {
01490 fprintf(stderr, "ERROR: Cannot close file %s!!!\naborting...\n", module_filename);
01491 exit(1);
01492 }
01493 }
01494 return 0;
01495 }
01496
01497
01498
01499
01500
01501 int read_C_file(const char *dir, const char *filename)
01502 {
01503 char event[MAX_EVENT_NAME_LENGTH];
01504 char arch[MAX_ARCH_NAME_LENGTH];
01505 char line[MAX_LINE_LENGTH];
01506 char cmask_str[MAX_CMASK_STR_LENGTH];
01507 char inv_str[MAX_INV_STR_LENGTH];
01508 char sp_str[MAX_SP_STR_LENGTH];
01509 char cur_module_name[MAX_MODULE_NAME_LENGTH];
01510 bzero(line, MAX_LINE_LENGTH);
01511 bzero(event, MAX_EVENT_NAME_LENGTH);
01512 bzero(cur_module_name, MAX_MODULE_NAME_LENGTH);
01513 bzero(arch, MAX_ARCH_NAME_LENGTH);
01514 bzero(line, MAX_LINE_LENGTH);
01515 bzero(cmask_str, MAX_CMASK_STR_LENGTH);
01516 bzero(inv_str, MAX_INV_STR_LENGTH);
01517 bzero(sp_str, MAX_SP_STR_LENGTH);
01518 int number_of_modules = 0;
01519 long cur_sum = 0;
01520 int no_of_values = 0;
01521 char path_name[MAX_FILENAME_LENGTH];
01522 bzero(path_name, MAX_FILENAME_LENGTH);
01523 strcpy(path_name, dir);
01524 strcat(path_name, "/");
01525 strcat(path_name, filename);
01526 FILE *fp = fopen(path_name, "r");
01527 fscanf(fp, "%s %s %s %s %s\n", arch, event, cmask_str, inv_str, sp_str);
01528 if(!strcmp(arch, "NHM")) nehalem = true; else nehalem = false;
01529 std::string event_str(event);
01530 if(atoi(cmask_str)>0)
01531 {
01532 event_str += " CMASK=";
01533 event_str += cmask_str;
01534 }
01535 if(atoi(inv_str)>0)
01536 {
01537 event_str += " INV=";
01538 event_str += inv_str;
01539 }
01540 C_events.push_back(event_str);
01541 while(fscanf(fp, "%s\n", line)!=EOF)
01542 {
01543 if(isalpha(line[0]))
01544 {
01545 if(number_of_modules>0)
01546 {
01547 C_modules[cur_module_name][event_str]=(double)cur_sum/no_of_values;
01548 cur_sum = 0;
01549 no_of_values = 0;
01550 }
01551 strcpy(cur_module_name, line);
01552 number_of_modules++;
01553 }
01554 else if(isdigit(line[0]))
01555 {
01556 cur_sum += strtol(line, NULL, 10);
01557 no_of_values++;
01558 }
01559 }
01560 C_modules[cur_module_name][event_str]=(double)cur_sum/no_of_values;
01561 fclose(fp);
01562 return number_of_modules;
01563 }
01564
// Writes the opening part of the HTML result page to fp: document head,
// page heading, the start of the sortable table and its header row.
//
// fp:      output stream the markup is written to.
// columns: column titles, one <th> per entry after the fixed "MODULE NAME"
//          column; an empty title produces a white spacer cell instead.
void put_C_header(FILE *fp, std::vector<std::string> &columns)
{
  // Fixed markup that precedes the per-column header cells.
  static const char *const preamble[] =
  {
    "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\n",
    "<html>\n",
    "<head>\n",
    "<title>\n",
    "Analysis Result\n",
    "</title>\n",
    "<script src=\"sorttable.js\"></script>\n",
    "<style>\ntable.sortable thead {\nbackground-color:#eee;\ncolor:#666666;\nfont-weight:bold;\ncursor:default;\nfont-family:courier;\n}\n</style>\n",
    "</head>\n",
    "<body link=\"black\">\n",
    "<h1>RESULTS:</h1><br/>Click for detailed symbol view...<p/>\n",
    "<table class=\"sortable\" cellpadding=\"5\">\n",
    "<tr>\n",
    "<th>MODULE NAME</th>\n"
  };
  const size_t preamble_lines = sizeof(preamble)/sizeof(preamble[0]);
  for(size_t n = 0; n < preamble_lines; ++n)
  {
    fputs(preamble[n], fp);
  }

  for(std::vector<std::string>::size_type col = 0; col < columns.size(); ++col)
  {
    const char *title = columns[col].c_str();
    if(title[0] == '\0')
    {
      fputs("<th bgcolor=\"#FFFFFF\"> </th>\n", fp);
    }
    else
    {
      fprintf(fp, "<th>%s</th>\n", title);
    }
  }
  fputs("</tr>\n", fp);
}
01589
01590 void put_C_modules(FILE *fp, std::vector<std::string> &columns)
01591 {
01592 int index = 0;
01593 for(std::map<std::string, std::map<std::string, double> >::iterator it = C_modules.begin(); it != C_modules.end(); ++it)
01594 {
01595 if(index%2) fprintf(fp, "<tr bgcolor=\"#FFFFCC\">\n");
01596 else fprintf(fp, "<tr bgcolor=\"#CCFFCC\">\n");
01597 fprintf(fp, "<td style=\"font-family:monospace;font-size:large;color:Black\"><a href=\"%s.html\">%s</a></td>\n", (it->first).c_str(), (it->first).c_str());
01598 for(std::vector<std::string>::const_iterator jt = columns.begin(); jt != columns.end(); ++jt)
01599 {
01600 if(strlen(jt->c_str())==0)
01601 {
01602 fprintf(fp, "<td bgcolor=\"#FFFFFF\"> </td>");
01603 }
01604 else
01605 {
01606 if((it->second).find(*jt) == (it->second).end())
01607 {
01608 fprintf(stderr, "ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", (*jt).c_str());
01609 exit(1);
01610 }
01611 fprintf(fp, "<td style=\"font-family:monospace;font-size:large;color:DarkBlue\" align=\"right\">%.2f</td>\n", (it->second)[*jt]);
01612 }
01613 }
01614 fprintf(fp, "</tr>\n");
01615 index++;
01616 }
01617 }
01618
// Writes the closing tags of the HTML result page (table, body, html) to fp.
void put_C_footer(FILE *fp)
{
  fputs("</table>\n</body>\n</html>\n", fp);
}
01624
// Writes the CSV header line to fp: "MODULE NAME" followed by one
// comma-prefixed field per non-empty column title, then a newline.
//
// fp:      output stream the line is written to.
// columns: column titles; empty titles are skipped entirely.
void put_C_header_csv(FILE *fp, std::vector<std::string> &columns)
{
  fputs("MODULE NAME", fp);
  for(std::vector<std::string>::size_type col = 0; col < columns.size(); ++col)
  {
    const char *title = columns[col].c_str();
    if(title[0] != '\0')
    {
      fprintf(fp, ",%s", title);
    }
  }
  fputc('\n', fp);
}
01636
01637 void put_C_modules_csv(FILE *fp, std::vector<std::string> &columns)
01638 {
01639 for(std::map<std::string, std::map<std::string, double> >::iterator it = C_modules.begin(); it != C_modules.end(); ++it)
01640 {
01641 fprintf(fp, "%s", (it->first).c_str()) ;
01642 for(std::vector<std::string>::const_iterator jt = columns.begin(); jt != columns.end(); ++jt)
01643 {
01644 if(strlen(jt->c_str())==0) {}
01645 else
01646 {
01647 if((it->second).find(*jt) == (it->second).end())
01648 {
01649 fprintf(stderr, "ERROR: Cannot find derivate value \"%s\"!!!\naborting...\n", (*jt).c_str());
01650 exit(1);
01651 }
01652 fprintf(fp, ",%.2f", (it->second)[*jt]);
01653 }
01654 }
01655 fprintf(fp, "\n");
01656 }
01657 }
01658
01659
01660
01661
01662
01663
01664
01665
01666 double normalize(std::string field, double value, double normalizeTo)
01667 {
01668 double max = 0;
01669 double counter_value;
01670 for(std::map<std::string, std::map<std::string, double> >::iterator it = C_modules.begin(); it != C_modules.end(); ++it)
01671 {
01672 counter_value = (it->second)[field];
01673 if(max < counter_value) max = counter_value;
01674 }
01675 if(value>0 && max>0 && normalizeTo>0)
01676 {
01677 return 1.*value/max*normalizeTo;
01678 }
01679 else return 0;
01680 }
01681
01682
01683
01684
01685
01686
01687 void calc_post_deriv_values()
01688 {
01689 if(nehalem)
01690 {
01691 for(std::map<std::string, std::map<std::string, double> >::iterator it = C_modules.begin(); it != C_modules.end(); ++it)
01692 {
01693 double simdnorm = 1. - normalize("Packed % of all UOPS Retired", (it->second)["Packed % of all UOPS Retired"], 1);
01694 double misspnorm = normalize("% of Mispredicted Branches", (it->second)["% of Mispredicted Branches"], 1);
01695 double stallnorm = normalize("Stalled Cycles", (it->second)["Stalled Cycles"], 1);
01696 (it->second)["iFactor"] = stallnorm*(simdnorm + misspnorm + stallnorm);
01697 }
01698 }
01699 else
01700 {
01701 for(std::map<std::string, std::map<std::string, double> >::iterator it = C_modules.begin(); it != C_modules.end(); ++it)
01702 {
01703 double simdnorm = 1. - normalize("Packed SIMD % of all Instructions", (it->second)["Packed SIMD % of all Instructions"], 1);
01704 double misspnorm = normalize("% of Mispredicted Branches", (it->second)["% of Mispredicted Branches"], 1);
01705 double stallnorm = normalize("Stalled Cycles", (it->second)["Stalled Cycles"], 1);
01706 (it->second)["iFactor"] = stallnorm*(simdnorm + misspnorm + stallnorm);
01707 }
01708 }
01709 }
01710
01711
01712
01713
01714
01715 double getTotalCycles()
01716 {
01717 double sum=0;
01718 if(nehalem)
01719 {
01720 for(std::map<std::string, std::map<std::string, double> >::iterator it = C_modules.begin(); it != C_modules.end(); ++it)
01721 {
01722 sum += (it->second)["CPU_CLK_UNHALTED:THREAD_P"];
01723 }
01724 }
01725 else
01726 {
01727 for(std::map<std::string, std::map<std::string, double> >::iterator it = C_modules.begin(); it != C_modules.end(); ++it)
01728 {
01729 sum += (it->second)["UNHALTED_CORE_CYCLES"];
01730 }
01731 }
01732 return sum;
01733 }
01734
01735
01736
01737
01738 int main(int argc, char *argv[])
01739 {
01740 if(argc<2 || argc>4)
01741 {
01742 printf("\n\nUsage: %s DIRECTORY [--caa] [--csv]\n\n", argv[0]);
01743 exit(1);
01744 }
01745
01746 bool caa = false;
01747 bool csv = false;
01748 for(int i=2; i<argc; i++)
01749 {
01750 if(!strcmp(argv[i], "--caa")) caa = true;
01751 if(!strcmp(argv[i], "--csv")) csv = true;
01752 }
01753
01754 char dir[MAX_FILENAME_LENGTH];
01755 strcpy(dir, argv[1]);
01756 if(!csv)
01757 {
01758 strcat(dir, "/HTML");
01759 int res = mkdir(dir, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
01760 if(res!=0)
01761 {
01762 fprintf(stderr, "ERROR: Cannot create directory %s\naborting...\n", dir);
01763 exit(1);
01764 }
01765 }
01766
01767 DIR *dp;
01768 struct dirent *dirp;
01769 int num_of_modules = 0;
01770 if((dp = opendir(argv[1]))==NULL)
01771 {
01772 printf("Error(%d) opening %s\n", errno, argv[1]);
01773 return errno;
01774 }
01775 while((dirp = readdir(dp))!=NULL)
01776 {
01777 if(strstr(dirp->d_name, "_S_")!=NULL && strstr(dirp->d_name, ".txt.gz")!=NULL && !csv)
01778 {
01779 if(read_S_events(argv[1], dirp->d_name))
01780 {
01781 fprintf(stderr, "ERROR: Cannot read file %s\naborting...\n", dirp->d_name);
01782 exit(1);
01783 }
01784 }
01785 }
01786 closedir(dp);
01787 sort(S_events.begin(), S_events.end());
01788 if((dp = opendir(argv[1]))==NULL)
01789 {
01790 printf("Error(%d) opening %s\n", errno, argv[1]);
01791 return errno;
01792 }
01793 while((dirp = readdir(dp))!=NULL)
01794 {
01795 if(strstr(dirp->d_name, "_S_")!=NULL && strstr(dirp->d_name, ".txt.gz")!=NULL && !csv)
01796 {
01797 if(read_S_file(argv[1], dirp->d_name))
01798 {
01799 fprintf(stderr, "ERROR: Cannot read file %s\naborting...\n", dirp->d_name);
01800 exit(1);
01801 }
01802 }
01803 else if(strstr(dirp->d_name, "_C_")!=NULL && strstr(dirp->d_name, ".txt")!=NULL)
01804 {
01805 int res = read_C_file(argv[1], dirp->d_name);
01806 if(res>num_of_modules)
01807 {
01808 num_of_modules = res;
01809 }
01810 }
01811 }
01812 closedir(dp);
01813
01814 if(!csv)
01815 {
01816 if(finalize_S_html_pages(argv[1]))
01817 {
01818 fprintf(stderr, "ERROR: Cannot finalize HTML pages!!!\naborting...\n");
01819 exit(1);
01820 }
01821 }
01822
01823 char filepath[MAX_FILENAME_LENGTH];
01824 bzero(filepath, MAX_FILENAME_LENGTH);
01825 if(!csv) sprintf(filepath, "%s/HTML/index.html", argv[1]);
01826 else sprintf(filepath, "%s/results.csv", argv[1]);
01827 FILE *fp = fopen(filepath, "w");
01828 if(fp == NULL)
01829 {
01830 fprintf(stderr, "ERROR: Cannot create file index.html!!!\naborting...\n");
01831 exit(1);
01832 }
01833
01834 if(caa)
01835 {
01836 double totalCycles;
01837 if(!nehalem)
01838 {
01839 init_core_caa_events();
01840 if(!check_for_core_caa_events())
01841 {
01842 fprintf(stderr, "(core) ERROR: One or more events for CAA missing!\naborting...\n");
01843 exit(1);
01844 }
01845 init_core_caa_events_displ();
01846 totalCycles = getTotalCycles();
01847 calc_core_deriv_values(totalCycles);
01848 calc_post_deriv_values();
01849 if(!csv)
01850 {
01851 put_C_header(fp, core_caa_events_displ);
01852 put_C_modules(fp, core_caa_events_displ);
01853 }
01854 else
01855 {
01856 put_C_header_csv(fp, core_caa_events_displ);
01857 put_C_modules_csv(fp, core_caa_events_displ);
01858 }
01859 }
01860 else
01861 {
01862 init_nhm_caa_events();
01863 if(!check_for_nhm_caa_events())
01864 {
01865 fprintf(stderr, "(nehalem) ERROR: One or more events for CAA missing!\naborting...\n");
01866 exit(1);
01867 }
01868 init_nhm_caa_events_displ();
01869 totalCycles = getTotalCycles();
01870 calc_nhm_deriv_values(totalCycles);
01871 calc_post_deriv_values();
01872 if(!csv)
01873 {
01874 put_C_header(fp, nhm_caa_events_displ);
01875 put_C_modules(fp, nhm_caa_events_displ);
01876 }
01877 else
01878 {
01879 put_C_header_csv(fp, nhm_caa_events_displ);
01880 put_C_modules_csv(fp, nhm_caa_events_displ);
01881 }
01882 }
01883 if(!csv) put_C_footer(fp);
01884 fclose(fp);
01885 }
01886 else
01887 {
01888 if(!csv)
01889 {
01890 put_C_header(fp, C_events);
01891 put_C_modules(fp, C_events);
01892 put_C_footer(fp);
01893 }
01894 else
01895 {
01896 put_C_header_csv(fp, C_events);
01897 put_C_modules_csv(fp, C_events);
01898 }
01899 fclose(fp);
01900 }
01901 if(!csv)
01902 {
01903 char src[MAX_FILENAME_LENGTH];
01904 char dst[MAX_FILENAME_LENGTH];
01905 sprintf(src, "sorttable.js");
01906 sprintf(dst, "%s/HTML/sorttable.js", argv[1]);
01907 int fd_src = open(src, O_RDONLY);
01908 if(fd_src == -1)
01909 {
01910 fprintf(stderr, "ERROR: Cannot open file \"%s\"!\naborting...\n", src);
01911 exit(1);
01912 }
01913 int fd_dst = open(dst, O_WRONLY|O_CREAT|O_TRUNC, 0644);
01914 if(fd_dst == -1)
01915 {
01916 fprintf(stderr, "ERROR: Cannot open file \"%s\" (%s)!\naborting...\n", dst, strerror(errno));
01917 exit(1);
01918 }
01919 char c;
01920 while(read(fd_src, &c, 1))
01921 {
01922 write(fd_dst, &c, 1);
01923 }
01924 close(fd_dst);
01925 close(fd_src);
01926 }
01927 return 0;
01928 }