00001
00002
00003
00004
00005
00006 #include "GaudiKernel/Auditor.h"
00007 #include "GaudiKernel/IToolSvc.h"
00008 #include "GaudiKernel/IIncidentListener.h"
00009 #include "GaudiKernel/IIncidentSvc.h"
00010 #include "GaudiKernel/IToolSvc.h"
00011 #include "GaudiKernel/VectorMap.h"
00012 #include "GaudiKernel/HashMap.h"
00013 #include "GaudiKernel/AudFactory.h"
00014 #include "GaudiKernel/MsgStream.h"
00015
00016
00017 #include <iostream>
00018 #include <string>
00019 #include <cstring>
00020 #include <fstream>
00021 #include <perfmon/pfmlib.h>
00022 #include <perfmon/pfmlib_core.h>
00023 #include <perfmon/pfmlib_intel_nhm.h>
00024 #include <vector>
00025 #include <map>
00026 #include <utility>
00027 #include <sstream>
00028
00029 #include <perfmon/perfmon.h>
00030 #include <perfmon/perfmon_dfl_smpl.h>
00031
00033
00035
00036
00037
00038 #include <sys/types.h>
00039 #include <stdio.h>
00040 #include <stdlib.h>
00041 #include <stdarg.h>
00042 #include <errno.h>
00043 #include <unistd.h>
00044 #include <string.h>
00045 #include <signal.h>
00046 #include <stdarg.h>
00047 #include <stdint.h>
00048 #include <getopt.h>
00049 #include <time.h>
00050 #include <sys/ptrace.h>
00051 #include <sys/wait.h>
00052 #include <sys/mman.h>
00053 #include <sys/time.h>
00054 #include <sys/resource.h>
00055 #include <unistd.h>
00056 #include <fcntl.h>
00057 #include <zlib.h>
00058
00059 #include <algorithm>
00060 #include <list>
00061 #include <stack>
00062 #include <cmath>
00063 #include <sys/stat.h>
00064 #include "IgHook_IgHookTrace.h"
00065
00066
00067 #include <dlfcn.h>
00068
00069
00070 #define MAX_EVT_NAME_LEN 256
00071 #define NUM_PMCS PFMLIB_MAX_PMCS
00072 #define NUM_PMDS PFMLIB_MAX_PMDS
00073 #define FMT_NAME PFM_DFL_SMPL_NAME
00074 #define BPL (sizeof(uint64_t)<<3)
00075 #define LBPL 6
00076
00077 #define SYM_NAME_MAX_LENGTH 10000
00078 #define MAX_OUTPUT_FILENAME_LENGTH 1024
00079 #define MAX_EVENT_NAME_LENGTH 500
00080 #define MAX_PREFIX_NAME_LENGTH 1024
00081 #define FILENAME_MAX_LENGTH 1024
00082
00083 #define MAX_NUMBER_OF_PROGRAMMABLE_COUNTERS 4
00084
00085 #define cpuid(func,ax,bx,cx,dx) __asm__ __volatile__ ("cpuid": "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (func));
00086
00087
00088 static pfarg_pmd_t pd_smpl[NUM_PMDS];
00089 static uint64_t collected_samples, collected_partial;
00090 static int ctx_fd;
00091 static pfm_dfl_smpl_hdr_t *hdr;
00092 static uint64_t ovfl_count;
00093 static size_t entry_size;
00094 static unsigned int num_smpl_pmds;
00095 static std::vector<std::map<std::string, std::map<unsigned long, unsigned int> > > samples(MAX_NUMBER_OF_PROGRAMMABLE_COUNTERS);
00096 static std::vector<std::map<std::string, std::vector<unsigned long int> > > results(MAX_NUMBER_OF_PROGRAMMABLE_COUNTERS);
00097 static uint64_t last_overflow;
00098 static uint64_t last_count;
00099 static int sp[MAX_NUMBER_OF_PROGRAMMABLE_COUNTERS];
00100
00101 static std::stack<std::pair<INamedInterface *, std::vector<unsigned long int> > > alg_stack;
00102
00103
00104
00105
00106 namespace {
/// Reinterpret a raw object pointer (as returned by dlsym) as a value of the
/// function-pointer type T.
///
/// The conversion goes through a union whose members are a `void*` and a `T`:
/// the pointer is stored in the first member and read back through the second.
///
/// @tparam T  target type, expected to be a function-pointer type
/// @param  p  raw pointer to convert (may be null)
/// @return    the bits of `p` viewed as a `T`
template <typename T>
inline T function_cast(void *p) {
  union Pun {
    void* object;
    T function;
  };
  Pun pun;
  pun.object = p;
  return pun.function;
}
00117 class PFMon {
00118 public:
00119 bool loaded;
00120 typedef void (*pfm_stop_t)(int);
00121 pfm_stop_t pfm_stop;
00122 typedef void (*pfm_self_stop_t)(int);
00123 pfm_self_stop_t pfm_self_stop;
00124 typedef os_err_t (*pfm_restart_t)(int);
00125 pfm_restart_t pfm_restart;
00126 typedef int (*pfm_read_pmds_t)(int, pfarg_pmd_t*, int);
00127 pfm_read_pmds_t pfm_read_pmds;
00128 typedef pfm_err_t (*pfm_initialize_t)();
00129 pfm_initialize_t pfm_initialize;
00130 typedef pfm_err_t (*pfm_find_full_event_t)(const char *, pfmlib_event_t *);
00131 pfm_find_full_event_t pfm_find_full_event;
00132 typedef pfm_err_t (*pfm_dispatch_events_t)(pfmlib_input_param_t *, void *, pfmlib_output_param_t *, void *);
00133 pfm_dispatch_events_t pfm_dispatch_events;
00134 typedef os_err_t (*pfm_create_context_t)(pfarg_ctx_t *, char *, void *, size_t);
00135 pfm_create_context_t pfm_create_context;
00136 typedef os_err_t (*pfm_write_pmcs_t)(int, pfarg_pmc_t *, int);
00137 pfm_write_pmcs_t pfm_write_pmcs;
00138 typedef os_err_t (*pfm_write_pmds_t)(int, pfarg_pmd_t *, int);
00139 pfm_write_pmds_t pfm_write_pmds;
00140 typedef os_err_t (*pfm_load_context_t)(int, pfarg_load_t *);
00141 pfm_load_context_t pfm_load_context;
00142 typedef os_err_t (*pfm_start_t)(int fd, pfarg_start_t *);
00143 pfm_start_t pfm_start;
00144 typedef char* (*pfm_strerror_t)(int);
00145 pfm_strerror_t pfm_strerror;
00146 typedef pfm_err_t (*pfm_set_options_t)(pfmlib_options_t *);
00147 pfm_set_options_t pfm_set_options;
00148 typedef pfm_err_t (*pfm_get_num_counters_t)(unsigned int *);
00149 pfm_get_num_counters_t pfm_get_num_counters;
00150 static PFMon &instance() {
00151 return s_instance;
00152 }
00153 private:
00154
00155
00156 void* handle;
00157
00158 PFMon() {
00159 handle = dlopen("libpfm.so", RTLD_NOW);
00160 if (handle) { loaded = true; } else { loaded = false; }
00161 if (loaded) {
00162 pfm_start = function_cast<pfm_start_t>(dlsym(handle, "pfm_start"));
00163 pfm_stop = function_cast<pfm_stop_t>(dlsym(handle, "pfm_stop"));
00164 pfm_self_stop = function_cast<pfm_self_stop_t>(dlsym(handle, "pfm_stop"));
00165 pfm_restart = function_cast<pfm_restart_t>(dlsym(handle, "pfm_restart"));
00166 pfm_read_pmds = function_cast<pfm_read_pmds_t>(dlsym(handle, "pfm_read_pmds"));
00167 pfm_initialize = function_cast<pfm_initialize_t>(dlsym(handle, "pfm_initialize"));
00168 pfm_find_full_event = function_cast<pfm_find_full_event_t>(dlsym(handle, "pfm_find_full_event"));
00169 pfm_dispatch_events = function_cast<pfm_dispatch_events_t>(dlsym(handle, "pfm_dispatch_events"));
00170 pfm_create_context = function_cast<pfm_create_context_t>(dlsym(handle, "pfm_create_context"));
00171 pfm_write_pmcs = function_cast<pfm_write_pmcs_t>(dlsym(handle, "pfm_write_pmcs"));
00172 pfm_write_pmds = function_cast<pfm_write_pmds_t>(dlsym(handle, "pfm_write_pmds"));
00173 pfm_load_context = function_cast<pfm_load_context_t>(dlsym(handle, "pfm_load_context"));
00174 pfm_strerror = function_cast<pfm_strerror_t>(dlsym(handle, "pfm_strerror"));
00175 pfm_set_options = function_cast<pfm_set_options_t>(dlsym(handle, "pfm_set_options"));
00176 pfm_get_num_counters = function_cast<pfm_get_num_counters_t>(dlsym(handle, "pfm_get_num_counters"));
00177 } else {
00178
00179 }
00180 }
00181 ~PFMon() {
00182 if (handle) dlclose(handle);
00183 }
00184
00185 static PFMon s_instance;
00186 };
00187
00188 PFMon PFMon::s_instance;
00189 }
00190
00191
00192
00193
00194
00195
00196
00206 class PerfMonAuditor: virtual public Auditor
00207 {
00208 public:
00209 virtual void before(StandardEventType evt, INamedInterface* alg);
00210 virtual void after(StandardEventType evt, INamedInterface* alg, const StatusCode &sc);
00211 using Auditor::before;
00212 using Auditor::after;
00213
00214 private:
00215 void i_beforeInitialize(INamedInterface* alg);
00216 void i_afterInitialize(INamedInterface* alg);
00217 void i_beforeExecute(INamedInterface* alg);
00218 void i_afterExecute(INamedInterface* alg);
00219
00220 public:
00221 virtual StatusCode initialize();
00222 virtual StatusCode finalize();
00223 int is_nehalem() {
00224 #ifdef __ICC
00225
00226 #pragma warning(push)
00227 #pragma warning(disable:593)
00228 #endif
00229 int a,b,c,d;
00230 cpuid(1,a,b,c,d);
00231 int sse4_2_mask = 1 << 20;
00232 if(c & sse4_2_mask) return 1; else return 0;
00233 #ifdef __ICC
00234 #pragma warning(pop)
00235 #endif
00236 }
00237
00238 private:
00239 PFMon &m_pfm;
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277 public:
00278 PerfMonAuditor(const std::string& name, ISvcLocator* pSvc):
00279 Auditor(name, pSvc),
00280 m_pfm(PFMon::instance()),
00281 m_map(),
00282 m_indent(0),
00283 m_inEvent(false)
00284 {
00285 is_nehalem_ret = is_nehalem();
00286 declareProperty("EVENT0", event_str[0]);
00287 declareProperty("EVENT1", event_str[1]);
00288 declareProperty("EVENT2", event_str[2]);
00289 declareProperty("EVENT3", event_str[3]);
00290 declareProperty("FAMILY", family);
00291 declareProperty("PREFIX", prefix);
00292 declareProperty("INV0", inv[0]);
00293 declareProperty("INV1", inv[1]);
00294 declareProperty("INV2", inv[2]);
00295 declareProperty("INV3", inv[3]);
00296 declareProperty("CMASK0", cmask[0]);
00297 declareProperty("CMASK1", cmask[1]);
00298 declareProperty("CMASK2", cmask[2]);
00299 declareProperty("CMASK3", cmask[3]);
00300 declareProperty("SP0", sp[0]);
00301 declareProperty("SP1", sp[1]);
00302 declareProperty("SP2", sp[2]);
00303 declareProperty("SP3", sp[3]);
00304 declareProperty("SAMPLE", sampling);
00305 declareProperty("START_AT_EVENT", start_at_event);
00306 declareProperty("IS_NEHALEM", is_nehalem_ret);
00307
00308
00309
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00349
00350 }
00351
00352 virtual ~PerfMonAuditor() {}
00353
00354 private:
00355 PerfMonAuditor();
00356 PerfMonAuditor(const PerfMonAuditor&);
00357 PerfMonAuditor& operator=(const PerfMonAuditor&);
00358
00359 private:
00360 typedef GaudiUtils::VectorMap<const INamedInterface*,int> Map;
00361 Map m_map;
00362 int m_indent;
00363 bool m_inEvent;
00364
00365 private:
00366 int is_nehalem_ret;
00367
00368 pfmlib_input_param_t inp;
00369 pfmlib_output_param_t outp;
00370 pfarg_ctx_t ctx;
00371 pfarg_pmd_t pd[NUM_PMDS];
00372 pfarg_pmc_t pc[NUM_PMCS];
00373 pfarg_load_t load_arg;
00374 int fd;
00375 unsigned int i;
00376 int ret;
00377 void startpm();
00378 void pausepm();
00379 void stoppm();
00380 void finalizepm();
00381 std::string event_str[MAX_NUMBER_OF_PROGRAMMABLE_COUNTERS];
00382 std::string prefix;
00383 std::string family;
00384 char event_cstr[MAX_NUMBER_OF_PROGRAMMABLE_COUNTERS][MAX_EVENT_NAME_LENGTH];
00385 char prefix_cstr[MAX_PREFIX_NAME_LENGTH];
00386 unsigned int ph_ev_count;
00387 bool inv[MAX_NUMBER_OF_PROGRAMMABLE_COUNTERS];
00388 unsigned int cmask[MAX_NUMBER_OF_PROGRAMMABLE_COUNTERS];
00389 unsigned int start_at_event;
00390 pfmlib_core_input_param_t params;
00391 pfmlib_nhm_input_param_t nhm_params;
00392 int used_counters_number;
00393 bool nehalem;
00394 bool westmere;
00395 bool core;
00396
00397 bool sampling;
00398 int detect_unavail_pmu_regs(int fd, pfmlib_regmask_t *r_pmcs, pfmlib_regmask_t *r_pmds);
00399 int detect_unavail_pmcs(int fd, pfmlib_regmask_t *r_pmcs){return detect_unavail_pmu_regs(fd, r_pmcs, NULL);}
00400 void pfm_bv_set(uint64_t *bv, uint16_t rnum){bv[rnum>>LBPL] |= 1UL << (rnum&(BPL-1));}
00401 int pfm_bv_isset(uint64_t *bv, uint16_t rnum){return bv[rnum>>LBPL] & (1UL <<(rnum&(BPL-1))) ? 1 : 0;}
00402 void pfm_bv_copy(uint64_t *d, uint64_t *j, uint16_t n){if(n<=BPL) *d = *j; else {memcpy(d, j, (n>>LBPL)*sizeof(uint64_t));}}
00403 static void process_smpl_buf(pfm_dfl_smpl_hdr_t *hdr, size_t entry_size);
00404 static void sigio_handler(int, struct siginfo *, struct sigcontext *);
00405
00406 void start_smpl();
00407 void stop_smpl();
00408 void finalize_smpl();
00409 pfm_dfl_smpl_arg_t buf_arg;
00410 pfarg_load_t load_args;
00411 void *buf_addr;
00412 unsigned num_counters;
00413 unsigned int max_pmd;
00414 pfmlib_options_t pfmlib_options;
00415
00416
00417 int level;
00418
00419 bool first_alg;
00420 std::string first_alg_name;
00421 bool event_count_reached;
00422
00423
00424 };
00425
00426 void PerfMonAuditor::startpm()
00427 {
00428 MsgStream log(msgSvc(), name());
00429 memset(&ctx,0, sizeof(ctx));
00430 memset(&inp,0, sizeof(inp));
00431 memset(&outp,0, sizeof(outp));
00432 memset(pd, 0, sizeof(pd));
00433 memset(pc, 0, sizeof(pc));
00434 memset(&load_arg, 0, sizeof(load_arg));
00435 memset(¶ms, 0, sizeof(params));
00436 memset(&nhm_params, 0, sizeof(nhm_params));
00437
00438 for(int i=0; i<used_counters_number; i++)
00439 {
00440 ret = m_pfm.pfm_find_full_event(event_cstr[i], &inp.pfp_events[i]);
00441 if(ret != PFMLIB_SUCCESS)
00442 {
00443 log << MSG::ERROR << "ERROR: cannot find event: " << event_cstr[i] << ". Aborting..." << endmsg;
00444 }
00445 }
00446 inp.pfp_dfl_plm = PFM_PLM3;
00447 inp.pfp_event_count = 4;
00448 for(int i=0; i<used_counters_number; i++)
00449 {
00450 if(inv[i])
00451 {
00452 (params.pfp_core_counters[i]).flags |= PFM_CORE_SEL_INV;
00453 (nhm_params.pfp_nhm_counters[i]).flags |= PFM_NHM_SEL_INV;
00454 }
00455 if(cmask[i]>0)
00456 {
00457 (params.pfp_core_counters[i]).cnt_mask = cmask[i];
00458 (nhm_params.pfp_nhm_counters[i]).cnt_mask = cmask[i];
00459 }
00460 }
00461 if(nehalem || westmere)
00462 {
00463 ret = m_pfm.pfm_dispatch_events(&inp, &nhm_params, &outp, NULL);
00464 }
00465 else
00466 {
00467 ret = m_pfm.pfm_dispatch_events(&inp, ¶ms, &outp, NULL);
00468 }
00469 if(ret != PFMLIB_SUCCESS)
00470 {
00471 log << MSG::ERROR << "ERROR: cannot dispatch events: " << m_pfm.pfm_strerror(ret) << ". Aborting..." << endmsg;
00472 }
00473 for(unsigned int i=0; i<outp.pfp_pmc_count; i++)
00474 {
00475 pc[i].reg_num = outp.pfp_pmcs[i].reg_num;
00476 pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
00477 }
00478 for(unsigned int i=0; i<outp.pfp_pmd_count; i++)
00479 {
00480 pd[i].reg_num = outp.pfp_pmds[i].reg_num;
00481 pd[i].reg_value = 0;
00482 }
00483 fd = m_pfm.pfm_create_context(&ctx, NULL, 0, 0);
00484 if(fd == -1)
00485 {
00486 log << MSG::ERROR << "ERROR: Context not created. Aborting..." << endmsg;
00487 }
00488 if(m_pfm.pfm_write_pmcs(fd, pc, outp.pfp_pmc_count) == -1)
00489 {
00490 log << MSG::ERROR << "ERROR: Could not write pmcs. Aborting..." << endmsg;
00491 }
00492 if(m_pfm.pfm_write_pmds(fd, pd, outp.pfp_pmd_count) == -1)
00493 {
00494 log << MSG::ERROR << "ERROR: Could not write pmds. Aborting..." << endmsg;
00495 }
00496 load_arg.load_pid = getpid();
00497 if(m_pfm.pfm_load_context(fd, &load_arg) == -1)
00498 {
00499 log << MSG::ERROR << "ERROR: Could not load context. Aborting..." << endmsg;
00500
00501
00502 }
00503
00504 m_pfm.pfm_start(fd, NULL);
00505 }
00506
00507
00508
00509
00510
00511
00512
00513
00514 void PerfMonAuditor::stoppm()
00515 {
00516 MsgStream log(msgSvc(), name());
00517 m_pfm.pfm_stop(fd);
00518 if(m_pfm.pfm_read_pmds(fd, pd, inp.pfp_event_count) == -1)
00519 {
00520 log << MSG::ERROR << "Could not read pmds" << endmsg;
00521 }
00522 for(int i=0; i<used_counters_number; i++)
00523 {
00524 results[i][(alg_stack.top().first)->name()].push_back(alg_stack.top().second[i] + pd[i].reg_value);
00525 }
00526
00527 close(fd);
00528 }
00529
00530
00531 void PerfMonAuditor::pausepm()
00532 {
00533 MsgStream log(msgSvc(), name());
00534 m_pfm.pfm_stop(fd);
00535 if(m_pfm.pfm_read_pmds(fd, pd, inp.pfp_event_count) == -1)
00536 {
00537 log << MSG::ERROR << "Could not read pmds" << endmsg;
00538 }
00539
00540 for(int i=0; i<used_counters_number; i++)
00541 {
00542 alg_stack.top().second[i] += pd[i].reg_value;
00543 }
00544
00545 close(fd);
00546 }
00547
00548
00549
00550
00551
00552 void PerfMonAuditor::finalizepm()
00553 {
00554 MsgStream log(msgSvc(), name());
00555 log << MSG::INFO << "start of finalizepm ucn:" << used_counters_number << endmsg;
00556 char filename[MAX_OUTPUT_FILENAME_LENGTH];
00557 char to_cat[50];
00558 FILE *outfile;
00559 for(int i=0; i<used_counters_number; i++)
00560 {
00561 bzero(filename, MAX_OUTPUT_FILENAME_LENGTH);
00562 sprintf(filename, "%s_%s", prefix_cstr, event_cstr[i]);
00563 for(int j=0; j<(int)strlen(filename); j++)
00564 {
00565 if(filename[j]==':')
00566 {
00567 filename[j]='-';
00568 }
00569 }
00570 bzero(to_cat, 50);
00571 if(inv[i])
00572 {
00573 strcpy(to_cat, "_INV_1");
00574 }
00575 if(cmask[i]>0)
00576 {
00577 sprintf(to_cat, "%s_CMASK_%d", to_cat, cmask[i]);
00578 }
00579 sprintf(filename, "%s%s.txt", filename, to_cat);
00580 log << MSG::INFO << "Filename:" << filename << endmsg;
00581 outfile = fopen(filename, "w");
00582 if(nehalem)
00583 {
00584 fprintf(outfile, "NHM ");
00585 }
00586 else if(westmere)
00587 {
00588 fprintf(outfile, "WSM ");
00589 }
00590 else if(core)
00591 {
00592 fprintf(outfile, "CORE ");
00593 }
00594 fprintf(outfile, "%s %d %d %d\n", event_cstr[i], cmask[i], inv[i], sp[i]);
00595 for(std::map<std::string, std::vector<unsigned long int> >::iterator it=(results[i]).begin(); it!=(results[i]).end(); it++)
00596 {
00597 fprintf(outfile, "%s\n", (it->first).c_str());
00598 for(std::vector<unsigned long int>::iterator j=(it->second).begin(); j!=(it->second).end(); j++)
00599 {
00600 fprintf(outfile, "%lu\n", *j);
00601 }
00602 }
00603 fclose(outfile);
00604 }
00605 }
00606
00607 StatusCode PerfMonAuditor::initialize()
00608 {
00609 MsgStream log(msgSvc(), name());
00610
00611 if (!m_pfm.loaded) {
00612 log << MSG::ERROR << "pfm library could not be loaded" << endmsg;
00613 return false;
00614 }
00615
00616 log << MSG::INFO << "Initializing..." << endmsg;
00617 StatusCode sc = Auditor::initialize() ;
00618 if(sc.isFailure())
00619 {
00620 return sc;
00621 }
00622 used_counters_number = 0;
00623 for(int i=0; i<MAX_NUMBER_OF_PROGRAMMABLE_COUNTERS; i++)
00624 {
00625 if(event_str[i].length()>0) used_counters_number++;
00626 }
00627 for(int i=0; i<MAX_NUMBER_OF_PROGRAMMABLE_COUNTERS; i++)
00628 {
00629 strcpy(event_cstr[i], event_str[i].c_str());
00630 }
00631 strcpy(prefix_cstr, prefix.c_str());
00632
00633 if(m_pfm.pfm_initialize() != PFMLIB_SUCCESS)
00634 {
00635 log << MSG::ERROR << "Cannot initialize perfmon!!" << endmsg;
00636 }
00637 ph_ev_count = 0;
00638 first_alg = true;
00639 event_count_reached = false;
00640 nehalem = false;
00641 core = false;
00642 westmere = false;
00643 if(family.compare("CORE")==0) core = true;
00644 else if(family.compare("NEHALEM")==0) nehalem = true;
00645 else if(family.compare("WESTMERE")==0) westmere = true;
00646 else
00647 {
00648 log << MSG::ERROR << "ERROR: Unsupported processor family " << family << ". aborting..." << endmsg;
00649 }
00650
00651 log << MSG::INFO << "Initialized!" << endmsg;
00652 return StatusCode::SUCCESS ;
00653 }
00654
00655
00656
00657
00658
00659 void PerfMonAuditor::process_smpl_buf(pfm_dfl_smpl_hdr_t *hdr, size_t entry_size)
00660 {
00662 pfm_dfl_smpl_entry_t *ent;
00663 size_t pos, count;
00664 uint64_t entry;
00665 if(hdr->hdr_overflows == last_overflow && hdr->hdr_count == last_count)
00666 {
00667 printf("skipping identical set of samples...\n");
00668 return;
00669 }
00670 count = hdr->hdr_count;
00671 ent = (pfm_dfl_smpl_entry_t *)(hdr+1);
00672 pos = (unsigned long)ent;
00673 entry = collected_samples;
00674 while(count--)
00675 {
00676
00677 if(ent->ovfl_pmd<=3)
00678 {
00679 ((samples[ent->ovfl_pmd])[(alg_stack.top().first)->name()])[(unsigned long)(ent->ip)]++;
00680 }
00681 pos += entry_size;
00682 ent = (pfm_dfl_smpl_entry_t *)pos;
00683 entry++;
00684 }
00685 collected_samples = entry;
00686 last_overflow = hdr->hdr_overflows;
00687 if (last_count != hdr->hdr_count && (last_count || last_overflow == 0))
00688 {
00689 collected_partial += hdr->hdr_count;
00690 }
00691 last_count = hdr->hdr_count;
00692 return;
00693 }
00694
00695
00696
00697
00698
00699
00700 void PerfMonAuditor::sigio_handler(int , struct siginfo *, struct sigcontext *)
00701 {
00702
00703 PFMon& pfm = PFMon::instance();
00704 pfarg_msg_t msg;
00705 int fd = ctx_fd;
00706 int r;
00707 if(fd != ctx_fd)
00708 {
00709
00710 }
00711 if(pfm.pfm_read_pmds(fd, pd_smpl+1, 1) == -1)
00712 {
00713
00714 }
00715 while(true)
00716 {
00717 r = read(fd, &msg, sizeof(msg));
00718 if(r!=sizeof(msg))
00719 {
00720 if(r==-1 && errno==EINTR)
00721 {
00722 printf("read interrupted, retrying\n");
00723 continue;
00724 }
00725
00726 }
00727 break;
00728 }
00729 switch(msg.type)
00730 {
00731 case PFM_MSG_OVFL:
00732 process_smpl_buf(hdr, entry_size);
00733 ovfl_count++;
00734 if(pfm.pfm_restart(fd))
00735 {
00736 if(errno!=EBUSY)
00737 {
00738
00739 }
00740 else
00741 {
00742 printf("pfm_restart: task probably terminated \n");
00743 }
00744 }
00745 break;
00746 default:
00747
00748 break;
00749 }
00750
00751 }
00752
00753
00754
00755
00756
00757
00758
00759
00760
00761
00762 void PerfMonAuditor::start_smpl()
00763 {
00764 MsgStream log(msgSvc(), name());
00765 ovfl_count = 0;
00766 num_smpl_pmds = 0;
00767 last_overflow = ~0;
00768 max_pmd = 0;
00769 memset(&pfmlib_options, 0, sizeof(pfmlib_options));
00770 pfmlib_options.pfm_debug = 0;
00771 pfmlib_options.pfm_verbose = 0;
00772 m_pfm.pfm_set_options(&pfmlib_options);
00773 ret = m_pfm.pfm_initialize();
00774 if(ret != PFMLIB_SUCCESS)
00775 {
00776 log << MSG::ERROR << "ERROR: Cannot initialize library: " << m_pfm.pfm_strerror(ret) << ". Aborting..." << endmsg;
00777 }
00778 struct sigaction act;
00779 memset(&act, 0, sizeof(act));
00780 act.sa_handler = (sig_t)sigio_handler;
00781
00782 sigaction(SIGIO, &act, 0);
00783 memset(&ctx, 0, sizeof(ctx));
00784 memset(&buf_arg, 0, sizeof(buf_arg));
00785 memset(&inp,0, sizeof(inp));
00786 memset(&outp,0, sizeof(outp));
00787 memset(pd_smpl, 0, sizeof(pd_smpl));
00788 memset(pc, 0, sizeof(pc));
00789 memset(&load_args, 0, sizeof(load_args));
00790 m_pfm.pfm_get_num_counters(&num_counters);
00791 memset(¶ms, 0, sizeof(params));
00792 memset(&nhm_params, 0, sizeof(nhm_params));
00793
00794 for(int i=0; i<used_counters_number; i++)
00795 {
00796 ret = m_pfm.pfm_find_full_event(event_cstr[i], &inp.pfp_events[i]);
00797 if(ret != PFMLIB_SUCCESS)
00798 {
00799 log << MSG::ERROR << "ERROR: cannot find event: " << event_cstr[i] << ". Aborting..." << endmsg;
00800 }
00801 }
00802 inp.pfp_dfl_plm = PFM_PLM3;
00803 inp.pfp_event_count = 4;
00804 for(int i=0; i<used_counters_number; i++)
00805 {
00806 if(inv[i])
00807 {
00808 (params.pfp_core_counters[i]).flags |= PFM_CORE_SEL_INV;
00809 (nhm_params.pfp_nhm_counters[i]).flags |= PFM_NHM_SEL_INV;
00810 }
00811 if(cmask[i]>0)
00812 {
00813 (params.pfp_core_counters[i]).cnt_mask = cmask[i];
00814 (nhm_params.pfp_nhm_counters[i]).cnt_mask = cmask[i];
00815 }
00816 }
00817 if(nehalem || westmere)
00818 {
00819 ret = m_pfm.pfm_dispatch_events(&inp, &nhm_params, &outp, NULL);
00820 }
00821 else
00822 {
00823 ret = m_pfm.pfm_dispatch_events(&inp, ¶ms, &outp, NULL);
00824 }
00825 if(ret != PFMLIB_SUCCESS)
00826 {
00827 log << MSG::ERROR << "ERROR: cannot configure events: " << m_pfm.pfm_strerror(ret) << ". Aborting..." << endmsg;
00828 }
00829 for(unsigned int i=0; i<outp.pfp_pmc_count; i++)
00830 {
00831 pc[i].reg_num = outp.pfp_pmcs[i].reg_num;
00832 pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
00833 }
00834 for(unsigned int i=0; i<outp.pfp_pmd_count; i++)
00835 {
00836 pd_smpl[i].reg_num = outp.pfp_pmds[i].reg_num;
00837 if(i)
00838 {
00839 pfm_bv_set(pd_smpl[0].reg_smpl_pmds, pd_smpl[i].reg_num);
00840 if(pd_smpl[i].reg_num>max_pmd)
00841 {
00842 max_pmd = pd_smpl[i].reg_num;
00843 }
00844 num_smpl_pmds++;
00845 }
00846 }
00847 for(int i=0; i<used_counters_number; i++)
00848 {
00849 pd_smpl[i].reg_flags |= PFM_REGFL_OVFL_NOTIFY | PFM_REGFL_RANDOM;
00850 pfm_bv_copy(pd_smpl[i].reg_reset_pmds, pd_smpl[i].reg_smpl_pmds, max_pmd);
00851 pd_smpl[i].reg_value = (uint64_t)(sp[i] * -1);
00852 pd_smpl[i].reg_short_reset = (uint64_t)(sp[i] * -1);
00853 pd_smpl[i].reg_long_reset = (uint64_t)(sp[i] * -1);
00854 pd_smpl[i].reg_random_seed = 5;
00855 pd_smpl[i].reg_random_mask = 0xff;
00856 }
00857 entry_size = sizeof(pfm_dfl_smpl_entry_t)+(num_smpl_pmds<<3);
00858 ctx.ctx_flags = 0;
00859 buf_arg.buf_size = 3*getpagesize()+512;
00860 ctx_fd = m_pfm.pfm_create_context(&ctx, (char *)FMT_NAME, &buf_arg, sizeof(buf_arg));
00861 if(ctx_fd==-1)
00862 {
00863 if(errno==ENOSYS)
00864 {
00865 log << MSG::ERROR << "ERROR: Your kernel does not have performance monitoring support! Aborting..." << endmsg;
00866 }
00867 log << MSG::ERROR << "ERROR: Can't create PFM context " << strerror(errno) << ". Aborting..." << endmsg;
00868 }
00869 buf_addr = mmap(NULL, (size_t)buf_arg.buf_size, PROT_READ, MAP_PRIVATE, ctx_fd, 0);
00870 if(buf_addr==MAP_FAILED)
00871 {
00872 log << MSG::ERROR << "ERROR: cannot mmap sampling buffer: " << strerror(errno) << ". Aborting..." << endmsg;
00873 }
00874 hdr = (pfm_dfl_smpl_hdr_t *)buf_addr;
00875 if(PFM_VERSION_MAJOR(hdr->hdr_version)<1)
00876 {
00877 log << MSG::ERROR << "ERROR: invalid buffer format version. Aborting..." << endmsg;
00878 }
00879 if(m_pfm.pfm_write_pmcs(ctx_fd, pc, outp.pfp_pmc_count))
00880 {
00881 log << MSG::ERROR << "ERROR: pfm_write_pmcs error errno " << strerror(errno) << ". Aborting..." << endmsg;
00882 }
00883 if(m_pfm.pfm_write_pmds(ctx_fd, pd_smpl, outp.pfp_pmd_count))
00884 {
00885 log << MSG::ERROR << "ERROR: pfm_write_pmds error errno " << strerror(errno) << ". Aborting..." << endmsg;
00886 }
00887 load_args.load_pid = getpid();
00888 if(m_pfm.pfm_load_context(ctx_fd, &load_args))
00889 {
00890 log << MSG::ERROR << "ERROR: pfm_load_context error errno " << strerror(errno) << ". Aborting..." << endmsg;
00891 }
00892 ret = fcntl(ctx_fd, F_SETFL, fcntl(ctx_fd, F_GETFL, 0) | O_ASYNC);
00893 if(ret == -1)
00894 {
00895 log << MSG::ERROR << "ERROR: cannot set ASYNC: " << strerror(errno) << ". Aborting..." << endmsg;
00896 }
00897 ret = fcntl(ctx_fd, F_SETOWN, getpid());
00898 if(ret == -1)
00899 {
00900 log << MSG::ERROR << "ERROR: cannot setown: " << strerror(errno) << ". Aborting..." << endmsg;
00901 }
00902
00903 m_pfm.pfm_start(ctx_fd, NULL);
00904 }
00905
00906
00907
00908
00909
00910 void PerfMonAuditor::stop_smpl()
00911 {
00912 MsgStream log(msgSvc(), name());
00913 m_pfm.pfm_self_stop(ctx_fd);
00914 process_smpl_buf(hdr, entry_size);
00915 close(ctx_fd);
00916 ret = munmap(hdr, (size_t)buf_arg.buf_size);
00917 if(ret)
00918 {
00919 log << MSG::ERROR << "Cannot unmap buffer: %s" << strerror(errno) << endmsg;
00920 }
00921 return;
00922 }
00923
00924
00925
00926
00927
00928
00929 void PerfMonAuditor::finalize_smpl()
00930 {
00931 MsgStream log(msgSvc(), name());
00932 char filename[MAX_OUTPUT_FILENAME_LENGTH];
00933 bzero(filename, MAX_OUTPUT_FILENAME_LENGTH);
00934 char to_cat[50];
00935 gzFile outfile;
00936 int err;
00937 for(int i=0; i<used_counters_number; i++)
00938 {
00939 sprintf(filename, "%s_%s", prefix_cstr, event_cstr[i]);
00940 for(int j=0; j<(int)strlen(filename); j++)
00941 {
00942 if(filename[j]==':')
00943 {
00944 filename[j]='-';
00945 }
00946 }
00947 bzero(to_cat, 50);
00948 if(inv[i])
00949 {
00950 strcpy(to_cat, "_INV_1");
00951 }
00952 if(cmask[i]>0)
00953 {
00954 sprintf(to_cat, "%s_CMASK_%d", to_cat, cmask[i]);
00955 }
00956 sprintf(filename, "%s%s.txt.gz", filename, to_cat);
00957 outfile = gzopen(filename, "wb");
00958 if(outfile!=NULL)
00959 {
00960 if(nehalem)
00961 {
00962 gzprintf(outfile, "NHM ");
00963 }
00964 else if(westmere)
00965 {
00966 gzprintf(outfile, "WSM ");
00967 }
00968 else if(core)
00969 {
00970 gzprintf(outfile, "CORE ");
00971 }
00972 if(gzprintf(outfile, "%s %d %d %d\n", event_cstr[i], cmask[i], inv[i], sp[i]) < (int)strlen(event_cstr[i]))
00973 {
00974 log << MSG::ERROR << "ERROR: gzputs err: " << gzerror(outfile, &err) << ". Aborting..." << endmsg;
00975 }
00976 for(std::map<std::string, std::map<unsigned long, unsigned int> >::iterator it=samples[i].begin(); it!=samples[i].end(); it++)
00977 {
00978 unsigned long long sum = 0;
00979 for(std::map<unsigned long, unsigned int>::iterator jt=(it->second).begin(); jt!=(it->second).end(); jt++)
00980 {
00981 sum += jt->second;
00982 }
00983 if(gzprintf(outfile, "%s%%%llu\n", (it->first).c_str(), sum) < (int)((it->first).length()))
00984 {
00985 log << MSG::ERROR << "ERROR: gzputs err: " << gzerror(outfile, &err) << ". Aborting..." << endmsg;
00986 }
00987 for(std::map<unsigned long, unsigned int>::iterator jt=(it->second).begin(); jt!=(it->second).end(); jt++)
00988 {
00989 char sym_name[SYM_NAME_MAX_LENGTH];
00990 bzero(sym_name, SYM_NAME_MAX_LENGTH);
00991 const char *libName;
00992 const char *symbolName;
00993 int libOffset = 0;
00994 int offset = 0;
00995 void *sym_addr = IgHookTrace::tosymbol((void *)(jt->first));
00996 if(sym_addr != NULL)
00997 {
00998 bool success = IgHookTrace::symbol(sym_addr, symbolName, libName, offset, libOffset);
00999 if(success)
01000 {
01001 if(symbolName!=NULL && strlen(symbolName)>0)
01002 {
01003 strcpy(sym_name, symbolName);
01004 strcat(sym_name, " ");
01005 }
01006 else
01007 {
01008 strcpy(sym_name, "??? ");
01009 }
01010 if(libName!=NULL && strlen(libName)>0)
01011 {
01012 strcat(sym_name, libName);
01013 strcat(sym_name, " ");
01014 }
01015 else
01016 {
01017 strcat(sym_name, "??? ");
01018 }
01019 sprintf(sym_name, "%s%d ", sym_name, libOffset);
01020 if(strlen(sym_name)<=0)
01021 {
01022 log << MSG::ERROR << "ERROR: Symbol name length is zero. Aborting..." << endmsg;
01023 }
01024 }
01025 else
01026 {
01027 strcpy(sym_name,"??? ??? 0 ");
01028 }
01029 }
01030 else
01031 {
01032 strcpy(sym_name,"??? ??? 0 ");
01033 }
01034 if(gzprintf(outfile, "%s %d\n", sym_name, jt->second) < (int)strlen(sym_name))
01035 {
01036 log << MSG::ERROR << "ERROR: gzputs err: " << gzerror(outfile, &err) << endmsg;
01037 }
01038 }
01039 }
01040 }
01041 else
01042 {
01043 log << MSG::ERROR << "ERROR: Could not open file: " << filename << ". Aborting..." << endmsg;
01044 }
01045 gzclose(outfile);
01046 }
01047 }
01048
01049
01050
01051 StatusCode PerfMonAuditor::finalize()
01052 {
01053 if(sampling == 0) finalizepm();
01054 else finalize_smpl();
01055 return Auditor::finalize();
01056 }
01057
01058
01059 void PerfMonAuditor::before(StandardEventType evt, INamedInterface *alg)
01060 {
01061 switch(evt)
01062 {
01063 case IAuditor::Initialize:
01064 i_beforeInitialize(alg);
01065 break;
01066 case IAuditor::Execute:
01067 i_beforeExecute(alg);
01068 break;
01069 default:
01070 break;
01071 }
01072 return;
01073 }
01074
01075 void PerfMonAuditor::after(StandardEventType evt, INamedInterface *alg, const StatusCode &)
01076 {
01077 switch(evt)
01078 {
01079 case IAuditor::Initialize:
01080 i_afterInitialize(alg);
01081 break;
01082 case IAuditor::Execute:
01083 i_afterExecute(alg);
01084 break;
01085 default:
01086 break;
01087 }
01088 return;
01089 }
01090
01091 void PerfMonAuditor::i_beforeInitialize(INamedInterface* alg)
01092 {
01093 if(alg == 0)
01094 {
01095 return;
01096 }
01097 return;
01098 }
01099
01100 void PerfMonAuditor::i_afterInitialize(INamedInterface* alg)
01101 {
01102 if(alg == 0)
01103 {
01104 return;
01105 }
01106 return;
01107 }
01108
01109 void PerfMonAuditor::i_beforeExecute(INamedInterface* alg)
01110 {
01111 MsgStream log(msgSvc(), name());
01112 if(alg == 0)
01113 {
01114 return;
01115 }
01116
01117 if(first_alg)
01118 {
01119 first_alg = false;
01120 first_alg_name = alg->name();
01121
01122 }
01123 if(!event_count_reached)
01124 {
01125 if(!first_alg_name.compare(alg->name()))
01126 {
01127 ph_ev_count++;
01128
01129 if(ph_ev_count==start_at_event)
01130 {
01131 event_count_reached = true;
01132
01133 }
01134 }
01135 }
01136 if(event_count_reached)
01137 {
01138
01139
01140 if(!alg_stack.empty())
01141 {
01142 if(sampling == 0) pausepm();
01143 else stop_smpl();
01144 }
01145 ++m_indent;
01146 std::vector <unsigned long int> zeroes(4,0);
01147 alg_stack.push(std::make_pair(alg, zeroes));
01148 if(sampling == 0) startpm();
01149 else start_smpl();
01150 }
01151 return;
01152 }
01153
01154 void PerfMonAuditor::i_afterExecute(INamedInterface* alg)
01155 {
01156 MsgStream log(msgSvc(), name());
01157 if(alg == 0)
01158 {
01159 return;
01160 }
01161
01162 if(event_count_reached)
01163 {
01164
01165
01166 if(sampling == 0) stoppm();
01167 else stop_smpl();
01168 alg_stack.pop();
01169 --m_indent;
01170 if(!alg_stack.empty())
01171 {
01172 if(sampling == 0) startpm();
01173 else start_smpl();
01174 }
01175 }
01176 return;
01177 }
01178
01179 DECLARE_AUDITOR_FACTORY(PerfMonAuditor)