// StalledEventMonitor.cpp
// Go to the documentation of this file.
00002
00003
00004
00005
00006
00007
00008
00009 #include "StalledEventMonitor.h"
00010
00011 #include "GaudiKernel/WatchdogThread.h"
00012 #include "GaudiKernel/IIncidentSvc.h"
00013 #include "GaudiKernel/Memory.h"
00014
00015 namespace {
00017 class EventWatchdog: public WatchdogThread {
00018 public:
00019 EventWatchdog(const SmartIF<IMessageSvc> &msgSvc,
00020 const std::string &name,
00021 boost::posix_time::time_duration timeout,
00022 bool autostart = false):
00023 WatchdogThread(timeout, autostart),
00024 log(msgSvc, name),
00025 m_counter(0) {}
00026 virtual ~EventWatchdog() {}
00027 private:
00028 MsgStream log;
00029 long m_counter;
00030 void action() {
00031 if (!m_counter) {
00032 log << MSG::WARNING << "More than " << getTimeout().total_seconds()
00033 << "s since the last " << IncidentType::BeginEvent << endmsg;
00034 } else {
00035 log << MSG::WARNING << "Other " << getTimeout().total_seconds()
00036 << "s passed" << endmsg;
00037 }
00038 log << MSG::INFO << "Current memory usage is"
00039 " virtual size = " << System::virtualMemory() / 1024. << " MB"
00040 ", resident set size = " << System::pagedMemory() / 1024.<< " MB"
00041 << endmsg;
00042 ++m_counter;
00043 }
00044 void onPing() {
00045 if (m_counter) {
00046 if (m_counter >= 3)
00047 log << MSG::INFO << "Starting a new event after ~"
00048 << m_counter * getTimeout().total_seconds() << "s" << endmsg;
00049 m_counter = 0;
00050 }
00051 }
00052 void onStop() {
00053 if (m_counter >= 3)
00054 log << MSG::INFO << "The last event took ~"
00055 << m_counter * getTimeout().total_seconds() << "s" << endmsg;
00056 }
00057 };
00058 }
00059
00060
00061 StalledEventMonitor::StalledEventMonitor(const std::string& name, ISvcLocator* svcLoc):
00062 base_class(name, svcLoc) {
00063
00064 declareProperty("EventTimeout", m_eventTimeout = 600,
00065 "Number of seconds allowed to process a single event (0 to disable the check)");
00066
00067 }
00068
00069
00070 StalledEventMonitor::~StalledEventMonitor(){
00071
00072 }
00073
00074
00075 StatusCode StalledEventMonitor::initialize() {
00076 StatusCode sc = base_class::initialize();
00077 if (sc.isFailure()) return sc;
00078
00079
00080 if (m_eventTimeout) {
00081
00082 m_watchdog = std::auto_ptr<WatchdogThread>(
00083 new EventWatchdog(msgSvc(),
00084 "EventWatchdog",
00085 boost::posix_time::seconds(m_eventTimeout)));
00086
00087
00088 std::string serviceName = "IncidentSvc";
00089 m_incidentSvc = serviceLocator()->service(serviceName);
00090 if ( ! m_incidentSvc ) {
00091 error() << "Cannot retrieve " << serviceName << endmsg;
00092 return StatusCode::FAILURE;
00093 }
00094 debug() << "Register to the IncidentSvc" << endmsg;
00095 m_incidentSvc->addListener(this, IncidentType::BeginEvent);
00096 } else {
00097 warning() << "StalledEventMonitor/" << name()
00098 << " instantiated with 0 time-out: no monitoring performed" << endmsg;
00099 }
00100
00101 return StatusCode::SUCCESS;
00102 }
00103
00104
00105 StatusCode StalledEventMonitor::start() {
00106 if (m_watchdog.get()) m_watchdog->start();
00107 return StatusCode::SUCCESS;
00108 }
00109
00110
00111 void StalledEventMonitor::handle(const Incident& ) {
00112 if (m_watchdog.get()) m_watchdog->ping();
00113 }
00114
00115
00116 StatusCode StalledEventMonitor::stop() {
00117 if (m_watchdog.get()) m_watchdog->stop();
00118 return StatusCode::SUCCESS;
00119 }
00120
00121
00122 StatusCode StalledEventMonitor::finalize() {
00123
00124 m_watchdog.reset();
00125
00126 m_incidentSvc->removeListener(this, IncidentType::BeginEvent);
00127 m_incidentSvc.reset();
00128 return base_class::finalize();
00129 }
00130
00131
// NOTE(review): presumably this macro registers StalledEventMonitor with the
// Gaudi service factory so the framework can instantiate it by name — confirm
// against GaudiKernel/SvcFactory.h.
00132 #include "GaudiKernel/SvcFactory.h"
00133 DECLARE_SERVICE_FACTORY(StalledEventMonitor)