Gaudi Framework, version v25r0
Home
Generated: Mon Feb 17 2014
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Properties
Friends
Macros
Groups
Pages
GaudiUtils
src
component
StalledEventMonitor.cpp
Go to the documentation of this file.
1
/*
2
* StalledEventMonitor.cpp
3
*
4
* Created on: Apr 19, 2010
5
* Author: Marco Clemencic
6
*/
7
8
// Include files
9
#include "
StalledEventMonitor.h
"
10
11
#include "
GaudiKernel/WatchdogThread.h
"
12
#include "
GaudiKernel/IIncidentSvc.h
"
13
#include "
GaudiKernel/Memory.h
"
14
15
#include "TSystem.h"
16
17
#include <
csignal
>
18
19
namespace
{
21
class
EventWatchdog:
public
WatchdogThread
{
22
public
:
23
EventWatchdog(
const
SmartIF<IMessageSvc>
&msgSvc,
24
const
std::string
&name,
25
boost::posix_time::time_duration timeout,
26
bool
stackTrace =
false
,
27
long
maxCount = 0,
28
bool
autostart =
false
):
29
WatchdogThread
(timeout, autostart),
30
log
(msgSvc, name),
31
m_counter(0),
32
m_maxCount(maxCount),
33
m_stackTrace(stackTrace){}
34
virtual
~EventWatchdog() {}
35
private
:
37
MsgStream
log
;
39
long
m_counter;
41
long
m_maxCount;
43
bool
m_stackTrace;
45
void
action
() {
46
if
(!m_counter) {
47
log
<<
MSG::WARNING
<<
"More than "
<<
getTimeout
().total_seconds()
48
<<
"s since the last "
<<
IncidentType::BeginEvent
<<
endmsg
;
49
}
else
{
50
log
<<
MSG::WARNING
<<
"Other "
<<
getTimeout
().total_seconds()
51
<<
"s passed"
<<
endmsg
;
52
}
53
log
<<
MSG::INFO
<<
"Current memory usage is"
54
" virtual size = "
<<
System::virtualMemory
() / 1024. <<
" MB"
55
", resident set size = "
<<
System::pagedMemory
() / 1024.<<
" MB"
56
<<
endmsg
;
57
if
(m_stackTrace && gSystem) {
58
// TSystem::StackTrace() prints on the standard error, so we
59
std::cerr
<<
"=== Stalled event: current stack trace ==="
<<
std::endl
;
60
gSystem->StackTrace();
61
}
62
++m_counter;
63
if
(m_maxCount > 0 && m_counter >= m_maxCount) {
64
log
<<
MSG::FATAL
<<
"too much time on a single event: aborting process"
<<
endmsg
;
65
std::raise(SIGABRT);
66
}
67
}
68
void
onPing
() {
69
if
(m_counter) {
70
if
(m_counter >= 3)
71
log
<<
MSG::INFO
<<
"Starting a new event after ~"
72
<< m_counter *
getTimeout
().total_seconds() <<
"s"
<<
endmsg
;
73
m_counter = 0;
74
}
75
}
76
void
onStop
() {
77
if
(m_counter >= 3)
78
log
<<
MSG::INFO
<<
"The last event took ~"
79
<< m_counter *
getTimeout
().total_seconds() <<
"s"
<<
endmsg
;
80
}
81
};
82
}
83
84
// Constructor
85
/// Build the service and declare its properties with their default values:
/// EventTimeout = 600, MaxTimeoutCount = 0, StackTrace = false.
///
/// @param name   forwarded to the base class constructor
/// @param svcLoc forwarded to the base class constructor
StalledEventMonitor::StalledEventMonitor(const std::string& name,
                                         ISvcLocator* svcLoc):
  base_class(name, svcLoc)
{
  m_eventTimeout = 600;
  declareProperty("EventTimeout", m_eventTimeout,
                  "Number of seconds allowed to process a single event (0 to disable the check).");

  m_maxTimeoutCount = 0;
  declareProperty("MaxTimeoutCount", m_maxTimeoutCount,
                  "Number timeouts before aborting the execution (0 means never abort).");

  m_stackTrace = false;
  declareProperty("StackTrace", m_stackTrace,
                  "Whether to print the stack-trace on timeout.");
}
97
98
// Destructor
99
/// Nothing to release explicitly here: the destructor body is empty.
StalledEventMonitor::~StalledEventMonitor() {}
102
103
// Initialization of the service.
104
/// Initialize the base class and, unless the time-out is 0, create the
/// watchdog and register this service as listener of BeginEvent incidents.
///
/// @return the status of the base class initialization if it failed,
///         StatusCode::FAILURE if the IncidentSvc cannot be retrieved,
///         StatusCode::SUCCESS otherwise (including the disabled case).
StatusCode StalledEventMonitor::initialize() {
  const StatusCode sc = base_class::initialize();
  if (sc.isFailure()) return sc;

  if (!m_eventTimeout) {
    // a time-out of 0 disables the monitoring
    warning() << "StalledEventMonitor/" << name()
              << " instantiated with 0 time-out: no monitoring performed"
              << endmsg;
    return StatusCode::SUCCESS;
  }

  // create the watchdog thread
  m_watchdog.reset(new EventWatchdog(msgSvc(),
                                     "EventWatchdog",
                                     boost::posix_time::seconds(m_eventTimeout),
                                     m_stackTrace,
                                     m_maxTimeoutCount));

  // register to the incident service
  const std::string serviceName("IncidentSvc");
  m_incidentSvc = serviceLocator()->service(serviceName);
  if (!m_incidentSvc) {
    error() << "Cannot retrieve " << serviceName << endmsg;
    return StatusCode::FAILURE;
  }
  debug() << "Register to the IncidentSvc" << endmsg;
  m_incidentSvc->addListener(this, IncidentType::BeginEvent);

  return StatusCode::SUCCESS;
}
134
135
// Start the monitoring.
136
/// Start the watchdog, if one has been created.
///
/// @return always StatusCode::SUCCESS
StatusCode StalledEventMonitor::start() {
  WatchdogThread* const watchdog = m_watchdog.get();
  if (watchdog) {
    watchdog->start();
  }
  return StatusCode::SUCCESS;
}
140
141
// Notify the watchdog that a new event has been started
142
void
StalledEventMonitor::handle
(
const
Incident
&
/* incident */
) {
143
if
(
m_watchdog
.
get
())
m_watchdog
->ping();
144
}
145
146
// Stop the monitoring.
147
/// Stop the watchdog, if one has been created.
///
/// @return always StatusCode::SUCCESS
StatusCode StalledEventMonitor::stop() {
  WatchdogThread* const watchdog = m_watchdog.get();
  if (watchdog) {
    watchdog->stop();
  }
  return StatusCode::SUCCESS;
}
151
152
// Finalization of the service.
153
/// Finalize the service: destroy the watchdog (if any), unregister from the
/// IncidentSvc (if it was retrieved) and finalize the base class.
///
/// @return the status returned by base_class::finalize()
StatusCode StalledEventMonitor::finalize() {
  // destroy the watchdog thread (if any)
  m_watchdog.reset();

  // initialize() retrieves the IncidentSvc only when the time-out is not 0
  // (and may fail to retrieve it), so the handle can be empty here: in that
  // case there is no listener to remove and dereferencing it would be invalid.
  if (!m_incidentSvc) {
    return base_class::finalize();
  }

  // unregistering from the IncidentSvc
  m_incidentSvc->removeListener(this, IncidentType::BeginEvent);
  m_incidentSvc.reset();

  return base_class::finalize();
}
161
162
// Declaration of the factory
163
// Invokes the DECLARE_COMPONENT macro for StalledEventMonitor; the macro
// itself is not defined in the visible part of this file.
DECLARE_COMPONENT
(
StalledEventMonitor
)
Generated at Mon Feb 17 2014 14:37:49 for Gaudi Framework, version v25r0 by
Doxygen
version 1.8.2 written by
Dimitri van Heesch
, © 1997-2004