ApMon.h File Reference
Declarations for the
ApMon class.
More...
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdexcept>
#include <ctype.h>
#include <time.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/utsname.h>
#include <sys/time.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <net/if.h>
#include <rpc/rpc.h>
#include <netdb.h>
#include <unistd.h>
#include <pthread.h>
#include <pwd.h>
#include <grp.h>
#include <linux/param.h>
Go to the source code of this file.
|
Classes |
| struct | ConfURLs |
| | Data structure which holds the configuration URLs. More...
|
| struct | MonitoredJob |
| | Data structure which holds information about a job monitored by ApMon. More...
|
| class | ApMon |
| | Data structure used for sending monitoring data to a MonALISA module. More...
|
Defines |
| #define | XDR_STRING 0 |
| | Used to code the string data type.
|
| #define | XDR_INT32 2 |
| | Used to code the 4-byte integer data type.
|
| #define | XDR_REAL32 4 |
| | Used to code the 4-byte real data type.
|
| #define | XDR_REAL64 5 |
| | Used to code the 8-byte real data type.
|
| #define | MAX_DGRAM_SIZE 8192 |
| | Maximum UDP datagram size.
|
| #define | MAX_STRING_LEN 512 |
| | Maximum string length (for hostnames).
|
| #define | MAX_STRING_LEN1 (MAX_STRING_LEN + 1) |
| #define | RET_SUCCESS 0 |
| | Function return value (success).
|
| #define | RET_ERROR -1 |
| | Function return value (error).
|
| #define | PROCUTILS_ERROR -2 |
| #define | RET_NOT_SENT -3 |
| | A datagram was not sent because the number of datagrams that can be sent per second is limited.
|
| #define | MAX_N_DESTINATIONS 30 |
| | Maximum number of destination hosts to which we send the parameters.
|
| #define | DEFAULT_PORT 8884 |
| | The default port on which MonALISA listens.
|
| #define | MAX_HEADER_LENGTH 40 |
| | Maximum header length.
|
| #define | FILE_INIT 1 |
| | Indicates that the object was initialized from a file.
|
| #define | LIST_INIT 2 |
| | Indicates that the object was initialized from a list.
|
| #define | DIRECT_INIT 3 |
| | Indicates that the object was initialized directly.
|
| #define | JOB_MONITOR_INTERVAL 20 |
| | Time interval (in sec) at which job monitoring datagrams are sent.
|
| #define | SYS_MONITOR_INTERVAL 20 |
| | Time interval (in sec) at which system monitoring datagrams are sent.
|
| #define | RECHECK_INTERVAL 600 |
| | Time interval (in sec) at which the configuration files are checked for changes.
|
| #define | GEN_MONITOR_INTERVALS 10 |
| | The number of time intervals at which ApMon sends general system monitoring information (considering the time intervals at which ApMon sends system monitoring information).
|
| #define | MAX_MONITORED_JOBS 30 |
| | The maximum number of jobs that can be monitored.
|
| #define | MAX_SYS_PARAMS 30 |
| | The maximum number of system parameters.
|
| #define | MAX_GEN_PARAMS 30 |
| | The maximum number of general system parameters.
|
| #define | MAX_JOB_PARAMS 30 |
| | The maximum number of job parameters.
|
| #define | MAX_MSG_RATE 20 |
| | The maximum number of messages per second that will be sent to MonALISA.
|
| #define | NLETTERS 26 |
| #define | TWO_BILLION 2000000000 |
| #define | APMON_VERSION "2.2.1" |
Functions |
| void * | bkTask (void *param) |
| | Performs background actions like rechecking the configuration file and the URLs and sending monitoring information.
|
Detailed Description
Declarations for the
ApMon class.
The ApMon class can be used for sending monitoring data to one or more destination hosts that run MonALISA.
Definition in file ApMon.h.
Define Documentation
| #define APMON_VERSION "2.2.1" |
| #define DEFAULT_PORT 8884 |
The default port on which MonALISA listens.
Definition at line 114 of file ApMon.h.
| #define DIRECT_INIT 3 |
Indicates that the object was initialized directly.
Definition at line 126 of file ApMon.h.
| #define FILE_INIT 1 |
Indicates that the object was initialized from a file.
Definition at line 118 of file ApMon.h.
| #define GEN_MONITOR_INTERVALS 10 |
The number of time intervals at which ApMon sends general system monitoring information (considering the time intervals at which ApMon sends system monitoring information).
Definition at line 138 of file ApMon.h.
| #define JOB_MONITOR_INTERVAL 20 |
Time interval (in sec) at which job monitoring datagrams are sent.
Definition at line 128 of file ApMon.h.
| #define LIST_INIT 2 |
Indicates that the object was initialized from a list.
Definition at line 123 of file ApMon.h.
| #define MAX_DGRAM_SIZE 8192 |
Maximum UDP datagram size.
Definition at line 102 of file ApMon.h.
| #define MAX_GEN_PARAMS 30 |
The maximum number of general system parameters.
Definition at line 144 of file ApMon.h.
| #define MAX_HEADER_LENGTH 40 |
Maximum header length.
Definition at line 115 of file ApMon.h.
| #define MAX_JOB_PARAMS 30 |
The maximum number of job parameters.
Definition at line 146 of file ApMon.h.
| #define MAX_MONITORED_JOBS 30 |
The maximum number of jobs that can be monitored.
Definition at line 140 of file ApMon.h.
| #define MAX_MSG_RATE 20 |
The maximum number of messages per second that will be sent to MonALISA.
Definition at line 149 of file ApMon.h.
| #define MAX_N_DESTINATIONS 30 |
Maximum number of destination hosts to which we send the parameters.
Definition at line 111 of file ApMon.h.
| #define MAX_STRING_LEN 512 |
Maximum string length (for hostnames).
Definition at line 103 of file ApMon.h.
| #define MAX_STRING_LEN1 (MAX_STRING_LEN + 1) |
| #define MAX_SYS_PARAMS 30 |
The maximum number of system parameters.
Definition at line 142 of file ApMon.h.
| #define PROCUTILS_ERROR -2 |
| #define RECHECK_INTERVAL 600 |
Time interval (in sec) at which the configuration files are checked for changes.
Definition at line 133 of file ApMon.h.
| #define RET_ERROR -1 |
Function return value (error).
Definition at line 106 of file ApMon.h.
| #define RET_NOT_SENT -3 |
A datagram was not sent because the number of datagrams that can be sent per second is limited.
Definition at line 108 of file ApMon.h.
| #define RET_SUCCESS 0 |
Function return value (success).
Definition at line 105 of file ApMon.h.
| #define SYS_MONITOR_INTERVAL 20 |
Time interval (in sec) at which system monitoring datagrams are sent.
Definition at line 130 of file ApMon.h.
| #define TWO_BILLION 2000000000 |
| #define XDR_INT32 2 |
Used to code the 4-byte integer data type.
Definition at line 97 of file ApMon.h.
| #define XDR_REAL32 4 |
Used to code the 4-byte real data type.
Definition at line 99 of file ApMon.h.
| #define XDR_REAL64 5 |
Used to code the 8-byte real data type.
Definition at line 100 of file ApMon.h.
| #define XDR_STRING 0 |
Used to code the string data type.
Definition at line 95 of file ApMon.h.
Function Documentation
| void* bkTask |
( |
void * |
param |
) |
|
Performs background actions like rechecking the configuration file and the URLs and sending monitoring information.
This function is executed in a separate background thread and has two roles: it automatically sends the system/job monitoring parameters (if the user requested this), and it checks the configuration file/URLs for changes.
Definition at line 913 of file ApMon.cpp.
00913 {
00914 #else
00915 DWORD WINAPI bkTask(void *param) {
00916 #endif
00917 struct stat st;
00918 #ifndef WIN32
00919 struct timespec delay;
00920 #else
00921 DWORD delay;
00922 #endif
00923 bool resourceChanged, haveChange;
00924 int nextOp = -1, i, ret;
00925 int generalInfoCount;
00926 time_t crtTime, timeRemained;
00927 time_t nextRecheck = 0, nextJobInfoSend = 0, nextSysInfoSend = 0;
00928 ApMon *apm = (ApMon *)param;
00929 char logmsg[200];
00930
00931 logger(INFO, "[Starting background thread...]");
00932 apm -> bkThreadStarted = true;
00933
00934 crtTime = time(NULL);
00935
00936 pthread_mutex_lock(&(apm -> mutexBack));
00937 if (apm -> confCheck) {
00938 nextRecheck = crtTime + apm -> crtRecheckInterval;
00939
00940
00941
00942
00943 }
00944 if (apm -> jobMonitoring)
00945 nextJobInfoSend = crtTime + apm -> jobMonitorInterval;
00946 if (apm -> sysMonitoring)
00947 nextSysInfoSend = crtTime + apm -> sysMonitorInterval;
00948 pthread_mutex_unlock(&(apm -> mutexBack));
00949
00950 timeRemained = -1;
00951 generalInfoCount = 0;
00952
00953 while (1) {
00954 pthread_mutex_lock(&apm -> mutexBack);
00955 if (apm -> stopBkThread) {
00956
00957 pthread_mutex_unlock(&apm -> mutexBack);
00958 break;
00959 }
00960 pthread_mutex_unlock(&apm -> mutexBack);
00961
00962
00963
00964
00965
00966
00967 if (nextRecheck > 0 && (nextJobInfoSend <= 0 ||
00968 nextRecheck <= nextJobInfoSend)) {
00969 if (nextSysInfoSend <= 0 || nextRecheck <= nextSysInfoSend) {
00970 nextOp = RECHECK_CONF;
00971 timeRemained = nextRecheck - crtTime;
00972 } else {
00973 nextOp = SYS_INFO_SEND;
00974 timeRemained = nextSysInfoSend - crtTime;
00975 }
00976 } else {
00977 if (nextJobInfoSend > 0 && (nextSysInfoSend <= 0 ||
00978 nextJobInfoSend <= nextSysInfoSend)) {
00979 nextOp = JOB_INFO_SEND;
00980 timeRemained = nextJobInfoSend - crtTime;
00981 } else if (nextSysInfoSend > 0) {
00982 nextOp = SYS_INFO_SEND;
00983 timeRemained = nextSysInfoSend - crtTime;
00984 }
00985 }
00986
00987 if (timeRemained == -1)
00988 timeRemained = RECHECK_INTERVAL;
00989
00990 #ifndef WIN32
00991
00992 delay.tv_sec = crtTime + timeRemained;
00993 delay.tv_nsec = 0;
00994 #else
00995 delay = ( timeRemained) * 1000;
00996 #endif
00997
00998 pthread_mutex_lock(&(apm -> mutexBack));
00999
01000 pthread_mutex_lock(&(apm -> mutexCond));
01001
01002 haveChange = false;
01003 if (apm -> jobMonChanged || apm -> sysMonChanged || apm -> recheckChanged)
01004 haveChange = true;
01005 if (apm -> jobMonChanged) {
01006 if (apm -> jobMonitoring)
01007 nextJobInfoSend = crtTime + apm -> jobMonitorInterval;
01008 else
01009 nextJobInfoSend = -1;
01010 apm -> jobMonChanged = false;
01011 }
01012 if (apm -> sysMonChanged) {
01013 if (apm -> sysMonitoring)
01014 nextSysInfoSend = crtTime + apm -> sysMonitorInterval;
01015 else
01016 nextSysInfoSend = -1;
01017 apm -> sysMonChanged = false;
01018 }
01019 if (apm -> recheckChanged) {
01020 if (apm -> confCheck) {
01021 nextRecheck = crtTime + apm -> crtRecheckInterval;
01022 }
01023 else
01024 nextRecheck = -1;
01025 apm -> recheckChanged = false;
01026 }
01027 pthread_mutex_unlock(&(apm -> mutexBack));
01028
01029 if (haveChange) {
01030 pthread_mutex_unlock(&(apm -> mutexCond));
01031 continue;
01032 }
01033
01034
01035
01036 #ifndef WIN32
01037 ret = pthread_cond_timedwait(&(apm -> confChangedCond),
01038 &(apm -> mutexCond), &delay);
01039 pthread_mutex_unlock(&(apm -> mutexCond));
01040 #else
01041 pthread_mutex_unlock(&(apm -> mutexCond));
01042 ret = WaitForSingleObject(apm->confChangedCond, delay);
01043 #endif
01044 if (ret == ETIMEDOUT) {
01045
01046
01047 if (nextOp == JOB_INFO_SEND) {
01048 apm -> sendJobInfo();
01049 crtTime = time(NULL);
01050 nextJobInfoSend = crtTime + apm -> getJobMonitorInterval();
01051 }
01052
01053 if (nextOp == SYS_INFO_SEND) {
01054 apm -> sendSysInfo();
01055 if (apm -> getGenMonitoring()) {
01056 if (generalInfoCount <= 1)
01057 apm -> sendGeneralInfo();
01058 generalInfoCount = (generalInfoCount + 1) % apm -> genMonitorIntervals;
01059 }
01060 crtTime = time(NULL);
01061 nextSysInfoSend = crtTime + apm -> getSysMonitorInterval();
01062 }
01063
01064 if (nextOp == RECHECK_CONF) {
01065
01066 resourceChanged = false;
01067 try {
01068 if (apm -> initType == FILE_INIT) {
01069 sprintf(logmsg, "Checking for modifications for file %s ",
01070 apm -> initSources[0]);
01071 logger(INFO, logmsg);
01072 stat(apm -> initSources[0], &st);
01073 if (st.st_mtime > apm -> lastModifFile) {
01074 sprintf(logmsg, "File %s modified ", apm -> initSources[0]);
01075 logger(INFO, logmsg);
01076 resourceChanged = true;
01077 }
01078 }
01079
01080
01081 for (i = 0; i < apm -> confURLs.nConfURLs; i++) {
01082 sprintf(logmsg, "[Checking for modifications for URL %s ] ",
01083 apm -> confURLs.vURLs[i]);
01084 logger(INFO, logmsg);
01085 if (urlModified(apm -> confURLs.vURLs[i], apm -> confURLs.lastModifURLs[i])) {
01086 sprintf(logmsg, "URL %s modified ", apm -> confURLs.vURLs[i]);
01087 logger(INFO, logmsg);
01088 resourceChanged = true;
01089 break;
01090 }
01091 }
01092
01093 if (resourceChanged) {
01094 logger(INFO, "Reloading configuration...");
01095 if (apm -> initType == FILE_INIT)
01096 apm -> initialize(apm -> initSources[0], false);
01097 else
01098 apm -> initialize(apm -> nInitSources, apm -> initSources, false);
01099 }
01100 apm -> setCrtRecheckInterval(apm -> getRecheckInterval());
01101 } catch (runtime_error &err) {
01102 logger(WARNING, err.what());
01103 logger(WARNING, "Increasing the time interval for reloading the configuration...");
01104 apm -> setCrtRecheckInterval(apm -> getRecheckInterval() * 5);
01105 }
01106 crtTime = time(NULL);
01107 nextRecheck = crtTime + apm -> getCrtRecheckInterval();
01108
01109 }
01110 }
01111
01112 }
01113
01114 #ifndef WIN32
01115 return NULL;
01116 #else
01117 return 0;
01118 #endif
01119 }