ApMon.h File Reference
Declarations for the ApMon class.
More...
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdexcept>
#include <ctype.h>
#include <time.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/utsname.h>
#include <sys/time.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <net/if.h>
#include <rpc/rpc.h>
#include <netdb.h>
#include <unistd.h>
#include <pthread.h>
#include <pwd.h>
#include <grp.h>
#include <linux/param.h>
Go to the source code of this file.
Classes |
| struct | ConfURLs |
| | Data structure which holds the configuration URLs. More...
|
| struct | MonitoredJob |
| | Data structure which holds information about a job monitored by ApMon. More...
|
| class | ApMon |
| | Data structure used for sending monitoring data to a MonALISA module. More...
|
Defines |
| #define | XDR_STRING 0 |
| | Used to code the string data type.
|
| #define | XDR_INT32 2 |
| | Used to code the 4-byte integer data type.
|
| #define | XDR_REAL32 4 |
| | Used to code the 4-byte real data type.
|
| #define | XDR_REAL64 5 |
| | Used to code the 8-byte real data type.
|
| #define | MAX_DGRAM_SIZE 8192 |
| | Maximum UDP datagram size.
|
| #define | MAX_STRING_LEN 512 |
| | Maximum string length (for hostnames).
|
| #define | MAX_STRING_LEN1 (MAX_STRING_LEN + 1) |
| #define | RET_SUCCESS 0 |
| | Function return value (success).
|
| #define | RET_ERROR -1 |
| | Function return value (error).
|
| #define | PROCUTILS_ERROR -2 |
| #define | RET_NOT_SENT -3 |
| | A datagram was not sent because the number of datagrams that can be sent per second is limited.
|
| #define | MAX_N_DESTINATIONS 30 |
| | Maximum number of destination hosts to which we send the parameters.
|
| #define | DEFAULT_PORT 8884 |
| | The default port on which MonALISA listens.
|
| #define | MAX_HEADER_LENGTH 40 |
| | Maximum header length.
|
| #define | FILE_INIT 1 |
| | Indicates that the object was initialized from a file.
|
| #define | LIST_INIT 2 |
| | Indicates that the object was initialized from a list.
|
| #define | DIRECT_INIT 3 |
| | Indicates that the object was initialized directly.
|
| #define | JOB_MONITOR_INTERVAL 20 |
| | Time interval (in sec) at which job monitoring datagrams are sent.
|
| #define | SYS_MONITOR_INTERVAL 20 |
| | Time interval (in sec) at which system monitoring datagrams are sent.
|
| #define | RECHECK_INTERVAL 600 |
| | Time interval (in sec) at which the configuration files are checked for changes.
|
| #define | GEN_MONITOR_INTERVALS 10 |
| | The number of time intervals at which ApMon sends general system monitoring information (considering the time intervals at which ApMon sends system monitoring information).
|
| #define | MAX_MONITORED_JOBS 30 |
| | The maximum number of jobs that can be monitored.
|
| #define | MAX_SYS_PARAMS 30 |
| | The maximum number of system parameters.
|
| #define | MAX_GEN_PARAMS 30 |
| | The maximum number of general system parameters.
|
| #define | MAX_JOB_PARAMS 30 |
| | The maximum number of job parameters.
|
| #define | MAX_MSG_RATE 20 |
| | The maximum number of messages per second that will be sent to MonALISA.
|
| #define | NLETTERS 26 |
| #define | TWO_BILLION 2000000000 |
| #define | APMON_VERSION "2.2.1" |
Functions |
| void * | bkTask (void *param) |
| | Performs background actions like rechecking the configuration file and the URLs and sending monitoring information.
|
Detailed Description
Declarations for the ApMon class.
The ApMon class can be used for sending monitoring data to one or more destination hosts that run MonALISA.
Definition in file ApMon.h.
Define Documentation
| #define APMON_VERSION "2.2.1" |
| #define DEFAULT_PORT 8884 |
The default port on which MonALISA listens.
Definition at line 114 of file ApMon.h.
| #define DIRECT_INIT 3 |
Indicates that the object was initialized directly.
Definition at line 126 of file ApMon.h.
| #define FILE_INIT 1 |
Indicates that the object was initialized from a file.
Definition at line 118 of file ApMon.h.
| #define GEN_MONITOR_INTERVALS 10 |
The number of time intervals at which ApMon sends general system monitoring information (considering the time intervals at which ApMon sends system monitoring information).
Definition at line 138 of file ApMon.h.
| #define JOB_MONITOR_INTERVAL 20 |
Time interval (in sec) at which job monitoring datagrams are sent.
Definition at line 128 of file ApMon.h.
| #define LIST_INIT 2 |
Indicates that the object was initialized from a list.
Definition at line 123 of file ApMon.h.
| #define MAX_DGRAM_SIZE 8192 |
Maximum UDP datagram size.
Definition at line 102 of file ApMon.h.
| #define MAX_GEN_PARAMS 30 |
The maximum number of general system parameters.
Definition at line 144 of file ApMon.h.
| #define MAX_HEADER_LENGTH 40 |
Maximum header length.
Definition at line 115 of file ApMon.h.
| #define MAX_JOB_PARAMS 30 |
The maximum number of job parameters.
Definition at line 146 of file ApMon.h.
| #define MAX_MONITORED_JOBS 30 |
The maximum number of jobs that can be monitored.
Definition at line 140 of file ApMon.h.
| #define MAX_MSG_RATE 20 |
The maximum number of messages per second that will be sent to MonALISA.
Definition at line 149 of file ApMon.h.
| #define MAX_N_DESTINATIONS 30 |
Maximum number of destination hosts to which we send the parameters.
Definition at line 111 of file ApMon.h.
| #define MAX_STRING_LEN 512 |
Maximum string length (for hostnames).
Definition at line 103 of file ApMon.h.
| #define MAX_STRING_LEN1 (MAX_STRING_LEN + 1) |
| #define MAX_SYS_PARAMS 30 |
The maximum number of system parameters.
Definition at line 142 of file ApMon.h.
| #define PROCUTILS_ERROR -2 |
| #define RECHECK_INTERVAL 600 |
Time interval (in sec) at which the configuration files are checked for changes.
Definition at line 133 of file ApMon.h.
| #define RET_ERROR -1 |
Function return value (error).
Definition at line 106 of file ApMon.h.
| #define RET_NOT_SENT -3 |
A datagram was not sent because the number of datagrams that can be sent per second is limited.
Definition at line 108 of file ApMon.h.
| #define RET_SUCCESS 0 |
Function return value (success).
Definition at line 105 of file ApMon.h.
| #define SYS_MONITOR_INTERVAL 20 |
Time interval (in sec) at which system monitoring datagrams are sent.
Definition at line 130 of file ApMon.h.
| #define TWO_BILLION 2000000000 |
| #define XDR_INT32 2 |
Used to code the 4-byte integer data type.
Definition at line 97 of file ApMon.h.
| #define XDR_REAL32 4 |
Used to code the 4-byte real data type.
Definition at line 99 of file ApMon.h.
| #define XDR_REAL64 5 |
Used to code the 8-byte real data type.
Definition at line 100 of file ApMon.h.
| #define XDR_STRING 0 |
Used to code the string data type.
Definition at line 95 of file ApMon.h.
Function Documentation
| void* bkTask |
( |
void * |
param |
) |
|
Performs background actions like rechecking the configuration file and the URLs and sending monitoring information.
This function is executed in a background thread and has two roles: it automatically sends the system/job monitoring parameters (if the user requested) and it checks the configuration file/URLs for changes.
(this is done in a separate thread).
Definition at line 918 of file ApMon.cpp.
00918 { // NOTE(review): the signature line of this (non-WIN32) branch is not part of the listing; the WIN32 variant is on 00920
00919 #else
00920 DWORD WINAPI bkTask(void *param) {
00921 #endif
00922 struct stat st;
00923 #ifndef WIN32
00924 struct timespec delay;
00925 #else
00926 DWORD delay;
00927 #endif
00928 bool resourceChanged, haveChange;
00929 int nextOp = -1, i, ret;
00930 int generalInfoCount;
00931 time_t crtTime, timeRemained;
00932 time_t nextRecheck = 0, nextJobInfoSend = 0, nextSysInfoSend = 0;
00933 ApMon *apm = (ApMon *)param; // the thread argument is the ApMon instance to serve
00934 char logmsg[200];
00935
00936 logger(INFO, "[Starting background thread...]");
00937 apm -> bkThreadStarted = true;
00938
00939 crtTime = time(NULL);
00940 // Compute the initial deadlines under mutexBack; a deadline left at 0 means the activity is disabled.
00941 pthread_mutex_lock(&(apm -> mutexBack));
00942 if (apm -> confCheck) {
00943 nextRecheck = crtTime + apm -> crtRecheckInterval;
00944
00945
00946
00947
00948 }
00949 if (apm -> jobMonitoring)
00950 nextJobInfoSend = crtTime + apm -> jobMonitorInterval;
00951 if (apm -> sysMonitoring)
00952 nextSysInfoSend = crtTime + apm -> sysMonitorInterval;
00953 pthread_mutex_unlock(&(apm -> mutexBack));
00954
00955 timeRemained = -1;
00956 generalInfoCount = 0;
00957 // Main loop: runs until stopBkThread is observed (checked under mutexBack).
00958 while (1) {
00959 pthread_mutex_lock(&apm -> mutexBack);
00960 if (apm -> stopBkThread) {
00961
00962 pthread_mutex_unlock(&apm -> mutexBack);
00963 break;
00964 }
00965 pthread_mutex_unlock(&apm -> mutexBack);
00966
00967 // Choose the earliest pending deadline; a deadline <= 0 is treated as disabled.
00968 // On a tie, RECHECK_CONF wins over both sends and JOB_INFO_SEND wins over SYS_INFO_SEND.
00969 // NOTE(review): if no deadline is active, nextOp and timeRemained keep their previous values.
00970
00971
00972 if (nextRecheck > 0 && (nextJobInfoSend <= 0 ||
00973 nextRecheck <= nextJobInfoSend)) {
00974 if (nextSysInfoSend <= 0 || nextRecheck <= nextSysInfoSend) {
00975 nextOp = RECHECK_CONF;
00976 timeRemained = nextRecheck - crtTime;
00977 } else {
00978 nextOp = SYS_INFO_SEND;
00979 timeRemained = nextSysInfoSend - crtTime;
00980 }
00981 } else {
00982 if (nextJobInfoSend > 0 && (nextSysInfoSend <= 0 ||
00983 nextJobInfoSend <= nextSysInfoSend)) {
00984 nextOp = JOB_INFO_SEND;
00985 timeRemained = nextJobInfoSend - crtTime;
00986 } else if (nextSysInfoSend > 0) {
00987 nextOp = SYS_INFO_SEND;
00988 timeRemained = nextSysInfoSend - crtTime;
00989 }
00990 }
00991 // -1 is the value set before the loop; NOTE(review): a deadline exactly one second overdue also yields -1 here.
00992 if (timeRemained == -1)
00993 timeRemained = RECHECK_INTERVAL;
00994
00995 #ifndef WIN32
00996 // POSIX branch: delay is a wall-clock deadline in whole seconds, passed to pthread_cond_timedwait below.
00997 delay.tv_sec = crtTime + timeRemained;
00998 delay.tv_nsec = 0;
00999 #else
01000 delay = ( timeRemained) * 1000; // WIN32 branch: seconds scaled by 1000 for WaitForSingleObject below
01001 #endif
01002
01003 pthread_mutex_lock(&(apm -> mutexBack));
01004 // Lock order here: mutexBack first, then mutexCond.
01005 pthread_mutex_lock(&(apm -> mutexCond));
01006 // Consume the *Changed flags (only read and cleared in this function) and re-arm or disable (-1) each deadline.
01007 haveChange = false;
01008 if (apm -> jobMonChanged || apm -> sysMonChanged || apm -> recheckChanged)
01009 haveChange = true;
01010 if (apm -> jobMonChanged) {
01011 if (apm -> jobMonitoring)
01012 nextJobInfoSend = crtTime + apm -> jobMonitorInterval;
01013 else
01014 nextJobInfoSend = -1;
01015 apm -> jobMonChanged = false;
01016 }
01017 if (apm -> sysMonChanged) {
01018 if (apm -> sysMonitoring)
01019 nextSysInfoSend = crtTime + apm -> sysMonitorInterval;
01020 else
01021 nextSysInfoSend = -1;
01022 apm -> sysMonChanged = false;
01023 }
01024 if (apm -> recheckChanged) {
01025 if (apm -> confCheck) {
01026 nextRecheck = crtTime + apm -> crtRecheckInterval;
01027 }
01028 else
01029 nextRecheck = -1;
01030 apm -> recheckChanged = false;
01031 }
01032 pthread_mutex_unlock(&(apm -> mutexBack));
01033 // After a change, skip the wait and go back to recompute the next operation.
01034 if (haveChange) {
01035 pthread_mutex_unlock(&(apm -> mutexCond));
01036 continue;
01037 }
01038 // Wait until the deadline passes or confChangedCond is signalled.
01039 // POSIX branch: mutexCond is held going into the wait and released right after it returns.
01040 // WIN32 branch: mutexCond is released before calling WaitForSingleObject.
01041 #ifndef WIN32
01042 ret = pthread_cond_timedwait(&(apm -> confChangedCond),
01043 &(apm -> mutexCond), &delay);
01044 pthread_mutex_unlock(&(apm -> mutexCond));
01045 #else
01046 pthread_mutex_unlock(&(apm -> mutexCond));
01047 ret = WaitForSingleObject(apm->confChangedCond, delay);
01048 #endif
01049 if (ret == ETIMEDOUT) {
01050 // Only a timeout triggers work; any other return value just loops again.
01051 // NOTE(review): on WIN32 ret comes from WaitForSingleObject but is still compared with ETIMEDOUT — confirm the values match.
01052 if (nextOp == JOB_INFO_SEND) {
01053 apm -> sendJobInfo();
01054 crtTime = time(NULL);
01055 nextJobInfoSend = crtTime + apm -> getJobMonitorInterval();
01056 }
01057
01058 if (nextOp == SYS_INFO_SEND) {
01059 apm -> sendSysInfo();
01060 if (apm -> getGenMonitoring()) {
01061 if (generalInfoCount <= 1) // general info goes out only while the counter is 0 or 1
01062 apm -> sendGeneralInfo();
01063 generalInfoCount = (generalInfoCount + 1) % apm -> genMonitorIntervals; // NOTE(review): assumes genMonitorIntervals is non-zero
01064 }
01065 crtTime = time(NULL);
01066 nextSysInfoSend = crtTime + apm -> getSysMonitorInterval();
01067 }
01068
01069 if (nextOp == RECHECK_CONF) {
01070 // Look for a newer init file (FILE_INIT only) and for any modified configuration URL.
01071 resourceChanged = false;
01072 try {
01073 if (apm -> initType == FILE_INIT) {
01074 sprintf(logmsg, "Checking for modifications for file %s ",
01075 apm -> initSources[0]);
01076 logger(INFO, logmsg);
01077 stat(apm -> initSources[0], &st);
01078 if (st.st_mtime > apm -> lastModifFile) {
01079 sprintf(logmsg, "File %s modified ", apm -> initSources[0]);
01080 logger(INFO, logmsg);
01081 resourceChanged = true;
01082 }
01083 }
01084 // NOTE(review): logmsg is 200 bytes and is filled by unbounded sprintf("%s") of paths/URLs — overflow risk; consider snprintf.
01085 // NOTE(review): the return value of stat() above is not checked before st.st_mtime is read.
01086 for (i = 0; i < apm -> confURLs.nConfURLs; i++) {
01087 sprintf(logmsg, "[Checking for modifications for URL %s ] ",
01088 apm -> confURLs.vURLs[i]);
01089 logger(INFO, logmsg);
01090 if (urlModified(apm -> confURLs.vURLs[i], apm -> confURLs.lastModifURLs[i])) {
01091 sprintf(logmsg, "URL %s modified ", apm -> confURLs.vURLs[i]);
01092 logger(INFO, logmsg);
01093 resourceChanged = true;
01094 break;
01095 }
01096 }
01097 // Reload from the original source(s); the meaning of the trailing false argument is not visible in this listing.
01098 if (resourceChanged) {
01099 logger(INFO, "Reloading configuration...");
01100 if (apm -> initType == FILE_INIT)
01101 apm -> initialize(apm -> initSources[0], false);
01102 else
01103 apm -> initialize(apm -> nInitSources, apm -> initSources, false);
01104 }
01105 apm -> setCrtRecheckInterval(apm -> getRecheckInterval()); // no exception: current interval reset to the configured one
01106 } catch (runtime_error &err) {
01107 logger(WARNING, err.what());
01108 logger(WARNING, "Increasing the time interval for reloading the configuration...");
01109 apm -> setCrtRecheckInterval(apm -> getRecheckInterval() * 5); // on runtime_error: back off to 5x the configured interval
01110 }
01111 crtTime = time(NULL);
01112 nextRecheck = crtTime + apm -> getCrtRecheckInterval();
01113
01114 }
01115 }
01116
01117 }
01118 // Reached only after the stopBkThread check broke out of the loop.
01119 #ifndef WIN32
01120 return NULL;
01121 #else
01122 return 0;
01123 #endif
01124 }