Gaudi Framework, version v22r0

Home   Generated: 9 Feb 2011

ApMon.h File Reference

Declarations for the ApMon class. More...

#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdexcept>
#include <ctype.h>
#include <time.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/utsname.h>
#include <sys/time.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <net/if.h>
#include <rpc/rpc.h>
#include <netdb.h>
#include <unistd.h>
#include <pthread.h>
#include <pwd.h>
#include <grp.h>
#include <linux/param.h>
Include dependency graph for ApMon.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

struct  ConfURLs
 Data structure which holds the configuration URLs. More...
struct  MonitoredJob
 Data structure which holds information about a job monitored by ApMon. More...
class  ApMon
 Data structure used for sending monitoring data to a MonALISA module. More...

Defines

#define XDR_STRING   0
 Used to code the string data type.
#define XDR_INT32   2
 Used to code the 4-byte integer data type.
#define XDR_REAL32   4
 Used to code the 4-byte real data type.
#define XDR_REAL64   5
 Used to code the 8-byte real data type.
#define MAX_DGRAM_SIZE   8192
 Maximum UDP datagram size.
#define MAX_STRING_LEN   512
 Maximum string length (for hostnames).
#define MAX_STRING_LEN1   (MAX_STRING_LEN + 1)
#define RET_SUCCESS   0
 Function return value (success).
#define RET_ERROR   -1
 Function return value (error).
#define PROCUTILS_ERROR   -2
#define RET_NOT_SENT   -3
 A datagram was not sent because the number of datagrams that can be sent per second is limited.
#define MAX_N_DESTINATIONS   30
 Maximum number of destination hosts to which we send the parameters.
#define DEFAULT_PORT   8884
 The default port on which MonALISA listens.
#define MAX_HEADER_LENGTH   40
 Maximum header length.
#define FILE_INIT   1
 Indicates that the object was initialized from a file.
#define LIST_INIT   2
 Indicates that the object was initialized from a list.
#define DIRECT_INIT   3
 Indicates that the object was initialized directly.
#define JOB_MONITOR_INTERVAL   20
 Time interval (in sec) at which job monitoring datagrams are sent.
#define SYS_MONITOR_INTERVAL   20
 Time interval (in sec) at which system monitoring datagrams are sent.
#define RECHECK_INTERVAL   600
 Time interval (in sec) at which the configuration files are checked for changes.
#define GEN_MONITOR_INTERVALS   10
 The interval at which ApMon sends general system monitoring information, expressed as a number of system monitoring intervals (the time intervals at which ApMon sends system monitoring information).
#define MAX_MONITORED_JOBS   30
 The maximum number of jobs that can be monitored.
#define MAX_SYS_PARAMS   30
 The maximum number of system parameters.
#define MAX_GEN_PARAMS   30
 The maximum number of general system parameters.
#define MAX_JOB_PARAMS   30
 The maximum number of job parameters.
#define MAX_MSG_RATE   20
 The maximum number of messages per second that will be sent to MonALISA.
#define NLETTERS   26
#define TWO_BILLION   2000000000
#define APMON_VERSION   "2.2.1"

Functions

void * bkTask (void *param)
 Performs background actions like rechecking the configuration file and the URLs and sending monitoring information.

Detailed Description

Declarations for the ApMon class.

The ApMon class can be used for sending monitoring data to one or more destination hosts that run MonALISA.

Definition in file ApMon.h.


Define Documentation

#define APMON_VERSION   "2.2.1"

Definition at line 155 of file ApMon.h.

#define DEFAULT_PORT   8884

The default port on which MonALISA listens.

Definition at line 114 of file ApMon.h.

#define DIRECT_INIT   3

Indicates that the object was initialized directly.

Definition at line 126 of file ApMon.h.

#define FILE_INIT   1

Indicates that the object was initialized from a file.

Definition at line 118 of file ApMon.h.

#define GEN_MONITOR_INTERVALS   10

The interval at which ApMon sends general system monitoring information, expressed as a number of system monitoring intervals (the time intervals at which ApMon sends system monitoring information).

Definition at line 138 of file ApMon.h.

#define JOB_MONITOR_INTERVAL   20

Time interval (in sec) at which job monitoring datagrams are sent.

Definition at line 128 of file ApMon.h.

#define LIST_INIT   2

Indicates that the object was initialized from a list.

Definition at line 123 of file ApMon.h.

#define MAX_DGRAM_SIZE   8192

Maximum UDP datagram size.

Definition at line 102 of file ApMon.h.

#define MAX_GEN_PARAMS   30

The maximum number of general system parameters.

Definition at line 144 of file ApMon.h.

#define MAX_HEADER_LENGTH   40

Maximum header length.

Definition at line 115 of file ApMon.h.

#define MAX_JOB_PARAMS   30

The maximum number of job parameters.

Definition at line 146 of file ApMon.h.

#define MAX_MONITORED_JOBS   30

The maximum number of jobs that can be monitored.

Definition at line 140 of file ApMon.h.

#define MAX_MSG_RATE   20

The maximum number of messages per second that will be sent to MonALISA.

Definition at line 149 of file ApMon.h.

#define MAX_N_DESTINATIONS   30

Maximum number of destination hosts to which we send the parameters.

Definition at line 111 of file ApMon.h.

#define MAX_STRING_LEN   512

Maximum string length (for hostnames).

Definition at line 103 of file ApMon.h.

#define MAX_STRING_LEN1   (MAX_STRING_LEN + 1)

Definition at line 104 of file ApMon.h.

#define MAX_SYS_PARAMS   30

The maximum number of system parameters.

Definition at line 142 of file ApMon.h.

#define NLETTERS   26

Definition at line 151 of file ApMon.h.

#define PROCUTILS_ERROR   -2

Definition at line 107 of file ApMon.h.

#define RECHECK_INTERVAL   600

Time interval (in sec) at which the configuration files are checked for changes.

Definition at line 133 of file ApMon.h.

#define RET_ERROR   -1

Function return value (error).

Definition at line 106 of file ApMon.h.

#define RET_NOT_SENT   -3

A datagram was not sent because the number of datagrams that can be sent per second is limited.

Definition at line 108 of file ApMon.h.

#define RET_SUCCESS   0

Function return value (success).

Definition at line 105 of file ApMon.h.

#define SYS_MONITOR_INTERVAL   20

Time interval (in sec) at which system monitoring datagrams are sent.

Definition at line 130 of file ApMon.h.

#define TWO_BILLION   2000000000

Definition at line 153 of file ApMon.h.

#define XDR_INT32   2

Used to code the 4-byte integer data type.

Definition at line 97 of file ApMon.h.

#define XDR_REAL32   4

Used to code the 4-byte real data type.

Definition at line 99 of file ApMon.h.

#define XDR_REAL64   5

Used to code the 8-byte real data type.

Definition at line 100 of file ApMon.h.

#define XDR_STRING   0

Used to code the string data type.

Definition at line 95 of file ApMon.h.


Function Documentation

void* bkTask ( void *  param  ) 

Performs background actions like rechecking the configuration file and the URLs and sending monitoring information.

This function is executed in a background thread and has two roles: it automatically sends the system/job monitoring parameters (if the user requested this) and it checks the configuration file/URLs for changes.

(this is done in a separate thread).

Definition at line 918 of file ApMon.cpp.

00918                           {
00919 #else
00920 DWORD WINAPI bkTask(void *param) {
00921 #endif
00922   struct stat st;
00923 #ifndef WIN32
00924   struct timespec delay;
00925 #else
00926   DWORD delay;
00927 #endif
00928   bool resourceChanged, haveChange;
00929   int nextOp = -1, i, ret;
00930   int generalInfoCount;
00931   time_t crtTime, timeRemained;
00932   time_t nextRecheck = 0, nextJobInfoSend = 0, nextSysInfoSend = 0;
00933   ApMon *apm = (ApMon *)param;
00934   char logmsg[200];
00935 
00936   logger(INFO, "[Starting background thread...]");
00937   apm -> bkThreadStarted = true;
00938 
00939   crtTime = time(NULL);
00940 
00941   pthread_mutex_lock(&(apm -> mutexBack));
00942   if (apm -> confCheck) {
00943     nextRecheck = crtTime + apm -> crtRecheckInterval;
00944     //sprintf(logmsg, "###1 crt %ld interv %ld recheck %ld ", crtTime,
00945     //   apm -> crtRecheckInterval, nextRecheck);
00946     //logger(FINE, logmsg);
00947     //fflush(stdout);
00948   }
00949   if (apm -> jobMonitoring)
00950     nextJobInfoSend = crtTime + apm -> jobMonitorInterval;
00951   if (apm -> sysMonitoring)
00952     nextSysInfoSend = crtTime + apm -> sysMonitorInterval;
00953   pthread_mutex_unlock(&(apm -> mutexBack));
00954 
00955   timeRemained = -1;
00956   generalInfoCount = 0;
00957 
00958   while (1) {
00959     pthread_mutex_lock(&apm -> mutexBack);
00960     if (apm -> stopBkThread) {
00961 //      printf("### stopBkThread \n");
00962       pthread_mutex_unlock(&apm -> mutexBack);
00963       break;
00964     }
00965     pthread_mutex_unlock(&apm -> mutexBack);
00966 
00967     //sprintf(logmsg, "### 2 recheck %ld sys %ld ", nextRecheck,
00968     //    nextSysInfoSend);
00969     //logger(FINE, logmsg);
00970 
00971     /* determine the next operation that must be performed */
00972     if (nextRecheck > 0 && (nextJobInfoSend <= 0 ||
00973                             nextRecheck <= nextJobInfoSend)) {
00974       if (nextSysInfoSend <= 0 || nextRecheck <= nextSysInfoSend) {
00975         nextOp = RECHECK_CONF;
00976         timeRemained = nextRecheck - crtTime;
00977       } else {
00978         nextOp = SYS_INFO_SEND;
00979         timeRemained = nextSysInfoSend - crtTime;
00980       }
00981     } else {
00982       if (nextJobInfoSend > 0 && (nextSysInfoSend <= 0 ||
00983                                   nextJobInfoSend <= nextSysInfoSend)) {
00984         nextOp = JOB_INFO_SEND;
00985         timeRemained = nextJobInfoSend - crtTime;
00986       } else if (nextSysInfoSend > 0) {
00987         nextOp = SYS_INFO_SEND;
00988         timeRemained = nextSysInfoSend - crtTime;
00989       }
00990     }
00991 
00992     if (timeRemained == -1)
00993       timeRemained = RECHECK_INTERVAL;
00994 
00995 #ifndef WIN32
00996     /* the moment when the next operation should be performed */
00997     delay.tv_sec = crtTime + timeRemained;
00998     delay.tv_nsec = 0;
00999 #else
01000     delay = (/*crtTime +*/ timeRemained) * 1000;  // this is in millis
01001 #endif
01002 
01003     pthread_mutex_lock(&(apm -> mutexBack));
01004 
01005     pthread_mutex_lock(&(apm -> mutexCond));
01006     /* check for changes in the settings */
01007     haveChange = false;
01008     if (apm -> jobMonChanged || apm -> sysMonChanged || apm -> recheckChanged)
01009       haveChange = true;
01010     if (apm -> jobMonChanged) {
01011       if (apm -> jobMonitoring)
01012         nextJobInfoSend = crtTime + apm -> jobMonitorInterval;
01013       else
01014         nextJobInfoSend = -1;
01015       apm -> jobMonChanged = false;
01016     }
01017     if (apm -> sysMonChanged) {
01018       if (apm -> sysMonitoring)
01019         nextSysInfoSend = crtTime + apm -> sysMonitorInterval;
01020       else
01021         nextSysInfoSend = -1;
01022       apm -> sysMonChanged = false;
01023     }
01024     if (apm -> recheckChanged) {
01025       if (apm -> confCheck) {
01026         nextRecheck = crtTime + apm -> crtRecheckInterval;
01027       }
01028       else
01029         nextRecheck = -1;
01030       apm -> recheckChanged = false;
01031     }
01032     pthread_mutex_unlock(&(apm -> mutexBack));
01033 
01034     if (haveChange) {
01035       pthread_mutex_unlock(&(apm -> mutexCond));
01036       continue;
01037     }
01038 
01039     /* wait until the next operation should be performed or until
01040        a change in the settings occurs */
01041 #ifndef WIN32
01042     ret = pthread_cond_timedwait(&(apm -> confChangedCond),
01043                                 &(apm -> mutexCond), &delay);
01044     pthread_mutex_unlock(&(apm -> mutexCond));
01045 #else
01046     pthread_mutex_unlock(&(apm -> mutexCond));
01047     ret = WaitForSingleObject(apm->confChangedCond, delay);
01048 #endif
01049     if (ret == ETIMEDOUT) {
01050 //      printf("### ret TIMEDOUT\n");
01051       /* now perform the operation */
01052       if (nextOp == JOB_INFO_SEND) {
01053         apm -> sendJobInfo();
01054         crtTime = time(NULL);
01055         nextJobInfoSend = crtTime + apm -> getJobMonitorInterval();
01056       }
01057 
01058       if (nextOp == SYS_INFO_SEND) {
01059         apm -> sendSysInfo();
01060         if (apm -> getGenMonitoring()) {
01061           if (generalInfoCount <= 1)
01062             apm -> sendGeneralInfo();
01063           generalInfoCount = (generalInfoCount + 1) % apm -> genMonitorIntervals;
01064         }
01065         crtTime = time(NULL);
01066         nextSysInfoSend = crtTime + apm -> getSysMonitorInterval();
01067       }
01068 
01069       if (nextOp == RECHECK_CONF) {
01070         //logger(FINE, "### recheck conf");
01071         resourceChanged = false;
01072         try {
01073           if (apm -> initType == FILE_INIT) {
01074             sprintf(logmsg, "Checking for modifications for file %s ",
01075                     apm -> initSources[0]);
01076             logger(INFO, logmsg);
01077             stat(apm -> initSources[0], &st);
01078             if (st.st_mtime > apm -> lastModifFile) {
01079               sprintf(logmsg, "File %s modified ", apm -> initSources[0]);
01080               logger(INFO, logmsg);
01081               resourceChanged = true;
01082             }
01083           }
01084 
01085           // check the configuration URLs
01086           for (i = 0; i < apm -> confURLs.nConfURLs; i++) {
01087             sprintf(logmsg, "[Checking for modifications for URL %s ] ",
01088                    apm -> confURLs.vURLs[i]);
01089             logger(INFO, logmsg);
01090             if (urlModified(apm -> confURLs.vURLs[i], apm -> confURLs.lastModifURLs[i])) {
01091               sprintf(logmsg, "URL %s modified ", apm -> confURLs.vURLs[i]);
01092               logger(INFO, logmsg);
01093               resourceChanged = true;
01094               break;
01095             }
01096           }
01097 
01098           if (resourceChanged) {
01099             logger(INFO, "Reloading configuration...");
01100             if (apm -> initType == FILE_INIT)
01101               apm -> initialize(apm -> initSources[0], false);
01102             else
01103               apm -> initialize(apm -> nInitSources, apm -> initSources, false);
01104           }
01105           apm -> setCrtRecheckInterval(apm -> getRecheckInterval());
01106         } catch (runtime_error &err) {
01107           logger(WARNING, err.what());
01108           logger(WARNING, "Increasing the time interval for reloading the configuration...");
01109           apm -> setCrtRecheckInterval(apm -> getRecheckInterval() * 5);
01110         }
01111         crtTime = time(NULL);
01112         nextRecheck = crtTime + apm -> getCrtRecheckInterval();
01113         //sleep(apm -> getCrtRecheckInterval());
01114       }
01115     }
01116 
01117   } // while
01118 
01119 #ifndef WIN32
01120   return NULL; // it doesn't matter what we return here
01121 #else
01122   return 0;
01123 #endif
01124 }

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Defines

Generated at Wed Feb 9 16:28:16 2011 for Gaudi Framework, version v22r0 by Doxygen version 1.6.2 written by Dimitri van Heesch, © 1997-2004