Gaudi Framework, version v21r8

Home   Generated: 17 Mar 2010

ApMon.h File Reference

Declarations for the ApMon class. More...

#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdexcept>
#include <ctype.h>
#include <time.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/utsname.h>
#include <sys/time.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <net/if.h>
#include <rpc/rpc.h>
#include <netdb.h>
#include <unistd.h>
#include <pthread.h>
#include <pwd.h>
#include <grp.h>
#include <linux/param.h>

Include dependency graph for ApMon.h:

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

struct  ConfURLs
 Data structure which holds the configuration URLs. More...
struct  MonitoredJob
 Data structure which holds information about a job monitored by ApMon. More...
class  ApMon
 Data structure used for sending monitoring data to a MonALISA module. More...

Defines

#define XDR_STRING   0
 Used to code the string data type.
#define XDR_INT32   2
 Used to code the 4-byte integer data type.
#define XDR_REAL32   4
 Used to code the 4-byte real data type.
#define XDR_REAL64   5
 Used to code the 8-byte real data type.
#define MAX_DGRAM_SIZE   8192
 Maximum UDP datagram size.
#define MAX_STRING_LEN   512
 Maximum string length (for hostnames).
#define MAX_STRING_LEN1   (MAX_STRING_LEN + 1)
#define RET_SUCCESS   0
 Function return value (success).
#define RET_ERROR   -1
 Function return value (error).
#define PROCUTILS_ERROR   -2
#define RET_NOT_SENT   -3
 A datagram was not sent because the number of datagrams that can be sent per second is limited.
#define MAX_N_DESTINATIONS   30
 Maximum number of destination hosts to which we send the parameters.
#define DEFAULT_PORT   8884
 The default port on which MonALISA listens.
#define MAX_HEADER_LENGTH   40
 Maximum header length.
#define FILE_INIT   1
 Indicates that the object was initialized from a file.
#define LIST_INIT   2
 Indicates that the object was initialized from a list.
#define DIRECT_INIT   3
 Indicates that the object was initialized directly.
#define JOB_MONITOR_INTERVAL   20
 Time interval (in sec) at which job monitoring datagrams are sent.
#define SYS_MONITOR_INTERVAL   20
 Time interval (in sec) at which system monitoring datagrams are sent.
#define RECHECK_INTERVAL   600
 Time interval (in sec) at which the configuration files are checked for changes.
#define GEN_MONITOR_INTERVALS   10
 How often ApMon sends general system monitoring information, expressed as a number of system monitoring intervals (the intervals at which ApMon sends system monitoring information).
#define MAX_MONITORED_JOBS   30
 The maximum number of jobs that can be monitored.
#define MAX_SYS_PARAMS   30
 The maximum number of system parameters.
#define MAX_GEN_PARAMS   30
 The maximum number of general system parameters.
#define MAX_JOB_PARAMS   30
 The maximum number of job parameters.
#define MAX_MSG_RATE   20
 The maximum number of messages per second that will be sent to MonALISA.
#define NLETTERS   26
#define TWO_BILLION   2000000000
#define APMON_VERSION   "2.2.1"

Functions

void * bkTask (void *param)
 Performs background actions like rechecking the configuration file and the URLs and sending monitoring information.


Detailed Description

Declarations for the ApMon class.

The ApMon class can be used for sending monitoring data to one or more destination hosts that run MonALISA.

Definition in file ApMon.h.


Define Documentation

#define APMON_VERSION   "2.2.1"

Definition at line 155 of file ApMon.h.

#define DEFAULT_PORT   8884

The default port on which MonALISA listens.

Definition at line 114 of file ApMon.h.

#define DIRECT_INIT   3

Indicates that the object was initialized directly.

Definition at line 126 of file ApMon.h.

#define FILE_INIT   1

Indicates that the object was initialized from a file.

Definition at line 118 of file ApMon.h.

#define GEN_MONITOR_INTERVALS   10

How often ApMon sends general system monitoring information, expressed as a number of system monitoring intervals (the intervals at which ApMon sends system monitoring information).

Definition at line 138 of file ApMon.h.

#define JOB_MONITOR_INTERVAL   20

Time interval (in sec) at which job monitoring datagrams are sent.

Definition at line 128 of file ApMon.h.

#define LIST_INIT   2

Indicates that the object was initialized from a list.

Definition at line 123 of file ApMon.h.

#define MAX_DGRAM_SIZE   8192

Maximum UDP datagram size.

Definition at line 102 of file ApMon.h.

#define MAX_GEN_PARAMS   30

The maximum number of general system parameters.

Definition at line 144 of file ApMon.h.

#define MAX_HEADER_LENGTH   40

Maximum header length.

Definition at line 115 of file ApMon.h.

#define MAX_JOB_PARAMS   30

The maximum number of job parameters.

Definition at line 146 of file ApMon.h.

#define MAX_MONITORED_JOBS   30

The maximum number of jobs that can be monitored.

Definition at line 140 of file ApMon.h.

#define MAX_MSG_RATE   20

The maximum number of messages per second that will be sent to MonALISA.

Definition at line 149 of file ApMon.h.

#define MAX_N_DESTINATIONS   30

Maximum number of destination hosts to which we send the parameters.

Definition at line 111 of file ApMon.h.

#define MAX_STRING_LEN   512

Maximum string length (for hostnames).

Definition at line 103 of file ApMon.h.

#define MAX_STRING_LEN1   (MAX_STRING_LEN + 1)

Definition at line 104 of file ApMon.h.

#define MAX_SYS_PARAMS   30

The maximum number of system parameters.

Definition at line 142 of file ApMon.h.

#define NLETTERS   26

Definition at line 151 of file ApMon.h.

#define PROCUTILS_ERROR   -2

Definition at line 107 of file ApMon.h.

#define RECHECK_INTERVAL   600

Time interval (in sec) at which the configuration files are checked for changes.

Definition at line 133 of file ApMon.h.

#define RET_ERROR   -1

Function return value (error).

Definition at line 106 of file ApMon.h.

#define RET_NOT_SENT   -3

A datagram was not sent because the number of datagrams that can be sent per second is limited.

Definition at line 108 of file ApMon.h.

#define RET_SUCCESS   0

Function return value (success).

Definition at line 105 of file ApMon.h.

#define SYS_MONITOR_INTERVAL   20

Time interval (in sec) at which system monitoring datagrams are sent.

Definition at line 130 of file ApMon.h.

#define TWO_BILLION   2000000000

Definition at line 153 of file ApMon.h.

#define XDR_INT32   2

Used to code the 4-byte integer data type.

Definition at line 97 of file ApMon.h.

#define XDR_REAL32   4

Used to code the 4-byte real data type.

Definition at line 99 of file ApMon.h.

#define XDR_REAL64   5

Used to code the 8-byte real data type.

Definition at line 100 of file ApMon.h.

#define XDR_STRING   0

Used to code the string data type.

Definition at line 95 of file ApMon.h.


Function Documentation

void* bkTask ( void *  param  ) 

Performs background actions like rechecking the configuration file and the URLs and sending monitoring information.

This function is executed in a background thread and has two roles: it automatically sends the system/job monitoring parameters (if the user requested this) and it checks the configuration file/URLs for changes.

(This is done in a separate thread.)

Definition at line 913 of file ApMon.cpp.

00913                           { 
00914 #else
00915 DWORD WINAPI bkTask(void *param) {
00916 #endif
00917   struct stat st;
00918 #ifndef WIN32
00919   struct timespec delay;
00920 #else
00921   DWORD delay;
00922 #endif
00923   bool resourceChanged, haveChange;
00924   int nextOp = -1, i, ret;
00925   int generalInfoCount;
00926   time_t crtTime, timeRemained;
00927   time_t nextRecheck = 0, nextJobInfoSend = 0, nextSysInfoSend = 0;
00928   ApMon *apm = (ApMon *)param;
00929   char logmsg[200];
00930 
00931   logger(INFO, "[Starting background thread...]");
00932   apm -> bkThreadStarted = true;
00933 
00934   crtTime = time(NULL);
00935 
00936   pthread_mutex_lock(&(apm -> mutexBack));
00937   if (apm -> confCheck) {
00938     nextRecheck = crtTime + apm -> crtRecheckInterval;
00939     //sprintf(logmsg, "###1 crt %ld interv %ld recheck %ld ", crtTime,
00940     //   apm -> crtRecheckInterval, nextRecheck);
00941     //logger(FINE, logmsg);
00942     //fflush(stdout);
00943   }
00944   if (apm -> jobMonitoring)
00945     nextJobInfoSend = crtTime + apm -> jobMonitorInterval;
00946   if (apm -> sysMonitoring)
00947     nextSysInfoSend = crtTime + apm -> sysMonitorInterval;
00948   pthread_mutex_unlock(&(apm -> mutexBack));
00949   
00950   timeRemained = -1;
00951   generalInfoCount = 0;
00952 
00953   while (1) {
00954     pthread_mutex_lock(&apm -> mutexBack);
00955     if (apm -> stopBkThread) {
00956 //      printf("### stopBkThread \n");
00957       pthread_mutex_unlock(&apm -> mutexBack);
00958       break;
00959     }
00960     pthread_mutex_unlock(&apm -> mutexBack);
00961 
00962     //sprintf(logmsg, "### 2 recheck %ld sys %ld ", nextRecheck, 
00963     //    nextSysInfoSend);
00964     //logger(FINE, logmsg);
00965 
00966     /* determine the next operation that must be performed */
00967     if (nextRecheck > 0 && (nextJobInfoSend <= 0 || 
00968                             nextRecheck <= nextJobInfoSend)) {
00969       if (nextSysInfoSend <= 0 || nextRecheck <= nextSysInfoSend) {
00970         nextOp = RECHECK_CONF;
00971         timeRemained = nextRecheck - crtTime;
00972       } else {
00973         nextOp = SYS_INFO_SEND;
00974         timeRemained = nextSysInfoSend - crtTime;
00975       }
00976     } else {
00977       if (nextJobInfoSend > 0 && (nextSysInfoSend <= 0 || 
00978                                   nextJobInfoSend <= nextSysInfoSend)) {
00979         nextOp = JOB_INFO_SEND;
00980         timeRemained = nextJobInfoSend - crtTime;
00981       } else if (nextSysInfoSend > 0) {
00982         nextOp = SYS_INFO_SEND;
00983         timeRemained = nextSysInfoSend - crtTime;
00984       }
00985     }
00986 
00987     if (timeRemained == -1)
00988       timeRemained = RECHECK_INTERVAL;
00989 
00990 #ifndef WIN32
00991     /* the moment when the next operation should be performed */
00992     delay.tv_sec = crtTime + timeRemained;
00993     delay.tv_nsec = 0;
00994 #else
00995     delay = (/*crtTime +*/ timeRemained) * 1000;  // this is in millis
00996 #endif
00997 
00998     pthread_mutex_lock(&(apm -> mutexBack));
00999 
01000     pthread_mutex_lock(&(apm -> mutexCond));
01001     /* check for changes in the settings */
01002     haveChange = false;
01003     if (apm -> jobMonChanged || apm -> sysMonChanged || apm -> recheckChanged)
01004       haveChange = true;
01005     if (apm -> jobMonChanged) {
01006       if (apm -> jobMonitoring) 
01007         nextJobInfoSend = crtTime + apm -> jobMonitorInterval;
01008       else
01009         nextJobInfoSend = -1;
01010       apm -> jobMonChanged = false;
01011     }
01012     if (apm -> sysMonChanged) {
01013       if (apm -> sysMonitoring) 
01014         nextSysInfoSend = crtTime + apm -> sysMonitorInterval;
01015       else
01016         nextSysInfoSend = -1;
01017       apm -> sysMonChanged = false;
01018     }
01019     if (apm -> recheckChanged) {
01020       if (apm -> confCheck) {
01021         nextRecheck = crtTime + apm -> crtRecheckInterval;
01022       }
01023       else
01024         nextRecheck = -1;
01025       apm -> recheckChanged = false;
01026     }
01027     pthread_mutex_unlock(&(apm -> mutexBack));
01028 
01029     if (haveChange) {
01030       pthread_mutex_unlock(&(apm -> mutexCond));
01031       continue;
01032     }
01033     
01034     /* wait until the next operation should be performed or until
01035        a change in the settings occurs */
01036 #ifndef WIN32
01037     ret = pthread_cond_timedwait(&(apm -> confChangedCond), 
01038                                 &(apm -> mutexCond), &delay);
01039     pthread_mutex_unlock(&(apm -> mutexCond));
01040 #else
01041     pthread_mutex_unlock(&(apm -> mutexCond));
01042     ret = WaitForSingleObject(apm->confChangedCond, delay);
01043 #endif
01044     if (ret == ETIMEDOUT) {
01045 //      printf("### ret TIMEDOUT\n");
01046       /* now perform the operation */
01047       if (nextOp == JOB_INFO_SEND) {
01048         apm -> sendJobInfo();
01049         crtTime = time(NULL);
01050         nextJobInfoSend = crtTime + apm -> getJobMonitorInterval();
01051       }
01052       
01053       if (nextOp == SYS_INFO_SEND) {
01054         apm -> sendSysInfo();
01055         if (apm -> getGenMonitoring()) {
01056           if (generalInfoCount <= 1)
01057             apm -> sendGeneralInfo();
01058           generalInfoCount = (generalInfoCount + 1) % apm -> genMonitorIntervals;
01059         }
01060         crtTime = time(NULL);
01061         nextSysInfoSend = crtTime + apm -> getSysMonitorInterval();
01062       }
01063 
01064       if (nextOp == RECHECK_CONF) {
01065         //logger(FINE, "### recheck conf");
01066         resourceChanged = false;
01067         try {
01068           if (apm -> initType == FILE_INIT) {
01069             sprintf(logmsg, "Checking for modifications for file %s ", 
01070                     apm -> initSources[0]);
01071             logger(INFO, logmsg);
01072             stat(apm -> initSources[0], &st);
01073             if (st.st_mtime > apm -> lastModifFile) {
01074               sprintf(logmsg, "File %s modified ", apm -> initSources[0]);
01075               logger(INFO, logmsg);
01076               resourceChanged = true;
01077             }
01078           }
01079 
01080           // check the configuration URLs
01081           for (i = 0; i < apm -> confURLs.nConfURLs; i++) {
01082             sprintf(logmsg, "[Checking for modifications for URL %s ] ", 
01083                    apm -> confURLs.vURLs[i]);
01084             logger(INFO, logmsg);
01085             if (urlModified(apm -> confURLs.vURLs[i], apm -> confURLs.lastModifURLs[i])) {
01086               sprintf(logmsg, "URL %s modified ", apm -> confURLs.vURLs[i]);
01087               logger(INFO, logmsg);
01088               resourceChanged = true;
01089               break;
01090             }
01091           }
01092 
01093           if (resourceChanged) {
01094             logger(INFO, "Reloading configuration...");
01095             if (apm -> initType == FILE_INIT)
01096               apm -> initialize(apm -> initSources[0], false);
01097             else
01098               apm -> initialize(apm -> nInitSources, apm -> initSources, false);
01099           }
01100           apm -> setCrtRecheckInterval(apm -> getRecheckInterval());
01101         } catch (runtime_error &err) {
01102           logger(WARNING, err.what());
01103           logger(WARNING, "Increasing the time interval for reloading the configuration...");
01104           apm -> setCrtRecheckInterval(apm -> getRecheckInterval() * 5);
01105         }
01106         crtTime = time(NULL);
01107         nextRecheck = crtTime + apm -> getCrtRecheckInterval();
01108         //sleep(apm -> getCrtRecheckInterval());
01109       }
01110     }
01111  
01112   } // while
01113 
01114 #ifndef WIN32
01115   return NULL; // it doesn't matter what we return here
01116 #else
01117   return 0;
01118 #endif
01119 }


Generated at Wed Mar 17 18:11:25 2010 for Gaudi Framework, version v21r8 by Doxygen version 1.5.6 written by Dimitri van Heesch, © 1997-2004