Gaudi Framework, version v22r2

Home   Generated: Tue May 10 2011
Classes | Defines | Typedefs | Functions

ApMon.h File Reference

Declarations for the ApMon class. More...

#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdexcept>
#include <ctype.h>
#include <time.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/utsname.h>
#include <sys/time.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <net/if.h>
#include <rpc/rpc.h>
#include <netdb.h>
#include <unistd.h>
#include <pthread.h>
#include <pwd.h>
#include <grp.h>
#include <linux/param.h>
Include dependency graph for ApMon.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

struct  ConfURLs
 Data structure which holds the configuration URLs. More...
struct  MonitoredJob
 Data structure which holds information about a job monitored by ApMon. More...
class  ApMon
 Data structure used for sending monitoring data to a MonALISA module. More...

Defines

#define ApMon_h
#define XDR_STRING   0
 Used to code the string data type.
#define XDR_INT32   2
 Used to code the 4 bytes integer data type.
#define XDR_REAL32   4
 Used to code the 4 bytes real data type.
#define XDR_REAL64   5
 Used to code the 8 bytes real data type.
#define MAX_DGRAM_SIZE   8192
 Maximum UDP datagram size.
#define MAX_STRING_LEN   512
 Maximum string length (for hostnames).
#define MAX_STRING_LEN1   (MAX_STRING_LEN + 1)
#define RET_SUCCESS   0
 Function return value (success).
#define RET_ERROR   -1
 Function return value (error).
#define PROCUTILS_ERROR   -2
#define RET_NOT_SENT   -3
 A datagram was not sent because the number of datagrams that can be sent per second is limited.
#define MAX_N_DESTINATIONS   30
 Maximum number of destination hosts to which we send the parameters.
#define DEFAULT_PORT   8884
 The default port on which MonALISA listens.
#define MAX_HEADER_LENGTH   40
 Maximum header length.
#define FILE_INIT   1
 Indicates that the object was initialized from a file.
#define LIST_INIT   2
 Indicates that the object was initialized from a list.
#define DIRECT_INIT   3
 Indicates that the object was initialized directly.
#define JOB_MONITOR_INTERVAL   20
 Time interval (in sec) at which job monitoring datagrams are sent.
#define SYS_MONITOR_INTERVAL   20
 Time interval (in sec) at which system monitoring datagrams are sent.
#define RECHECK_INTERVAL   600
 Time interval (in sec) at which the configuration files are checked for changes.
#define GEN_MONITOR_INTERVALS   10
 The number of time intervals at which ApMon sends general system monitoring information (considering the time intervals at which ApMon sends system monitoring information).
#define MAX_MONITORED_JOBS   30
 The maximum number of jobs that can be monitored.
#define MAX_SYS_PARAMS   30
 The maximum number of system parameters.
#define MAX_GEN_PARAMS   30
 The maximum number of general system parameters.
#define MAX_JOB_PARAMS   30
 The maximum number of job parameters.
#define MAX_MSG_RATE   20
 The maximum number of messages per second that will be sent to MonALISA.
#define NLETTERS   26
#define TWO_BILLION   2000000000
#define APMON_VERSION   "2.2.1"

Typedefs

typedef struct ConfURLs ConfURLs
 Data structure which holds the configuration URLs.
typedef struct MonitoredJob MonitoredJob
 Data structure which holds information about a job monitored by ApMon.

Functions

void * bkTask (void *param)
 Performs background actions like rechecking the configuration file and the URLs and sending monitoring information.

Detailed Description

Declarations for the ApMon class.

The ApMon class can be used for sending monitoring data to one or more destination hosts that run MonALISA.

Definition in file ApMon.h.


Define Documentation

#define ApMon_h

Definition at line 49 of file ApMon.h.

#define APMON_VERSION   "2.2.1"

Definition at line 155 of file ApMon.h.

#define DEFAULT_PORT   8884

The default port on which MonALISA listens.

Definition at line 114 of file ApMon.h.

#define DIRECT_INIT   3

Indicates that the object was initialized directly.

Definition at line 126 of file ApMon.h.

#define FILE_INIT   1

Indicates that the object was initialized from a file.

Definition at line 118 of file ApMon.h.

#define GEN_MONITOR_INTERVALS   10

The number of time intervals at which ApMon sends general system monitoring information (considering the time intervals at which ApMon sends system monitoring information).

Definition at line 138 of file ApMon.h.

#define JOB_MONITOR_INTERVAL   20

Time interval (in sec) at which job monitoring datagrams are sent.

Definition at line 128 of file ApMon.h.

#define LIST_INIT   2

Indicates that the object was initialized from a list.

Definition at line 123 of file ApMon.h.

#define MAX_DGRAM_SIZE   8192

Maximum UDP datagram size.

Definition at line 102 of file ApMon.h.

#define MAX_GEN_PARAMS   30

The maximum number of general system parameters.

Definition at line 144 of file ApMon.h.

#define MAX_HEADER_LENGTH   40

Maximum header length.

Definition at line 115 of file ApMon.h.

#define MAX_JOB_PARAMS   30

The maximum number of job parameters.

Definition at line 146 of file ApMon.h.

#define MAX_MONITORED_JOBS   30

The maximum number of jobs that can be monitored.

Definition at line 140 of file ApMon.h.

#define MAX_MSG_RATE   20

The maximum number of messages per second that will be sent to MonALISA.

Definition at line 149 of file ApMon.h.

#define MAX_N_DESTINATIONS   30

Maximum number of destination hosts to which we send the parameters.

Definition at line 111 of file ApMon.h.

#define MAX_STRING_LEN   512

Maximum string length (for hostnames).

Definition at line 103 of file ApMon.h.

#define MAX_STRING_LEN1   (MAX_STRING_LEN + 1)

Definition at line 104 of file ApMon.h.

#define MAX_SYS_PARAMS   30

The maximum number of system parameters.

Definition at line 142 of file ApMon.h.

#define NLETTERS   26

Definition at line 151 of file ApMon.h.

#define PROCUTILS_ERROR   -2

Definition at line 107 of file ApMon.h.

#define RECHECK_INTERVAL   600

Time interval (in sec) at which the configuration files are checked for changes.

Definition at line 133 of file ApMon.h.

#define RET_ERROR   -1

Function return value (error).

Definition at line 106 of file ApMon.h.

#define RET_NOT_SENT   -3

A datagram was not sent because the number of datagrams that can be sent per second is limited.

Definition at line 108 of file ApMon.h.

#define RET_SUCCESS   0

Function return value (success).

Definition at line 105 of file ApMon.h.

#define SYS_MONITOR_INTERVAL   20

Time interval (in sec) at which system monitoring datagrams are sent.

Definition at line 130 of file ApMon.h.

#define TWO_BILLION   2000000000

Definition at line 153 of file ApMon.h.

#define XDR_INT32   2

Used to code the 4 bytes integer data type.

Definition at line 97 of file ApMon.h.

#define XDR_REAL32   4

Used to code the 4 bytes real data type.

Definition at line 99 of file ApMon.h.

#define XDR_REAL64   5

Used to code the 8 bytes real data type.

Definition at line 100 of file ApMon.h.

#define XDR_STRING   0

Used to code the string data type.

Definition at line 95 of file ApMon.h.


Typedef Documentation

typedef struct ConfURLs ConfURLs

Data structure which holds the configuration URLs.

typedef struct MonitoredJob MonitoredJob

Data structure which holds information about a job monitored by ApMon.


Function Documentation

void* bkTask ( void *  param )

Performs background actions like rechecking the configuration file and the URLs and sending monitoring information.

This function is executed in a background thread and has two roles: it automatically sends the system/job monitoring parameters (if the user requested it) and it checks the configuration file/URLs for changes.

(this is done in a separate thread).

Definition at line 918 of file ApMon.cpp.

                          {
#else
DWORD WINAPI bkTask(void *param) {
#endif
  /*
   * Entry point of the ApMon background thread; `param` is the ApMon
   * object to serve.  Until apm->stopBkThread is set, each loop iteration
   * picks the earliest pending operation (configuration recheck, job
   * information send, system information send), waits on confChangedCond
   * until that operation is due or the settings change, and performs the
   * operation when the wait times out.
   */
  struct stat st;
#ifndef WIN32
  struct timespec delay;
#else
  DWORD delay;
#endif
  bool resourceChanged, haveChange;
  int nextOp = -1, i, ret;
  int generalInfoCount;
  time_t crtTime, timeRemained;
  /* Absolute due times; a value <= 0 means "not scheduled". */
  time_t nextRecheck = 0, nextJobInfoSend = 0, nextSysInfoSend = 0;
  ApMon *apm = (ApMon *)param;
  char logmsg[200];

  logger(INFO, "[Starting background thread...]");
  apm -> bkThreadStarted = true;

  crtTime = time(NULL);

  /* Compute the initial schedule from the current settings. */
  pthread_mutex_lock(&(apm -> mutexBack));
  if (apm -> confCheck)
    nextRecheck = crtTime + apm -> crtRecheckInterval;
  if (apm -> jobMonitoring)
    nextJobInfoSend = crtTime + apm -> jobMonitorInterval;
  if (apm -> sysMonitoring)
    nextSysInfoSend = crtTime + apm -> sysMonitorInterval;
  pthread_mutex_unlock(&(apm -> mutexBack));

  timeRemained = -1;
  generalInfoCount = 0;

  while (1) {
    /* Leave the loop as soon as a stop was requested. */
    pthread_mutex_lock(&apm -> mutexBack);
    if (apm -> stopBkThread) {
      pthread_mutex_unlock(&apm -> mutexBack);
      break;
    }
    pthread_mutex_unlock(&apm -> mutexBack);

    /* Determine the next operation that must be performed.  Start every
       pass from "nothing scheduled": otherwise the choice made in a
       previous pass would survive after its operation has been disabled
       and would still be executed (and rescheduled) on the next timeout. */
    nextOp = -1;
    timeRemained = -1;
    if (nextRecheck > 0 && (nextJobInfoSend <= 0 ||
                            nextRecheck <= nextJobInfoSend)) {
      if (nextSysInfoSend <= 0 || nextRecheck <= nextSysInfoSend) {
        nextOp = RECHECK_CONF;
        timeRemained = nextRecheck - crtTime;
      } else {
        nextOp = SYS_INFO_SEND;
        timeRemained = nextSysInfoSend - crtTime;
      }
    } else {
      if (nextJobInfoSend > 0 && (nextSysInfoSend <= 0 ||
                                  nextJobInfoSend <= nextSysInfoSend)) {
        nextOp = JOB_INFO_SEND;
        timeRemained = nextJobInfoSend - crtTime;
      } else if (nextSysInfoSend > 0) {
        nextOp = SYS_INFO_SEND;
        timeRemained = nextSysInfoSend - crtTime;
      }
    }

    /* Nothing is scheduled: just sleep for the default recheck interval.
       The test is on nextOp rather than on timeRemained == -1, because -1
       is also a legitimate value for an operation that is one second
       overdue and must not be postponed. */
    if (nextOp == -1)
      timeRemained = RECHECK_INTERVAL;

#ifndef WIN32
    /* the moment when the next operation should be performed */
    delay.tv_sec = crtTime + timeRemained;
    delay.tv_nsec = 0;
#else
    delay = (/*crtTime +*/ timeRemained) * 1000;  // this is in millis
#endif

    pthread_mutex_lock(&(apm -> mutexBack));

    pthread_mutex_lock(&(apm -> mutexCond));
    /* check for changes in the settings */
    haveChange = false;
    if (apm -> jobMonChanged || apm -> sysMonChanged || apm -> recheckChanged)
      haveChange = true;
    if (apm -> jobMonChanged) {
      if (apm -> jobMonitoring)
        nextJobInfoSend = crtTime + apm -> jobMonitorInterval;
      else
        nextJobInfoSend = -1;
      apm -> jobMonChanged = false;
    }
    if (apm -> sysMonChanged) {
      if (apm -> sysMonitoring)
        nextSysInfoSend = crtTime + apm -> sysMonitorInterval;
      else
        nextSysInfoSend = -1;
      apm -> sysMonChanged = false;
    }
    if (apm -> recheckChanged) {
      if (apm -> confCheck) {
        nextRecheck = crtTime + apm -> crtRecheckInterval;
      }
      else
        nextRecheck = -1;
      apm -> recheckChanged = false;
    }
    pthread_mutex_unlock(&(apm -> mutexBack));

    /* The schedule was just modified, so pick the next operation again
       instead of waiting with an outdated deadline. */
    if (haveChange) {
      pthread_mutex_unlock(&(apm -> mutexCond));
      continue;
    }

    /* wait until the next operation should be performed or until
       a change in the settings occurs */
#ifndef WIN32
    ret = pthread_cond_timedwait(&(apm -> confChangedCond),
                                &(apm -> mutexCond), &delay);
    pthread_mutex_unlock(&(apm -> mutexCond));
#else
    pthread_mutex_unlock(&(apm -> mutexCond));
    /* NOTE(review): WaitForSingleObject reports a timeout as WAIT_TIMEOUT;
       confirm that ETIMEDOUT is mapped to that value in the WIN32 build. */
    ret = WaitForSingleObject(apm->confChangedCond, delay);
#endif
    if (ret == ETIMEDOUT) {
      /* No operation was pending: refresh the reference time so that the
         next deadline lies in the future.  Without this the deadline stays
         in the past and the loop spins without ever blocking. */
      if (nextOp == -1)
        crtTime = time(NULL);

      /* now perform the operation */
      if (nextOp == JOB_INFO_SEND) {
        apm -> sendJobInfo();
        crtTime = time(NULL);
        nextJobInfoSend = crtTime + apm -> getJobMonitorInterval();
      }

      if (nextOp == SYS_INFO_SEND) {
        apm -> sendSysInfo();
        if (apm -> getGenMonitoring()) {
          /* general information is sent only while the counter is 0 or 1;
             the counter wraps around at genMonitorIntervals */
          if (generalInfoCount <= 1)
            apm -> sendGeneralInfo();
          generalInfoCount = (generalInfoCount + 1) % apm -> genMonitorIntervals;
        }
        crtTime = time(NULL);
        nextSysInfoSend = crtTime + apm -> getSysMonitorInterval();
      }

      if (nextOp == RECHECK_CONF) {
        resourceChanged = false;
        try {
          if (apm -> initType == FILE_INIT) {
            /* logmsg is a fixed 200-byte buffer while file names and URLs
               may be longer, so every message is formatted with snprintf,
               which truncates instead of overflowing the buffer. */
            snprintf(logmsg, sizeof(logmsg),
                     "Checking for modifications for file %s ",
                     apm -> initSources[0]);
            logger(INFO, logmsg);
            /* st is only meaningful when stat() succeeds */
            if (stat(apm -> initSources[0], &st) != 0) {
              snprintf(logmsg, sizeof(logmsg), "Could not stat file %s ",
                       apm -> initSources[0]);
              logger(WARNING, logmsg);
            } else if (st.st_mtime > apm -> lastModifFile) {
              snprintf(logmsg, sizeof(logmsg), "File %s modified ",
                       apm -> initSources[0]);
              logger(INFO, logmsg);
              resourceChanged = true;
            }
          }

          // check the configuration URLs; one modified URL is enough
          for (i = 0; i < apm -> confURLs.nConfURLs; i++) {
            snprintf(logmsg, sizeof(logmsg),
                     "[Checking for modifications for URL %s ] ",
                     apm -> confURLs.vURLs[i]);
            logger(INFO, logmsg);
            if (urlModified(apm -> confURLs.vURLs[i], apm -> confURLs.lastModifURLs[i])) {
              snprintf(logmsg, sizeof(logmsg), "URL %s modified ",
                       apm -> confURLs.vURLs[i]);
              logger(INFO, logmsg);
              resourceChanged = true;
              break;
            }
          }

          if (resourceChanged) {
            logger(INFO, "Reloading configuration...");
            if (apm -> initType == FILE_INIT)
              apm -> initialize(apm -> initSources[0], false);
            else
              apm -> initialize(apm -> nInitSources, apm -> initSources, false);
          }
          apm -> setCrtRecheckInterval(apm -> getRecheckInterval());
        } catch (runtime_error &err) {
          /* the check or the reload failed: retry less often */
          logger(WARNING, err.what());
          logger(WARNING, "Increasing the time interval for reloading the configuration...");
          apm -> setCrtRecheckInterval(apm -> getRecheckInterval() * 5);
        }
        crtTime = time(NULL);
        nextRecheck = crtTime + apm -> getCrtRecheckInterval();
      }
    }

  } // while

#ifndef WIN32
  return NULL; // it doesn't matter what we return here
#else
  return 0;
#endif
}
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Defines

Generated at Tue May 10 2011 18:54:26 for Gaudi Framework, version v22r2 by Doxygen version 1.7.2 written by Dimitri van Heesch, © 1997-2004