![]() |
|
|
Generated: 18 Jul 2008 |
The ApMon class can be used for sending monitoring data to one or more destination hosts that run MonALISA.
Definition in file ApMon.h.
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdexcept>
#include <ctype.h>
#include <time.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/utsname.h>
#include <sys/time.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <net/if.h>
#include <rpc/rpc.h>
#include <netdb.h>
#include <unistd.h>
#include <pthread.h>
#include <pwd.h>
#include <grp.h>
#include <linux/param.h>
Include dependency graph for ApMon.h:

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.
Classes | |
| struct | ConfURLs |
| Data structure which holds the configuration URLs. More... | |
| struct | MonitoredJob |
| Data structure which holds information about a job monitored by ApMon. More... | |
| class | ApMon |
| Data structure used for sending monitoring data to a MonALISA module. More... | |
Defines | |
| #define | XDR_STRING 0 |
| Used to code the string data type. | |
| #define | XDR_INT32 2 |
| Used to code the 4 bytes integer data type. | |
| #define | XDR_REAL32 4 |
| Used to code the 4 bytes real data type. | |
| #define | XDR_REAL64 5 |
| Used to code the 8 bytes real data type. | |
| #define | MAX_DGRAM_SIZE 8192 |
| Maximum UDP datagram size. | |
| #define | MAX_STRING_LEN 512 |
| Maximum string length (for hostnames). | |
| #define | MAX_STRING_LEN1 (MAX_STRING_LEN + 1) |
| #define | RET_SUCCESS 0 |
| Function return value (success). | |
| #define | RET_ERROR -1 |
| Function return value (error). | |
| #define | PROCUTILS_ERROR -2 |
| #define | RET_NOT_SENT -3 |
| A datagram was not sent because the number of datagrams that can be sent per second is limited. | |
| #define | MAX_N_DESTINATIONS 30 |
| Maximum number of destination hosts to which we send the parameters. | |
| #define | DEFAULT_PORT 8884 |
| The default port on which MonALISA listens. | |
| #define | MAX_HEADER_LENGTH 40 |
| Maximum header length. | |
| #define | FILE_INIT 1 |
| Indicates that the object was initialized from a file. | |
| #define | LIST_INIT 2 |
| Indicates that the object was initialized from a list. | |
| #define | DIRECT_INIT 3 |
| Indicates that the object was initialized directly. | |
| #define | JOB_MONITOR_INTERVAL 20 |
| Time interval (in sec) at which job monitoring datagrams are sent. | |
| #define | SYS_MONITOR_INTERVAL 20 |
| Time interval (in sec) at which system monitoring datagrams are sent. | |
| #define | RECHECK_INTERVAL 600 |
| Time interval (in sec) at which the configuration files are checked for changes. | |
| #define | GEN_MONITOR_INTERVALS 10 |
| The number of time intervals at which ApMon sends general system monitoring information (considering the time intervals at which ApMon sends system monitoring information). | |
| #define | MAX_MONITORED_JOBS 30 |
| The maximum number of jobs that can be monitored. | |
| #define | MAX_SYS_PARAMS 30 |
| The maximum number of system parameters. | |
| #define | MAX_GEN_PARAMS 30 |
| The maximum number of general system parameters. | |
| #define | MAX_JOB_PARAMS 30 |
| The maximum number of job parameters. | |
| #define | MAX_MSG_RATE 20 |
| The maximum number of messages per second that will be sent to MonALISA. | |
| #define | NLETTERS 26 |
| #define | TWO_BILLION 2000000000 |
| #define | APMON_VERSION "2.2.1" |
Functions | |
| void * | bkTask (void *param) |
| This function is executed in a background thread and has two roles: it automatically sends the system/job monitoring parameters (if the user requested) and it checks the configuration file/URLs for changes. | |
| #define APMON_VERSION "2.2.1" |
| #define DEFAULT_PORT 8884 |
The default port on which MonALISA listens.
Definition at line 114 of file ApMon.h.
Referenced by ApMon::addToDestinations().
| #define DIRECT_INIT 3 |
Indicates that the object was initialized directly.
Definition at line 126 of file ApMon.h.
Referenced by ApMon::setConfRecheck().
| #define FILE_INIT 1 |
Indicates that the object was initialized from a file.
Definition at line 118 of file ApMon.h.
Referenced by ApMon::ApMon(), and bkTask().
| #define GEN_MONITOR_INTERVALS 10 |
The number of time intervals at which ApMon sends general system monitoring information (considering the time intervals at which ApMon sends system monitoring information).
Definition at line 138 of file ApMon.h.
Referenced by ApMon::setGenMonitoring().
| #define JOB_MONITOR_INTERVAL 20 |
Time interval (in sec) at which job monitoring datagrams are sent.
Definition at line 128 of file ApMon.h.
Referenced by ApMon::initMonitoring(), and ApMon::setJobMonitoring().
| #define LIST_INIT 2 |
Indicates that the object was initialized from a list.
Definition at line 123 of file ApMon.h.
Referenced by ApMon::constructFromList().
| #define MAX_DGRAM_SIZE 8192 |
Maximum UDP datagram size.
Definition at line 102 of file ApMon.h.
Referenced by ApMon::arrayInit(), ApMon::encodeParams(), and ApMon::sendTimedParameters().
| #define MAX_GEN_PARAMS 30 |
| #define MAX_HEADER_LENGTH 40 |
Maximum header length.
Definition at line 115 of file ApMon.h.
Referenced by ApMon::encodeParams(), and ApMon::sendTimedParameters().
| #define MAX_JOB_PARAMS 30 |
| #define MAX_MONITORED_JOBS 30 |
The maximum number of jobs that can be monitored.
Definition at line 140 of file ApMon.h.
Referenced by ApMon::addJobToMonitor(), and ApMon::arrayInit().
| #define MAX_MSG_RATE 20 |
The maximum number of messages per second that will be sent to MonALISA.
Definition at line 149 of file ApMon.h.
Referenced by ApMon::initMonitoring().
| #define MAX_N_DESTINATIONS 30 |
Maximum number of destination hosts to which we send the parameters.
Definition at line 111 of file ApMon.h.
Referenced by ApMon::initialize(), and ApMon::parseConf().
| #define MAX_STRING_LEN 512 |
Maximum string length (for hostnames).
Definition at line 103 of file ApMon.h.
Referenced by ApMon::arrayInit(), ProcUtils::countOpenFiles(), ProcUtils::getBootTime(), apmon_mon_utils::getChildren(), ProcUtils::getCPUInfo(), ProcUtils::getCPUUsage(), ApMon::getDestFromWeb(), ProcUtils::getMemUsed(), ProcUtils::getNetInfo(), ProcUtils::getNetworkInterfaces(), ProcUtils::getNumCPUs(), ProcUtils::getSwapPages(), ProcUtils::getSysMem(), apmon_utils::httpRequest(), apmon_utils::isPrivateAddress(), apmon_utils::parse_URL(), ApMon::parseConf(), ApMon::parseXApMonLine(), apmon_mon_utils::readJobInfo(), and apmon_utils::urlModified().
| #define MAX_STRING_LEN1 (MAX_STRING_LEN + 1) |
Definition at line 104 of file ApMon.h.
Referenced by ApMon::parseConf(), apmon_mon_utils::readJobInfo(), and apmon_utils::urlModified().
| #define MAX_SYS_PARAMS 30 |
| #define NLETTERS 26 |
| #define PROCUTILS_ERROR -2 |
Definition at line 107 of file ApMon.h.
Referenced by apmon_mon_utils::readJobInfo(), ApMon::sendGeneralInfo(), ApMon::sendSysInfo(), ApMon::updateGeneralInfo(), and ApMon::updateSysInfo().
| #define RECHECK_INTERVAL 600 |
Time interval (in sec) at which the configuration files are checked for changes.
Definition at line 133 of file ApMon.h.
Referenced by bkTask(), ApMon::initMonitoring(), and ApMon::setConfRecheck().
| #define RET_ERROR -1 |
Function return value (error).
Definition at line 106 of file ApMon.h.
Referenced by apmon_mon_utils::getChildren(), ProcUtils::getNetstatInfo(), ProcUtils::getProcesses(), apmon_mon_utils::parsePSTime(), apmon_mon_utils::readJobDiskUsage(), apmon_mon_utils::readJobInfo(), ApMon::sendGeneralInfo(), ApMon::sendOneJobInfo(), ApMon::sendSysInfo(), ApMon::sendTimedParameters(), apmon_utils::sizeEval(), ApMon::updateJobInfo(), ApMon::updateSysInfo(), and apmon_utils::xdrSize().
| #define RET_NOT_SENT -3 |
A datagram was not sent because the number of datagrams that can be sent per second is limited.
Definition at line 108 of file ApMon.h.
Referenced by ApMon::sendTimedParameters().
| #define RET_SUCCESS 0 |
Function return value (success).
Definition at line 105 of file ApMon.h.
Referenced by ApMon::initMonitoring(), ApMon::sendOneJobInfo(), ApMon::sendSysInfo(), and ApMon::sendTimedParameters().
| #define SYS_MONITOR_INTERVAL 20 |
Time interval (in sec) at which system monitoring datagrams are sent.
Definition at line 130 of file ApMon.h.
Referenced by ApMon::initMonitoring(), and ApMon::setSysMonitoring().
| #define TWO_BILLION 2000000000 |
| #define XDR_INT32 2 |
Used to code the 4 bytes integer data type.
Definition at line 97 of file ApMon.h.
Referenced by ApMon::encodeParams(), apmon_utils::logParameters(), ApMon::sendParameter(), ApMon::sendTimedParameters(), apmon_utils::sizeEval(), and apmon_utils::xdrSize().
| #define XDR_REAL32 4 |
Used to code the 4 bytes real data type.
Definition at line 99 of file ApMon.h.
Referenced by ApMon::encodeParams(), apmon_utils::logParameters(), ApMon::sendParameter(), apmon_utils::sizeEval(), and apmon_utils::xdrSize().
| #define XDR_REAL64 5 |
Used to code the 8 bytes real data type.
Definition at line 100 of file ApMon.h.
Referenced by ApMon::encodeParams(), apmon_utils::logParameters(), ApMon::sendGeneralInfo(), ApMon::sendOneJobInfo(), ApMon::sendParameter(), ApMon::sendSysInfo(), apmon_utils::sizeEval(), and apmon_utils::xdrSize().
| #define XDR_STRING 0 |
Used to code the string data type.
Definition at line 95 of file ApMon.h.
Referenced by ApMon::encodeParams(), apmon_utils::logParameters(), ApMon::sendGeneralInfo(), ApMon::sendParameter(), ApMon::sendTimedParameters(), apmon_utils::sizeEval(), and apmon_utils::xdrSize().
| void* bkTask | ( | void * | param | ) |
This function is executed in a background thread and has two roles: it automatically sends the system/job monitoring parameters (if the user requested) and it checks the configuration file/URLs for changes.
(this is done in a separate thread).
Definition at line 913 of file ApMon.cpp.
References bkTask(), ApMon::confChangedCond, FILE_INIT, INFO, JOB_INFO_SEND, apmon_utils::logger(), RECHECK_CONF, RECHECK_INTERVAL, SYS_INFO_SEND, apmon_utils::urlModified(), WARNING, and std::runtime_error::what().
Referenced by bkTask().
00913 { 00914 #else 00915 DWORD WINAPI bkTask(void *param) { 00916 #endif 00917 struct stat st; 00918 #ifndef WIN32 00919 struct timespec delay; 00920 #else 00921 DWORD delay; 00922 #endif 00923 bool resourceChanged, haveChange; 00924 int nextOp = -1, i, ret; 00925 int generalInfoCount; 00926 time_t crtTime, timeRemained; 00927 time_t nextRecheck = 0, nextJobInfoSend = 0, nextSysInfoSend = 0; 00928 ApMon *apm = (ApMon *)param; 00929 char logmsg[200]; 00930 00931 logger(INFO, "[Starting background thread...]"); 00932 apm -> bkThreadStarted = true; 00933 00934 crtTime = time(NULL); 00935 00936 pthread_mutex_lock(&(apm -> mutexBack)); 00937 if (apm -> confCheck) { 00938 nextRecheck = crtTime + apm -> crtRecheckInterval; 00939 //sprintf(logmsg, "###1 crt %ld interv %ld recheck %ld ", crtTime, 00940 // apm -> crtRecheckInterval, nextRecheck); 00941 //logger(FINE, logmsg); 00942 //fflush(stdout); 00943 } 00944 if (apm -> jobMonitoring) 00945 nextJobInfoSend = crtTime + apm -> jobMonitorInterval; 00946 if (apm -> sysMonitoring) 00947 nextSysInfoSend = crtTime + apm -> sysMonitorInterval; 00948 pthread_mutex_unlock(&(apm -> mutexBack)); 00949 00950 timeRemained = -1; 00951 generalInfoCount = 0; 00952 00953 while (1) { 00954 pthread_mutex_lock(&apm -> mutexBack); 00955 if (apm -> stopBkThread) { 00956 // printf("### stopBkThread \n"); 00957 pthread_mutex_unlock(&apm -> mutexBack); 00958 break; 00959 } 00960 pthread_mutex_unlock(&apm -> mutexBack); 00961 00962 //sprintf(logmsg, "### 2 recheck %ld sys %ld ", nextRecheck, 00963 // nextSysInfoSend); 00964 //logger(FINE, logmsg); 00965 00966 /* determine the next operation that must be performed */ 00967 if (nextRecheck > 0 && (nextJobInfoSend <= 0 || 00968 nextRecheck <= nextJobInfoSend)) { 00969 if (nextSysInfoSend <= 0 || nextRecheck <= nextSysInfoSend) { 00970 nextOp = RECHECK_CONF; 00971 timeRemained = nextRecheck - crtTime; 00972 } else { 00973 nextOp = SYS_INFO_SEND; 00974 timeRemained = nextSysInfoSend - crtTime; 00975 } 
00976 } else { 00977 if (nextJobInfoSend > 0 && (nextSysInfoSend <= 0 || 00978 nextJobInfoSend <= nextSysInfoSend)) { 00979 nextOp = JOB_INFO_SEND; 00980 timeRemained = nextJobInfoSend - crtTime; 00981 } else if (nextSysInfoSend > 0) { 00982 nextOp = SYS_INFO_SEND; 00983 timeRemained = nextSysInfoSend - crtTime; 00984 } 00985 } 00986 00987 if (timeRemained == -1) 00988 timeRemained = RECHECK_INTERVAL; 00989 00990 #ifndef WIN32 00991 /* the moment when the next operation should be performed */ 00992 delay.tv_sec = crtTime + timeRemained; 00993 delay.tv_nsec = 0; 00994 #else 00995 delay = (/*crtTime +*/ timeRemained) * 1000; // this is in millis 00996 #endif 00997 00998 pthread_mutex_lock(&(apm -> mutexBack)); 00999 01000 pthread_mutex_lock(&(apm -> mutexCond)); 01001 /* check for changes in the settings */ 01002 haveChange = false; 01003 if (apm -> jobMonChanged || apm -> sysMonChanged || apm -> recheckChanged) 01004 haveChange = true; 01005 if (apm -> jobMonChanged) { 01006 if (apm -> jobMonitoring) 01007 nextJobInfoSend = crtTime + apm -> jobMonitorInterval; 01008 else 01009 nextJobInfoSend = -1; 01010 apm -> jobMonChanged = false; 01011 } 01012 if (apm -> sysMonChanged) { 01013 if (apm -> sysMonitoring) 01014 nextSysInfoSend = crtTime + apm -> sysMonitorInterval; 01015 else 01016 nextSysInfoSend = -1; 01017 apm -> sysMonChanged = false; 01018 } 01019 if (apm -> recheckChanged) { 01020 if (apm -> confCheck) { 01021 nextRecheck = crtTime + apm -> crtRecheckInterval; 01022 } 01023 else 01024 nextRecheck = -1; 01025 apm -> recheckChanged = false; 01026 } 01027 pthread_mutex_unlock(&(apm -> mutexBack)); 01028 01029 if (haveChange) { 01030 pthread_mutex_unlock(&(apm -> mutexCond)); 01031 continue; 01032 } 01033 01034 /* wait until the next operation should be performed or until 01035 a change in the settings occurs */ 01036 #ifndef WIN32 01037 ret = pthread_cond_timedwait(&(apm -> confChangedCond), 01038 &(apm -> mutexCond), &delay); 01039 pthread_mutex_unlock(&(apm -> 
mutexCond)); 01040 #else 01041 pthread_mutex_unlock(&(apm -> mutexCond)); 01042 ret = WaitForSingleObject(apm->confChangedCond, delay); 01043 #endif 01044 if (ret == ETIMEDOUT) { 01045 // printf("### ret TIMEDOUT\n"); 01046 /* now perform the operation */ 01047 if (nextOp == JOB_INFO_SEND) { 01048 apm -> sendJobInfo(); 01049 crtTime = time(NULL); 01050 nextJobInfoSend = crtTime + apm -> getJobMonitorInterval(); 01051 } 01052 01053 if (nextOp == SYS_INFO_SEND) { 01054 apm -> sendSysInfo(); 01055 if (apm -> getGenMonitoring()) { 01056 if (generalInfoCount <= 1) 01057 apm -> sendGeneralInfo(); 01058 generalInfoCount = (generalInfoCount + 1) % apm -> genMonitorIntervals; 01059 } 01060 crtTime = time(NULL); 01061 nextSysInfoSend = crtTime + apm -> getSysMonitorInterval(); 01062 } 01063 01064 if (nextOp == RECHECK_CONF) { 01065 //logger(FINE, "### recheck conf"); 01066 resourceChanged = false; 01067 try { 01068 if (apm -> initType == FILE_INIT) { 01069 sprintf(logmsg, "Checking for modifications for file %s ", 01070 apm -> initSources[0]); 01071 logger(INFO, logmsg); 01072 stat(apm -> initSources[0], &st); 01073 if (st.st_mtime > apm -> lastModifFile) { 01074 sprintf(logmsg, "File %s modified ", apm -> initSources[0]); 01075 logger(INFO, logmsg); 01076 resourceChanged = true; 01077 } 01078 } 01079 01080 // check the configuration URLs 01081 for (i = 0; i < apm -> confURLs.nConfURLs; i++) { 01082 sprintf(logmsg, "[Checking for modifications for URL %s ] ", 01083 apm -> confURLs.vURLs[i]); 01084 logger(INFO, logmsg); 01085 if (urlModified(apm -> confURLs.vURLs[i], apm -> confURLs.lastModifURLs[i])) { 01086 sprintf(logmsg, "URL %s modified ", apm -> confURLs.vURLs[i]); 01087 logger(INFO, logmsg); 01088 resourceChanged = true; 01089 break; 01090 } 01091 } 01092 01093 if (resourceChanged) { 01094 logger(INFO, "Reloading configuration..."); 01095 if (apm -> initType == FILE_INIT) 01096 apm -> initialize(apm -> initSources[0], false); 01097 else 01098 apm -> initialize(apm -> 
nInitSources, apm -> initSources, false); 01099 } 01100 apm -> setCrtRecheckInterval(apm -> getRecheckInterval()); 01101 } catch (runtime_error &err) { 01102 logger(WARNING, err.what()); 01103 logger(WARNING, "Increasing the time interval for reloading the configuration..."); 01104 apm -> setCrtRecheckInterval(apm -> getRecheckInterval() * 5); 01105 } 01106 crtTime = time(NULL); 01107 nextRecheck = crtTime + apm -> getCrtRecheckInterval(); 01108 //sleep(apm -> getCrtRecheckInterval()); 01109 } 01110 } 01111 01112 } // while 01113 01114 #ifndef WIN32 01115 return NULL; // it doesn't matter what we return here 01116 #else 01117 return 0; 01118 #endif 01119 }