![]() |
|
|
Generated: 8 Jan 2009 |
#include <ApMon.h>

The data is packed in UDP datagrams, in XDR format. A datagram has the following structure:
Since v1.6 ApMon has the xApMon extension, which can be configured to send periodically, in a background thread, monitoring information regarding the system and/or some specified jobs.
Definition at line 212 of file ApMon.h.
Public Member Functions | |
| ApMon (char *initsource) throw (runtime_error) | |
| Initializes an ApMon object from a configuration file or URL. | |
| ApMon (int nDestinations, char **destinationsList) throw (runtime_error) | |
| Initializes an ApMon data structure from a vector of strings. | |
| ApMon (int nDestinations, char **destAddresses, int *destPorts, char **destPasswds) throw (runtime_error) | |
| Initializes an ApMon data structure, using arrays instead of a file. | |
| ~ApMon () | |
| ApMon destructor. | |
| int | sendParameter (char *clusterName, char *nodeName, char *paramName, int valueType, char *paramValue) throw (runtime_error) |
| Sends a parameter and its value to the MonALISA module. | |
| int | sendTimedParameter (char *clusterName, char *nodeName, char *paramName, int valueType, char *paramValue, int timestamp) throw (runtime_error) |
| Sends a parameter and its value to the MonALISA module, together with a timestamp. | |
| int | sendParameter (char *clusterName, char *nodeName, char *paramName, int paramValue) throw (runtime_error) |
| Sends an integer parameter and its value to the MonALISA module. | |
| int | sendParameter (char *clusterName, char *nodeName, char *paramName, float paramValue) throw (runtime_error) |
| Sends a parameter of type float and its value to the MonALISA module. | |
| int | sendParameter (char *clusterName, char *nodeName, char *paramName, double paramValue) throw (runtime_error) |
| Sends a parameter of type double and its value to the MonALISA module. | |
| int | sendParameter (char *clusterName, char *nodeName, char *paramName, char *paramValue) throw (runtime_error) |
| Sends a parameter of type string and its value to the MonALISA module. | |
| int | sendParameters (char *clusterName, char *nodeName, int nParams, char **paramNames, int *valueTypes, char **paramValues) throw (runtime_error) |
| Sends a set of parameters and their values to the MonALISA module. | |
| int | sendTimedParameters (char *clusterName, char *nodeName, int nParams, char **paramNames, int *valueTypes, char **paramValues, int timestamp) throw (runtime_error) |
| Sends a set of parameters and their values to the MonALISA module, together with a timestamp. | |
| bool | getConfCheck () |
| Returns the value of the confCheck flag. | |
| long | getRecheckInterval () |
| Returns the value of the time interval (in seconds) between two recheck operations for the configuration files. | |
| void | setRecheckInterval (long val) |
| Sets the value of the time interval (in seconds) between two recheck operations for the configuration files. | |
| void | setConfRecheck (bool confRecheck, long interval) |
| Enables/disables the periodical check for changes in the configuration files/URLs. | |
| void | setConfRecheck (bool confRecheck) |
| Enables/disables the periodical check for changes in the configuration files/URLs. | |
| void | setJobMonitoring (bool jobMonitoring, long interval) |
| Enables/disables the periodical sending of datagrams with job monitoring information. | |
| void | setJobMonitoring (bool jobMonitoring) |
| Enables/disables the job monitoring. | |
| long | getJobMonitorInterval () |
| Returns the interval at which job monitoring datagrams are sent. | |
| bool | getJobMonitoring () |
| Returns true if the job monitoring is enabled, and false otherwise. | |
| void | setSysMonitoring (bool sysMonitoring, long interval) |
| Enables/disables the periodical sending of datagrams with system monitoring information. | |
| void | setSysMonitoring (bool sysMonitoring) |
| Enables/disables the system monitoring. | |
| long | getSysMonitorInterval () |
| Returns the interval at which system monitoring datagrams are sent. | |
| bool | getSysMonitoring () |
| Returns true if the system monitoring is enabled, and false otherwise. | |
| void | setGenMonitoring (bool genMonitoring, int nIntervals) |
| Enables/disables the periodical sending of datagrams with general system information. | |
| void | setGenMonitoring (bool genMonitoring) |
| Enables/disables the sending of datagrams with general system information. | |
| bool | getGenMonitoring () |
| Returns true if the sending of general system information is enabled and false otherwise. | |
| void | addJobToMonitor (long pid, char *workdir, char *clusterName, char *nodeName) throw (runtime_error) |
| Adds a new job to the list of the jobs monitored by ApMon. | |
| void | removeJobToMonitor (long pid) throw (runtime_error) |
| Removes a job from the list of the jobs monitored by ApMon. | |
| void | setSysMonClusterNode (char *clusterName, char *nodeName) |
| This function is called by the user to set the cluster name and the node name for the system monitoring datagrams. | |
| void | setMaxMsgRate (int maxRate) |
| Sets the maximum number of messages that are sent to MonALISA in one second. | |
Static Public Member Functions | |
| static void | setLogLevel (char *newLevel_s) |
| Sets the ApMon logging level. | |
| static void | errExit (char *msg) |
| Displays an error message and exits with -1 as return value. | |
Protected Member Functions | |
| void | initialize (char *filename, bool firstTime) throw (runtime_error) |
| Initializes an ApMon object from a configuration file. | |
| void | constructFromList (int nDestinations, char **destinationsList) throw (runtime_error) |
| Initializes an ApMon object from a list with URLs and destination hosts. | |
| void | initialize (int nDestinations, char **destList, bool firstTime) throw (runtime_error) |
| Initializes an ApMon object from a list with URLs and destination hosts. | |
| void | loadFile (char *filename, int *nDestinations, char **destAddresses, int *destPorts, char **destPasswds) throw (runtime_error) |
| Parses a configuration file which contains addresses, ports and passwords for the destination hosts and puts the results in the vectors given as parameters. | |
| void | arrayInit (int nDestinations, char **destAddresses, int *destPorts, char **destPasswds) throw (runtime_error) |
| Internal function that initializes an ApMon data structure. | |
| void | arrayInit (int nDestinations, char **destAddresses, int *destPorts, char **destPasswds, bool firstTime) throw (runtime_error) |
| Internal function that initializes an ApMon data structure. | |
| void | addToDestinations (char *line, int *nDestinations, char *destAddresses[], int destPorts[], char *destPasswds[]) |
| Parses the string line, which has the form hostname:port, and adds the hostname and the port to the lists given as parameters. | |
| void | getDestFromWeb (char *url, int *nDestinations, char *destAddresses[], int destPorts[], char *destPasswds[], ConfURLs &confURLs) throw (runtime_error) |
| Gets a configuration file from a web location and adds the destination addresses and ports to the lists given as parameters. | |
| void | encodeParams (int nParams, char **paramNames, int *valueTypes, char **paramValues, int timestamp) throw (runtime_error) |
| Encodes in the XDR format the data from a ApMon structure. | |
| void | initMonitoring () |
| Initializes the monitoring configurations and the names of the parameters included in the monitoring datagrams. | |
| void | sendJobInfo () |
| Sends datagrams containing information about the jobs that are currently being monitored. | |
| void | sendOneJobInfo (MonitoredJob job) |
| Sends datagrams with monitoring information about the specified job to all the destination hosts. | |
| void | updateJobInfo (MonitoredJob job) |
| Update the monitoring information regarding the specified job. | |
| void | sendSysInfo () |
| Sends datagrams with system monitoring information to all the destination hosts. | |
| void | updateSysInfo () |
| Update the system monitoring information with new values obtained from the /proc filesystem. | |
| void | sendGeneralInfo () |
| Sends datagrams with general system monitoring information to all the destination hosts. | |
| void | updateGeneralInfo () |
| Update the general monitoring information. | |
| void | setBackgroundThread (bool val) |
| Sets the value of the confCheck flag. | |
| long | getCrtRecheckInterval () |
| Returns the actual value of the time interval (in seconds) between two recheck operations for the configuration files. | |
| void | setCrtRecheckInterval (long val) |
| void | freeConf () |
| Frees the data structures needed to hold the configuration settings. | |
| void | parseXApMonLine (char *line) |
| Parses an xApMon line from the configuration file and sets the corresponding parameters in the ApMon object. | |
| void | initSocket () throw (runtime_error) |
| Initializes the UDP socket used to send the datagrams. | |
| void | parseConf (FILE *fp, int *nDestinations, char **destAddresses, int *destPorts, char **destPasswds) throw (runtime_error) |
| Parses the contents of a configuration file. | |
| bool | shouldSend () |
| Decides if the current datagram should be sent (so that the maximum number of datagrams per second is respected on average). | |
Protected Attributes | |
| char * | clusterName |
| The name of the monitored cluster. | |
| char * | nodeName |
| The name of the monitored node. | |
| char * | sysMonCluster |
| The cluster name used when sending system monitoring datagrams. | |
| char * | sysMonNode |
| The node name used when sending system monitoring datagrams. | |
| int | nDestinations |
| The number of destinations to send the results to. | |
| char ** | destAddresses |
| The IP addresses where the results will be sent. | |
| int * | destPorts |
| The ports where the destination hosts listen. | |
| char ** | destPasswds |
| Passwords for the MonALISA hosts. | |
| char * | buf |
| The buffer which holds the message data (encoded in XDR). | |
| int | dgramSize |
| The size of the data inside the datagram (header not included). | |
| int | sockfd |
| Socket descriptor. | |
| bool | confCheck |
| If this flag is true, the configuration file / URLs are periodically rechecked for changes. | |
| int | nInitSources |
| The number of initialization sources. | |
| char ** | initSources |
| The name(s) of the initialization source(s) (file or list). | |
| int | initType |
| long | recheckInterval |
| The configuration file and the URLs are checked for changes at intervals of this number of seconds (this value is requested by the user and will be used if no errors appear when reloading the configuration). | |
| long | crtRecheckInterval |
| If the configuration URLs cannot be reloaded, the interval until the next attempt will be increased. | |
| pthread_t | bkThread |
| Background thread which periodically rechecks the configuration and sends monitoring information. | |
| pthread_mutex_t | mutex |
| Used to protect the general ApMon data structures. | |
| pthread_mutex_t | mutexBack |
| Used to protect the variables needed by the background thread. | |
| pthread_mutex_t | mutexCond |
| Used for the condition variable confChangedCond. | |
| pthread_cond_t | confChangedCond |
| Used to notify changes in the monitoring configuration. | |
| bool | recheckChanged |
| These flags indicate changes in the monitoring configuration. | |
| bool | jobMonChanged |
| bool | sysMonChanged |
| bool | haveBkThread |
| If this flag is true, the background thread is created (but not necessarily started). | |
| bool | bkThreadStarted |
| If this flag is true, the background thread is started. | |
| bool | stopBkThread |
| If this flag is true, there was a request to stop the background thread. | |
| bool | autoDisableMonitoring |
| If this flag is set to true, when the value of a parameter cannot be read from /proc, ApMon will not attempt to include that value in the next datagrams. | |
| bool | sysMonitoring |
| If this flag is true, packets with system information taken from /proc are periodically sent to MonALISA. | |
| bool | jobMonitoring |
| If this flag is true, packets with job information taken from /proc are periodically sent to MonALISA. | |
| bool | genMonitoring |
| If this flag is true, packets with general system information taken from /proc are periodically sent to MonALISA. | |
| long | jobMonitorInterval |
| Job/System monitoring information obtained from /proc is sent at these time intervals. | |
| long | sysMonitorInterval |
| int | genMonitorIntervals |
| General system monitoring information is sent at a time interval equal to genMonitorIntervals * sysMonitorInterval. | |
| int | nSysMonitorParams |
| Number of parameters that can be enabled/disabled by the user in the system/job/general monitoring datagrams. | |
| int | nJobMonitorParams |
| int | nGenMonitorParams |
| char * | sysMonitorParams [MAX_SYS_PARAMS] |
| char * | genMonitorParams [MAX_GEN_PARAMS] |
| char * | jobMonitorParams [MAX_JOB_PARAMS] |
| int | actSysMonitorParams [MAX_SYS_PARAMS] |
| int | actGenMonitorParams [MAX_GEN_PARAMS] |
| int | actJobMonitorParams [MAX_JOB_PARAMS] |
| ConfURLs | confURLs |
| int | nMonJobs |
| The number of jobs that will be monitored. | |
| MonitoredJob * | monJobs |
| Array which holds information about the jobs to be monitored. | |
| long | lastModifFile |
| The last time when the configuration file was modified. | |
| time_t | lastJobInfoSend |
| char | username [MAX_STRING_LEN] |
| The name of the user who owns this process. | |
| char | groupname [MAX_STRING_LEN] |
| The group to which the user belongs. | |
| char | myHostname [MAX_STRING_LEN] |
| The name of the host on which ApMon currently runs. | |
| char | myIP [MAX_STRING_LEN] |
| The main IP address of the host on which ApMon currently runs. | |
| int | numIPs |
| The number of IP addresses of the host. | |
| char | allMyIPs [20][20] |
| A list with all the IP addresses of the host. | |
| int | numCPUs |
| The number of CPUs on the machine that runs ApMon. | |
| bool | sysInfo_first |
| time_t | lastSysInfoSend |
| The moment when the last system monitoring datagram was sent. | |
| double | lastSysVals [MAX_SYS_PARAMS] |
| double | currentSysVals [MAX_SYS_PARAMS] |
| int | sysRetResults [MAX_SYS_PARAMS] |
| double | currentJobVals [MAX_JOB_PARAMS] |
| int | jobRetResults [MAX_JOB_PARAMS] |
| double | currentGenVals [MAX_GEN_PARAMS] |
| int | genRetResults [MAX_GEN_PARAMS] |
| double | currentProcessStates [NLETTERS] |
| char | cpuVendor [100] |
| char | cpuFamily [100] |
| char | cpuModel [100] |
| char | cpuModelName [200] |
| char | interfaceNames [20][20] |
| The names of the network interfaces. | |
| int | nInterfaces |
| The number of network interfaces. | |
| double | lastBytesSent [20] |
| The total number of bytes sent through each interface, when the previous system monitoring datagram was sent. | |
| double | lastBytesReceived [20] |
| double | lastNetErrs [20] |
| The total number of network errors for each interface, when the previous system monitoring datagram was sent. | |
| double * | currentNetIn |
| The current values for the net_in, net_out, net_errs parameters. | |
| double * | currentNetOut |
| double * | currentNetErrs |
| double | currentNSockets [4] |
| The number of open TCP, UDP, ICMP and Unix sockets. | |
| double | currentSocketsTCP [20] |
| The number of TCP sockets in each possible state (ESTABLISHED, LISTEN, ...). | |
| char * | socketStatesMapTCP [20] |
| Table that associates the names of the TCP sockets states with the symbolic constants. | |
| int | maxMsgRate |
| long | prvTime |
| double | prvSent |
| double | prvDrop |
| long | crtTime |
| long | crtSent |
| long | crtDrop |
| double | hWeight |
| int | instance_id |
| Random number that identifies this instance of ApMon. | |
| int | seq_nr |
| Sequence number for the packets that are sent to MonALISA. | |
Friends | |
| class | ProcUtils |
| void * | bkTask (void *param) |
| This function is executed in a background thread and has two roles: it automatically sends the system/job monitoring parameters (if the user requested) and it checks the configuration file/URLs for changes. | |
| ApMon::ApMon | ( | char * | initsource | ) | throw (runtime_error) |
Initializes an ApMon object from a configuration file or URL.
| initsource | The name of the file/URL which contains the addresses and the ports of the destination hosts, and also the passwords (see README for details about the structure of this file). |
Definition at line 56 of file ApMon.cpp.
00057 { 00058 00059 if (initsource == NULL) 00060 throw runtime_error("[ ApMon() ] No conf file/URL provided"); 00061 00062 if (strstr(initsource, "http://") == initsource) { 00063 char *destList[1]; 00064 destList[0] = initsource; 00065 constructFromList(1, destList); 00066 } else { 00067 nInitSources = 1; 00068 initType = FILE_INIT; 00069 initSources = (char **)malloc(nInitSources * sizeof(char *)); 00070 if (initSources == NULL) 00071 throw runtime_error("[ ApMon() ] Error allocating memory."); 00072 00073 initSources[0] = strdup(initsource); 00074 initMonitoring(); 00075 00076 initialize(initsource, true); 00077 } 00078 }
| ApMon::ApMon | ( | int | nDestinations, | |
| char ** | destinationsList | |||
| ) | throw (runtime_error) |
Initializes an ApMon data structure from a vector of strings.
The strings can be of the form hostname[:port] [passwd] or can be URLs from where the hostnames are to be read.
Definition at line 137 of file ApMon.cpp.
00137 { 00138 constructFromList(nDestinations, destinationsList); 00139 }
| ApMon::ApMon | ( | int | nDestinations, | |
| char ** | destAddresses, | |||
| int * | destPorts, | |||
| char ** | destPasswds | |||
| ) | throw (runtime_error) |
Initializes an ApMon data structure, using arrays instead of a file.
| nDestinations | The number of destination hosts where the results will be sent. | |
| destAddresses | Array that contains the hostnames or IP addresses of the destination hosts. | |
| destPorts | The ports where the MonaLisa modules listen on the destination hosts. | |
| destPasswds | The passwords for the MonALISA hosts. |
Definition at line 352 of file ApMon.cpp.
00354 { 00355 initMonitoring(); 00356 00357 arrayInit(nDestinations, destAddresses, destPorts, destPasswds); 00358 }
| ApMon::~ApMon | ( | ) |
ApMon destructor.
Definition at line 563 of file ApMon.cpp.
00563 { 00564 int i; 00565 00566 if (bkThreadStarted) { 00567 if (getJobMonitoring()) { 00568 /* send a datagram with job monitoring information which covers 00569 the last time interval */ 00570 sendJobInfo(); 00571 } 00572 } 00573 00574 pthread_mutex_lock(&mutexBack); 00575 setBackgroundThread(false); 00576 pthread_mutex_unlock(&mutexBack); 00577 00578 pthread_mutex_destroy(&mutex); 00579 pthread_mutex_destroy(&mutexBack); 00580 pthread_mutex_destroy(&mutexCond); 00581 pthread_cond_destroy(&confChangedCond); 00582 00583 free(clusterName); 00584 free(nodeName); 00585 free(sysMonCluster); free(sysMonNode); 00586 00587 freeConf(); 00588 00589 free(monJobs); 00590 for (i = 0; i < nInitSources; i++) { 00591 free(initSources[i]); 00592 } 00593 free(initSources); 00594 00595 free(buf); 00596 #ifndef WIN32 00597 close(sockfd); 00598 #else 00599 closesocket(sockfd); 00600 WSACleanup(); 00601 #endif 00602 }
| int ApMon::sendParameter | ( | char * | clusterName, | |
| char * | nodeName, | |||
| char * | paramName, | |||
| int | valueType, | |||
| char * | paramValue | |||
| ) | throw (runtime_error) |
Sends a parameter and its value to the MonALISA module.
| clusterName | The name of the cluster that is monitored. If it is NULL, we keep the same cluster and node name as in the previous datagram. | |
| nodeName | The name of the node from the cluster from which the value was taken. | |
| paramName | The name of the parameter. | |
| valueType | The value type of the parameter. Can be one of the constants XDR_INT32 (integer), XDR_REAL32 (float), XDR_REAL64 (double), XDR_STRING (null-terminated string). | |
| paramValue | Pointer to the value of the parameter. |
Definition at line 759 of file ApMon.cpp.
00761 { 00762 00763 return sendParameters(clusterName, nodeName, 1, &paramName, 00764 &valueType, &paramValue); 00765 }
| int ApMon::sendTimedParameter | ( | char * | clusterName, | |
| char * | nodeName, | |||
| char * | paramName, | |||
| int | valueType, | |||
| char * | paramValue, | |||
| int | timestamp | |||
| ) | throw (runtime_error) |
Sends a parameter and its value to the MonALISA module, together with a timestamp.
| clusterName | The name of the cluster that is monitored. If it is NULL, we keep the same cluster and node name as in the previous datagram. | |
| nodeName | The name of the node from the cluster from which the value was taken. | |
| paramName | The name of the parameter. | |
| valueType | The value type of the parameter. Can be one of the constants XDR_INT32 (integer), XDR_REAL32 (float), XDR_REAL64 (double), XDR_STRING (null-terminated string). | |
| paramValue | Pointer to the value of the parameter. | |
| timestamp | The associated timestamp (in seconds). |
Definition at line 767 of file ApMon.cpp.
00769 { 00770 00771 return sendTimedParameters(clusterName, nodeName, 1, &paramName, 00772 &valueType, &paramValue, timestamp); 00773 }
| int ApMon::sendParameter | ( | char * | clusterName, | |
| char * | nodeName, | |||
| char * | paramName, | |||
| int | paramValue | |||
| ) | throw (runtime_error) |
Sends an integer parameter and its value to the MonALISA module.
| clusterName | The name of the cluster that is monitored. If it is NULL, we keep the same cluster and node name as in the previous datagram. | |
| nodeName | The name of the node from the cluster from which the value was taken. | |
| paramName | The name of the parameter. | |
| paramValue | The value of the parameter. |
Definition at line 775 of file ApMon.cpp.
00776 { 00777 00778 return sendParameter(clusterName, nodeName, paramName, XDR_INT32, 00779 (char *)&paramValue); 00780 }
| int ApMon::sendParameter | ( | char * | clusterName, | |
| char * | nodeName, | |||
| char * | paramName, | |||
| float | paramValue | |||
| ) | throw (runtime_error) |
Sends a parameter of type float and its value to the MonALISA module.
| clusterName | The name of the cluster that is monitored. If it is NULL, we keep the same cluster and node name as in the previous datagram. | |
| nodeName | The name of the node from the cluster from which the value was taken. | |
| paramName | The name of the parameter. | |
| paramValue | The value of the parameter. |
Definition at line 782 of file ApMon.cpp.
00783 { 00784 00785 return sendParameter(clusterName, nodeName, paramName, XDR_REAL32, 00786 (char *)&paramValue); 00787 }
| int ApMon::sendParameter | ( | char * | clusterName, | |
| char * | nodeName, | |||
| char * | paramName, | |||
| double | paramValue | |||
| ) | throw (runtime_error) |
Sends a parameter of type double and its value to the MonALISA module.
| clusterName | The name of the cluster that is monitored. If it is NULL, we keep the same cluster and node name as in the previous datagram. | |
| nodeName | The name of the node from the cluster from which the value was taken. | |
| paramName | The name of the parameter. | |
| paramValue | The value of the parameter. |
Definition at line 789 of file ApMon.cpp.
00790 { 00791 00792 return sendParameter(clusterName, nodeName, paramName, XDR_REAL64, 00793 (char *)&paramValue); 00794 }
| int ApMon::sendParameter | ( | char * | clusterName, | |
| char * | nodeName, | |||
| char * | paramName, | |||
| char * | paramValue | |||
| ) | throw (runtime_error) |
Sends a parameter of type string and its value to the MonALISA module.
| clusterName | The name of the cluster that is monitored. If it is NULL, we keep the same cluster and node name as in the previous datagram. | |
| nodeName | The name of the node from the cluster from which the value was taken. | |
| paramName | The name of the parameter. | |
| paramValue | The value of the parameter. |
Definition at line 796 of file ApMon.cpp.
00797 { 00798 00799 return sendParameter(clusterName, nodeName, paramName, XDR_STRING, 00800 paramValue); 00801 }
| int ApMon::sendParameters | ( | char * | clusterName, | |
| char * | nodeName, | |||
| int | nParams, | |||
| char ** | paramNames, | |||
| int * | valueTypes, | |||
| char ** | paramValues | |||
| ) | throw (runtime_error) |
Sends a set of parameters and their values to the MonALISA module.
| clusterName | The name of the cluster that is monitored. If it is NULL, we keep the same cluster and node name as in the previous datagram. | |
| nodeName | The name of the node from the cluster from which the value was taken. | |
| nParams | The number of parameters to be sent. | |
| paramNames | Array with the parameter names. | |
| valueTypes | Array with the value types represented as integers. | |
| paramValues | Array with the parameter values. |
Definition at line 616 of file ApMon.cpp.
00618 { 00619 return sendTimedParameters(clusterName, nodeName, nParams, 00620 paramNames, valueTypes, paramValues, -1); 00621 }
| int ApMon::sendTimedParameters | ( | char * | clusterName, | |
| char * | nodeName, | |||
| int | nParams, | |||
| char ** | paramNames, | |||
| int * | valueTypes, | |||
| char ** | paramValues, | |||
| int | timestamp | |||
| ) | throw (runtime_error) |
Sends a set of parameters and their values to the MonALISA module, together with a timestamp.
| clusterName | The name of the cluster that is monitored. If it is NULL, we keep the same cluster and node name as in the previous datagram. | |
| nodeName | The name of the node from the cluster from which the value was taken. | |
| nParams | The number of parameters to be sent. | |
| paramNames | Array with the parameter names. | |
| valueTypes | Array with the value types represented as integers. | |
| paramValues | Array with the parameter values. | |
| timestamp | The timestamp (in seconds) associated with the data. |
Definition at line 623 of file ApMon.cpp.
00625 { 00626 int i; 00627 int ret, ret1, ret2; 00628 char msg[100], buf2[MAX_HEADER_LENGTH+4], newBuf[MAX_DGRAM_SIZE]; 00629 #ifdef WIN32 00630 char crtAddr[20]; 00631 #endif 00632 char *headerTmp; 00633 char header[MAX_HEADER_LENGTH] = "v:"; 00634 strcat(header, APMON_VERSION); 00635 strcat(header, "_cpp"); // to indicate this is the C++ version 00636 strcat(header, "p:"); 00637 00638 pthread_mutex_lock(&mutex); 00639 00640 if(!shouldSend()) { 00641 pthread_mutex_unlock(&mutex); 00642 return RET_NOT_SENT; 00643 } 00644 00645 if (clusterName != NULL) { // don't keep the cached values for cluster name 00646 // and node name 00647 free(this -> clusterName); 00648 this -> clusterName = strdup(clusterName); 00649 00650 if (nodeName != NULL) { /* the user provided a name */ 00651 free(this -> nodeName); 00652 this -> nodeName = strdup(nodeName); 00653 } 00654 else { /* set the node name to the node's IP */ 00655 free(this -> nodeName); 00656 this -> nodeName = strdup(this -> myHostname); 00657 } // else 00658 } // if 00659 00660 if (this -> clusterName == NULL || this -> nodeName == NULL) { 00661 pthread_mutex_unlock(&mutex); 00662 throw runtime_error("[ sendTimedParameters() ] Null cluster name or node name"); 00663 } 00664 00665 //sortParams(nParams, paramNames, valueTypes, paramValues); 00666 00667 /* try to encode the parameters */ 00668 try { 00669 encodeParams(nParams, paramNames, valueTypes, paramValues, timestamp); 00670 } catch (runtime_error& err) { 00671 pthread_mutex_unlock(&mutex); 00672 throw err; 00673 } 00674 00675 headerTmp = (char *)malloc(MAX_HEADER_LENGTH * sizeof(char)); 00676 /* for each destination */ 00677 for (i = 0; i < nDestinations; i++) { 00678 XDR xdrs; 00679 struct sockaddr_in destAddr; 00680 00681 /* initialize the destination address */ 00682 memset(&destAddr, 0, sizeof(destAddr)); 00683 destAddr.sin_family = AF_INET; 00684 destAddr.sin_port = htons(destPorts[i]); 00685 #ifndef WIN32 00686 inet_pton(AF_INET, destAddresses[i], 
&destAddr.sin_addr); 00687 #else 00688 int dummy = sizeof(destAddr); 00689 sprintf(crtAddr, "%s:%d", destAddresses[i], destPorts[i]); 00690 ret = WSAStringToAddress(crtAddr, AF_INET, NULL, (struct sockaddr *) &destAddr, &dummy); 00691 if(ret){ 00692 ret = WSAGetLastError(); 00693 sprintf(msg, "[ sendTimedParameters() ] Error packing address %s, code %d ", crtAddr, ret); 00694 throw runtime_error(msg); 00695 } 00696 #endif 00697 /* add the header (which is different for each destination) */ 00698 00699 strcpy(headerTmp, header); 00700 strcat(headerTmp, destPasswds[i]); 00701 00702 /* initialize the XDR stream to encode the header */ 00703 xdrmem_create(&xdrs, buf2, MAX_HEADER_LENGTH, XDR_ENCODE); 00704 00705 /* encode the header */ 00706 ret = xdr_string(&xdrs, &(headerTmp), strlen(headerTmp) + 1); 00707 /* add the instance ID and the sequence number */ 00708 ret1 = xdr_int(&xdrs, &(instance_id)); 00709 ret2 = xdr_int(&xdrs, &(seq_nr)); 00710 00711 if (!ret || !ret1 || !ret2) { 00712 free(headerTmp); 00713 pthread_mutex_unlock(&mutex); 00714 throw runtime_error("[ sendTimedParameters() ] XDR encoding error for the header"); 00715 } 00716 00717 /* concatenate the header and the rest of the datagram */ 00718 int buf2Length = xdrSize(XDR_STRING, headerTmp) + 2 * xdrSize(XDR_INT32, NULL); 00719 memcpy(newBuf, buf2, buf2Length); 00720 memcpy(newBuf + buf2Length, buf, dgramSize); 00721 00722 /* send the buffer */ 00723 ret = sendto(sockfd, newBuf, dgramSize + buf2Length, 0, 00724 (struct sockaddr *)&destAddr, sizeof(destAddr)); 00725 if (ret == RET_ERROR) { 00726 free(headerTmp); 00727 pthread_mutex_unlock(&mutex); 00728 00729 /*re-initialize the socket */ 00730 #ifndef WIN32 00731 close(sockfd); 00732 #else 00733 closesocket(sockfd); 00734 #endif 00735 initSocket(); 00736 00737 /* throw exception because the datagram was not sent */ 00738 sprintf(msg, "[ sendTimedParameters() ] Error sending data to destination %s ", 00739 destAddresses[i]); 00740 throw 
runtime_error(msg); 00741 } 00742 else { 00743 sprintf(msg, "Datagram with size %d, instance id %d, sequence number %d, sent to %s, containing parameters:", 00744 ret, instance_id, seq_nr, destAddresses[i]); 00745 logger(FINE, msg); 00746 logParameters(FINE, nParams, paramNames, valueTypes, paramValues); 00747 } 00748 xdr_destroy(&xdrs); 00749 00750 } 00751 00752 seq_nr = (seq_nr + 1) % TWO_BILLION; 00753 free(headerTmp); 00754 pthread_mutex_unlock(&mutex); 00755 return RET_SUCCESS; 00756 }
| bool ApMon::getConfCheck | ( | ) | [inline] |
| long ApMon::getRecheckInterval | ( | ) | [inline] |
Returns the value of the time interval (in seconds) between two recheck operations for the configuration files.
If error(s) appear when reloading the configuration, the actual interval will be increased (transparently for the user).
Definition at line 639 of file ApMon.h.
00639 { return recheckInterval; }
| void ApMon::setRecheckInterval | ( | long | val | ) |
Sets the value of the time interval (in seconds) between two recheck operations for the configuration files.
The default value is 5min. If the value is negative, the configuration rechecking is turned off. If error(s) appear when reloading the configuration, the actual interval will be increased (transparently for the user).
Definition at line 1155 of file ApMon.cpp.
01155 { 01156 if (val > 0) { 01157 setConfRecheck(true, val); 01158 } 01159 else { 01160 setConfRecheck(false, val); 01161 } 01162 }
| void ApMon::setConfRecheck | ( | bool | confRecheck, | |
| long | interval | |||
| ) |
Enables/disables the periodical check for changes in the configuration files/URLs.
| confRecheck | If it is true, the periodical checking is enabled. | |
| interval | The time interval at which the verifications are done. If it is negative, a default value will be used. |
Definition at line 1121 of file ApMon.cpp.
01121 { 01122 char logmsg[100]; 01123 if (confCheck) { 01124 sprintf(logmsg, "Enabling configuration reloading (interval %ld)", 01125 interval); 01126 logger(INFO, logmsg); 01127 } 01128 01129 pthread_mutex_lock(&mutexBack); 01130 if (initType == DIRECT_INIT) { // no need to reload the configuration 01131 logger(WARNING, "[ setConfRecheck() } No configuration file/URL to reload."); 01132 return; 01133 } 01134 01135 this -> confCheck = confCheck; 01136 this -> recheckChanged = true; 01137 if (confCheck) { 01138 if (interval > 0) { 01139 this -> recheckInterval = interval; 01140 this -> crtRecheckInterval = interval; 01141 } else { 01142 this -> recheckInterval = RECHECK_INTERVAL; 01143 this -> crtRecheckInterval = RECHECK_INTERVAL; 01144 } 01145 setBackgroundThread(true); 01146 } 01147 else { 01148 if (jobMonitoring == false && sysMonitoring == false) 01149 setBackgroundThread(false); 01150 } 01151 pthread_mutex_unlock(&mutexBack); 01152 01153 }
| void ApMon::setConfRecheck | ( | bool | confRecheck | ) | [inline] |
Enables/disables the periodical check for changes in the configuration files/URLs.
If enabled, the verifications will be done at the default time interval.
Definition at line 663 of file ApMon.h.
00663 { 00664 setConfRecheck(confRecheck, RECHECK_INTERVAL); 00665 }
| void ApMon::setJobMonitoring | ( | bool | jobMonitoring, | |
| long | interval | |||
| ) |
Enables/disables the periodical sending of datagrams with job monitoring information.
| jobMonitoring | If it is true, the job monitoring is enabled | |
| interval | The time interval at which the datagrams are sent. If it is zero or negative, a default value will be used. |
Definition at line 1170 of file ApMon.cpp.
01170 { 01171 char logmsg[100]; 01172 if (jobMonitoring) { 01173 sprintf(logmsg, "Enabling job monitoring, time interval %ld s... ", interval); 01174 logger(INFO, logmsg); 01175 } else 01176 logger(INFO, "Disabling job monitoring..."); 01177 01178 pthread_mutex_lock(&mutexBack); 01179 this -> jobMonitoring = jobMonitoring; 01180 this -> jobMonChanged = true; 01181 if (jobMonitoring == true) { 01182 if (interval > 0) 01183 this -> jobMonitorInterval = interval; 01184 else 01185 this -> jobMonitorInterval = JOB_MONITOR_INTERVAL; 01186 setBackgroundThread(true); 01187 } else { 01188 // disable the background thread if it is not needed anymore 01189 if (this -> sysMonitoring == false && this -> confCheck == false) 01190 setBackgroundThread(false); 01191 } 01192 pthread_mutex_unlock(&mutexBack); 01193 }
| void ApMon::setJobMonitoring | ( | bool | jobMonitoring | ) | [inline] |
Enables/disables the job monitoring.
If the job monitoring is enabled, the datagrams will be sent at the default time interval.
Definition at line 678 of file ApMon.h.
00678 { 00679 setJobMonitoring(jobMonitoring, JOB_MONITOR_INTERVAL); 00680 }
| long ApMon::getJobMonitorInterval | ( | ) | [inline] |
Returns the interval at which job monitoring datagrams are sent.
If the job monitoring is disabled, returns -1.
Definition at line 685 of file ApMon.h.
00685 { 00686 long i = -1; 00687 pthread_mutex_lock(&mutexBack); 00688 if (jobMonitoring) 00689 i = jobMonitorInterval; 00690 pthread_mutex_unlock(&mutexBack); 00691 return i; 00692 }
| bool ApMon::getJobMonitoring | ( | ) | [inline] |
| void ApMon::setSysMonitoring | ( | bool | sysMonitoring, | |
| long | interval | |||
| ) |
Enables/disables the periodical sending of datagrams with system monitoring information.
| sysMonitoring | If it is true, the system monitoring is enabled | |
| interval | The time interval at which the datagrams are sent. If it is zero or negative, a default value will be used. |
Definition at line 1195 of file ApMon.cpp.
01195 { 01196 char logmsg[100]; 01197 if (sysMonitoring) { 01198 sprintf(logmsg, "Enabling system monitoring, time interval %ld s... ", interval); 01199 logger(INFO, logmsg); 01200 } else 01201 logger(INFO, "Disabling system monitoring..."); 01202 01203 pthread_mutex_lock(&mutexBack); 01204 this -> sysMonitoring = sysMonitoring; 01205 this -> sysMonChanged = true; 01206 if (sysMonitoring == true) { 01207 if (interval > 0) 01208 this -> sysMonitorInterval = interval; 01209 else 01210 this -> sysMonitorInterval = SYS_MONITOR_INTERVAL; 01211 setBackgroundThread(true); 01212 } else { 01213 // disable the background thread if it is not needed anymore 01214 if (this -> jobMonitoring == false && this -> confCheck == false) 01215 setBackgroundThread(false); 01216 } 01217 pthread_mutex_unlock(&mutexBack); 01218 }
| void ApMon::setSysMonitoring | ( | bool | sysMonitoring | ) | [inline] |
Enables/disables the system monitoring.
If the system monitoring is enabled, the datagrams will be sent at the default time interval.
Definition at line 714 of file ApMon.h.
00714 { 00715 setSysMonitoring(sysMonitoring, SYS_MONITOR_INTERVAL); 00716 }
| long ApMon::getSysMonitorInterval | ( | ) | [inline] |
Returns the interval at which system monitoring datagrams are sent.
If the system monitoring is disabled, returns -1.
Definition at line 721 of file ApMon.h.
00721 { 00722 long i = -1; 00723 pthread_mutex_lock(&mutexBack); 00724 if (sysMonitoring) 00725 i = sysMonitorInterval; 00726 pthread_mutex_unlock(&mutexBack); 00727 return i; 00728 }
| bool ApMon::getSysMonitoring | ( | ) | [inline] |
| void ApMon::setGenMonitoring | ( | bool | genMonitoring, | |
| int | nIntervals | |||
| ) |
Enables/disables the periodical sending of datagrams with general system information.
| genMonitoring | If it is true, enables the sending of the datagrams. | |
| nIntervals | The number of time intervals at which the datagrams are sent (considering the interval for sending system monitoring information). If it is zero or negative, a default value will be used. |
Definition at line 1220 of file ApMon.cpp.
01220 { 01221 char logmsg[100]; 01222 sprintf(logmsg, "Setting general information monitoring to %s ", 01223 boolStrings[(int)genMonitoring]); 01224 logger(INFO, logmsg); 01225 01226 pthread_mutex_lock(&mutexBack); 01227 this -> genMonitoring = genMonitoring; 01228 this -> sysMonChanged = true; 01229 if (genMonitoring == true) { 01230 if (nIntervals > 0) 01231 this -> genMonitorIntervals = nIntervals; 01232 else 01233 this -> genMonitorIntervals = GEN_MONITOR_INTERVALS; 01234 01235 if (this -> sysMonitoring == false) { 01236 pthread_mutex_unlock(&mutexBack); 01237 setSysMonitoring(true); 01238 pthread_mutex_lock(&mutexBack); 01239 } 01240 } // TODO: else check if we can stop the background thread (if no 01241 // system parameters are enabled for monitoring) 01242 pthread_mutex_unlock(&mutexBack); 01243 }
| void ApMon::setGenMonitoring | ( | bool | genMonitoring | ) | [inline] |
Enables/disables the sending of datagrams with general system information.
A default value is used for the number of time intervals at which the datagrams are sent.
Definition at line 752 of file ApMon.h.
00752 { 00753 setGenMonitoring(genMonitoring, GEN_MONITOR_INTERVALS); 00754 }
| bool ApMon::getGenMonitoring | ( | ) | [inline] |
| void ApMon::addJobToMonitor | ( | long | pid, | |
| char * | workdir, | |||
| char * | clusterName, | |||
| char * | nodeName | |||
| ) | throw (runtime_error) |
Adds a new job to the list of the jobs monitored by ApMon.
| pid | The job's PID. | |
| workdir | The working directory of the job. If it is NULL or if it has a zero length, directory monitoring will be disabled for this job. | |
| clusterName | The cluster name associated with the monitoring data for this job in MonALISA. | |
| nodeName | The node name associated with the monitoring data for this job in MonALISA. |
Definition at line 1287 of file ApMon.cpp.
01288 { 01289 if (nMonJobs >= MAX_MONITORED_JOBS) 01290 throw runtime_error("[ addJobToMonitor() ] Maximum number of jobs that can be monitored exceeded."); 01291 MonitoredJob job; 01292 job.pid = pid; 01293 if (workdir == NULL) 01294 strcpy(job.workdir, ""); 01295 else 01296 strcpy(job.workdir, workdir); 01297 01298 if (clusterName == NULL || strlen(clusterName) == 0) 01299 strcpy(job.clusterName, "ApMon_JobMon"); 01300 else 01301 strcpy(job.clusterName, clusterName); 01302 if (nodeName == NULL || strlen(nodeName) == 0) 01303 strcpy(job.nodeName, this -> myIP); 01304 else 01305 strcpy(job.nodeName, nodeName); 01306 01307 monJobs[nMonJobs++] = job; 01308 }
| void ApMon::removeJobToMonitor | ( | long | pid | ) | throw (runtime_error) |
Removes a job from the list of the jobs monitored by ApMon.
| pid | The pid of the job to be removed. |
Definition at line 1310 of file ApMon.cpp.
01310 { 01311 int i, j; 01312 char msg[100]; 01313 01314 if (nMonJobs <= 0) 01315 throw runtime_error("[ removeJobToMonitor() ] There are no monitored jobs."); 01316 01317 for (i = 0; i < nMonJobs; i++) { 01318 if (monJobs[i].pid == pid) { 01319 /* found the job, now remove it */ 01320 for (j = i; j < nMonJobs - 1; j++) 01321 monJobs[j] = monJobs[j + 1]; 01322 nMonJobs--; 01323 return; 01324 } 01325 } 01326 01327 /* the job was not found */ 01328 sprintf(msg, "removeJobToMonitor(): Job %ld not found.", pid); 01329 throw runtime_error(msg); 01330 }
| void ApMon::setSysMonClusterNode | ( | char * | clusterName, | |
| char * | nodeName | |||
| ) |
This function is called by the user to set the cluster name and the node name for the system monitoring datagrams.
Definition at line 1332 of file ApMon.cpp.
01332 { 01333 free (sysMonCluster); free(sysMonNode); 01334 sysMonCluster = strdup(clusterName); 01335 sysMonNode = strdup(nodeName); 01336 }
| void ApMon::setLogLevel | ( | char * | newLevel_s | ) | [static] |
Sets the ApMon logging level.
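The level is given by name; possible values are "FATAL" (level 0), "WARNING" (1), "INFO" (2), "FINE" (3) and "DEBUG" (4). Any other string is rejected with a warning and the logging level is left unchanged.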
Definition at line 1338 of file ApMon.cpp.
01338 { 01339 int newLevel; 01340 const char *levels[5] = {"FATAL", "WARNING", "INFO", "FINE", "DEBUG"}; 01341 char logmsg[100]; 01342 01343 for (newLevel = 0; newLevel < 5; newLevel++) 01344 if (strcmp(newLevel_s, levels[newLevel]) == 0) 01345 break; 01346 01347 if (newLevel >= 5) { 01348 sprintf(logmsg, "[ setLogLevel() ] Invalid level value: %s", newLevel_s); 01349 logger(WARNING, logmsg); 01350 } 01351 else 01352 logger(0, NULL, newLevel); 01353 }
| void ApMon::setMaxMsgRate | ( | int | maxRate | ) |
This sets the maximum number of messages that are sent to MonALISA in one second.
By default, this number is 50.
Definition at line 1356 of file ApMon.cpp.
01356 { 01357 if (maxRate > 0) 01358 this -> maxMsgRate = maxRate; 01359 }
| static void ApMon::errExit | ( | char * | msg | ) | [static] |
Displays an error message and exits with -1 as return value.
| msg | The message to be displayed. |
| void ApMon::initialize | ( | char * | filename, | |
| bool | firstTime | |||
| ) | throw (runtime_error) [protected] |
Initializes an ApMon object from a configuration file.
| filename | The name of the file which contains the addresses and the ports of the destination hosts (see README for details about the structure of this file). | |
| firstTime | If it is true, all the initializations will be done (the object is being constructed now). Else, only some structures will be reinitialized. |
Definition at line 80 of file ApMon.cpp.
00081 { 00082 00083 char *destAddresses[MAX_N_DESTINATIONS]; 00084 int destPorts[MAX_N_DESTINATIONS]; 00085 char *destPasswds[MAX_N_DESTINATIONS]; 00086 int nDest = 0, i; 00087 ConfURLs confURLs; 00088 00089 confURLs.nConfURLs = 0; 00090 00091 try { 00092 loadFile(filename, &nDest, destAddresses, destPorts, destPasswds); 00093 00094 arrayInit(nDest, destAddresses, destPorts, destPasswds, firstTime); 00095 } catch (runtime_error& err) { 00096 if (firstTime) 00097 throw err; 00098 else { 00099 logger(WARNING, err.what()); 00100 logger(WARNING, "Error reloading the configuration. Keeping the previous one."); 00101 return; 00102 } 00103 } 00104 00105 for (i = 0; i < nDest; i++) { 00106 free(destAddresses[i]); 00107 free(destPasswds[i]); 00108 } 00109 00110 pthread_mutex_lock(&mutex); 00111 this -> confURLs = confURLs; 00112 pthread_mutex_unlock(&mutex); 00113 }
| void ApMon::constructFromList | ( | int | nDestinations, | |
| char ** | destinationsList | |||
| ) | throw (runtime_error) [protected] |
Initializes an ApMon object from a list with URLs and destination hosts.
Definition at line 141 of file ApMon.cpp.
00142 { 00143 int i; 00144 00145 if (destinationsList == NULL) 00146 throw runtime_error("[ constructFromList() ] Null destination list"); 00147 00148 #ifdef __APPLE__ 00149 initType = OLIST_INIT; 00150 #else 00151 initType = LIST_INIT; 00152 #endif 00153 00154 initMonitoring(); 00155 00156 /* save the initialization list */ 00157 nInitSources = nDestinations; 00158 initSources = (char **)malloc(nInitSources * sizeof(char*)); 00159 if (initSources == NULL) 00160 throw runtime_error("[ ApMon() ] Error allocating memory."); 00161 00162 for (i = 0; i < nInitSources; i++) 00163 initSources[i] = strdup(destinationsList[i]); 00164 00165 initialize(nDestinations, destinationsList, true); 00166 }
| void ApMon::initialize | ( | int | nDestinations, | |
| char ** | destList, | |||
| bool | firstTime | |||
| ) | throw (runtime_error) [protected] |
Initializes an ApMon object from a list with URLs and destination hosts.
| nDestinations | The number of elements in destList. | |
| destList | The list with URLs. | |
| firstTime | If it is true, all the initializations will be done (the object is being constructed now). Else, only some structures will be reinitialized. |
Definition at line 168 of file ApMon.cpp.
00169 { 00170 char *destAddresses[MAX_N_DESTINATIONS]; 00171 int destPorts[MAX_N_DESTINATIONS]; 00172 char *destPasswds[MAX_N_DESTINATIONS]; 00173 char errmsg[200]; 00174 int i; 00175 int cnt = 0; 00176 ConfURLs confURLs; 00177 00178 logger(INFO, "Initializing destination addresses & ports:"); 00179 00180 if (nDestinations > MAX_N_DESTINATIONS) 00181 throw runtime_error("[ initialize() ] Maximum number of destinations exceeded"); 00182 00183 00184 confURLs.nConfURLs = 0; 00185 00186 for (i = 0; i < nDestinations; i++) { 00187 try { 00188 if (strstr(destinationsList[i], "http") == destinationsList[i]) 00189 getDestFromWeb(destinationsList[i], &cnt, 00190 destAddresses, destPorts, destPasswds, confURLs); 00191 else 00192 addToDestinations(destinationsList[i], &cnt, 00193 destAddresses, destPorts, destPasswds); 00194 00195 } catch (runtime_error &e) { 00196 sprintf(errmsg, "[ initialize() ] Error while loading the configuration: %s", e.what()); 00197 logger(WARNING, errmsg); 00198 if (!firstTime) { 00199 for (i = 0; i < cnt; i++) { 00200 free(destAddresses[i]); 00201 free(destPasswds[i]); 00202 } 00203 logger(WARNING, "Configuration not reloaded successfully. Keeping the previous one."); 00204 return; 00205 } 00206 } // catch 00207 } // for 00208 00209 try { 00210 arrayInit(cnt, destAddresses, destPorts, destPasswds, firstTime); 00211 } catch (runtime_error& err) { 00212 if (firstTime) 00213 throw err; 00214 else { 00215 logger(WARNING, "Error reloading the configuration. Keeping the previous one."); 00216 return; 00217 } 00218 } 00219 00220 for (i = 0; i < cnt; i++) { 00221 free(destAddresses[i]); 00222 free(destPasswds[i]); 00223 } 00224 00225 pthread_mutex_lock(&mutex); 00226 this -> confURLs = confURLs; 00227 pthread_mutex_unlock(&mutex); 00228 }
| void ApMon::loadFile | ( | char * | filename, | |
| int * | nDestinations, | |||
| char ** | destAddresses, | |||
| int * | destPorts, | |||
| char ** | destPasswds | |||
| ) | throw (runtime_error) [protected] |
Parses a configuration file which contains addresses, ports and passwords for the destination hosts and puts the results in the vectors given as parameters.
| filename | The name of the configuration file. | |
| nDestinations | Output parameter, will contain the number of destination hosts. | |
| destAddresses | Will contain the destination addresses. | |
| destPorts | Will contain the ports from the destination hosts. | |
| destPasswds | Will contain the passwords for the destination hosts. |
Definition at line 115 of file ApMon.cpp.
00117 { 00118 FILE *f; 00119 char msg[100]; 00120 00121 /* initializations for the destination addresses */ 00122 f = fopen(filename, "rt"); 00123 if (f == NULL) { 00124 throw runtime_error("[ loadFile() ] Error opening configuration file"); 00125 } 00126 00127 sprintf(msg, "Loading file %s ...", filename); 00128 logger(INFO, msg); 00129 00130 lastModifFile = time(NULL); 00131 00132 parseConf(f, nDestinations, destAddresses, destPorts, 00133 destPasswds); 00134 fclose(f); 00135 }
| void ApMon::arrayInit | ( | int | nDestinations, | |
| char ** | destAddresses, | |||
| int * | destPorts, | |||
| char ** | destPasswds | |||
| ) | throw (runtime_error) [protected] |
Internal function that initializes an ApMon data structure.
| nDestinations | The number of destination hosts where the results will be sent. | |
| destAddresses | Array that contains the hostnames or IP addresses of the destination hosts. | |
| destPorts | The ports where the MonALISA modules listen on the destination hosts. | |
| destPasswds | Passwords for the destination hosts. |
Definition at line 360 of file ApMon.cpp.
00362 { 00363 arrayInit(nDestinations, destAddresses, destPorts, destPasswds, true); 00364 }
| void ApMon::arrayInit | ( | int | nDestinations, | |
| char ** | destAddresses, | |||
| int * | destPorts, | |||
| char ** | destPasswds, | |||
| bool | firstTime | |||
| ) | throw (runtime_error) [protected] |
Internal function that initializes an ApMon data structure.
| nDestinations | The number of destination hosts where the results will be sent. | |
| destAddresses | Array that contains the hostnames or IP addresses of the destination hosts. | |
| destPorts | The ports where the MonALISA modules listen on the destination hosts. | |
| destPasswds | Passwords for the destination hosts. | |
| firstTime | If it is true, all the initializations will be done (the object is being constructed now). Else, only some structures will be reinitialized. |
Definition at line 367 of file ApMon.cpp.
00369 { 00370 int i, j; 00371 int ret; 00372 char *ipAddr, logmsg[100]; 00373 bool found, havePublicIP; 00374 int tmpNDestinations; 00375 char **tmpAddresses, **tmpPasswds; 00376 int *tmpPorts; 00377 00378 if (destAddresses == NULL || destPorts == NULL || nDestinations == 0) 00379 throw runtime_error("[ arrayInit() ] Destination addresses or ports not provided"); 00380 00381 /* initializations that we have to do only once */ 00382 if (firstTime) { 00383 //this -> appPID = getpid(); 00384 00385 this -> nMonJobs = 0; 00386 this -> monJobs = (MonitoredJob *)malloc(MAX_MONITORED_JOBS * 00387 sizeof(MonitoredJob)); 00388 00389 try { 00390 this -> numCPUs = ProcUtils::getNumCPUs(); 00391 } catch (procutils_error &err) { 00392 logger(WARNING, err.what()); 00393 this -> numCPUs = 0; 00394 } 00395 00396 /* get the names of the network interfaces */ 00397 this -> nInterfaces = 0; 00398 try { 00399 ProcUtils::getNetworkInterfaces(this -> nInterfaces, 00400 this -> interfaceNames); 00401 } catch (procutils_error &err) { 00402 logger(WARNING, err.what()); 00403 this -> nInterfaces = 0; 00404 } 00405 00406 /* get the hostname of the machine */ 00407 ret = gethostname(this -> myHostname, MAX_STRING_LEN -1); 00408 if (ret < 0) { 00409 logger(WARNING, "Could not obtain the local hostname"); 00410 strcpy(myHostname, "unknown"); 00411 } else 00412 myHostname[MAX_STRING_LEN - 1] = 0; 00413 00414 /* get the IPs of the machine */ 00415 this -> numIPs = 0; havePublicIP = false; 00416 strcpy(this -> myIP, "unknown"); 00417 00418 /* default values for cluster name and node name */ 00419 this -> clusterName = strdup("ApMon_UserSend"); 00420 this -> nodeName = strdup(myHostname); 00421 00422 #ifndef WIN32 00423 int sockd = socket(PF_INET, SOCK_STREAM, 0); 00424 if(sockd < 0){ 00425 logger(WARNING, "Could not obtain local IP addresses"); 00426 } else { 00427 for (i = 0; i < this -> nInterfaces; i++) { 00428 struct ifreq ifr; 00429 memset(&ifr, 0, sizeof(ifr)); 00430 strncpy(ifr.ifr_name, this 
-> interfaceNames[i], sizeof(ifr.ifr_name) - 1); 00431 if(ioctl(sockd, SIOCGIFADDR, &ifr)<0) 00432 continue; //???????? 00433 char ip[4], tmp_s[20]; 00434 #ifdef __APPLE__ 00435 memcpy(ip, ifr.ifr_addr.sa_data+2, 4); 00436 #else 00437 memcpy(ip, ifr.ifr_hwaddr.sa_data+2, 4); 00438 #endif 00439 strcpy(tmp_s, inet_ntoa(*(struct in_addr *)ip)); 00440 sprintf(logmsg, "Found local IP address: %s", tmp_s); 00441 logger(FINE, logmsg); 00442 if (strcmp(tmp_s, "127.0.0.1") != 0 && !havePublicIP) { 00443 strcpy(this -> myIP, tmp_s); 00444 if (!isPrivateAddress(tmp_s)) 00445 havePublicIP = true; 00446 } 00447 strcpy(this -> allMyIPs[this -> numIPs], tmp_s); 00448 this -> numIPs++; 00449 } 00450 } 00451 #else 00452 struct hostent *hptr; 00453 if ((hptr = gethostbyname(myHostname))!= NULL) { 00454 i = 0; 00455 struct in_addr addr; 00456 while ((hptr -> h_addr_list)[i] != NULL) { 00457 memcpy(&(addr.s_addr), (hptr -> h_addr_list)[i], 4); 00458 ipAddr = inet_ntoa(addr); 00459 if (strcmp(ipAddr, "127.0.0.1") != 0) { 00460 strcpy(this -> myIP, ipAddr); 00461 if (!isPrivateAddress(ipAddr)) 00462 break; 00463 } 00464 i++; 00465 } 00466 } 00467 #endif 00468 00469 this -> sysMonCluster = strdup("ApMon_SysMon"); 00470 this -> sysMonNode = strdup(this -> myIP); 00471 00472 this -> prvTime = 0; 00473 this -> prvSent = 0; 00474 this -> prvDrop = 0; 00475 this -> crtTime = 0; 00476 this -> crtSent = 0; 00477 this -> crtDrop = 0; 00478 this -> hWeight = exp(-5.0/60.0); 00479 00480 srand(time(NULL)); 00481 00482 /* initialize buffer for XDR encoding */ 00483 this -> buf = (char *)malloc(MAX_DGRAM_SIZE); 00484 if (this -> buf == NULL) 00485 throw runtime_error("[ arrayInit() ] Error allocating memory"); 00486 this -> dgramSize = 0; 00487 00488 /*create the socket & set options*/ 00489 initSocket(); 00490 00491 /* initialize the sender ID and the sequence number */ 00492 instance_id = rand(); 00493 seq_nr = 0; 00494 } 00495 00496 /* put the destination addresses, ports & passwords in some 
temporary 00497 buffers (because we don't want to lock mutex while making DNS 00498 requests) 00499 */ 00500 tmpNDestinations = 0; 00501 tmpPorts = (int *)malloc(nDestinations * sizeof(int)); 00502 tmpAddresses = (char **)malloc(nDestinations * sizeof(char *)); 00503 tmpPasswds = (char **)malloc(nDestinations * sizeof(char *)); 00504 if (tmpPorts == NULL || tmpAddresses == NULL || 00505 tmpPasswds == NULL) 00506 throw runtime_error("[ arrayInit() ] Error allocating memory"); 00507 00508 for (i = 0; i < nDestinations; i++) { 00509 try { 00510 ipAddr = findIP(destAddresses[i]); 00511 } catch (runtime_error &err) { 00512 logger(FATAL, err.what()); 00513 continue; 00514 } 00515 00516 /* make sure this address is not already in the list */ 00517 found = false; 00518 for (j = 0; j < tmpNDestinations; j++) { 00519 if (!strcmp(ipAddr, tmpAddresses[j])) { 00520 found = true; 00521 break; 00522 } 00523 } 00524 00525 /* add the address to the list */ 00526 if (!found) { 00527 tmpAddresses[tmpNDestinations] = ipAddr; 00528 tmpPorts[tmpNDestinations] = destPorts[i]; 00529 tmpPasswds[tmpNDestinations] = strdup(destPasswds[i]); 00530 00531 sprintf(logmsg, "Adding destination host: %s - port %d", 00532 tmpAddresses[tmpNDestinations], tmpPorts[tmpNDestinations]); 00533 logger(INFO, logmsg); 00534 00535 tmpNDestinations++; 00536 } 00537 } 00538 00539 if (tmpNDestinations == 0) { 00540 freeMat(tmpAddresses, tmpNDestinations); 00541 freeMat(tmpPasswds, tmpNDestinations); 00542 throw runtime_error("[ arrayInit() ] There is no destination host specified correctly!"); 00543 } 00544 00545 pthread_mutex_lock(&mutex); 00546 if (!firstTime) 00547 freeConf(); 00548 this -> nDestinations = tmpNDestinations; 00549 this -> destAddresses = tmpAddresses; 00550 this -> destPorts = tmpPorts; 00551 this -> destPasswds = tmpPasswds; 00552 pthread_mutex_unlock(&mutex); 00553 00554 /* start job/system monitoring according to the settings previously read 00555 from the configuration file */ 00556 
setJobMonitoring(jobMonitoring, jobMonitorInterval); 00557 setSysMonitoring(sysMonitoring, sysMonitorInterval); 00558 setGenMonitoring(genMonitoring, genMonitorIntervals); 00559 setConfRecheck(confCheck, recheckInterval); 00560 }
| void ApMon::addToDestinations | ( | char * | line, | |
| int * | nDestinations, | |||
| char * | destAddresses[], | |||
| int | destPorts[], | |||
| char * | destPasswds[] | |||
| ) | [protected] |
Parses the string line, which has the form hostname:port, and adds the hostname and the port to the lists given as parameters.
| line | The line to be parsed. | |
| nDestinations | The number of destination hosts in the lists. Will be modified (incremented) in the function. | |
| destAddresses | The list with IP addresses or hostnames. | |
| destPorts | The list of corresponding ports. | |
| destPasswds | Passwords for the destination hosts. |
Definition at line 230 of file ApMon.cpp.
00231 { 00232 char *addr, *port, *passwd; 00233 const char *sep1 = " \t"; 00234 const char *sep2 = ":"; 00235 00236 char *tmp = strdup(line); 00237 char *firstToken; 00238 // char buf[MAX_STRING_LEN]; 00239 // char *pbuf = buf; 00240 00241 /* the address & port are separated from the password with spaces */ 00242 firstToken = strtok/*_r*/(tmp, sep1);//, &pbuf); 00243 passwd = strtok/*_r*/(NULL, sep1);//, &pbuf); 00244 00245 /* the address and the port are separated with ":" */ 00246 addr = strtok/*_r*/(firstToken, sep2);//, &pbuf); 00247 port = strtok/*_r*/(NULL, sep2);//, &pbuf); 00248 destAddresses[*nDestinations] = strdup(addr); 00249 if (port == NULL) 00250 destPorts[*nDestinations] = DEFAULT_PORT; 00251 else 00252 destPorts[*nDestinations] = atoi(port); 00253 if (passwd == NULL) 00254 destPasswds[*nDestinations] = strdup(""); 00255 else 00256 destPasswds[*nDestinations] = strdup(passwd); 00257 (*nDestinations)++; 00258 00259 free(tmp); 00260 }
| void ApMon::getDestFromWeb | ( | char * | url, | |
| int * | nDestinations, | |||
| char * | destAddresses[], | |||
| int | destPorts[], | |||
| char * | destPasswds[], | |||
| ConfURLs & | confURLs | |||
| ) | throw (runtime_error) [protected] |
Gets a configuration file from a web location and adds the destination addresses and ports to the lists given as parameters.
Definition at line 262 of file ApMon.cpp.
00264 { 00265 char temp_filename[300]; 00266 FILE *tmp_file; 00267 char *line, *ret, *tmp = NULL; 00268 bool modifLineFound; 00269 long mypid = getpid(); 00270 char str1[20], str2[20]; 00271 int totalSize, headerSize, contentSize; 00272 00273 #ifndef WIN32 00274 sprintf(temp_filename, "/tmp/apmon_webconf%ld", mypid); 00275 #else 00276 char *tmpp = getenv("TEMP"); 00277 if(tmpp == NULL) 00278 tmpp = getenv("TMP"); 00279 if(tmpp == NULL) 00280 tmpp = "c:"; 00281 sprintf(temp_filename, "%s\\apmon_webconf%ld", tmpp, mypid); 00282 #endif 00283 /* get the configuration file from web and put it in a temporary file */ 00284 totalSize = httpRequest(url, (char*)"GET", temp_filename); 00285 00286 /* read the configuration from the temporary file */ 00287 tmp_file = fopen(temp_filename, "rt"); 00288 if (tmp_file == NULL) 00289 throw runtime_error("[ getDestFromWeb() ] Error getting the configuration web page"); 00290 00291 line = (char*)malloc((MAX_STRING_LEN + 1) * sizeof(char)); 00292 00293 //check the HTTP header to see if we got the page correctly 00294 fgets(line, MAX_STRING_LEN, tmp_file); 00295 sscanf(line, "%s %s", str1, str2); 00296 if (atoi(str2) != 200) { 00297 free(line); 00298 fclose(tmp_file); 00299 throw runtime_error("[ getDestFromWeb() ] The web page does not exist on the server"); 00300 } 00301 00302 confURLs.vURLs[confURLs.nConfURLs] = strdup(url); 00303 00304 // check the header for the "Last-Modified" and "Content-Length" lines 00305 modifLineFound = false; 00306 contentSize = 0; 00307 do { 00308 if (tmp != NULL) 00309 free(tmp); 00310 ret = fgets(line, MAX_STRING_LEN, tmp_file); 00311 if (ret == NULL) { 00312 free(line); fclose(tmp_file); 00313 throw runtime_error("[ getDestFromWeb() ] Error getting the configuration web page"); 00314 } 00315 if (strstr(line, "Last-Modified") == line) { 00316 modifLineFound = true; 00317 confURLs.lastModifURLs[confURLs.nConfURLs] = strdup(line); 00318 } 00319 00320 if (strstr(line, "Content-Length") == line) { 00321 
sscanf(line, "%s %d", str1, &contentSize); 00322 } 00323 00324 tmp = trimString(line); 00325 } while (strlen(tmp) != 0); 00326 free(tmp); free(line); 00327 00328 if (!modifLineFound) 00329 confURLs.lastModifURLs[confURLs.nConfURLs] = strdup(""); 00330 confURLs.nConfURLs++; 00331 00332 headerSize = ftell(tmp_file); 00333 if (totalSize - headerSize < contentSize) { 00334 fclose(tmp_file); 00335 throw runtime_error("[ getDestFromWeb() ] Web page received incompletely"); 00336 } 00337 00338 try { 00339 parseConf(tmp_file, nDestinations, destAddresses, destPorts, 00340 destPasswds); 00341 } catch (...) { 00342 fclose(tmp_file); 00343 unlink(temp_filename); 00344 throw; 00345 } 00346 00347 fclose(tmp_file); 00348 unlink(temp_filename); 00349 }
| void ApMon::encodeParams | ( | int | nParams, | |
| char ** | paramNames, | |||
| int * | valueTypes, | |||
| char ** | paramValues, | |||
| int | timestamp | |||
| ) | throw (runtime_error) [protected] |
Encodes in the XDR format the data from a ApMon structure.
Must be called before sending the data over the network.
Definition at line 803 of file ApMon.cpp.
00805 { 00806 XDR xdrs; /* XDR handle. */ 00807 int i, effectiveNParams; 00808 00809 /* count the number of parameters actually sent in the datagram 00810 (the parameters with a NULL name and the string parameters 00811 with a NULL value are skipped) 00812 */ 00813 effectiveNParams = nParams; 00814 for (i = 0; i < nParams; i++) { 00815 if (paramNames[i] == NULL || (valueTypes[i] == XDR_STRING && 00816 paramValues[i] == NULL)) { 00817 effectiveNParams--; 00818 } 00819 } 00820 if (effectiveNParams == 0) 00821 throw runtime_error("[ encodeParams() ] No valid parameters in datagram, sending aborted"); 00822 00823 /*** estimate the length of the send buffer ***/ 00824 00825 /* add the length of the cluster name & node name */ 00826 dgramSize = xdrSize(XDR_STRING, clusterName) + 00827 xdrSize(XDR_STRING, nodeName) + xdrSize(XDR_INT32, NULL); 00828 /* add the lengths for the parameters (name + size + value) */ 00829 for (i = 0; i < nParams; i++) { 00830 dgramSize += xdrSize(XDR_STRING, paramNames[i]) + xdrSize(XDR_INT32, NULL) + 00831 + xdrSize(valueTypes[i], paramValues[i]); 00832 } 00833 00834 /* check that the maximum datagram size is not exceeded */ 00835 if (dgramSize + MAX_HEADER_LENGTH > MAX_DGRAM_SIZE) 00836 throw runtime_error("[ encodeParams() ] Maximum datagram size exceeded"); 00837 00838 /* initialize the XDR stream */ 00839 xdrmem_create(&xdrs, buf, MAX_DGRAM_SIZE, XDR_ENCODE); 00840 00841 try { 00842 /* encode the cluster name, the node name and the number of parameters */ 00843 if (!xdr_string(&xdrs, &(clusterName), strlen(clusterName) 00844 + 1)) 00845 throw runtime_error("[ encodeParams() ] XDR encoding error for the cluster name"); 00846 00847 if (!xdr_string(&xdrs, &(nodeName), strlen(nodeName) + 1)) 00848 throw runtime_error("[ encodeParams() ] XDR encoding error for the node name"); 00849 00850 if (!xdr_int(&xdrs, &(effectiveNParams))) 00851 throw runtime_error("[ encodeParams() ] XDR encoding error for the number of parameters"); 00852 00853 /* 
encode the parameters */ 00854 for (i = 0; i < nParams; i++) { 00855 if (paramNames[i] == NULL || (valueTypes[i] == XDR_STRING && 00856 paramValues[i] == NULL)) { 00857 logger(WARNING, "NULL parameter name or value - skipping parameter..."); 00858 continue; 00859 } 00860 00861 /* parameter name */ 00862 if (!xdr_string(&xdrs, &(paramNames[i]), strlen(paramNames[i]) + 1)) 00863 throw runtime_error("[ encodeParams() ] XDR encoding error for parameter name"); 00864 00865 /* parameter value type */ 00866 if (!xdr_int(&xdrs, &(valueTypes[i]))) 00867 throw runtime_error("[ encodeParams() ] XDR encoding error for parameter value type"); 00868 00869 /* parameter value */ 00870 switch (valueTypes[i]) { 00871 case XDR_STRING: 00872 if (!xdr_string(&xdrs, &(paramValues[i]), 00873 strlen(paramValues[i]) + 1)) 00874 throw runtime_error("[ encodeParams() ] XDR encoding error for parameter value"); 00875 break; 00876 //INT16 is not supported 00877 /* case XDR_INT16: 00878 if (!xdr_short(&xdrs, (short *)(paramValues[i]))) 00879 return RET_ERROR; 00880 break; 00881 */ case XDR_INT32: 00882 if (!xdr_int(&xdrs, (int *)(paramValues[i]))) 00883 throw runtime_error("[ encodeParams() ] XDR encoding error for parameter value"); 00884 break; 00885 case XDR_REAL32: 00886 if (!xdr_float(&xdrs, (float *)(paramValues[i]))) 00887 throw runtime_error("[ encodeParams() ] XDR encoding error for parameter value"); 00888 break; 00889 case XDR_REAL64: 00890 if (!xdr_double(&xdrs, (double *)(paramValues[i]))) 00891 throw runtime_error("[ encodeParams() ] XDR encoding error for parameter value"); 00892 break; 00893 default: 00894 throw runtime_error("[ encodeParams() ] Unknown type for XDR encoding"); 00895 } 00896 } 00897 00898 /* encode the timestamp if necessary */ 00899 if (timestamp > 0) { 00900 if (!xdr_int(&xdrs, &timestamp)) 00901 throw runtime_error("[ encodeParams() ] XDR encoding error for the timestamp"); 00902 dgramSize += xdrSize(XDR_INT32, NULL); 00903 } 00904 } catch (runtime_error& err) { 
00905 xdr_destroy(&xdrs); 00906 throw err; 00907 } 00908 00909 xdr_destroy(&xdrs); 00910 }
| void ApMon::initMonitoring | ( | ) | [protected] |
Initializes the monitoring configurations and the names of the parameters included in the monitoring datagrams.
Definition at line 684 of file monitor_utils.cpp.
00684 { 00685 int i; 00686 00687 this -> autoDisableMonitoring = true; 00688 this -> sysMonitoring = false; 00689 this -> jobMonitoring = false; 00690 this -> genMonitoring = false; 00691 this -> confCheck = false; 00692 00693 #ifndef WIN32 00694 pthread_mutex_init(&this -> mutex, NULL); 00695 pthread_mutex_init(&this -> mutexBack, NULL); 00696 pthread_mutex_init(&this -> mutexCond, NULL); 00697 pthread_cond_init(&this -> confChangedCond, NULL); 00698 #else 00699 logger(INFO, "init mutexes..."); 00700 this -> mutex = CreateMutex(NULL, FALSE, NULL); 00701 this -> mutexBack = CreateMutex(NULL, FALSE, NULL); 00702 this -> mutexCond = CreateMutex(NULL, FALSE, NULL); 00703 this -> confChangedCond = CreateEvent(NULL, FALSE, FALSE, NULL); 00704 00705 // Initialize the Windows Sockets library 00706 00707 WORD wVersionRequested; 00708 WSADATA wsaData; 00709 int err; 00710 wVersionRequested = MAKEWORD( 2, 0 ); 00711 err = WSAStartup( wVersionRequested, &wsaData ); 00712 if ( err != 0 ) { 00713 logger(FATAL, "Could not initialize the Windows Sockets library (WS2_32.dll)"); 00714 } 00715 00716 #endif 00717 00718 this -> haveBkThread = false; 00719 this -> bkThreadStarted = false; 00720 this -> stopBkThread = false; 00721 00722 this -> recheckChanged = false; 00723 this -> jobMonChanged = false; 00724 this -> sysMonChanged = false; 00725 00726 this -> recheckInterval = RECHECK_INTERVAL; 00727 this -> crtRecheckInterval = RECHECK_INTERVAL; 00728 this -> jobMonitorInterval = JOB_MONITOR_INTERVAL; 00729 this -> sysMonitorInterval = SYS_MONITOR_INTERVAL; 00730 00731 this -> nSysMonitorParams = initSysParams(this -> sysMonitorParams); 00732 00733 this -> nGenMonitorParams = initGenParams(this -> genMonitorParams); 00734 00735 this -> nJobMonitorParams = initJobParams(this -> jobMonitorParams); 00736 00737 initSocketStatesMapTCP(this -> socketStatesMapTCP); 00738 00739 this -> sysInfo_first = true; 00740 00741 try { 00742 this -> lastSysInfoSend = ProcUtils::getBootTime(); 00743 } 
catch (procutils_error& perr) { 00744 logger(WARNING, perr.what()); 00745 logger(WARNING, "The first system monitoring values may be inaccurate"); 00746 this -> lastSysInfoSend = 0; 00747 } 00748 00749 for (i = 0; i < nSysMonitorParams; i++) 00750 this -> lastSysVals[i] = 0; 00751 00752 //this -> lastUsrTime = this -> lastSysTime = 0; 00753 //this -> lastNiceTime = this -> lastIdleTime = 0; 00754 00755 for (i = 0; i < nSysMonitorParams; i++) { 00756 actSysMonitorParams[i] = 1; 00757 sysRetResults[i] = RET_SUCCESS; 00758 } 00759 00760 for (i = 0; i < nGenMonitorParams; i++) { 00761 actGenMonitorParams[i] = 1; 00762 genRetResults[i] = RET_SUCCESS; 00763 } 00764 00765 for (i = 0; i < nJobMonitorParams; i++) { 00766 actJobMonitorParams[i] = 1; 00767 jobRetResults[i] = RET_SUCCESS; 00768 } 00769 00770 this -> maxMsgRate = MAX_MSG_RATE; 00771 }
| void ApMon::sendJobInfo | ( | ) | [protected] |
Sends datagrams containing information about the jobs that are currently being monitored.
Definition at line 48 of file monitor_utils.cpp.
00048 { 00049 #ifndef WIN32 00050 int i; 00051 long crtTime; 00052 00053 /* the apMon_free() function calls sendJobInfo() from another thread and 00054 we need mutual exclusion */ 00055 pthread_mutex_lock(&mutexBack); 00056 00057 if (nMonJobs == 0) { 00058 logger(WARNING, "There are no jobs to be monitored, not sending job monitoring information."); 00059 pthread_mutex_unlock(&mutexBack); 00060 return; 00061 } 00062 00063 crtTime = time(NULL); 00064 logger(INFO, "Sending job monitoring information..."); 00065 lastJobInfoSend = (time_t)crtTime; 00066 00067 /* send monitoring information for all the jobs specified by the user */ 00068 for (i = 0; i < nMonJobs; i++) 00069 sendOneJobInfo(monJobs[i]); 00070 00071 pthread_mutex_unlock(&mutexBack); 00072 #endif 00073 }
| void ApMon::sendOneJobInfo | ( | MonitoredJob | job | ) | [protected] |
Sends datagrams with monitoring information about the specified job to all the destination hosts.
Definition at line 152 of file monitor_utils.cpp.
00152 { 00153 int i; 00154 int nParams = 0; 00155 00156 char **paramNames, **paramValues; 00157 int *valueTypes; 00158 00159 valueTypes = (int *)malloc(nJobMonitorParams * sizeof(int)); 00160 paramNames = (char **)malloc(nJobMonitorParams * sizeof(char *)); 00161 paramValues = (char **)malloc(nJobMonitorParams * sizeof(char *)); 00162 00163 for (i = 0; i < nJobMonitorParams; i++) { 00164 jobRetResults[i] = RET_SUCCESS; 00165 currentJobVals[i] = 0; 00166 } 00167 00168 updateJobInfo(job); 00169 00170 for (i = 0; i < nJobMonitorParams; i++) { 00171 if (actJobMonitorParams[i] && jobRetResults[i] != RET_ERROR) { 00172 00173 paramNames[nParams] = jobMonitorParams[i]; 00174 paramValues[nParams] = (char *)&currentJobVals[i]; 00175 valueTypes[nParams] = XDR_REAL64; 00176 nParams++; 00177 } 00178 /* don't disable the parameter (maybe for another job it can be 00179 obtained) */ 00180 /* 00181 else 00182 if (autoDisableMonitoring) 00183 actJobMonitorParams[ind] = 0; 00184 */ 00185 } 00186 00187 if (nParams == 0) { 00188 free(paramNames); free(valueTypes); 00189 free(paramValues); 00190 return; 00191 } 00192 00193 try { 00194 if (nParams > 0) 00195 sendParameters(job.clusterName, job.nodeName, nParams, 00196 paramNames, valueTypes, paramValues); 00197 } catch (runtime_error& err) { 00198 logger(WARNING, err.what()); 00199 } 00200 00201 free(paramNames); 00202 free(valueTypes); 00203 free(paramValues); 00204 }
| void ApMon::updateJobInfo | ( | MonitoredJob | job | ) | [protected] |
Update the monitoring information regarding the specified job.
Definition at line 75 of file monitor_utils.cpp.
00075 { 00076 bool needJobInfo, needDiskInfo; 00077 bool jobExists = true; 00078 char err_msg[200]; 00079 00080 PsInfo jobInfo; 00081 JobDirInfo dirInfo; 00082 00083 /**** runtime, CPU & memory usage information ****/ 00084 needJobInfo = actJobMonitorParams[JOB_RUN_TIME] 00085 || actJobMonitorParams[JOB_CPU_TIME] 00086 || actJobMonitorParams[JOB_CPU_USAGE] 00087 || actJobMonitorParams[JOB_MEM_USAGE] 00088 || actJobMonitorParams[JOB_VIRTUALMEM] 00089 || actJobMonitorParams[JOB_RSS] 00090 || actJobMonitorParams[JOB_OPEN_FILES]; 00091 if (needJobInfo) { 00092 try { 00093 readJobInfo(job.pid, jobInfo); 00094 currentJobVals[JOB_RUN_TIME] = jobInfo.etime; 00095 currentJobVals[JOB_CPU_TIME] = jobInfo.cputime; 00096 currentJobVals[JOB_CPU_USAGE] = jobInfo.pcpu; 00097 currentJobVals[JOB_MEM_USAGE] = jobInfo.pmem; 00098 currentJobVals[JOB_VIRTUALMEM] = jobInfo.vsz; 00099 currentJobVals[JOB_RSS] = jobInfo.rsz; 00100 00101 if (jobInfo.open_fd < 0) 00102 jobRetResults[JOB_OPEN_FILES] = RET_ERROR; 00103 currentJobVals[JOB_OPEN_FILES] = jobInfo.open_fd; 00104 00105 } catch (runtime_error &err) { 00106 logger(WARNING, err.what()); 00107 jobRetResults[JOB_RUN_TIME] = jobRetResults[JOB_CPU_TIME] = 00108 jobRetResults[JOB_CPU_USAGE] = jobRetResults[JOB_MEM_USAGE] = 00109 jobRetResults[JOB_VIRTUALMEM] = jobRetResults[JOB_RSS] = 00110 jobRetResults[JOB_OPEN_FILES] = RET_ERROR; 00111 strcpy(err_msg, err.what()); 00112 if (strstr(err_msg, "does not exist") != NULL) 00113 jobExists = false; 00114 } 00115 } 00116 00117 /* if the monitored job has terminated, remove it */ 00118 if (!jobExists) { 00119 try { 00120 removeJobToMonitor(job.pid); 00121 } catch (runtime_error &err) { 00122 logger(WARNING, err.what()); 00123 } 00124 return; 00125 } 00126 00127 /* disk usage information */ 00128 needDiskInfo = actJobMonitorParams[JOB_DISK_TOTAL] 00129 || actJobMonitorParams[JOB_DISK_USED] 00130 || actJobMonitorParams[JOB_DISK_FREE] 00131 || actJobMonitorParams[JOB_DISK_USAGE] 00132 || 
actJobMonitorParams[JOB_WORKDIR_SIZE]; 00133 if (needDiskInfo) { 00134 try { 00135 readJobDiskUsage(job, dirInfo); 00136 currentJobVals[JOB_WORKDIR_SIZE] = dirInfo.workdir_size; 00137 currentJobVals[JOB_DISK_TOTAL] = dirInfo.disk_total; 00138 currentJobVals[JOB_DISK_USED] = dirInfo.disk_used; 00139 currentJobVals[JOB_DISK_USAGE] = dirInfo.disk_usage; 00140 currentJobVals[JOB_DISK_FREE] = dirInfo.disk_free; 00141 } catch (runtime_error& err) { 00142 logger(WARNING, err.what()); 00143 jobRetResults[JOB_WORKDIR_SIZE] = jobRetResults[JOB_DISK_TOTAL] 00144 = jobRetResults[JOB_DISK_USED] 00145 = jobRetResults[JOB_DISK_USAGE] 00146 = jobRetResults[JOB_DISK_FREE] 00147 = RET_ERROR; 00148 } 00149 } 00150 }
| void ApMon::sendSysInfo | ( | ) | [protected] |
Sends datagrams with system monitoring information to all the destination hosts.
Definition at line 371 of file monitor_utils.cpp.
00371 { 00372 #ifndef WIN32 00373 int nParams = 0, maxNParams; 00374 int i; 00375 long crtTime; 00376 00377 int *valueTypes; 00378 char **paramNames, **paramValues; 00379 00380 crtTime = time(NULL); 00381 logger(INFO, "Sending system monitoring information..."); 00382 00383 /* make some initializations only the first time this 00384 function is called */ 00385 if (this -> sysInfo_first) { 00386 for (i = 0; i < this -> nInterfaces; i++) { 00387 this -> lastBytesSent[i] = this -> lastBytesReceived[i] = 0.0; 00388 this -> lastNetErrs[i] = 0; 00389 00390 } 00391 this -> sysInfo_first = FALSE; 00392 } 00393 00394 /* the maximum number of parameters that can be included in a datagram */ 00395 /* (the last three terms are for: parameters corresponding to each possible 00396 state of the processes, parameters corresponding to the types of open 00397 sockets, parameters corresponding to each possible state of the TCP 00398 sockets.) */ 00399 maxNParams = nSysMonitorParams + (2 * nInterfaces - 1) + 15 + 4 + 00400 N_TCP_STATES; 00401 00402 valueTypes = (int *)malloc(maxNParams * sizeof(int)); 00403 paramNames = (char **)malloc(maxNParams * sizeof(char *)); 00404 paramValues = (char **)malloc(maxNParams * sizeof(char *)); 00405 00406 for (i = 0; i < nSysMonitorParams; i++) { 00407 if (actSysMonitorParams[i] > 0) /* if the parameter is enabled */ 00408 sysRetResults[i] = RET_SUCCESS; 00409 else /* mark it with RET_ERROR so that it will be not included in the 00410 datagram */ 00411 sysRetResults[i] = RET_ERROR; 00412 } 00413 00414 updateSysInfo(); 00415 00416 for (i = 0; i < nSysMonitorParams; i++) { 00417 if (i == SYS_NET_IN || i == SYS_NET_OUT || i == SYS_NET_ERRS || 00418 i == SYS_NET_SOCKETS || i == SYS_NET_TCP_DETAILS || i == SYS_PROCESSES) 00419 continue; 00420 00421 if (sysRetResults[i] == PROCUTILS_ERROR) { 00422 /* could not read the requested information from /proc, disable this 00423 parameter */ 00424 if (autoDisableMonitoring) 00425 actSysMonitorParams[i] = 0; 
00426 } else if (sysRetResults[i] != RET_ERROR) { 00427 /* the parameter is enabled and there were no errors obtaining it */ 00428 paramNames[nParams] = strdup(sysMonitorParams[i]); 00429 paramValues[nParams] = (char *)&currentSysVals[i]; 00430 valueTypes[nParams] = XDR_REAL64; 00431 nParams++; 00432 } 00433 } 00434 00435 if (actSysMonitorParams[SYS_NET_IN] == 1) { 00436 if (sysRetResults[SYS_NET_IN] == PROCUTILS_ERROR) { 00437 if (autoDisableMonitoring) 00438 actSysMonitorParams[SYS_NET_IN] = 0; 00439 } else if (sysRetResults[SYS_NET_IN] != RET_ERROR) { 00440 for (i = 0; i < nInterfaces; i++) { 00441 paramNames[nParams] = (char *)malloc(20 * sizeof(char)); 00442 strcpy(paramNames[nParams], interfaceNames[i]); 00443 strcat(paramNames[nParams], "_in"); 00444 paramValues[nParams] = (char *)&currentNetIn[i]; 00445 valueTypes[nParams] = XDR_REAL64; 00446 nParams++; 00447 } 00448 } 00449 } 00450 00451 if (actSysMonitorParams[SYS_NET_OUT] == 1) { 00452 if (sysRetResults[SYS_NET_IN] == PROCUTILS_ERROR) { 00453 if (autoDisableMonitoring) 00454 actSysMonitorParams[SYS_NET_OUT] = 0; 00455 } else if (sysRetResults[SYS_NET_OUT] != RET_ERROR) { 00456 for (i = 0; i < nInterfaces; i++) { 00457 paramNames[nParams] = (char *)malloc(20 * sizeof(char)); 00458 strcpy(paramNames[nParams], interfaceNames[i]); 00459 strcat(paramNames[nParams], "_out"); 00460 paramValues[nParams] = (char *)&currentNetOut[i]; 00461 valueTypes[nParams] = XDR_REAL64; 00462 nParams++; 00463 } 00464 } 00465 } 00466 00467 if (actSysMonitorParams[SYS_NET_ERRS] == 1) { 00468 if (sysRetResults[SYS_NET_ERRS] == PROCUTILS_ERROR) { 00469 if (autoDisableMonitoring) 00470 actSysMonitorParams[SYS_NET_ERRS] = 0; 00471 } else if (sysRetResults[SYS_NET_ERRS] != RET_ERROR) { 00472 for (i = 0; i < nInterfaces; i++) { 00473 paramNames[nParams] = (char *)malloc(20 * sizeof(char)); 00474 strcpy(paramNames[nParams], interfaceNames[i]); 00475 strcat(paramNames[nParams], "_errs"); 00476 paramValues[nParams] = (char *)&currentNetErrs[i]; 00477 
valueTypes[nParams] = XDR_REAL64; 00478 nParams++; 00479 } 00480 } 00481 } 00482 00483 00484 if (actSysMonitorParams[SYS_PROCESSES] == 1) { 00485 if (sysRetResults[SYS_PROCESSES] != RET_ERROR) { 00486 char act_states[] = {'D', 'R', 'S', 'T', 'Z'}; 00487 for (i = 0; i < 5; i++) { 00488 paramNames[nParams] = (char *)malloc(20 * sizeof(char)); 00489 sprintf(paramNames[nParams], "processes_%c", act_states[i]); 00490 paramValues[nParams] = (char *)&currentProcessStates[act_states[i] - 65]; 00491 valueTypes[nParams] = XDR_REAL64; 00492 nParams++; 00493 } 00494 } 00495 } 00496 00497 if (actSysMonitorParams[SYS_NET_SOCKETS] == 1) { 00498 if (sysRetResults[SYS_NET_SOCKETS] != RET_ERROR) { 00499 const char *socket_types[] = {"tcp", "udp", "icm", "unix"}; 00500 for (i = 0; i < 4; i++) { 00501 paramNames[nParams] = (char *)malloc(30 * sizeof(char)); 00502 sprintf(paramNames[nParams], "sockets_%s", socket_types[i]); 00503 paramValues[nParams] = (char *)&currentNSockets[i]; 00504 valueTypes[nParams] = XDR_REAL64; 00505 nParams++; 00506 } 00507 } 00508 } 00509 00510 if (actSysMonitorParams[SYS_NET_TCP_DETAILS] == 1) { 00511 if (sysRetResults[SYS_NET_TCP_DETAILS] != RET_ERROR) { 00512 for (i = 0; i < N_TCP_STATES; i++) { 00513 paramNames[nParams] = (char *)malloc(30 * sizeof(char)); 00514 sprintf(paramNames[nParams], "sockets_tcp_%s", socketStatesMapTCP[i]); 00515 paramValues[nParams] = (char *)&currentSocketsTCP[i]; 00516 valueTypes[nParams] = XDR_REAL64; 00517 nParams++; 00518 } 00519 } 00520 } 00521 00522 try { 00523 if (nParams > 0) 00524 sendParameters(sysMonCluster, sysMonNode, nParams, 00525 paramNames, valueTypes, paramValues); 00526 } catch (runtime_error& err) { 00527 logger(WARNING, err.what()); 00528 } 00529 00530 this -> lastSysInfoSend = crtTime; 00531 00532 if (sysRetResults[SYS_NET_IN] == RET_SUCCESS) { 00533 free(currentNetIn); 00534 free(currentNetOut); 00535 free(currentNetErrs); 00536 } 00537 00538 for (i = 0; i < nParams; i++) 00539 free(paramNames[i]); 00540 free(paramNames); 
00541 free(valueTypes); 00542 free(paramValues); 00543 #endif 00544 }
| void ApMon::updateSysInfo | ( | ) | [protected] |
Updates the system monitoring information with new values obtained from the /proc filesystem.
Definition at line 207 of file monitor_utils.cpp.
00207 { 00208 int needCPUInfo, needSwapPagesInfo, needLoadInfo, needMemInfo, 00209 needNetInfo, needUptime, needProcessesInfo, needNetstatInfo; 00210 00211 /**** CPU usage information ****/ 00212 needCPUInfo = actSysMonitorParams[SYS_CPU_USAGE] 00213 || actSysMonitorParams[SYS_CPU_USR] 00214 || actSysMonitorParams[SYS_CPU_SYS] 00215 || actSysMonitorParams[SYS_CPU_NICE] 00216 || actSysMonitorParams[SYS_CPU_IDLE]; 00217 if (needCPUInfo) { 00218 try { 00219 ProcUtils::getCPUUsage(*this, currentSysVals[SYS_CPU_USAGE], 00220 currentSysVals[SYS_CPU_USR], 00221 currentSysVals[SYS_CPU_SYS], 00222 currentSysVals[SYS_CPU_NICE], 00223 currentSysVals[SYS_CPU_IDLE], numCPUs); 00224 } catch (procutils_error &perr) { 00225 /* "permanent" error (the parameters could not be obtained) */ 00226 logger(WARNING, perr.what()); 00227 sysRetResults[SYS_CPU_USAGE] = sysRetResults[SYS_CPU_SYS] = 00228 sysRetResults[SYS_CPU_USR] = sysRetResults[SYS_CPU_NICE] = 00229 sysRetResults[SYS_CPU_IDLE] = sysRetResults[SYS_CPU_USAGE] = PROCUTILS_ERROR; 00230 } catch (runtime_error &err) { 00231 /* temporary error (next time we might be able to get the paramerers) */ 00232 logger(WARNING, err.what()); 00233 sysRetResults[SYS_CPU_USAGE] = sysRetResults[SYS_CPU_SYS] 00234 = sysRetResults[SYS_CPU_USR] 00235 = sysRetResults[SYS_CPU_NICE] 00236 = sysRetResults[SYS_CPU_IDLE] 00237 = sysRetResults[SYS_CPU_USAGE] 00238 = RET_ERROR; 00239 } 00240 } 00241 00242 needSwapPagesInfo = actSysMonitorParams[SYS_PAGES_IN] 00243 || actSysMonitorParams[SYS_PAGES_OUT] 00244 || actSysMonitorParams[SYS_SWAP_IN] 00245 || actSysMonitorParams[SYS_SWAP_OUT]; 00246 00247 if (needSwapPagesInfo) { 00248 try { 00249 ProcUtils::getSwapPages(*this, currentSysVals[SYS_PAGES_IN], 00250 currentSysVals[SYS_PAGES_OUT], 00251 currentSysVals[SYS_SWAP_IN], 00252 currentSysVals[SYS_SWAP_OUT]); 00253 } catch (procutils_error &perr) { 00254 /* "permanent" error (the parameters could not be obtained) */ 00255 logger(WARNING, perr.what()); 00256 
sysRetResults[SYS_PAGES_IN] = sysRetResults[SYS_PAGES_OUT] = 00257 sysRetResults[SYS_SWAP_OUT] = sysRetResults[SYS_SWAP_IN] = PROCUTILS_ERROR; 00258 } catch (runtime_error &err) { 00259 /* temporary error (next time we might be able to get the paramerers) */ 00260 logger(WARNING, err.what()); 00261 sysRetResults[SYS_PAGES_IN] = sysRetResults[SYS_PAGES_OUT] 00262 = sysRetResults[SYS_SWAP_IN] 00263 = sysRetResults[SYS_SWAP_OUT] 00264 = RET_ERROR; 00265 } 00266 } 00267 00268 needLoadInfo = actSysMonitorParams[SYS_LOAD1] 00269 || actSysMonitorParams[SYS_LOAD5] 00270 || actSysMonitorParams[SYS_LOAD15]; 00271 00272 if (needLoadInfo) { 00273 double dummyVal; 00274 try { 00275 /* the number of processes is now obtained with the getProcesses() 00276 function, not with getLoad() */ 00277 ProcUtils::getLoad(currentSysVals[SYS_LOAD1], currentSysVals[SYS_LOAD5], 00278 currentSysVals[SYS_LOAD15],dummyVal); 00279 } catch (procutils_error& perr) { 00280 /* "permanent" error (the parameters could not be obtained) */ 00281 logger(WARNING, perr.what()); 00282 sysRetResults[SYS_LOAD1] = sysRetResults[SYS_LOAD5] 00283 = sysRetResults[SYS_LOAD15] 00284 = PROCUTILS_ERROR; 00285 } 00286 } 00287 00288 /**** get statistics about the current processes ****/ 00289 needProcessesInfo = actSysMonitorParams[SYS_PROCESSES]; 00290 if (needProcessesInfo) { 00291 try { 00292 ProcUtils::getProcesses(currentSysVals[SYS_PROCESSES], 00293 currentProcessStates); 00294 } catch (runtime_error& err) { 00295 logger(WARNING, err.what()); 00296 sysRetResults[SYS_PROCESSES] = RET_ERROR; 00297 } 00298 } 00299 00300 /**** get the amount of memory currently in use ****/ 00301 needMemInfo = actSysMonitorParams[SYS_MEM_USED] 00302 || actSysMonitorParams[SYS_MEM_FREE] 00303 || actSysMonitorParams[SYS_SWAP_USED] 00304 || actSysMonitorParams[SYS_SWAP_FREE] 00305 || actSysMonitorParams[SYS_MEM_USAGE] 00306 || actSysMonitorParams[SYS_SWAP_USAGE]; 00307 00308 if (needMemInfo) { 00309 try { 00310 
ProcUtils::getMemUsed(currentSysVals[SYS_MEM_USED], 00311 currentSysVals[SYS_MEM_FREE], 00312 currentSysVals[SYS_SWAP_USED], 00313 currentSysVals[SYS_SWAP_FREE]); 00314 currentSysVals[SYS_MEM_USAGE] = 100 * currentSysVals[SYS_MEM_USED] / 00315 (currentSysVals[SYS_MEM_USED] + currentSysVals[SYS_MEM_FREE]); 00316 currentSysVals[SYS_SWAP_USAGE] = 100 * currentSysVals[SYS_SWAP_USED] / 00317 (currentSysVals[SYS_SWAP_USED] + currentSysVals[SYS_SWAP_FREE]); 00318 } catch (procutils_error &perr) { 00319 logger(WARNING, perr.what()); 00320 sysRetResults[SYS_MEM_USED] = sysRetResults[SYS_MEM_FREE] = 00321 sysRetResults[SYS_SWAP_USED] = sysRetResults[SYS_SWAP_FREE] = 00322 sysRetResults[SYS_MEM_USAGE] = sysRetResults[SYS_SWAP_USAGE] = 00323 PROCUTILS_ERROR; 00324 } 00325 } 00326 00327 00328 /**** network monitoring information ****/ 00329 needNetInfo = actSysMonitorParams[SYS_NET_IN] || 00330 actSysMonitorParams[SYS_NET_OUT] || actSysMonitorParams[SYS_NET_ERRS]; 00331 if (needNetInfo && this -> nInterfaces > 0) { 00332 try { 00333 ProcUtils::getNetInfo(*this, &currentNetIn, &currentNetOut, 00334 &currentNetErrs); 00335 } catch (procutils_error &perr) { 00336 logger(WARNING, perr.what()); 00337 sysRetResults[SYS_NET_IN] = sysRetResults[SYS_NET_OUT] = 00338 sysRetResults[SYS_NET_ERRS] = PROCUTILS_ERROR; 00339 } catch (runtime_error &err) { 00340 logger(WARNING, err.what()); 00341 sysRetResults[SYS_NET_IN] = sysRetResults[SYS_NET_OUT] = 00342 sysRetResults[SYS_NET_ERRS] = RET_ERROR; 00343 } 00344 } 00345 00346 needNetstatInfo = actSysMonitorParams[SYS_NET_SOCKETS] || 00347 actSysMonitorParams[SYS_NET_TCP_DETAILS]; 00348 if (needNetstatInfo) { 00349 try { 00350 ProcUtils::getNetstatInfo(*this, this -> currentNSockets, 00351 this -> currentSocketsTCP); 00352 } catch (runtime_error &err) { 00353 logger(WARNING, err.what()); 00354 sysRetResults[SYS_NET_SOCKETS] = sysRetResults[SYS_NET_TCP_DETAILS] = 00355 RET_ERROR; 00356 } 00357 } 00358 00359 needUptime = actSysMonitorParams[SYS_UPTIME]; 00360 if 
(needUptime) { 00361 try { 00362 currentSysVals[SYS_UPTIME] = ProcUtils::getUpTime(); 00363 } catch (procutils_error &perr) { 00364 logger(WARNING, perr.what()); 00365 sysRetResults[SYS_UPTIME] = PROCUTILS_ERROR; 00366 } 00367 } 00368 00369 }
| void ApMon::sendGeneralInfo | ( | ) | [protected] |
Sends datagrams with general system monitoring information to all the destination hosts.
Definition at line 582 of file monitor_utils.cpp.
00582 { 00583 #ifndef WIN32 00584 int nParams, maxNParams, i; 00585 long crtTime; 00586 char tmp_s[50]; 00587 00588 char **paramNames, **paramValues; 00589 int *valueTypes; 00590 00591 crtTime = time(NULL); 00592 logger(INFO, "Sending general monitoring information..."); 00593 00594 maxNParams = nGenMonitorParams + numIPs; 00595 valueTypes = (int *)malloc(maxNParams * sizeof(int)); 00596 paramNames = (char **)malloc(maxNParams * sizeof(char *)); 00597 paramValues = (char **)malloc(maxNParams * sizeof(char *)); 00598 00599 nParams = 0; 00600 00601 updateGeneralInfo(); 00602 00603 if (actGenMonitorParams[GEN_HOSTNAME]) { 00604 paramNames[nParams] = strdup(genMonitorParams[GEN_HOSTNAME]); 00605 valueTypes[nParams] = XDR_STRING; 00606 paramValues[nParams] = myHostname; 00607 nParams++; 00608 } 00609 00610 if (actGenMonitorParams[GEN_IP]) { 00611 for (i = 0; i < this -> numIPs; i++) { 00612 strcpy(tmp_s, "ip_"); 00613 strcat(tmp_s, interfaceNames[i]); 00614 paramNames[nParams] = strdup(tmp_s); 00615 valueTypes[nParams] = XDR_STRING; 00616 paramValues[nParams] = this -> allMyIPs[i]; 00617 nParams++; 00618 } 00619 } 00620 00621 if (actGenMonitorParams[GEN_CPU_VENDOR_ID] && strlen(cpuVendor) != 0) { 00622 paramNames[nParams] = strdup(genMonitorParams[GEN_CPU_VENDOR_ID]); 00623 valueTypes[nParams] = XDR_STRING; 00624 paramValues[nParams] = cpuVendor; 00625 nParams++; 00626 } 00627 00628 if (actGenMonitorParams[GEN_CPU_FAMILY] && strlen(cpuFamily) != 0) { 00629 paramNames[nParams] = strdup(genMonitorParams[GEN_CPU_FAMILY]); 00630 valueTypes[nParams] = XDR_STRING; 00631 paramValues[nParams] = cpuFamily; 00632 nParams++; 00633 } 00634 00635 if (actGenMonitorParams[GEN_CPU_MODEL] && strlen(cpuModel) != 0) { 00636 paramNames[nParams] = strdup(genMonitorParams[GEN_CPU_MODEL]); 00637 valueTypes[nParams] = XDR_STRING; 00638 paramValues[nParams] = cpuModel; 00639 nParams++; 00640 } 00641 00642 if (actGenMonitorParams[GEN_CPU_MODEL_NAME] && strlen(cpuModelName) != 0) { 00643 
paramNames[nParams] = strdup(genMonitorParams[GEN_CPU_MODEL_NAME]); 00644 valueTypes[nParams] = XDR_STRING; 00645 paramValues[nParams] = cpuModelName; 00646 nParams++; 00647 } 00648 00649 for (i = 0; i < nGenMonitorParams; i++) { 00650 if (actGenMonitorParams[i] != 1 || i == GEN_IP || i == GEN_HOSTNAME || 00651 i == GEN_CPU_VENDOR_ID || i == GEN_CPU_FAMILY || i == GEN_CPU_MODEL 00652 || i == GEN_CPU_MODEL_NAME) 00653 continue; 00654 00655 if (genRetResults[i] == PROCUTILS_ERROR) { 00656 /* could not read the requested information from /proc, disable this 00657 parameter */ 00658 if (autoDisableMonitoring) 00659 actGenMonitorParams[i] = 0; 00660 } else if (genRetResults[i] != RET_ERROR) { 00661 paramNames[nParams] = strdup(genMonitorParams[i]); 00662 paramValues[nParams] = (char *)&currentGenVals[i]; 00663 valueTypes[nParams] = XDR_REAL64; 00664 nParams++; 00665 } 00666 } 00667 00668 try { 00669 if (nParams > 0) 00670 sendParameters(sysMonCluster, sysMonNode, nParams, 00671 paramNames, valueTypes, paramValues); 00672 } catch (runtime_error& err) { 00673 logger(WARNING, err.what()); 00674 } 00675 00676 for (i = 0; i < nParams; i++) 00677 free(paramNames[i]); 00678 free(paramNames); 00679 free(valueTypes); 00680 free(paramValues); 00681 #endif 00682 }
| void ApMon::updateGeneralInfo | ( | ) | [protected] |
Update the general monitoring information.
Definition at line 546 of file monitor_utils.cpp.
00546 { 00547 00548 strcpy(cpuVendor, ""); strcpy(cpuFamily, ""); 00549 strcpy(cpuModel, ""); strcpy(cpuModelName, ""); 00550 00551 if (actGenMonitorParams[GEN_CPU_MHZ] == 1 || 00552 actGenMonitorParams[GEN_BOGOMIPS] == 1 || 00553 actGenMonitorParams[GEN_CPU_VENDOR_ID] == 1 || 00554 actGenMonitorParams[GEN_CPU_FAMILY] == 1 || 00555 actGenMonitorParams[GEN_CPU_MODEL] == 1 || 00556 actGenMonitorParams[GEN_CPU_MODEL_NAME] == 1) { 00557 try { 00558 ProcUtils::getCPUInfo(*this); 00559 } catch (procutils_error& err) { 00560 logger(WARNING, err.what()); 00561 genRetResults[GEN_CPU_MHZ] = genRetResults[GEN_BOGOMIPS] = PROCUTILS_ERROR; 00562 } 00563 } 00564 00565 if (actGenMonitorParams[GEN_TOTAL_MEM] == 1 || 00566 actGenMonitorParams[GEN_TOTAL_SWAP] == 1) { 00567 try { 00568 ProcUtils::getSysMem(currentGenVals[GEN_TOTAL_MEM], 00569 currentGenVals[GEN_TOTAL_SWAP]); 00570 } catch (procutils_error& perr) { 00571 logger(WARNING, perr.what()); 00572 genRetResults[GEN_TOTAL_MEM] = genRetResults[GEN_TOTAL_SWAP] = PROCUTILS_ERROR; 00573 } 00574 } 00575 00576 if (this -> numCPUs > 0) 00577 currentGenVals[GEN_NO_CPUS] = this -> numCPUs; 00578 else 00579 genRetResults[GEN_NO_CPUS] = PROCUTILS_ERROR; 00580 }
| void ApMon::setBackgroundThread | ( | bool | val | ) | [protected] |
Sets the value of the confCheck flag.
If it is true, the configuration file and/or the URLs will be periodically checked for modifications. By default it is false.
Definition at line 1245 of file ApMon.cpp.
01245 { 01246 // mutexBack is locked 01247 if (val == true) { 01248 if (!haveBkThread) { 01249 #ifndef WIN32 01250 pthread_create(&bkThread, NULL, &bkTask, this); 01251 #else 01252 DWORD dummy; 01253 bkThread = CreateThread(NULL, 65536, &bkTask, this, 0, &dummy); 01254 #endif 01255 haveBkThread = true; 01256 } else { 01257 pthread_mutex_lock(&mutexCond); 01258 pthread_cond_signal(&confChangedCond); 01259 pthread_mutex_unlock(&mutexCond); 01260 } 01261 } 01262 if (val == false) { 01263 //if (bkThreadStarted) { 01264 if (haveBkThread) { 01265 stopBkThread = true; 01266 pthread_mutex_unlock(&mutexBack); 01267 #ifndef WIN32 01268 pthread_mutex_lock(&mutexCond); 01269 #endif 01270 pthread_cond_signal(&confChangedCond); 01271 logger(INFO, "[Stopping the background thread...]"); 01272 #ifndef WIN32 01273 pthread_mutex_unlock(&mutexCond); 01274 pthread_join(bkThread, NULL); 01275 #else 01276 WaitForSingleObject(bkThread, INFINITE); 01277 #endif 01278 pthread_mutex_lock(&mutexBack); 01279 // logger(INFO, "bk thread stopped!"); 01280 haveBkThread = false; 01281 bkThreadStarted = false; 01282 stopBkThread = false; 01283 } 01284 } 01285 }
| long ApMon::getCrtRecheckInterval | ( | ) | [inline, protected] |
Returns the actual value of the time interval (in seconds) between two recheck operations for the configuration files.
Definition at line 956 of file ApMon.h.
00956 { 00957 return crtRecheckInterval; 00958 }
| void ApMon::setCrtRecheckInterval | ( | long | val | ) | [protected] |
Definition at line 1164 of file ApMon.cpp.
01164 { 01165 pthread_mutex_lock(&mutexBack); 01166 crtRecheckInterval = val; 01167 pthread_mutex_unlock(&mutexBack); 01168 }
| void ApMon::freeConf | ( | ) | [protected] |
Frees the data structures needed to hold the configuration settings.
Definition at line 604 of file ApMon.cpp.
00604 { 00605 int i; 00606 freeMat(destAddresses, nDestinations); 00607 freeMat(destPasswds, nDestinations); 00608 free(destPorts); 00609 00610 for (i = 0; i < confURLs.nConfURLs; i++) { 00611 free(confURLs.vURLs[i]); 00612 free(confURLs.lastModifURLs[i]); 00613 } 00614 }
| void ApMon::parseXApMonLine | ( | char * | line | ) | [protected] |
Parses an xApMon line from the configuration file and sets the corresponding parameters in the ApMon object.
Definition at line 773 of file monitor_utils.cpp.
00773 { 00774 bool flag, found; 00775 int ind; 00776 char tmp[MAX_STRING_LEN], logmsg[200]; 00777 char *param, *value; 00778 // char sbuf[MAX_STRING_LEN]; 00779 // char *pbuf = sbuf; 00780 const char *sep = " ="; 00781 00782 strcpy(tmp, line); 00783 char *tmp2 = tmp + strlen("xApMon_"); 00784 00785 param = strtok/*_r*/(tmp2, sep);//, &pbuf); 00786 value = strtok/*_r*/(NULL, sep);//, &pbuf); 00787 00788 /* if it is an on/off parameter, assign its value to flag */ 00789 if (strcmp(value, "on") == 0) 00790 flag = true; 00791 else /* if it is not an on/off paramenter the value of flag doesn't matter */ 00792 flag = false; 00793 00794 pthread_mutex_lock(&mutexBack); 00795 00796 found = false; 00797 if (strcmp(param, "job_monitoring") == 0) { 00798 this -> jobMonitoring = flag; found = true; 00799 } 00800 if (strcmp(param, "sys_monitoring") == 0) { 00801 this -> sysMonitoring = flag; found = true; 00802 } 00803 if (strcmp(param, "job_interval") == 0) { 00804 this -> jobMonitorInterval = atol(value); found = true; 00805 } 00806 if (strcmp(param, "sys_interval") == 0) { 00807 this -> sysMonitorInterval = atol(value); found = true; 00808 } 00809 if (strcmp(param, "general_info") == 0) { 00810 this -> genMonitoring = flag; found = true; 00811 } 00812 if (strcmp(param, "conf_recheck") == 0) { 00813 this -> confCheck = flag; found = true; 00814 } 00815 if (strcmp(param, "recheck_interval") == 0) { 00816 this -> recheckInterval = this -> crtRecheckInterval = atol(value); 00817 found = true; 00818 } 00819 if (strcmp(param, "auto_disable") == 0) { 00820 this -> autoDisableMonitoring = flag; 00821 found = true; 00822 } 00823 if (strcmp(param, "maxMsgRate") == 0) { 00824 this -> maxMsgRate = atoi(value); 00825 found = true; 00826 } 00827 00828 if (found) { 00829 pthread_mutex_unlock(&mutexBack); 00830 return; 00831 } 00832 00833 if (strstr(param, "sys_") == param) { 00834 ind = getVectIndex(param + strlen("sys_"), sysMonitorParams, 00835 nSysMonitorParams); 00836 if (ind < 0) { 
00837 pthread_mutex_unlock(&mutexBack); 00838 sprintf(logmsg, "Invalid parameter name in the configuration file: %s", 00839 param); 00840 logger(WARNING, logmsg); 00841 return; 00842 } 00843 found = true; 00844 this -> actSysMonitorParams[ind] = (int)flag; 00845 } 00846 00847 if (strstr(param, "job_") == param) { 00848 ind = getVectIndex(param + strlen("job_"), jobMonitorParams, 00849 nJobMonitorParams); 00850 00851 if (ind < 0) { 00852 pthread_mutex_unlock(&mutexBack); 00853 sprintf(logmsg, "Invalid parameter name in the configuration file: %s", 00854 param); 00855 logger(WARNING, logmsg); 00856 return; 00857 } 00858 found = true; 00859 this -> actJobMonitorParams[ind] = (int)flag; 00860 } 00861 00862 if (!found) { 00863 ind = getVectIndex(param, genMonitorParams, 00864 nGenMonitorParams); 00865 if (ind < 0) { 00866 pthread_mutex_unlock(&mutexBack); 00867 sprintf(logmsg, "Invalid parameter name in the configuration file: %s", 00868 param); 00869 logger(WARNING, logmsg); 00870 return; 00871 } else { 00872 found = true; 00873 this -> actGenMonitorParams[ind] = (int)flag; 00874 } 00875 } 00876 00877 if (!found) { 00878 sprintf(logmsg, "Invalid parameter name in the configuration file: %s", 00879 param); 00880 logger(WARNING, logmsg); 00881 } 00882 pthread_mutex_unlock(&mutexBack); 00883 }
| void ApMon::initSocket | ( | ) | throw (runtime_error) [protected] |
Initializes the UDP socket used to send the datagrams.
Definition at line 1361 of file ApMon.cpp.
01361 { 01362 int optval1 = 1; 01363 struct timeval optval2; 01364 int ret1, ret2, ret3; 01365 01366 sockfd = socket(AF_INET, SOCK_DGRAM, 0); 01367 if (sockfd < 0) 01368 throw runtime_error("[ initSocket() ] Error creating socket"); 01369 ret1 = setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, (char *) &optval1, 01370 sizeof(optval1)); 01371 01372 /* set connection timeout */ 01373 optval2.tv_sec = 20; 01374 optval2.tv_usec = 0; 01375 ret2 = setsockopt(sockfd, SOL_SOCKET, SO_SNDTIMEO, (char *) &optval2, 01376 sizeof(optval2)); 01377 ret3 = setsockopt(sockfd, SOL_SOCKET, SO_RCVTIMEO, (char *) &optval2, 01378 sizeof(optval2)); 01379 if (ret1 != 0 || ret2 != 0 || ret3 != 0) 01380 throw runtime_error("[ initSocket() ] Error initializing socket."); 01381 }
| void ApMon::parseConf | ( | FILE * | fp, | |
| int * | nDestinations, | |||
| char ** | destAddresses, | |||
| int * | destPorts, | |||
| char ** | destPasswds | |||
| ) | throw (runtime_error) [protected] |
Parses the contents of a configuration file.
The destination addresses and ports are stored in the arrays given as parameters.
Definition at line 1384 of file ApMon.cpp.
01386 { 01387 int i, ch; 01388 char *line = (char *)malloc ((MAX_STRING_LEN1) * sizeof(char)); 01389 char *tmp = NULL; 01390 char *loglevel_s; 01391 // char sbuf[30]; 01392 // char *pbuf = sbuf; 01393 01394 /* parse the input file */ 01395 while(fgets(line, MAX_STRING_LEN, fp) != NULL) { 01396 01397 if (tmp != NULL) { 01398 free(tmp); 01399 tmp = NULL; 01400 } 01401 01402 line[MAX_STRING_LEN - 1] = 0; 01403 /* check if the line was too long */ 01404 ch = fgetc(fp); // see if we are at the end of the file 01405 ungetc(ch, fp); 01406 if (line[strlen(line) - 1] != 10 && ch != EOF) { 01407 /* if the line doesn't end with a \n and we are not at the end 01408 of file, the line from the file was longer than MAX_STRING_LEN */ 01409 fclose(fp); 01410 throw runtime_error ("[ parseConf() ] Maximum line length exceeded in the conf file"); 01411 } 01412 01413 tmp = trimString(line); 01414 01415 /* skip the blank lines and the comment lines */ 01416 if (strlen(tmp) == 0 || strchr(tmp, '#') == tmp) 01417 continue; 01418 01419 if (strstr(tmp, "xApMon_loglevel") == tmp) { 01420 char *tmp2 = tmp; 01421 strtok/*_r*/(tmp2, "= ");//, &pbuf); 01422 loglevel_s = strtok/*_r*/(NULL, "= ");//, &pbuf); 01423 setLogLevel(loglevel_s); 01424 continue; 01425 } 01426 01427 if (strstr(tmp, "xApMon_") == tmp) { 01428 parseXApMonLine(tmp); 01429 continue; 01430 } 01431 01432 if (*nDestinations >= MAX_N_DESTINATIONS) { 01433 free(line); free(tmp); 01434 for (i = 0; i < *nDestinations; i++) { 01435 free(destAddresses[i]); 01436 free(destPasswds[i]); 01437 } 01438 fclose(fp); 01439 throw runtime_error("[ parseConf() ] Maximum number of destinations exceeded."); 01440 } 01441 01442 addToDestinations(tmp, nDestinations, destAddresses, destPorts, 01443 destPasswds); 01444 } 01445 01446 if (tmp != NULL) 01447 free(tmp); 01448 free(line); 01449 }
| bool ApMon::shouldSend | ( | ) | [protected] |
Decides whether the current datagram should be sent, so that the maximum number of datagrams per second is respected on average.
This decision is based on the number of messages previously sent.
new time, update previous counters;
reset current counter
compute the history
when we should start dropping messages
counting sent and dropped messages
Definition at line 1451 of file ApMon.cpp.
01451 { 01452 01453 long now = time(NULL); 01454 bool doSend; 01455 char msg[200]; 01456 01457 //printf("now %ld crtTime %ld\n", now, crtTime); 01458 01459 if (now != crtTime){ 01461 prvSent = hWeight * prvSent + (1.0 - hWeight) * crtSent / (now - crtTime); 01462 prvTime = crtTime; 01463 sprintf(msg, "previously sent: %ld dropped: %ld", crtSent, crtDrop); 01464 logger(DEBUG, msg); 01466 crtTime = now; 01467 crtSent = 0; 01468 crtDrop = 0; 01469 //printf("\n"); 01470 } 01471 01473 int valSent = (int)(prvSent * hWeight + crtSent * (1.0 - hWeight)); 01474 01475 doSend = true; 01477 int level = this -> maxMsgRate - this -> maxMsgRate / 10; 01478 01479 01480 if (valSent > (this -> maxMsgRate - level)) { 01481 //int max10 = this -> maxMsgRate / 10; 01482 int rnd = rand() % (this -> maxMsgRate / 10); 01483 doSend = (rnd < (this -> maxMsgRate - valSent)); 01484 } 01486 if (doSend) { 01487 crtSent++; 01488 //printf("#"); 01489 } else { 01490 crtDrop++; 01491 //printf("."); 01492 } 01493 01494 return doSend; 01495 }
| void* bkTask | ( | void * | param | ) | [friend] |
This function is executed in a background thread and has two roles: it automatically sends the system/job monitoring parameters (if the user requested this), and it checks the configuration file/URLs for changes.
(this is done in a separate thread).
Definition at line 913 of file ApMon.cpp.
00913 { 00914 #else 00915 DWORD WINAPI bkTask(void *param) { 00916 #endif 00917 struct stat st; 00918 #ifndef WIN32 00919 struct timespec delay; 00920 #else 00921 DWORD delay; 00922 #endif 00923 bool resourceChanged, haveChange; 00924 int nextOp = -1, i, ret; 00925 int generalInfoCount; 00926 time_t crtTime, timeRemained; 00927 time_t nextRecheck = 0, nextJobInfoSend = 0, nextSysInfoSend = 0; 00928 ApMon *apm = (ApMon *)param; 00929 char logmsg[200]; 00930 00931 logger(INFO, "[Starting background thread...]"); 00932 apm -> bkThreadStarted = true; 00933 00934 crtTime = time(NULL); 00935 00936 pthread_mutex_lock(&(apm -> mutexBack)); 00937 if (apm -> confCheck) { 00938 nextRecheck = crtTime + apm -> crtRecheckInterval; 00939 //sprintf(logmsg, "###1 crt %ld interv %ld recheck %ld ", crtTime, 00940 // apm -> crtRecheckInterval, nextRecheck); 00941 //logger(FINE, logmsg); 00942 //fflush(stdout); 00943 } 00944 if (apm -> jobMonitoring) 00945 nextJobInfoSend = crtTime + apm -> jobMonitorInterval; 00946 if (apm -> sysMonitoring) 00947 nextSysInfoSend = crtTime + apm -> sysMonitorInterval; 00948 pthread_mutex_unlock(&(apm -> mutexBack)); 00949 00950 timeRemained = -1; 00951 generalInfoCount = 0; 00952 00953 while (1) { 00954 pthread_mutex_lock(&apm -> mutexBack); 00955 if (apm -> stopBkThread) { 00956 // printf("### stopBkThread \n"); 00957 pthread_mutex_unlock(&apm -> mutexBack); 00958 break; 00959 } 00960 pthread_mutex_unlock(&apm -> mutexBack); 00961 00962 //sprintf(logmsg, "### 2 recheck %ld sys %ld ", nextRecheck, 00963 // nextSysInfoSend); 00964 //logger(FINE, logmsg); 00965 00966 /* determine the next operation that must be performed */ 00967 if (nextRecheck > 0 && (nextJobInfoSend <= 0 || 00968 nextRecheck <= nextJobInfoSend)) { 00969 if (nextSysInfoSend <= 0 || nextRecheck <= nextSysInfoSend) { 00970 nextOp = RECHECK_CONF; 00971 timeRemained = nextRecheck - crtTime; 00972 } else { 00973 nextOp = SYS_INFO_SEND; 00974 timeRemained = nextSysInfoSend - crtTime; 00975 } 
00976 } else { 00977 if (nextJobInfoSend > 0 && (nextSysInfoSend <= 0 || 00978 nextJobInfoSend <= nextSysInfoSend)) { 00979 nextOp = JOB_INFO_SEND; 00980 timeRemained = nextJobInfoSend - crtTime; 00981 } else if (nextSysInfoSend > 0) { 00982 nextOp = SYS_INFO_SEND; 00983 timeRemained = nextSysInfoSend - crtTime; 00984 } 00985 } 00986 00987 if (timeRemained == -1) 00988 timeRemained = RECHECK_INTERVAL; 00989 00990 #ifndef WIN32 00991 /* the moment when the next operation should be performed */ 00992 delay.tv_sec = crtTime + timeRemained; 00993 delay.tv_nsec = 0; 00994 #else 00995 delay = (/*crtTime +*/ timeRemained) * 1000; // this is in millis 00996 #endif 00997 00998 pthread_mutex_lock(&(apm -> mutexBack)); 00999 01000 pthread_mutex_lock(&(apm -> mutexCond)); 01001 /* check for changes in the settings */ 01002 haveChange = false; 01003 if (apm -> jobMonChanged || apm -> sysMonChanged || apm -> recheckChanged) 01004 haveChange = true; 01005 if (apm -> jobMonChanged) { 01006 if (apm -> jobMonitoring) 01007 nextJobInfoSend = crtTime + apm -> jobMonitorInterval; 01008 else 01009 nextJobInfoSend = -1; 01010 apm -> jobMonChanged = false; 01011 } 01012 if (apm -> sysMonChanged) { 01013 if (apm -> sysMonitoring) 01014 nextSysInfoSend = crtTime + apm -> sysMonitorInterval; 01015 else 01016 nextSysInfoSend = -1; 01017 apm -> sysMonChanged = false; 01018 } 01019 if (apm -> recheckChanged) { 01020 if (apm -> confCheck) { 01021 nextRecheck = crtTime + apm -> crtRecheckInterval; 01022 } 01023 else 01024 nextRecheck = -1; 01025 apm -> recheckChanged = false; 01026 } 01027 pthread_mutex_unlock(&(apm -> mutexBack)); 01028 01029 if (haveChange) { 01030 pthread_mutex_unlock(&(apm -> mutexCond)); 01031 continue; 01032 } 01033 01034 /* wait until the next operation should be performed or until 01035 a change in the settings occurs */ 01036 #ifndef WIN32 01037 ret = pthread_cond_timedwait(&(apm -> confChangedCond), 01038 &(apm -> mutexCond), &delay); 01039 pthread_mutex_unlock(&(apm -> 
mutexCond)); 01040 #else 01041 pthread_mutex_unlock(&(apm -> mutexCond)); 01042 ret = WaitForSingleObject(apm->confChangedCond, delay); 01043 #endif 01044 if (ret == ETIMEDOUT) { 01045 // printf("### ret TIMEDOUT\n"); 01046 /* now perform the operation */ 01047 if (nextOp == JOB_INFO_SEND) { 01048 apm -> sendJobInfo(); 01049 crtTime = time(NULL); 01050 nextJobInfoSend = crtTime + apm -> getJobMonitorInterval(); 01051 } 01052 01053 if (nextOp == SYS_INFO_SEND) { 01054 apm -> sendSysInfo(); 01055 if (apm -> getGenMonitoring()) { 01056 if (generalInfoCount <= 1) 01057 apm -> sendGeneralInfo(); 01058 generalInfoCount = (generalInfoCount + 1) % apm -> genMonitorIntervals; 01059 } 01060 crtTime = time(NULL); 01061 nextSysInfoSend = crtTime + apm -> getSysMonitorInterval(); 01062 } 01063 01064 if (nextOp == RECHECK_CONF) { 01065 //logger(FINE, "### recheck conf"); 01066 resourceChanged = false; 01067 try { 01068 if (apm -> initType == FILE_INIT) { 01069 sprintf(logmsg, "Checking for modifications for file %s ", 01070 apm -> initSources[0]); 01071 logger(INFO, logmsg); 01072 stat(apm -> initSources[0], &st); 01073 if (st.st_mtime > apm -> lastModifFile) { 01074 sprintf(logmsg, "File %s modified ", apm -> initSources[0]); 01075 logger(INFO, logmsg); 01076 resourceChanged = true; 01077 } 01078 } 01079 01080 // check the configuration URLs 01081 for (i = 0; i < apm -> confURLs.nConfURLs; i++) { 01082 sprintf(logmsg, "[Checking for modifications for URL %s ] ", 01083 apm -> confURLs.vURLs[i]); 01084 logger(INFO, logmsg); 01085 if (urlModified(apm -> confURLs.vURLs[i], apm -> confURLs.lastModifURLs[i])) { 01086 sprintf(logmsg, "URL %s modified ", apm -> confURLs.vURLs[i]); 01087 logger(INFO, logmsg); 01088 resourceChanged = true; 01089 break; 01090 } 01091 } 01092 01093 if (resourceChanged) { 01094 logger(INFO, "Reloading configuration..."); 01095 if (apm -> initType == FILE_INIT) 01096 apm -> initialize(apm -> initSources[0], false); 01097 else 01098 apm -> initialize(apm -> 
nInitSources, apm -> initSources, false); 01099 } 01100 apm -> setCrtRecheckInterval(apm -> getRecheckInterval()); 01101 } catch (runtime_error &err) { 01102 logger(WARNING, err.what()); 01103 logger(WARNING, "Increasing the time interval for reloading the configuration..."); 01104 apm -> setCrtRecheckInterval(apm -> getRecheckInterval() * 5); 01105 } 01106 crtTime = time(NULL); 01107 nextRecheck = crtTime + apm -> getCrtRecheckInterval(); 01108 //sleep(apm -> getCrtRecheckInterval()); 01109 } 01110 } 01111 01112 } // while 01113 01114 #ifndef WIN32 01115 return NULL; // it doesn't matter what we return here 01116 #else 01117 return 0; 01118 #endif 01119 }
char* ApMon::clusterName [protected] |
char* ApMon::nodeName [protected] |
char* ApMon::sysMonCluster [protected] |
char* ApMon::sysMonNode [protected] |
int ApMon::nDestinations [protected] |
char** ApMon::destAddresses [protected] |
int* ApMon::destPorts [protected] |
char** ApMon::destPasswds [protected] |
char* ApMon::buf [protected] |
int ApMon::dgramSize [protected] |
int ApMon::sockfd [protected] |
bool ApMon::confCheck [protected] |
int ApMon::nInitSources [protected] |
char** ApMon::initSources [protected] |
int ApMon::initType [protected] |
long ApMon::recheckInterval [protected] |
long ApMon::crtRecheckInterval [protected] |
pthread_t ApMon::bkThread [protected] |
pthread_mutex_t ApMon::mutex [protected] |
pthread_mutex_t ApMon::mutexBack [protected] |
pthread_mutex_t ApMon::mutexCond [protected] |
pthread_cond_t ApMon::confChangedCond [protected] |
bool ApMon::recheckChanged [protected] |
bool ApMon::jobMonChanged [protected] |
bool ApMon::sysMonChanged [protected] |
bool ApMon::haveBkThread [protected] |
bool ApMon::bkThreadStarted [protected] |
bool ApMon::stopBkThread [protected] |
bool ApMon::autoDisableMonitoring [protected] |
bool ApMon::sysMonitoring [protected] |
bool ApMon::jobMonitoring [protected] |
bool ApMon::genMonitoring [protected] |
long ApMon::jobMonitorInterval [protected] |
long ApMon::sysMonitorInterval [protected] |
int ApMon::genMonitorIntervals [protected] |
int ApMon::nSysMonitorParams [protected] |
int ApMon::nJobMonitorParams [protected] |
int ApMon::nGenMonitorParams [protected] |
char* ApMon::sysMonitorParams[MAX_SYS_PARAMS] [protected] |
char* ApMon::genMonitorParams[MAX_GEN_PARAMS] [protected] |
char* ApMon::jobMonitorParams[MAX_JOB_PARAMS] [protected] |
int ApMon::actSysMonitorParams[MAX_SYS_PARAMS] [protected] |
int ApMon::actGenMonitorParams[MAX_GEN_PARAMS] [protected] |
int ApMon::actJobMonitorParams[MAX_JOB_PARAMS] [protected] |
ConfURLs ApMon::confURLs [protected] |
int ApMon::nMonJobs [protected] |
MonitoredJob* ApMon::monJobs [protected] |
long ApMon::lastModifFile [protected] |
time_t ApMon::lastJobInfoSend [protected] |
char ApMon::username[MAX_STRING_LEN] [protected] |
char ApMon::groupname[MAX_STRING_LEN] [protected] |
char ApMon::myHostname[MAX_STRING_LEN] [protected] |
char ApMon::myIP[MAX_STRING_LEN] [protected] |
int ApMon::numIPs [protected] |
char ApMon::allMyIPs[20][20] [protected] |
int ApMon::numCPUs [protected] |
bool ApMon::sysInfo_first [protected] |
time_t ApMon::lastSysInfoSend [protected] |
double ApMon::lastSysVals[MAX_SYS_PARAMS] [protected] |
double ApMon::currentSysVals[MAX_SYS_PARAMS] [protected] |
int ApMon::sysRetResults[MAX_SYS_PARAMS] [protected] |
double ApMon::currentJobVals[MAX_JOB_PARAMS] [protected] |
int ApMon::jobRetResults[MAX_JOB_PARAMS] [protected] |
double ApMon::currentGenVals[MAX_GEN_PARAMS] [protected] |
int ApMon::genRetResults[MAX_GEN_PARAMS] [protected] |
double ApMon::currentProcessStates[NLETTERS] [protected] |
char ApMon::cpuVendor[100] [protected] |
char ApMon::cpuFamily[100] [protected] |
char ApMon::cpuModel[100] [protected] |
char ApMon::cpuModelName[200] [protected] |
char ApMon::interfaceNames[20][20] [protected] |
int ApMon::nInterfaces [protected] |
double ApMon::lastBytesSent[20] [protected] |
double ApMon::lastBytesReceived[20] [protected] |
double ApMon::lastNetErrs[20] [protected] |
double* ApMon::currentNetIn [protected] |
double * ApMon::currentNetOut [protected] |
double * ApMon::currentNetErrs [protected] |
double ApMon::currentNSockets[4] [protected] |
double ApMon::currentSocketsTCP[20] [protected] |
char* ApMon::socketStatesMapTCP[20] [protected] |
int ApMon::maxMsgRate [protected] |
long ApMon::prvTime [protected] |
double ApMon::prvSent [protected] |
double ApMon::prvDrop [protected] |
long ApMon::crtTime [protected] |
long ApMon::crtSent [protected] |
long ApMon::crtDrop [protected] |
double ApMon::hWeight [protected] |
int ApMon::instance_id [protected] |
int ApMon::seq_nr [protected] |