|
Gaudi Framework, version v22r0 |
| Home | Generated: 9 Feb 2011 |
Data structure used for sending monitoring data to a MonALISA module. More...
#include <ApMon.h>

Public Member Functions | |
| ApMon (char *initsource) throw (runtime_error) | |
| Initializes an ApMon object from a configuration file or URL. | |
| ApMon (int nDestinations, char **destinationsList) throw (runtime_error) | |
| Initializes an ApMon data structure from a vector of strings. | |
| ApMon (int nDestinations, char **destAddresses, int *destPorts, char **destPasswds) throw (runtime_error) | |
| Initializes an ApMon data structure, using arrays instead of a file. | |
| ~ApMon () | |
| ApMon destructor. | |
| int | sendParameter (char *clusterName, char *nodeName, char *paramName, int valueType, char *paramValue) throw (runtime_error) |
| Sends a parameter and its value to the MonALISA module. | |
| int | sendTimedParameter (char *clusterName, char *nodeName, char *paramName, int valueType, char *paramValue, int timestamp) throw (runtime_error) |
| Sends a parameter and its value to the MonALISA module, together with a timestamp. | |
| int | sendParameter (char *clusterName, char *nodeName, char *paramName, int paramValue) throw (runtime_error) |
| Sends an integer parameter and its value to the MonALISA module. | |
| int | sendParameter (char *clusterName, char *nodeName, char *paramName, float paramValue) throw (runtime_error) |
| Sends a parameter of type float and its value to the MonALISA module. | |
| int | sendParameter (char *clusterName, char *nodeName, char *paramName, double paramValue) throw (runtime_error) |
| Sends a parameter of type double and its value to the MonALISA module. | |
| int | sendParameter (char *clusterName, char *nodeName, char *paramName, char *paramValue) throw (runtime_error) |
| Sends a parameter of type string and its value to the MonALISA module. | |
| int | sendParameters (char *clusterName, char *nodeName, int nParams, char **paramNames, int *valueTypes, char **paramValues) throw (runtime_error) |
| Sends a set of parameters and their values to the MonALISA module. | |
| int | sendTimedParameters (char *clusterName, char *nodeName, int nParams, char **paramNames, int *valueTypes, char **paramValues, int timestamp) throw (runtime_error) |
| Sends a set of parameters and their values to the MonALISA module, together with a timestamp. | |
| bool | getConfCheck () |
| Returns the value of the confCheck flag. | |
| long | getRecheckInterval () |
| Returns the value of the time interval (in seconds) between two recheck operations for the configuration files. | |
| void | setRecheckInterval (long val) |
| Sets the value of the time interval (in seconds) between two recheck operations for the configuration files. | |
| void | setConfRecheck (bool confRecheck, long interval) |
| Enables/disables the periodical check for changes in the configuration files/URLs. | |
| void | setConfRecheck (bool confRecheck) |
| Enables/disables the periodical check for changes in the configuration files/URLs. | |
| void | setJobMonitoring (bool jobMonitoring, long interval) |
| Enables/disables the periodical sending of datagrams with job monitoring information. | |
| void | setJobMonitoring (bool jobMonitoring) |
| Enables/disables the job monitoring. | |
| long | getJobMonitorInterval () |
| Returns the interval at which job monitoring datagrams are sent. | |
| bool | getJobMonitoring () |
| Returns true if the job monitoring is enabled, and false otherwise. | |
| void | setSysMonitoring (bool sysMonitoring, long interval) |
| Enables/disables the periodical sending of datagrams with system monitoring information. | |
| void | setSysMonitoring (bool sysMonitoring) |
| Enables/disables the system monitoring. | |
| long | getSysMonitorInterval () |
| Returns the interval at which system monitoring datagrams are sent. | |
| bool | getSysMonitoring () |
| Returns true if the system monitoring is enabled, and false otherwise. | |
| void | setGenMonitoring (bool genMonitoring, int nIntervals) |
| Enables/disables the periodical sending of datagrams with general system information. | |
| void | setGenMonitoring (bool genMonitoring) |
| Enables/disables the sending of datagrams with general system information. | |
| bool | getGenMonitoring () |
| Returns true if the sending of general system information is enabled and false otherwise. | |
| void | addJobToMonitor (long pid, char *workdir, char *clusterName, char *nodeName) throw (runtime_error) |
| Adds a new job to the list of the jobs monitored by ApMon. | |
| void | removeJobToMonitor (long pid) throw (runtime_error) |
| Removes a job from the list of the jobs monitored by ApMon. | |
| void | setSysMonClusterNode (char *clusterName, char *nodeName) |
| This function is called by the user to set the cluster name and the node name for the system monitoring datagrams. | |
| void | setMaxMsgRate (int maxRate) |
| Sets the maximum number of messages that are sent to MonALISA in one second. | |
Static Public Member Functions | |
| static void | setLogLevel (char *newLevel_s) |
| Sets the ApMon logging level. | |
| static void | errExit (char *msg) |
| Displays an error message and exits with -1 as return value. | |
Protected Member Functions | |
| void | initialize (char *filename, bool firstTime) throw (runtime_error) |
| Initializes an ApMon object from a configuration file. | |
| void | constructFromList (int nDestinations, char **destinationsList) throw (runtime_error) |
| Initializes an ApMon object from a list with URLs and destination hosts. | |
| void | initialize (int nDestinations, char **destList, bool firstTime) throw (runtime_error) |
| Initializes an ApMon object from a list with URLs and destination hosts. | |
| void | loadFile (char *filename, int *nDestinations, char **destAddresses, int *destPorts, char **destPasswds) throw (runtime_error) |
| Parses a configuration file which contains addresses, ports and passwords for the destination hosts and puts the results in the vectors given as parameters. | |
| void | arrayInit (int nDestinations, char **destAddresses, int *destPorts, char **destPasswds) throw (runtime_error) |
| Internal function that initializes an ApMon data structure. | |
| void | arrayInit (int nDestinations, char **destAddresses, int *destPorts, char **destPasswds, bool firstTime) throw (runtime_error) |
| Internal function that initializes an ApMon data structure. | |
| void | addToDestinations (char *line, int *nDestinations, char *destAddresses[], int destPorts[], char *destPasswds[]) |
| Parses the string line, which has the form hostname:port, and adds the hostname and the port to the lists given as parameters. | |
| void | getDestFromWeb (char *url, int *nDestinations, char *destAddresses[], int destPorts[], char *destPasswds[], ConfURLs &confURLs) throw (runtime_error) |
| Gets a configuration file from a web location and adds the destination addresses and ports to the lists given as parameters. | |
| void | encodeParams (int nParams, char **paramNames, int *valueTypes, char **paramValues, int timestamp) throw (runtime_error) |
| Encodes in the XDR format the data from a ApMon structure. | |
| void | initMonitoring () |
| Initializes the monitoring configurations and the names of the parameters included in the monitoring datagrams. | |
| void | sendJobInfo () |
| Sends datagrams containing information about the jobs that are currently being monitored. | |
| void | sendOneJobInfo (MonitoredJob job) |
| Sends datagrams with monitoring information about the specified job to all the destination hosts. | |
| void | updateJobInfo (MonitoredJob job) |
| Update the monitoring information regarding the specified job. | |
| void | sendSysInfo () |
| Sends datagrams with system monitoring information to all the destination hosts. | |
| void | updateSysInfo () |
| Update the system monitoring information with new values obtained from the /proc filesystem. | |
| void | sendGeneralInfo () |
| Sends datagrams with general system monitoring information to all the destination hosts. | |
| void | updateGeneralInfo () |
| Update the general monitoring information. | |
| void | setBackgroundThread (bool val) |
| Sets the value of the confCheck flag. | |
| long | getCrtRecheckInterval () |
| Returns the actual value of the time interval (in seconds) between two recheck operations for the configuration files. | |
| void | setCrtRecheckInterval (long val) |
| void | freeConf () |
| Frees the data structures needed to hold the configuration settings. | |
| void | parseXApMonLine (char *line) |
| Parses an xApMon line from the configuration file and sets the corresponding parameters in the ApMon object. | |
| void | initSocket () throw (runtime_error) |
| Initializes the UDP socket used to send the datagrams. | |
| void | parseConf (FILE *fp, int *nDestinations, char **destAddresses, int *destPorts, char **destPasswds) throw (runtime_error) |
| Parses the contents of a configuration file. | |
| bool | shouldSend () |
| Decides if the current datagram should be sent (so that the maximum number of datagrams per second is respected in average). | |
Protected Attributes | |
| char * | clusterName |
| The name of the monitored cluster. | |
| char * | nodeName |
| The name of the monitored node. | |
| char * | sysMonCluster |
| The cluster name used when sending system monitoring datagrams. | |
| char * | sysMonNode |
| The node name used when sending system monitoring datagrams. | |
| int | nDestinations |
| The number of destinations to send the results to. | |
| char ** | destAddresses |
| The IP addresses where the results will be sent. | |
| int * | destPorts |
| The ports where the destination hosts listen. | |
| char ** | destPasswds |
| Passwords for the MonALISA hosts. | |
| char * | buf |
| The buffer which holds the message data (encoded in XDR). | |
| int | dgramSize |
| The size of the data inside the datagram (header not included). | |
| int | sockfd |
| Socket descriptor. | |
| bool | confCheck |
| If this flag is true, the configuration file / URLs are periodically rechecked for changes. | |
| int | nInitSources |
| The number of initialization sources. | |
| char ** | initSources |
| The name(s) of the initialization source(s) (file or list). | |
| int | initType |
| long | recheckInterval |
| The configuration file and the URLs are rechecked for changes at intervals of this many seconds (this value is requested by the user and will be used if no errors appear when reloading the configuration). | |
| long | crtRecheckInterval |
| If the configuration URLs cannot be reloaded, the interval until the next attempt will be increased. | |
| pthread_t | bkThread |
| Background thread which periodically rechecks the configuration and sends monitoring information. | |
| pthread_mutex_t | mutex |
| Used to protect the general ApMon data structures. | |
| pthread_mutex_t | mutexBack |
| Used to protect the variables needed by the background thread. | |
| pthread_mutex_t | mutexCond |
| Used for the condition variable confChangedCond. | |
| pthread_cond_t | confChangedCond |
| Used to notify changes in the monitoring configuration. | |
| bool | recheckChanged |
| These flags indicate changes in the monitoring configuration. | |
| bool | jobMonChanged |
| bool | sysMonChanged |
| bool | haveBkThread |
| If this flag is true, the background thread is created (but not necessarily started). | |
| bool | bkThreadStarted |
| If this flag is true, the background thread is started. | |
| bool | stopBkThread |
| If this flag is true, there was a request to stop the background thread. | |
| bool | autoDisableMonitoring |
| If this flag is set to true, when the value of a parameter cannot be read from /proc, ApMon will not attempt to include that value in the next datagrams. | |
| bool | sysMonitoring |
| If this flag is true, packets with system information taken from /proc are periodically sent to MonALISA. | |
| bool | jobMonitoring |
| If this flag is true, packets with job information taken from /proc are periodically sent to MonALISA. | |
| bool | genMonitoring |
| If this flag is true, packets with general system information taken from /proc are periodically sent to MonALISA. | |
| long | jobMonitorInterval |
| Job/System monitoring information obtained from /proc is sent at these time intervals. | |
| long | sysMonitorInterval |
| int | genMonitorIntervals |
| General system monitoring information is sent at a time interval equal to genMonitorIntervals * sysMonitorInterval. | |
| int | nSysMonitorParams |
| Number of parameters that can be enabled/disabled by the user in the system/job/general monitoring datagrams. | |
| int | nJobMonitorParams |
| int | nGenMonitorParams |
| char * | sysMonitorParams [MAX_SYS_PARAMS] |
| char * | genMonitorParams [MAX_GEN_PARAMS] |
| char * | jobMonitorParams [MAX_JOB_PARAMS] |
| int | actSysMonitorParams [MAX_SYS_PARAMS] |
| int | actGenMonitorParams [MAX_GEN_PARAMS] |
| int | actJobMonitorParams [MAX_JOB_PARAMS] |
| ConfURLs | confURLs |
| int | nMonJobs |
| The number of jobs that will be monitored. | |
| MonitoredJob * | monJobs |
| Array which holds information about the jobs to be monitored. | |
| long | lastModifFile |
| The last time when the configuration file was modified. | |
| time_t | lastJobInfoSend |
| char | username [MAX_STRING_LEN] |
| The name of the user who owns this process. | |
| char | groupname [MAX_STRING_LEN] |
| The group to which the user belongs. | |
| char | myHostname [MAX_STRING_LEN] |
| The name of the host on which ApMon currently runs. | |
| char | myIP [MAX_STRING_LEN] |
| The main IP address of the host on which ApMon currently runs. | |
| int | numIPs |
| The number of IP addresses of the host. | |
| char | allMyIPs [20][20] |
| A list with all the IP addresses of the host. | |
| int | numCPUs |
| The number of CPUs on the machine that runs ApMon. | |
| bool | sysInfo_first |
| time_t | lastSysInfoSend |
| The moment when the last system monitoring datagram was sent. | |
| double | lastSysVals [MAX_SYS_PARAMS] |
| double | currentSysVals [MAX_SYS_PARAMS] |
| int | sysRetResults [MAX_SYS_PARAMS] |
| double | currentJobVals [MAX_JOB_PARAMS] |
| int | jobRetResults [MAX_JOB_PARAMS] |
| double | currentGenVals [MAX_GEN_PARAMS] |
| int | genRetResults [MAX_GEN_PARAMS] |
| double | currentProcessStates [NLETTERS] |
| char | cpuVendor [100] |
| char | cpuFamily [100] |
| char | cpuModel [100] |
| char | cpuModelName [200] |
| char | interfaceNames [20][20] |
| The names of the network interfaces. | |
| int | nInterfaces |
| The number of network interfaces. | |
| double | lastBytesSent [20] |
| The total number of bytes sent through each interface, when the previous system monitoring datagram was sent. | |
| double | lastBytesReceived [20] |
| double | lastNetErrs [20] |
| The total number of network errors for each interface, when the previous system monitoring datagram was sent. | |
| double * | currentNetIn |
| The current values for the net_in, net_out, net_errs parameters. | |
| double * | currentNetOut |
| double * | currentNetErrs |
| double | currentNSockets [4] |
| The number of open TCP, UDP, ICMP and Unix sockets. | |
| double | currentSocketsTCP [20] |
| The number of TCP sockets in each possible state (ESTABLISHED, LISTEN, ...). | |
| char * | socketStatesMapTCP [20] |
| Table that associates the names of the TCP sockets states with the symbolic constants. | |
| int | maxMsgRate |
| long | prvTime |
| double | prvSent |
| double | prvDrop |
| long | crtTime |
| long | crtSent |
| long | crtDrop |
| double | hWeight |
| int | instance_id |
| Random number that identifies this instance of ApMon. | |
| int | seq_nr |
| Sequence number for the packets that are sent to MonALISA. | |
Friends | |
| class | ProcUtils |
| void * | bkTask (void *param) |
| This function is executed in a background thread and has two roles: it automatically sends the system/job monitoring parameters (if the user requested) and it checks the configuration file/URLs for changes. | |
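Data structure used for sending monitoring data to a MonALISA module.
The data is packed in UDP datagrams, in XDR format. A datagram has the following structure: a header, followed by the cluster name (string), the node name (string), the number of parameters (int), then, for each parameter, its name (string), its value type (int) and its value, and optionally a timestamp (int).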
Since v1.6 ApMon has the xApMon extension, which can be configured to send periodically, in a background thread, monitoring information regarding the system and/or some specified jobs.
Definition at line 212 of file ApMon.h.
| ApMon::ApMon | ( | char * | initsource | ) | throw (runtime_error) |
Initializes an ApMon object from a configuration file or URL.
| initsource | The name of the file/URL which contains the addresses and the ports of the destination hosts, and also the passwords (see README for details about the structure of this file). |
Definition at line 61 of file ApMon.cpp.
00062 { 00063 00064 if (initsource == NULL) 00065 throw runtime_error("[ ApMon() ] No conf file/URL provided"); 00066 00067 if (strstr(initsource, "http://") == initsource) { 00068 char *destList[1]; 00069 destList[0] = initsource; 00070 constructFromList(1, destList); 00071 } else { 00072 nInitSources = 1; 00073 initType = FILE_INIT; 00074 initSources = (char **)malloc(nInitSources * sizeof(char *)); 00075 if (initSources == NULL) 00076 throw runtime_error("[ ApMon() ] Error allocating memory."); 00077 00078 initSources[0] = strdup(initsource); 00079 initMonitoring(); 00080 00081 initialize(initsource, true); 00082 } 00083 }
| ApMon::ApMon | ( | int | nDestinations, | |
| char ** | destinationsList | |||
| ) | throw (runtime_error) |
Initializes an ApMon data structure from a vector of strings.
The strings can be of the form hostname[:port] [passwd] or can be URLs from where the hostnames are to be read.
Definition at line 142 of file ApMon.cpp.
00142 { 00143 constructFromList(nDestinations, destinationsList); 00144 }
| ApMon::ApMon | ( | int | nDestinations, | |
| char ** | destAddresses, | |||
| int * | destPorts, | |||
| char ** | destPasswds | |||
| ) | throw (runtime_error) |
Initializes an ApMon data structure, using arrays instead of a file.
| nDestinations | The number of destination hosts where the results will be sent. | |
| destAddresses | Array that contains the hostnames or IP addresses of the destination hosts. | |
| destPorts | The ports where the MonALISA modules listen on the destination hosts. | |
| destPasswds | The passwords for the MonALISA hosts. |
Definition at line 357 of file ApMon.cpp.
00359 { 00360 initMonitoring(); 00361 00362 arrayInit(nDestinations, destAddresses, destPorts, destPasswds); 00363 }
| ApMon::~ApMon | ( | ) |
ApMon destructor.
Definition at line 568 of file ApMon.cpp.
00568 { 00569 int i; 00570 00571 if (bkThreadStarted) { 00572 if (getJobMonitoring()) { 00573 /* send a datagram with job monitoring information which covers 00574 the last time interval */ 00575 sendJobInfo(); 00576 } 00577 } 00578 00579 pthread_mutex_lock(&mutexBack); 00580 setBackgroundThread(false); 00581 pthread_mutex_unlock(&mutexBack); 00582 00583 pthread_mutex_destroy(&mutex); 00584 pthread_mutex_destroy(&mutexBack); 00585 pthread_mutex_destroy(&mutexCond); 00586 pthread_cond_destroy(&confChangedCond); 00587 00588 free(clusterName); 00589 free(nodeName); 00590 free(sysMonCluster); free(sysMonNode); 00591 00592 freeConf(); 00593 00594 free(monJobs); 00595 for (i = 0; i < nInitSources; i++) { 00596 free(initSources[i]); 00597 } 00598 free(initSources); 00599 00600 free(buf); 00601 #ifndef WIN32 00602 close(sockfd); 00603 #else 00604 closesocket(sockfd); 00605 WSACleanup(); 00606 #endif 00607 }
| void ApMon::addJobToMonitor | ( | long | pid, | |
| char * | workdir, | |||
| char * | clusterName, | |||
| char * | nodeName | |||
| ) | throw (runtime_error) |
Adds a new job to the list of the jobs monitored by ApMon.
| pid | The job's PID. | |
| workdir | The working directory of the job. If it is NULL or if it has a zero length, directory monitoring will be disabled for this job. | |
| clusterName | The cluster name associated with the monitoring data for this job in MonALISA. | |
| nodeName | The node name associated with the monitoring data for this job in MonALISA. |
Definition at line 1292 of file ApMon.cpp.
01293 { 01294 if (nMonJobs >= MAX_MONITORED_JOBS) 01295 throw runtime_error("[ addJobToMonitor() ] Maximum number of jobs that can be monitored exceeded."); 01296 MonitoredJob job; 01297 job.pid = pid; 01298 if (workdir == NULL) 01299 strcpy(job.workdir, ""); 01300 else 01301 strcpy(job.workdir, workdir); 01302 01303 if (clusterName == NULL || strlen(clusterName) == 0) 01304 strcpy(job.clusterName, "ApMon_JobMon"); 01305 else 01306 strcpy(job.clusterName, clusterName); 01307 if (nodeName == NULL || strlen(nodeName) == 0) 01308 strcpy(job.nodeName, this -> myIP); 01309 else 01310 strcpy(job.nodeName, nodeName); 01311 01312 monJobs[nMonJobs++] = job; 01313 }
| void ApMon::addToDestinations | ( | char * | line, | |
| int * | nDestinations, | |||
| char * | destAddresses[], | |||
| int | destPorts[], | |||
| char * | destPasswds[] | |||
| ) | [protected] |
Parses the string line, which has the form hostname:port, and adds the hostname and the port to the lists given as parameters.
| line | The line to be parsed. | |
| nDestinations | The number of destination hosts in the lists. Will be modified (incremented) in the function. | |
| destAddresses | The list with IP addresses or hostnames. | |
| destPorts | The list of corresponding ports. | |
| destPasswds | Passwords for the destination hosts. |
Definition at line 235 of file ApMon.cpp.
00236 { 00237 char *addr, *port, *passwd; 00238 const char *sep1 = " \t"; 00239 const char *sep2 = ":"; 00240 00241 char *tmp = strdup(line); 00242 char *firstToken; 00243 // char buf[MAX_STRING_LEN]; 00244 // char *pbuf = buf; 00245 00246 /* the address & port are separated from the password with spaces */ 00247 firstToken = strtok/*_r*/(tmp, sep1);//, &pbuf); 00248 passwd = strtok/*_r*/(NULL, sep1);//, &pbuf); 00249 00250 /* the address and the port are separated with ":" */ 00251 addr = strtok/*_r*/(firstToken, sep2);//, &pbuf); 00252 port = strtok/*_r*/(NULL, sep2);//, &pbuf); 00253 destAddresses[*nDestinations] = strdup(addr); 00254 if (port == NULL) 00255 destPorts[*nDestinations] = DEFAULT_PORT; 00256 else 00257 destPorts[*nDestinations] = atoi(port); 00258 if (passwd == NULL) 00259 destPasswds[*nDestinations] = strdup(""); 00260 else 00261 destPasswds[*nDestinations] = strdup(passwd); 00262 (*nDestinations)++; 00263 00264 free(tmp); 00265 }
| void ApMon::arrayInit | ( | int | nDestinations, | |
| char ** | destAddresses, | |||
| int * | destPorts, | |||
| char ** | destPasswds, | |||
| bool | firstTime | |||
| ) | throw (runtime_error) [protected] |
Internal function that initializes an ApMon data structure.
| nDestinations | The number of destination hosts where the results will be sent. | |
| destAddresses | Array that contains the hostnames or IP addresses of the destination hosts. | |
| destPorts | The ports where the MonALISA modules listen on the destination hosts. | |
| destPasswds | Passwords for the destination hosts. | |
| firstTime | If it is true, all the initializations will be done (the object is being constructed now). Else, only some structures will be reinitialized. |
Definition at line 372 of file ApMon.cpp.
00374 { 00375 int i, j; 00376 int ret; 00377 char *ipAddr, logmsg[100]; 00378 bool found, havePublicIP; 00379 int tmpNDestinations; 00380 char **tmpAddresses, **tmpPasswds; 00381 int *tmpPorts; 00382 00383 if (destAddresses == NULL || destPorts == NULL || nDestinations == 0) 00384 throw runtime_error("[ arrayInit() ] Destination addresses or ports not provided"); 00385 00386 /* initializations that we have to do only once */ 00387 if (firstTime) { 00388 //this -> appPID = getpid(); 00389 00390 this -> nMonJobs = 0; 00391 this -> monJobs = (MonitoredJob *)malloc(MAX_MONITORED_JOBS * 00392 sizeof(MonitoredJob)); 00393 00394 try { 00395 this -> numCPUs = ProcUtils::getNumCPUs(); 00396 } catch (procutils_error &err) { 00397 logger(WARNING, err.what()); 00398 this -> numCPUs = 0; 00399 } 00400 00401 /* get the names of the network interfaces */ 00402 this -> nInterfaces = 0; 00403 try { 00404 ProcUtils::getNetworkInterfaces(this -> nInterfaces, 00405 this -> interfaceNames); 00406 } catch (procutils_error &err) { 00407 logger(WARNING, err.what()); 00408 this -> nInterfaces = 0; 00409 } 00410 00411 /* get the hostname of the machine */ 00412 ret = gethostname(this -> myHostname, MAX_STRING_LEN -1); 00413 if (ret < 0) { 00414 logger(WARNING, "Could not obtain the local hostname"); 00415 strcpy(myHostname, "unknown"); 00416 } else 00417 myHostname[MAX_STRING_LEN - 1] = 0; 00418 00419 /* get the IPs of the machine */ 00420 this -> numIPs = 0; havePublicIP = false; 00421 strcpy(this -> myIP, "unknown"); 00422 00423 /* default values for cluster name and node name */ 00424 this -> clusterName = strdup("ApMon_UserSend"); 00425 this -> nodeName = strdup(myHostname); 00426 00427 #ifndef WIN32 00428 int sockd = socket(PF_INET, SOCK_STREAM, 0); 00429 if(sockd < 0){ 00430 logger(WARNING, "Could not obtain local IP addresses"); 00431 } else { 00432 for (i = 0; i < this -> nInterfaces; i++) { 00433 struct ifreq ifr; 00434 memset(&ifr, 0, sizeof(ifr)); 00435 strncpy(ifr.ifr_name, this 
-> interfaceNames[i], sizeof(ifr.ifr_name) - 1); 00436 if(ioctl(sockd, SIOCGIFADDR, &ifr)<0) 00437 continue; //???????? 00438 char ip[4], tmp_s[20]; 00439 #ifdef __APPLE__ 00440 memcpy(ip, ifr.ifr_addr.sa_data+2, 4); 00441 #else 00442 memcpy(ip, ifr.ifr_hwaddr.sa_data+2, 4); 00443 #endif 00444 strcpy(tmp_s, inet_ntoa(*(struct in_addr *)ip)); 00445 sprintf(logmsg, "Found local IP address: %s", tmp_s); 00446 logger(FINE, logmsg); 00447 if (strcmp(tmp_s, "127.0.0.1") != 0 && !havePublicIP) { 00448 strcpy(this -> myIP, tmp_s); 00449 if (!isPrivateAddress(tmp_s)) 00450 havePublicIP = true; 00451 } 00452 strcpy(this -> allMyIPs[this -> numIPs], tmp_s); 00453 this -> numIPs++; 00454 } 00455 } 00456 #else 00457 struct hostent *hptr; 00458 if ((hptr = gethostbyname(myHostname))!= NULL) { 00459 i = 0; 00460 struct in_addr addr; 00461 while ((hptr -> h_addr_list)[i] != NULL) { 00462 memcpy(&(addr.s_addr), (hptr -> h_addr_list)[i], 4); 00463 ipAddr = inet_ntoa(addr); 00464 if (strcmp(ipAddr, "127.0.0.1") != 0) { 00465 strcpy(this -> myIP, ipAddr); 00466 if (!isPrivateAddress(ipAddr)) 00467 break; 00468 } 00469 i++; 00470 } 00471 } 00472 #endif 00473 00474 this -> sysMonCluster = strdup("ApMon_SysMon"); 00475 this -> sysMonNode = strdup(this -> myIP); 00476 00477 this -> prvTime = 0; 00478 this -> prvSent = 0; 00479 this -> prvDrop = 0; 00480 this -> crtTime = 0; 00481 this -> crtSent = 0; 00482 this -> crtDrop = 0; 00483 this -> hWeight = exp(-5.0/60.0); 00484 00485 srand(time(NULL)); 00486 00487 /* initialize buffer for XDR encoding */ 00488 this -> buf = (char *)malloc(MAX_DGRAM_SIZE); 00489 if (this -> buf == NULL) 00490 throw runtime_error("[ arrayInit() ] Error allocating memory"); 00491 this -> dgramSize = 0; 00492 00493 /*create the socket & set options*/ 00494 initSocket(); 00495 00496 /* initialize the sender ID and the sequence number */ 00497 instance_id = rand(); 00498 seq_nr = 0; 00499 } 00500 00501 /* put the destination addresses, ports & passwords in some 
temporary 00502 buffers (because we don't want to lock mutex while making DNS 00503 requests) 00504 */ 00505 tmpNDestinations = 0; 00506 tmpPorts = (int *)malloc(nDestinations * sizeof(int)); 00507 tmpAddresses = (char **)malloc(nDestinations * sizeof(char *)); 00508 tmpPasswds = (char **)malloc(nDestinations * sizeof(char *)); 00509 if (tmpPorts == NULL || tmpAddresses == NULL || 00510 tmpPasswds == NULL) 00511 throw runtime_error("[ arrayInit() ] Error allocating memory"); 00512 00513 for (i = 0; i < nDestinations; i++) { 00514 try { 00515 ipAddr = findIP(destAddresses[i]); 00516 } catch (runtime_error &err) { 00517 logger(FATAL, err.what()); 00518 continue; 00519 } 00520 00521 /* make sure this address is not already in the list */ 00522 found = false; 00523 for (j = 0; j < tmpNDestinations; j++) { 00524 if (!strcmp(ipAddr, tmpAddresses[j])) { 00525 found = true; 00526 break; 00527 } 00528 } 00529 00530 /* add the address to the list */ 00531 if (!found) { 00532 tmpAddresses[tmpNDestinations] = ipAddr; 00533 tmpPorts[tmpNDestinations] = destPorts[i]; 00534 tmpPasswds[tmpNDestinations] = strdup(destPasswds[i]); 00535 00536 sprintf(logmsg, "Adding destination host: %s - port %d", 00537 tmpAddresses[tmpNDestinations], tmpPorts[tmpNDestinations]); 00538 logger(INFO, logmsg); 00539 00540 tmpNDestinations++; 00541 } 00542 } 00543 00544 if (tmpNDestinations == 0) { 00545 freeMat(tmpAddresses, tmpNDestinations); 00546 freeMat(tmpPasswds, tmpNDestinations); 00547 throw runtime_error("[ arrayInit() ] There is no destination host specified correctly!"); 00548 } 00549 00550 pthread_mutex_lock(&mutex); 00551 if (!firstTime) 00552 freeConf(); 00553 this -> nDestinations = tmpNDestinations; 00554 this -> destAddresses = tmpAddresses; 00555 this -> destPorts = tmpPorts; 00556 this -> destPasswds = tmpPasswds; 00557 pthread_mutex_unlock(&mutex); 00558 00559 /* start job/system monitoring according to the settings previously read 00560 from the configuration file */ 00561 
setJobMonitoring(jobMonitoring, jobMonitorInterval); 00562 setSysMonitoring(sysMonitoring, sysMonitorInterval); 00563 setGenMonitoring(genMonitoring, genMonitorIntervals); 00564 setConfRecheck(confCheck, recheckInterval); 00565 }
| void ApMon::arrayInit | ( | int | nDestinations, | |
| char ** | destAddresses, | |||
| int * | destPorts, | |||
| char ** | destPasswds | |||
| ) | throw (runtime_error) [protected] |
Internal function that initializes an ApMon data structure.
| nDestinations | The number of destination hosts where the results will be sent. | |
| destAddresses | Array that contains the hostnames or IP addresses of the destination hosts. | |
| destPorts | The ports where the MonALISA modules listen on the destination hosts. | |
| destPasswds | Passwords for the destination hosts. |
Definition at line 365 of file ApMon.cpp.
00367 { 00368 arrayInit(nDestinations, destAddresses, destPorts, destPasswds, true); 00369 }
| void ApMon::constructFromList | ( | int | nDestinations, | |
| char ** | destinationsList | |||
| ) | throw (runtime_error) [protected] |
Initializes an ApMon object from a list with URLs and destination hosts.
Definition at line 146 of file ApMon.cpp.
00147 { 00148 int i; 00149 00150 if (destinationsList == NULL) 00151 throw runtime_error("[ constructFromList() ] Null destination list"); 00152 00153 #ifdef __APPLE__ 00154 initType = OLIST_INIT; 00155 #else 00156 initType = LIST_INIT; 00157 #endif 00158 00159 initMonitoring(); 00160 00161 /* save the initialization list */ 00162 nInitSources = nDestinations; 00163 initSources = (char **)malloc(nInitSources * sizeof(char*)); 00164 if (initSources == NULL) 00165 throw runtime_error("[ ApMon() ] Error allocating memory."); 00166 00167 for (i = 0; i < nInitSources; i++) 00168 initSources[i] = strdup(destinationsList[i]); 00169 00170 initialize(nDestinations, destinationsList, true); 00171 }
| void ApMon::encodeParams | ( | int | nParams, | |
| char ** | paramNames, | |||
| int * | valueTypes, | |||
| char ** | paramValues, | |||
| int | timestamp | |||
| ) | throw (runtime_error) [protected] |
Encodes in the XDR format the data from a ApMon structure.
Must be called before sending the data over the network.
Definition at line 808 of file ApMon.cpp.
00810 { 00811 XDR xdrs; /* XDR handle. */ 00812 int i, effectiveNParams; 00813 00814 /* count the number of parameters actually sent in the datagram 00815 (the parameters with a NULL name and the string parameters 00816 with a NULL value are skipped) 00817 */ 00818 effectiveNParams = nParams; 00819 for (i = 0; i < nParams; i++) { 00820 if (paramNames[i] == NULL || (valueTypes[i] == XDR_STRING && 00821 paramValues[i] == NULL)) { 00822 effectiveNParams--; 00823 } 00824 } 00825 if (effectiveNParams == 0) 00826 throw runtime_error("[ encodeParams() ] No valid parameters in datagram, sending aborted"); 00827 00828 /*** estimate the length of the send buffer ***/ 00829 00830 /* add the length of the cluster name & node name */ 00831 dgramSize = xdrSize(XDR_STRING, clusterName) + 00832 xdrSize(XDR_STRING, nodeName) + xdrSize(XDR_INT32, NULL); 00833 /* add the lengths for the parameters (name + size + value) */ 00834 for (i = 0; i < nParams; i++) { 00835 dgramSize += xdrSize(XDR_STRING, paramNames[i]) + xdrSize(XDR_INT32, NULL) + 00836 + xdrSize(valueTypes[i], paramValues[i]); 00837 } 00838 00839 /* check that the maximum datagram size is not exceeded */ 00840 if (dgramSize + MAX_HEADER_LENGTH > MAX_DGRAM_SIZE) 00841 throw runtime_error("[ encodeParams() ] Maximum datagram size exceeded"); 00842 00843 /* initialize the XDR stream */ 00844 xdrmem_create(&xdrs, buf, MAX_DGRAM_SIZE, XDR_ENCODE); 00845 00846 try { 00847 /* encode the cluster name, the node name and the number of parameters */ 00848 if (!xdr_string(&xdrs, &(clusterName), strlen(clusterName) 00849 + 1)) 00850 throw runtime_error("[ encodeParams() ] XDR encoding error for the cluster name"); 00851 00852 if (!xdr_string(&xdrs, &(nodeName), strlen(nodeName) + 1)) 00853 throw runtime_error("[ encodeParams() ] XDR encoding error for the node name"); 00854 00855 if (!xdr_int(&xdrs, &(effectiveNParams))) 00856 throw runtime_error("[ encodeParams() ] XDR encoding error for the number of parameters"); 00857 00858 /* 
encode the parameters */ 00859 for (i = 0; i < nParams; i++) { 00860 if (paramNames[i] == NULL || (valueTypes[i] == XDR_STRING && 00861 paramValues[i] == NULL)) { 00862 logger(WARNING, "NULL parameter name or value - skipping parameter..."); 00863 continue; 00864 } 00865 00866 /* parameter name */ 00867 if (!xdr_string(&xdrs, &(paramNames[i]), strlen(paramNames[i]) + 1)) 00868 throw runtime_error("[ encodeParams() ] XDR encoding error for parameter name"); 00869 00870 /* parameter value type */ 00871 if (!xdr_int(&xdrs, &(valueTypes[i]))) 00872 throw runtime_error("[ encodeParams() ] XDR encoding error for parameter value type"); 00873 00874 /* parameter value */ 00875 switch (valueTypes[i]) { 00876 case XDR_STRING: 00877 if (!xdr_string(&xdrs, &(paramValues[i]), 00878 strlen(paramValues[i]) + 1)) 00879 throw runtime_error("[ encodeParams() ] XDR encoding error for parameter value"); 00880 break; 00881 //INT16 is not supported 00882 /* case XDR_INT16: 00883 if (!xdr_short(&xdrs, (short *)(paramValues[i]))) 00884 return RET_ERROR; 00885 break; 00886 */ case XDR_INT32: 00887 if (!xdr_int(&xdrs, (int *)(paramValues[i]))) 00888 throw runtime_error("[ encodeParams() ] XDR encoding error for parameter value"); 00889 break; 00890 case XDR_REAL32: 00891 if (!xdr_float(&xdrs, (float *)(paramValues[i]))) 00892 throw runtime_error("[ encodeParams() ] XDR encoding error for parameter value"); 00893 break; 00894 case XDR_REAL64: 00895 if (!xdr_double(&xdrs, (double *)(paramValues[i]))) 00896 throw runtime_error("[ encodeParams() ] XDR encoding error for parameter value"); 00897 break; 00898 default: 00899 throw runtime_error("[ encodeParams() ] Unknown type for XDR encoding"); 00900 } 00901 } 00902 00903 /* encode the timestamp if necessary */ 00904 if (timestamp > 0) { 00905 if (!xdr_int(&xdrs, &timestamp)) 00906 throw runtime_error("[ encodeParams() ] XDR encoding error for the timestamp"); 00907 dgramSize += xdrSize(XDR_INT32, NULL); 00908 } 00909 } catch (runtime_error& err) { 
00910 xdr_destroy(&xdrs); 00911 throw err; 00912 } 00913 00914 xdr_destroy(&xdrs); 00915 }
| static void ApMon::errExit | ( | char * | msg | ) | [static] |
Displays an error message and exits with -1 as return value.
| msg | The message to be displayed. |
| void ApMon::freeConf | ( | ) | [protected] |
Frees the data structures needed to hold the configuration settings.
Definition at line 609 of file ApMon.cpp.
00609 { 00610 int i; 00611 freeMat(destAddresses, nDestinations); 00612 freeMat(destPasswds, nDestinations); 00613 free(destPorts); 00614 00615 for (i = 0; i < confURLs.nConfURLs; i++) { 00616 free(confURLs.vURLs[i]); 00617 free(confURLs.lastModifURLs[i]); 00618 } 00619 }
| bool ApMon::getConfCheck | ( | ) | [inline] |
| long ApMon::getCrtRecheckInterval | ( | ) | [inline, protected] |
Returns the actual value of the time interval (in seconds) between two recheck operations for the configuration files.
Definition at line 956 of file ApMon.h.
00956 { 00957 return crtRecheckInterval; 00958 }
| void ApMon::getDestFromWeb | ( | char * | url, | |
| int * | nDestinations, | |||
| char * | destAddresses[], | |||
| int | destPorts[], | |||
| char * | destPasswds[], | |||
| ConfURLs & | confURLs | |||
| ) | throw (runtime_error) [protected] |
Gets a configuration file from a web location and adds the destination addresses and ports to the lists given as parameters.
Definition at line 267 of file ApMon.cpp.
00269 { 00270 char temp_filename[300]; 00271 FILE *tmp_file; 00272 char *line, *ret, *tmp = NULL; 00273 bool modifLineFound; 00274 long mypid = getpid(); 00275 char str1[20], str2[20]; 00276 int totalSize, headerSize, contentSize; 00277 00278 #ifndef WIN32 00279 sprintf(temp_filename, "/tmp/apmon_webconf%ld", mypid); 00280 #else 00281 char *tmpp = getenv("TEMP"); 00282 if(tmpp == NULL) 00283 tmpp = getenv("TMP"); 00284 if(tmpp == NULL) 00285 tmpp = "c:"; 00286 sprintf(temp_filename, "%s\\apmon_webconf%ld", tmpp, mypid); 00287 #endif 00288 /* get the configuration file from web and put it in a temporary file */ 00289 totalSize = httpRequest(url, (char*)"GET", temp_filename); 00290 00291 /* read the configuration from the temporary file */ 00292 tmp_file = fopen(temp_filename, "rt"); 00293 if (tmp_file == NULL) 00294 throw runtime_error("[ getDestFromWeb() ] Error getting the configuration web page"); 00295 00296 line = (char*)malloc((MAX_STRING_LEN + 1) * sizeof(char)); 00297 00298 //check the HTTP header to see if we got the page correctly 00299 fgets(line, MAX_STRING_LEN, tmp_file); 00300 sscanf(line, "%s %s", str1, str2); 00301 if (atoi(str2) != 200) { 00302 free(line); 00303 fclose(tmp_file); 00304 throw runtime_error("[ getDestFromWeb() ] The web page does not exist on the server"); 00305 } 00306 00307 confURLs.vURLs[confURLs.nConfURLs] = strdup(url); 00308 00309 // check the header for the "Last-Modified" and "Content-Length" lines 00310 modifLineFound = false; 00311 contentSize = 0; 00312 do { 00313 if (tmp != NULL) 00314 free(tmp); 00315 ret = fgets(line, MAX_STRING_LEN, tmp_file); 00316 if (ret == NULL) { 00317 free(line); fclose(tmp_file); 00318 throw runtime_error("[ getDestFromWeb() ] Error getting the configuration web page"); 00319 } 00320 if (strstr(line, "Last-Modified") == line) { 00321 modifLineFound = true; 00322 confURLs.lastModifURLs[confURLs.nConfURLs] = strdup(line); 00323 } 00324 00325 if (strstr(line, "Content-Length") == line) { 00326 
sscanf(line, "%s %d", str1, &contentSize); 00327 } 00328 00329 tmp = trimString(line); 00330 } while (strlen(tmp) != 0); 00331 free(tmp); free(line); 00332 00333 if (!modifLineFound) 00334 confURLs.lastModifURLs[confURLs.nConfURLs] = strdup(""); 00335 confURLs.nConfURLs++; 00336 00337 headerSize = ftell(tmp_file); 00338 if (totalSize - headerSize < contentSize) { 00339 fclose(tmp_file); 00340 throw runtime_error("[ getDestFromWeb() ] Web page received incompletely"); 00341 } 00342 00343 try { 00344 parseConf(tmp_file, nDestinations, destAddresses, destPorts, 00345 destPasswds); 00346 } catch (...) { 00347 fclose(tmp_file); 00348 unlink(temp_filename); 00349 throw; 00350 } 00351 00352 fclose(tmp_file); 00353 unlink(temp_filename); 00354 }
| bool ApMon::getGenMonitoring | ( | ) | [inline] |
Returns true if the sending of general system information is enabled and false otherwise.
Definition at line 759 of file ApMon.h.
00759 { 00760 bool b; 00761 pthread_mutex_lock(&mutexBack); 00762 b = genMonitoring; 00763 pthread_mutex_unlock(&mutexBack); 00764 return b; 00765 }
| bool ApMon::getJobMonitoring | ( | ) | [inline] |
Returns true if the job monitoring is enabled, and false otherwise.
Definition at line 695 of file ApMon.h.
00695 { 00696 bool b; 00697 pthread_mutex_lock(&mutexBack); 00698 b = jobMonitoring; 00699 pthread_mutex_unlock(&mutexBack); 00700 return b; 00701 }
| long ApMon::getJobMonitorInterval | ( | ) | [inline] |
Returns the interval at which job monitoring datagrams are sent.
If the job monitoring is disabled, returns -1.
Definition at line 685 of file ApMon.h.
00685 { 00686 long i = -1; 00687 pthread_mutex_lock(&mutexBack); 00688 if (jobMonitoring) 00689 i = jobMonitorInterval; 00690 pthread_mutex_unlock(&mutexBack); 00691 return i; 00692 }
| long ApMon::getRecheckInterval | ( | ) | [inline] |
Returns the value of the time interval (in seconds) between two recheck operations for the configuration files.
If error(s) appear when reloading the configuration, the actual interval will be increased (transparently for the user).
Definition at line 639 of file ApMon.h.
00639 { return recheckInterval; }
| bool ApMon::getSysMonitoring | ( | ) | [inline] |
Returns true if the system monitoring is enabled, and false otherwise.
Definition at line 731 of file ApMon.h.
00731 { 00732 bool b; 00733 pthread_mutex_lock(&mutexBack); 00734 b = sysMonitoring; 00735 pthread_mutex_unlock(&mutexBack); 00736 return b; 00737 }
| long ApMon::getSysMonitorInterval | ( | ) | [inline] |
Returns the interval at which system monitoring datagrams are sent.
If the system monitoring is disabled, returns -1.
Definition at line 721 of file ApMon.h.
00721 { 00722 long i = -1; 00723 pthread_mutex_lock(&mutexBack); 00724 if (sysMonitoring) 00725 i = sysMonitorInterval; 00726 pthread_mutex_unlock(&mutexBack); 00727 return i; 00728 }
| void ApMon::initialize | ( | int | nDestinations, | |
| char ** | destList, | |||
| bool | firstTime | |||
| ) | throw (runtime_error) [protected] |
Initializes an ApMon object from a list with URLs and destination hosts.
| nDestinations | The number of elements in destList. | |
| destList | The list with URLs. | |
| firstTime | If it is true, all the initializations will be done (the object is being constructed now). Else, only some structures will be reinitialized. |
Definition at line 173 of file ApMon.cpp.
00174 { 00175 char *destAddresses[MAX_N_DESTINATIONS]; 00176 int destPorts[MAX_N_DESTINATIONS]; 00177 char *destPasswds[MAX_N_DESTINATIONS]; 00178 char errmsg[200]; 00179 int i; 00180 int cnt = 0; 00181 ConfURLs confURLs; 00182 00183 logger(INFO, "Initializing destination addresses & ports:"); 00184 00185 if (nDestinations > MAX_N_DESTINATIONS) 00186 throw runtime_error("[ initialize() ] Maximum number of destinations exceeded"); 00187 00188 00189 confURLs.nConfURLs = 0; 00190 00191 for (i = 0; i < nDestinations; i++) { 00192 try { 00193 if (strstr(destinationsList[i], "http") == destinationsList[i]) 00194 getDestFromWeb(destinationsList[i], &cnt, 00195 destAddresses, destPorts, destPasswds, confURLs); 00196 else 00197 addToDestinations(destinationsList[i], &cnt, 00198 destAddresses, destPorts, destPasswds); 00199 00200 } catch (runtime_error &e) { 00201 sprintf(errmsg, "[ initialize() ] Error while loading the configuration: %s", e.what()); 00202 logger(WARNING, errmsg); 00203 if (!firstTime) { 00204 for (i = 0; i < cnt; i++) { 00205 free(destAddresses[i]); 00206 free(destPasswds[i]); 00207 } 00208 logger(WARNING, "Configuration not reloaded successfully. Keeping the previous one."); 00209 return; 00210 } 00211 } // catch 00212 } // for 00213 00214 try { 00215 arrayInit(cnt, destAddresses, destPorts, destPasswds, firstTime); 00216 } catch (runtime_error& err) { 00217 if (firstTime) 00218 throw err; 00219 else { 00220 logger(WARNING, "Error reloading the configuration. Keeping the previous one."); 00221 return; 00222 } 00223 } 00224 00225 for (i = 0; i < cnt; i++) { 00226 free(destAddresses[i]); 00227 free(destPasswds[i]); 00228 } 00229 00230 pthread_mutex_lock(&mutex); 00231 this -> confURLs = confURLs; 00232 pthread_mutex_unlock(&mutex); 00233 }
| void ApMon::initialize | ( | char * | filename, | |
| bool | firstTime | |||
| ) | throw (runtime_error) [protected] |
Initializes an ApMon object from a configuration file.
| filename | The name of the file which contains the addresses and the ports of the destination hosts (see README for details about the structure of this file). | |
| firstTime | If it is true, all the initializations will be done (the object is being constructed now). Else, only some structures will be reinitialized. |
Definition at line 85 of file ApMon.cpp.
00086 { 00087 00088 char *destAddresses[MAX_N_DESTINATIONS]; 00089 int destPorts[MAX_N_DESTINATIONS]; 00090 char *destPasswds[MAX_N_DESTINATIONS]; 00091 int nDest = 0, i; 00092 ConfURLs confURLs; 00093 00094 confURLs.nConfURLs = 0; 00095 00096 try { 00097 loadFile(filename, &nDest, destAddresses, destPorts, destPasswds); 00098 00099 arrayInit(nDest, destAddresses, destPorts, destPasswds, firstTime); 00100 } catch (runtime_error& err) { 00101 if (firstTime) 00102 throw err; 00103 else { 00104 logger(WARNING, err.what()); 00105 logger(WARNING, "Error reloading the configuration. Keeping the previous one."); 00106 return; 00107 } 00108 } 00109 00110 for (i = 0; i < nDest; i++) { 00111 free(destAddresses[i]); 00112 free(destPasswds[i]); 00113 } 00114 00115 pthread_mutex_lock(&mutex); 00116 this -> confURLs = confURLs; 00117 pthread_mutex_unlock(&mutex); 00118 }
| void ApMon::initMonitoring | ( | ) | [protected] |
Initializes the monitoring configurations and the names of the parameters included in the monitoring datagrams.
Definition at line 682 of file monitor_utils.cpp.
00682 { 00683 int i; 00684 00685 this -> autoDisableMonitoring = true; 00686 this -> sysMonitoring = false; 00687 this -> jobMonitoring = false; 00688 this -> genMonitoring = false; 00689 this -> confCheck = false; 00690 00691 #ifndef WIN32 00692 pthread_mutex_init(&this -> mutex, NULL); 00693 pthread_mutex_init(&this -> mutexBack, NULL); 00694 pthread_mutex_init(&this -> mutexCond, NULL); 00695 pthread_cond_init(&this -> confChangedCond, NULL); 00696 #else 00697 logger(INFO, "init mutexes..."); 00698 this -> mutex = CreateMutex(NULL, FALSE, NULL); 00699 this -> mutexBack = CreateMutex(NULL, FALSE, NULL); 00700 this -> mutexCond = CreateMutex(NULL, FALSE, NULL); 00701 this -> confChangedCond = CreateEvent(NULL, FALSE, FALSE, NULL); 00702 00703 // Initialize the Windows Sockets library 00704 00705 WORD wVersionRequested; 00706 WSADATA wsaData; 00707 int err; 00708 wVersionRequested = MAKEWORD( 2, 0 ); 00709 err = WSAStartup( wVersionRequested, &wsaData ); 00710 if ( err != 0 ) { 00711 logger(FATAL, "Could not initialize the Windows Sockets library (WS2_32.dll)"); 00712 } 00713 00714 #endif 00715 00716 this -> haveBkThread = false; 00717 this -> bkThreadStarted = false; 00718 this -> stopBkThread = false; 00719 00720 this -> recheckChanged = false; 00721 this -> jobMonChanged = false; 00722 this -> sysMonChanged = false; 00723 00724 this -> recheckInterval = RECHECK_INTERVAL; 00725 this -> crtRecheckInterval = RECHECK_INTERVAL; 00726 this -> jobMonitorInterval = JOB_MONITOR_INTERVAL; 00727 this -> sysMonitorInterval = SYS_MONITOR_INTERVAL; 00728 00729 this -> nSysMonitorParams = initSysParams(this -> sysMonitorParams); 00730 00731 this -> nGenMonitorParams = initGenParams(this -> genMonitorParams); 00732 00733 this -> nJobMonitorParams = initJobParams(this -> jobMonitorParams); 00734 00735 initSocketStatesMapTCP(this -> socketStatesMapTCP); 00736 00737 this -> sysInfo_first = true; 00738 00739 try { 00740 this -> lastSysInfoSend = ProcUtils::getBootTime(); 00741 } 
catch (procutils_error& perr) { 00742 logger(WARNING, perr.what()); 00743 logger(WARNING, "The first system monitoring values may be inaccurate"); 00744 this -> lastSysInfoSend = 0; 00745 } 00746 00747 for (i = 0; i < nSysMonitorParams; i++) 00748 this -> lastSysVals[i] = 0; 00749 00750 //this -> lastUsrTime = this -> lastSysTime = 0; 00751 //this -> lastNiceTime = this -> lastIdleTime = 0; 00752 00753 for (i = 0; i < nSysMonitorParams; i++) { 00754 actSysMonitorParams[i] = 1; 00755 sysRetResults[i] = RET_SUCCESS; 00756 } 00757 00758 for (i = 0; i < nGenMonitorParams; i++) { 00759 actGenMonitorParams[i] = 1; 00760 genRetResults[i] = RET_SUCCESS; 00761 } 00762 00763 for (i = 0; i < nJobMonitorParams; i++) { 00764 actJobMonitorParams[i] = 1; 00765 jobRetResults[i] = RET_SUCCESS; 00766 } 00767 00768 this -> maxMsgRate = MAX_MSG_RATE; 00769 }
| void ApMon::initSocket | ( | ) | throw (runtime_error) [protected] |
Initializes the UDP socket used to send the datagrams.
Definition at line 1366 of file ApMon.cpp.
01366 { 01367 int optval1 = 1; 01368 struct timeval optval2; 01369 int ret1, ret2, ret3; 01370 01371 sockfd = socket(AF_INET, SOCK_DGRAM, 0); 01372 if (sockfd < 0) 01373 throw runtime_error("[ initSocket() ] Error creating socket"); 01374 ret1 = setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, (char *) &optval1, 01375 sizeof(optval1)); 01376 01377 /* set connection timeout */ 01378 optval2.tv_sec = 20; 01379 optval2.tv_usec = 0; 01380 ret2 = setsockopt(sockfd, SOL_SOCKET, SO_SNDTIMEO, (char *) &optval2, 01381 sizeof(optval2)); 01382 ret3 = setsockopt(sockfd, SOL_SOCKET, SO_RCVTIMEO, (char *) &optval2, 01383 sizeof(optval2)); 01384 if (ret1 != 0 || ret2 != 0 || ret3 != 0) 01385 throw runtime_error("[ initSocket() ] Error initializing socket."); 01386 }
| void ApMon::loadFile | ( | char * | filename, | |
| int * | nDestinations, | |||
| char ** | destAddresses, | |||
| int * | destPorts, | |||
| char ** | destPasswds | |||
| ) | throw (runtime_error) [protected] |
Parses a configuration file which contains addresses, ports and passwords for the destination hosts and puts the results in the vectors given as parameters.
| filename | The name of the configuration file. | |
| nDestinations | Output parameter, will contain the number of destination hosts. | |
| destAddresses | Will contain the destination addresses. | |
| destPorts | Will contain the ports from the destination hosts. | |
| destPasswds | Will contain the passwords for the destination hosts. |
Definition at line 120 of file ApMon.cpp.
00122 { 00123 FILE *f; 00124 char msg[100]; 00125 00126 /* initializations for the destination addresses */ 00127 f = fopen(filename, "rt"); 00128 if (f == NULL) { 00129 throw runtime_error("[ loadFile() ] Error opening configuration file"); 00130 } 00131 00132 sprintf(msg, "Loading file %s ...", filename); 00133 logger(INFO, msg); 00134 00135 lastModifFile = time(NULL); 00136 00137 parseConf(f, nDestinations, destAddresses, destPorts, 00138 destPasswds); 00139 fclose(f); 00140 }
| void ApMon::parseConf | ( | FILE * | fp, | |
| int * | nDestinations, | |||
| char ** | destAddresses, | |||
| int * | destPorts, | |||
| char ** | destPasswds | |||
| ) | throw (runtime_error) [protected] |
Parses the contents of a configuration file.
The destination addresses and ports are stored in the arrays given as parameters.
Definition at line 1389 of file ApMon.cpp.
01391 { 01392 int i, ch; 01393 char *line = (char *)malloc ((MAX_STRING_LEN1) * sizeof(char)); 01394 char *tmp = NULL; 01395 char *loglevel_s; 01396 // char sbuf[30]; 01397 // char *pbuf = sbuf; 01398 01399 /* parse the input file */ 01400 while(fgets(line, MAX_STRING_LEN, fp) != NULL) { 01401 01402 if (tmp != NULL) { 01403 free(tmp); 01404 tmp = NULL; 01405 } 01406 01407 line[MAX_STRING_LEN - 1] = 0; 01408 /* check if the line was too long */ 01409 ch = fgetc(fp); // see if we are at the end of the file 01410 ungetc(ch, fp); 01411 if (line[strlen(line) - 1] != 10 && ch != EOF) { 01412 /* if the line doesn't end with a \n and we are not at the end 01413 of file, the line from the file was longer than MAX_STRING_LEN */ 01414 fclose(fp); 01415 throw runtime_error ("[ parseConf() ] Maximum line length exceeded in the conf file"); 01416 } 01417 01418 tmp = trimString(line); 01419 01420 /* skip the blank lines and the comment lines */ 01421 if (strlen(tmp) == 0 || strchr(tmp, '#') == tmp) 01422 continue; 01423 01424 if (strstr(tmp, "xApMon_loglevel") == tmp) { 01425 char *tmp2 = tmp; 01426 strtok/*_r*/(tmp2, "= ");//, &pbuf); 01427 loglevel_s = strtok/*_r*/(NULL, "= ");//, &pbuf); 01428 setLogLevel(loglevel_s); 01429 continue; 01430 } 01431 01432 if (strstr(tmp, "xApMon_") == tmp) { 01433 parseXApMonLine(tmp); 01434 continue; 01435 } 01436 01437 if (*nDestinations >= MAX_N_DESTINATIONS) { 01438 free(line); free(tmp); 01439 for (i = 0; i < *nDestinations; i++) { 01440 free(destAddresses[i]); 01441 free(destPasswds[i]); 01442 } 01443 fclose(fp); 01444 throw runtime_error("[ parseConf() ] Maximum number of destinations exceeded."); 01445 } 01446 01447 addToDestinations(tmp, nDestinations, destAddresses, destPorts, 01448 destPasswds); 01449 } 01450 01451 if (tmp != NULL) 01452 free(tmp); 01453 free(line); 01454 }
| void ApMon::parseXApMonLine | ( | char * | line | ) | [protected] |
Parses an xApMon line from the configuration file and sets the corresponding parameters in the ApMon object.
Definition at line 771 of file monitor_utils.cpp.
00771 { 00772 bool flag, found; 00773 int ind; 00774 char tmp[MAX_STRING_LEN], logmsg[200]; 00775 char *param, *value; 00776 // char sbuf[MAX_STRING_LEN]; 00777 // char *pbuf = sbuf; 00778 const char *sep = " ="; 00779 00780 strcpy(tmp, line); 00781 char *tmp2 = tmp + strlen("xApMon_"); 00782 00783 param = strtok/*_r*/(tmp2, sep);//, &pbuf); 00784 value = strtok/*_r*/(NULL, sep);//, &pbuf); 00785 00786 /* if it is an on/off parameter, assign its value to flag */ 00787 if (strcmp(value, "on") == 0) 00788 flag = true; 00789 else /* if it is not an on/off paramenter the value of flag doesn't matter */ 00790 flag = false; 00791 00792 pthread_mutex_lock(&mutexBack); 00793 00794 found = false; 00795 if (strcmp(param, "job_monitoring") == 0) { 00796 this -> jobMonitoring = flag; found = true; 00797 } 00798 if (strcmp(param, "sys_monitoring") == 0) { 00799 this -> sysMonitoring = flag; found = true; 00800 } 00801 if (strcmp(param, "job_interval") == 0) { 00802 this -> jobMonitorInterval = atol(value); found = true; 00803 } 00804 if (strcmp(param, "sys_interval") == 0) { 00805 this -> sysMonitorInterval = atol(value); found = true; 00806 } 00807 if (strcmp(param, "general_info") == 0) { 00808 this -> genMonitoring = flag; found = true; 00809 } 00810 if (strcmp(param, "conf_recheck") == 0) { 00811 this -> confCheck = flag; found = true; 00812 } 00813 if (strcmp(param, "recheck_interval") == 0) { 00814 this -> recheckInterval = this -> crtRecheckInterval = atol(value); 00815 found = true; 00816 } 00817 if (strcmp(param, "auto_disable") == 0) { 00818 this -> autoDisableMonitoring = flag; 00819 found = true; 00820 } 00821 if (strcmp(param, "maxMsgRate") == 0) { 00822 this -> maxMsgRate = atoi(value); 00823 found = true; 00824 } 00825 00826 if (found) { 00827 pthread_mutex_unlock(&mutexBack); 00828 return; 00829 } 00830 00831 if (strstr(param, "sys_") == param) { 00832 ind = getVectIndex(param + strlen("sys_"), sysMonitorParams, 00833 nSysMonitorParams); 00834 if (ind < 0) { 
00835 pthread_mutex_unlock(&mutexBack); 00836 sprintf(logmsg, "Invalid parameter name in the configuration file: %s", 00837 param); 00838 logger(WARNING, logmsg); 00839 return; 00840 } 00841 found = true; 00842 this -> actSysMonitorParams[ind] = (int)flag; 00843 } 00844 00845 if (strstr(param, "job_") == param) { 00846 ind = getVectIndex(param + strlen("job_"), jobMonitorParams, 00847 nJobMonitorParams); 00848 00849 if (ind < 0) { 00850 pthread_mutex_unlock(&mutexBack); 00851 sprintf(logmsg, "Invalid parameter name in the configuration file: %s", 00852 param); 00853 logger(WARNING, logmsg); 00854 return; 00855 } 00856 found = true; 00857 this -> actJobMonitorParams[ind] = (int)flag; 00858 } 00859 00860 if (!found) { 00861 ind = getVectIndex(param, genMonitorParams, 00862 nGenMonitorParams); 00863 if (ind < 0) { 00864 pthread_mutex_unlock(&mutexBack); 00865 sprintf(logmsg, "Invalid parameter name in the configuration file: %s", 00866 param); 00867 logger(WARNING, logmsg); 00868 return; 00869 } else { 00870 found = true; 00871 this -> actGenMonitorParams[ind] = (int)flag; 00872 } 00873 } 00874 00875 if (!found) { 00876 sprintf(logmsg, "Invalid parameter name in the configuration file: %s", 00877 param); 00878 logger(WARNING, logmsg); 00879 } 00880 pthread_mutex_unlock(&mutexBack); 00881 }
| void ApMon::removeJobToMonitor | ( | long | pid | ) | throw (runtime_error) |
Removes a job from the list of the jobs monitored by ApMon.
| pid | The pid of the job to be removed. |
Definition at line 1315 of file ApMon.cpp.
01315 { 01316 int i, j; 01317 char msg[100]; 01318 01319 if (nMonJobs <= 0) 01320 throw runtime_error("[ removeJobToMonitor() ] There are no monitored jobs."); 01321 01322 for (i = 0; i < nMonJobs; i++) { 01323 if (monJobs[i].pid == pid) { 01324 /* found the job, now remove it */ 01325 for (j = i; j < nMonJobs - 1; j++) 01326 monJobs[j] = monJobs[j + 1]; 01327 nMonJobs--; 01328 return; 01329 } 01330 } 01331 01332 /* the job was not found */ 01333 sprintf(msg, "removeJobToMonitor(): Job %ld not found.", pid); 01334 throw runtime_error(msg); 01335 }
| void ApMon::sendGeneralInfo | ( | ) | [protected] |
Sends datagrams with general system monitoring information to all the destination hosts.
Definition at line 582 of file monitor_utils.cpp.
00582 { 00583 #ifndef WIN32 00584 int nParams, maxNParams, i; 00585 char tmp_s[50]; 00586 00587 char **paramNames, **paramValues; 00588 int *valueTypes; 00589 00590 logger(INFO, "Sending general monitoring information..."); 00591 00592 maxNParams = nGenMonitorParams + numIPs; 00593 valueTypes = (int *)malloc(maxNParams * sizeof(int)); 00594 paramNames = (char **)malloc(maxNParams * sizeof(char *)); 00595 paramValues = (char **)malloc(maxNParams * sizeof(char *)); 00596 00597 nParams = 0; 00598 00599 updateGeneralInfo(); 00600 00601 if (actGenMonitorParams[GEN_HOSTNAME]) { 00602 paramNames[nParams] = strdup(genMonitorParams[GEN_HOSTNAME]); 00603 valueTypes[nParams] = XDR_STRING; 00604 paramValues[nParams] = myHostname; 00605 nParams++; 00606 } 00607 00608 if (actGenMonitorParams[GEN_IP]) { 00609 for (i = 0; i < this -> numIPs; i++) { 00610 strcpy(tmp_s, "ip_"); 00611 strcat(tmp_s, interfaceNames[i]); 00612 paramNames[nParams] = strdup(tmp_s); 00613 valueTypes[nParams] = XDR_STRING; 00614 paramValues[nParams] = this -> allMyIPs[i]; 00615 nParams++; 00616 } 00617 } 00618 00619 if (actGenMonitorParams[GEN_CPU_VENDOR_ID] && strlen(cpuVendor) != 0) { 00620 paramNames[nParams] = strdup(genMonitorParams[GEN_CPU_VENDOR_ID]); 00621 valueTypes[nParams] = XDR_STRING; 00622 paramValues[nParams] = cpuVendor; 00623 nParams++; 00624 } 00625 00626 if (actGenMonitorParams[GEN_CPU_FAMILY] && strlen(cpuFamily) != 0) { 00627 paramNames[nParams] = strdup(genMonitorParams[GEN_CPU_FAMILY]); 00628 valueTypes[nParams] = XDR_STRING; 00629 paramValues[nParams] = cpuFamily; 00630 nParams++; 00631 } 00632 00633 if (actGenMonitorParams[GEN_CPU_MODEL] && strlen(cpuModel) != 0) { 00634 paramNames[nParams] = strdup(genMonitorParams[GEN_CPU_MODEL]); 00635 valueTypes[nParams] = XDR_STRING; 00636 paramValues[nParams] = cpuModel; 00637 nParams++; 00638 } 00639 00640 if (actGenMonitorParams[GEN_CPU_MODEL_NAME] && strlen(cpuModelName) != 0) { 00641 paramNames[nParams] = 
strdup(genMonitorParams[GEN_CPU_MODEL_NAME]); 00642 valueTypes[nParams] = XDR_STRING; 00643 paramValues[nParams] = cpuModelName; 00644 nParams++; 00645 } 00646 00647 for (i = 0; i < nGenMonitorParams; i++) { 00648 if (actGenMonitorParams[i] != 1 || i == GEN_IP || i == GEN_HOSTNAME || 00649 i == GEN_CPU_VENDOR_ID || i == GEN_CPU_FAMILY || i == GEN_CPU_MODEL 00650 || i == GEN_CPU_MODEL_NAME) 00651 continue; 00652 00653 if (genRetResults[i] == PROCUTILS_ERROR) { 00654 /* could not read the requested information from /proc, disable this 00655 parameter */ 00656 if (autoDisableMonitoring) 00657 actGenMonitorParams[i] = 0; 00658 } else if (genRetResults[i] != RET_ERROR) { 00659 paramNames[nParams] = strdup(genMonitorParams[i]); 00660 paramValues[nParams] = (char *)&currentGenVals[i]; 00661 valueTypes[nParams] = XDR_REAL64; 00662 nParams++; 00663 } 00664 } 00665 00666 try { 00667 if (nParams > 0) 00668 sendParameters(sysMonCluster, sysMonNode, nParams, 00669 paramNames, valueTypes, paramValues); 00670 } catch (runtime_error& err) { 00671 logger(WARNING, err.what()); 00672 } 00673 00674 for (i = 0; i < nParams; i++) 00675 free(paramNames[i]); 00676 free(paramNames); 00677 free(valueTypes); 00678 free(paramValues); 00679 #endif 00680 }
| void ApMon::sendJobInfo | ( | ) | [protected] |
Sends datagrams containing information about the jobs that are currently being monitored.
Definition at line 48 of file monitor_utils.cpp.
00048 { 00049 #ifndef WIN32 00050 int i; 00051 long crtTime; 00052 00053 /* the apMon_free() function calls sendJobInfo() from another thread and 00054 we need mutual exclusion */ 00055 pthread_mutex_lock(&mutexBack); 00056 00057 if (nMonJobs == 0) { 00058 logger(WARNING, "There are no jobs to be monitored, not sending job monitoring information."); 00059 pthread_mutex_unlock(&mutexBack); 00060 return; 00061 } 00062 00063 crtTime = time(NULL); 00064 logger(INFO, "Sending job monitoring information..."); 00065 lastJobInfoSend = (time_t)crtTime; 00066 00067 /* send monitoring information for all the jobs specified by the user */ 00068 for (i = 0; i < nMonJobs; i++) 00069 sendOneJobInfo(monJobs[i]); 00070 00071 pthread_mutex_unlock(&mutexBack); 00072 #endif 00073 }
| void ApMon::sendOneJobInfo | ( | MonitoredJob | job | ) | [protected] |
Sends datagrams with monitoring information about the specified job to all the destination hosts.
Definition at line 152 of file monitor_utils.cpp.
00152 { 00153 int i; 00154 int nParams = 0; 00155 00156 char **paramNames, **paramValues; 00157 int *valueTypes; 00158 00159 valueTypes = (int *)malloc(nJobMonitorParams * sizeof(int)); 00160 paramNames = (char **)malloc(nJobMonitorParams * sizeof(char *)); 00161 paramValues = (char **)malloc(nJobMonitorParams * sizeof(char *)); 00162 00163 for (i = 0; i < nJobMonitorParams; i++) { 00164 jobRetResults[i] = RET_SUCCESS; 00165 currentJobVals[i] = 0; 00166 } 00167 00168 updateJobInfo(job); 00169 00170 for (i = 0; i < nJobMonitorParams; i++) { 00171 if (actJobMonitorParams[i] && jobRetResults[i] != RET_ERROR) { 00172 00173 paramNames[nParams] = jobMonitorParams[i]; 00174 paramValues[nParams] = (char *)&currentJobVals[i]; 00175 valueTypes[nParams] = XDR_REAL64; 00176 nParams++; 00177 } 00178 /* don't disable the parameter (maybe for another job it can be 00179 obtained) */ 00180 /* 00181 else 00182 if (autoDisableMonitoring) 00183 actJobMonitorParams[ind] = 0; 00184 */ 00185 } 00186 00187 if (nParams == 0) { 00188 free(paramNames); free(valueTypes); 00189 free(paramValues); 00190 return; 00191 } 00192 00193 try { 00194 if (nParams > 0) 00195 sendParameters(job.clusterName, job.nodeName, nParams, 00196 paramNames, valueTypes, paramValues); 00197 } catch (runtime_error& err) { 00198 logger(WARNING, err.what()); 00199 } 00200 00201 free(paramNames); 00202 free(valueTypes); 00203 free(paramValues); 00204 }
| int ApMon::sendParameter | ( | char * | clusterName, | |
| char * | nodeName, | |||
| char * | paramName, | |||
| char * | paramValue | |||
| ) | throw (runtime_error) |
Sends a parameter of type string and its value to the MonALISA module.
| clusterName | The name of the cluster that is monitored. If it is NULL, we keep the same cluster and node name as in the previous datagram. | |
| nodeName | The name of the node from the cluster from which the value was taken. | |
| paramName | The name of the parameter. | |
| paramValue | The value of the parameter. |
Definition at line 801 of file ApMon.cpp.
00802 { 00803 00804 return sendParameter(clusterName, nodeName, paramName, XDR_STRING, 00805 paramValue); 00806 }
| int ApMon::sendParameter | ( | char * | clusterName, | |
| char * | nodeName, | |||
| char * | paramName, | |||
| double | paramValue | |||
| ) | throw (runtime_error) |
Sends a parameter of type double and its value to the MonALISA module.
| clusterName | The name of the cluster that is monitored. If it is NULL, we keep the same cluster and node name as in the previous datagram. | |
| nodeName | The name of the node from the cluster from which the value was taken. | |
| paramName | The name of the parameter. | |
| paramValue | The value of the parameter. |
Definition at line 794 of file ApMon.cpp.
00795 { 00796 00797 return sendParameter(clusterName, nodeName, paramName, XDR_REAL64, 00798 (char *)&paramValue); 00799 }
| int ApMon::sendParameter | ( | char * | clusterName, | |
| char * | nodeName, | |||
| char * | paramName, | |||
| float | paramValue | |||
| ) | throw (runtime_error) |
Sends a parameter of type float and its value to the MonALISA module.
| clusterName | The name of the cluster that is monitored. If it is NULL, we keep the same cluster and node name as in the previous datagram. | |
| nodeName | The name of the node from the cluster from which the value was taken. | |
| paramName | The name of the parameter. | |
| paramValue | The value of the parameter. |
Definition at line 787 of file ApMon.cpp.
00788 { 00789 00790 return sendParameter(clusterName, nodeName, paramName, XDR_REAL32, 00791 (char *)&paramValue); 00792 }
| int ApMon::sendParameter | ( | char * | clusterName, | |
| char * | nodeName, | |||
| char * | paramName, | |||
| int | paramValue | |||
| ) | throw (runtime_error) |
Sends an integer parameter and its value to the MonALISA module.
| clusterName | The name of the cluster that is monitored. If it is NULL, we keep the same cluster and node name as in the previous datagram. | |
| nodeName | The name of the node from the cluster from which the value was taken. | |
| paramName | The name of the parameter. | |
| paramValue | The value of the parameter. |
Definition at line 780 of file ApMon.cpp.
00781 { 00782 00783 return sendParameter(clusterName, nodeName, paramName, XDR_INT32, 00784 (char *)&paramValue); 00785 }
| int ApMon::sendParameter | ( | char * | clusterName, | |
| char * | nodeName, | |||
| char * | paramName, | |||
| int | valueType, | |||
| char * | paramValue | |||
| ) | throw (runtime_error) |
Sends a parameter and its value to the MonALISA module.
| clusterName | The name of the cluster that is monitored. If it is NULL, we keep the same cluster and node name as in the previous datagram. | |
| nodeName | The name of the node from the cluster from which the value was taken. | |
| paramName | The name of the parameter. | |
| valueType | The value type of the parameter. Can be one of the constants XDR_INT32 (integer), XDR_REAL32 (float), XDR_REAL64 (double), XDR_STRING (null-terminated string). | |
| paramValue | Pointer to the value of the parameter. |
Definition at line 764 of file ApMon.cpp.
00766 { 00767 00768 return sendParameters(clusterName, nodeName, 1, &paramName, 00769 &valueType, &paramValue); 00770 }
| int ApMon::sendParameters | ( | char * | clusterName, | |
| char * | nodeName, | |||
| int | nParams, | |||
| char ** | paramNames, | |||
| int * | valueTypes, | |||
| char ** | paramValues | |||
| ) | throw (runtime_error) |
Sends a set of parameters and their values to the MonALISA module.
| clusterName | The name of the cluster that is monitored. If it is NULL, we keep the same cluster and node name as in the previous datagram. | |
| nodeName | The name of the node from the cluster from which the value was taken. | |
| paramName | The name of the parameter. | |
| paramValue | The value of the parameter. |
Definition at line 621 of file ApMon.cpp.
00623 { 00624 return sendTimedParameters(clusterName, nodeName, nParams, 00625 paramNames, valueTypes, paramValues, -1); 00626 }
| void ApMon::sendSysInfo | ( | ) | [protected] |
Sends datagrams with system monitoring information to all the destination hosts.
Definition at line 371 of file monitor_utils.cpp.
00371 { 00372 #ifndef WIN32 00373 int nParams = 0, maxNParams; 00374 int i; 00375 long crtTime; 00376 00377 int *valueTypes; 00378 char **paramNames, **paramValues; 00379 00380 crtTime = time(NULL); 00381 logger(INFO, "Sending system monitoring information..."); 00382 00383 /* make some initializations only the first time this 00384 function is called */ 00385 if (this -> sysInfo_first) { 00386 for (i = 0; i < this -> nInterfaces; i++) { 00387 this -> lastBytesSent[i] = this -> lastBytesReceived[i] = 0.0; 00388 this -> lastNetErrs[i] = 0; 00389 00390 } 00391 this -> sysInfo_first = FALSE; 00392 } 00393 00394 /* the maximum number of parameters that can be included in a datagram */ 00395 /* (the last three terms are for: parameters corresponding to each possible 00396 state of the processes, parameters corresponding to the types of open 00397 sockets, parameters corresponding to each possible state of the TCP 00398 sockets.) */ 00399 maxNParams = nSysMonitorParams + (2 * nInterfaces - 1) + 15 + 4 + 00400 N_TCP_STATES; 00401 00402 valueTypes = (int *)malloc(maxNParams * sizeof(int)); 00403 paramNames = (char **)malloc(maxNParams * sizeof(char *)); 00404 paramValues = (char **)malloc(maxNParams * sizeof(char *)); 00405 00406 for (i = 0; i < nSysMonitorParams; i++) { 00407 if (actSysMonitorParams[i] > 0) /* if the parameter is enabled */ 00408 sysRetResults[i] = RET_SUCCESS; 00409 else /* mark it with RET_ERROR so that it will be not included in the 00410 datagram */ 00411 sysRetResults[i] = RET_ERROR; 00412 } 00413 00414 updateSysInfo(); 00415 00416 for (i = 0; i < nSysMonitorParams; i++) { 00417 if (i == SYS_NET_IN || i == SYS_NET_OUT || i == SYS_NET_ERRS || 00418 i == SYS_NET_SOCKETS || i == SYS_NET_TCP_DETAILS || i == SYS_PROCESSES) 00419 continue; 00420 00421 if (sysRetResults[i] == PROCUTILS_ERROR) { 00422 /* could not read the requested information from /proc, disable this 00423 parameter */ 00424 if (autoDisableMonitoring) 00425 actSysMonitorParams[i] = 0; 
00426 } else if (sysRetResults[i] != RET_ERROR) { 00427 /* the parameter is enabled and there were no errors obtaining it */ 00428 paramNames[nParams] = strdup(sysMonitorParams[i]); 00429 paramValues[nParams] = (char *)&currentSysVals[i]; 00430 valueTypes[nParams] = XDR_REAL64; 00431 nParams++; 00432 } 00433 } 00434 00435 if (actSysMonitorParams[SYS_NET_IN] == 1) { 00436 if (sysRetResults[SYS_NET_IN] == PROCUTILS_ERROR) { 00437 if (autoDisableMonitoring) 00438 actSysMonitorParams[SYS_NET_IN] = 0; 00439 } else if (sysRetResults[SYS_NET_IN] != RET_ERROR) { 00440 for (i = 0; i < nInterfaces; i++) { 00441 paramNames[nParams] = (char *)malloc(20 * sizeof(char)); 00442 strcpy(paramNames[nParams], interfaceNames[i]); 00443 strcat(paramNames[nParams], "_in"); 00444 paramValues[nParams] = (char *)&currentNetIn[i]; 00445 valueTypes[nParams] = XDR_REAL64; 00446 nParams++; 00447 } 00448 } 00449 } 00450 00451 if (actSysMonitorParams[SYS_NET_OUT] == 1) { 00452 if (sysRetResults[SYS_NET_IN] == PROCUTILS_ERROR) { 00453 if (autoDisableMonitoring) 00454 actSysMonitorParams[SYS_NET_OUT] = 0; 00455 } else if (sysRetResults[SYS_NET_OUT] != RET_ERROR) { 00456 for (i = 0; i < nInterfaces; i++) { 00457 paramNames[nParams] = (char *)malloc(20 * sizeof(char)); 00458 strcpy(paramNames[nParams], interfaceNames[i]); 00459 strcat(paramNames[nParams], "_out"); 00460 paramValues[nParams] = (char *)&currentNetOut[i]; 00461 valueTypes[nParams] = XDR_REAL64; 00462 nParams++; 00463 } 00464 } 00465 } 00466 00467 if (actSysMonitorParams[SYS_NET_ERRS] == 1) { 00468 if (sysRetResults[SYS_NET_ERRS] == PROCUTILS_ERROR) { 00469 if (autoDisableMonitoring) 00470 actSysMonitorParams[SYS_NET_ERRS] = 0; 00471 } else if (sysRetResults[SYS_NET_ERRS] != RET_ERROR) { 00472 for (i = 0; i < nInterfaces; i++) { 00473 paramNames[nParams] = (char *)malloc(20 * sizeof(char)); 00474 strcpy(paramNames[nParams], interfaceNames[i]); 00475 strcat(paramNames[nParams], "_errs"); 00476 paramValues[nParams] = (char *)&currentNetErrs[i]; 00477 
valueTypes[nParams] = XDR_REAL64; 00478 nParams++; 00479 } 00480 } 00481 } 00482 00483 00484 if (actSysMonitorParams[SYS_PROCESSES] == 1) { 00485 if (sysRetResults[SYS_PROCESSES] != RET_ERROR) { 00486 char act_states[] = {'D', 'R', 'S', 'T', 'Z'}; 00487 for (i = 0; i < 5; i++) { 00488 paramNames[nParams] = (char *)malloc(20 * sizeof(char)); 00489 sprintf(paramNames[nParams], "processes_%c", act_states[i]); 00490 paramValues[nParams] = (char *)&currentProcessStates[act_states[i] - 65]; 00491 valueTypes[nParams] = XDR_REAL64; 00492 nParams++; 00493 } 00494 } 00495 } 00496 00497 if (actSysMonitorParams[SYS_NET_SOCKETS] == 1) { 00498 if (sysRetResults[SYS_NET_SOCKETS] != RET_ERROR) { 00499 const char *socket_types[] = {"tcp", "udp", "icm", "unix"}; 00500 for (i = 0; i < 4; i++) { 00501 paramNames[nParams] = (char *)malloc(30 * sizeof(char)); 00502 sprintf(paramNames[nParams], "sockets_%s", socket_types[i]); 00503 paramValues[nParams] = (char *)&currentNSockets[i]; 00504 valueTypes[nParams] = XDR_REAL64; 00505 nParams++; 00506 } 00507 } 00508 } 00509 00510 if (actSysMonitorParams[SYS_NET_TCP_DETAILS] == 1) { 00511 if (sysRetResults[SYS_NET_TCP_DETAILS] != RET_ERROR) { 00512 for (i = 0; i < N_TCP_STATES; i++) { 00513 paramNames[nParams] = (char *)malloc(30 * sizeof(char)); 00514 sprintf(paramNames[nParams], "sockets_tcp_%s", socketStatesMapTCP[i]); 00515 paramValues[nParams] = (char *)&currentSocketsTCP[i]; 00516 valueTypes[nParams] = XDR_REAL64; 00517 nParams++; 00518 } 00519 } 00520 } 00521 00522 try { 00523 if (nParams > 0) 00524 sendParameters(sysMonCluster, sysMonNode, nParams, 00525 paramNames, valueTypes, paramValues); 00526 } catch (runtime_error& err) { 00527 logger(WARNING, err.what()); 00528 } 00529 00530 this -> lastSysInfoSend = crtTime; 00531 00532 if (sysRetResults[SYS_NET_IN] == RET_SUCCESS) { 00533 free(currentNetIn); 00534 free(currentNetOut); 00535 free(currentNetErrs); 00536 } 00537 00538 for (i = 0; i < nParams; i++) 00539 free(paramNames[i]); 00540 free(paramNames); 
00541 free(valueTypes); 00542 free(paramValues); 00543 #endif 00544 }
| int ApMon::sendTimedParameter | ( | char * | clusterName, | |
| char * | nodeName, | |||
| char * | paramName, | |||
| int | valueType, | |||
| char * | paramValue, | |||
| int | timestamp | |||
| ) | throw (runtime_error) |
Sends a parameter and its value to the MonALISA module, together with a timestamp.
| clusterName | The name of the cluster that is monitored. If it is NULL, we keep the same cluster and node name as in the previous datagram. | |
| nodeName | The name of the node from the cluster from which the value was taken. | |
| paramName | The name of the parameter. | |
| valueType | The value type of the parameter. Can be one of the constants XDR_INT32 (integer), XDR_REAL32 (float), XDR_REAL64 (double), XDR_STRING (null-terminated string). | |
| paramValue | Pointer to the value of the parameter. | |
| timestamp | The associated timestamp (in seconds). |
Definition at line 772 of file ApMon.cpp.
00774 { 00775 00776 return sendTimedParameters(clusterName, nodeName, 1, &paramName, 00777 &valueType, &paramValue, timestamp); 00778 }
| int ApMon::sendTimedParameters | ( | char * | clusterName, | |
| char * | nodeName, | |||
| int | nParams, | |||
| char ** | paramNames, | |||
| int * | valueTypes, | |||
| char ** | paramValues, | |||
| int | timestamp | |||
| ) | throw (runtime_error) |
Sends a set of parameters and their values to the MonALISA module, together with a timestamp.
| clusterName | The name of the cluster that is monitored. If it is NULL, we keep the same cluster and node name as in the previous datagram. | |
| nodeName | The name of the node from the cluster from which the value was taken. | |
| nParams | The number of parameters to be sent. | |
| paramNames | Array with the parameter names. | |
| valueTypes | Array with the value types represented as integers. | |
| paramValues | Array with the parameter values. | |
| timestamp | The timestamp (in seconds) associated with the data. |
Definition at line 628 of file ApMon.cpp.
00630 { 00631 int i; 00632 int ret, ret1, ret2; 00633 char msg[100], buf2[MAX_HEADER_LENGTH+4], newBuf[MAX_DGRAM_SIZE]; 00634 #ifdef WIN32 00635 char crtAddr[20]; 00636 #endif 00637 char *headerTmp; 00638 char header[MAX_HEADER_LENGTH] = "v:"; 00639 strcat(header, APMON_VERSION); 00640 strcat(header, "_cpp"); // to indicate this is the C++ version 00641 strcat(header, "p:"); 00642 00643 pthread_mutex_lock(&mutex); 00644 00645 if(!shouldSend()) { 00646 pthread_mutex_unlock(&mutex); 00647 return RET_NOT_SENT; 00648 } 00649 00650 if (clusterName != NULL) { // don't keep the cached values for cluster name 00651 // and node name 00652 free(this -> clusterName); 00653 this -> clusterName = strdup(clusterName); 00654 00655 if (nodeName != NULL) { /* the user provided a name */ 00656 free(this -> nodeName); 00657 this -> nodeName = strdup(nodeName); 00658 } 00659 else { /* set the node name to the node's IP */ 00660 free(this -> nodeName); 00661 this -> nodeName = strdup(this -> myHostname); 00662 } // else 00663 } // if 00664 00665 if (this -> clusterName == NULL || this -> nodeName == NULL) { 00666 pthread_mutex_unlock(&mutex); 00667 throw runtime_error("[ sendTimedParameters() ] Null cluster name or node name"); 00668 } 00669 00670 //sortParams(nParams, paramNames, valueTypes, paramValues); 00671 00672 /* try to encode the parameters */ 00673 try { 00674 encodeParams(nParams, paramNames, valueTypes, paramValues, timestamp); 00675 } catch (runtime_error& err) { 00676 pthread_mutex_unlock(&mutex); 00677 throw err; 00678 } 00679 00680 headerTmp = (char *)malloc(MAX_HEADER_LENGTH * sizeof(char)); 00681 /* for each destination */ 00682 for (i = 0; i < nDestinations; i++) { 00683 XDR xdrs; 00684 struct sockaddr_in destAddr; 00685 00686 /* initialize the destination address */ 00687 memset(&destAddr, 0, sizeof(destAddr)); 00688 destAddr.sin_family = AF_INET; 00689 destAddr.sin_port = htons(destPorts[i]); 00690 #ifndef WIN32 00691 inet_pton(AF_INET, destAddresses[i], 
&destAddr.sin_addr); 00692 #else 00693 int dummy = sizeof(destAddr); 00694 sprintf(crtAddr, "%s:%d", destAddresses[i], destPorts[i]); 00695 ret = WSAStringToAddress(crtAddr, AF_INET, NULL, (struct sockaddr *) &destAddr, &dummy); 00696 if(ret){ 00697 ret = WSAGetLastError(); 00698 sprintf(msg, "[ sendTimedParameters() ] Error packing address %s, code %d ", crtAddr, ret); 00699 throw runtime_error(msg); 00700 } 00701 #endif 00702 /* add the header (which is different for each destination) */ 00703 00704 strcpy(headerTmp, header); 00705 strcat(headerTmp, destPasswds[i]); 00706 00707 /* initialize the XDR stream to encode the header */ 00708 xdrmem_create(&xdrs, buf2, MAX_HEADER_LENGTH, XDR_ENCODE); 00709 00710 /* encode the header */ 00711 ret = xdr_string(&xdrs, &(headerTmp), strlen(headerTmp) + 1); 00712 /* add the instance ID and the sequence number */ 00713 ret1 = xdr_int(&xdrs, &(instance_id)); 00714 ret2 = xdr_int(&xdrs, &(seq_nr)); 00715 00716 if (!ret || !ret1 || !ret2) { 00717 free(headerTmp); 00718 pthread_mutex_unlock(&mutex); 00719 throw runtime_error("[ sendTimedParameters() ] XDR encoding error for the header"); 00720 } 00721 00722 /* concatenate the header and the rest of the datagram */ 00723 int buf2Length = xdrSize(XDR_STRING, headerTmp) + 2 * xdrSize(XDR_INT32, NULL); 00724 memcpy(newBuf, buf2, buf2Length); 00725 memcpy(newBuf + buf2Length, buf, dgramSize); 00726 00727 /* send the buffer */ 00728 ret = sendto(sockfd, newBuf, dgramSize + buf2Length, 0, 00729 (struct sockaddr *)&destAddr, sizeof(destAddr)); 00730 if (ret == RET_ERROR) { 00731 free(headerTmp); 00732 pthread_mutex_unlock(&mutex); 00733 00734 /*re-initialize the socket */ 00735 #ifndef WIN32 00736 close(sockfd); 00737 #else 00738 closesocket(sockfd); 00739 #endif 00740 initSocket(); 00741 00742 /* throw exception because the datagram was not sent */ 00743 sprintf(msg, "[ sendTimedParameters() ] Error sending data to destination %s ", 00744 destAddresses[i]); 00745 throw 
runtime_error(msg); 00746 } 00747 else { 00748 sprintf(msg, "Datagram with size %d, instance id %d, sequence number %d, sent to %s, containing parameters:", 00749 ret, instance_id, seq_nr, destAddresses[i]); 00750 logger(FINE, msg); 00751 logParameters(FINE, nParams, paramNames, valueTypes, paramValues); 00752 } 00753 xdr_destroy(&xdrs); 00754 00755 } 00756 00757 seq_nr = (seq_nr + 1) % TWO_BILLION; 00758 free(headerTmp); 00759 pthread_mutex_unlock(&mutex); 00760 return RET_SUCCESS; 00761 }
| void ApMon::setBackgroundThread | ( | bool | val | ) | [protected] |
Sets the value of the confCheck flag.
If it is true, the configuration file and/or the URLs will be periodically checked for modifications. By default it is false.
Definition at line 1250 of file ApMon.cpp.
01250 { 01251 // mutexBack is locked 01252 if (val == true) { 01253 if (!haveBkThread) { 01254 #ifndef WIN32 01255 pthread_create(&bkThread, NULL, &bkTask, this); 01256 #else 01257 DWORD dummy; 01258 bkThread = CreateThread(NULL, 65536, &bkTask, this, 0, &dummy); 01259 #endif 01260 haveBkThread = true; 01261 } else { 01262 pthread_mutex_lock(&mutexCond); 01263 pthread_cond_signal(&confChangedCond); 01264 pthread_mutex_unlock(&mutexCond); 01265 } 01266 } 01267 if (val == false) { 01268 //if (bkThreadStarted) { 01269 if (haveBkThread) { 01270 stopBkThread = true; 01271 pthread_mutex_unlock(&mutexBack); 01272 #ifndef WIN32 01273 pthread_mutex_lock(&mutexCond); 01274 #endif 01275 pthread_cond_signal(&confChangedCond); 01276 logger(INFO, "[Stopping the background thread...]"); 01277 #ifndef WIN32 01278 pthread_mutex_unlock(&mutexCond); 01279 pthread_join(bkThread, NULL); 01280 #else 01281 WaitForSingleObject(bkThread, INFINITE); 01282 #endif 01283 pthread_mutex_lock(&mutexBack); 01284 // logger(INFO, "bk thread stopped!"); 01285 haveBkThread = false; 01286 bkThreadStarted = false; 01287 stopBkThread = false; 01288 } 01289 } 01290 }
| void ApMon::setConfRecheck | ( | bool | confRecheck | ) | [inline] |
Enables/disables the periodical check for changes in the configuration files/URLs.
If enabled, the verifications will be done at the default time interval.
Definition at line 663 of file ApMon.h.
00663 { 00664 setConfRecheck(confRecheck, RECHECK_INTERVAL); 00665 }
| void ApMon::setConfRecheck | ( | bool | confRecheck, | |
| long | interval | |||
| ) |
Enables/disables the periodical check for changes in the configuration files/URLs.
| confRecheck | If it is true, the periodical checking is enabled. | |
| interval | The time interval at which the verifications are done. If it is negative, a default value will be used. |
Definition at line 1126 of file ApMon.cpp.
01126 { 01127 char logmsg[100]; 01128 if (confCheck) { 01129 sprintf(logmsg, "Enabling configuration reloading (interval %ld)", 01130 interval); 01131 logger(INFO, logmsg); 01132 } 01133 01134 pthread_mutex_lock(&mutexBack); 01135 if (initType == DIRECT_INIT) { // no need to reload the configuration 01136 logger(WARNING, "[ setConfRecheck() } No configuration file/URL to reload."); 01137 return; 01138 } 01139 01140 this -> confCheck = confCheck; 01141 this -> recheckChanged = true; 01142 if (confCheck) { 01143 if (interval > 0) { 01144 this -> recheckInterval = interval; 01145 this -> crtRecheckInterval = interval; 01146 } else { 01147 this -> recheckInterval = RECHECK_INTERVAL; 01148 this -> crtRecheckInterval = RECHECK_INTERVAL; 01149 } 01150 setBackgroundThread(true); 01151 } 01152 else { 01153 if (jobMonitoring == false && sysMonitoring == false) 01154 setBackgroundThread(false); 01155 } 01156 pthread_mutex_unlock(&mutexBack); 01157 01158 }
| void ApMon::setCrtRecheckInterval | ( | long | val | ) | [protected] |
Definition at line 1169 of file ApMon.cpp.
01169 { 01170 pthread_mutex_lock(&mutexBack); 01171 crtRecheckInterval = val; 01172 pthread_mutex_unlock(&mutexBack); 01173 }
| void ApMon::setGenMonitoring | ( | bool | genMonitoring | ) | [inline] |
Enables/disables the sending of datagrams with general system information.
A default value is used for the number of time intervals at which the datagrams are sent.
Definition at line 752 of file ApMon.h.
00752 { 00753 setGenMonitoring(genMonitoring, GEN_MONITOR_INTERVALS); 00754 }
| void ApMon::setGenMonitoring | ( | bool | genMonitoring, | |
| int | nIntervals | |||
| ) |
Enables/disables the periodical sending of datagrams with general system information.
| genMonitoring | If it is true, enables the sending of the datagrams. | |
| nIntervals | The number of time intervals at which the datagrams are sent (considering the interval for sending system monitoring information). If it is negative, a default value will be used. |
Definition at line 1225 of file ApMon.cpp.
01225 { 01226 char logmsg[100]; 01227 sprintf(logmsg, "Setting general information monitoring to %s ", 01228 boolStrings[(int)genMonitoring]); 01229 logger(INFO, logmsg); 01230 01231 pthread_mutex_lock(&mutexBack); 01232 this -> genMonitoring = genMonitoring; 01233 this -> sysMonChanged = true; 01234 if (genMonitoring == true) { 01235 if (nIntervals > 0) 01236 this -> genMonitorIntervals = nIntervals; 01237 else 01238 this -> genMonitorIntervals = GEN_MONITOR_INTERVALS; 01239 01240 if (this -> sysMonitoring == false) { 01241 pthread_mutex_unlock(&mutexBack); 01242 setSysMonitoring(true); 01243 pthread_mutex_lock(&mutexBack); 01244 } 01245 } // TODO: else check if we can stop the background thread (if no 01246 // system parameters are enabled for monitoring) 01247 pthread_mutex_unlock(&mutexBack); 01248 }
| void ApMon::setJobMonitoring | ( | bool | jobMonitoring | ) | [inline] |
Enables/disables the job monitoring.
If the job monitoring is enabled, the datagrams will be sent at the default time interval.
Definition at line 678 of file ApMon.h.
00678 { 00679 setJobMonitoring(jobMonitoring, JOB_MONITOR_INTERVAL); 00680 }
| void ApMon::setJobMonitoring | ( | bool | jobMonitoring, | |
| long | interval | |||
| ) |
Enables/disables the periodical sending of datagrams with job monitoring information.
| jobMonitoring | If it is true, the job monitoring is enabled | |
| interval | The time interval at which the datagrams are sent. If it is negative, a default value will be used. |
Definition at line 1175 of file ApMon.cpp.
01175 { 01176 char logmsg[100]; 01177 if (jobMonitoring) { 01178 sprintf(logmsg, "Enabling job monitoring, time interval %ld s... ", interval); 01179 logger(INFO, logmsg); 01180 } else 01181 logger(INFO, "Disabling job monitoring..."); 01182 01183 pthread_mutex_lock(&mutexBack); 01184 this -> jobMonitoring = jobMonitoring; 01185 this -> jobMonChanged = true; 01186 if (jobMonitoring == true) { 01187 if (interval > 0) 01188 this -> jobMonitorInterval = interval; 01189 else 01190 this -> jobMonitorInterval = JOB_MONITOR_INTERVAL; 01191 setBackgroundThread(true); 01192 } else { 01193 // disable the background thread if it is not needed anymore 01194 if (this -> sysMonitoring == false && this -> confCheck == false) 01195 setBackgroundThread(false); 01196 } 01197 pthread_mutex_unlock(&mutexBack); 01198 }
| void ApMon::setLogLevel | ( | char * | newLevel_s | ) | [static] |
Sets the ApMon logging level.
Possible values are "FATAL" (0), "WARNING" (1), "INFO" (2), "FINE" (3) and "DEBUG" (4).
Definition at line 1343 of file ApMon.cpp.
01343 { 01344 int newLevel; 01345 const char *levels[5] = {"FATAL", "WARNING", "INFO", "FINE", "DEBUG"}; 01346 char logmsg[100]; 01347 01348 for (newLevel = 0; newLevel < 5; newLevel++) 01349 if (strcmp(newLevel_s, levels[newLevel]) == 0) 01350 break; 01351 01352 if (newLevel >= 5) { 01353 sprintf(logmsg, "[ setLogLevel() ] Invalid level value: %s", newLevel_s); 01354 logger(WARNING, logmsg); 01355 } 01356 else 01357 logger(0, NULL, newLevel); 01358 }
| void ApMon::setMaxMsgRate | ( | int | maxRate | ) |
Sets the maximum number of messages that are sent to MonALISA in one second.
By default, this number is 50.
Definition at line 1361 of file ApMon.cpp.
01361 { 01362 if (maxRate > 0) 01363 this -> maxMsgRate = maxRate; 01364 }
| void ApMon::setRecheckInterval | ( | long | val | ) |
Sets the value of the time interval (in seconds) between two recheck operations for the configuration files.
The default value is 5 minutes. If the value is zero or negative, the configuration rechecking is turned off. If errors occur when reloading the configuration, the actual interval will be increased (transparently for the user).
Definition at line 1160 of file ApMon.cpp.
01160 { 01161 if (val > 0) { 01162 setConfRecheck(true, val); 01163 } 01164 else { 01165 setConfRecheck(false, val); 01166 } 01167 }
| void ApMon::setSysMonClusterNode | ( | char * | clusterName, | |
| char * | nodeName | |||
| ) |
This function is called by the user to set the cluster name and the node name for the system monitoring datagrams.
Definition at line 1337 of file ApMon.cpp.
01337 { 01338 free (sysMonCluster); free(sysMonNode); 01339 sysMonCluster = strdup(clusterName); 01340 sysMonNode = strdup(nodeName); 01341 }
| void ApMon::setSysMonitoring | ( | bool | sysMonitoring | ) | [inline] |
Enables/disables the system monitoring.
If the system monitoring is enabled, the datagrams will be sent at the default time interval.
Definition at line 714 of file ApMon.h.
00714 { 00715 setSysMonitoring(sysMonitoring, SYS_MONITOR_INTERVAL); 00716 }
| void ApMon::setSysMonitoring | ( | bool | sysMonitoring, | |
| long | interval | |||
| ) |
Enables/disables the periodical sending of datagrams with system monitoring information.
| sysMonitoring | If it is true, the system monitoring is enabled | |
| interval | The time interval at which the datagrams are sent. If it is negative, a default value will be used. |
Definition at line 1200 of file ApMon.cpp.
01200 { 01201 char logmsg[100]; 01202 if (sysMonitoring) { 01203 sprintf(logmsg, "Enabling system monitoring, time interval %ld s... ", interval); 01204 logger(INFO, logmsg); 01205 } else 01206 logger(INFO, "Disabling system monitoring..."); 01207 01208 pthread_mutex_lock(&mutexBack); 01209 this -> sysMonitoring = sysMonitoring; 01210 this -> sysMonChanged = true; 01211 if (sysMonitoring == true) { 01212 if (interval > 0) 01213 this -> sysMonitorInterval = interval; 01214 else 01215 this -> sysMonitorInterval = SYS_MONITOR_INTERVAL; 01216 setBackgroundThread(true); 01217 } else { 01218 // disable the background thread if it is not needed anymore 01219 if (this -> jobMonitoring == false && this -> confCheck == false) 01220 setBackgroundThread(false); 01221 } 01222 pthread_mutex_unlock(&mutexBack); 01223 }
| bool ApMon::shouldSend | ( | ) | [protected] |
Decides if the current datagram should be sent (so that the maximum number of datagrams per second is respected on average).
This decision is based on the number of messages previously sent.
new time, update previous counters;
reset current counter
compute the history
when we should start dropping messages
counting sent and dropped messages
Definition at line 1456 of file ApMon.cpp.
01456 { 01457 01458 long now = time(NULL); 01459 bool doSend; 01460 char msg[200]; 01461 01462 //printf("now %ld crtTime %ld\n", now, crtTime); 01463 01464 if (now != crtTime){ 01466 prvSent = hWeight * prvSent + (1.0 - hWeight) * crtSent / (now - crtTime); 01467 prvTime = crtTime; 01468 sprintf(msg, "previously sent: %ld dropped: %ld", crtSent, crtDrop); 01469 logger(DEBUG, msg); 01471 crtTime = now; 01472 crtSent = 0; 01473 crtDrop = 0; 01474 //printf("\n"); 01475 } 01476 01478 int valSent = (int)(prvSent * hWeight + crtSent * (1.0 - hWeight)); 01479 01480 doSend = true; 01482 int level = this -> maxMsgRate - this -> maxMsgRate / 10; 01483 01484 01485 if (valSent > (this -> maxMsgRate - level)) { 01486 //int max10 = this -> maxMsgRate / 10; 01487 int rnd = rand() % (this -> maxMsgRate / 10); 01488 doSend = (rnd < (this -> maxMsgRate - valSent)); 01489 } 01491 if (doSend) { 01492 crtSent++; 01493 //printf("#"); 01494 } else { 01495 crtDrop++; 01496 //printf("."); 01497 } 01498 01499 return doSend; 01500 }
| void ApMon::updateGeneralInfo | ( | ) | [protected] |
Update the general monitoring information.
Definition at line 546 of file monitor_utils.cpp.
00546 { 00547 00548 strcpy(cpuVendor, ""); strcpy(cpuFamily, ""); 00549 strcpy(cpuModel, ""); strcpy(cpuModelName, ""); 00550 00551 if (actGenMonitorParams[GEN_CPU_MHZ] == 1 || 00552 actGenMonitorParams[GEN_BOGOMIPS] == 1 || 00553 actGenMonitorParams[GEN_CPU_VENDOR_ID] == 1 || 00554 actGenMonitorParams[GEN_CPU_FAMILY] == 1 || 00555 actGenMonitorParams[GEN_CPU_MODEL] == 1 || 00556 actGenMonitorParams[GEN_CPU_MODEL_NAME] == 1) { 00557 try { 00558 ProcUtils::getCPUInfo(*this); 00559 } catch (procutils_error& err) { 00560 logger(WARNING, err.what()); 00561 genRetResults[GEN_CPU_MHZ] = genRetResults[GEN_BOGOMIPS] = PROCUTILS_ERROR; 00562 } 00563 } 00564 00565 if (actGenMonitorParams[GEN_TOTAL_MEM] == 1 || 00566 actGenMonitorParams[GEN_TOTAL_SWAP] == 1) { 00567 try { 00568 ProcUtils::getSysMem(currentGenVals[GEN_TOTAL_MEM], 00569 currentGenVals[GEN_TOTAL_SWAP]); 00570 } catch (procutils_error& perr) { 00571 logger(WARNING, perr.what()); 00572 genRetResults[GEN_TOTAL_MEM] = genRetResults[GEN_TOTAL_SWAP] = PROCUTILS_ERROR; 00573 } 00574 } 00575 00576 if (this -> numCPUs > 0) 00577 currentGenVals[GEN_NO_CPUS] = this -> numCPUs; 00578 else 00579 genRetResults[GEN_NO_CPUS] = PROCUTILS_ERROR; 00580 }
| void ApMon::updateJobInfo | ( | MonitoredJob | job | ) | [protected] |
Update the monitoring information regarding the specified job.
Definition at line 75 of file monitor_utils.cpp.
00075 { 00076 bool needJobInfo, needDiskInfo; 00077 bool jobExists = true; 00078 char err_msg[200]; 00079 00080 PsInfo jobInfo; 00081 JobDirInfo dirInfo; 00082 00083 /**** runtime, CPU & memory usage information ****/ 00084 needJobInfo = actJobMonitorParams[JOB_RUN_TIME] 00085 || actJobMonitorParams[JOB_CPU_TIME] 00086 || actJobMonitorParams[JOB_CPU_USAGE] 00087 || actJobMonitorParams[JOB_MEM_USAGE] 00088 || actJobMonitorParams[JOB_VIRTUALMEM] 00089 || actJobMonitorParams[JOB_RSS] 00090 || actJobMonitorParams[JOB_OPEN_FILES]; 00091 if (needJobInfo) { 00092 try { 00093 readJobInfo(job.pid, jobInfo); 00094 currentJobVals[JOB_RUN_TIME] = jobInfo.etime; 00095 currentJobVals[JOB_CPU_TIME] = jobInfo.cputime; 00096 currentJobVals[JOB_CPU_USAGE] = jobInfo.pcpu; 00097 currentJobVals[JOB_MEM_USAGE] = jobInfo.pmem; 00098 currentJobVals[JOB_VIRTUALMEM] = jobInfo.vsz; 00099 currentJobVals[JOB_RSS] = jobInfo.rsz; 00100 00101 if (jobInfo.open_fd < 0) 00102 jobRetResults[JOB_OPEN_FILES] = RET_ERROR; 00103 currentJobVals[JOB_OPEN_FILES] = jobInfo.open_fd; 00104 00105 } catch (runtime_error &err) { 00106 logger(WARNING, err.what()); 00107 jobRetResults[JOB_RUN_TIME] = jobRetResults[JOB_CPU_TIME] = 00108 jobRetResults[JOB_CPU_USAGE] = jobRetResults[JOB_MEM_USAGE] = 00109 jobRetResults[JOB_VIRTUALMEM] = jobRetResults[JOB_RSS] = 00110 jobRetResults[JOB_OPEN_FILES] = RET_ERROR; 00111 strcpy(err_msg, err.what()); 00112 if (strstr(err_msg, "does not exist") != NULL) 00113 jobExists = false; 00114 } 00115 } 00116 00117 /* if the monitored job has terminated, remove it */ 00118 if (!jobExists) { 00119 try { 00120 removeJobToMonitor(job.pid); 00121 } catch (runtime_error &err) { 00122 logger(WARNING, err.what()); 00123 } 00124 return; 00125 } 00126 00127 /* disk usage information */ 00128 needDiskInfo = actJobMonitorParams[JOB_DISK_TOTAL] 00129 || actJobMonitorParams[JOB_DISK_USED] 00130 || actJobMonitorParams[JOB_DISK_FREE] 00131 || actJobMonitorParams[JOB_DISK_USAGE] 00132 || 
actJobMonitorParams[JOB_WORKDIR_SIZE]; 00133 if (needDiskInfo) { 00134 try { 00135 readJobDiskUsage(job, dirInfo); 00136 currentJobVals[JOB_WORKDIR_SIZE] = dirInfo.workdir_size; 00137 currentJobVals[JOB_DISK_TOTAL] = dirInfo.disk_total; 00138 currentJobVals[JOB_DISK_USED] = dirInfo.disk_used; 00139 currentJobVals[JOB_DISK_USAGE] = dirInfo.disk_usage; 00140 currentJobVals[JOB_DISK_FREE] = dirInfo.disk_free; 00141 } catch (runtime_error& err) { 00142 logger(WARNING, err.what()); 00143 jobRetResults[JOB_WORKDIR_SIZE] = jobRetResults[JOB_DISK_TOTAL] 00144 = jobRetResults[JOB_DISK_USED] 00145 = jobRetResults[JOB_DISK_USAGE] 00146 = jobRetResults[JOB_DISK_FREE] 00147 = RET_ERROR; 00148 } 00149 } 00150 }
| void ApMon::updateSysInfo | ( | ) | [protected] |
Update the system monitoring information with new values obtained from the /proc filesystem.
Definition at line 207 of file monitor_utils.cpp.
00207 { 00208 int needCPUInfo, needSwapPagesInfo, needLoadInfo, needMemInfo, 00209 needNetInfo, needUptime, needProcessesInfo, needNetstatInfo; 00210 00211 /**** CPU usage information ****/ 00212 needCPUInfo = actSysMonitorParams[SYS_CPU_USAGE] 00213 || actSysMonitorParams[SYS_CPU_USR] 00214 || actSysMonitorParams[SYS_CPU_SYS] 00215 || actSysMonitorParams[SYS_CPU_NICE] 00216 || actSysMonitorParams[SYS_CPU_IDLE]; 00217 if (needCPUInfo) { 00218 try { 00219 ProcUtils::getCPUUsage(*this, currentSysVals[SYS_CPU_USAGE], 00220 currentSysVals[SYS_CPU_USR], 00221 currentSysVals[SYS_CPU_SYS], 00222 currentSysVals[SYS_CPU_NICE], 00223 currentSysVals[SYS_CPU_IDLE], numCPUs); 00224 } catch (procutils_error &perr) { 00225 /* "permanent" error (the parameters could not be obtained) */ 00226 logger(WARNING, perr.what()); 00227 sysRetResults[SYS_CPU_USAGE] = sysRetResults[SYS_CPU_SYS] = 00228 sysRetResults[SYS_CPU_USR] = sysRetResults[SYS_CPU_NICE] = 00229 sysRetResults[SYS_CPU_IDLE] = sysRetResults[SYS_CPU_USAGE] = PROCUTILS_ERROR; 00230 } catch (runtime_error &err) { 00231 /* temporary error (next time we might be able to get the paramerers) */ 00232 logger(WARNING, err.what()); 00233 sysRetResults[SYS_CPU_USAGE] = sysRetResults[SYS_CPU_SYS] 00234 = sysRetResults[SYS_CPU_USR] 00235 = sysRetResults[SYS_CPU_NICE] 00236 = sysRetResults[SYS_CPU_IDLE] 00237 = sysRetResults[SYS_CPU_USAGE] 00238 = RET_ERROR; 00239 } 00240 } 00241 00242 needSwapPagesInfo = actSysMonitorParams[SYS_PAGES_IN] 00243 || actSysMonitorParams[SYS_PAGES_OUT] 00244 || actSysMonitorParams[SYS_SWAP_IN] 00245 || actSysMonitorParams[SYS_SWAP_OUT]; 00246 00247 if (needSwapPagesInfo) { 00248 try { 00249 ProcUtils::getSwapPages(*this, currentSysVals[SYS_PAGES_IN], 00250 currentSysVals[SYS_PAGES_OUT], 00251 currentSysVals[SYS_SWAP_IN], 00252 currentSysVals[SYS_SWAP_OUT]); 00253 } catch (procutils_error &perr) { 00254 /* "permanent" error (the parameters could not be obtained) */ 00255 logger(WARNING, perr.what()); 00256 
sysRetResults[SYS_PAGES_IN] = sysRetResults[SYS_PAGES_OUT] = 00257 sysRetResults[SYS_SWAP_OUT] = sysRetResults[SYS_SWAP_IN] = PROCUTILS_ERROR; 00258 } catch (runtime_error &err) { 00259 /* temporary error (next time we might be able to get the paramerers) */ 00260 logger(WARNING, err.what()); 00261 sysRetResults[SYS_PAGES_IN] = sysRetResults[SYS_PAGES_OUT] 00262 = sysRetResults[SYS_SWAP_IN] 00263 = sysRetResults[SYS_SWAP_OUT] 00264 = RET_ERROR; 00265 } 00266 } 00267 00268 needLoadInfo = actSysMonitorParams[SYS_LOAD1] 00269 || actSysMonitorParams[SYS_LOAD5] 00270 || actSysMonitorParams[SYS_LOAD15]; 00271 00272 if (needLoadInfo) { 00273 double dummyVal; 00274 try { 00275 /* the number of processes is now obtained with the getProcesses() 00276 function, not with getLoad() */ 00277 ProcUtils::getLoad(currentSysVals[SYS_LOAD1], currentSysVals[SYS_LOAD5], 00278 currentSysVals[SYS_LOAD15],dummyVal); 00279 } catch (procutils_error& perr) { 00280 /* "permanent" error (the parameters could not be obtained) */ 00281 logger(WARNING, perr.what()); 00282 sysRetResults[SYS_LOAD1] = sysRetResults[SYS_LOAD5] 00283 = sysRetResults[SYS_LOAD15] 00284 = PROCUTILS_ERROR; 00285 } 00286 } 00287 00288 /**** get statistics about the current processes ****/ 00289 needProcessesInfo = actSysMonitorParams[SYS_PROCESSES]; 00290 if (needProcessesInfo) { 00291 try { 00292 ProcUtils::getProcesses(currentSysVals[SYS_PROCESSES], 00293 currentProcessStates); 00294 } catch (runtime_error& err) { 00295 logger(WARNING, err.what()); 00296 sysRetResults[SYS_PROCESSES] = RET_ERROR; 00297 } 00298 } 00299 00300 /**** get the amount of memory currently in use ****/ 00301 needMemInfo = actSysMonitorParams[SYS_MEM_USED] 00302 || actSysMonitorParams[SYS_MEM_FREE] 00303 || actSysMonitorParams[SYS_SWAP_USED] 00304 || actSysMonitorParams[SYS_SWAP_FREE] 00305 || actSysMonitorParams[SYS_MEM_USAGE] 00306 || actSysMonitorParams[SYS_SWAP_USAGE]; 00307 00308 if (needMemInfo) { 00309 try { 00310 
ProcUtils::getMemUsed(currentSysVals[SYS_MEM_USED], 00311 currentSysVals[SYS_MEM_FREE], 00312 currentSysVals[SYS_SWAP_USED], 00313 currentSysVals[SYS_SWAP_FREE]); 00314 currentSysVals[SYS_MEM_USAGE] = 100 * currentSysVals[SYS_MEM_USED] / 00315 (currentSysVals[SYS_MEM_USED] + currentSysVals[SYS_MEM_FREE]); 00316 currentSysVals[SYS_SWAP_USAGE] = 100 * currentSysVals[SYS_SWAP_USED] / 00317 (currentSysVals[SYS_SWAP_USED] + currentSysVals[SYS_SWAP_FREE]); 00318 } catch (procutils_error &perr) { 00319 logger(WARNING, perr.what()); 00320 sysRetResults[SYS_MEM_USED] = sysRetResults[SYS_MEM_FREE] = 00321 sysRetResults[SYS_SWAP_USED] = sysRetResults[SYS_SWAP_FREE] = 00322 sysRetResults[SYS_MEM_USAGE] = sysRetResults[SYS_SWAP_USAGE] = 00323 PROCUTILS_ERROR; 00324 } 00325 } 00326 00327 00328 /**** network monitoring information ****/ 00329 needNetInfo = actSysMonitorParams[SYS_NET_IN] || 00330 actSysMonitorParams[SYS_NET_OUT] || actSysMonitorParams[SYS_NET_ERRS]; 00331 if (needNetInfo && this -> nInterfaces > 0) { 00332 try { 00333 ProcUtils::getNetInfo(*this, &currentNetIn, &currentNetOut, 00334 &currentNetErrs); 00335 } catch (procutils_error &perr) { 00336 logger(WARNING, perr.what()); 00337 sysRetResults[SYS_NET_IN] = sysRetResults[SYS_NET_OUT] = 00338 sysRetResults[SYS_NET_ERRS] = PROCUTILS_ERROR; 00339 } catch (runtime_error &err) { 00340 logger(WARNING, err.what()); 00341 sysRetResults[SYS_NET_IN] = sysRetResults[SYS_NET_OUT] = 00342 sysRetResults[SYS_NET_ERRS] = RET_ERROR; 00343 } 00344 } 00345 00346 needNetstatInfo = actSysMonitorParams[SYS_NET_SOCKETS] || 00347 actSysMonitorParams[SYS_NET_TCP_DETAILS]; 00348 if (needNetstatInfo) { 00349 try { 00350 ProcUtils::getNetstatInfo(*this, this -> currentNSockets, 00351 this -> currentSocketsTCP); 00352 } catch (runtime_error &err) { 00353 logger(WARNING, err.what()); 00354 sysRetResults[SYS_NET_SOCKETS] = sysRetResults[SYS_NET_TCP_DETAILS] = 00355 RET_ERROR; 00356 } 00357 } 00358 00359 needUptime = actSysMonitorParams[SYS_UPTIME]; 00360 if
(needUptime) { 00361 try { 00362 currentSysVals[SYS_UPTIME] = ProcUtils::getUpTime(); 00363 } catch (procutils_error &perr) { 00364 logger(WARNING, perr.what()); 00365 sysRetResults[SYS_UPTIME] = PROCUTILS_ERROR; 00366 } 00367 } 00368 00369 }
| void* bkTask | ( | void * | param | ) | [friend] |
This function is executed in a background thread and has two roles: it automatically sends the system/job monitoring parameters (if the user requested it), and it checks the configuration file/URLs for changes.
(this is done in a separate thread).
Definition at line 918 of file ApMon.cpp.
00918 { 00919 #else 00920 DWORD WINAPI bkTask(void *param) { 00921 #endif 00922 struct stat st; 00923 #ifndef WIN32 00924 struct timespec delay; 00925 #else 00926 DWORD delay; 00927 #endif 00928 bool resourceChanged, haveChange; 00929 int nextOp = -1, i, ret; 00930 int generalInfoCount; 00931 time_t crtTime, timeRemained; 00932 time_t nextRecheck = 0, nextJobInfoSend = 0, nextSysInfoSend = 0; 00933 ApMon *apm = (ApMon *)param; 00934 char logmsg[200]; 00935 00936 logger(INFO, "[Starting background thread...]"); 00937 apm -> bkThreadStarted = true; 00938 00939 crtTime = time(NULL); 00940 00941 pthread_mutex_lock(&(apm -> mutexBack)); 00942 if (apm -> confCheck) { 00943 nextRecheck = crtTime + apm -> crtRecheckInterval; 00944 //sprintf(logmsg, "###1 crt %ld interv %ld recheck %ld ", crtTime, 00945 // apm -> crtRecheckInterval, nextRecheck); 00946 //logger(FINE, logmsg); 00947 //fflush(stdout); 00948 } 00949 if (apm -> jobMonitoring) 00950 nextJobInfoSend = crtTime + apm -> jobMonitorInterval; 00951 if (apm -> sysMonitoring) 00952 nextSysInfoSend = crtTime + apm -> sysMonitorInterval; 00953 pthread_mutex_unlock(&(apm -> mutexBack)); 00954 00955 timeRemained = -1; 00956 generalInfoCount = 0; 00957 00958 while (1) { 00959 pthread_mutex_lock(&apm -> mutexBack); 00960 if (apm -> stopBkThread) { 00961 // printf("### stopBkThread \n"); 00962 pthread_mutex_unlock(&apm -> mutexBack); 00963 break; 00964 } 00965 pthread_mutex_unlock(&apm -> mutexBack); 00966 00967 //sprintf(logmsg, "### 2 recheck %ld sys %ld ", nextRecheck, 00968 // nextSysInfoSend); 00969 //logger(FINE, logmsg); 00970 00971 /* determine the next operation that must be performed */ 00972 if (nextRecheck > 0 && (nextJobInfoSend <= 0 || 00973 nextRecheck <= nextJobInfoSend)) { 00974 if (nextSysInfoSend <= 0 || nextRecheck <= nextSysInfoSend) { 00975 nextOp = RECHECK_CONF; 00976 timeRemained = nextRecheck - crtTime; 00977 } else { 00978 nextOp = SYS_INFO_SEND; 00979 timeRemained = nextSysInfoSend - crtTime; 00980 } 
00981 } else { 00982 if (nextJobInfoSend > 0 && (nextSysInfoSend <= 0 || 00983 nextJobInfoSend <= nextSysInfoSend)) { 00984 nextOp = JOB_INFO_SEND; 00985 timeRemained = nextJobInfoSend - crtTime; 00986 } else if (nextSysInfoSend > 0) { 00987 nextOp = SYS_INFO_SEND; 00988 timeRemained = nextSysInfoSend - crtTime; 00989 } 00990 } 00991 00992 if (timeRemained == -1) 00993 timeRemained = RECHECK_INTERVAL; 00994 00995 #ifndef WIN32 00996 /* the moment when the next operation should be performed */ 00997 delay.tv_sec = crtTime + timeRemained; 00998 delay.tv_nsec = 0; 00999 #else 01000 delay = (/*crtTime +*/ timeRemained) * 1000; // this is in millis 01001 #endif 01002 01003 pthread_mutex_lock(&(apm -> mutexBack)); 01004 01005 pthread_mutex_lock(&(apm -> mutexCond)); 01006 /* check for changes in the settings */ 01007 haveChange = false; 01008 if (apm -> jobMonChanged || apm -> sysMonChanged || apm -> recheckChanged) 01009 haveChange = true; 01010 if (apm -> jobMonChanged) { 01011 if (apm -> jobMonitoring) 01012 nextJobInfoSend = crtTime + apm -> jobMonitorInterval; 01013 else 01014 nextJobInfoSend = -1; 01015 apm -> jobMonChanged = false; 01016 } 01017 if (apm -> sysMonChanged) { 01018 if (apm -> sysMonitoring) 01019 nextSysInfoSend = crtTime + apm -> sysMonitorInterval; 01020 else 01021 nextSysInfoSend = -1; 01022 apm -> sysMonChanged = false; 01023 } 01024 if (apm -> recheckChanged) { 01025 if (apm -> confCheck) { 01026 nextRecheck = crtTime + apm -> crtRecheckInterval; 01027 } 01028 else 01029 nextRecheck = -1; 01030 apm -> recheckChanged = false; 01031 } 01032 pthread_mutex_unlock(&(apm -> mutexBack)); 01033 01034 if (haveChange) { 01035 pthread_mutex_unlock(&(apm -> mutexCond)); 01036 continue; 01037 } 01038 01039 /* wait until the next operation should be performed or until 01040 a change in the settings occurs */ 01041 #ifndef WIN32 01042 ret = pthread_cond_timedwait(&(apm -> confChangedCond), 01043 &(apm -> mutexCond), &delay); 01044 pthread_mutex_unlock(&(apm -> 
mutexCond)); 01045 #else 01046 pthread_mutex_unlock(&(apm -> mutexCond)); 01047 ret = WaitForSingleObject(apm->confChangedCond, delay); 01048 #endif 01049 if (ret == ETIMEDOUT) { 01050 // printf("### ret TIMEDOUT\n"); 01051 /* now perform the operation */ 01052 if (nextOp == JOB_INFO_SEND) { 01053 apm -> sendJobInfo(); 01054 crtTime = time(NULL); 01055 nextJobInfoSend = crtTime + apm -> getJobMonitorInterval(); 01056 } 01057 01058 if (nextOp == SYS_INFO_SEND) { 01059 apm -> sendSysInfo(); 01060 if (apm -> getGenMonitoring()) { 01061 if (generalInfoCount <= 1) 01062 apm -> sendGeneralInfo(); 01063 generalInfoCount = (generalInfoCount + 1) % apm -> genMonitorIntervals; 01064 } 01065 crtTime = time(NULL); 01066 nextSysInfoSend = crtTime + apm -> getSysMonitorInterval(); 01067 } 01068 01069 if (nextOp == RECHECK_CONF) { 01070 //logger(FINE, "### recheck conf"); 01071 resourceChanged = false; 01072 try { 01073 if (apm -> initType == FILE_INIT) { 01074 sprintf(logmsg, "Checking for modifications for file %s ", 01075 apm -> initSources[0]); 01076 logger(INFO, logmsg); 01077 stat(apm -> initSources[0], &st); 01078 if (st.st_mtime > apm -> lastModifFile) { 01079 sprintf(logmsg, "File %s modified ", apm -> initSources[0]); 01080 logger(INFO, logmsg); 01081 resourceChanged = true; 01082 } 01083 } 01084 01085 // check the configuration URLs 01086 for (i = 0; i < apm -> confURLs.nConfURLs; i++) { 01087 sprintf(logmsg, "[Checking for modifications for URL %s ] ", 01088 apm -> confURLs.vURLs[i]); 01089 logger(INFO, logmsg); 01090 if (urlModified(apm -> confURLs.vURLs[i], apm -> confURLs.lastModifURLs[i])) { 01091 sprintf(logmsg, "URL %s modified ", apm -> confURLs.vURLs[i]); 01092 logger(INFO, logmsg); 01093 resourceChanged = true; 01094 break; 01095 } 01096 } 01097 01098 if (resourceChanged) { 01099 logger(INFO, "Reloading configuration..."); 01100 if (apm -> initType == FILE_INIT) 01101 apm -> initialize(apm -> initSources[0], false); 01102 else 01103 apm -> initialize(apm -> 
nInitSources, apm -> initSources, false); 01104 } 01105 apm -> setCrtRecheckInterval(apm -> getRecheckInterval()); 01106 } catch (runtime_error &err) { 01107 logger(WARNING, err.what()); 01108 logger(WARNING, "Increasing the time interval for reloading the configuration..."); 01109 apm -> setCrtRecheckInterval(apm -> getRecheckInterval() * 5); 01110 } 01111 crtTime = time(NULL); 01112 nextRecheck = crtTime + apm -> getCrtRecheckInterval(); 01113 //sleep(apm -> getCrtRecheckInterval()); 01114 } 01115 } 01116 01117 } // while 01118 01119 #ifndef WIN32 01120 return NULL; // it doesn't matter what we return here 01121 #else 01122 return 0; 01123 #endif 01124 }
int ApMon::actGenMonitorParams[MAX_GEN_PARAMS] [protected] |
int ApMon::actJobMonitorParams[MAX_JOB_PARAMS] [protected] |
int ApMon::actSysMonitorParams[MAX_SYS_PARAMS] [protected] |
char ApMon::allMyIPs[20][20] [protected] |
bool ApMon::autoDisableMonitoring [protected] |
pthread_t ApMon::bkThread [protected] |
bool ApMon::bkThreadStarted [protected] |
char* ApMon::buf [protected] |
char* ApMon::clusterName [protected] |
pthread_cond_t ApMon::confChangedCond [protected] |
bool ApMon::confCheck [protected] |
ConfURLs ApMon::confURLs [protected] |
char ApMon::cpuFamily[100] [protected] |
char ApMon::cpuModel[100] [protected] |
char ApMon::cpuModelName[200] [protected] |
char ApMon::cpuVendor[100] [protected] |
long ApMon::crtDrop [protected] |
long ApMon::crtRecheckInterval [protected] |
long ApMon::crtSent [protected] |
long ApMon::crtTime [protected] |
double ApMon::currentGenVals[MAX_GEN_PARAMS] [protected] |
double ApMon::currentJobVals[MAX_JOB_PARAMS] [protected] |
double * ApMon::currentNetErrs [protected] |
double* ApMon::currentNetIn [protected] |
double * ApMon::currentNetOut [protected] |
double ApMon::currentNSockets[4] [protected] |
double ApMon::currentProcessStates[NLETTERS] [protected] |
double ApMon::currentSocketsTCP[20] [protected] |
double ApMon::currentSysVals[MAX_SYS_PARAMS] [protected] |
char** ApMon::destAddresses [protected] |
char** ApMon::destPasswds [protected] |
int* ApMon::destPorts [protected] |
int ApMon::dgramSize [protected] |
bool ApMon::genMonitoring [protected] |
int ApMon::genMonitorIntervals [protected] |
char* ApMon::genMonitorParams[MAX_GEN_PARAMS] [protected] |
int ApMon::genRetResults[MAX_GEN_PARAMS] [protected] |
char ApMon::groupname[MAX_STRING_LEN] [protected] |
bool ApMon::haveBkThread [protected] |
double ApMon::hWeight [protected] |
char** ApMon::initSources [protected] |
int ApMon::initType [protected] |
int ApMon::instance_id [protected] |
char ApMon::interfaceNames[20][20] [protected] |
bool ApMon::jobMonChanged [protected] |
bool ApMon::jobMonitoring [protected] |
long ApMon::jobMonitorInterval [protected] |
char* ApMon::jobMonitorParams[MAX_JOB_PARAMS] [protected] |
int ApMon::jobRetResults[MAX_JOB_PARAMS] [protected] |
double ApMon::lastBytesReceived[20] [protected] |
double ApMon::lastBytesSent[20] [protected] |
time_t ApMon::lastJobInfoSend [protected] |
long ApMon::lastModifFile [protected] |
double ApMon::lastNetErrs[20] [protected] |
time_t ApMon::lastSysInfoSend [protected] |
double ApMon::lastSysVals[MAX_SYS_PARAMS] [protected] |
int ApMon::maxMsgRate [protected] |
MonitoredJob* ApMon::monJobs [protected] |
pthread_mutex_t ApMon::mutex [protected] |
pthread_mutex_t ApMon::mutexBack [protected] |
pthread_mutex_t ApMon::mutexCond [protected] |
char ApMon::myHostname[MAX_STRING_LEN] [protected] |
char ApMon::myIP[MAX_STRING_LEN] [protected] |
int ApMon::nDestinations [protected] |
int ApMon::nGenMonitorParams [protected] |
int ApMon::nInitSources [protected] |
int ApMon::nInterfaces [protected] |
int ApMon::nJobMonitorParams [protected] |
int ApMon::nMonJobs [protected] |
char* ApMon::nodeName [protected] |
int ApMon::nSysMonitorParams [protected] |
int ApMon::numCPUs [protected] |
int ApMon::numIPs [protected] |
double ApMon::prvDrop [protected] |
double ApMon::prvSent [protected] |
long ApMon::prvTime [protected] |
bool ApMon::recheckChanged [protected] |
long ApMon::recheckInterval [protected] |
int ApMon::seq_nr [protected] |
char* ApMon::socketStatesMapTCP[20] [protected] |
int ApMon::sockfd [protected] |
bool ApMon::stopBkThread [protected] |
bool ApMon::sysInfo_first [protected] |
bool ApMon::sysMonChanged [protected] |
char* ApMon::sysMonCluster [protected] |
bool ApMon::sysMonitoring [protected] |
long ApMon::sysMonitorInterval [protected] |
char* ApMon::sysMonitorParams[MAX_SYS_PARAMS] [protected] |
char* ApMon::sysMonNode [protected] |
int ApMon::sysRetResults[MAX_SYS_PARAMS] [protected] |
char ApMon::username[MAX_STRING_LEN] [protected] |