00001
// Windows-only build configuration for this header.
#ifdef _WIN32
// Every other platform switch in this header tests the unprefixed WIN32
// macro, which is not predefined by the compiler (it normally comes from the
// project settings or the Windows SDK headers). Derive it from the
// compiler-provided _WIN32 so that all switches take the same branch.
#ifndef WIN32
#define WIN32
#endif
// C4290 is MSVC's "C++ exception specification ignored" warning; it would
// otherwise fire for each exception specification declared on the ApMon
// methods in this header.
#pragma warning ( disable : 4290 )
#endif
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048 #ifndef ApMon_h
00049 #define ApMon_h
00050
00051 #include <sys/types.h>
00052 #include <sys/stat.h>
00053 #include <errno.h>
00054 #include <stdio.h>
00055 #include <stdlib.h>
00056 #include <string.h>
00057 #include <stdexcept>
00058 #include <ctype.h>
00059 #include <time.h>
00060
00061 #ifdef WIN32
00062 #define NOMSG
00063 #define NOGDI
00064 #include <Winsock2.h>
00065 #include <string.h>
00066 #include <process.h>
00067 #include "xdr.h"
00068 #undef NOMSG
00069 #undef NOGDI
00070 #else
00071 #include <sys/socket.h>
00072 #include <sys/ioctl.h>
00073 #include <sys/utsname.h>
00074 #include <sys/time.h>
00075 #include <arpa/inet.h>
00076 #include <netinet/in.h>
00077 #include <net/if.h>
00078 #include <rpc/rpc.h>
00079 #include <netdb.h>
00080 #include <unistd.h>
00081 #include <pthread.h>
00082 #include <pwd.h>
00083 #include <grp.h>
00084
00085 #ifdef __APPLE__
00086 #include <sys/param.h>
00087 #else
00088 #include <linux/param.h>
00089 #endif
00090
00091 #endif // ~WIN32
00092
00093 using namespace std;
00094
// Value type codes; ApMon::sendParameter() and friends take them in their
// valueType / valueTypes arguments.
#define XDR_STRING 0
//#define XDR_INT16 1 // NOT SUPPORTED YET!
#define XDR_INT32 2
//#define XDR_INT64 3 // THE SAME!
#define XDR_REAL32 4
#define XDR_REAL64 5

// Maximum datagram size (presumably in bytes -- confirm against the sender).
#define MAX_DGRAM_SIZE 8192
// Capacity of the fixed-size string buffers declared in this header
// (MonitoredJob::workdir, ApMon::username, ApMon::myHostname, ...).
#define MAX_STRING_LEN 512
#define MAX_STRING_LEN1 (MAX_STRING_LEN + 1)

// Status codes. Negative values are parenthesized so that the macros expand
// safely inside any surrounding expression.
#define RET_SUCCESS 0
#define RET_ERROR (-1)
#define PROCUTILS_ERROR (-2)
#define RET_NOT_SENT (-3)

// Capacity of the per-URL arrays in ConfURLs.
#define MAX_N_DESTINATIONS 30
// Default destination port (presumably used when none is configured).
#define DEFAULT_PORT 8884
// Maximum header length (presumably of the datagram header -- confirm).
#define MAX_HEADER_LENGTH 40

// Initialization kinds (presumably the values stored in ApMon::initType).
#define FILE_INIT 1

// NOTE(review): the list-based kind is named OLIST_INIT on macOS, presumably
// because a system header there already defines a LIST_INIT macro -- confirm.
#ifdef __APPLE__
#define OLIST_INIT 2
#else
#define LIST_INIT 2
#endif

#define DIRECT_INIT 3

// Default interval passed by ApMon::setJobMonitoring(bool).
#define JOB_MONITOR_INTERVAL 20

// Default interval passed by ApMon::setSysMonitoring(bool).
#define SYS_MONITOR_INTERVAL 20

// Default interval passed by ApMon::setConfRecheck(bool).
#define RECHECK_INTERVAL 600

// Default interval count passed by ApMon::setGenMonitoring(bool).
#define GEN_MONITOR_INTERVALS 10

// Maximum number of monitored jobs (presumably the capacity of
// ApMon::monJobs -- confirm against the allocation).
#define MAX_MONITORED_JOBS 30

// Capacity of the per-parameter system monitoring arrays in ApMon.
#define MAX_SYS_PARAMS 30

// Capacity of the per-parameter general monitoring arrays in ApMon.
#define MAX_GEN_PARAMS 30

// Capacity of the per-parameter job monitoring arrays in ApMon.
#define MAX_JOB_PARAMS 30

// Default message rate limit (presumably the initial ApMon::maxMsgRate).
#define MAX_MSG_RATE 20

// Size of ApMon::currentProcessStates (one slot per letter).
#define NLETTERS 26

#define TWO_BILLION 2000000000

// Version string of this library.
#define APMON_VERSION "2.2.1"
00156
00160 typedef struct ConfURLs {
00162 int nConfURLs;
00164 char *vURLs[MAX_N_DESTINATIONS];
00167 char *lastModifURLs[MAX_N_DESTINATIONS];
00168 } ConfURLs;
00169
00173 typedef struct MonitoredJob {
00174 long pid;
00175
00176 char workdir[MAX_STRING_LEN];
00177
00178 char clusterName[50];
00179
00180 char nodeName[50];
00181 } MonitoredJob;
00182
#ifdef WIN32
// Maps the subset of the pthread API used by this header onto Win32 HANDLE
// primitives. Each macro takes a pointer to the HANDLE, mirroring the pthread
// calling convention; arguments are parenthesized so that any pointer
// expression can be passed.
// NOTE(review): the results are the raw Win32 return values (for example
// ReleaseMutex() reports success as nonzero), not pthread-style 0-on-success.
#define pthread_mutex_lock(mutex_ref) (WaitForSingleObject(*(mutex_ref), INFINITE))
#define pthread_mutex_unlock(mutex_ref) (ReleaseMutex(*(mutex_ref)))
#define pthread_mutex_destroy(mutex_ref) (CloseHandle(*(mutex_ref)))
#define pthread_cond_signal(event_ref) (SetEvent(*(event_ref)))
#define pthread_cond_destroy(event_ref) (CloseHandle(*(event_ref)))
// Timed waits report expiry as WAIT_TIMEOUT on Windows. Drop any ETIMEDOUT
// already provided by <errno.h> first so this is not a conflicting
// redefinition.
#ifdef ETIMEDOUT
#undef ETIMEDOUT
#endif
#define ETIMEDOUT WAIT_TIMEOUT
#endif
00191
00212 class ApMon {
00213 protected:
00214 char *clusterName;
00215 char *nodeName;
00218 char *sysMonCluster;
00220 char *sysMonNode;
00221
00222 int nDestinations;
00223 char **destAddresses;
00224 int *destPorts;
00225 char **destPasswds;
00227 char *buf;
00228 int dgramSize;
00229 #ifndef WIN32
00230 int sockfd;
00231 #else
00232 SOCKET sockfd;
00233 #endif
00234
00237 bool confCheck;
00238
00240 int nInitSources;
00242 char **initSources;
00243
00244 int initType;
00245
00249 long recheckInterval;
00250
00255 long crtRecheckInterval;
00256
00257 #ifndef WIN32
00258
00260 pthread_t bkThread;
00261
00263 pthread_mutex_t mutex;
00264
00266 pthread_mutex_t mutexBack;
00267
00269 pthread_mutex_t mutexCond;
00270
00272 pthread_cond_t confChangedCond;
00273 #else
00274 public:
00275 HANDLE bkThread;
00276 HANDLE mutex;
00277 HANDLE mutexBack;
00278 HANDLE mutexCond;
00279 HANDLE confChangedCond;
00280 protected:
00281 #endif
00282
00283 bool recheckChanged, jobMonChanged,sysMonChanged;
00284
00287 bool haveBkThread;
00288
00290 bool bkThreadStarted;
00291
00293 bool stopBkThread;
00294
00299 bool autoDisableMonitoring;
00300
00304 bool sysMonitoring;
00305
00309 bool jobMonitoring;
00310
00314 bool genMonitoring;
00315
00319 long jobMonitorInterval, sysMonitorInterval;
00320
00324 int genMonitorIntervals;
00325
00329 int nSysMonitorParams, nJobMonitorParams, nGenMonitorParams;
00330
00331
00332
00333
00334 char *sysMonitorParams[MAX_SYS_PARAMS];
00335 char *genMonitorParams[MAX_GEN_PARAMS];
00336 char *jobMonitorParams[MAX_JOB_PARAMS];
00337
00338
00339
00340
00341 int actSysMonitorParams[MAX_SYS_PARAMS];
00342 int actGenMonitorParams[MAX_GEN_PARAMS];
00343 int actJobMonitorParams[MAX_JOB_PARAMS];
00344
00345 ConfURLs confURLs;
00346
00348 int nMonJobs;
00349
00351 MonitoredJob *monJobs;
00352
00354 long lastModifFile;
00355
00356
00357
00358
00359 time_t lastJobInfoSend;
00360
00362 char username[MAX_STRING_LEN];
00364 char groupname[MAX_STRING_LEN];
00366 char myHostname[MAX_STRING_LEN];
00368 char myIP[MAX_STRING_LEN];
00370 int numIPs;
00372 char allMyIPs[20][20];
00374 int numCPUs;
00375
00376 bool sysInfo_first;
00378 time_t lastSysInfoSend;
00379
00380 double lastSysVals[MAX_SYS_PARAMS];
00381
00382 double currentSysVals[MAX_SYS_PARAMS];
00383
00384
00385 int sysRetResults[MAX_SYS_PARAMS];
00386
00387
00388 double currentJobVals[MAX_JOB_PARAMS];
00389
00390
00391 int jobRetResults[MAX_JOB_PARAMS];
00392
00393
00394 double currentGenVals[MAX_GEN_PARAMS];
00395
00396
00397 int genRetResults[MAX_GEN_PARAMS];
00398
00399
00400
00401
00402 double currentProcessStates[NLETTERS];
00403
00404
00405 char cpuVendor[100];
00406 char cpuFamily[100];
00407 char cpuModel[100];
00408 char cpuModelName[200];
00409
00411 char interfaceNames[20][20];
00413 int nInterfaces;
00416 double lastBytesSent[20];
00417 double lastBytesReceived[20];
00420 double lastNetErrs[20];
00422 double *currentNetIn, *currentNetOut, *currentNetErrs;
00423
00425 double currentNSockets[4];
00428 double currentSocketsTCP[20];
00431 char *socketStatesMapTCP[20];
00432
00433
00434 int maxMsgRate;
00435 long prvTime;
00436 double prvSent;
00437 double prvDrop;
00438 long crtTime;
00439 long crtSent;
00440 long crtDrop;
00441 double hWeight;
00442
00444 int instance_id;
00448 int seq_nr;
00449
00450 public:
00457 ApMon(char *initsource) throw(runtime_error);
00458
00459
00465 ApMon(int nDestinations, char **destinationsList) throw(runtime_error);
00466
00478 ApMon(int nDestinations, char **destAddresses, int *destPorts, char **destPasswds) throw(runtime_error);
00479
00483 ~ApMon();
00484
00500 int sendParameter(char *clusterName, char *nodeName,
00501 char *paramName, int valueType, char *paramValue)
00502 throw(runtime_error);
00503
00521 int sendTimedParameter(char *clusterName, char *nodeName,
00522 char *paramName, int valueType, char *paramValue, int timestamp)
00523 throw(runtime_error);
00524
00537 int sendParameter(char *clusterName, char *nodeName,
00538 char *paramName, int paramValue)
00539 throw(runtime_error);
00540
00553 int sendParameter(char *clusterName, char *nodeName,
00554 char *paramName, float paramValue)
00555 throw(runtime_error);
00556
00569 int sendParameter(char *clusterName, char *nodeName,
00570 char *paramName, double paramValue)
00571 throw(runtime_error);
00572
00585 int sendParameter(char *clusterName, char *nodeName,
00586 char *paramName, char *paramValue)
00587 throw(runtime_error);
00588
00589
00602 int sendParameters(char *clusterName, char *nodeName,
00603 int nParams, char **paramNames, int *valueTypes,
00604 char **paramValues) throw(runtime_error);
00605
00622 int sendTimedParameters(char *clusterName, char *nodeName,
00623 int nParams, char **paramNames, int *valueTypes,
00624 char **paramValues, int timestamp) throw(runtime_error);
00625
00631 bool getConfCheck() { return confCheck; }
00632
00639 long getRecheckInterval() { return recheckInterval; }
00640
00641
00649 void setRecheckInterval(long val);
00650
00657 void setConfRecheck(bool confRecheck, long interval);
00658
00663 void setConfRecheck(bool confRecheck) {
00664 setConfRecheck(confRecheck, RECHECK_INTERVAL);
00665 }
00666
00673 void setJobMonitoring(bool jobMonitoring, long interval);
00674
00678 void setJobMonitoring(bool jobMonitoring) {
00679 setJobMonitoring(jobMonitoring, JOB_MONITOR_INTERVAL);
00680 }
00681
00685 long getJobMonitorInterval() {
00686 long i = -1;
00687 pthread_mutex_lock(&mutexBack);
00688 if (jobMonitoring)
00689 i = jobMonitorInterval;
00690 pthread_mutex_unlock(&mutexBack);
00691 return i;
00692 }
00693
00695 bool getJobMonitoring() {
00696 bool b;
00697 pthread_mutex_lock(&mutexBack);
00698 b = jobMonitoring;
00699 pthread_mutex_unlock(&mutexBack);
00700 return b;
00701 }
00702
00709 void setSysMonitoring(bool sysMonitoring, long interval);
00710
00714 void setSysMonitoring(bool sysMonitoring) {
00715 setSysMonitoring(sysMonitoring, SYS_MONITOR_INTERVAL);
00716 }
00717
00721 long getSysMonitorInterval() {
00722 long i = -1;
00723 pthread_mutex_lock(&mutexBack);
00724 if (sysMonitoring)
00725 i = sysMonitorInterval;
00726 pthread_mutex_unlock(&mutexBack);
00727 return i;
00728 }
00729
00731 bool getSysMonitoring() {
00732 bool b;
00733 pthread_mutex_lock(&mutexBack);
00734 b = sysMonitoring;
00735 pthread_mutex_unlock(&mutexBack);
00736 return b;
00737 }
00738
00746 void setGenMonitoring(bool genMonitoring, int nIntervals);
00747
00752 void setGenMonitoring(bool genMonitoring) {
00753 setGenMonitoring(genMonitoring, GEN_MONITOR_INTERVALS);
00754 }
00755
00759 bool getGenMonitoring() {
00760 bool b;
00761 pthread_mutex_lock(&mutexBack);
00762 b = genMonitoring;
00763 pthread_mutex_unlock(&mutexBack);
00764 return b;
00765 }
00766
00777 void addJobToMonitor(long pid, char *workdir, char *clusterName,
00778 char *nodeName) throw(runtime_error);
00779
00784 void removeJobToMonitor(long pid) throw(runtime_error);
00785
00788 void setSysMonClusterNode(char *clusterName, char *nodeName);
00789
00793 static void setLogLevel(char *newLevel_s);
00794
00799 void setMaxMsgRate(int maxRate);
00800
00805 static void errExit(char *msg);
00806
00807
00808 protected:
00809
00819 void initialize(char *filename, bool firstTime) throw(runtime_error);
00820
00822 void constructFromList(int nDestinations, char **destinationsList)
00823 throw(runtime_error);
00824
00833 void initialize(int nDestinations, char **destList, bool firstTime) throw(runtime_error);
00834
00835
00847 void loadFile(char *filename, int *nDestinations, char **destAddresses,
00848 int *destPorts, char **destPasswds) throw(runtime_error);
00849
00850
00862 void arrayInit(int nDestinations, char **destAddresses, int *destPorts,
00863 char **destPasswds)
00864 throw(runtime_error);
00865
00879 void arrayInit(int nDestinations, char **destAddresses, int *destPorts,
00880 char **destPasswds, bool firstTime)
00881 throw(runtime_error);
00882
00893 void addToDestinations(char *line, int *nDestinations,
00894 char *destAddresses[], int destPorts[], char *destPasswds[]);
00895
00900 void getDestFromWeb(char *url, int *nDestinations, char *destAddresses[],
00901 int destPorts[], char *destPasswds[],
00902 ConfURLs& confURLs) throw(runtime_error);
00903
00904
00909 void encodeParams(int nParams, char **paramNames, int *valueTypes,
00910 char **paramValues, int timestamp) throw(runtime_error);
00911
00915 void initMonitoring();
00916
00920 void sendJobInfo();
00921
00925 void sendOneJobInfo(MonitoredJob job);
00926
00928 void updateJobInfo(MonitoredJob job);
00929
00932 void sendSysInfo();
00933
00936 void updateSysInfo();
00937
00940 void sendGeneralInfo();
00941
00943 void updateGeneralInfo();
00944
00950 void setBackgroundThread(bool val);
00951
00956 long getCrtRecheckInterval() {
00957 return crtRecheckInterval;
00958 }
00959
00960 void setCrtRecheckInterval(long val);
00961
00962
00966 void freeConf();
00967
00974 #ifndef WIN32
00975 friend void *bkTask(void *param);
00976 #else
00977 friend DWORD WINAPI bkTask(void *param);
00978 #endif
00979
00983 void parseXApMonLine(char *line);
00984
00986 void initSocket() throw(runtime_error);
00987
00991 void parseConf(FILE *fp, int *nDestinations, char **destAddresses,
00992 int *destPorts, char **destPasswds)
00993 throw(runtime_error);
00994
01000 bool shouldSend();
01001
01002 friend class ProcUtils;
01003 };
01004
/**
 * Background thread entry point, declared a friend of ApMon.
 *
 * The signature matches the platform's thread start routine: a pthread start
 * function on POSIX systems and a Win32 thread procedure on Windows.
 *
 * @param param opaque thread argument (presumably the owning ApMon instance
 *        -- confirm against the thread creation call).
 */
#ifndef WIN32
void *bkTask(void *param);
#else
DWORD WINAPI bkTask(void *param);
#endif
01015 #endif
01016
01017
01018
01019
01020
01021
01022
01023