#!/usr/local/bin/perl
## $Id: XrdOlbMonPerf,v 1.8 2004/09/10 12:01:20 elmer Exp $
## This script collects information on the server activity like:
## CPU, network I/O, memory usage ...
## modified to get rid of rperf.
## J-Y Nief - CCIN2P3 - 24/05/04
## modified to accept additional options and drop multi-home uf's.
# Andy Hanushevsky - SLAC - 15/07/04
## modified to get right network I/O result, paging I/O and changes in
## paths to unix commands etc.
## Pavel Jakl - RCF/BNL - 20/08/05
#
# Usage: XrdOlbMonPerf [-I netintf] [-n netspeed] [-m <shmkey>] [<interval>]
#
#   -I netintf   only account for this network interface
#   -n netspeed  interface capacity in bits/s (auto-detected when omitted)
#   -m shmkey    write the five metrics into this shared memory segment
#                instead of printing them on STDOUT
#   interval     seconds to sleep between two reports (default 60)
#
# Every interval one report of five integers in the range 0..100 is
# produced, in this order: load, cpu, mem, pgio, ipio.
#
# WARNING: if you are running this script on a Linux RH7.x server,
# the network metric is disabled as the netstat output is screwed,
# this info is useless. no problem for RH9 and RHEL3.

# NOTE: 'use warnings' is intentionally not enabled; the script relies on
# Perl's lenient string-to-number conversion when parsing command output.
use strict;

# ---------------------------------------------------------------------------
# Defaults
# ---------------------------------------------------------------------------
my $shmid;                  # stays undef => report on STDOUT
my $shmkey  = '';
my $systype = $^O;
my $netif   = '';           # optional interface selected with -I
my $netmax  = 0;            # bits / s
my $monint  = 60;
my $DEBUG   = 0;            # switch to debug script
$| = 1;                     # unbuffer STDOUT

# ---------------------------------------------------------------------------
# Get options
# ---------------------------------------------------------------------------
my $argval;
while (($argval = shift) && (substr($argval, 0, 1) eq '-')) {
    if ($argval eq '-I') {
        Log("Network interface not specified.") if !($netif = shift);
    }
    elsif ($argval eq '-n') {
        Log("Network speed not specified.") if !($netmax = shift);
    }
    elsif ($argval eq '-m') {
        Log("Shared segment not specified.") if !($shmkey = shift);
        # NOTE(review): '.' looks like a placeholder meaning "no segment";
        # it is treated as "keep reporting on STDOUT" -- confirm with callers.
        if ($shmkey ne '.') {
            $shmid = shmget($shmkey, 4096, 0744);
            Log("Unable to attach shared memory; $!") if !defined($shmid);
        }
    }
    else {
        Log("Unknown option, $argval, ignored", 1);
    }
}

# Get the interval (the first non-option argument, if any)
#
if ($argval) {
    $monint = $argval;
    # A zero interval would divide by zero below and busy-loop on sleep.
    Log("Invalid interval, '$monint'") if $monint !~ /^\d+$/ || $monint < 1;
}

# ---------------------------------------------------------------------------
# get the number of CPUs on this server
# ---------------------------------------------------------------------------
my $numCPU = 0;
if ($systype eq 'linux') {
    # one aggregate "cpu" line plus one "cpuN" line per processor
    $numCPU = `/bin/grep -c cpu /proc/stat` - 1;
}
elsif ($systype eq 'solaris') {
    my $resp = `/bin/uname -X | /bin/grep NumCPU`;
    chomp($resp);
    my @answer = split('= ', $resp);
    $numCPU = $answer[1];
}
# The run queue load is divided by this value; never let it be 0 or undef.
$numCPU = 1 if !$numCPU || $numCPU < 1;

# ---------------------------------------------------------------------------
# get the total memory of the server (bytes)
# ---------------------------------------------------------------------------
my $TotMem = 0;
if ($systype eq 'linux') {
    foreach my $line (MeminfoLines()) {
        if ($line =~ /MemTotal:\s+(\d+)/) {
            $TotMem = $1 * 1024;
            last;
        }
    }
}
elsif ($systype eq 'solaris' && -x "/etc/prtconf") {
    my $resp = `/etc/prtconf | /bin/grep Memory`;
    ($TotMem) = $resp =~ /.+:\s*(\d+)/s;
    $TotMem *= 1024*1024;
}

# ---------------------------------------------------------------------------
# if it is a Linux, retrieve the distrib version.
# necessary for netstat which does not give the same output on RH 7.x, RH 9,
# RHEL...
# netstat usable on RH 9 and RHEL 3 ==> distrib = "RH_OK", else "RH_WRONG"
# ---------------------------------------------------------------------------
my $distrib = "RH_OK";
if ($systype eq 'linux' && open(my $fh, '<', '/etc/redhat-release')) {
    my $release = join('', <$fh>);
    close($fh);
    # Only the old "Red Hat Linux" 7.x line is affected; a bare
    # /release 7./ would also disable the metric on any other
    # distribution whose release number happens to be 7.
    $distrib = "RH_WRONG" if $release =~ /Red Hat Linux release 7\./;
}

# ---------------------------------------------------------------------------
# determine the column where the idle time and the free mem
# can be found in the 'vmstat' output.
# depends on the platform and on the 'vmstat' version, sigh...
# ---------------------------------------------------------------------------
my $ind_idle = 0;
my $ind_free = 0;
my @output = `/usr/bin/vmstat`;
# split(' ') ignores leading blanks, so header and data lines always share
# the same indices whether or not a given line starts with whitespace.
my @vm_header = split(' ', defined($output[1]) ? $output[1] : '');
foreach my $type (@vm_header) {
    last if $type eq 'id';
    $ind_idle++;
}
foreach my $type (@vm_header) {
    last if $type eq 'free';
    $ind_free++;
}

# in the netstat output, the number of input and output packets is not in the
# same columns whether it is a Linux or SunOS platform
my ($ind_net1, $ind_net2) = $systype eq 'solaris' ? (5, 7) : (3, 7);

# ---------------------------------------------------------------------------
# check the number of network interface on the server and their
# capability (ethernet or gigabit)
# ---------------------------------------------------------------------------
my $mtu = 1500;
if (!$netmax) {
    foreach my $line (`/bin/netstat -i`) {
        next if $line =~ /Kernel Interface/ || $line =~ /MTU/;
        $netmax = 1e8 if $line =~ /eth/;
        $netmax = 1e9 if $line =~ /ge/ || $line =~ /ce/;
    }
    $netmax = 1e8 if !$netmax;
}

# ---------------------------------------------------------------------------
# Main loop: sample, report, sleep $monint seconds
# ---------------------------------------------------------------------------
my $ipio_old = 0;
my $pkts_prev;              # Linux packet total of the previous sample

while (1) {
    my @netstat = `/bin/netstat -i`;

    # what's the CPU utilization ?
    # "vmstat 1 2" prints two header lines and two samples; the second
    # sample (line index 3) is the one that is used.
    my @respCPU     = `/usr/bin/vmstat 1 2`;
    my @resp_vmstat = split(' ', defined($respCPU[3]) ? $respCPU[3] : '');
    my $idle        = $resp_vmstat[$ind_idle];
    my $cpu;
    if (!defined($idle) || $idle > 100 || $idle < 0) {
        $cpu = 0;
    }
    else {
        # the two columns just before the idle column (user + system time)
        $cpu = $resp_vmstat[$ind_idle-2] + $resp_vmstat[$ind_idle-1];
    }

    # what's the runq load ?
    # Always take the second load average figure. Picking a fixed
    # comma-separated field selects a different figure depending on whether
    # uptime prints a "N days," part.
    chomp(my $respLoad = `/usr/bin/uptime`);
    my ($load5) = $respLoad =~ /load averages?:\s*[\d.]+,?\s+([\d.]+)/;
    $load5 = 0 if !defined($load5);
    # Assumes some magic upper number i.e. 1 CPU load 4 = 100%
    my $load = int($load5 * 25 / $numCPU);
    $load = 100 if $load > 100;

    # what's the network I/O activity?
    my $ipio = $ipio_old;
    if ($systype eq 'solaris') {
        # NOTE(review): this branch converts the raw counters of each line
        # straight into a rate and keeps the value of the last line; the
        # counters are presumably cumulative, so this may need the same
        # delta logic as Linux -- left as is, column layout unverified.
        foreach my $line (@netstat) {
            my @fields = split('\s+', $line);
            print "DEBUG ".join("|",@fields)."\n" if ($DEBUG);
            next if $netif ne '' && (!@fields || $fields[0] ne $netif);
            my $net_activity = ($fields[$ind_net1] + $fields[$ind_net2])
                             * $mtu * 8 / $monint;
            $ipio = int($net_activity / $netmax * 100);
        }
    }
    elsif ($systype eq 'linux' && $distrib ne 'RH_WRONG') {
        my ($rx_col, $tx_col) = ($ind_net1, $ind_net2);
        my $pkts_now = 0;
        my $seen     = 0;
        foreach my $line (@netstat) {
            my @fields = split(' ', $line);
            print "DEBUG ".join("|",@fields)."\n" if ($DEBUG);
            next if !@fields || $line =~ /Kernel Interface/;
            if ($fields[0] eq 'Iface') {
                # Take the packet columns from the header when it names
                # them; otherwise the defaults above stay in effect.
                foreach my $i (0 .. $#fields) {
                    $rx_col = $i if $fields[$i] eq 'RX-OK';
                    $tx_col = $i if $fields[$i] eq 'TX-OK';
                }
                next;
            }
            next if $fields[0] =~ /^lo(?::.*)?$/;            # loopback
            next if $netif ne '' && $fields[0] ne $netif;    # -I filter
            $pkts_now += $fields[$rx_col] + $fields[$tx_col];
            $seen = 1;
        }
        print "DEBUG measure is $pkts_now $netmax\n" if ($DEBUG);
        if ($seen) {
            # A rate needs two samples; the first pass only records.
            if (defined($pkts_prev)) {
                my $delta = $pkts_now - $pkts_prev;
                $delta = 0 if $delta < 0;    # counter reset / interface gone
                my $net_activity = $delta * $mtu * 8 / $monint;
                $ipio = int($net_activity / $netmax * 100);
            }
            $pkts_prev = $pkts_now;
        }
    }
    $ipio = 100 if $ipio > 100;
    $ipio = 0   if $ipio < 0;
    $ipio_old = $ipio;

    # what's the memory load ?
    my $mem = 0;
    if ($TotMem > 0) {
        my $used = $TotMem - $resp_vmstat[$ind_free]*1024;
        $mem = int($used/$TotMem*100);
        $mem = 100 if $mem > 100;
        $mem = 0   if $mem < 0;
    }

    # what's the paging I/O activity ?
    # useless as this metric is highly correlated with some of the others above.
    # being kept for backward compatibility with the load balancer.
    my $pgio = 0;
    if (-e "/proc/meminfo") {
        $pgio = SwapUsedPct(MeminfoLines());
    }

    if (!defined($shmid)) {
        if ($DEBUG) {
            print "Load:$load CPU:$cpu MEM:$mem PGIO:$pgio IPIO:$ipio\n";
        }
        else {
            print "$load $cpu $mem $pgio $ipio\n";
        }
    }
    else {
        # five unsigned longs, written at offset 0 of the segment
        my $resp = pack('LLLLL',$load,$cpu,$mem,$pgio,$ipio);
        Log("Unable to write into shared memory; $!")
            if !shmwrite($shmid, $resp, 0, length($resp));
    }

    #Sleep some time
    sleep $monint;
}

# MeminfoLines()
#   Returns the lines of /proc/meminfo, or an empty list when the file
#   cannot be opened.
sub MeminfoLines {
    open(my $fh, '<', '/proc/meminfo') or return ();
    my @lines = <$fh>;
    close($fh);
    return @lines;
}

# SwapUsedPct(@lines)
#   @lines : the content of /proc/meminfo, one line per element.
#   Returns the used swap space as an integer percentage of the total.
#   A "Swap: <total> <used> ..." summary line is used when present;
#   otherwise the value is derived from SwapTotal and SwapFree.
#   Returns 0 when no swap figures are found or the total is zero.
sub SwapUsedPct {
    my @lines = @_;
    my ($total, $used, $kb_total, $kb_free);
    foreach my $line (@lines) {
        if ($line =~ /Swap:\s+(\d+)\s+(\d+)/) {
            ($total, $used) = ($1, $2);
            last;
        }
        $kb_total = $1 if $line =~ /^SwapTotal:\s+(\d+)/;
        $kb_free  = $1 if $line =~ /^SwapFree:\s+(\d+)/;
    }
    if (!defined($total) && defined($kb_total) && defined($kb_free)) {
        ($total, $used) = ($kb_total, $kb_total - $kb_free);
    }
    return 0 if !$total;
    return int($used / $total * 100);
}

# Log($msg, $ret)
#   Writes "XrdOlbMonPerf: $msg" to STDERR and to syslog (daemon.notice,
#   through /bin/logger).
#   $ret : when true the sub returns 0 and the script carries on;
#          when false or omitted the script exits with status 1.
sub Log {
    my ($msg, $ret) = @_;
    print STDERR "XrdOlbMonPerf: $msg\n";
    system('/bin/logger', '-p', 'daemon.notice', "XrdOlbMonPerf: $msg");
    return 0 if $ret;
    exit(1);
}