Nov 2008
In part 1 of the check script, the generic functions not directly related to the checks themselves were outlined. In this text several checks are added; these are the main checks, not compulsory ones.
# Signals we are interested in dealing with; the right operand is the name
# of the subroutine which handles the given interrupt type.
# STOP is deliberately not listed: SIGSTOP (like SIGKILL) cannot be caught,
# so installing a handler for it has no effect.
$SIG{$_} = 'interrupt' for qw(INT HUP ABRT QUIT TRAP);
# Globals
my $USER1   = "/usr/local/nagios/libexec"; # Be consistent wrt Nagios
my $CHECK   = "HEALTH";                    # the name of the check; feel free to change
my $OUTFILE = "/var/tmp/healthcheck.tmp";  # an outfile for later use

# Where we store cherry picked results; init these to a space in case they
# are not all collected
my @LOAD_VALUES    = (" ");
my @SYSTIME_VALUE  = (" ");
my @ROOTDISK_VALUE = (" ");

# Default values for LOAD, ROOTDISK Usage
my $DEF_LOAD_WARN = "4,2,2";
my $DEF_LOAD_CRIT = "5,4,3";
my $DEF_DISK_WARN = 95;
my $DEF_DISK_CRIT = 98;
my $DEF_SNMP_COMMUNITY = "public_readonly";

my $STATUS = 0; # A status var to be returned to nagios

# Flags
# Declared with 'our' so they remain package globals (as before) while
# also compiling cleanly under 'use strict'.
our $DNS  = 1; # do check that this host has a DNS entry
our $PING = 0; # don't preping by default since nagios does, switch to 1 if
               # you want to preping before bothering with the rest
# Brain dead interrupt handler
# Installed through %SIG; Perl passes the signal name as the first argument.
# Terminates the script by dying with that name as the message.
sub interrupt { # usage: interrupt \'sig\'
    my ($sig) = @_;
    die $sig;
}
# Generic sub: Load a file into an array and send the array back
#   $file   - path of the file to read
# Returns the file's lines (line endings kept) as a list.
# Dies with "Unable to open logfile ..." if the file cannot be opened.
sub load_file {
    my ($file) = @_;
    # Three-argument open with a lexical handle: the path is never
    # interpreted as a mode or a command, and the handle cannot leak.
    open(my $fh, '<', $file) or die "Unable to open logfile $file: $!\n";
    my @flist = <$fh>;
    close($fh);
    return @flist;
}
# Print the final one-line result (state label, caller's message, collected
# values), remove the temp file and exit with the given return value.
sub check_exit { # usage: check_exit("message string",RETVAL)
    my ($msg, $ret) = @_;

    # Map the numeric return value onto the label shown after the check name.
    my $state = $ret >= 3 ? "UNKNOWN"
              : $ret == 2 ? "CRIT"
              : $ret == 1 ? "WARN"
              : $ret == 0 ? "OK"
              :             "UNKNOWN STATE";
    print "$CHECK $state: $msg ";

    # Append whatever was collected; if a check failed early the remaining
    # arrays still hold their initial placeholder.
    for my $collected (\@SYSTIME_VALUE, \@LOAD_VALUES) {
        chomp(@$collected);
    }
    print "@SYSTIME_VALUE, System Load @LOAD_VALUES, Rootdisk @ROOTDISK_VALUE";

    unlink($OUTFILE); # the temp file is no longer needed
    exit($ret);       # hand the state back to nagios
}
# Check the outfile in some cases for a SNMP warn or critical
# send back the appropriate signal for nagios
# Returns 2 if any line of $OUTFILE contains "CRITICAL", otherwise 1 if any
# line contains "WARN", otherwise 0. An unreadable or missing outfile also
# yields 0, which is what scanning it with grep produced.
sub check_outfile { # usage: check_outfile
    open(my $fh, '<', $OUTFILE) or return 0;
    my $saw_warn = 0;
    while (my $line = <$fh>) {
        # CRITICAL outranks WARN, so stop reading as soon as it shows up.
        if (index($line, 'CRITICAL') >= 0) {
            close($fh);
            return 2;
        }
        $saw_warn = 1 if index($line, 'WARN') >= 0;
    }
    close($fh);
    return $saw_warn ? 1 : 0;
}
# Print the command line synopsis and option summary, including the current
# default thresholds and SNMP community, to standard output.
sub usage {
    print <<"END_OF_USAGE";
Usage: $0 [-u[-H ||[ -lw -lc -dw -dc ]]
Usage: $0 [--nodns][--noping][--snmp "community [user] [pass]"
Options:
 -H Check system called (required)
 -lw Set load warning values
 Default: $DEF_LOAD_WARN
 -lc Set load critical values
 Default: $DEF_LOAD_CRIT
 -dw Set rootdisk warning percent
 Default: $DEF_DISK_WARN
 -dc Set rootdisk critical percent
 Default: $DEF_DISK_CRIT
 --nodns Do not check for DNS resolution
 --noping Do not preping to make sure the host is up
 Note: this will improve performance
 --snmp Set SNMP community name
 Default: $DEF_SNMP_COMMUNITY
 -u Print usage message and exit
END_OF_USAGE
}
Using the usage message as a roadmap, the first check is
the load check. For this, the script simply calls the existing
check_snmp Nagios check; note that the SNMP community is passed as an argument:
# Check load
# Runs the check_snmp plugin found in $USER1 against $host for the three
# load OIDs, saves the plugin output in $OUTFILE, stores selected fields in
# @LOAD_VALUES and raises $STATUS if the outfile shows a WARN or CRITICAL.
#   $host - host name or IP to query
#   $warn - warning thresholds handed to check_snmp -w
#   $crit - critical thresholds handed to check_snmp -c
#   $comm - SNMP community handed to check_snmp -C
sub load { # usage: load($host_or_ip,warn,critical,community)
    my ($host,$warn,$crit,$comm) = @_;

    my $oids = join(',',
        '.1.3.6.1.4.1.2021.10.1.3.1',
        '.1.3.6.1.4.1.2021.10.1.3.2',
        '.1.3.6.1.4.1.2021.10.1.3.3',
    );
    # NOTE(review): the label says 10min; confirm which interval the third
    # OID actually reports before changing the text.
    my @cmd = ("$USER1/check_snmp", '-H', $host, '-C', $comm, '-o', $oids,
               '-w', $warn, '-c', $crit, '-l', 'Load 1min/5min/10min');

    # The arguments are passed as a list, so no shell is involved: values
    # containing spaces or shell metacharacters reach the plugin untouched.
    my @output;
    if (open(my $plugin, '-|', @cmd)) {
        @output = <$plugin>;
        close($plugin);
    } else {
        warn "Unable to run $cmd[0]: $!\n";
    }

    # check_outfile() reads $OUTFILE, so the plugin output still goes there.
    if (open(my $out, '>', $OUTFILE)) {
        print {$out} @output;
        close($out);
    } else {
        warn "Unable to write $OUTFILE: $!\n";
    }

    my $r = check_outfile();

    # Keep whitespace-separated fields 3, 5, 6 and 7 of every output line
    # (presumably the state and the three load figures - confirm against
    # real check_snmp output); missing fields become empty strings.
    @LOAD_VALUES = map {
        my @field = split ' ', $_;
        join(' ', map { defined $_ ? $_ : '' } @field[2, 4, 5, 6]) . "\n";
    } @output;

    # Only ever raise the overall status, never lower it.
    if ($r > 0 && $STATUS < $r) {
        $STATUS = $r;
    }
}
The function gets the values, stores them, and finally checks their status.
Next in order is a simple one: using SNMP again, check the root filesystem:
# Check rootdisk
# Runs the check_snmp plugin found in $USER1 against $host for the disk
# OIDs below, saves the plugin output in $OUTFILE, stores selected fields in
# @ROOTDISK_VALUE and raises $STATUS if the outfile shows a WARN or CRITICAL.
#   $host - host name or IP to query
#   $warn - warning threshold handed to check_snmp -w
#   $crit - critical threshold handed to check_snmp -c
#   $comm - SNMP community handed to check_snmp -C
sub rootdisk { # usage: rootdisk(host_or_ip,warn,crit,community)
    my ($host,$warn,$crit,$comm) = @_;

    my $oids = join(',',
        '1.3.6.1.4.1.2021.9.1.9.1',
        '.1.3.6.1.4.1.2021.9.1.7.1',
        '.1.3.6.1.4.1.2021.9.1.8.1',
        '.1.3.6.1.4.1.2021.9.1.3.1',
        '.1.3.6.1.4.1.2021.9.1.2.1',
    );
    my @cmd = ("$USER1/check_snmp", '-H', $host, '-C', $comm, '-o', $oids,
               '-w', $warn, '-c', $crit);

    # The arguments are passed as a list, so no shell is involved: values
    # containing spaces or shell metacharacters reach the plugin untouched.
    my @output;
    if (open(my $plugin, '-|', @cmd)) {
        @output = <$plugin>;
        close($plugin);
    } else {
        warn "Unable to run $cmd[0]: $!\n";
    }

    # check_outfile() reads $OUTFILE, so the plugin output still goes there.
    if (open(my $out, '>', $OUTFILE)) {
        print {$out} @output;
        close($out);
    } else {
        warn "Unable to write $OUTFILE: $!\n";
    }

    my $r = check_outfile();

    # Keep whitespace-separated fields 4, 5 and 6 of every output line;
    # missing fields become empty strings.
    @ROOTDISK_VALUE = map {
        my @field = split ' ', $_;
        join(' ', map { defined $_ ? $_ : '' } @field[3, 4, 5]) . "\n";
    } @output;

    # Only ever raise the overall status, never lower it.
    if ($r > 0 && $STATUS < $r) {
        $STATUS = $r;
    }
}
There should be a pattern emerging here (no pun intended): the status checks are very similar.
Now that some checks are in place, the final steps to finishing the meta check script are:
a main() subroutine to parse options
and actually call the appropriate checks.