Nagios sam-query Plugin
From GridPP Wiki
check_lcg_same.pl
This Nagios check script uses the lcg-sam-client tool (already installed on user interfaces and worker nodes) to query the SAM test database and retrieve the latest test results.
#!/usr/bin/perl
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# check_lcg_same v0.1
# By c.a.j.brew@rl.ac.uk
#
# Thanks to mp@xmission.com who wrote the excellent
# check_traceroute-pure_perl plugin, which this plugin was modeled after.
#
#############################################################################
use strict;
use Getopt::Long;
# Plugin version string, shown in the -v banner.
my $version = "v0.1";

# Command-line option values, filled in by GetOptions below.
my $opt_v;       # -v: print the version banner and enable debug output
my $opt_n;       # -n: host to query; defaults to `hostname -f`
my $opt_help;    # -h / --help: print the long help text
my $opt_s;       # -s: service abbreviation to query (required)

# Exit codes defined by the Nagios plugin API. UNKNOWN has to be 3: an
# exit value of -1 reaches the parent process as 255, which is not one
# of the documented plugin return codes.
my %ERRORS = (
    'UNKNOWN'  => 3,
    'OK'       => 0,
    'WARNING'  => 1,
    'CRITICAL' => 2,
);

# Set this to whatever you like, but make sure you don't hang Nagios
# for too long.
my $timeout = 30;

# GetOptions returns false on unknown or malformed options; stop with the
# usage message rather than running a check with half-parsed arguments.
GetOptions(
    "v"      => \$opt_v,
    "help|h" => \$opt_help,
    "s=s"    => \$opt_s,
    "n=s"    => \$opt_n,
) or usage();

# Default the host to this machine's fully qualified name.
unless ($opt_n) {
    $opt_n = `hostname -f`;
    $opt_n = "" unless defined $opt_n;
    chomp $opt_n;
    if ($opt_n eq "") {
        # Without a node name the query below would be meaningless.
        print "ERROR: Cannot determine local hostname, please specify -n <host>\n";
        exit $ERRORS{'UNKNOWN'};
    }
}

if ($opt_v) {
    print "\nThis is check_lcg_same version $version\n";
    print "\n";
    print "Please report errors to c.a.j.brew\@rl.ac.uk";
    print "\n\n";
}
#subs
# Print the long help text (purpose of the plugin and a description of
# every command-line option) to STDOUT. Takes no arguments; it only
# prints and leaves the decision to exit to the caller.
sub print_help {
    print <<'END_HELP';


This is check_lcg_same.pl. It is designed to send an alert
to Nagios if LCG SAME reports a problem with a particular service
on a host.

Usage:

  -h, --help    Display this help.
  -v            Display the version number of check_lcg_same and
                print debugging output for the query.
  -n <host>     Host that you wish to check the service on
                (default: the output of 'hostname -f').
  -s <service>  Service on that host that you wish to query (required).
END_HELP
}
# Print the one-line usage synopsis and terminate the plugin.
# Never returns: exits with status 3, the Nagios UNKNOWN state, because a
# mis-invoked plugin cannot say anything about the service. (The former
# exit(-1) was seen by the parent process as status 255.)
sub usage {
    print "check_lcg_same -n <host> -s <service>\n";
    exit(3);
}
# Run same-query for service $opt_s on node $opt_n and translate its
# answer into a Nagios state.
#
# Reads the file-level $opt_n, $opt_s, $opt_v and %ERRORS. Prints one
# status line to STDOUT (preceded by the command line, its exit status and
# the raw query output when -v is given) and returns the exit code:
#   output starts with "ok"   -> OK
#   output starts with "na"   -> WARNING
#   output starts with "down" -> CRITICAL
#   anything else             -> UNKNOWN
sub do_check {
    # NOTE(review): presumably same-query needs a writable HOME - confirm.
    $ENV{'HOME'} = "/tmp";

    my $command      = "/opt/lcg/same/client/bin/same-query";
    my @command_args = (
        "servicestatus",
        "nodename=$opt_n",
        "serviceabbr=$opt_s",
        "voname=ops",
        "servicestatusvo=OPS",
    );

    # Fork and exec the query directly instead of going through the shell,
    # so that shell metacharacters in -n / -s cannot inject commands.
    my $pid = open(my $query, "-|");
    unless (defined $pid) {
        print "ERROR: Cannot fork to run SAME query: $!\n";
        return $ERRORS{"UNKNOWN"};
    }
    if ($pid == 0) {
        # Child: STDOUT is the pipe to the parent. Send STDERR there too,
        # which is what the former "2>&1" did.
        unless (open(STDERR, ">&STDOUT")) {
            print "cannot redirect STDERR: $!\n";
            exit 127;
        }
        exec { $command } $command, @command_args;
        # Only reached when exec failed; the parent reads this message.
        print "cannot execute $command: $!\n";
        exit 127;
    }

    # Parent: slurp everything the query wrote.
    my $qry_result = do { local $/; <$query> };
    $qry_result = "" unless defined $qry_result;
    close($query);    # sets $? to the exit status of the query

    if ($opt_v) {
        print "$command @command_args $?\n";
        # The original printed this to a filehandle that was never opened,
        # so the raw query output was silently lost.
        print "$qry_result\n";
    }

    if ($qry_result =~ /^ok/) {
        print "OK: SAM reports $opt_s on $opt_n is ok\n";
        return $ERRORS{"OK"};
    }
    if ($qry_result =~ /^na/) {
        print "Warning: SAM reports $opt_s on $opt_n is na\n";
        return $ERRORS{"WARNING"};
    }
    if ($qry_result =~ /^down/) {
        print "Critical: SAM reports $opt_s on $opt_n is down\n";
        return $ERRORS{"CRITICAL"};
    }

    print "ERROR: Do not understand response from SAME query\n";
    return $ERRORS{"UNKNOWN"};
}
# Main program. Kept at the end of the file, after the definitions of the
# subs it calls, so that the script compiles cleanly under warnings.
if ($opt_help) {
    print_help();
    # Stop here: asking for help must not fall through to the usage error
    # or to a real query. UNKNOWN is used so that a command definition
    # that contains --help by mistake is never reported as OK.
    exit $ERRORS{"UNKNOWN"};
}

# A service abbreviation is mandatory.
usage() unless ($opt_s);

# Timeout guard: give up with UNKNOWN if the query has not finished
# within $timeout seconds.
$SIG{'ALRM'} = sub {
    print "ERROR: No response from SAME SERVER (timeout) in $timeout seconds\n";
    exit $ERRORS{"UNKNOWN"};
};
alarm($timeout);

exit do_check();
Chris brew 09:58, 8 Feb 2007 (GMT)