#!/usr/bin/perl
#
# bottleneck - print saturation averages for CPU, memory, disk and network.
#	       May quickly highlight a system bottleneck. Solaris 8+.
#
# This Perl program uses the Sun::Solaris::Kstat library to fetch values.
#
# 22-Sep-2005, ver 0.88  (check for new versions, http://www.brendangregg.com)
#
#
# USAGE: bottleneck [-h] | [interval [count]]
#        bottleneck                 # print the summary since boot only
#        bottleneck -h              # print help
#        bottleneck 5               # print continually, every 5 seconds
#        bottleneck 1 5             # print 5 times, every 1 second
#
# This program prints the saturation values from four other programs on one
# line: loadcpu, loadmem, loaddisk and loadnet. These other programs
# contain the documentation on how these saturation or "load" values are 
# calculated and what they represent. A summary is,
#
#       CPU            # threads on the run queue
#       Memory         # scan rate of the page scanner
#       Disk           # operations on the wait queue
#       Network        # errors due to buffer saturation
#
# A load of 1.00 indicates moderate saturation of the resource (usually bad),
# a load of 4.00 would indicate heavy saturation or demand for the resource.
# A load of 0.00 does not indicate idle or unused - rather not saturated.
# See other Solaris commands for levels of usage or utilisation, or for 
# further details of saturation.
#
# The first line is the summary since boot.
#
# NOTE: For unusual disks or network cards, check their instance names are
# in this code (a few lines beneath this block comment).
#
#
# SEE ALSO: sysperfstat, loadcpu, loadmem, loaddisk, loadnet
#           http://www.brendangregg.com/k9toolkit.html
#
# COPYRIGHT: Copyright (c) 2004 Brendan Gregg.
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License
#  as published by the Free Software Foundation; either version 2
#  of the License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software Foundation,
#  Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
#  (http://www.gnu.org/copyleft/gpl.html)
#
# Author: Brendan Gregg  [Sydney, Australia]
#
# 23-Mar-2004	Brendan Gregg	Created this.
# 19-Mar-2005	   "      "	Added summary since boot line.

use Sun::Solaris::Kstat;
my $Kstat = Sun::Solaris::Kstat->new();

#
#  Disk names - only Kstat entries whose top-level name appears in this
#  list are counted as disks. Add unusual disk drivers here.
#
@Disk = ("cmdk", "dad", "sd", "ssd");

#
#  Network card names - only Kstat entries whose top-level name appears in
#  this list are counted as network interfaces. Add unusual cards here.
#
@Network = (
	"be", "bge", "ce", "ci", "dmfe", "e1000g", "el", "eri", "elxl",
	"fa", "ge", "hme", "ipdptp", "iprb", "lane", "le", "nf", "ppp",
	"qe", "qfe", "rtls", "sppp", "vge",
);


#
#  --- Process command line args ---
#
# An explicit request for help prints the usage message and exits.
if ($ARGV[0] eq "-h" || $ARGV[0] eq "--help") { usage(); }

$sleep = $ARGV[0];			# seconds between samples
$loop = $ARGV[1];			# number of samples after the first line
if ($sleep eq "") {
	# No interval given: the main loop prints its first line (the
	# summary since boot) and then stops, as $loop is zero.
	$sleep = 1; $loop = 0;
} else {
	# The interval must be a whole number greater than zero. Anything
	# else would reach sleep() as zero seconds (or a negative number)
	# and turn the main loop into a tight, endless print loop.
	usage() unless $sleep =~ /^\d+$/ && $sleep > 0;

	if ($loop eq "") {
		# No count given: use a count large enough to be "forever".
		$loop = 2**32;
	} else {
		# A count, when given, must be a whole number.
		usage() unless $loop =~ /^\d+$/;
	}
}
$PAGESIZE = 20;				# max lines per header
$lines = $PAGESIZE;			# counter for lines printed
$| = 1;					# unbuffered output

# Turn the name lists into lookup hashes for the fetch routines.
$Disk{$_} = 1 foreach (@Disk);
$Network{$_} = 1 foreach (@Network);


#
#  --- Main ---
#

for (;;) {
	# Print the column header first, and again whenever the line
	# counter reaches the page size.
	if ($lines++ >= $PAGESIZE) {
		printf("%8s %6s %6s %6s %6s\n","Time","CPU","Mem","Disk","Net");
		$lines = 0;
	}

	#
	#  Remember the previous sample of each resource (counter and its
	#  timestamp). On the first pass these are all undefined.
	#
	($oldrunque,$oldupdate1) = ($runque,$update1);
	($oldscan,$oldupdate2) = ($scan,$update2);
	($oldwait,$oldupdate3) = ($wait,$update3);
	($olderror,$oldupdate4) = ($error,$update4);

	#
	#  Refresh the Kstat data and take a new sample of each resource
	#
	$Kstat->update();
	($runque,$update1) = fetch_cpu();
	($scan,$update2) = fetch_mem();
	($wait,$update3) = fetch_disk();
	($error,$update4) = fetch_net();

	#
	#  Turn each pair of samples into a load value
	#
	$cpu = ratio($runque,$oldrunque,$update1,$oldupdate1);
	$mem = ratio($scan,$oldscan,$update2,$oldupdate2);
	$disk = ratio($wait,$oldwait,$update3,$oldupdate3);
	$net = ratio($error,$olderror,$update4,$oldupdate4);

	#
	#  Print the load values, prefixed with the local time as HH:MM:SS
	#
	@Time = localtime();
	printf("%02d:%02d:%02d %6s %6s %6s %6s\n",
	 @Time[2,1,0],$cpu,$mem,$disk,$net);

	### Stop once the requested number of samples has been printed
	if ($count++ == $loop) { last; }

	### Wait for the next interval
	sleep($sleep);
}


#
#  --- Subroutines ---
#


# fetch_cpu - read the CPU values from the unix:0:sysinfo Kstat entry.
#	Returns the list (runque, updates).
#
sub fetch_cpu {
	my $sysinfo = $Kstat->{unix}->{0}->{sysinfo};

	return ($sysinfo->{runque}, $sysinfo->{updates});
}


# fetch_mem - fetch Kstat values for the scan rate. The values used are
#	scan and snaptime from the cpu_stat entries, and slowscan from
#	unix:0:system_pages.
#
#	Returns the list (scan, time): scan is the sum of every scan value
#	found, divided by slowscan; time is the snaptime of the last entry
#	that had a scan value (undefined if none was found).
#
sub fetch_mem {
	my $scan = 0;
	my $time;

	my $instances = $Kstat->{cpu_stat};
	foreach my $instance (keys(%$instances)) {

		my $names = $instances->{$instance};
		foreach my $name (keys(%$names)) {

			my $stats = $names->{$name};
			next unless defined $$stats{scan};

			$scan += $$stats{scan};
			# use the last snaptime value found,
			$time = $$stats{snaptime};
		}
	}

	#
	#  Divide scanrate by slowscan. This gives more sensible load averages,
	#  eg a consistent load of 1.00 indicates consistently at slowscan.
	#  slowscan is usually 100, so that value is used if the statistic is
	#  missing or zero rather than dying on a division by zero.
	#
	my $slowscan = $Kstat->{unix}->{0}->{system_pages}->{slowscan} || 100;
	$scan = $scan / $slowscan;

	return ($scan,$time);
}


# fetch_disk - fetch Kstat values for the disks. The values used are wlentime
#	and wlastupdate.
#
#	Only Kstat entries whose top-level name is a key of %Disk are read,
#	and names containing a comma are skipped as slices.
#
#	Returns the list (wait, time): wait is the sum of every wlentime
#	value found; time is the wlastupdate of the last entry that had a
#	wlentime value (undefined if none was found).
#
sub fetch_disk {
	my $wait = 0;
	my $time;

	foreach my $module (keys(%$Kstat)) {

		### Check that this is a disk structure,
		next unless $Disk{$module};

		my $instances = $Kstat->{$module};
		foreach my $instance (keys(%$instances)) {
			my $names = $instances->{$instance};
			foreach my $name (keys(%$names)) {

				### Check that this isn't a slice
				next if $name =~ /,/;

				my $stats = $names->{$name};
				next unless defined $$stats{wlentime};

				$wait += $$stats{wlentime};
				# use the last wlastupdate value found,
				$time = $$stats{wlastupdate};
			}
		}
	}
	return ($wait,$time);
}


# fetch_net - fetch Kstat values for the network interfaces. The values used
#	are defer, nocanput, norcvbuf and noxmtbuf, and snaptime.
#
#	Only Kstat entries whose top-level name is a key of %Network are
#	read, and only those that define nocanput or norcvbuf are counted.
#
#	Returns the list (error, time): error is the sum of the four
#	counters over every counted entry, divided by 200; time is the
#	snaptime of the last counted entry (undefined if none was found).
#
sub fetch_net {
	my $error = 0;
	my $time;

	foreach my $module (keys(%$Kstat)) {

		### Check that this is a network card structure,
		next unless $Network{$module};

		my $instances = $Kstat->{$module};
		foreach my $instance (keys(%$instances)) {
			my $names = $instances->{$instance};
			foreach my $name (keys(%$names)) {

				my $stats = $names->{$name};

				### Check that this entry has the buffer counters
				next unless defined $$stats{nocanput} ||
				    defined $$stats{norcvbuf};

				# A counter that is absent adds nothing.
				$error += $$stats{defer};
				$error += $$stats{nocanput};
				$error += $$stats{norcvbuf};
				$error += $$stats{noxmtbuf};
				# use the last snaptime value found,
				$time = $$stats{snaptime};
			}
		}
	}

	#
	#  Divide errors by 200. This gives more sensible load averages,
	#  such as 4.00 meaning heavily saturated rather than 800.00.
	#  Future versions of this program may use a more elegant technique
	#  rather than a factor of 200.
	#
	$error = $error / 200;

	return ($error,$time);
}


# ratio - calculate the ratio of the count delta over the time delta;
#	given count and oldcount, time and oldtime. Undefined arguments
#	are treated as zero by the subtractions.
#
#	Returns the ratio as a string with two decimal places. If the time
#	delta is not greater than zero (no time has passed, or not enough
#	data was given) the string "0.00" is returned.
#
sub ratio {
	my ($count,$oldcount,$time,$oldtime) = @_;

	# Working values are lexical so a call leaves no globals behind.
	my $countd = $count - $oldcount;
	my $timed = $time - $oldtime;

	# Only divide when time has moved forward; this also avoids a
	# division by zero.
	my $ratio = $timed > 0 ? $countd / $timed : 0;

	return sprintf("%.2f",$ratio);
}


# usage - print a usage summary to STDERR and exit with status 1.
#
#	With no arguments the program prints a single line, the summary
#	since boot, and exits without sleeping; the first example below
#	says so.
#
sub usage {
        print STDERR <<END;
USAGE: $0 [-h] | [interval [count]]
   eg, $0               # print summary since boot only
       $0 5             # print continually every 5 seconds
       $0 1 5           # print 5 times, every 1 second
END
        exit 1;
}

