#!/usr/bin/perl
# by Harry Mangalam, mangalam@home.com.
# mod 4.21.00 adding check for FLAT mode, modecount =1
# mod 11.24.99 adding Mode, Mode count, Median to output.
# This is FREEWARE, and is worth exactly that!
# perloid to take stdin of as many #s as are in the stream, whether in one line
# or in many lines (only have to be separated by whitespace), calculate some
# basic stats, then spit to stdout so that the output can be
# grep'ped in the std unixy way..
# usage: stats < file.of.numbers
# or
# cmd1 | cmd2 |cmd3 | stats
# eg, to calculate a summary of the bytes used in the current directory:
#
# 3 % ls -l | cut -c31-42 |stats
#
# Sum = 158401735 158.4 MB total
# N = 503 in 503 files
# Mean = 314913.986083499 average size of file is 315 KB
# Median = 10204 median size is 10 KB
# Mode (#) = 1024 (33) mode is 1024 due to 33 directories
# Min = 0 at least 1 empty file
# Max = 27135470 got a whomper of a file at 27 MB
# Variance = 2903105341782.66 huge variance
# Std Dev = 1703850.15238508 etc
# SEM = 75970.9233614217
# Skew = 12.1873963279124
# Std Skew = 111.588464543135
# To use from nedit, put it in your /usr/local/bin and add the following
# to your .nedit file under shell commands:
#
# stats:::IW:\n\
# stats\n
#
#
# Feel free to add whatever additional calculations you want, but if you do and
# you think they might be of general use, let me know so I can add them to the
# original.
# Bug reports, suggestions back to the author
# Running totals, filled in while the input is read.
$N   = 0;           # how many numbers have been seen so far
$sum = 0;           # running total of those numbers
$Min = $Max = 0;    # placeholders; the first number read replaces both

# Read every input line (files named on the command line, otherwise stdin)
# and fold each whitespace-separated token into the totals above.
while (<>) {
    # split is called in LIST context on purpose.  The earlier form,
    # "$x = split;", counted on scalar-context split filling @_ as a side
    # effect; Perl 5.12 removed that behaviour, so every value was read back
    # as undef.  Splitting on ' ' also skips leading whitespace, so an
    # indented line does not produce an empty first field.
    # Tokens are used as numbers as-is; nothing here validates them.
    for my $value (split ' ', $_) {
        $sum += $value;
        $Min = $Max = $value if $N == 0;    # first number seeds the range
        $Min = $value if $value < $Min;
        $Max = $value if $value > $Max;
        $Data[$N++] = $value;               # kept for the median, SD, etc.
    }
}
# All the numbers sucked in; now calc the values wanted.
# The median (here) and the mode (further down) are both read off a copy of
# the data sorted into ascending numeric order.
@SData = sort { $a <=> $b } @Data;

# median_of_sorted(\@sorted) - middle value of an ascending list.
#   \@sorted : reference to an array of numbers already sorted ascending
#   returns  : the middle element when the count is odd, the mean of the two
#              middle elements when the count is even, undef for an empty list
sub median_of_sorted {
    my ($sorted) = @_;
    my $count = scalar @{$sorted};
    return if $count == 0;

    # 0-based index of the middle element (odd count) or of the upper of the
    # two middle elements (even count).  The earlier odd-count code indexed
    # ($N+1)/2, one slot too far: for three numbers it returned the largest,
    # and for a single number it returned nothing at all.
    my $upper = int($count / 2);
    return $sorted->[$upper] if $count % 2;
    return ($sorted->[$upper - 1] + $sorted->[$upper]) / 2;
}

$even   = ($N % 2 == 0) ? 1 : 0;    # parity flag kept from the original code
$Median = median_of_sorted(\@SData);
# With no input there is nothing to average.  Say so plainly instead of
# failing on the division below with a bare "Illegal division by zero".
die "stats: no numbers found on input\n" if $N == 0;

$Mean = $sum / $N;

# Sums of the squared and cubed deviations from the mean; the variance and
# skewness calculations further down are built from these.
$SumDiffs2 = 0;
$SumDiffs3 = 0;
for my $i (0 .. $N - 1) {
    my $diff = $Data[$i] - $Mean;
    $SumDiffs2 += $diff**2;
    $SumDiffs3 += $diff**3;
}

# Mode: @SData is sorted, so equal values sit next to each other and the mode
# is the value of the longest run.  Each run is counted from 1 and compared
# against the best so far on every element, so the last run is considered
# too.  (The earlier scan counted every run after the first one short, added
# one when reporting, and never looked at the final run - "5 5 5" and
# "1 2 2 2" were both reported as FLAT.)
$MaxSoFarValCnt = 0;    # length of the longest run found so far
$ModeInd        = 0;    # index in @SData of an element of that run
$ValCnt         = 0;    # length of the run currently being scanned
for my $i (0 .. $N - 1) {
    if ($i > 0 && $SData[$i] == $SData[$i - 1]) {
        $ValCnt++;
    } else {
        $ValCnt = 1;
    }
    # Strictly greater: when several values tie for the longest run, the
    # smallest of them is the one reported.
    if ($ValCnt > $MaxSoFarValCnt) {
        $MaxSoFarValCnt = $ValCnt;
        $ModeInd        = $i;
    }
}

if ($MaxSoFarValCnt > 1) {
    $ModeNum = $MaxSoFarValCnt;     # how many times the mode occurs
    $Mode    = $SData[$ModeInd];
} else {
    # every value occurs exactly once, so there is no mode to report
    $ModeNum = "No # was represented more than once";
    $Mode    = "FLAT";
}
# Sample variance, standard deviation and standard error of the mean.
# The variance divides by N-1, so these only exist for two or more numbers;
# for a single number they are reported as undefined rather than letting the
# division by zero kill the script before anything is printed.
if ($N > 1) {
    $S2  = $SumDiffs2 / ($N - 1);
    $S   = sqrt($S2);
    $SEM = $S / sqrt($N);
} else {
    $S2 = $S = $SEM = "undefined (N < 2)";
}

# One "label = value" pair per line so the output can be grep'ped.
print "\nSum = ", $sum,
    "\nN = ", $N,
    "\nMean = ", $Mean,
    "\nMedian = ", $Median,
    "\nMode (#) = ", $Mode, " ($ModeNum)",
    "\nMin = ", $Min,
    "\nMax = ", $Max,
    "\nVariance = ", $S2,
    "\nStd Dev = ", $S,
    "\nSEM = ", $SEM, "\n";

# Skewness is only calculated for more than three numbers with a non-zero
# spread.  N is tested first so that $S is compared as a number only when it
# actually holds one.
if ($N > 3 && $S > 0) {
    $Skew    = ($N * $SumDiffs3) / (($N - 1) * ($N - 2) * ($S**3));
    $StdSkew = $Skew / sqrt(6 / $N);
    print "Skew = ", $Skew,
        "\nStd Skew = ", $StdSkew, "\n";
} else {
    print "Std Dev = 0 or N <=3; Skipping Skewness, Std Skewness cal'n.\n";
}
exit 0;
# Comparison routine for sort: compares sort's two package variables $a and
# $b as numbers, giving ascending numeric order.
sub numerically {
    return $a <=> $b;
}