#!/bin/sh
#
# stats
# Shell script to collect simple statistics on numerical data.
# Jacob Lundberg, jacob@chaos2.org
# 2000.10.04
#
# usage: stats {-rows|-cols} [file]
#
# Reads integers from [file] (or from standard input when no file is given)
# and prints the rounded average and the median of every row (-rows) or of
# every column (-cols).
#
#   -rows  values on a line may be separated by tabs and/or spaces
#   -cols  columns are split with "cut -f", i.e. they are tab-separated;
#          the number of columns is taken from the first line
#
# Exit status:
#   0  success (also after printing the usage text for -h)
#   1  badly formed parameters
#   2  the input file cannot be read
#   3  internal error (unrecognised mode after parsing)
#   4  the temporary file could not be created
#   5  the input contains something that is not an integer

# The data is word-split on purpose further down; switch pathname expansion
# off so that a stray "*" in the input is never expanded into file names.
set -f

# Start from known values so that nothing is inherited from the environment.
# This matters most for RFILE, because finish() removes whatever it names.
RFILE=
AVGLST=
MEDLST=
TAB=$(printf '\t')

# Remove the temporary file (if one was created) and exit.
# Arguments: $1 - exit status for the script
finish () {
  [ -z "$RFILE" ] || rm -f -- "$RFILE"
  exit "$1"
}

# Print the one-line usage text on standard output.
usage () {
  printf '%s\n' "usage: stats {-rows|-cols} [file]"
}

# Signal handler: report, clean up, then re-deliver the signal with its
# default action restored so the parent sees that we were killed by it
# rather than a successful exit.
# Arguments: $1 - signal name (without the SIG prefix)
on_signal () {
  echo "stats: terminated by signal" 1>&2
  [ -z "$RFILE" ] || rm -f -- "$RFILE"
  trap - "$1"
  kill -s "$1" "$$"
  exit 1  # only reached if the signal did not terminate the shell
}

# Symbolic names are used because signal numbers differ between platforms.
# KILL and STOP are left out: they cannot be caught.
for SIG in HUP INT QUIT ILL ABRT BUS FPE USR1 SEGV TERM; do
  # The signal name is expanded now, on purpose, to bake it into the handler.
  trap "on_signal $SIG" "$SIG"
done

# Compute the statistics of one data set.
# Arguments: the values, already sorted in ascending numeric order
# Sets:      COUNTER - number of values
#            AVG     - (10 * sum / count + 5) / 10 in integer arithmetic,
#                      i.e. the mean rounded half up for non-negative data
#                      NOTE(review): the division truncates toward zero, so
#                      negative means are not rounded symmetrically; the
#                      original formula is kept as is.
#            MED     - the value at position (count + 1) / 2, which is the
#                      lower of the two middle values for an even count
#            AVG and MED are left empty when there are no values.
# Exits with status 5 (through finish) on a value that is not an integer.
summarize () {
  COUNTER=$#
  AVG=
  MED=
  [ "$COUNTER" -gt 0 ] || return 0

  SUM=0
  for DATUM in "$@"; do
    # Accept only an optional leading "-" followed by digits.
    case "$DATUM" in
      - | *[!0-9-]* | ?*-*)
        printf 'stats: not an integer: %s\n' "$DATUM" 1>&2
        finish 5
        ;;
    esac
    # expr is used for the data arithmetic because it reads operands such
    # as "08" as decimal, whereas $(( )) would treat them as octal.
    SUM=$(expr "$SUM" + "$DATUM")
  done

  AVG=$(expr \( 10 \* "$SUM" / "$COUNTER" + 5 \) / 10)

  # Drop the values in front of the median; it is then the first argument.
  shift $(( (COUNTER + 1) / 2 - 1 ))
  MED=$1
}

# Option parsing.  Options are long words behind a single dash (-rows,
# -cols), and any word starting with the right letter is accepted; a double
# dash is tolerated as well.  Only the first letter decides.
case "$1" in
  -h* | --h*) FORM="-h" ;;
  -r* | --r*) FORM="-r" ;;
  -c* | --c*) FORM="-c" ;;
  *)          FORM="" ;;
esac

if [ "$FORM" = "-h" ]; then
  # Usage information requested.
  usage
  finish 0
elif [ $# -gt 2 ] || { [ "$FORM" != "-r" ] && [ "$FORM" != "-c" ]; }; then
  # Bad arguments.  The braces are required: && and || have the same
  # precedence in sh, so without them too many arguments went unnoticed
  # for -cols.
  echo "stats: badly formed parameters" 1>&2
  usage 1>&2
  finish 1
fi

# Create the temp file that holds the input; it is needed because column
# mode reads the data once per column, which standard input cannot offer.
RFILE=$(mktemp "${TMPDIR:-/tmp}/stats-rfile-XXXXXX") || {
  RFILE=
  echo "stats: cannot create temporary file" 1>&2
  finish 4
}

# Put the data into the temp file.
if [ -z "$2" ]; then
  cat >"$RFILE"
elif [ -r "$2" ] && [ ! -d "$2" ]; then
  cat <"$2" >"$RFILE"
else
  # Commandline wasn't acceptable.
  echo "stats: cannot read $2" 1>&2
  finish 2
fi

# And finally do the work!
if [ "$FORM" = "-r" ]; then
  # Reading and writing in row form.
  printf 'Average\tMedian\n'
  if ! grep -q '[0-9]' "$RFILE"; then
    # Not a single digit in the whole input.
    printf '(nil)\t(nil)\n'
    finish 0
  fi

  # The "|| [ -n ... ]" keeps a final line that lacks a trailing newline.
  while read -r LINE || [ -n "$LINE" ]; do
    # One value per line, sorted numerically.  No field separator is given
    # to sort: each line holds exactly one number.
    SORTED=$(printf '%s\n' "$LINE" | tr -s '\t ' '\n\n' | sort -n)
    # shellcheck disable=SC2086  # word splitting is intended (set -f is on)
    summarize $SORTED
    if [ "$COUNTER" -gt 0 ]; then
      printf '%s\t%s\n' "$AVG" "$MED"
    else
      # Empty row: there is nothing to divide by.
      printf '(nil)\t(nil)\n'
    fi
  done <"$RFILE"
elif [ "$FORM" = "-c" ]; then
  # Reading and writing in column form.
  NADA="(nil)"
  LEVEL=0

  # The width of the file is taken from its first line.
  read -r SAMPLE <"$RFILE"
  # Only the number of fields matters here, not their values.
  # shellcheck disable=SC2034,SC2086
  for TEMP in $SAMPLE; do
    NADA=
    LEVEL=$((LEVEL + 1))
    SORTED=$(cut -f "$LEVEL" <"$RFILE" | sort -n)
    # shellcheck disable=SC2086  # word splitting is intended (set -f is on)
    summarize $SORTED
    # Collect the results; they are printed as two rows after the loop.
    if [ "$COUNTER" -gt 0 ]; then
      AVGLST="$AVGLST$AVG$TAB"
      MEDLST="$MEDLST$MED$TAB"
    fi
  done

  # Output the results.
  printf 'Averages:\n%s%s\nMedians:\n%s%s\n' \
    "$NADA" "$AVGLST" "$NADA" "$MEDLST"
else
  # This should be impossible, so panic!
  echo "stats: misread commandline, aborting" 1>&2
  finish 3
fi

# Clean up after ourselves by removing the temp file.
finish 0