#!/bin/sh
#
# stats
# Shell script to collect simple statistics on numerical data.
# Jacob Lundberg, jacob@chaos2.org
# 2000.10.04
#

# Cleanup and exit.
#
# finish STATUS
#   Removes the temp files named by $RFILE and $LFILE (when they are set)
#   and exits the script with STATUS (0 when STATUS is omitted).
finish () {
   # Either name may still be empty if we bail out before mktemp has run,
   # so only hand rm the names that actually exist.  The names are quoted
   # so that a path containing whitespace is removed as one file.
   if [ -n "$RFILE" ]; then
      rm -f -- "$RFILE"
   fi
   if [ -n "$LFILE" ]; then
      rm -f -- "$LFILE"
   fi
   exit "${1:-0}"
}

# Run the same cleanup when the script is interrupted by a signal.
# NOTE(review): the signals are given by number, and numbering is
# platform-specific.  On Linux 9 and 19 are KILL and STOP, which cannot be
# caught -- confirm the intended set and consider symbolic names.
trap 'echo "stats: terminated by signal" ; finish 0' 1 2 3 4 6 7 8 9 10 11 15 19

# Option handling.
#
# The mode may be written with one or two leading dashes, and only the
# first letter after the dashes is significant: -r, -rows and --rows all
# select row mode; likewise -c/-cols/--cols and -h/-help/--help.

# form_of WORD
#   Prints the two-character form of an option word.  A leading "--" is
#   treated like a single "-", and everything after the first letter is
#   dropped.  printf is used instead of echo so that words such as -e or
#   -n are passed through rather than being eaten as echo options.
form_of () {
   case "$1" in
      --*) printf '%s\n' "$1" | cut -c 2-3 ;;
      *)   printf '%s\n' "$1" | cut -c 1-2 ;;
   esac
}

# bad_args COUNT FORM
#   Succeeds (returns 0) when the command line has to be rejected: either
#   more than two arguments were given, or FORM is neither -r nor -c.
#   The two conditions are tested separately because "a || b && c" in the
#   shell groups as "(a || b) && c", which lets "-c" with too many
#   arguments slip through.
bad_args () {
   if [ "$1" -gt 2 ]; then
      return 0
   fi
   case "$2" in
      -r|-c) return 1 ;;
   esac
   return 0
}

FORM=`form_of "$1"`
if [ "$FORM" = "-h" ]; then
   # Usage information requester.
   echo "usage: stats {-rows|-cols} [file]"
   finish 0
elif bad_args $# "$FORM"; then
   # Bad arguments.
   echo "stats: badly formed parameters" 1>&2
   echo "usage: stats {-rows|-cols} [file]" 1>&2
   finish 1
fi

# Create the scratch file that will hold a private copy of the input.
# The template is relative, so the file is created in the current
# directory and its name never contains whitespace.
RFILE=`mktemp stats-rfile-XXXXXX`
if [ -z "$RFILE" ]; then
   echo "stats: cannot create temporary file" 1>&2
   finish 4
fi

# Put the data into the temp file: from stdin when no file argument was
# given, otherwise from the named file.  Redirection (rather than passing
# the name as an argument) copes with names that contain whitespace or
# start with a dash, and a failed copy is reported instead of ignored.
if [ -z "$2" ]; then
   cat >"$RFILE"
elif [ ! -r "$2" ] || ! cat <"$2" >"$RFILE"; then
   # Commandline wasn't acceptable.
   echo "stats: cannot read $2" 1>&2
   finish 2
fi

# Create a second scratch file, used for the sorted values of one
# row or column at a time.
LFILE=`mktemp stats-lfile-XXXXXX`
if [ -z "$LFILE" ]; then
   echo "stats: cannot create temporary file" 1>&2
   finish 4
fi

# And finally do the work!
#
# The helpers below share the scratch file $LFILE and, like the rest of
# the script, use plain global variables (POSIX sh has no locals).
# Averages are rounded to the nearest integer with halves rounded up;
# the median of an even-sized list is the lower of the two middle values.

# sorted_integers
#   Filter.  Splits stdin on blanks and tabs and writes the integer tokens
#   to stdout, one per line, in ascending numeric order.  A token that is
#   not an optionally negative run of digits is reported on stderr and
#   dropped, so it can neither break the arithmetic nor shift the median.
sorted_integers () {
   tr -s '\t ' '\n\n' |
   while read -r TOKEN; do
      case "$TOKEN" in
         '') continue ;;
      esac
      case "${TOKEN#-}" in
         ''|*[!0-9]*)
            echo "stats: ignoring non-integer value: $TOKEN" 1>&2
            ;;
         *)
            printf '%s\n' "$TOKEN"
            ;;
      esac
   done |
   # Every line holds exactly one value, so no field separator is needed.
   sort -n
}

# tally
#   Computes statistics for the sorted values stored one per line in
#   $LFILE.  Sets COUNTER, SUM, AVG and MED.  Returns 1, leaving AVG and
#   MED untouched, when $LFILE holds no values.
tally () {
   SUM=0
   COUNTER=0
   while read -r DATUM; do
      COUNTER=$((COUNTER + 1))
      # expr reads the data as decimal even with leading zeros; the
      # leading "0 +" keeps a negative sum from looking like an option.
      SUM=`expr 0 + "$SUM" + "$DATUM"`
   done <"$LFILE"
   if [ "$COUNTER" -eq 0 ]; then
      return 1
   fi
   # AVG = floor(SUM / COUNTER + 1/2).  Shell division truncates toward
   # zero, so step down by one for a negative, inexact quotient.
   NUM=$((2 * SUM + COUNTER))
   DEN=$((2 * COUNTER))
   AVG=$((NUM / DEN))
   if [ "$NUM" -lt 0 ] && [ $((NUM % DEN)) -ne 0 ]; then
      AVG=$((AVG - 1))
   fi
   HALF=$(( (COUNTER + 1) / 2 ))
   MED=`sed -n "${HALF}p" "$LFILE"`
   return 0
}

# row_stats FILE
#   Prints a header and then one "average<TAB>median" line per row of
#   FILE.  A row without any integer yields "(nil)<TAB>(nil)"; a file
#   without any digit yields a single such line.
row_stats () {
   printf 'Average\tMedian\n'
   if ! grep -q '[0-9]' "$1"; then
      printf '(nil)\t(nil)\n'
      return 0
   fi
   # The "|| [ -n ... ]" keeps a final row that lacks a trailing newline.
   while read -r DATA || [ -n "$DATA" ]; do
      printf '%s\n' "$DATA" | sorted_integers >"$LFILE"
      if tally; then
         printf '%s\t%s\n' "$AVG" "$MED"
      else
         printf '(nil)\t(nil)\n'
      fi
   done <"$1"
}

# col_stats FILE
#   Prints the averages and then the medians of every column of FILE,
#   each list on one line with a tab after every entry.  Columns are
#   separated by blanks and/or tabs, the same as in row mode.  A column
#   without any integer is shown as "(nil)"; a file without columns
#   prints "(nil)" for both lists.
col_stats () {
   TAB=`printf '\t'`
   AVGLST=
   MEDLST=
   NADA="(nil)"
   # We must know the width of the file beforehand: the widest row wins.
   WIDTH=`awk 'NF > w { w = NF } END { print w + 0 }' "$1"`
   WIDTH=${WIDTH:-0}
   LEVEL=0
   while [ "$LEVEL" -lt "$WIDTH" ]; do
      LEVEL=$((LEVEL + 1))
      NADA=
      # Rows that are too short simply contribute nothing to this column.
      awk -v col="$LEVEL" 'NF >= col { print $col }' "$1" |
         sorted_integers >"$LFILE"
      if tally; then
         AVGLST="$AVGLST$AVG$TAB"
         MEDLST="$MEDLST$MED$TAB"
      else
         AVGLST="$AVGLST(nil)$TAB"
         MEDLST="$MEDLST(nil)$TAB"
      fi
   done
   # Output the results.
   printf 'Averages:\n%s\nMedians:\n%s\n' "$NADA$AVGLST" "$NADA$MEDLST"
}

case "$FORM" in
   -r)
      # We'll be reading and writing in row form.
      row_stats "$RFILE"
      ;;
   -c)
      # We'll be reading and writing in column form.
      col_stats "$RFILE"
      ;;
   *)
      # This should be impossible, so panic!
      echo "stats: misread commandline, aborting" 1>&2
      finish 3
      ;;
esac

# Clean up after ourselves by killing off the temp files.
finish 0
