#!/bin/ksh
# Signal handler for HUP, INT, QUIT and TERM (1 2 3 15): report the abort,
# let any background jobs finish, drop the scratch files and exit 1.
abort_on_signal() {
  echo "Received signal, aborting ..."
  wait
  # The scratch-file names may not be assigned yet when the signal arrives;
  # ${var:-} keeps the expansion safe under "set -u".  Left unquoted on
  # purpose so that empty names simply vanish from the argument list.
  \rm -f ${curfil:-} ${expfil:-} 2>/dev/null || :
  exit 1
}
trap abort_on_signal 1 2 3 15
#begin
#
# Usage: odbprune [-i database_directory] [-r] [-u] [-n] [-C] [-E] [-q]
#        -i dbdir  : the default is "."; the first *.dd file found there (e.g. ECMA.dd) is used
#        -r        : do not remove, but just rename the datafiles
#        -u        : create dca-indices with -u(npack) option
#        -n        : create dca-indices with -n(o-unpack) option
#        -C        : Cold-run: do not remove/rename anything, just show what would have happened
#        -E        : like Cold-run, but instead of showing the commands it Echoes the expected files, if any
#        -q        : Quiet option i.e. do not print any messages, except for fatal errors. 
#                    -q is ignored if used with -C or -E
#
# By default the dca-indices are NOT created (i.e. dcagen is not run).
#
# The purpose is to remove such ODB data files (under [0-9]*/ dirs) that according to 
# the metadata-file <dbname>.iomap are actually not part of the database, but are
# left there due to much improved parallel I/O-scheme introduced in cycle 31R2+.
#
# ***Note: Only applicable to databases with I/O-method#4.
#
# The script assumes that you have write-access to the database and that
# all datafiles reside on a shared disk.
#
# It will also remove all empty directories after the rename/remove.
#
#end
#  
# Author: Sami Saarinen, ECMWF, 13-Feb-2007
#
#

# Stop on the first failing command and on any use of an unset variable
set -eu

# Starting directory and absolute path of this script.  Everything derived
# from $0 is quoted so that a script path containing whitespace still works.
thisdir=$(pwd)
cmd=$(\cd "$(dirname "$0")"; pwd)/$(basename "$0")

# Defaults; the command-line options below override them
abort=no        # "yes" => invalid option seen
dcagen=""       # option handed on to dcagen (-u or -n); empty => not requested
dbpath=$thisdir # database directory (-i)
remove=1        # 1 = remove files, 0 = rename them instead (-r)
coldrun=0       # 0 = normal run, 1 = -C, 2 = -E
silence=0       # 1 = quiet (-q)

FLAGS=i:runCEq
while getopts ${FLAGS} opt
do
  case $opt in
  i) dbpath="$OPTARG";;
  r) remove=0;;
  u) dcagen="-u";;
  n) dcagen="-n";;
  C) coldrun=1;;
  E) coldrun=2;;
  q) silence=1;;
  *) abort=yes; break;;
  esac
done

# -q is ignored whenever -C or -E is in effect
if [[ $coldrun -gt 0 ]] ; then
  silence=0
fi

# Locate the database: enter $dbpath and derive dbname/ddfile from the first
# regular *.dd file found there.  On failure an error is reported on stderr,
# dbname is left empty and abort is set to "yes".
# Reads: dbpath.  Sets: dbpath (made absolute), dbname, ddfile, abort.
locate_database() {
  typeset candidate
  dbname=""
  if [[ -d "$dbpath" ]] ; then
    \cd "$dbpath"
    dbpath=$(pwd)
    # An unmatched pattern stays literal; the -f test then rejects it
    for candidate in *.dd
    do
      if [[ -f "$candidate" ]] ; then
        ddfile=$candidate
        dbname=$(basename "$candidate" .dd)
        break
      fi
    done
    if [[ "$dbname" = "" ]] ; then
      echo "***Error: Unable to locate the main metadata file (.dd) from directory '$dbpath'" >&2
      abort=yes
    fi
  else
    echo "***Error: No such database directory/schema file '$dbpath'" >&2
    abort=yes
  fi
}

locate_database

# Print the usage text on stderr.  The text is the header comment of this
# script file ($cmd) found between the begin/end marker lines, with the marker
# lines dropped and the leading comment character stripped.
# NOTE: keep the pipeline below on ONE line; because that line carries both
# marker patterns it is filtered out of its own output.
usage() {
  awk '/#begin/,/#end/' "$cmd" | grep -E -v '#(begin|end)' | sed 's/^#//' >&2
}

if [[ "$abort" = "yes" ]] ; then
  usage
  exit 1
fi

# The I/O-method is the third field on the first line of the .dd file; it is
# taken to be 1 when that line has fewer than three fields.
iom=$(head -1 "$ddfile" | awk 'BEGIN {n=1;} {if (NF >= 3) n=$3;} END {print n;}')
if [[ $iom -ne 4 ]] ; then
#  echo "***Warning: Database '$dbname' I/O-method is not #4. Exiting gracefully ..." >&2
  # Nothing to prune: leave an empty marker file (best effort) and stop
  cat /dev/null > .odbprune_done 2>/dev/null || :
  exit 0
fi

# With I/O-method 4 the <dbname>.iomap metadata file is mandatory
iomap=$dbname.iomap
if [[ ! -f "$iomap" ]] ; then
  echo "***Error: Database '$dbname' I/O-method is 4, but its metadata-file '$iomap' is not found" >&2
  exit 2
fi

# The iomap type is the first line of the file, less any leading whitespace;
# only type "1" is processed further.
iomtype=$(head -1 "$iomap" | sed 's/^[[:space:]]*//')
if [[ "$iomtype" != "1" ]] ; then
#  echo "***Warning: Database '$dbname' I/O-map file's type is not equal to 1. Exiting gracefully ..." >&2
  cat /dev/null > .odbprune_done 2>/dev/null || :
  exit 0
fi

#tables=$(egrep ^@ $ddfile | perl -ne 'print "$1\n" if (m/^\@(\S+)\s+\d+/)' | perl -pe 's/\n/ /g;')

# Scratch files; they are created in the current (database) directory
expfil=expected_files.$$
curfil=current_files.$$

# Print the sorted list of "<name> <number>" pairs recorded in iomap file $1.
# From line 5 on, field 3 of every 3-field line is taken, as is field 2 of
# every 5-field line whose first field is positive.  An item containing one
# '@' sets the current name (the text after the '@'); every other item is
# printed as "<current name> <item as integer>".
# NOTE(review): presumably name = table and number = pool -- confirm against
# the iomap format description.
expected_files() {
  awk '{if (NR > 4 && NF == 3) print $3; else if (NF == 5 && $1 > 0) print $2;}' < "$1" |\
    uniq | grep -E -v '^EOF' | awk -F\@ '{if (NF == 2) x=$2; else printf("%s %d\n",x,$1);}' |\
    sort
}

# Print the sorted list of "<file> <dir>" pairs for the regular files found
# one level below the [0-9]* directories.  Names containing a dot or an
# upper-case letter are skipped.  Globbing is used instead of ls, so an
# unmatched pattern causes no error message and sub-directories are not
# expanded into their contents.
current_files() {
  typeset f
  for f in [0-9]*/[a-z0-9_]*
  do
    if [[ -f "$f" ]] ; then
      echo "$f"
    fi
  done | grep -E -v '([.]|[A-Z])' | awk -F/ '{print $2,$1}' |\
    sort
}

# Print, on one line, "<dir>/<file> " for every line that diff reports as
# present only in list $1 (current) and not in list $2 (expected).
stale_files() {
  diff "$1" "$2" | grep -E '^< ' | awk '{printf("%d/%s ",$3,$2);}'
}

# Both lists are built in parallel
expected_files "$iomap" > "$expfil" &
current_files > "$curfil" &

wait

files_to_be_removed=$(stale_files "$curfil" "$expfil")

# Remove -- or, when remove is not 1 (-r), rename to <file>.REMOVED -- every
# file listed in $files_to_be_removed.
# Reads the globals silence, coldrun, remove and files_to_be_removed:
#   coldrun 0 : act; the rm/mv commands are traced on stderr unless quiet
#   coldrun 1 : only echo the commands on stderr
#   coldrun 2 : do and print nothing
prune_stale_files() {
  typeset f

  # Show which directory is being processed
  if [[ $silence -eq 0 && $coldrun -ne 2 ]] ; then
    pwd >&2
  fi

  if [[ "$files_to_be_removed" = "" ]] ; then
    if [[ $coldrun -ne 2 && $silence -eq 0 ]] ; then
      echo "*** No ODB data files need to be removed/renamed" >&2
    fi
    return 0
  fi

  if [[ $remove = 1 ]] ; then
    if [[ $coldrun -eq 0 ]] ; then
      # The trace doubles as the progress message, so -q must suppress it
      if [[ $silence -eq 0 ]] ; then
        set -x
      fi
      # Unquoted on purpose: the list is a blank-separated string
      \rm -f $files_to_be_removed
      set +x
    elif [[ $coldrun -eq 1 ]] ; then
      echo "rm -f $files_to_be_removed" >&2
    fi
  else
    for f in $files_to_be_removed
    do
      if [[ $coldrun -eq 0 ]] ; then
        if [[ $silence -eq 0 ]] ; then
          set -x
        fi
        \mv "$f" "$f.REMOVED"
        set +x
      elif [[ $coldrun -eq 1 ]] ; then
        echo "mv $f $f.REMOVED" >&2
      fi
    done
  fi
}

prune_stale_files

# Wrap up the pruning step and delete the scratch files.
# Reads the globals coldrun, expfil and curfil:
#   coldrun 0 : remove empty [0-9]* directories (best effort) and store the
#               list of expected files in .odbprune_done
#   coldrun 1 : only echo those two commands (on stdout)
#   otherwise : (-E) echo the list of expected files on stdout
# The last case used to be guarded by a test that could never succeed after
# the two preceding ones, so -E printed nothing.
finish_prune() {
  if [[ $coldrun -eq 0 ]] ; then
    \rmdir [0-9]* 2>/dev/null || :
    cat "$expfil" > .odbprune_done || :
  elif [[ $coldrun -eq 1 ]] ; then
    echo "rmdir [0-9]* 2>/dev/null || :"
    echo "cat $expfil > .odbprune_done || :"
  else
    cat "$expfil" 2>/dev/null || :
  fi

  \rm -f "$curfil" "$expfil"
}

finish_prune

# Optional last step, taken only when -u or -n was given: a normal run
# invokes dcagen (with -q added in quiet mode), whereas any cold run merely
# prints the command line that would have been used.
if [[ -n "$dcagen" ]] ; then
  dca_args="-P -F -z $dcagen"
  if [[ $silence -eq 1 ]] ; then
    dca_args="$dca_args -q"
  fi
  case $coldrun in
  0) dcagen $dca_args ;;   # unquoted on purpose: several options in one string
  *) echo "dcagen $dca_args" ;;
  esac
fi

exit 0
