/********************************************************************************************************
 * QRNA - Comparative analysis of biological sequences 
 *         with pair hidden Markov models, pair stochastic context-free
 *        grammars, and probabilistic evolutionary  models.
 *       
 * Version 2.0.0 (JUN 2003)
 *
 * Copyright (C) 2000-2003 Howard Hughes Medical Institute/Washington University School of Medicine
 * All Rights Reserved
 * 
 *     This source code is distributed under the terms of the
 *     GNU General Public License. See the files COPYING and LICENSE
 *     for details.
 ***********************************************************************************************************/

/* qrna.c
 *
 * E. Rivas [St. Louis]
 * 
 * 9 april 1999.
 * 
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include <time.h>

#include "funcs.h"
#include "evolfuncs.h"
#include "globals.h"
#include "squid.h"
#include "structs.h"
#include "version.h"


/* analyze_2_sequences()
 *
 * Forward declaration of a file-local (static) routine, placed ahead of
 * main() so it can be referenced before its definition.
 *
 * Parameters, in declaration order:
 *   - output streams and file names (ofp, printq, regress);
 *   - input format and alignment counter;
 *   - for each of the two sequences X and Y: SQINFO, character sequence,
 *     two integer arrays (iseg/iseq) and a ct array;
 *   - length / window settings (Lmax, Lw, maxlenhit, minlenhit, window, slide);
 *   - probability tables and evolutionary-model structures;
 *   - dp / SCFG work structures, scores, alignment and scan storage;
 *   - the integer flags and numeric settings parsed from the command line.
 *
 * NOTE(review): the definition is not visible in this part of the file; the
 * grouping above is read off the parameter names and types only -- confirm
 * exact semantics against the function body.
 */
static void analyze_2_sequences (FILE *ofp,
				 FILE *printqfp,  char *printqfile, 
				 FILE *regressfp, char *regressionfile, 
				 int format, int num_align,
				 SQINFO sqinfoX, char *seqX, int *isegX, int *iseqX, int *ctX,
				 SQINFO sqinfoY, char *seqY, int *isegY, int *iseqY, int *ctY,
				 long int Lmax, int Lw, long int maxlenhit, int minlenhit,
				 int window, int slide,
				 double   *targetfreq,
				 double ***cfg_node,
				 double  **hexa,
				 double   *codon_joint,
				 struct psubs_s           *pammodel_star,
				 struct psubs_s           *codprob_star,
				 struct psubs_s           *mutpxy_star,
				 struct psubs_s           *mut5pxy_star,
				 struct psubs_s           *riboprob_star,
				 struct psubs_s           *riboprobapprox_star,
				 struct pnonsubs_s        *pair5prob_star, 
				 struct psubs_s           *mutpxy_rna_unpaired_star,
				 struct psubs_s           *mut5pxy_rna_unpaired_star,
				 struct psubs_s           *mut5pxy_rna_loop_star,
				 struct psubs_s           *mutpxy_rna_paired_star,
				 struct psubs_s           *mut5pxy_rna_paired_star,
				 struct dos_s              d, 
				 struct emodel_s          *emodel, 
				 struct dpd_s             *dpd, 
				 struct dpdscan_s         *dpdscan, 
				 struct dpdscanfast_s     *dpdscanfast, 
				 struct dpf_s             *dpf, 
				 struct rnascfg_s         *mx, 
				 struct rnascfgscan_s     *mxscan, 
				 struct rnascfgscanfast_s *mxscanfast, 
				 struct scores_s          *sc, 
				 struct ali_s             *ali,
				 struct scanfast_s        *scanfast,
				 int add_codon, int add_hexamer, int allow_pseudoknots, int alignment, int use_ribo_approx, 
				 int cyk, int changefreq, int changefreqoverall, int changefreqwin,
				 int fastintloop, int logodds, int n_shuffles, int oldrna, int pedantic, 
				 int shuffle, int sre_shuffle, int con_shuffle, int strand, int sweep, 
				 double *tfactor, int traceback, int verbose, int ones, int parse, int rnass, int doends, int scan, int shtoo, 
				 int twindow, int evd, int N, double k, int shift, int *histo_sc);


/* One-line program description.  main() prints it with puts() for -h and
 * when the number of command-line arguments is wrong.
 */
static char banner[] = "qrna -- scores an alignment with the 3 models";

/* Help text for the single-letter options.
 *
 * Passed to Getopt() as its usage string and printed by main() for -h, for a
 * wrong number of arguments, and when no input file was given.
 *
 * Every single-dash entry of OPTIONS[] is described here, including -O and -u
 * which main() accepts; options that take an argument show it as <...>.
 */
static char usage[]  = "\
Usage: eqrna [-options] <input_file.q> \n\
where options are:\n\
   -A             : do an all-to-all comparison between the two input files\n\
   -a             : print alignment \n\
   -B             : sre_shuffle the alignment keeping the gap structure of the window\n\
   -b             : shuffle the alignment \n\
   -c <cfgfile>   : <cfgfile> to use to train the rna model (default = tRNA+rRNA)\n\
   -C             : con_shuffle the alignment shuffling conserved, mutated, and gap positions independently\n\
   -D <codfile>   : include a file of coding-coding frequencies for the coding model\n\
   -d             : log2 form (default = log2-odds space )\n\
   -e <num>       : number of sequences skipped in second file (for multiple comparisons). default 0. \n\
   -E <censor>    : generate histogram of scores and calculate Gumbel parameters [censor=0 (no censorship) to 1]\n\
   -F             : change the overall base composition of the 3 models, based on nts frequencies in the whole alignment\n\
   -f             : use full dp for the probabilistic models (do not conserve the alignment-default is diagonal dp)\n\
   -G             : change the overall base composition of the 3 models, based on nts frequencies for each window\n\
   -g             : do forward (default is viterbi)\n\
   -H <Hexfile>   : include a file of Hexamer frequencies for the coding model\n\
   -h             : print short help and usage info\n\
   -i <num>       : evolutionary time factor (default i=1)\n\
   -j             : use semi-full dp for the probabilistic model (use the alignment created by OTH)\n\
   -k             : allow pseudoknots (not implemented)\n\
   -l <minlenhit> : change the minlenhit parameter (default 0)\n\
   -L <maxlenhit> : change the maxlenhit parameter (default provided by longest sequence)\n\
   -m             : do Forward and Viterbi Diagonal dp\n\
   -n             : do Forward and Viterbi Full     dp\n\
   -N <num>       : in combination with -E number of shuffles. default 1. \n\
   -o <outfile>   : direct structure-annotated sequence to <outfile>\n\
   -O <freqfile>  : change the overall base composition of the 3 models to the fixed target frequencies in <freqfile>\n\
   -p <pamfile>   : <pamfile> to use (default = BLOSUM62)\n\
   -P             : pedantic, check your evolutionary models for inconsistencies\n\
   -q             : do Forward and Viterbi semi-full dp\n\
   -r             : do Nussinov rna model (default is a 3-state model) \n\
   -R <ribofile>  : <ribofile> to use to train the rna model (default = RIBOPROB85-60)\n\
   -s             : do global (not dp)\n\
   -S             : sweep a collection of motifs(seqfile1) across another bunch of sequences(seqfile2)\n\
   -t             : print traceback\n\
   -u             : do not use the L^3 formalism for internal loops (default is to use it)\n\
   -v             : verbose debugging output\n\
   -w <num>       : scanning window (default is full length)\n\
   -x <num>       : slide positions (default is 50)\n\
   -y <num>       : grab n sequences at random from the second data file to compare to each one of the first file\n\
";

/* Help text for the long ("--") options; main() prints it right after the
 * single-letter usage text.
 *
 * Names are spelled exactly as registered in OPTIONS[] (the file-dumping
 * option is --printq), and --approx / --oldrna, which main() accepts, are
 * listed as well.
 */
static char experts[] = "\
   --approx      : use approximation to calculate the RNA riboprobs (default RIBOSUM).\n\
   --cyk         : use CYK algorithm to calculate RNA score (default is Inside).\n\
   --latte       : I just called starbucks with your order...\n\
   --ones        : score with the three models only the given strand.\n\
   --oldrna      : use the 2.0.2 method to calculate the RNA pair probabilities.\n\
   --parse       : input is a selex file. For a given ss, it calculates the probability of either the best or all the possible parses with the RNA grammar.\n\
   --printq <f>  : print to file <f> the actual alignments scored (useful if you want to store shuffled alignments) \n\
   --rnass       : print the alignment with the predicted RNA secondary structure.\n\
   --noends      : do not evaluate ends. Default now: calculate the actual boundaries of the model calls\n\
   --scan        : scanning version. no windows. faster.\n\
   --shtoo       : qrna the alignment and also give one shuffled score.\n\
   --twindow     : select the divergence time based on the %id of the window. Default is by %id of alignment.\n\
\n\
 Debugging, experimentation:\n\
   --regress <f> : save regression test information to file <f>\n\
\n";

/* Option table handed to Getopt() in main().
 *
 * Each entry is { option name, TRUE/FALSE flag, argument kind }.  In this
 * table the flag is TRUE for every single-dash option and FALSE for every
 * "--" option; sqdARG_STRING marks exactly the options whose branch in
 * main() reads optarg, sqdARG_NONE the ones that only toggle a flag.
 * main() dispatches on these names with strcmp().
 *
 * NOTE(review): the field meanings are defined by struct opt_s in squid.h,
 * which is not visible here -- confirm there.
 */
static struct opt_s OPTIONS[] = {
  { "-A",        TRUE,  sqdARG_NONE},
  { "-a",        TRUE,  sqdARG_NONE},
  { "-B",        TRUE,  sqdARG_NONE},
  { "-b",        TRUE,  sqdARG_NONE},
  { "-c",        TRUE,  sqdARG_STRING},
  { "-C",        TRUE,  sqdARG_NONE},
  { "-D",        TRUE,  sqdARG_STRING},
  { "-d",        TRUE,  sqdARG_NONE},
  { "-e",        TRUE,  sqdARG_STRING},
  { "-E",        TRUE,  sqdARG_STRING},
  { "-F",        TRUE,  sqdARG_NONE},
  { "-f",        TRUE,  sqdARG_NONE},
  { "-G",        TRUE,  sqdARG_NONE},
  { "-g",        TRUE,  sqdARG_NONE},
  { "-h",        TRUE,  sqdARG_NONE},
  { "-H",        TRUE,  sqdARG_STRING},
  { "-i",        TRUE,  sqdARG_STRING},
  { "-j",        TRUE,  sqdARG_NONE},
  { "-k",        TRUE,  sqdARG_NONE},
  { "-l",        TRUE,  sqdARG_STRING},
  { "-L",        TRUE,  sqdARG_STRING},
  { "-m",        TRUE,  sqdARG_NONE},
  { "-n",        TRUE,  sqdARG_NONE},
  { "-N",        TRUE,  sqdARG_STRING},
  { "-o",        TRUE,  sqdARG_STRING},
  { "-O",        TRUE,  sqdARG_STRING},
  { "-p",        TRUE,  sqdARG_STRING},
  { "-P",        TRUE,  sqdARG_NONE},
  { "-q",        TRUE,  sqdARG_NONE},
  { "-r",        TRUE,  sqdARG_NONE},
  { "-R",        TRUE,  sqdARG_STRING},
  { "-S",        TRUE,  sqdARG_NONE},
  { "-s",        TRUE,  sqdARG_NONE},
  { "-t",        TRUE,  sqdARG_NONE},
  { "-u",        TRUE,  sqdARG_NONE},
  { "-v",        TRUE,  sqdARG_NONE},
  { "-w",        TRUE,  sqdARG_STRING},
  { "-x",        TRUE,  sqdARG_STRING},
  { "-y",        TRUE,  sqdARG_STRING},
  { "--approx",  FALSE, sqdARG_NONE},
  { "--cyk",     FALSE, sqdARG_NONE},
  { "--latte",   FALSE, sqdARG_NONE},
  { "--noends",  FALSE, sqdARG_NONE},
  { "--ones",    FALSE, sqdARG_NONE},
  { "--oldrna",  FALSE, sqdARG_NONE},
  { "--parse",   FALSE, sqdARG_NONE},
  { "--printq",  FALSE, sqdARG_STRING},
  { "--regress", FALSE, sqdARG_STRING},
  { "--rnass",   FALSE, sqdARG_NONE},
  { "--scan",    FALSE, sqdARG_NONE},
  { "--shtoo",   FALSE, sqdARG_NONE},
  { "--twindow", FALSE, sqdARG_NONE},
};

/* Number of entries in OPTIONS[] (an element count, not a byte count). */
#define NOPTIONS (sizeof(OPTIONS) / sizeof(OPTIONS[0]))

int
main(int argc, char **argv)
{
  SQFILE  *sqfp;	          /* open sequence file                                     */
  SQFILE  *sqfp_b;	          /* open sequence file                                     */
  SQINFO   sqinfoX;               /* info structures for seqX                               */
  SQINFO   sqinfoY;               /* info structures for seqY                               */
  char    *seqfile;               /* input sequence file                                    */
  char    *seqfile_b;             /* input sequence file                                    */
  char    *seqX, *revX;	          /* sequence X                                             */
  char    *seqY;	          /* sequence to compare                                    */
  int     *iseqX, *isegX;	  /* sequence X integer form (without and with gaps)        */
  int     *iseqY, *isegY;	  /* sequence Y integer form (without and with gaps)        */
  int     *ctX;                   /* .ct notation for seqX's RNA structure (if any)         */
  int     *ctY;                   /* .ct notation for seqY's RNA structure (if any)         */
  long int curr_Lmax, Lmax;       /* length of the max alignment                            */
  int      format;                /* format of seq file                                     */

  char     *codonfile;            /* codon-codon joint frequencies                          */
  char     *hexapfile;            /* Hexamer joint frequencies                              */
  char     *pamfile;              /* PAM substitution matrix                                */
  char     *cfgfile;              /* RNA grammar file                                       */
  char     *ribofile;             /* RNA pair-to-pair probs                                 */
  char     *targetfreqfile;       /* background probabilities                               */

  double                  ***cfg_node;             /* CFG node frequencies extracted form cfgfile            */
  double                   **hexa;                 /* Hexamer  frquencies extracted form hexapfile           */
  double                    *codon_joint;          /* codon-codon joint probabilbities                       */
  double                    *targetfreq;
  struct psubs_s            *pam_star;
  struct psubs_s            *codprob_star;
  struct psubs_s            *mutpxy_star;
  struct psubs_s            *mut5pxy_star;
  struct psubs_s            *mutpxy_rna_unpaired_star;
  struct psubs_s            *mut5pxy_rna_unpaired_star;
  struct psubs_s            *mut5pxy_rna_loop_star;
  struct psubs_s            *mutpxy_rna_paired_star;
  struct psubs_s            *mut5pxy_rna_paired_star;
  struct psubs_s            *riboprob_star;
  struct psubs_s            *riboprobapprox_star;
  struct pnonsubs_s         *pair5prob_star;

  struct   emodel_s          *emodel;        /* transition + emission probs 3 models + null model                         */
  struct   rnascfg_s         *mx;           /* matrices for SCFG part of RNA dp calculation                              */
  struct   rnascfgscan_s     *mxscan;       /* matrices for SCFG part of RNA dp calculation -- scanning version          */
  struct   rnascfgscanfast_s *mxscanfast;   /* matrices for SCFG part of RNA dp calculation -- scanning version          */
  struct   dpd_s             *dpd;          /* structure with dp matrices -- three models                                */
  struct   dpdscan_s         *dpdscan;      /* structure with dp matrices -- three models                                */
  struct   dpdscanfast_s     *dpdscanfast;  /* structure with dp matrices -- three models                                */
  struct   dpf_s             *dpf;          /* structure with dp matrices -- three models                                */
  struct   ali_s             *ali;          /* arrays to store the alignment created by full viterbi                     */ 
  struct   scanfast_s        *scanfast;     /* arrays to store scan scores and ends                                      */ 
  struct   scores_s          *sc;           /* structure containing all possible scores                                  */
  struct   dos_s              d;            /* collection of flags to decide the scoring method(s)                       */

  double   tfactor;               /* evolutionary time factor                                                  */
  int      add_codon;	          /* TRUE adds codon-codon frequencies for coding model                        */
  int      add_hexamer;	          /* TRUE adds hexamer frequencies for coding model                            */
  int      alignment;	          /* TRUE prints alignment                                                     */
  int      allow_pseudoknots;	  /* TRUE to logodds space                                                     */
  int      alltoall;	          /* TRUE all-to-all comparison between the two input files                    */
  int      approx;                /* TRUE use approximation to calculate the RNA riboprobs (default RIBOSUM)   */
  int      changefreq;            /* TRUE change the baseline base composition of 3 models according to freqs of  whole alignment */
  int      changefreqoverall;     /* TRUE change the baseline base composition of 3 models to a fixed value    */
  int      changefreqwin;         /* TRUE change the baseline base composition of 3 models according to freqs of each window     */
  int      cyk;                   /* TRUE use CYK algorithm to calculate RNA scores (default is Inside)        */
  int      evd;                   /* TRUE to calculate the histogram of scors and evd fit                      */
  int      fastintloop;           /* TRUE  ==  use L^3 formalism for internal loops                            */
  int      logodds;               /* TRUE to wok in logodds mode                                               */
  int      modelsareon;           /* TRUE for eqrna.c FALSE for eqrna_sample.c              */
  int      oldrna;                /* TRUE use 2.0.2 method to calculate the rna pair probabilities             */
  int      pedantic;              /* TRUE do some checks for evolutionary models to debug                      */
  int      ran;                   /* TRUE to pick a random seq from the second data file                       */
  int      rani = -1;             /* number of shuffles per sequence of file A if ran==TRUE                    */
  int      ranc;                  /* counter for the number of shuffles                                        */
  int      shuffle;               /* TRUE to shuffle alignment                                                 */
  int      sre_shuffle;           /* TRUE to sre_shuffle alignment                                             */
  int      con_shuffle;           /* TRUE to con_shuffle alignment                                             */
  int      sweep;                 /* TRUE sweep one motif along a sequence                                     */
  int      traceback;             /* TRUE to traceback alignment                                               */
  int      verbose;               /* TRUE to be extremely verbose to debug                                     */

  int      latte   = FALSE;       /* friday lunch 31 AUG 2001                                                  */  
  int      ones    = FALSE;       /* TRUE score with the three models only the given strand                    */
  int      parse   = FALSE;       /* TRUE input is a selex file. Por a given ss, itcalculates the probablity   *
				   *      of either the best or all the possible parses with the RNA grammar   */ 
  int      rnass   = FALSE;       /* TRUE prints the alignment with the predicted RNA secondary structure      */ 
  int      scan    = FALSE;       /* TRUE scanning version. no windows. faster.                                */ 
  int      doends  = TRUE;        /* TRUE calculate the boundaries of the model's calls                        */ 
  int      shtoo   = FALSE;       /* TRUE also gives one shuflled score                                        */ 
  int      twindow = FALSE;       /* TRUE calculates divergence time by window instead of by alingment         */ 

  char    *outfile;               /* where to send the output                               */
  FILE    *ofp;	                  /* open output file                                       */
  FILE   *targetfp;               /* open target frequencies file                           */
  char    *optname;
  char    *optarg; 
  char    *printqfile;            /* file to write the actual alignments scored             */
  FILE    *printqfp;              /* open file to write the actual alignments scored        */
  char    *regressionfile;        /* file to dump regression test info to                   */
  FILE    *regressfp;             /* open file to dump regression test info                 */
  int      optind;	
  int      s, s_new, seed;
  int      num_seqs = 0;
  int      num = 0, skip;

  int      minlenhit = 0;         /* min length hit by default 0                                */
  long int maxlenhit = 0;         /* max length hit by default provided by the longer aligments */
  int      nseqs;                 /* total number of sequences                                  */
  int      nXseq = 0, nYseq = 0;
  int      num_ali = 0;
  int      slide, window, winop;
  int      Lw;

  int     *histo_sc;
  int      N = 500;
  double   k = 0.5;
  int      shift = 250;
  double   censor_cutoff; /* how much area of the distribution is going to be censored */
  int      n_shuffles = 1;

  int      x;

  /* re-seed the random number generator.
   */
  seed = (int) time ((time_t *) NULL);
  sre_srandom(seed); /* reinit sre_random each time you shuffle a sequence */

  /* Parse command line
   */
  add_codon         = FALSE;  /* TRUE  ==  adds codon-codon frequencies for coding                       */
  add_hexamer       = FALSE;  /* TRUE  ==  adds hexamer frequencies for coding                           */
  alignment         = FALSE;  /* TRUE  ==  prints alignment                                              */
  allow_pseudoknots = FALSE;  /* TRUE  ==  allow pseudoknots                                             */
  alltoall          = FALSE;  /* TRUE  ==  all-to-all comparison                                         */
  approx            = FALSE;  /* TRUE use approximation to calculate the RNA riboprobs (default RIBOSUM) */
  changefreq        = FALSE;  /* TRUE  ==  change the base comp of 3 models for whole alignment          */
  changefreqoverall = FALSE;  /* TRUE  ==  change the base comp of 3 models to a fixed value             */
  changefreqwin     = FALSE;  /* TRUE  ==  change the base comp of 3 models for each window              */
  cyk               = FALSE;  /* TRUE  ==  use CYK algorithm to calculate RNA scores (default is Inside) */
  evd               = FALSE;  /* TRUE  ==  calculate the histogram of scors and evd fit                  */
  fastintloop       = TRUE;   /* TRUE  ==  use L^3 formalism for internal loops                          */
  logodds           = TRUE;   /* FALSE ==  no logoods                                                    */
  modelsareon       = TRUE;   /* TRUE for eqrna.c FALSE for eqrna_sample.c                               */
  oldrna            = FALSE;  /* TRUE use 2.0.2 version to calculate the RNA riboprobs                   */
  pedantic          = FALSE;  /* TRUE  ==  check your evolutionary models                                */
  ran               = FALSE;  /* TRUE  ==  pick a random seq from the second file                        */
  shuffle           = FALSE;  /* TRUE  ==  shuffle alignment                                             */
  sre_shuffle       = FALSE;  /* TRUE  ==  sre_shuffle alignment                                         */
  con_shuffle       = FALSE;  /* TRUE  ==  con_shuffle alignment                                         */
  sweep             = FALSE;  /* TRUE  ==  sweep one motif along a sequence                              */
  traceback         = FALSE;  /* TRUE  ==  traceback alignment                                           */
  verbose           = FALSE;  /* TRUE  ==  for debuging                                                  */
  winop             = FALSE;  /* TRUE  ==  use a window; FALSE == use whole alignment                    */

  d.frdiag         = FALSE;   /* TRUE  ==  calculates forward diagonal                                   */
  d.frfull         = FALSE;   /* TRUE  ==  calculates forward full                                       */
  d.frsemi         = FALSE;   /* TRUE  ==  calculates forward semi-full                                  */
  d.fulldp         = FALSE;   /* TRUE  ==  calculates full dp scores                                     */
  d.global         = FALSE;   /* TRUE  ==  calculates global alignments                                  */
  d.nus            = FALSE;   /* TRUE  ==  uses NUS model for rna                                        */
  d.semidp         = FALSE;   /* TRUE  ==  calculates semi-full dp scores                                */
  d.frdiag         = FALSE;   /* TRUE  ==  calculates forward diagonal                                   */
  d.vifull         = FALSE;   /* TRUE  ==  calculates viterbi full                                       */
  d.visemi         = FALSE;   /* TRUE  ==  calculates viterbi semi-full                                  */
  d.vidiag         = TRUE;    /* FALSE ==  calculates viterbi dp scores                                  */
  d.forward        = FALSE;   /* TRUE  ==  calculates forward dp scores                                  */
  d.twodiag        = FALSE;   /* TRUE  ==  both Forward and Viterbi diagonal dp                          */
  d.twofull        = FALSE;   /* TRUE  ==  both Forward and Viterbi full dp                              */
  d.twosemi        = FALSE;   /* TRUE  ==  both Forward and Viterbi semi-full dp                         */
  
  tfactor        = -1.0;
  slide          = 50;
  skip           = 0;

  cfgfile        = NULL;
  codonfile      = NULL;
  hexapfile      = NULL;
  pamfile        = "BLOSUM62";
  outfile        = NULL;
  printqfile     = NULL;
  regressionfile = NULL;
  ribofile       = NULL;
  seqfile        = NULL;
  seqfile_b      = NULL;
   
  while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,
		&optind, &optname, &optarg))
    {
      if      (strcmp(optname, "-A") == 0)   alltoall              = TRUE;
      else if (strcmp(optname, "-a") == 0)   alignment             = TRUE;
      else if (strcmp(optname, "-B") == 0)   sre_shuffle           = TRUE;
      else if (strcmp(optname, "-b") == 0)   shuffle               = TRUE;
      else if (strcmp(optname, "-c") == 0)   cfgfile               = optarg;
      else if (strcmp(optname, "-C") == 0)   con_shuffle           = TRUE;
      else if (strcmp(optname, "-D") == 0)   codonfile             = optarg;
      else if (strcmp(optname, "-d") == 0)   logodds               = FALSE;
      else if (strcmp(optname, "-e") == 0)   skip                  = atoi(optarg);
      else if (strcmp(optname, "-E") == 0) { censor_cutoff         = atof(optarg); evd = TRUE; }
      else if (strcmp(optname, "-F") == 0)   changefreq            = TRUE;
      else if (strcmp(optname, "-f") == 0)   d.fulldp              = TRUE;
      else if (strcmp(optname, "-G") == 0)   changefreqwin         = TRUE;
      else if (strcmp(optname, "-g") == 0)   d.forward             = TRUE;
      else if (strcmp(optname, "-H") == 0)   hexapfile             = optarg;
      else if (strcmp(optname, "-i") == 0) { tfactor               = atof(optarg);   if (tfactor < 0.0) Die("time has to be positive!"); }
      else if (strcmp(optname, "-j") == 0)   d.semidp              = TRUE;
      else if (strcmp(optname, "-k") == 0)   allow_pseudoknots     = TRUE;
      else if (strcmp(optname, "-l") == 0)   minlenhit             = atoi(optarg);
      else if (strcmp(optname, "-L") == 0)   maxlenhit             = atoi(optarg);
      else if (strcmp(optname, "-m") == 0)   d.twodiag             = TRUE;
      else if (strcmp(optname, "-n") == 0)   d.twofull             = TRUE;
      else if (strcmp(optname, "-N") == 0)   n_shuffles            = atoi(optarg);
      else if (strcmp(optname, "-o") == 0)   outfile               = optarg;
      else if (strcmp(optname, "-O") == 0) { changefreqoverall     = TRUE; targetfreqfile = optarg; }
      else if (strcmp(optname, "-p") == 0)   pamfile               = optarg;
      else if (strcmp(optname, "-P") == 0)   pedantic              = TRUE;
      else if (strcmp(optname, "-q") == 0)   d.twosemi             = TRUE;
      else if (strcmp(optname, "-r") == 0)   d.nus                 = TRUE;
      else if (strcmp(optname, "-R") == 0)   ribofile              = optarg;
      else if (strcmp(optname, "-S") == 0)   sweep                 = TRUE;
      else if (strcmp(optname, "-s") == 0)   d.global              = TRUE;
      else if (strcmp(optname, "-t") == 0)   traceback             = TRUE;
      else if (strcmp(optname, "-u") == 0)   fastintloop           = FALSE;
      else if (strcmp(optname, "-v") == 0)   verbose               = TRUE;
      else if (strcmp(optname, "-w") == 0) { window                = atoi(optarg); winop = TRUE; }
      else if (strcmp(optname, "-x") == 0)   slide                 = atoi(optarg);
      else if (strcmp(optname, "-y") == 0)   rani                  = atoi(optarg);
      else if (strcmp(optname, "--approx")  == 0)   approx         = TRUE;
      else if (strcmp(optname, "--cyk")     == 0)   cyk            = TRUE;
      else if (strcmp(optname, "--latte")   == 0)   latte          = TRUE;
      else if (strcmp(optname, "--noends")  == 0)   doends         = FALSE;
      else if (strcmp(optname, "--ones")    == 0)   ones           = TRUE;
      else if (strcmp(optname, "--oldrna")  == 0)   oldrna         = TRUE;
      else if (strcmp(optname, "--parse")   == 0)   parse          = TRUE;
      else if (strcmp(optname, "--printq")  == 0)   printqfile     = optarg;
      else if (strcmp(optname, "--regress") == 0)   regressionfile = optarg;
      else if (strcmp(optname, "--rnass")   == 0)   rnass          = TRUE;
      else if (strcmp(optname, "--scan")    == 0)   scan           = TRUE;
      else if (strcmp(optname, "--shtoo")   == 0)   shtoo          = TRUE;
      else if (strcmp(optname, "--twindow") == 0)   twindow        = TRUE;
      else if (strcmp(optname, "-h")        == 0) 
	{
	  puts(banner);
	  printf("          %s (%s)", RELEASE, RELEASEDATE);
	  printf(" using squid %s (%s)\n", squid_version, squid_date);
	  puts(usage);
	  puts(experts);                                                                                                      
	  exit(0);
	}
    }
  
  if (argc - optind == 1) {
    seqfile  = argv[optind]; 
  }
  else if (argc - optind == 2) {
    seqfile   = argv[optind++]; 
    seqfile_b = argv[optind]; 
  }
  else {
    puts(banner);
    printf("version: %s (%s)\n", RELEASE, RELEASEDATE);
    Die("Incorrect number of command line arguments.\n%s \n\n%s\n", usage, experts);
  }
  
  if (rani > 0) ran = TRUE;

  /* options incompatibilities
   */
  if (scan && twindow) Die("scan version and twindow are not compatible options\n");
  if (scan && rnass)   Die("scan version and rnass are not compatible options\n");
  if (winop && window <= 0) Die("window has to be larger than zero\n");
  if (!winop) window = -1;

  if (cfgfile == NULL) /* SCFG is not provided */
    cfgfile = FileConcat("", "mix_tied_linux.cfg");

  if (ribofile == NULL) /* RIBORPROB is not provided */
    ribofile = FileConcat("", "RIBOPROB85-60.SEP04.mat");
  
  if (seqfile == NULL) {
    puts(usage);
    puts(experts);                      
    exit(0);
  }
  
  /* Load the target frequencies
   */
  if (changefreqoverall) {
    if ((targetfp = fopen(targetfreqfile, "r")) == NULL &&
	(targetfp = EnvFileOpen(targetfreqfile, "QRNADB")) == NULL)
      Die("Failed to open targetfreq file %s", targetfreqfile);
    
    if (! ParseTargetFreqFile(targetfp, &targetfreq))
      Die("Failed to parse targetfreq file");
    
    fclose(targetfp);
  }
  else {
    targetfreq  = (double *) MallocOrDie (sizeof(double) * 4);
    for (x = 0; x < 4; x ++) targetfreq[x] = 0.0;
  }
  if (verbose) {
    printf("Target frequencies\n");
    PrintVectorProbs(stdout, targetfreq, 4);
  }

  if (codonfile) add_codon = TRUE;
  else           add_codon = FALSE;

  if (hexapfile) add_hexamer = TRUE;
  else           add_hexamer = FALSE;


  /* Determine which DP we are asked to do
   *
   * (very very not elegant, but oh well, bite me!)
   */
  if (d.global)  { d.vidiag = FALSE; }
  if (d.forward && !d.fulldp && !d.semidp) { d.vidiag = FALSE; d.vifull = FALSE; d.frdiag = TRUE; }

  if (d.twodiag) { d.vidiag = TRUE; d.frdiag = TRUE; }

  if ((d.fulldp || d.semidp || d.twofull || d.twosemi) && !d.twodiag) d.vidiag = FALSE;

  if ((d.fulldp &&  d.forward) || d.twofull) d.frfull = TRUE;
  if ((d.fulldp && !d.forward) || d.twofull) d.vifull = TRUE;
  if ((d.semidp &&  d.forward) || d.twosemi) d.frsemi = TRUE;
  if ((d.semidp && !d.forward) || d.twosemi) d.visemi = TRUE;
  
  /* Open output file 
   */
  ofp = stdout;
  if (outfile != NULL && (ofp = fopen(outfile, "w")) == NULL)
    Die("Failed to open output file %s", outfile);
  
  /* Open sequence file(s)
   */
  if (! SeqfileFormat(seqfile, &format, NULL))
    Die("Failed to determine format of sequence file %s\n", seqfile);     
  
   /* Open regression test file 
    */
  if (regressionfile != NULL) {
    if ((regressfp = fopen(regressionfile, "w")) == NULL)
      Die("Failed to open regression test file %s", regressionfile);
  }
  
   /* Open printq file 
    */
  if (printqfile != NULL) {
    if ((printqfp = fopen(printqfile, "w")) == NULL)
      Die("Failed to open printq file %s", printqfile);
  }
  
  if (maxlenhit > 0) 
    {
      nseqs = 0;

      if (!winop) Lmax = maxlenhit;
      else        Lmax = (maxlenhit < window)? maxlenhit : window;
    }
  else 
    {
      /* determine maximum length for sequence file (Lmax)
       * (this is too expensive to do)
       */
      if (winop) 
	maxlenhit = BIGINT;
      else /* if no window given, limit the max length of aligments for memoery reasons */
	maxlenhit = 1000;
      
      if ((sqfp = SeqfileOpen(seqfile, format, NULL)) == NULL)
	Die("Failed to open sequence file %s", seqfile);
      CheckMaxLength(sqfp, format, &curr_Lmax, &nseqs, maxlenhit); 
      Lmax = curr_Lmax; 
      SeqfileClose(sqfp);
    }

  if ((sqfp = SeqfileOpen(seqfile, format, NULL)) == NULL)
    Die("Failed to open sequence file %s", seqfile);
  
  
  if(seqfile_b != NULL) {
    if (! SeqfileFormat(seqfile_b, &format, NULL))
      Die("Failed to determine format of sequence file %s\n", seqfile_b);
    
    if (nseqs > 0) {
      /* determine maximum length for sequence file (Lmax)
       *
       */
      if ((sqfp_b = SeqfileOpen(seqfile_b, format, NULL)) == NULL)
	Die("Failed to open sequence file %s", seqfile_b);
      CheckMaxLength(sqfp_b, format, &curr_Lmax, &nseqs, maxlenhit);
      if (curr_Lmax > Lmax) Lmax = curr_Lmax;
      SeqfileClose(sqfp_b);
    }
    
    if ((sqfp_b = SeqfileOpen(seqfile_b, format, NULL)) == NULL)
      Die("Failed to open sequence file %s", seqfile_b);
  }

  if (alltoall && seqfile_b == NULL) {
    seqfile_b = seqfile;
    if ((sqfp_b = SeqfileOpen(seqfile_b, format, NULL)) == NULL)
      Die("Failed to open sequence file %s", seqfile_b);
  }
  
  /* Print banner
   */
   PrintBanner(ofp, pamfile, cfgfile, ribofile, seqfile, seqfile_b, allow_pseudoknots, nseqs, 
	       Lmax, maxlenhit, minlenhit, approx, sweep, window, slide, scan);
  
  /* Allocate space for iseg's, these arrays keep the full alignments
   */
  AllocIntSeqs(Lmax, &isegX, &isegY);
  
  /* Allocate space for arrays used in the calculation [iseq's, dpd, dpf, mx, sc->ardiag, sc->arfull]
   */
  Lw = (window > -1)? window : Lmax; /* window of calculation */
  AllocIntSeqs(2*Lw, &iseqX, &iseqY);
  if (d.vidiag || d.vifull || d.visemi || d.frsemi) AllocAli(2*Lw, &ali);

  /* Allocate the Models and read the input files 
   *
   * eqrna.c
   *
   *
   *                            |   function: EvolConstructModels_phase1()
   *                            |                                      changefreq = FALSE*
   *           IF (changefreq)  |
   *                            |
   *                            |   function: EvolConstructModels_phase2()
   *                            |                                      changefreq = TRUE
   *
   *                                * we need to wait until we are given the sequence to know the background basecomposition
   *
   *
   *                            |   function: EvolConstructModels_phase1()
   *                            |                                      changefreq = FALSE
   *           ELSE             |
   *      set:  targetfreq=0.0  |
   *                            |   function: EvolConstructModels_phase2()
   *                            |                                      changefreq = TRUE [change background freqs to the 
   *                            |                                                         single nt marginalizations of P(c1,c2|t).
   *                            |                                                         This is set in analyze_2_sequences().]
   *
   *
   *
   */
  EvolConstructModels_phase1(ofp, codonfile, hexapfile, pamfile, cfgfile, ribofile, 
			     targetfreq, &emodel, &cfg_node, &hexa, &codon_joint, 
			     &pam_star, &codprob_star, &mutpxy_star, &mut5pxy_star, &riboprob_star, &riboprobapprox_star, 
			     &pair5prob_star, 
			     &mutpxy_rna_unpaired_star, &mut5pxy_rna_unpaired_star, 
			     &mut5pxy_rna_loop_star, 
			     &mutpxy_rna_paired_star, &mut5pxy_rna_paired_star, 
			     add_codon, add_hexamer, approx, modelsareon, FALSE, changefreqoverall, logodds, pedantic, verbose);
  
  /* Allocate for DP
   */
  /* SCFG part of the RNA model 
   */
  mx = AllocScfgRNA(2*Lw, fastintloop, rnass);
  if (scan) mxscanfast = AllocScfgScanFastRNA(2*Lw, fastintloop);

  /* HMM-COMPONENT of 3 models 
   */
  if (d.global || d.frdiag || d.vidiag || d.frsemi || d.visemi) {
    dpd = AllocDpDiagMtx(2*Lw);  
    if (scan) dpdscanfast = AllocDpDiagScanFastMtx(2*Lw); 
  }

  if (d.frsemi || d.visemi) dpf = AllocDpSemiMtx(2*Lw); 
  if (d.frfull || d.vifull) dpf = AllocDpFullMtx(Lw, Lw); 
  
  /* Allocate scores
   */
  sc = AllocScores();

  if (d.frdiag || d.frsemi) sc->ardiag = AllocDpDiagScores(2*Lw);
  if (d.frfull || d.frsemi) sc->arfull = AllocDpScores(2*Lw);
 
  /* Allocate structures for scores and 'ends' in the scanfast version
   */
  if (scan) scanfast = AllocScanFast(Lmax, Lw, slide);

  if (ran) {
    if (seqfile_b == NULL) Die ("this ran version requires two files");
    while (ReadGapSeq(sqfp_b, format, &seqY, &sqinfoY)) num_seqs++;
    FreeSequence(seqY, &sqinfoY);  
  }
  else if (sweep) {
    if (seqfile_b == NULL) Die ("this motif-scanning version requires two files");
  }
  else {
    while (seqfile_b != NULL && num++ < skip) {
      ReadGapSeq(sqfp_b, format, &seqY, &sqinfoY);
      FreeSequence(seqY, &sqinfoY);  
    }
  }
  
  if (evd) AllocHistogram (N, k, &histo_sc);

  /* Read seqX (sequence may have gaps)
   */
  while (ReadGapSeq(sqfp, format, &seqX, &sqinfoX))
    {
      nXseq++;
      /* Read seqY (sequence may have gaps)
       */
      if (sweep || alltoall) 
	{
	  while (ReadGapSeq(sqfp_b, format, &seqY, &sqinfoY)) { 
	    nYseq++;
	    num_ali++;
	    analyze_2_sequences(ofp, printqfp, printqfile, regressfp, regressionfile, format, num_ali,
				sqinfoX, seqX, isegX, iseqX, ctX, 
				sqinfoY, seqY, isegY, iseqY, ctY, 
				Lmax, Lw, maxlenhit, minlenhit, window, slide, 
				targetfreq, cfg_node, hexa, codon_joint, 
				pam_star, codprob_star, mutpxy_star, mut5pxy_star, riboprob_star, riboprobapprox_star, 
				pair5prob_star, mutpxy_rna_unpaired_star, mut5pxy_rna_unpaired_star, 			     
				mut5pxy_rna_loop_star, 
				mutpxy_rna_paired_star, mut5pxy_rna_paired_star,
				d, emodel, dpd, dpdscan, dpdscanfast, dpf, mx, mxscan, mxscanfast, sc, ali, scanfast,
				add_codon, add_hexamer, allow_pseudoknots, alignment, approx, 
				cyk, changefreq, changefreqoverall, changefreqwin, fastintloop, logodds, n_shuffles,
				oldrna, pedantic, shuffle, sre_shuffle, con_shuffle, 
				0, sweep, &tfactor, traceback, verbose, ones, parse, rnass, 
				doends, scan, shtoo, twindow, evd, N, k, shift, histo_sc);
	    if (sweep) {
	      RevCompChar(revX, seqX, sqinfoX.len);
	      analyze_2_sequences(ofp, printqfp, printqfile, regressfp, regressionfile, format, num_ali,
				  sqinfoX, revX, isegX, iseqX, ctX, 
				  sqinfoY, seqY, isegY, iseqY, ctY, 
				  Lmax, Lw, maxlenhit, minlenhit, window, slide,
				  targetfreq, cfg_node, hexa, codon_joint, 
				  pam_star, codprob_star, mutpxy_star, mut5pxy_star, riboprob_star, riboprobapprox_star, 
				  pair5prob_star, mutpxy_rna_unpaired_star, mut5pxy_rna_unpaired_star, 
				  mut5pxy_rna_loop_star, 
				  mutpxy_rna_paired_star, mut5pxy_rna_paired_star,
				  d, emodel, dpd, dpdscan, dpdscanfast, dpf, mx, mxscan, mxscanfast, sc, ali, scanfast,
				  add_codon, add_hexamer, allow_pseudoknots, alignment, approx, 
				  cyk, changefreq, changefreqoverall, changefreqwin, fastintloop, logodds, n_shuffles,
				  oldrna, pedantic, shuffle, sre_shuffle, con_shuffle,
				  1, sweep, &tfactor, traceback, verbose, ones, parse, rnass, 
				  doends, scan, shtoo, twindow, evd, N, k, shift, histo_sc);
	    }
	    FreeSequence(seqY, &sqinfoY); 
	  }
	  /* reopen the sqfp_b file to go over it with the next motif
	   */
	  SeqfileClose(sqfp_b);
	  nYseq = 0;
	  if ((sqfp_b = SeqfileOpen(seqfile_b, format, NULL)) == NULL)
	    Die("Failed to open sequence file %s", seqfile_b);
	  if (seqfile_b == seqfile)     
	    while (nYseq++ < nXseq) {
	      ReadGapSeq(sqfp_b, format, &seqY, &sqinfoY);
	      FreeSequence(seqY, &sqinfoY); 
	    }
	}
      else if (ran) {
	/* reopen the sqfp_b file to go over it with the next motif
	 */
	ranc = 0;
	s_new = (int)(sre_random()*num_seqs);

	while (ranc < rani) {

	  ranc ++;
	  s = s_new;

	  SeqfileClose(sqfp_b);
	  if ((sqfp_b = SeqfileOpen(seqfile_b, format, NULL)) == NULL)
	    Die("Failed to open sequence file %s", seqfile_b);
	  
	  
	  num = 0;
	  while (seqfile_b != NULL && num++ < s) {
	    ReadGapSeq(sqfp_b, format, &seqY, &sqinfoY);
	    FreeSequence(seqY, &sqinfoY);  
	  }
	  ReadGapSeq(sqfp_b, format, &seqY, &sqinfoY);
	  
	  analyze_2_sequences(ofp, printqfp, printqfile, regressfp, regressionfile, format, ++nYseq,
			      sqinfoX, seqX, isegX, iseqX, ctX, 
			      sqinfoY, seqY, isegY, iseqY, ctY, 
			      Lmax, Lw, maxlenhit, minlenhit, window, slide,
			      targetfreq, cfg_node, hexa, codon_joint, 
			      pam_star, codprob_star, mutpxy_star, mut5pxy_star, riboprob_star, riboprobapprox_star, 
			      pair5prob_star, mutpxy_rna_unpaired_star, mut5pxy_rna_unpaired_star, 				
			      mut5pxy_rna_loop_star, 
			      mutpxy_rna_paired_star, mut5pxy_rna_paired_star, 
			      d, emodel, dpd, dpdscan, dpdscanfast, dpf, mx, mxscan, mxscanfast, sc, ali, scanfast,
			      add_codon, add_hexamer, allow_pseudoknots, alignment, approx, 
			      cyk, changefreq, changefreqoverall, changefreqwin, fastintloop, logodds, n_shuffles,
			      oldrna, pedantic, shuffle, sre_shuffle, con_shuffle,
			      0, sweep, &tfactor, traceback, verbose, ones, parse, rnass, 
			      doends, scan, shtoo, twindow, evd, N, k, shift, histo_sc);
	  
	  FreeSequence(seqY, &sqinfoY); 
	  
	  while (s_new == s) 
	    s_new = (int)(sre_random()*num_seqs);
	  
	}
	
      }
      else if (!sweep && ReadGapSeq((seqfile_b == NULL)? sqfp:sqfp_b, format, &seqY, &sqinfoY)) 
	{ 
	  nYseq++;
	  
	  if (nXseq != nYseq) 
	    Die("seqs %c and %c are not in sync", sqinfoX.name, sqinfoY.name);
	  
	  analyze_2_sequences(ofp, printqfp, printqfile, regressfp, regressionfile, format, nYseq,
			      sqinfoX, seqX, isegX, iseqX, ctX, 
			      sqinfoY, seqY, isegY, iseqY, ctY, 
			      Lmax, Lw, maxlenhit, minlenhit, window, slide,
			      targetfreq, cfg_node, hexa, codon_joint,
			      pam_star, codprob_star, mutpxy_star, mut5pxy_star, riboprob_star, riboprobapprox_star, 
			      pair5prob_star, mutpxy_rna_unpaired_star, mut5pxy_rna_unpaired_star, 				
			      mut5pxy_rna_loop_star, 
			      mutpxy_rna_paired_star, mut5pxy_rna_paired_star,
			      d, emodel, dpd, dpdscan, dpdscanfast, dpf, mx, mxscan, mxscanfast, sc, ali, scanfast,
			      add_codon, add_hexamer, allow_pseudoknots, alignment, approx, 
			      cyk, changefreq, changefreqoverall, changefreqwin, fastintloop, logodds, n_shuffles,
			      oldrna, pedantic, shuffle, sre_shuffle, con_shuffle, 
			      0, sweep, &tfactor, traceback, verbose, ones, parse, rnass, 
			      doends, scan, shtoo, twindow, evd, N, k, shift, histo_sc);
	  
	  FreeSequence(seqY, &sqinfoY);  
	}
      FreeSequence(seqX, &sqinfoX);    

    }

  if (evd) FitHistoEVD (stdout, censor_cutoff, N, k, shift, histo_sc);

  if (regressionfile != NULL) printf("done. [%s]\n", regressionfile);
  
  /* Cleanup */
  if (outfile        != NULL) fclose(ofp); 
  if (printqfile     != NULL) fclose(printqfp);
  if (regressionfile != NULL) fclose(regressfp);
  if (seqfile        != NULL) SeqfileClose(sqfp); 
  if (seqfile_b      != NULL) SeqfileClose(sqfp_b); 
  
  if (sweep) { free(revX); }
  
  free(isegX); 
  free(isegY); 
  free(iseqX); 
  free(iseqY);  
  
  free(codon_joint);
  FreeSubsProbs(pam_star);
  FreeSubsProbs(codprob_star);
  FreeSubsProbs(mutpxy_star);
  FreeSubsProbs(mut5pxy_star);
  FreeSubsProbs(riboprob_star);
  if (approx) FreeSubsProbs(riboprobapprox_star);
  FreeNonSubsProbs(pair5prob_star);
  FreeSubsProbs(mutpxy_rna_unpaired_star);
  FreeSubsProbs(mut5pxy_rna_unpaired_star);
  FreeSubsProbs(mut5pxy_rna_loop_star);
  FreeSubsProbs(mutpxy_rna_paired_star);
  FreeSubsProbs(mut5pxy_rna_paired_star);
  FreeSCFGNode(cfg_node);
  if (add_hexamer) free(hexa);
  free(targetfreq);

  FreeScfgRNA(mx, rnass); 
  if (scan) FreeScfgScanFastRNA(mxscanfast); 
  
  if (d.vidiag || d.vifull) FreeAli(ali); 
  
  if (d.global || d.frdiag || d.vidiag || d.frsemi || d.visemi) {
    FreeDpDiagMtx(dpd);  
    if (scan)  FreeDpDiagScanFastMtx(dpdscanfast);  
  }
  if (d.frsemi || d.visemi) FreeDpSemiMtx(dpf); 
  if (d.frfull || d.vifull) FreeDpFullMtx(dpf); 
  
  if (d.frdiag || d.frsemi) FreeDpScores(sc->ardiag); 
  if (d.frfull || d.frsemi) FreeDpScores(sc->arfull);

  if (scan) FreeScanFast(scanfast, Lmax, Lw, slide);

  EvolFreeModels(emodel); 
  FreeScores(sc);

  if (evd) free(histo_sc);

   return EXIT_SUCCESS;
}


/* Function: analyze_2_sequences()
 *
 * Purpose:  Score one pairwise alignment (seqX against seqY).
 *
 *           The steps, in order, are:
 *             - reject the hit (with a warning) if either sequence is longer
 *               than maxlenhit or shorter than minlenhit;
 *             - print the hit banner;
 *             - convert both sequences to integer arrays (FormatSeqs) and
 *               remove the gaps common to both (RemoveJointGaps);
 *             - obtain the single-nucleotide frequencies freqX/freqY, either
 *               from the alignment (changefreq) or from targetfreq
 *               (changefreqoverall); otherwise they stay at 0.0;
 *             - compute the %id/%gap/%mut of the alignment;
 *             - unless twindow is set, fix the divergence times, either from
 *               the alignment id or from *tfactor;
 *             - call EvolScoreScanFast() (scan) or EvolScoreWindow() once, or
 *               n_shuffles times when a shuffle option is combined with evd.
 *
 * Args:     Passed straight through from main(); see the prototype at the top
 *           of the file. Points worth knowing:
 *             sqinfoX, sqinfoY - received by value; changes made to them here
 *                                are not seen by the caller.
 *             ctX, ctY         - received by value; FormatSeqs() is handed
 *                                their addresses, and they are freed here
 *                                when the matching SQINFO_SS flag is set.
 *             tfactor          - -1.0 means "derive the time from the id";
 *                                may be raised to TMIN in place.
 *             dpdscan, mxscan, shift
 *                              - accepted but not referenced in this function.
 *
 * Returns:  void. Dies if minlenhit >= maxlenhit, or if parse is requested
 *           for a format other than kSquid/kSelex.
 */
void
analyze_2_sequences (FILE *ofp, 
                     FILE *printqfp,  char *printqfile, 
                     FILE *regressfp, char *regressionfile, 
                     int format, int num_align,
                     SQINFO sqinfoX, char *seqX, int *isegX, int *iseqX, int *ctX,
                     SQINFO sqinfoY, char *seqY, int *isegY, int *iseqY, int *ctY,
                     long int Lmax, int Lw, long int maxlenhit, int minlenhit,
                     int window, int slide,
                     double   *targetfreq,
                     double ***cfg_node,
                     double  **hexa,
                     double   *codon_joint,
                     struct psubs_s           *pam_star,
                     struct psubs_s           *codprob_star,
                     struct psubs_s           *mutpxy_star,
                     struct psubs_s           *mut5pxy_star,
                     struct psubs_s           *riboprob_star,
                     struct psubs_s           *riboprobapprox_star,
                     struct pnonsubs_s        *pair5prob_star,
                     struct psubs_s           *mutpxy_rna_unpaired_star,
                     struct psubs_s           *mut5pxy_rna_unpaired_star,
                     struct psubs_s           *mut5pxy_rna_loop_star,
                     struct psubs_s           *mutpxy_rna_paired_star,
                     struct psubs_s           *mut5pxy_rna_paired_star,
                     struct dos_s              d, 
                     struct emodel_s          *emodel, 
                     struct dpd_s             *dpd, 
                     struct dpdscan_s         *dpdscan, 
                     struct dpdscanfast_s     *dpdscanfast, 
                     struct dpf_s             *dpf, 
                     struct rnascfg_s         *mx, 
                     struct rnascfgscan_s     *mxscan, 
                     struct rnascfgscanfast_s *mxscanfast, 
                     struct scores_s          *sc, 
                     struct ali_s             *ali,
                     struct scanfast_s        *scanfast,
                     int add_codon, int add_hexamer, int allow_pseudoknots, int alignment, int use_ribo_approx, 
                     int cyk, int changefreq, int changefreqoverall, int changefreqwin,
                     int fastintloop, int logodds, int n_shuffles, int oldrna, int pedantic, 
                     int shuffle, int sre_shuffle, int con_shuffle, int strand, int sweep, 
                     double *tfactor, int traceback, int verbose, int ones, int parse, int rnass, int doends, int scan, int shtoo, 
                     int twindow, int evd, int N, double k, int shift, int *histo_sc)
{
  struct psubs_s            *riboprob;
  struct three_divergence_s  rnadiv;
  char                      *aliss = NULL; /* stays NULL when the input carries no secondary structure */
  int                        i;
  int                        win;
  int                        shft;
  int                        leg;
  double                     id;
  double                     mut;
  double                     gap;
  double                     id_win_mean;
  double                     id_win_sd;
  struct three_times_s       time;
  double                    *freqX;  /* base composition of fragment X */
  double                    *freqY;  /* base composition of fragment Y */
  int                        shuffles = 0;
  int                        idx;
  int                        is_fix_time = FALSE;
 
  /* Length filters: a hit outside [minlenhit, maxlenhit] is skipped with a
   * warning. Nothing has been allocated yet, so returning here is safe.
   */
  if (minlenhit >= maxlenhit) Die ("You want alignments with positive length, right?\n");
  if ((sqinfoX.len > maxlenhit || sqinfoY.len > maxlenhit)) {
    Warn("Too long ++ skip hit # %d\n>%s (%d)\n>%s (%d)\n", num_align, sqinfoX.name, sqinfoX.len, 
         sqinfoY.name, sqinfoY.len);
    return;
  }
  if ((sqinfoX.len < minlenhit || sqinfoY.len < minlenhit)) {
    Warn("Too short ++ skip hit # %d\n>%s (%d)\n>%s (%d)\n", num_align, sqinfoX.name, sqinfoX.len, 
         sqinfoY.name, sqinfoY.len);
    return;
  }

  /* Print alignment banner
   */
  PrintHitBanner(ofp, regressfp, regressionfile, sqinfoX, sqinfoY, num_align, ones, shuffle, sre_shuffle, con_shuffle, strand);

  /* In sweep mode the window is the whole X sequence */
  if (sweep) window = sqinfoX.len;

  /* Assign the ss with gaps to char array aliss.
   *
   *   sqinfoX.ss and sqinfoY.ss are identical, and are the one given by the multiple alignment.
   *   Functions: AdjustAliSS() and RemoveJointGaps() will possibly modify them.
   *
   *  When we start a new comparison we reread the ss fresh from sqinfoY.ss,
   *  we point aliss at it, and then modify it to remove common gaps of the two sequences compared,
   *  and to reflect the secondary structure shared by both sequences.
   *
   *  Note that aliss aliases sqinfoY.ss (no copy is made), and that the local
   *  copy of sqinfoX is made to point at the same string.
   */
  if ((format == kSquid || format == kSelex) && (sqinfoX.flags & SQINFO_SS)) 
    {
      aliss = sqinfoY.ss; 
      sqinfoX.ss = aliss;
    }

  if (parse && format != kSquid && format != kSelex) 
    Die ("With the '--parse' option, you must give a selex file with a secondary structure\n");
  
  /* Create arrays with given alignment:
   *    isegX[sqinfoX->len]         
   *    isegY[sqinfoY->len]    (int seqs with gaps) 
   */
  FormatSeqs(ofp, Lmax, format, seqX, seqY, &sqinfoX, &sqinfoY, &ctX, &ctY, isegX, isegY, 
             allow_pseudoknots, verbose);
  
  /* In view of ctX and ctY modify aliss to include only the base pairs that are
   * supported by both sequences simultaneously.
   */
  if ((format == kSquid || format == kSelex) && (sqinfoX.flags & SQINFO_SS)) 
    AdjustAliSS(ofp, sqinfoX, aliss, ctX, ctY);
 
  /* (0) leg = minimum length of alignment 
   *
   * Function RemoveJointGaps() can be passed to ScoreWithModels() in "scorewithmodels.c" if 
   * one wants to remove the common gaps of the two alignments window by window.
   * The advantage of that is that you keep a common coordinate system,
   * the disadvantage is that the "actual" window used may be smaller than
   * the given one.
   *
   * RemoveJointGaps() modifies both sqinfoX.ss and sqinfoY.ss, as well as isegX and isegY
   */
  RemoveJointGaps(sqinfoX, isegX, sqinfoY, isegY, &leg, sweep, aliss);

  if (verbose) 
    if ((format == kSquid || format == kSelex) && (sqinfoX.flags & SQINFO_SS)) 
      PrintCtSeqs(ofp, &sqinfoX, isegX, &sqinfoY, isegY, 0, leg, aliss);
 
  if (window != -1) {
    fprintf(ofp, "length of whole alignment after removing common gaps: %d \n", leg); 
    if (regressionfile != NULL)  
      fprintf(regressfp, "length of whole alignment after removing common gaps: %d \n", leg); 
  }

  /* window == -1 means "no windowing": one window spanning the whole alignment */
  if (window == -1) { win = leg;    shft = leg;   }
  else              { win = window; shft = slide; }
  
  /* Calculate nucleotide freqs for the whole alignment
   */
  freqX = (double *) MallocOrDie (sizeof(double) * 4);
  freqY = (double *) MallocOrDie (sizeof(double) * 4);
  for (i = 0; i < 4; i++)         /* initialization         */
    {
      freqX[i] = 0.0;   
      freqY[i] = 0.0;   
    }
  
  if (changefreq) {
    /* calculate single-nt frequencies for whole alignment
     */
    BaseComp(ofp, isegX, leg-1, leg-1, freqX);    /* freqs for seqX         */
    BaseComp(ofp, isegY, leg-1, leg-1, freqY);    /* freqs for seqY         */
  }
  else if (changefreqoverall) {
    CopyVector(freqX, targetfreq, 4);
    CopyVector(freqY, targetfreq, 4);
  }

  /* even if we are not asked to change the background single nucleotide
   * frequencies we are going to adjust them to the 
   * corresponding marginals of P(c1,c2|t)
   */
  changefreq = TRUE;

  /* Percentage ID, GAP and MUT of the alignment */
  PercIdSeqs(isegX, isegY, leg-1, leg-1, &id, &gap, &mut);
  AliIdStats(isegX, isegY, leg-1, leg-1, win, shft, &id_win_mean, &id_win_sd);
  if (verbose) printf("ALI id %f id_win_mean %f id_win_st %f\n", id, id_win_mean, id_win_sd);

  if (use_ribo_approx) rnadiv = rnadiv_approx;
  else                 rnadiv = rnadiv_ribosum;

  /* When twindow is set no whole-alignment time is determined here, but the
   * struct is still handed to the scoring routines by value. Give it defined
   * contents so that an uninitialized object is never read.
   */
  time.oth = 0.0;
  time.cod = 0.0;
  time.rna = 0.0;

  /* Calculate the divergence time according to the id of the alignment, if not given.
   */
  if (!twindow) {
    if (*tfactor == -1.0) { /* no "-i" option. Calculate the divergence time from the id of the alignment */
      time = TimeIdCorrelation3Models(othdiv, coddiv, rnadiv, id); 
      fprintf(ofp, "Divergence time (variable): %.6f %.6f %.6f\n", time.oth, time.cod, time.rna); 
      if (regressionfile != NULL) 
        fprintf(regressfp, "Divergence time (variable): %.3f %.3f %.3f ", time.oth, time.cod, time.rna); 
    }
    else { /* fixed divergence time given with the option "-i" */
      /* the clamp is written back through the pointer, so it persists for later hits */
      if (id < 100.0 && *tfactor < TMIN) *tfactor = TMIN;

      is_fix_time = TRUE;

      time.oth = *tfactor;
      time.cod = *tfactor;
      time.rna = *tfactor;
      fprintf(ofp, "Divergence time (fixed): %.6f %.6f %.6f\n", time.oth, time.cod, time.rna); 
      if (regressionfile != NULL) 
        fprintf(regressfp, "Divergence time (fixed): %.6f %.6f %.6f ", time.oth, time.cod, time.rna); 
    }
  }
  
  fprintf(ofp, "[alignment ID = %.2f MUT = %.2f GAP = %.2f]\n\n", id, mut, gap); 
  if (regressionfile != NULL) 
    fprintf(regressfp, "[alignment ID = %.2f MUT = %.2f GAP = %.2f]\n", id, mut, gap); 
  
  /* Number of scoring passes: n_shuffles when shuffling for an EVD fit, otherwise one */
  if ((shuffle || sre_shuffle || con_shuffle) && evd) idx = n_shuffles;
  else                                                idx = 1;
  
  if (use_ribo_approx) riboprob = riboprobapprox_star;
  else                 riboprob = riboprob_star;

  /* NOTE(review): both calls below end with "N, k, shft, histo_sc", i.e. they
   * pass the window slide (shft) next to the histogram arguments, while the
   * 'shift' parameter of this function is never used. This looks like it was
   * meant to be 'shift' -- confirm against the prototypes of
   * EvolScoreScanFast()/EvolScoreWindow() before changing it.
   */
  while (shuffles < idx) {
    if (scan) 
      EvolScoreScanFast(ofp, printqfp, printqfile, regressfp, regressionfile, format, sqinfoX, isegX, iseqX, freqX, sqinfoY, isegY, iseqY, freqY, aliss,
                        Lw, leg, win, shft, id, id_win_mean, id_win_sd,
                        cfg_node, hexa, codon_joint,  
                        pam_star, codprob_star, mutpxy_star, mut5pxy_star, riboprob, pair5prob_star, 
                        mutpxy_rna_unpaired_star, mut5pxy_rna_unpaired_star, 
                        mut5pxy_rna_loop_star, 
                        mutpxy_rna_paired_star, mut5pxy_rna_paired_star, 
                        d, emodel, dpdscanfast, dpd, dpf, mxscanfast, mx, sc, ali, scanfast,
                        add_codon, add_hexamer, alignment, use_ribo_approx, cyk, changefreq, changefreqoverall, 
                        evd, fastintloop, is_fix_time, logodds, oldrna, pedantic, shuffle, sre_shuffle, con_shuffle, 
                        time, traceback, verbose, ones, parse, rnass, doends,
                        N, k, shft, histo_sc);
    else
      EvolScoreWindow(ofp, printqfp, printqfile, regressfp, regressionfile, format, sqinfoX, isegX, iseqX, freqX, sqinfoY, isegY, iseqY, freqY, aliss,
                      Lw, leg, win, shft, id, id_win_mean, id_win_sd,
                      cfg_node, hexa, codon_joint, 
                      pam_star, codprob_star, mutpxy_star, mut5pxy_star, riboprob, pair5prob_star, 
                      mutpxy_rna_unpaired_star, mut5pxy_rna_unpaired_star, 
                      mut5pxy_rna_loop_star, 
                      mutpxy_rna_paired_star, mut5pxy_rna_paired_star,
                      d, emodel, dpd, dpf, mx, sc, ali, 
                      add_codon, add_hexamer, alignment, use_ribo_approx, cyk, changefreq, changefreqoverall, changefreqwin, 
                      evd, fastintloop, is_fix_time, logodds, oldrna, pedantic, shuffle, sre_shuffle, con_shuffle, sweep, 
                      time, traceback, verbose, ones, parse, rnass, doends, shtoo, twindow,
                      N, k, shft, histo_sc);
    
    shuffles ++; 
  }
  
  free(freqX); 
  free(freqY); 
  /* ctX/ctY are freed only when the sequence carries a secondary structure */
  if (sqinfoY.flags & SQINFO_SS) free(ctY); 
  if (sqinfoX.flags & SQINFO_SS) free(ctX);
}

