/********************************************************************************************************
 * QRNA - Comparative analysis of biological sequences 
 *         with pair hidden Markov models, pair stochastic context-free
 *        grammars, and probabilistic evolutionary  models.
 *       
 * Version 2.0.0 (JUN 2003)
 *
 * Copyright (C) 2000-2003 Howard Hughes Medical Institute/Washington University School of Medicine
 * All Rights Reserved
 * 
 *     This source code is distributed under the terms of the
 *     GNU General Public License. See the files COPYING and LICENSE
 *     for details.
 ***********************************************************************************************************/

/* rnaio.c
 *
 * E. Rivas [St. Louis]
 * 
 * 9 april 1999.
 *
 *  I/O for RNA secondary structure
 * 
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>

#include "funcs.h"
#include "globals.h"
#include "squid.h"
#include "structs.h"

#ifdef MEMDEBUG
#include "dbmalloc.h"
#endif



/* Function: IsCompensatory()
 *
 * Purpose:  Decide whether the pair (xl,xr) and the pair (yl,yr) differ
 *           from each other while both still have a residue-code sum
 *           of 3 or 5.
 *           NOTE(review): presumably x and y are the two aligned sequences,
 *           l/r the left and right side of a base pair, and the sums 3 and 5
 *           identify pairing residues under the project's residue coding --
 *           confirm against the alphabet definition.
 *
 * Return:   TRUE if all four codes are below 4, the two pairs are not
 *           identical, and both sums are 3 or 5; FALSE otherwise.
 */
int
IsCompensatory (int xl, int yl, int xr, int yr)
{
  int xsum;
  int ysum;

  /* any code of 4 or more disqualifies the column pair */
  if (xl >= 4 || xr >= 4 || yl >= 4 || yr >= 4)
    return FALSE;

  /* identical on both sides: nothing has changed between x and y */
  if (xl == yl && xr == yr)
    return FALSE;

  xsum = xl + xr;
  if (xsum != 3 && xsum != 5)
    return FALSE;

  ysum = yl + yr;
  if (ysum != 3 && ysum != 5)
    return FALSE;

  return TRUE;
}

/* Function: IsNonCompensatory()
 *
 * Purpose:  Complement of IsCompensatory() and IsWcPair() among
 *           positions whose four codes are all below 4.
 *
 * Return:   TRUE if all four codes are below 4 and the position is
 *           neither IsCompensatory() nor IsWcPair(); FALSE otherwise.
 */
int
IsNonCompensatory (int xl, int yl, int xr, int yr)
{
  /* any code of 4 or more is never classified */
  if (xl >= 4 || xr >= 4 || yl >= 4 || yr >= 4)
    return FALSE;

  if (IsCompensatory (xl, yl, xr, yr))
    return FALSE;

  if (IsWcPair (xl, yl, xr, yr))
    return FALSE;

  return TRUE;
}

/* Function: IsWcPair()
 *
 * Purpose:  Decide whether (xl,xr) and (yl,yr) are the same pair and
 *           that pair has a residue-code sum of 3 or 5.
 *           NOTE(review): the sums 3 and 5 presumably select pairing
 *           residues under the project's residue coding -- confirm.
 *
 * Return:   TRUE if all four codes are below 4, xl == yl, xr == yr,
 *           and xl+xr is 3 or 5; FALSE otherwise.
 */
int
IsWcPair (int xl, int yl, int xr, int yr)
{
  int pairsum;

  /* any code of 4 or more disqualifies the column pair */
  if (xl >= 4 || xr >= 4 || yl >= 4 || yr >= 4)
    return FALSE;

  /* both sides must be unchanged between x and y */
  if (xl != yl || xr != yr)
    return FALSE;

  pairsum = xl + xr;

  return (pairsum == 3 || pairsum == 5) ? TRUE : FALSE;
}

/* Function: KHS2ct()
 * 
 * Purpose:  Convert a secondary structure string to an array of integers
 *           giving, for each position 0..len-1, the position it is
 *           base-paired to. A position that is not paired points to
 *           itself (ct[pos] == pos). This is off-by-one from a
 *           Zuker .ct file representation.
 *           
 *           The .ct representation can accommodate pseudoknots but the 
 *           secondary structure string cannot easily; the string contains
 *           "Aa", "Bb", etc. pairs as a limited representation of
 *           pseudoknots. The string contains "><" for base pairs
 *           ('>' opens a pair, '<' closes it).
 *
 *           If allow_pseudoknots is TRUE, any other character that is
 *           not a gap symbol marks the string as inconsistent.
 *           If allow_pseudoknots is FALSE, everything except '>' and '<'
 *           (pseudoknot symbols included) is ignored and those positions
 *           are treated as single stranded.
 *
 *           A byte with the high bit set (value > 127) always marks the
 *           string as inconsistent.
 *           
 * Return:   ret_ct is allocated here and must be free'd by caller;
 *           it is set even when the return status is 0.
 *           Returns 1 on success, 0 if ss is somehow inconsistent.
 */
int 
KHS2ct(char *ss, int len, int allow_pseudoknots, int **ret_ct)
{
  struct intstack_s *dolist[27]; /* 0: normal pairs; 1..26: pseudoknots A..Z */
  int *ct;
  int  i;
  int  pos, pair;
  unsigned char sym;            /* current symbol, as a non-negative value */
  int  status = 1;              /* success or failure return status */

  for (i = 0; i < 27; i++)
    dolist[i] = InitIntStack();

  if ((ct = (int *) malloc (len * sizeof(int))) == NULL)
    Die("malloc failed");
  for (pos = 0; pos < len; pos++)
    ct[pos] = pos;              /* unpaired until proven otherwise */

  for (pos = 0; pos < len; pos++)
    {
      /* Work on an unsigned copy: where plain char is signed, a high-bit
       * byte is negative, so "ss[pos] > 127" could never fire and the
       * negative value would reach isupper()/islower(), which is undefined.
       */
      sym = (unsigned char) ss[pos];

      if (sym > 127) status = 0; /* not ASCII: reject before any ctype call */

      else if (sym == '>')      /* left side of a pair: push onto stack 0 */
        PushIntStack(dolist[0], pos);

      else if (sym == '<')      /* right side of a pair; resolve pair */
        {
          if (! PopIntStack(dolist[0], &pair))
            { status = 0; }     /* nothing open to close */
          else
            {
              ct[pos]  = pair;
              ct[pair] = pos;
            }
        }
                                /* same stuff for pseudoknots */
      else if (allow_pseudoknots && isupper(sym))
        PushIntStack(dolist[sym - 'A' + 1], pos);

      else if (allow_pseudoknots && islower(sym))
        {
          if (! PopIntStack(dolist[sym - 'a' + 1], &pair))
            { status = 0; }
          else
            {
              ct[pos]  = pair;
              ct[pair] = pos;
            }
        }

      else if (allow_pseudoknots && !isgap(ss[pos])) status = 0; /* bad character */
    }

  /* A positive return from FreeIntStack() is treated as failure
   * (presumably it reports items left on the stack, i.e. unclosed
   * left-side symbols -- confirm against the stack implementation).
   */
  for (i = 0; i < 27; i++)
    if ( FreeIntStack(dolist[i]) > 0)
     { status = 0; }

  *ret_ct = ct;
  return status;
}

/* Function: KHS2ctInfernal()
 * 
 * ER, Wed Dec  4 10:14:39 CST 2002 [STL]
 *
 * Purpose:  Convert a secondary structure string to an array of integers
 *           giving, for each position 0..len-1, the position it is
 *           base-paired to. A position that is not paired points to
 *           itself (ct[pos] == pos). This is off-by-one from a
 *           Zuker .ct file representation.
 *           
 *           The .ct representation can accommodate pseudoknots but the 
 *           secondary structure string cannot easily; the string contains
 *           "Aa", "Bb", etc. pairs as a limited representation of
 *           pseudoknots. The string contains "<>" for base pairs
 *           ('<' opens a pair, '>' closes it).
 *         
 *           Other symbols of the Infernal/Rfam convention are not
 *           handled yet; only "<>" and the letter pairs are recognized.
 *
 *           If allow_pseudoknots is TRUE, any other character that is
 *           not a gap symbol marks the string as inconsistent.
 *           If allow_pseudoknots is FALSE, everything except '<' and '>'
 *           (pseudoknot symbols included) is ignored and those positions
 *           are treated as single stranded.
 *
 *           A byte with the high bit set (value > 127) always marks the
 *           string as inconsistent.
 *           
 * Return:   ret_ct is allocated here and must be free'd by caller;
 *           it is set even when the return status is 0.
 *           Returns 1 on success, 0 if ss is somehow inconsistent.
 */
int 
KHS2ctInfernal(char *ss, int len, int allow_pseudoknots, int **ret_ct)
{
  struct intstack_s *dolist[27]; /* 0: normal pairs; 1..26: pseudoknots A..Z */
  int *ct;
  int  i;
  int  pos, pair;
  unsigned char sym;            /* current symbol, as a non-negative value */
  int  status = 1;              /* success or failure return status */

  for (i = 0; i < 27; i++)
    dolist[i] = InitIntStack();

  if ((ct = (int *) malloc (len * sizeof(int))) == NULL)
    Die("malloc failed");
  for (pos = 0; pos < len; pos++)
    ct[pos] = pos;              /* unpaired until proven otherwise */

  for (pos = 0; pos < len; pos++)
    {
      /* Work on an unsigned copy: where plain char is signed, a high-bit
       * byte is negative, so "ss[pos] > 127" could never fire and the
       * negative value would reach isupper()/islower(), which is undefined.
       */
      sym = (unsigned char) ss[pos];

      if (sym > 127) status = 0; /* not ASCII: reject before any ctype call */

      else if (sym == '<')      /* left side of a pair: push onto stack 0 */
        PushIntStack(dolist[0], pos);

      else if (sym == '>')      /* right side of a pair; resolve pair */
        {
          if (! PopIntStack(dolist[0], &pair))
            { status = 0; }     /* nothing open to close */
          else
            {
              ct[pos]  = pair;
              ct[pair] = pos;
            }
        }
                                /* same stuff for pseudoknots */
      else if (allow_pseudoknots && isupper(sym))
        PushIntStack(dolist[sym - 'A' + 1], pos);

      else if (allow_pseudoknots && islower(sym))
        {
          if (! PopIntStack(dolist[sym - 'a' + 1], &pair))
            { status = 0; }
          else
            {
              ct[pos]  = pair;
              ct[pair] = pos;
            }
        }

      else if (allow_pseudoknots && !isgap(ss[pos])) status = 0; /* bad character */
    }

  /* A positive return from FreeIntStack() is treated as failure
   * (presumably it reports items left on the stack, i.e. unclosed
   * left-side symbols -- confirm against the stack implementation).
   */
  for (i = 0; i < 27; i++)
    if ( FreeIntStack(dolist[i]) > 0)
     { status = 0; }

  *ret_ct = ct;
  return status;
}


/* Function: RevSS()
 * 
 * ER, Sat Nov 10 13:38:50 CST 2001 [STL]
 * 
 * Purpose:  Build the structure string for the reversed sequence:
 *           ss is read from its last position to its first, and the
 *           pair symbols are swapped ('>' becomes '<', '<' becomes '>').
 *           Every other character, pseudoknot letters included, is
 *           written as '.'.
 *
 * Args:     ss - structure string; positions 0..L-1 are read
 *           L  - number of positions; a negative L is treated as 0
 *
 * Return:   newly allocated string of L characters plus a terminating
 *           NUL; must be free'd by caller.
 */
char *
RevSS(char *ss, int L)
{
  int   pos;
  char *revss;

  if (L < 0) L = 0;             /* keeps the terminator write in bounds */

  /* one extra byte so the result can be used as an ordinary C string */
  revss = (char *) MallocOrDie(sizeof(char) * (L + 1));

  for (pos = 0; pos < L; pos++)
    {
      switch (ss[L-1-pos])
        {
        case '>': revss[pos] = '<'; break;
        case '<': revss[pos] = '>'; break;
        default:  revss[pos] = '.'; break;
        }
    }
  revss[L] = '\0';

  return revss;
}


/* Function: VerifyKHS()
 * 
 * Purpose:  Check a structure string for symbol problems. For the normal
 *           pair symbols and for each pseudoknot letter, count left-side
 *           symbols ('>' or uppercase) against right-side symbols ('<' or
 *           lowercase); the string is bad if any count does not balance,
 *           or if it contains a non-ASCII character or a character that
 *           is neither a pair symbol, a letter, nor a gap symbol.
 *           Only the balance is checked, not the order in which left and
 *           right symbols appear.
 *
 *           If wordy is TRUE, each problem is reported through Die().
 *           NOTE(review): if Die() terminates the program, a wordy call
 *           never gets to return 0 -- confirm.
 *
 * Args:     name  - sequence name, used only in the messages
 *           ss    - structure string; positions 0..L-1 are examined
 *           L     - number of positions to examine
 *           wordy - TRUE to report problems
 *
 * Return:   1 if string is OK, 0 if string is bad.
 */
int
VerifyKHS(char *name, char *ss, int L, int wordy)
{
  int  balance[27] = { 0 };     /* 0 is normal pairs. 1-26 for pseudoknots */
  int  ok = 1;
  int  k;
  int  p;
  char c;

  for (p = 0; p < L; p++)
    {
      c = ss[p];

      if (!sre_isascii(c))      /* must be screened before the ctype calls */
        {
          ok = 0;
          if (wordy)
            Die("VerifyKHS: Sequence %s no good. structure has garbage symbol (val %d) at position %d", 
                name, (int) c, p);
          continue;
        }

      if (c == '>')
        balance[0]++;
      else if (c == '<')
        balance[0]--;
      else if (isupper((int) c))
        balance[c - 'A' + 1]++; /* pseudoknot-left  */
      else if (islower((int) c))
        balance[c - 'a' + 1]--; /* pseudoknot-right */
      else if (!isgap(c))
        {
          ok = 0;
          if (wordy)
            Die("VerifyKHS: Sequence %s no good, structure has invalid symbol %c at position %d", 
                name, (int) c, p);
        }
    }

  /* normal pairs: a positive count means unmatched left-side symbols */
  if (balance[0] != 0)
    {
      ok = 0;
      if (wordy)
        Die("VerifyKHS: Sequence %s no good, structure has extra paired bases: %d on %s",
            name, abs(balance[0]),
            (balance[0] > 0) ? "left" : "right");
    }

  /* pseudoknot letters A..Z live in slots 1..26 */
  for (k = 1; k <= 26; k++)
    {
      if (balance[k] == 0)
        continue;

      ok = 0;
      if (wordy)
        Die("VerifyKHS: Sequence %s no good, structure has extra paired bases for pseudoknot %c: %d on %s",
            name, (char) (k + 'A' - 1),
            abs(balance[k]),
            (balance[k] > 0) ? "left" : "right");
    }

  return ok;
}

