#!/usr/bin/perl
#
# Program: nanotools_split_fasta
# Purpose: Split FASTA file into separate files for each read
# Author:  Richard Leggett
# Contact: richard.leggett@tgac.ac.uk

use strict;
use warnings;
use Getopt::Long;

# Command-line options, filled in by GetOptions below.
my $input_file;        # -i: multi-read FASTA file to split
my $output_dir;        # -o: directory that receives the per-read files
my $help_requested;    # -h: defined when the usage text was asked for

# Bookkeeping shared with the splitting loop further down.
my %ids;               # read IDs seen so far, used to detect repeats
my $count = 0;         # number of per-read files opened so far

# GetOptions returns false when the command line could not be parsed (for
# example an unknown option); stop there rather than running with settings
# the user did not intend.
GetOptions(
    'i|input:s'     => \$input_file,
    'o|outputdir:s' => \$output_dir,
    'h|help'        => \$help_requested
) or die "Error in command line arguments (use -h for help)\n";

# Print the usage text and stop when -h/--help was given.
# Both -i and -o are shown as required because the script refuses to run
# without either of them.
if (defined $help_requested) {
    print
        "\nnanotools_split_fasta\n\n",
        "Split a multi-read FASTA into separate files.\n\n",
        "Usage: nanotools_split_fasta.pl <-i input> <-o output_dir>\n\n",
        "Options:\n",
        "    -i | -input      Input FASTA file\n",
        "    -o | -outputdir  Output directory\n",
        "    -h | -help       Show this help text\n",
        "\n";

    exit;
}

# Both the input file and the output directory are mandatory; give up with a
# short message (no file/line suffix) if either one was not supplied.
unless (defined $input_file) {
    die "You must specify an input file\n";
}

unless (defined $output_dir) {
    die "You must specify an output directory\n";
}

# Split the input FASTA into one file per read.
#
# Every header line (">ID ...") starts a new output file named
# <output_dir>/<ID>.fasta; the header and all following lines up to the next
# header are copied into that file unchanged. A record whose ID has already
# been seen is reported and skipped. Progress is shown on STDOUT as a running
# count, refreshed every 10 reads.

# Unbuffered STDOUT so the "\r<count>" progress indicator updates in place.
local $| = 1;

# Three-argument open keeps characters in the file name from being treated
# as an open mode or a pipe. "-" is still accepted as standard input, which
# the previous two-argument open allowed implicitly.
my $in_fh;
if ($input_file eq '-') {
    $in_fh = \*STDIN;
} else {
    open($in_fh, '<', $input_file)
        or die "Can't open input ".$input_file.": $!\n";
}

my $out_fh;          # handle of the read currently being written, if any
my $out_filename;    # path behind $out_fh, kept for error messages
my $skipping = 0;    # true while inside a record whose ID is a repeat

while (my $line = <$in_fh>) {
    if ($line =~ /^>(\S+)/) {
        my $id = $1;

        # A new header always ends the previous record. Buffered write
        # errors only surface at close, so check it.
        if (defined $out_fh) {
            close($out_fh)
                or die "Can't close output ".$out_filename.": $!\n";
            undef $out_fh;
        }

        if (exists $ids{$id}) {
            # Skip the whole duplicate record; writing it on would append
            # it to the file of the unrelated read that came before it.
            print "WARNING: Repeat ID $id (record skipped)\n";
            $skipping = 1;
            next;
        }

        $ids{$id} = 1;
        $skipping = 0;
        $count++;

        if (($count % 10) == 0) {
            print "\r$count";
        }

        # NOTE(review): the ID is used as a file name as-is, so an ID that
        # contains "/" refers to a path below (or outside) the output
        # directory - confirm whether such IDs can occur in the input.
        $out_filename = $output_dir."/".$id.".fasta";
        open($out_fh, '>', $out_filename)
            or die "Can't open output ".$out_filename.": $!\n";
    }

    next if $skipping;

    if (defined $out_fh) {
        print {$out_fh} $line
            or die "Can't write to ".$out_filename.": $!\n";
    } else {
        # Content ahead of the first header belongs to no read.
        print "WARNING: Ignoring line $. (found before the first FASTA header)\n";
    }
}

if (defined $out_fh) {
    close($out_fh)
        or die "Can't close output ".$out_filename.": $!\n";
}

close($in_fh);

# Finish the progress line with the final number of reads written.
print "\r$count\n";
