/*
 * $Log: blastDeposit.cc,v $
 * Revision 2.1  2001/01/17 22:52:35  dmaziuk
 * Fixed a bug where wrong sequence length was deposited into starfile.
 *
 * Revision 2.0  2001/01/11 23:44:43  dmaziuk
 * Version 2 of blast contains complete rewrite of blastParser and major rewrite
 * of (almost) everything else.
 *
// Revision 1.16  2000/10/25  21:04:37  yleigh
// Correct the PDB mol name by double quotes. Also replace the "?" for full
// list PDB mol name by real mol name.
//
// Revision 1.15  2000/10/18  21:49:45  yleigh
// modify blastParser.cc to deal with new HTML tag in blast result, multiple
// lines of name corresponding to one sequence. Disable the blastn search for
// DNA or RNA sequences. Certain blast result don't have db_code for sp entry,
// leave the db_code as ?, since they are retrievable by name. Updated some
// new databases and inserted into starfiles according to the selection rules
// set before.
//
// Revision 1.14  2000/04/25  04:25:11  madings
// re-enabled seol's _Sequence_alignment_details code, and put
// it under a command-line flag control (-seqdet), and set up
// two blastscripts - one fore external, and one for internal,
// that work on two different directories - one with the commandline
// flag, one without.  The internal directory has the flag, but is
// not on the public ftp server or website.
//
// Revision 1.13  2000/03/31  19:49:53  seol
// Add _Sequence_subject_length in the loop. Fix bugs with bmr4394.str.
//
// Revision 1.12  2000/03/10  22:06:47  seol
// edit comment.
//
// Revision 1.11  2000/02/25  21:50:58  seol
// handles starfiles with two or more seqnences( the case that one has blast
// output, but another has some kind of error). Frame having sequence with blast
// output will be updated.
//
// Revision 1.10  1999/11/08  20:57:11  seol
// compare new blast result with old one and update it when there is change(
// update _Sequence_homology_query_revised_last_date tag when revised). coded
// for handling option tag.
//
// Revision 1.9  1999/10/19  19:12:02  seol
// use NMRSTAR formatter to add proper comments to the ouput starfile
//
// Revision 1.8  1999/10/13  18:58:32  seol
// add lines to makefile to configure the memory trimming code in starlib.
//
// Revision 1.7  1999/10/08  19:49:51  seol
// deal with 2 or more sequences, reduce the output(remove _Sequence_alignment_
// details), sort output by same database group
//
// Revision 1.6  1999/07/07  19:02:20  seol
// change the loop format from tabulate to linear
//
// Revision 1.5  1999/06/17  19:41:21  seol
// add code for 'blastn'. pull out embeded data from real data. list the
// entries at most 5 except pdb.
//
// Revision 1.4  1999/03/19  16:36:56  seol
// use "SWISS_PROT" instead of "Swiss-Prot", put '?' instead of '.' for
// entry_mol_name of PDB, put "not_included" instead of '.' for seq_detail_
// value that doesn't contain value. And make entry_mol_name shorter.
//
 * Revision 1.2  1999/03/02  21:31:31  seol
 * Changed table values to use DatavalueNode::DOUBLE instead of
 * DataValueNode::NON values.  Also it now makes a new output file
 * instead of overwriting the input.
 *
 * Revision 1.1 1999/02/17  16:04:49  seol
 * query the BLAST site as if it were an actual web brower by opening
 * a socket to the BLAST 2.0 server and speaking the HTTP protocal
 *
 * $Id: blastDeposit.cc,v 2.1 2001/01/17 22:52:35 dmaziuk Exp $
 */


/**********************************************************************************
* The functions in the blastDeposit class insert the relevant data, as parsed
* by the functions in the blastParser class, into the starfile.
**********************************************************************************/

#include "blastDeposit.h"

 /* depositMonoPoly() creates a loop inside "Polymer residue sequence" frame
  *
  * Steps performed on save_frame:
  *  1. make sure there is exactly one
  *     "_Sequence_homology_query_revised_last_date" tag (created with
  *     today's date when missing);
  *  2. make sure there is exactly one "_Sequence_homology_query_date" tag
  *     (created when missing, otherwise overwritten with today's date);
  *  3. if revise is true, stamp the revised-date tag with today's date,
  *     remove the old blast loop and, when data is not empty, append a new
  *     loop built from data at the end of the frame.
  *
  * save_frame -- pointer to SaveFrameNode
  * data       -- vector of dataEntry objects
  * revise     -- if true, revise the loop
  *
  * return negative value on error (more than one blast loop or more than
  * one of either date tag in the frame), 0 otherwise
  */
int blastDeposit::depositMonoPoly( SaveFrameNode *save_frame,
                                   const dataEntries *data,
                                   bool revise )
{
    DataLoopNode *newLoop, *loop_ptr = NULL;
    DataLoopNameListNode *nameListList;
    LoopTableNode *valTable;
    LoopRowNode *valRow;
    LoopNameListNode *nameList;

    List<ASTnode*> *matches;
    List<DataNode*> *save_inside = save_frame->GiveMyDataList();

    DataItemNode *query_time, *revise_tag;
    time_t tp;
    struct tm *current_tm;
    char current_time[50];
    bool no_existing_loop;
    int  matchIdx = 0;

    // get current date, formatted as YYYY-MM-DD
    tp = time( NULL );
    current_tm = localtime( &tp );
    strftime( current_time, sizeof( current_time ), "%Y-%m-%d", current_tm );

    // This part makes sure the input starfile has both the
    // "_Sequence_homology_query_revised_last_date" and the
    // "_Sequence_homology_query_date" tag.
    //---------------------------------------------------------------------------------
    // search for the data loop that contains "_Database_entry_mol_name"
    matches = save_frame->searchForTypeByTag( ASTnode::DATALOOPNODE,
                                       string( "_Database_entry_mol_name" ) );
    if( matches->size() > 1 )
    {
        delete matches;   // the result list is ours to free on every path
        cout << "***** Error: Save frame contains more than one blast loop\n";
        return -1;
    }
    no_existing_loop = ( matches->size() == 0 );
    if( !no_existing_loop )
        loop_ptr = (DataLoopNode *)( (*matches)[0] );
    delete matches;

    if( !no_existing_loop )
    {
        // Find the position of the loop in the saveframe's own list:
        for( matchIdx = 0; matchIdx < save_inside->size(); matchIdx++ )
            if( (*save_inside)[matchIdx] == loop_ptr )
                break;
    }

    // if _Sequence_homology_query_revised_last_date doesn't exist, put that now
    // right before the loop (or at the end of save_frame if loop doesn't exist)
    matches = save_frame->searchForTypeByTag( ASTnode::DATAITEMNODE,
                         string( "_Sequence_homology_query_revised_last_date" ) );
    if( matches->size() > 1 )
    {
        delete matches;
        cout << "***** Error: Save frame has more than one " << endl;
        cout << "            _Sequence_homology_query_revised_last_date tag. " << endl;
        return -1;
    }
    if( matches->size() == 0 )
    {
        revise_tag = new DataItemNode( string("_Sequence_homology_query_revised_last_date"),
                                       current_time );
        // Append when there is no loop, or when the loop is not a direct
        // member of the frame's list (matchIdx ran off the end).
        if( no_existing_loop || matchIdx >= save_inside->size() )
            save_inside->insert( save_inside->end(), revise_tag );
        else
            save_inside->insert( &( (*save_inside)[matchIdx] ), revise_tag );
    }
    else
        revise_tag = (DataItemNode *)( (*matches)[0] );
    delete matches;

    // if _Sequence_homology_query_date doesn't exist, put that now
    // right before the revise tag
    matches = save_frame->searchForTypeByTag( ASTnode::DATAITEMNODE,
                                  string("_Sequence_homology_query_date" ) );
    if( matches->size() > 1 )
    {
        delete matches;
        cout << "***** Error: Save frame has more that one " << endl;
        cout << "             _Sequence_homology_query_date tag." << endl;
        return -1;
    }
    if( matches->size() == 0 )
    {
        // Find the position of the revise tag in the saveframe's own list:
        for( matchIdx = 0; matchIdx < save_inside->size(); matchIdx++ )
            if( (*save_inside)[matchIdx] == revise_tag )
                break;

        query_time = new DataItemNode( string( "_Sequence_homology_query_date" ),
                                       current_time );
        if( matchIdx >= save_inside->size() )
            save_inside->insert( save_inside->end(), query_time );
        else
            save_inside->insert( &( (*save_inside)[matchIdx] ), query_time );
    }
    else
    {
        // tag exists: update it with current time
        DataItemNode *query_tag = (DataItemNode *)( (*matches)[0] );
        query_tag->setValue( current_time );
    }
    delete matches;

    // Nothing more to do unless the blast loop has to be revised.
    // (Previously control fell off the end of the function here.)
    if( !revise )
        return 0;

    // revise == true: revise the blast loop and update
    // "_Sequence_homology_query_revised_last_date" with current time
    //-----------------------------------------------------------------------------------
    revise_tag->setValue( current_time );

    // remove old data loop
    remove_old_dataloop( save_frame );

    // check for new entries
    if( data->size() < 1 )
        return 0;

    // make the new empty data loop
    newLoop = new DataLoopNode( string( "tabulate" ) );
    nameListList = newLoop->getNamesPtr();
    valTable = newLoop->getValsPtr();

    // populate the list of tag names for the new loop
    nameListList->insert( nameListList->end(), new LoopNameListNode() );
    nameList = ( *nameListList )[0];
    nameList->insert( nameList->end(),
     new DataNameNode( string( "_Database_name" ) ) );
    nameList->insert( nameList->end(),
     new DataNameNode( string( "_Database_accession_code" ) ) );
    nameList->insert( nameList->end(),
     new DataNameNode( string( "_Database_entry_mol_name" ) ) );
    nameList->insert( nameList->end(),
     new DataNameNode( string( "_Sequence_query_to_submitted_percentage" ) ) );
    nameList->insert( nameList->end(),
     new DataNameNode( string( "_Sequence_subject_length" ) ) );
    nameList->insert( nameList->end(),
     new DataNameNode( string( "_Sequence_identity" ) ) );
    if( BLASTP )
        nameList->insert( nameList->end(),
         new DataNameNode( string( "_Sequence_positive" ) ) );
    nameList->insert( nameList->end(),
     new DataNameNode( string( "_Sequence_homology_expectation_value" ) ) );
    if( bl_config->isSeqDet() )
        nameList->insert( nameList->end(),
         new DataNameNode( string( "_Sequence_alignment_details" ) ) );

    // populate the list of values for new loop,
    // by making new LoopRowNodes and attaching them to the loop's LoopTableNode.
    // snprintf() keeps every formatted field inside temp_str even when the
    // source string is longer than MaxMolName.
    char temp_str[MaxMolName];

    for( dataEntries::iterator i = data->begin(); i != data->end(); i++ )
    {
        // new node
        valRow = new LoopRowNode( true );
        // db_name
        valRow->insert( valRow->end(), new DataValueNode( i->getName(),
                       blastConfig::findNeededQuoteStyle( i->getName() ) ) );
        // db_code
        snprintf( temp_str, sizeof( temp_str ), "%-10s", i->getCode().c_str() );
        valRow->insert( valRow->end(), new DataValueNode( temp_str ) );
        // db_mol_name (trimmed once, used for both value and quote style)
        string molName = trim( i->getMolName() );
        valRow->insert( valRow->end(), new DataValueNode( molName,
                       blastConfig::findNeededQuoteStyle( molName ) ) );
        // seq. completeness
        snprintf( temp_str, sizeof( temp_str ), "%5d%c", i->getComp(), '%' );
        valRow->insert( valRow->end(), new DataValueNode( temp_str ) );
        // seq. length
        snprintf( temp_str, sizeof( temp_str ), "%5d", i->getLength() );
        valRow->insert( valRow->end(), new DataValueNode( temp_str ) );
        // seq. id
        snprintf( temp_str, sizeof( temp_str ), "%5d%c", i->getId(), '%' );
        valRow->insert( valRow->end(), new DataValueNode( temp_str ) );
        // positives
        if( BLASTP )
        {
            snprintf( temp_str, sizeof( temp_str ), "%5d%c", i->getPos(), '%' );
            valRow->insert( valRow->end(), new DataValueNode( temp_str ) );
        }
        // seq. exp.
        snprintf( temp_str, sizeof( temp_str ), "%7s", i->getExp().c_str() );
        valRow->insert( valRow->end(), new DataValueNode( temp_str ) );

        // list full entries
        if( bl_config->isSeqDet() )
        {
            valRow->insert( valRow->end(), new DataValueNode( i->getDetails() ) );
        }
#ifdef DEBUG
 cout << "blastDeposit::depositMonoPoly() : depositing" << endl;
 cout << i->getName() << " " << i->getCode() << " " << i->getMolName() << endl;
 cout << i->getComp() << " " << i->getLength() << " " << i->getId() << " ";
 cout << i->getPos() << " " << i->getExp() << endl;
#endif
        valTable->insert( valTable->end(), valRow );
    } // endfor dataEntries::iterator i

    // add new data loop at the end of saveframe containing the sequence
    save_inside->insert( save_inside->end(), newLoop );
    return 0;
} //------------------------------------------------------

 /* depositMolSys() creates a loop  inside "Molecular system description" frame
  *
  * @param data -- vector of dataEntry
  *
  * @return negative value on error, 0 otherwise
  */
int blastDeposit::depositMolSys( const dataEntries& data )
{
    DataLoopNode *newLoop;
    DataLoopNameListNode *nameListList;
    LoopTableNode *valTable;
    LoopRowNode *valRow;
    LoopNameListNode *nameList;

    List<ASTnode*> *matches;
    List<DataNode*> *save_inside;
    SaveFrameNode *save_frame;

// see if we have any pdb entries
    if( data.getCount( blastConfig::pdb ) < 1 ) return;

// get the saveframe that contains "_Mol_system_name" to
// remove old dataloop and add new one at the end of it
    matches = inputStarFile->searchForTypeByTag( ASTnode::SAVEFRAMENODE,
     string( "_Mol_system_name" ) );
    if( matches->size() != 1 )
    {
        cout << "***** Error: The starfile you entered contains none or more than one" << endl;
        cout << "             \"Molecular system description\" frame." <<endl;
        exit(1);
    }
    save_frame = (SaveFrameNode *)((*matches)[0]);
    delete matches;

// remove old loop and make a new one
    remove_old_dataloop(save_frame);
    newLoop = new DataLoopNode( string("tabulate"));
    nameListList = newLoop->getNamesPtr();
    valTable = newLoop->getValsPtr();
// populate the list of tag names for the new loop
    nameListList->insert( nameListList->end(), new LoopNameListNode() );
    nameList = ( *nameListList )[0];
    nameList->insert( nameList->end(), new DataNameNode( string( "_Database_name" ) ) );
    nameList->insert( nameList->end(), 
                   new DataNameNode( string( "_Database_accession_code" ) ) ); 
    nameList->insert( nameList->end(), 
                   new DataNameNode( string( "_Database_entry_mol_name" ) ) );
    nameList->insert( nameList->end(), 
                   new DataNameNode( string( "_Database_entry_details" ) ) );
// populate the list of values for the new loop, by making new LoopRowNodes 
// and attaching then to the loop's LoopTableNode
    char temp_str[MaxMolName];
            
    for( dataEntries::iterator i = data.begin(); i != data.end(); i++ )
    {
// if there are PDB entries
	if( i->getName() == blastConfig::getName( blastConfig::pdb ) )
	{
// new node
	    valRow = new LoopRowNode(true);
// db_name
	    valRow->insert( valRow->end(), new DataValueNode( i->getName() ) ); //,
                           //blastConfig::findNeededQuoteStyle( i->getName() ) ) ); 
// db_code
	    memset( temp_str, 0, MaxMolName );
	    sprintf( temp_str, "%-10s", i->getCode().c_str() );
	    valRow->insert( valRow->end(), new DataValueNode( temp_str ) ); //,
		           //blastConfig::findNeededQuoteStyle( temp_str ) ) ); 
// db_mol_name
	    valRow->insert( valRow->end(), new DataValueNode( trim( i->getMolName() ), 
                           blastConfig::findNeededQuoteStyle( trim( i->getMolName() ) ) ) ); 
// trailing dot
	    valRow->insert( valRow->end(), new DataValueNode( "." ) );

// add to table
	    valTable->insert( valTable->end(), valRow);
	} // endif
    } // endfor

// save the loop    
    save_inside = save_frame->GiveMyDataList();
    save_inside->insert( save_inside->end(), newLoop );
    return 0;
} //------------------------------------------------------------------------------

/**********************************************************************************
* This function removes the old dataloop(s) in the given save_frame.
*
* Every data loop of save_frame that has "_Database_entry_mol_name" as one of
* its column names is deleted. If there is no such loop, nothing is changed.
*
* save_frame -- pointer to the SaveFrameNode to clean up
**********************************************************************************/
void blastDeposit::remove_old_dataloop(SaveFrameNode *save_frame)
{
  List<ASTnode*> *matches;

  // search for the data loop that contains "_Database_entry_mol_name"
  // for one of the column name
  matches = save_frame->searchForTypeByTag( ASTnode::DATALOOPNODE,
                                     string("_Database_entry_mol_name"));
  if (matches->size() != 0)
  {
     for (matches->Reset(); !matches->AtEnd(); matches->Next())
        delete matches->Current();
  }

  // the result list itself is released whether or not anything matched
  // (it used to leak when no loop was found)
  delete matches;
} //--------------------------------------------------------------------------------


/* trim() strips leading and trailing blanks and tabs from str.
 *
 * str -- string to trim; it is modified in place
 *
 * return a copy of the trimmed string (empty if str is empty or holds
 * nothing but blanks and tabs)
 */
string blastDeposit::trim( string &str )
{
    // Cut the tail first: erasing the head shifts every index, which is why
    // a trailing position computed up front must not be used afterwards.
    const string::size_type last = str.find_last_not_of( " \t" );
    if( last == string::npos )
    {
        // empty or whitespace only
        str.erase();
        return str;
    }
    str.erase( last + 1 );
    str.erase( 0, str.find_first_not_of( " \t" ) );
    return str;
}

/* trim() strips leading and trailing blanks and tabs from a C string.
 *
 * str -- NUL-terminated string to trim; it is not modified.
 *        A null pointer is treated as an empty string.
 *
 * return the trimmed text as a new string (empty if str is null, empty or
 * holds nothing but blanks and tabs)
 */
string blastDeposit::trim( char *str )
{
    if( str == NULL ) return string();

    string tmpstr( str );
    // Cut the tail first: erasing the head shifts every index, which is why
    // a trailing position computed up front must not be used afterwards.
    const string::size_type last = tmpstr.find_last_not_of( " \t" );
    if( last == string::npos ) return string();   // whitespace only

    tmpstr.erase( last + 1 );
    tmpstr.erase( 0, tmpstr.find_first_not_of( " \t" ) );
    return tmpstr;
}

