/*
 * FILE:        $Source: /cvs_archive/cvs/blast/blastSearch.cc,v $
 *
 * DESCRIPTION: This file contains definitions for class blastSearch.
 *              Class blastSearch runs a BLAST search on the residue sequence(s)
 *              from the input file. do_blast_search() currently runs a local
 *              blastall program. The HTTP methods remain available: send_seq()
 *              sends the sequence and gets an RID from the server, send_rid()
 *              sends the RID and gets the search result. Both work by opening a
 *              socket to the BLAST server and speaking the HTTP protocol.
 *
 * AUTHOR:      $Author: dmaziuk $
 *
 * DATE:        $Date: 2001/03/15 22:33:35 $
 *
 * UPDATE HISTORY:
 * ---------------
 * $Log: blastSearch.cc,v $
 * Revision 2.2  2001/03/15 22:33:35  dmaziuk
 * Disabled deposition of PDB entries in molecular system description frame.
 *
 * Revision 2.1  2001/02/20 21:00:07  dmaziuk
 * Few minor changes
 *
 * Revision 2.0  2001/01/11 23:44:44  dmaziuk
 * Version 2 of blast contains complete rewrite of blastParser and major rewrite
 * of (almost) everything else.
 *
 */

#include "blastSearch.h"

// Search-type flags, defined in another translation unit.
// do_blast_search() resets all three to false and then sets the one that
// matches the save frame's _Mol_polymer_class value; send_seq() and send_rid()
// read BLASTP/BLASTN to choose the PROGRAM field of the query.
extern bool BLASTP;
extern bool BLASTN;
extern bool CARBO;

// Scratch buffer used by send_blast_request(), both for formatting the HTTP
// request header lines and for reading the server reply from the socket.
char buffer[HUGE_STRING_LEN];

/** Constructor: remembers the configuration object used by the search methods.
  *
  * Only the pointer is stored; no copy of the configuration is made here.
  *
  * @param bc pointer to the blastConfig object
  */
blastSearch::blastSearch( blastConfig *bc )
{
    this->bl_config = bc;
}

/** do_blast_search() runs a local blastall search for the sequence in a save frame.
  *
  * Reads the _Mol_residue_sequence and _Mol_polymer_class tags from save_frame,
  * writes the sequence to the file "tmpseq" in the current directory and runs
  * blastall through system(), with its output directed to the file named by
  * bl_config->getQueryFile().
  *
  * Side effects: the globals BLASTP, BLASTN and CARBO are reset and then set
  * according to the polymer class. Only "protein" is actually searched; "DNA"
  * and "RNA" are rejected with -1. Any other class (including "carbohydrate")
  * terminates the process with exit( 7 ), as does a failure to open "tmpseq";
  * a system() failure terminates with exit( 8 ).
  *
  * @param save_frame save frame holding the sequence and polymer class tags
  * @return -1 on error, 0 otherwise
  */
int blastSearch::do_blast_search( SaveFrameNode *save_frame )
{
    List<ASTnode*> *match;

    // get the sequence from the input starfile
    match = save_frame->searchForTypeByTag( ASTnode::DATAITEMNODE,
                                    string( "_Mol_residue_sequence" ) );
    if( match == NULL || match->size() != 1 )
    {
        // the result list belongs to us, so release it on the error path too
        delete match;
        cout << "***** Error: Save frame contains none or more than one" << endl;
        cout << "             _Mol_residue_sequence tag. Can't perform blast search."<< endl;
        return -1;
    }

    // Keep the value in a string: a fixed-size buffer would overflow on a
    // sequence longer than the buffer.
    const string seq = ((DataItemNode *)((*match)[0]))->myValue();
    delete match;

    // check _Mol_polymer_class from the input starfile to decide
    // whether to use blastp (protein) or blastn (DNA, RNA) or to do nothing (carbohydrate)
    BLASTP = BLASTN = CARBO = false;

    match = save_frame->searchForTypeByTag( ASTnode::DATAITEMNODE,
                                    string( "_Mol_polymer_class" ) );
    if( match == NULL || match->size() != 1 )
    {
        delete match;
        cout << "***** Error: Save frame contains none or more than one" << endl;
        cout << "             _Mol_polymer_class tag. Can't perform blast search."<< endl;
        return -1;
    }

    const string mol_class = ((DataItemNode *)((*match)[0]))->myValue();
    delete match;

#ifdef DEBUG
    cout << "mol. class = " << mol_class << endl;
#endif
    if( mol_class == "protein" )
        BLASTP = true;
    else if( (mol_class == "DNA") || (mol_class == "RNA") )
    { //Disable BLASTN search , exit
        BLASTN = true;
        cout<<"***** Error: Save frame contains DNA or RNA sequence for blastn search."<< endl;
        cout<<"             Can't perform blastn search."<< endl;
        return -1;
    }
    else if( mol_class == "carbohydrate" )
        CARBO = true;

    // blastall reads the query sequence from a file
    ofstream tmpseq( "tmpseq" );
    if( !tmpseq )
    {
        cerr << "Cannot open tmpseq" << endl;
        exit( 7 );
    }
    const char *seq_text = seq.c_str();
    tmpseq.write( seq_text, strlen( seq_text ) );
    tmpseq.flush();
    tmpseq.close();

    // Build the command line in a string so that a long output file name
    // cannot overflow a fixed-size buffer.
    string command( "env BLASTDB=/blast_disk/blast/db /blast_disk/blast/blastall -T -d nr -i tmpseq -F F" );
//    string command( "env BLASTDB=/blast_disk/blast/db.update /blast_disk/blast/blastall -T -d month.aa -i tmpseq" );
    if( BLASTP ) command += " -p blastp";
    // not reachable while the DNA/RNA case returns early above; kept for when
    // blastn searches are enabled again
    else if( BLASTN ) command += " -p blastn";
//    else if( CARBO ) command += " -p ";
    else
    {
        cerr << "Don't know what program to call: not blastn or blastp" << endl;
        exit( 7 );
    }
    command += " -o ";
    command += bl_config->getQueryFile();
#ifdef DEBUG
  cout << "Running " << command << endl;
#endif
    const int status = system( command.c_str() );
    if( status < 0 )
    {
        cerr << "system() returned -1" << endl;
        exit( 8 );
    }
    // A non-zero status used to be ignored silently. Report it, but keep the
    // return value unchanged because callers may rely on it.
    if( status != 0 )
        cerr << "Warning: blastall command returned status " << status << endl;
    return 0;

} //------------------------------------------------------------------------------------------------

/** send_seq() sends search request with sequence and gets RID from the server.
  * 
  * @param rid placeholder for result, array of char
  * @param seq sequence, array of char
  * @return -1 on error, 0 otherwise
  */
int blastSearch::send_seq( char rid[MAX_RID], char seq[MAX_SEQ] )
{
    char str[MAX_STR];
//    char ridFlag[] = "<INPUT TYPE=\"text\" SIZE=25 NAME=\"RID\" VALUE=";
    char ridFlag[] = "<input name=\"RID\" size=\"50\" type=\"text\" value=";

    ifstream filein;
    strstream query_str;

#ifdef DEBUG
  cout << "blastSearch:send_seq() seq = " << seq << endl;
#endif

    //check if _Mol_residue_sequence has "?" as value
    if( !strcmp(seq, "?") )
    {
        cout << "***** Error: _Mol_residue_sequence tag has \"?\" as value." << endl;
        return -1;
    }

    // create query string
    //------------------------------------------------------------------
    if( BLASTP ) query_str << "PROGRAM=blastp&DATALIB=nr&SEQUENCE=" << seq << endl;
    else if( BLASTN ) query_str << "PROGRAM=blastn&DATALIB=nr&SEQUENCE=" << seq << endl;
    else 
    {
        cout << "***** Error: inappropriate value for _Mol_polymer_class." << endl;
        return -1;
    }

#ifdef DEBUG
  cout << "blastSearch:send_seq() query_str = " << query_str.str() << endl;
#endif

    // send blast request to blast.cgi and get the RID
    //-----------------------------------------------------------
    bool done = false;
    int loop_cnt = 0;
    //repeat this loop 10 times if send_blast_request doesn't get RID
    while ( !done )
    {

#ifdef DEBUG
  cout << "blastSearch:send_seq() loop_cnt in RID = " << loop_cnt << endl;
#endif

        if ( loop_cnt > 10 )
        {
            cout << "***** Error: blast server doesn't give the RID now.\n";
            return -1;
        }
     
        // send blast request to blast.cgi and get the RID
        if( !send_blast_request( query_str.str() ) )
        {
	    cout << "***** Error: sending request to BLAST server" << endl;
            return -1;
        }

        // extract RID from blast_out file
        filein.open( bl_config->getQueryFile() );
        if ( !filein )
        {
	    cout << "***** Error: opening " << bl_config->getQueryFile() << " containg RID" << endl;
            return -1;
        }

        while( !filein.eof() )
        {
            filein.getline( str, MAX_STR, '\n' );
            if( !strncmp( str, ridFlag, 44 ) )
            {
                for (int i = 0, j = 45; str[j] != '"'; i++, j++)
                    rid[i] = str[j];
                rid[i] = '\0';
             //cout << "rid = " << rid << endl;
                done = true;
                break;
            }
        }
        filein.close();
        loop_cnt++;
        if (!done) sleep(3);
    } // endwhile ( !done )
    return 0;
} //------------------------------------------------------------------------------------------------ 

/** send_rid() sends search request with RID and gets result from the server.
  * 
  * @param rid RID, array of char
  * @return -1 on error, 0 otherwise
  */
int blastSearch::send_rid( char *RID )
{
    char str[MAX_STR];
    char sendAgainFlag[] = "<META HTTP-EQUIV=\"Refresh\" CONTENT=";
    char noHitFlag[]="No significant similarity found";
    char resultFlag[] = "Sequences producing significant alignments:";


    ofstream query_file; 
    ifstream filein;
    strstream query_str;

#ifdef DEBUG
  cout << "blastSearch:send_rid() RID = " << RID << endl;
#endif

    // create query_str
    //------------------
//    query_str << "RID=" << RID << 
//	"&DESCRIPTIONS=100&ALIGNMENTS=50&ALIGNMENT_VIEW=0HTML=on&REFRESH_DELAY=3" << endl;
//++dm
//
    if( BLASTP ) query_str << "PROGRAM=blastp&DATALIB=nr&SEQUENCE=" << RID; // << endl;
    else if( BLASTN ) query_str << "PROGRAM=blastn&DATALIB=nr&SEQUENCE=" << RID; // << endl;
    else 
    {
	cout << "***** Error: inappropriate value for _Mol_polymer_class." << endl;
	return -1;
    }
  
    
    query_str << "&DESCRIPTIONS=100&ALIGNMENTS=50&ALIGNMENT_VIEW=0HTML=on&REFRESH_DELAY=3" << endl;

#ifdef DEBUG
  cout << "blastSearch:send_rid() query_str = " << query_str.str() << endl;
#endif

    bool done = false;
    int loop_cnt = 0;
    //repeat this loop for 30 times if send_blast_request() doesn't get result
    //------------------------------------------------------------------------
    while ( !done )
    {

#ifdef DEBUG
  cout << "blastSearch:send_rid() loop_cnt = " << loop_cnt << endl;
#endif

        //if loop_cnt > 30, blast server is busy to give the result back.
        if ( loop_cnt > 30 )
        {
            cout << "***** Error: blast server doesn't give the result back now.\n";
            return -1;
        }

        //sleep 30 sec for blast_out to be refreshed with search result
        sleep( 30 );

        // send blast request to blast.cgi and get the result
        if( !send_blast_request( query_str.str() ) )
        {
            printf("***** Error: sending request to BLAST server\n");
            return -1;
        }

        filein.open( bl_config->getQueryFile() );
        if ( !filein )
        {
            cout << "can't open " << bl_config->getQueryFile() << endl;
            return -1;
        }
        while( !filein.eof() )
        {
	    filein.getline( str, MAX_STR, '\n' );
            if( strstr( str, noHitFlag ) != NULL )
            {
	        cout << "***** Error: ";
                cout << "No significant similarity found from blast search." << endl;
                return -1;
            }
            if ( !strncmp( str, resultFlag, 43 ) )
            { 
                done = true;
                break;  
            }
        }
        filein.close(); 
        loop_cnt++;
    } // endwhile ( !done )

    return 0;
} //------------------------------------------------------------------------------------------------

/** send_blast_request() opens socket to blast server, sends CGI query form
  * and gets the reply
  * 
  * @param query_string, array of char
  * @return false on error, true otherwise
  */
bool blastSearch::send_blast_request( char *query_string )
{
    int  Length;
    int sock;
    char myhostname[MAX_HNAME];
    
    struct sockaddr_in server;
    struct hostent *hp;
    int  nbytes, i;
    fd_set fds;

    char *blast_engine_host = bl_config->getBlastHost();

    if( (query_string == NULL) || (Length = strlen( query_string )) == 0 )
        return false;

    ofstream out_file( bl_config->getQueryFile() );
    if( !out_file )
        cout << "***** Error: can't open " << bl_config->getQueryFile() << " to save blast search result" << endl;
    
    // create new socket 
    memset( &server, 0, sizeof( server ) );
    
    if( (sock = socket( AF_INET, SOCK_STREAM, 0 )) <= 0 ) 
    {
        error_handler( -2 );
        cerr << "blastSearch:send_blast_request() socket() error " << sock << endl;
        exit(1);
    }
    
    // get the address of remote host
    hp = gethostbyname( blast_engine_host );
    if( hp == NULL ) 
    {
        error_handler(-3);
        cerr << "blastSearch:send_blast_request() gethostbyname() error" << endl;
        exit(1);
    }
    
    memcpy( (void *) &server.sin_addr, hp->h_addr, hp->h_length );
    
    server.sin_family = hp->h_addrtype;                         // family is AF_INET 
    server.sin_port = htons( (int) bl_config->getBlastPort() ); // server port to connect to 

#ifdef DEBUG
  cout << "Connecting to " << blast_engine_host << ":" << bl_config->getBlastPort() << endl;
#endif
    // connect to the www server
    if( connect( sock, (struct sockaddr *) &server, sizeof( server ) ) < 0 ) 
    {
        error_handler(-4);
        cerr << "blastSearch:send_blast_request() connect() error " << errno << endl;
        return false;
    }
    
    // sockets are connected. now send request using HTTP protocal
    //------------------------------------------------------------- 
    sprintf(buffer, "POST /blast/%s HTTP/1.0\n", bl_config->getBlastProg() );
    write( sock, buffer, strlen(buffer) );

    gethostname( myhostname, MAX_HNAME * sizeof( char ) );
    sprintf( buffer, "User-Agent: SpecialClient from %s\n", myhostname );
    write( sock, buffer, strlen( buffer ) );
    sprintf( buffer, "Connection: Keep-Alive\n" );
    write( sock, buffer, strlen( buffer ) );
    sprintf( buffer, "Content-type: application/x-www-form-urlencoded\n" );
    write( sock, buffer, strlen( buffer ) );
    sprintf( buffer, "Content-Length: %d\n\n", Length );
    write( sock, buffer, strlen( buffer ) );
    write( sock, query_string, Length );

    // wait for data to come back and print it to file named "blast_out"
#ifdef DEBUG
  cout << "blastSearch:send_blast_request() Commencing blast search, please wait for results. " << endl;
#endif

    while( true ) 
    {
        FD_ZERO( &fds );
        FD_SET( sock, &fds );
        
        if ( (i = select( sock + 1, &fds, NULL, NULL, NULL )) != 0 ) 
        {
            if ( FD_ISSET( sock, &fds ) ) 
            { 
            // read from socket and write to output file out_file
                if( (nbytes = read( sock, buffer, HUGE_STRING_LEN )) > 0 ) 
                    out_file.write( buffer, nbytes );
                else if( nbytes == 0 ) 
                    break;
                else 
                    break;
            } 
            else 
            {
                error_handler(-5);
                break;    // this must not happend 
            }
        } //end of if 
        else 
        {
            error_handler(-6);
            break;  // timeout expired if any ...
        }
    } //end of while
 
    close( sock );
    out_file.close();
    return true;
} //------------------------------------------------------------------------------------------------

/**
  * read_file_into_memory() is currently a stub: it does not read anything and
  * always returns an empty string. send_blast_request() rejects an empty
  * query string, so the result cannot be used to send a request as is.
  *
  * @param fd file descriptor (unused)
  * @param len length of (unused)
  * @param filter (unused)
  * @return pointer to a static empty string; must not be freed by the caller
  */
char *blastSearch::read_file_into_memory( FILE *fd, int len, int filter )
{
    // Writable static storage: a string literal cannot be returned through a
    // non-const char pointer in standard C++, and writing to one is undefined.
    static char empty_query[1] = { '\0' };

    (void) fd;
    (void) len;
    (void) filter;
    return empty_query;
} //------------------------------------------------------------------------------------------------

/**
  * error_handler() prints errno to stdout
  * 
  * @param error error number
  */
void blastSearch::error_handler( int error )
{
    cout << "Error code is " << error << endl;
    cout.flush();
}




