/*
 * FILE:         $Source: /cvs_archive/cvs/blast/blastParser.h,v $
 * 
 * DESCRIPTION:  declarations for struct seq_data and
 *               class blastParser
 *
 * AUTHOR:       $Author: dmaziuk $
 *
 * DATE:         $Date: 2001/01/11 23:48:28 $
 *
 * UPDATE HISTORY
 * --------------
 * $Log: blastParser.h,v $
 * Revision 2.1  2001/01/11 23:48:28  dmaziuk
 * No changes, just forgot to add *.h files before commit.
 *
 */
#ifndef BLASTPARSER_H
#define BLASTPARSER_H

#include <iostream.h>
#include <fstream.h>

#include <string.h>
#include <stdlib.h>        // strtod(), atoi()

// Platform compatibility layer for the string/vector headers.
// Each branch pulls in that platform's string and vector headers and
// defines two helper macros:
//   string_erase(s) -- expands to the call that empties string s
//   string_npos     -- expands to the "not found" position value
// NOTE(review): if neither IS_IRIX nor IS_GCC is defined, no headers are
// included here and neither macro is defined; if both are defined, the
// macros are defined twice.
#ifdef IS_IRIX
#  include <mstring.h>
#  include <vector.h>
// old STL has string::remove() instead of string::erase()
#  define string_erase(s) (s).remove()
// no string::npos is used on this branch; (size_type)-1 stands in for it
#  define string_npos ((string::size_type)-1)
#endif

#ifdef IS_GCC
#  include <string>
#  include <vector>
// standard library string: erase() and npos are used directly
#  define string_erase(s) (s).erase()
#  define string_npos (string::npos)
#endif

#include "ast.h"

#include "constants.h"
#include "blastConfig.h"
#include "dataEntry.h"
#include "dataEntries.h"

// seq_data structure
/** Sequence data extracted from a single data entry.
    <p>
    blastParser fills one of these while pulling sequence data 
    out of the CGI query result.<br>
    It exists purely for convenience: it bundles the related 
    values into a single seq_data record.
  */
struct seq_data
{
    int len;               // "Length" value
    char exp[MAX_DIGIT];   // "Expect" value, kept as text (at most MAX_DIGIT chars)
  // A line such as "Identities = 101/103 (98%)" is split into the
  // next three fields:
    int seq_len;           // 101
    int comp;              // completeness = 103 / res.count * 100 
    int id;                // 98 (%)
  // From "Positives = 101/103 (98%)" only the percentage is kept:
    int pos;               // positives percentage (BLASTP only)
    int id_count;          // seq. id count
};

// class blastParser
/** Parser for the result of a CGI query.
    <p>
    The query result is an HTML page saved as "/tmp/blast_out". <br>
    Callers normally enter the class through the <code>parse()</code> method.
  */ 
class blastParser
{
  public:
    /** Constructor. <br>
        Stores the given pointer to a blastConfig object.
        @param bc pointer to the blastConfig object to use.
        @see blastConfig
      */  
    blastParser( blastConfig *bc ) : bl_config( bc ) {}

    /** Replaces the stored pointer to the blastConfig object.
        @param bc pointer to the blastConfig object to use.
        @see blastConfig
      */
    void setConfig( blastConfig *bc ) { bl_config = bc; }

    // parse() is the main point of entry into blastParser.
    /** Opens the file holding the search results and parses it.
        <p>
        Main entry point of the blastParser class.
        @param ab_entry pointer to vector of dataEntry.
        @param save_frame pointer to SaveFrameNode.
        @return 0 on success, otherwise <br>
          -1 error getting residue count <br>
          -2 error opening CGI query result file.
        @see dataEntries
      */
    int parse( dataEntries *ab_entry, SaveFrameNode *save_frame );

    // isUpdated()
    /** Tells whether the CGI query results differ from what is already in the DB.
        <p>
        @param ab_entry vector of dataEntry.
        @param save_frame pointer to SaveFrameNode.
        @return true if DB needs to be updated.
        @see dataEntries
      */
    bool isUpdated( dataEntries *ab_entry, SaveFrameNode *save_frame );

  private:

    // NOTE(review): not documented in this header; presumably gathers the
    // lines belonging to one data entry into the vector -- confirm against
    // the implementation.
    int getOneDataEntry( vector<string> *lines );

    /** Parses the lines vector and fills in a dataEntry object.
        @param ab_entry -- output, dataEntry object.
        @param lines    -- input data, vector of string
        @return negative value on error, 0 = stop processing, 1 = more entries follow
      */
    int parseOneDataEntry( dataEntry *ab_entry, vector<string> *lines );
    
    //TODO: change char * to string when/if IRIX STL gets upgraded
    // NOTE(review): presumably strips markup tags from str in place --
    // confirm against the implementation.
    int removeTags( char *str );

    // NOTE(review): presumably fills *sd from the lines of one entry, with
    // res_count being the residue count -- confirm against the implementation.
    int getSeqData( seq_data *sd, const vector<string> *lines, 
                    const int res_count );

    // NOTE(review): parse() documents -1 as "error getting residue count";
    // presumably this helper is the source of that value -- confirm.
    int get_residue_count( SaveFrameNode *save_frame );

    // Configuration object, set by the constructor or setConfig().
    blastConfig *bl_config;
};



#endif // BLASTPARSER_H
