#include "cnmrlib.h"

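/*****************************************************************************************************/
/*
   Command-line utility that converts a chemical-shift table to BMRB file format
   (see the usage text printed when the program is run without arguments).
*/
/*****************************************************************************************************/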




main(argc,argv)
int argc;
char **argv;
{  FILE *f,*fo;  int i;
   static int flag_comment=1,flag_start=1;

   if( argc<=1 ) {
printf("\n___________________________________________________________(C) Victor Jaravine ");
printf("\n%s -     Utility to convert  chem. shifts table to BMRB file format ;\n",argv[0]);
printf("\nUSAGE:   %s  cstabfile [genatomlist] > outfile \n\n",argv[0]);
printf("cstabfile  - file with chem.shift tab: \n");
printf("Colomns of chem.shifts delimitered by any number of tabs or spaces (see 2 examples below).\n");
printf("Table syntax:\n");
printf("First any number (or 0) of comment lines starting with '#'.\n");
printf("After comments there should be one header line starting with 'Res' with a valid atom name for each colomn; \n");
printf("Colomns names are e.g. CA, H, N, HB, HA or HA2 HA3 or similar.\n");
printf("two colomns like HB2 HB3 must be next to each other,\n");
printf("e.g.  \n");
printf("#\n");
printf("res     num     N       HN      HA2     HA3    HB2     HB3     CO      CA      CB\n");
printf("M       1       119.6   8.28    4.48    4.48   2.11    2.01    176.3   55.4    32.9\n");
printf("K       2       120.4   8.29    4.32    4.32   1.84    1.75    176.6   56.2    33.1\n");
printf("K       3       120.4   8.29    4.32    4.32   1.84    1.75    176.6   56.2    33.1\n");
printf("V       4       119.2   8.03    4.12    4.12   2.08    -1      176.3   62.2    32.9\n");
printf("I       5       119.9   8.00    4.17    4.17   1.87    -1      176.4   61.1    38.8\n");
printf("\n");
printf("For shifts like HB2 HB3 two fields separated by ',' are possible for convenience.\n");
printf("If a colomn is too much for several shifts, then they should be placed to the colomn named 'Others'.\n");
printf("Then an atom name is followed by ':' and the value (no spaces here), followed by ';' or ' ',\n");
printf("e.g. \n");
printf("#\n");
printf("ResNum	CA	H	N	HA	C	CB	HB2	HB3	HB	CG	Others\n");
printf("K2  	56.29	-	-	4.456	-	33.42	1.638	1.747	-	25.56	HG2:1.436;HD2:1.434;HD3:1.434;HE2:2.980;HE3:2.980;CD:29.40;CE:42.33;\n");
printf("K3  	56.48	8.552	123.66	4.214	175.50	33.74	1.771	1.668	-	24.81	HG2:1.280; HG3:1.280;HD2:1.416;HD3:1.416;HE2:2.946;HE3:2.946;CD:29.39;CE:42.07;\n");
printf("V4  	63.54	8.494	125.17	4.095	176.59	32.18	-	-    	1.966	-	HG1:0.805;HG2:0.961;CG1:21.63;CG2:21.63;\n");
printf("I5  	62.28	9.200	127.42	4.148	176.19	39.20	-	-    	1.689	-	HG:0.913;HG12:1.428;HG13:1.065;HD1:0.807;CG1:27.72;CG2:17.31;CD1:13.03;\n");
printf("A6  	52.28	8.103	121.88	4.607	175.29	22.30	-	-	1.277	-	-\n");
printf("T7  	59.11	8.312	112.37	5.004	173.19	71.70	-	-    	3.977	-	HG2:1.056;CG2:21.79;\n");
printf("K8  	57.35	8.817	122.87	3.444	174.80	30.35	1.672	1.918	-	25.53	HG2:1.333;HG3:1.333;HD2:1.629;HD3:1.629;HE2:2.991;HE3:2.991;CD:29.28;CE:42.23;\n");
printf("\n");
printf("\ngenatomlist - optional file produced by BMRB (BioMagResBank) site for your sequence; ");
printf("it is filled with the values from the given table and file 'cs2bmrb.out' is written;  ");
printf("the values are filled only if res number, name and atom name matches between your table and genatomlist.  ");
printf("The file is optional. The output to screen is always given.   ");
printf("\nThe program is not always correct due to atom names ambiguity:\n");
printf("names like HG are expanded to the first found HG?? in the library e.g. HG1 for a specific residue  ");
printf("the name for the second shift is next from the list e.g. HG2.\n");
printf("If a given name is not abbreviated and correct then there is no ambiguity, and it should work fine.   ");
printf("It may not work in some untested cases. It works at least for the examples shown.\n");
printf("To check it or to look for errors one can look into 'errout.tmp'.\n\n");
printf("The BMRB ambiguity value produced is only 1 or 2, unique or geminal. ");
printf("If the other shift from two geminal shifts is missing then the value produced is '2', otherwise it's '1'. ");
printf("Other types should be attributed depending on each case.\n");
printf("Errors for the shifts are taken from the file 'calc_cs.rc', ");
printf("which if not present is written with default values; after editing it is not overwritten.\n");
printf("----------\n");
printf("This program is provided 'as is' and in principle is not supported.\n");
printf("However, bug reports and suggestions are generally wellcome. My address is\n");
printf("\n");
printf("Victor A. Jaravine, Dr\n");
printf("Dept. of Structural Biology, BIOZENTRUM, Basel university\n");
printf("Klingelbergstr. 70, BASEL CH-4056, Switzerland\n");
printf("tel: +41-61-2672106, fax: +41-61-2672109\n");
printf("email: Victor.Jaravine@unibas.ch\n");
printf("\n");
                exit(0);
                };
             

   init_cnmrlib();
   fo=Fopen("cs2bmrb.out","w");


  
  /***************** OUTPUT CS ****************************************/
  if(argc>=2)
  {  int i,j,l,k,nline,ns,nc,nb; 
     static int atom_shift_assign=1; 
     char *atom_type;
     float average_difcs,sum_difcs;
     
     f=Fopen( argv[1],"r");if( f==NULL) return(-1);
   

/* header like in bmrb */
printf("#\n");
printf("#Atom   Residue\n");
printf("#shift  Seq     Residue Atom    Atom    Shift/  Error/  Ambiguity\n");
printf("#assign code    Label   Name    Type    ppm     ppm     Code\n");
printf("#---------------------------------------------------------------\n");
printf("#\n");

     /* get line from seq cs file */
     for(i=0; NULL!= fgets(line,MAXLINE,f); )
     { RESLIB_TAB25 *p25;  int jstart,j; 
       RESLIB_TAB *p;
       if( line[0]=='#' || !isalpha(line[0]) || strlen(line)==0 ) {flag_comment=1;continue;};

       if( flag_comment==1 && flag_start==1 && strncmp("RES",strupper(line),3)==0 )
       {  /* get atom names > cols */
          for(j=0;j<=21;j++) col[j][0]=0;
          nc = sscanf(line,"%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
               col[0],col[1],col[2],col[3],col[4],col[5],col[6],col[7],col[8],col[9],col[10],
                     col[11],col[12],col[13],col[14],col[15],col[16],col[17],col[18],col[19],col[20],col[21]); 
          average_difcs=sum_difcs=0.0;
          for(j=0;j<nc;j++) strcpy(col[j],strupper(col[j]));
          fprintf(errout,"COLOMN NAMES:\n");
          for(j=0;j<nc;j++) fprintf(errout,"'%s' ",col[j]); fprintf(errout,"\n");
          flag_comment=0;flag_start=0;
          continue;
       };


       nline=strlen(line); jstart=1;
       /*fprintf(errout,"\n*** %s",line);*/

       for(j=l=0; l<(nline-1) && line[l]!=0; )
       { /* skip ' ' */
         while(line[l]==' ' || line[l]=='\t' || line[l]=='\n' || line[l+k]=='\;') l++; 
         if( line[l]==0 || l>=nline ) break;
         /* get the field -> buf[j] */
         k=0;
         while((l+k)<nline && line[l+k]!=' ' && line[l+k]!='\t' && line[l+k]!='\n' && line[l+k]!='\;' && line[l+k]!=0) 
         {buf[j][k]=line[l+k];k++;};
         if(k==0) {l++;continue;};
         l+=k;
         buf[j++][k]=0; /* got it */
       };
       nb=j;
       fprintf(errout,"%s\n",line);
       for(j=0;j<nb;j++) fprintf(errout,"'%s' ",buf[j]); fprintf(errout,"\n");

       /* get res name & num */
       if( isalpha(buf[0][0]) && isdigit(buf[0][1]) )  /* A2 */
       { jstart=1; strncpy( seq[i].rname1,strupper(buf[0]),1); seq[i].n = atof( &buf[0][1] ); 
         p=get_restab_item(1,seq[i].rname1);   strncpy( seq[i].rname3,p->rname3,3);
       };
       if( buf[0][1]==0 && isalpha(buf[0][0]) && isdigit(buf[1][0]) )  /* A 2 */
       { jstart=2; strncpy( seq[i].rname1,strupper(buf[0]),1);  seq[i].n = atof( &buf[1][0] ); 
         p=get_restab_item(1,seq[i].rname1);   strncpy( seq[i].rname3,p->rname3,3);
       };
       if( isalpha(buf[0][0]) && isalpha(buf[0][1]) && isalpha(buf[0][2]) && isdigit(buf[0][3]) ) /* ALA2 */
       {  jstart=1; strncpy( seq[i].rname3,strupper(buf[0]),3); seq[i].n = atof( &buf[0][3] ); 
          p=get_restab_item(3,seq[i].rname3);   strncpy( seq[i].rname1,p->rname1,1);
       };
       if( buf[0][3]==0 && isalpha(buf[0][0]) && isalpha(buf[0][1]) && isalpha(buf[0][2]) && isdigit(buf[1][0]) ) /* ALA 2 */
       {  jstart=2; strncpy( seq[i].rname3,strupper(buf[0]),3); seq[i].n = atof( &buf[1][0] ); 
          p=get_restab_item(3,seq[i].rname3);   strncpy( seq[i].rname1,p->rname1,1);
       };
       /*printf("%s %s%d \n",seq[i].rname3,seq[i].rname1,seq[i].n);*/
       if( seq[i].n<=0 ) continue;
       seq[i].i=(int)(seq[i].rname1[0])-(int)'A';
       p25=&rtab25[ seq[i].i ];
       fprintf(errout,"Atom list for the residue is '%s' \n",p25->anames); 
 
       /*************** main work ***************/
       { static RES_LINE_ITEM r;
         static float err,rcs,vcs,rccs,difcs;  
         static int ns1,ns2,j,ir,jr,io,nr,ambiguity_code,aani; 
         static char *pd,*aan,colname[64],digits[64],*pat;

          memset( &r, 0, sizeof(RES_LINE_ITEM) );
          r.n=seq[i].n;
          strncpy(&r.rname3[0],seq[i].rname3,3); 
          strncpy(&r.rname1[0],seq[i].rname1,1);
          /*printf("%s %s%d nb=%d jstart=%d '%s'\n",seq[i].rname3,seq[i].rname1,seq[i].n,nb,jstart,buf[jstart]);*/
          /* get colnames & cs digits */ colname[0]=digits[0]=0;

          for(j=jstart,ir=0; j<nb; j++) 
          { 
              if( strlen(buf[j])<1 ) continue;
              

              if( strncmp( col[j],"OTHERS",5)==0 || j>=nc ) 
              {  pd=strchr(buf[j],':');if(pd!=NULL)pd[0]=0;
                 ns1=sscanf(buf[j],"%s",colname);  
                 ns2=sscanf(&buf[j][strlen(colname)+1],"%s",digits);
                 if((ns1+ns2)!=2 ) fprintf(errout,"#!! field '%s' in col '%s' cant be scanned; it is not e.g. 'HG:1.50'. All I have is '%s' '%s'\n",
                           buf[j],col[j],colname,digits); 
              } else 
              {  strcpy(colname,col[j]), strcpy(digits,buf[j]);
              };
              if( strlen(digits)==0 || strcmp(digits,"-")==0 ) continue;
              strcpy(colname,strupper(colname));
              aan= get_better_aname( colname ); 
              pat=strstr(p25->anames,aan); 
              if( pat==NULL )
              {   fprintf(errout,"warning: colomn atom name '%s' not found for res '%s%d' in list '%s'\n",aan,seq[i].rname1,seq[i].n,p25->anames); 
                  aan= get_abbrev_aname( colname );
                  fprintf(errout,"trying '%s'  ",aan); 
                  pat=strstr(p25->anames,aan); 
                  if(pat==NULL) {fprintf(errout,"  Noo-opps \n"); continue;};
                  fprintf(errout," OK "); 
              };
 
              r.cs[ir]= atof(digits);  strcpy( &r.colname[ir][0], colname  ); 
              r.d[ir]=0; 
              
              sscanf(pat,"%s", r.aname[ir]);
              fprintf(errout," found '%s' \n",r.aname[ir]); 
              if(ir>=1) 
              {   if( strcmp(r.aname[ir],r.aname[ir-1])==0 ) /* the same result again */
                  { 
                     if( strlen(r.aname[ir-1])<strlen(pat) )
                     {  pat=strstr(&pat[strlen(r.aname[ir-1])],aan);  /* search further */
                        if( pat!=NULL )sscanf(pat,"%s", r.aname[ir]);
                     };
                  };
              };

              /*printf("[%s %d %d %s '%s'] ",colname,ir,j,digits,pat);*/
              ir+=1;

              if( (pd=strchr(digits,'/'))!=NULL || (pd=strchr(digits,','))!=NULL ) 
              {   r.cs[ir]= atof(pd+1);  strcpy( &r.colname[ir][0], colname );
                  pat+=strlen(r.aname[ir-1]); pat=strstr(pat,aan); sscanf(pat,"%s", r.aname[ir]);
                  ir+=1; 
              };

          }; 
          nr=ir;
          /*for(ir=0; ir<nr; ir++) printf("(%-2s %-4s%-6.2f) ",r.colname[ir],r.aname[ir],r.cs[ir]); printf("\n");*/

          for(ir=0; ir<nr; ir++)
          { r.d[ir]=0;
            for(jr=1; jr<nr; jr++)
            {  static char astr1[16],astr2[16];
               strcpy(astr1,get_abbrev_aname(r.aname[ir]));
               strcpy(astr2,get_abbrev_aname(r.aname[jr]));
               if( strcmp(astr1,astr2)==0 ) 
               r.d[ir]++; 
            }
            r.d[ir]/=2; 
          };
 

          for(ir=0; ir<nr; ir++)
          {  if( r.cs[ir]<0.01 ) continue;
             aan = get_better_aname( r.colname[ir] );
             aani= get_abbr_aname_i( aan );
             err  = get_atom_shift( aani ,CS_ERROR_SHIFTS);
             rcs  = get_atom_shift( aani ,CS_EXPER_SHIFTS);
             ambiguity_code = get_ambiguity_code( &r, ir);
             rccs  = get_rcoil_val25( seq[i].rname1, r.aname[ir] );
             vcs=r.cs[ir]+rcs;
             difcs= 100.0*((vcs-rccs)/rccs); sum_difcs+=difcs;
             if(fabs(difcs) >50.0  ) fprintf(errout,"!!");
             if(fabs(difcs) >100.0 ) fprintf(errout,"!!!!");
             if(fabs(difcs) >33.0  ) fprintf(errout,"## %s%d.%-s \tcs %6.1f deviates : %4.0f %%  from r.coilval %6.1f  ",
                                     seq[i].rname1,seq[i].n,r.aname[ir],vcs,difcs,rccs);
             pB = &bmrb_items[atom_shift_assign];
             pB->i= atom_shift_assign;  pB->n= seq[i].n;
             strcpy(pB->rname3,seq[i].rname3);
             strcpy(pB->aname ,r.aname[ir]);  pB->atype[0]= r.colname[ir][0]; pB->atype[1]=0;
             pB->cs= vcs; pB->err= err; pB->amb_code= ambiguity_code;
             printBMRBitem(errout,pB);
             printBMRBitem(stdout,pB);
             atom_shift_assign++;
          };

  
       };/*main work*/
       i++;
     };/*lines*/
     nseq=i; nBMRBitems=atom_shift_assign;
     fclose(f);


   /* print sequence information */
/*
   fprintf(stdout,"\n# sequence 1 letter \n");
   for(i=0;i<nseq;i++) fprintf(stdout,"%1s",seq[i].rname1); 
   fprintf(stdout,"\n# sequence 3 letter \n");
   for(i=0;i<nseq;i++) fprintf(stdout,"%3d    %4d     @     %3s     @\n",i+1,seq[i].n,seq[i].rname3); 
*/
  average_difcs= sum_difcs/(float)atom_shift_assign;
  fprintf(errout,"\n### average by %d atoms deviation of ch.shift from r.coil = %8.2f \n",atom_shift_assign,average_difcs);
  for(j=0;j<nc;j++) fprintf(errout,"%-7s ",col[j]); fprintf(errout,"\n");
  };

  /*****************  end of output cs****************************************/
   



  /*****************  RE gen BMRB ****************************************/
  if(argc>=3)
  {  int i,l,ns;
     f=Fopen( argv[2],"r");if( f==NULL) return(-1);
     pBc=&currentBMRBitem;
     /* get line from gen bmrb file */
     for(i=1; NULL!= fgets(line,MAXLINE,f); )
     { 
       if( line[0]=='#' || !isdigit(line[0]) || strlen(line)<20 ) {fprintf(fo,"%s",line);continue;};
       pBc->cs=pBc->err= 0.0; pBc->amb_code=0;
       ns= sscanf(line,"%d%d%s%s%s",
                 &pBc->i,&pBc->n,pBc->rname3,pBc->aname,pBc->atype );
			    /*,&pBc->cs,&pBc->err,&pBc->amb_code*/
       if( ns!=5 ) {fprintf(fo,"%s",line);continue;};
       for(l=0; l<nBMRBitems; l++)
       {  pB= &bmrb_items[l];
          if( pB->n==pBc->n && (strcmp(pB->rname3,pBc->rname3)==0 &&  strcmp(pB->aname,pBc->aname)==0 ) ) break;
       };
       pB->i=pBc->i=i++; 
       if(l< nBMRBitems) 
       {  printBMRBitem(fo,pB); 
       } else 
       {   /*if( 0==strcmp(pBc->aname,"HB3") || 0==strcmp(pBc->aname,"HG3")  */
           printBMRBitem(fo,pBc);
       };
          
     };
     fclose(f);
     
  };
  /*****************  end of RE gen BMRB  ****************************************/



   fclose(fo);
   close_cnmrlib();
   return(0);
}



#include "cnmrlib.c"


