##shft2bmrb.awk 
 
#!/bin/sh -x
# NOTE(review): this interpreter line is not the first line of the file, so it
# is treated as an ordinary comment; the -x tracing it asks for is therefore
# not switched on when the script is executed directly.

# Ask the user for the input and output filenames and echo each choice back.
# The answers are stored in $input and $output for the rest of the script.
#
# printf is used instead of "echo -n" because the -n flag is not portable
# across /bin/sh implementations, and read -r keeps backslashes in a typed
# filename intact.  The bytes written to the terminal are unchanged.
printf '\n\n   Enter input filename:\n>> '
read -r input
printf '\n** Using input file %s\n' "$input"
printf '\n\n   Enter output filename:\n>> '
read -r output
printf '\n** Using output file %s\n\n\n\n' "$output"
# shft2bmrb_convert [RESIDUE_COUNT]
#
# Reads a residue-by-residue chemical shift listing on stdin and writes the
# assigned-shift table, followed by the fixed ambiguity-set trailer, to stdout.
#
# Input, as consumed by the parser below:
#   - each residue starts with a line whose second field is the residue number
#   - the next line carries the residue type in field 2 (RES_TYPE UNK is
#     treated as GLY)
#   - optional SPIN_SYSTEM_ID and HETEROGENEITY lines are skipped
#   - then one "<atom> <shift>" line per atom, closed by END_RES_DEF
#   - one blank line between residues is tolerated
#
# Output columns (tab separated): running atom number, residue number,
# residue type, atom name, first letter of the atom name, shift, shift error,
# ambiguity code.
#
# Tunables inside the awk program:
#   err1 = error in 1H
#   err2 = error in 15N
#   err3 = error in 13C
#   r_lngth = number of amino acids in your protein (RESIDUE_COUNT, default 8)
#
# Changes relative to the previous inline program:
#   - ARG HE now receives ambiguity code 1 (the test used atom[i] instead of
#     atom[i,j] and could never match)
#   - the HB2/HB3 handling is really skipped for GLY, ALA, VAL, THR and ILE
#     (the old test chained != with || and was always true)
#   - both halves of an expanded aromatic HD# pair get code 3
#   - every getline is checked, so a short or truncated file ends the parse
#     instead of looping forever
shft2bmrb_convert() {
  # nawk is the interpreter this script was written for; fall back to plain
  # awk on systems that do not provide a separate nawk binary.
  if command -v nawk >/dev/null 2>&1; then
    shft2bmrb_awk=nawk
  else
    shft2bmrb_awk=awk
  fi

  "$shft2bmrb_awk" -v r_lngth="${1:-8}" '
BEGIN {
    err1 = 0.03          # error in 1H
    err2 = 0.25          # error in 15N
    err3 = 0.50          # error in 13C
    passes = 10          # bubble passes used by every reordering stage
    atom_num = 0
    r_lngth += 0         # force a numeric residue count
}

# Write a diagnostic to stderr without relying on /dev/stderr support.
function warn(msg) {
    print "shft2bmrb: " msg | "cat 1>&2"
}

# Advance to the next input line; returns 0 at end of file or on read error.
function next_line() {
    return (getline) > 0
}

# Like next_line, but reports a residue definition that is cut short.
function need_line(i) {
    if (next_line()) return 1
    warn("input ended inside the definition of residue " res_num[i])
    return 0
}

# True when name is one of the space separated words in list.
function any_of(name, list) {
    return index(" " list " ", " " name " ") > 0
}

# Load up to r_lngth residues into res_num, assign, atom, shift and num_j.
function read_residues(    i, j, type) {
    for (i = 1; i <= r_lngth; i++) {
        # A clean end of file between residues simply ends the parse.
        if ($1 == "END_RES_DEF" && !next_line()) return
        if ($1 == "" && !next_line()) return
        res_num[i] = $2
        if (!need_line(i)) return
        type = $2
        if ($1 == "RES_TYPE" && type == "UNK") type = "GLY"
        assign[i] = type
        if (!need_line(i)) return
        if ($1 == "SPIN_SYSTEM_ID" && !need_line(i)) return
        if ($1 == "HETEROGENEITY" && !need_line(i)) return
        j = 0
        while ($1 != "END_RES_DEF") {
            j++
            atom[i,j] = ($1 == "HN") ? "H" : $1
            shift[i,j] = $2
            num_j[i] = j
            if (!need_line(i)) return
        }
    }
}

# Exchange rows j and j+1 of residue i in all per-atom tables.
function swap_rows(i, j,    t) {
    t = atom[i,j];  atom[i,j]  = atom[i,j+1];  atom[i,j+1]  = t
    t = shift[i,j]; shift[i,j] = shift[i,j+1]; shift[i,j+1] = t
    t = err[i,j];   err[i,j]   = err[i,j+1];   err[i,j+1]   = t
    t = amb[i,j];   amb[i,j]   = amb[i,j+1];   amb[i,j+1]   = t
}

# Decide whether neighbouring atoms a (row j) and b (row j+1) must trade
# places in the given reordering stage:
#   1: by first letter, H before C before N
#   2: the atom named exactly C goes in front of CA, CB, CG, CD, CE, CZ, CH
#   3: the ...2 member of an expanded pair goes in front of its ...3 partner
function misordered(stage, a, b,    x, y) {
    if (stage == 1) {
        x = substr(a, 1, 1)
        y = substr(b, 1, 1)
        return (x == "N" && y == "H") || (x == "C" && y == "H") || (x == "N" && y == "C")
    }
    if (stage == 2)
        return any_of(substr(a, 1, 2), "CA CB CG CD CE CZ CH") && b == "C"
    return any_of(a, "HB3 HG3 HD3 HE3 HE23 HD23 HA3 HG13") && b == (substr(a, 1, length(a) - 1) "2")
}

# Run the fixed number of bubble passes for one reordering stage.
function reorder(stage,    pass, i, j) {
    for (pass = 1; pass <= passes; pass++)
        for (i = 1; i <= r_lngth; i++)
            for (j = 1; j <= num_j[i]; j++)
                if (misordered(stage, atom[i,j], atom[i,j+1]))
                    swap_rows(i, j)
}

# Give the current atom ambiguity code 1 when its name is in names.
function mark(i, j, names) {
    if (any_of(atom[i,j], names)) amb[i,j] = 1
}

# Rename a methyl style pseudo-atom to a single name with ambiguity code 1.
function collapse_methyl(i, j, pseudo, name) {
    if (atom[i,j] == pseudo) {
        amb[i,j] = 1
        atom[i,j] = name
    }
}

# Open a new row j+1 of residue i holding a copy of row j; later rows move
# down by one.
function dup_row(i, j,    k) {
    num_j[i]++
    for (k = num_j[i]; k > j; k--) {
        atom[i,k]  = atom[i,k-1]
        shift[i,k] = shift[i,k-1]
        err[i,k]   = err[i,k-1]
        amb[i,k]   = amb[i,k-1]
    }
}

# Turn the pseudo-atom in row j into the explicit pair first/second, both
# with ambiguity code code.  When the next row holds the same pseudo-atom it
# becomes the second member; otherwise row j is duplicated first so that
# both members share one shift.
function expand_pseudo(i, j, pseudo, first, second, code) {
    if (atom[i,j] != pseudo) return
    amb[i,j] = code
    if (atom[i,j+1] == pseudo)
        amb[i,j+1] = code
    else
        dup_row(i, j)
    atom[i,j]   = first
    atom[i,j+1] = second
}

# Two-proton group named <stem>1/<stem>2 or <stem>#: individually named
# protons get code 1 and <stem>1 is renamed <stem>3; the pseudo-atom is
# expanded to <stem>2 and <stem>3 with code 2.
function split_pair(i, j, stem,    a) {
    a = atom[i,j]
    if (a == (stem "1") || a == (stem "2")) {
        amb[i,j] = 1
        if (a == (stem "1")) atom[i,j] = stem "3"
    } else {
        expand_pseudo(i, j, stem "#", stem "2", stem "3", 2)
    }
}

# Assign the shift error and ambiguity code of atom j in residue i and
# normalise its name.  The order of the steps matters: atom[i,j] is read
# again after every possible rename.
function annotate(i, j,    res, el) {
    res = assign[i]

    el = substr(atom[i,j], 1, 1)
    if (el == "H")      err[i,j] = err1
    else if (el == "C") err[i,j] = err3
    else if (el == "N") err[i,j] = err2

    mark(i, j, "N H CA HA C CB CG CD")

    if (res == "ALA") collapse_methyl(i, j, "HB#", "HB")
    if (any_of(res, "VAL THR ILE")) mark(i, j, "HB")
    if (res == "VAL") {
        mark(i, j, "CG1 CG2")
        collapse_methyl(i, j, "HG1#", "HG1")
        collapse_methyl(i, j, "HG2#", "HG2")
    }
    if (res == "THR") {
        mark(i, j, "CG2")
        collapse_methyl(i, j, "HG2#", "HG2")
    }
    if (res == "MET") {
        mark(i, j, "CE")
        collapse_methyl(i, j, "HE#", "HE")
    }
    if (res == "ARG") mark(i, j, "NE HE")
    if (res == "LEU") {
        mark(i, j, "CG HG CD1 CD2")
        collapse_methyl(i, j, "HD1#", "HD1")
        collapse_methyl(i, j, "HD2#", "HD2")
    }
    if (res == "HIS") mark(i, j, "ND1 CD2 HD2 CE1 HE1 NE2")
    if (res == "TRP") mark(i, j, "CD1 NE1 CE3 CZ2 CZ3 CH2 HD1 HE1 HE3 HZ2 HZ3 HH2")
    if (res == "PHE") mark(i, j, "CZ HZ")

    if (res == "GLY") split_pair(i, j, "HA")
    if (!any_of(res, "GLY ALA VAL THR ILE")) split_pair(i, j, "HB")
    if (any_of(res, "MET PRO LYS ARG GLN GLU")) split_pair(i, j, "HG")
    if (any_of(res, "MET PRO LYS ARG")) split_pair(i, j, "HD")
    if (res == "LYS") {
        mark(i, j, "CE")
        split_pair(i, j, "HE")
    }
    if (res == "ASN") {
        mark(i, j, "ND2")
        split_pair(i, j, "HD2")
    }
    if (res == "GLN") {
        mark(i, j, "NE2")
        split_pair(i, j, "HE2")
    }
    if (res == "ILE") {
        mark(i, j, "CG1 CD1 CG2")
        collapse_methyl(i, j, "HG2#", "HG2")
        collapse_methyl(i, j, "HD1#", "HD1")
        split_pair(i, j, "HG1")
    }
    if (any_of(res, "PHE TYR")) {
        if (any_of(atom[i,j], "CD1 CD2 HD1 HD2 CE1 CE2 HE1 HE2")) amb[i,j] = 3
        expand_pseudo(i, j, "HD#", "HD1", "HD2", 3)
        expand_pseudo(i, j, "HE#", "HE1", "HE2", 3)
        expand_pseudo(i, j, "CD#", "CD1", "CD2", 3)
        expand_pseudo(i, j, "CE#", "CE1", "CE2", 3)
    }
}

# All the work is triggered by the first input line; the remaining lines are
# pulled in with getline by read_residues.
NR == 1 {
    read_residues()
    reorder(1)

    # num_j[i] can grow while a residue is being annotated, so the loop
    # bound is evaluated again on every iteration.
    for (i = 1; i <= r_lngth; i++)
        for (j = 1; j <= num_j[i]; j++)
            annotate(i, j)

    reorder(2)
    reorder(3)

    for (i = 1; i <= r_lngth; i++)
        for (j = 1; j <= num_j[i]; j++) {
            atom_num++
            printf "%1.0f\t%1.0f\t%s\t%s\t%s\t%3.2f\t%3.2f\t%1.0f\n",
                atom_num, res_num[i], assign[i], atom[i,j],
                substr(atom[i,j], 1, 1), shift[i,j], err[i,j], amb[i,j]
        }
}

END {
printf "\n"
printf "stop_\n"
printf "\n"
printf "# The following loop is used to define sets of Atom-shift assignment IDs that\n"
printf "# represent related ambiguous assignments taken from the above list of\n"
printf "# assigned chemical shifts.  Each element in the set should be separated by a\n"
printf "# comma, as shown in the example below, and is the assignment ID for a chemical\n"
printf "# shift assignment that has been given as ambiguity code of 4 or 5.  Each set\n"
printf "# indicates that the observed chemical shifts are related to the defined\n"
printf "# atoms, but have not been assigned uniquely to a specific atom in the set.\n"
printf "\n"
printf "loop_\n"
printf "  _Atom-shift_assign_ID_ambiguity\n"
printf "\n"
printf "#\n"
printf "#    Sets of Atom-shift Assignment Ambiguities\n"
printf "               #\n"
printf "#    ------------------------------------------\n"
printf "# Example:    5,4,7\n"
printf "#\n"
printf "                @\n"
printf "stop_\n"
printf "\n"
}'
}

# Convert the chosen input file into the intermediate file "tmp".  The residue
# count defaults to 8 and can be overridden through the R_LNGTH environment
# variable instead of editing the awk program.
shft2bmrb_convert "${R_LNGTH:-8}" < "$input" > tmp
# Build the final file by putting the contents of "header" in front of the
# converted table in "tmp".
# NOTE(review): "header" is not created anywhere in this script; presumably it
# is prepared in the working directory by an earlier step -- confirm.
#
# The two working files are removed only after the output was written
# successfully.  Previously they were deleted unconditionally, so a failed run
# (for example a mistyped input name that left no "tmp") still destroyed
# "header".
if cat header tmp > "$output"; then
  rm tmp header
else
  printf '%s\n' "shft2bmrb: could not write '$output' from 'header' and 'tmp'; both files were left in place" >&2
  exit 1
fi

 

