NCBI C Toolkit Cross Reference

C/biostruc/mkbioseq_vs.c


  1 /* mkbioseq_vs.c
  2  *
  3  * ===========================================================================
  4  *
  5  *                            PUBLIC DOMAIN NOTICE
  6  *            National Center for Biotechnology Information (NCBI)
  7  *
  8  *  This software/database is a "United States Government Work" under the
  9  *  terms of the United States Copyright Act.  It was written as part of
 10  *  the author's official duties as a United States Government employee and
 11  *  thus cannot be copyrighted.  This software/database is freely available
 12  *  to the public for use. The National Library of Medicine and the U.S.
 13  *  Government do not place any restriction on its use or reproduction.
 14  *  We would, however, appreciate having the NCBI and the author cited in
 15  *  any work or product based on this material
 16  *
 17  *  Although all reasonable efforts have been taken to ensure the accuracy
 18  *  and reliability of the software and data, the NLM and the U.S.
 19  *  Government do not and cannot warrant the performance or results that
 20  *  may be obtained by using this software or data. The NLM and the U.S.
 21  *  Government disclaim all warranties, express or implied, including
 22  *  warranties of performance, merchantability or fitness for any particular
 23  *  purpose.
 24  *
 25  * ===========================================================================
 26  *
 27  * File Name: mkbioseq_vs.c
 28  *
 29  * Author: Ken Addess
 30  *
 31  * $Log: mkbioseq_vs.c,v $
 32  * Revision 6.2  1999/06/15 18:12:53  addess
 33  * fixed some lines related to BioseqPtr
 34  *
 35  * Revision 6.2  1999/06/15 18:12:53  addess
 36  * fixed some lines related to BioseqPtr
 37  *
 38  * Revision 6.1  1998/07/17 18:59:57  madej
 39  * Created by Ken Addess.
 40  *
 41  */
 42 
 43 /************************************************************/
 44 /*                                                          */
 45 /*      MakeBioseqs()                                       */
 46 /*                                                          */
 47 /*      Creates a Bioseq/BioseqSet object from a Biostruc   */
 48 /*      object and returns it as a SeqEntryPtr.             */
 49 /*                                                          */
 50 /************************************************************/ 
 51 
 52 
 53 #include "mkbioseq.h"
 54 
 55 static Int4 NumberOfBioChains(MoleculeGraphPtr mgp)
 56 {
 57   ValNodePtr vnp;
 58   Int4 mtype, nbp = 0;
 59 
 60   while (mgp != NULL)
 61   {
 62       vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_molecule_type);
 63 
 64       if (vnp) mtype = vnp->data.intvalue;
 65 
 66      switch(mtype)
 67      {
 68          case 1:
 69          case 2:
 70          case 3:
 71             nbp++;
 72             break;
 73      }
 74         
 75       mgp = mgp->next;
 76   }
 77   
 78   return nbp;
 79 } 
 80 
 81 static MoleculeGraphPtr MakeBioGraphPtr(MoleculeGraphPtr mgp)
 82 {
 83   MoleculeGraphPtr newbp, bp = NULL, currentbp;
 84   ValNodePtr vnp;
 85   Int4 mtype;
 86 
 87   while (mgp != NULL)
 88    {
 89       vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_molecule_type);
 90 
 91       if (vnp) mtype = vnp->data.intvalue;
 92 
 93       switch(mtype)
 94       {
 95          case 1:
 96          case 2:
 97          case 3:
 98             newbp = MoleculeGraphNew();
 99             newbp->id = mgp->id;
100             newbp->descr = mgp->descr;
101             newbp->seq_id = mgp->seq_id;
102             newbp->residue_sequence = mgp->residue_sequence;
103             newbp->inter_residue_bonds = mgp->inter_residue_bonds;
104             if (bp == NULL)
105             {
106                bp = newbp;
107             }
108             else
109             {
110                currentbp->next = newbp;
111             }
112             currentbp = newbp;
113             break;
114       }
115 
116       mgp = mgp->next;
117    }
118    
119   return bp;
120 }
121 
122 static Int4 NumberOfHetChains(MoleculeGraphPtr mgp, MoleculeGraphPtr bp)
123 {
124   ValNodePtr vnp;
125   Int4 mtype, molecule_id, nhet = 0;
126   CharPtr mname;
127 
128   while (mgp != NULL)
129   {
130     vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_molecule_type);
131 
132     if (vnp) mtype = vnp->data.intvalue;
133 
134     switch(mtype)
135     {
136       case 6:
137       if (vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_name))
138       mname = vnp->data.ptrvalue;
139       molecule_id = atoi(mname);
140             
141       if (isBiopoly(molecule_id, bp))
142       {
143         nhet++;
144       }
145       break;
146     }
147    mgp = mgp->next;
148   }
149   return nhet;
150 }
151 
152 static MoleculeGraphPtr MakeHetGraphPtr(MoleculeGraphPtr mgp, MoleculeGraphPtr bp)
153 {
154   MoleculeGraphPtr newhet, het = NULL, currenthet;
155   ValNodePtr vnp;
156   Int4 mtype, molecule_id;
157   CharPtr mname;
158 
159   while (mgp != NULL)
160   {
161       vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_molecule_type);
162 
163       if (vnp) mtype = vnp->data.intvalue;
164 
165       switch(mtype)
166       {
167         case 6:
168           vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_name);
169 
170           if (vnp) mname = vnp->data.ptrvalue;
171                   
172           molecule_id = atoi(mname);
173           if (isBiopoly(molecule_id, bp))
174           {
175             newhet = MoleculeGraphNew();
176             newhet->id = mgp->id;
177             newhet->descr = mgp->descr;
178             newhet->seq_id = mgp->seq_id;
179             newhet->residue_sequence = mgp->residue_sequence;
180             newhet->inter_residue_bonds = mgp->inter_residue_bonds;
181             if (het == NULL)
182             {
183               het = newhet;
184             }
185             else
186             {
187               currenthet->next = newhet;
188             }
189             currenthet = newhet;
190           }
191           break;
192       }
193 
194    mgp = mgp->next;
195   }
196    
197   return het;
198 }
199 
200 
201 SeqEntryPtr LIBCALL MakeBioseqs(BiostrucPtr bsp, BiostrucResidueGraphSetPtr stdDictionary)
202 {
203   ValNodePtr vnp, seq_set, hetval, pvnThePoints;
204   BiostrucHistoryPtr bhp;
205   BiostrucSourcePtr bssp;
206   BiostrucGraphPtr bsgp;
207   BiostrucModelPtr bsmp;
208   BiostrucFeatureSetPtr bsfsp;
209   BiostrucFeaturePtr bsfp;
210   ChemGraphPntrsPtr cgpp;
211   ResiduePtr rs;
212   ResidueGraphPtr rgp;
213   ResiduePntrsPtr rpp;
214   ResidueIntervalPntrPtr ripp;
215   ResidueExplicitPntrsPtr rpp1=NULL, rpp2=NULL;
216   MoleculeGraphPtr bp, het, currenthet, currentbp, mgp;
217   InterResidueBondPtr currentabp, abp;
218   DbtagPtr dtp;
219   SeqEntryPtr pdb_entry;
220   BioseqSetPtr biossp;
221   BioseqPtr bioseqs[MAXNUM], current_bioseq;
222   Int4 DomainNum, molId1, resId1, atmId1, molId2, resId2, atmId2;
223   Int4 nbp, nhet, num_chain, index = 0, chnidx, bpchnidx, bpresidx, hetidx, rescount, bioseq_idx;
224   Int4 ssresidx1, ssresidx2, ssmolidx1, ssmolidx2;
225   CharPtr feature_name, rname;
226   Boolean interchain, bonds, found1, found2;
227   SeqAnnotPtr sap = NULL;
228   SeqIdPtr sip;
229 
230   if (!bsp)
231   {
232     return NULL;
233   }
234       
235   vnp = ValNodeFindNext(bsp->descr, NULL, BiostrucDescr_history);
236 
237   if (vnp)
238   { 
239     bhp = (BiostrucHistoryPtr) vnp->data.ptrvalue; 
240     bssp = bhp->data_source;
241   }
242 
243   bsgp = bsp->chemical_graph; 
244   bsmp = bsp->model;
245     
246   nbp = NumberOfBioChains(bsgp->molecule_graphs);
247    
248   bp =  MakeBioGraphPtr(bsgp->molecule_graphs);
249 
250   nhet = NumberOfHetChains(bsgp->molecule_graphs, bp);
251   
252   het = MakeHetGraphPtr(bsgp->molecule_graphs, bp);
253    
254   pdb_entry = CreateSeqEntry(bssp, bsgp, bsmp, bsp->descr, nbp);
255    
256   if (IS_Bioseq(pdb_entry))
257   {
258     vnp = ValNodeFindNext(pdb_entry, NULL, 1);
259     bioseqs[index] = (BioseqPtr) vnp->data.ptrvalue;
260   }
261   else
262   {
263     vnp = ValNodeFindNext(pdb_entry, NULL, 2);
264     biossp = (BioseqSetPtr) vnp->data.ptrvalue;
265     seq_set = biossp->seq_set;
266       
267     for (num_chain = 0; num_chain < nbp, seq_set; seq_set = seq_set->next, num_chain++, index++)
268       bioseqs[index] = (BioseqPtr) seq_set->data.ptrvalue;
269   }  
270   
271   dtp = (DbtagPtr)bssp->database_entry_id->data.ptrvalue;
272    
273   for (index = 0, currentbp = bp; index < nbp, currentbp != NULL; currentbp = currentbp->next, index++)
274   {
275      current_bioseq = bioseqs[index];
276      if (currentbp->seq_id->choice == '\f')
277      {
278        current_bioseq->id = MakePDBId(bssp, currentbp, dtp);   
279        sip = ValNodeNew(NULL);
280        sip->choice = SEQID_GI;
281        sip->data.intvalue = currentbp->seq_id->data.intvalue;
282        current_bioseq->id->next = sip;
283      }
284      else if (currentbp->seq_id->choice == SEQID_LOCAL)
285        current_bioseq->id = MakeLocalID(-99999, currentbp, dtp);
286      current_bioseq->descr = MakeBioseqDescr(currentbp, current_bioseq->descr);
287      current_bioseq->mol = MakeBioseqMol(currentbp);
288      current_bioseq->length = CountNumOfResidues(currentbp);
289      
290      if (current_bioseq->mol == Seq_mol_aa)
291         current_bioseq->seq_data_type = Seq_code_iupacaa;
292      else
293         current_bioseq->seq_data_type = Seq_code_iupacna;
294      
295      current_bioseq->seq_data = AddSeqData(currentbp, current_bioseq->mol, current_bioseq->length, bsgp, stdDictionary);
296      current_bioseq->annot = AddNstdSeqAnnot(currentbp, current_bioseq->id, bsgp);
297    }  
298    /* Add information about Secondary Structure and Domains */
299   for (bsfsp = bsp->features, DomainNum = 0; bsfsp; bsfsp = bsfsp->next)
300   {
301     if (vnp = ValNodeFindNext(bsfsp->descr, NULL, BiostrucFeatureSetDescr_name))
302        feature_name = vnp->data.ptrvalue;
303      
304     if ((!StringICmp("NCBI assigned secondary structure", feature_name)) ||
305        (!StringICmp("NCBI Domains", feature_name)))
306     {
307       for (bsfp = bsfsp->features; bsfp; bsfp = bsfp->next)
308       {
309         cgpp = (ChemGraphPntrsPtr)bsfp->Location_location->data.ptrvalue;
310         rpp = (ResiduePntrsPtr)cgpp->data.ptrvalue;
311         ripp = (ResidueIntervalPntrPtr)rpp->data.ptrvalue;
312         chnidx = findChnidx(ripp->molecule_id, nbp, bp);
313         current_bioseq = bioseqs[chnidx-1];
314         
315         if (!StringICmp("NCBI Domains", feature_name)) DomainNum++;
316         
317         if (current_bioseq->annot)
318           current_bioseq->annot = AddSecDomToSeqAnnot(bsfp, feature_name, current_bioseq->annot, current_bioseq->id, DomainNum);
319         else
320           current_bioseq->annot = AddSecDomToSeqAnnot(bsfp, feature_name, NULL, current_bioseq->id, DomainNum); 
321       }
322     }
323   }
324   for (index = 0, currenthet = het; index < nhet, currenthet; currenthet = currenthet->next, index++)
325   {
326     hetval = MakeHetValNode(currenthet, stdDictionary, bsgp->residue_graphs);
327     bioseq_idx = 0;
328     interchain = FALSE;
329 
330     for (abp = bsgp->inter_molecule_bonds, bonds = FALSE, rescount = 0; abp; abp = abp->next)
331     {
332       molId1 = abp->atom_id_1->molecule_id;
333       molId2 = abp->atom_id_2->molecule_id;
334       resId1 = abp->atom_id_1->residue_id;
335       resId2 = abp->atom_id_2->residue_id;
336       atmId1 = abp->atom_id_1->atom_id;
337       atmId2 = abp->atom_id_2->atom_id;
338       
339       if (isBiopoly(molId1, bp) && isHet(molId2, het))
340       {
341         bpchnidx = molId1 - 1;
342         bpresidx = resId1 - 1;
343         hetidx = getHetIdx(molId2, het);
344         bonds = TRUE;
345       }
346       else if (isBiopoly(molId2, bp) && isHet(molId1, het))
347       {
348         bpchnidx = molId2 - 1;
349         bpresidx = resId2 - 1;
350         hetidx = getHetIdx(molId1, het);
351         bonds = TRUE;
352       }
353       
354       if (bonds)
355       {
356         if (hetidx == index)
357         {
358           if (!rescount) pvnThePoints = NULL;
359           ValNodeAddInt(&pvnThePoints, 0, bpresidx);
360           rescount++;
361           if (bioseq_idx >= 0)
362           {
363              if (bioseq_idx != bpchnidx) interchain = TRUE;
364           }
365           bioseq_idx = bpchnidx;
366         }
367       }
368     }
369     
370     if (rescount)
371     {  
372       if (!interchain)
373       {
374         current_bioseq = bioseqs[bioseq_idx];
375         if (current_bioseq->annot)
376           current_bioseq->annot = AddHetToSeqAnnot(current_bioseq->annot, current_bioseq->id, hetval, pvnThePoints, rescount);
377         else
378           current_bioseq->annot = AddHetToSeqAnnot(NULL, current_bioseq->id, hetval, pvnThePoints, rescount); 
379       }
380       if (interchain)
381       {
382         if (IS_Bioseq(pdb_entry))
383           sap = ((BioseqPtr)(pdb_entry->data.ptrvalue))->annot;
384         else
385           sap = ((BioseqSetPtr)(pdb_entry->data.ptrvalue))->annot;
386         if (sap == NULL)
387         {
388           sap = SeqAnnotNew();
389           sap->type = 1;
390           if (IS_Bioseq(pdb_entry))
391             ((BioseqPtr)(pdb_entry->data.ptrvalue))->annot = sap;
392           else
393             ((BioseqSetPtr)(pdb_entry->data.ptrvalue))->annot = sap;
394          }
395          sap = AddHetToSeqAnnot(sap, bioseqs[bioseq_idx]->id, hetval, pvnThePoints, rescount);
396       }
397     }
398     else
399     {
400       current_bioseq = bioseqs[bioseq_idx];
401       vnp = current_bioseq->descr;
402       if (vnp != NULL)
403       { 
404         while (vnp->next != NULL) vnp = vnp->next;
405         vnp->next = hetval;
406       }
407       else current_bioseq->descr = hetval;
408     }
409   }      
410   
411   mgp = bsgp->molecule_graphs;
412   abp = bsgp->inter_molecule_bonds;
413   
414   while(1)
415   {
416     if (mgp != NULL)
417     { 
418       currentabp = mgp->inter_residue_bonds;
419       mgp = mgp->next;
420     }
421     
422     while (currentabp != NULL)
423     {
424       molId1 = currentabp->atom_id_1->molecule_id;
425       molId2 = currentabp->atom_id_2->molecule_id;
426       resId1 = currentabp->atom_id_1->residue_id;
427       resId2 = currentabp->atom_id_2->residue_id;
428       atmId1 = currentabp->atom_id_1->atom_id;
429       atmId2 = currentabp->atom_id_2->atom_id;
430 
431       interchain = FALSE;
432       found1 = FALSE;
433       found2 = FALSE;
434 
435       if ((getAtomElementIdx(molId1, resId1, atmId1, bsgp, stdDictionary)==16) 
436          && (getAtomElementIdx(molId2, resId2, atmId2, bsgp, stdDictionary)==16))
437       { 
438          /* Found possible disulfide bonds. */
439 
440         if (isBiopoly(molId1, bp) && isBiopoly(molId2, bp))  
441         {
442           currentbp = bp;
443           for (index=0; index<findChnidx(molId1, nbp, bp)-1; index++)
444             currentbp = currentbp->next;
445 
446           rs = currentbp->residue_sequence;
447 
448            while (rs)
449            {
450              if (rs->id == resId1)
451              {
452                rgp = getResGraph(rs->residue_graph, bsgp, stdDictionary);
453                break;
454              }
455            
456             rs = rs->next;
457            }
458            
459           if (vnp = ValNodeFindNext(rgp->descr, NULL, BiomolDescr_name))
460              rname = vnp->data.ptrvalue;
461                  
462           if (!StringICmp(rname, "CYS")) found1 = TRUE;
463 
464           currentbp = bp;
465           for (index = 0; index < findChnidx(molId2, nbp, bp)-1; index++)
466              currentbp = currentbp->next;
467 
468           rs = currentbp->residue_sequence;
469 
470           while (rs)
471           {
472             if (rs->id == resId2)
473             {
474               rgp = getResGraph(rs->residue_graph, bsgp, stdDictionary);
475               break;
476              }
477 
478             rs = rs->next;
479           }
480 
481           if (vnp = ValNodeFindNext(rgp->descr, NULL, BiomolDescr_name))
482             rname = vnp->data.ptrvalue;
483  
484           if (!StringICmp(rname, "CYS")) found2 = TRUE;
485 
486           if (found1 && found2)
487           {
488             ssresidx1 = resId1 - 1;
489             ssresidx2 = resId2 - 1;
490             ssmolidx1 = molId1 - 1;
491             ssmolidx2 = molId2 - 1;
492             chnidx = findChnidx(molId1, nbp, bp);
493             
494             if (ssmolidx1 == ssmolidx2) 
495             {
496               current_bioseq = bioseqs[chnidx - 1];
497               if (current_bioseq->annot)
498                 current_bioseq->annot = AddDisulToSeqAnnot(current_bioseq->annot, ssresidx1, ssresidx2, current_bioseq->id, current_bioseq->id);
499               else
500                 current_bioseq->annot = AddDisulToSeqAnnot(NULL, ssresidx1, ssresidx2, current_bioseq->id, current_bioseq->id);
501             }
502             else
503             {
504               if (IS_Bioseq(pdb_entry))
505                 sap = ((BioseqPtr)(pdb_entry->data.ptrvalue))->annot;
506               else
507                 sap = ((BioseqSetPtr)(pdb_entry->data.ptrvalue))->annot;
508               if (sap == NULL)
509               {
510                 sap = SeqAnnotNew();
511                 sap->type = 1;
512                 if (IS_Bioseq(pdb_entry))
513                   ((BioseqPtr)(pdb_entry->data.ptrvalue))->annot = sap;
514                 else
515                   ((BioseqSetPtr)(pdb_entry->data.ptrvalue))->annot = sap;
516               }
517               sap = AddDisulToSeqAnnot(sap, ssresidx1, ssresidx2, bioseqs[ssmolidx1]->id, bioseqs[ssmolidx2]->id);
518             }   
519           }
520         }
521       }   
522       
523       currentabp = currentabp->next;
524     }
525     
526     if ((currentabp == NULL) && (mgp == NULL) && (abp == NULL)) break;
527     
528     else if((currentabp == NULL) && (mgp == NULL) && (abp != NULL))
529     { 
530       currentabp = abp;
531       abp = NULL;
532     }
533   }
534 
535   return pdb_entry;
536 }
537 
538 
539 

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.