|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/biostruc/mkbioseq_vs.c |
source navigation diff markup identifier search freetext search file search |
1 /* mkbioseq_vs.c
2 *
3 * ===========================================================================
4 *
5 * PUBLIC DOMAIN NOTICE
6 * National Center for Biotechnology Information (NCBI)
7 *
8 * This software/database is a "United States Government Work" under the
9 * terms of the United States Copyright Act. It was written as part of
10 * the author's official duties as a United States Government employee and
11 * thus cannot be copyrighted. This software/database is freely available
12 * to the public for use. The National Library of Medicine and the U.S.
13 * Government do not place any restriction on its use or reproduction.
14 * We would, however, appreciate having the NCBI and the author cited in
15 * any work or product based on this material
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * ===========================================================================
26 *
27 * File Name: mkbioseq_vs.c
28 *
29 * Author: Ken Addess
30 *
31 * $Log: mkbioseq_vs.c,v $
32 * Revision 6.2 1999/06/15 18:12:53 addess
33 * fixed some lines related to BioseqPtr
34 *
35 * Revision 6.2 1999/06/15 18:12:53 addess
36 * fixed some lines related to BioseqPtr
37 *
38 * Revision 6.1 1998/07/17 18:59:57 madej
39 * Created by Ken Addess.
40 *
41 */
42
/************************************************************/
/*                                                          */
/*  MakeBioseqs()                                           */
/*                                                          */
/*  Creates a Bioseq/BioseqSet object from a Biostruc       */
/*  object and returns it to the caller as a SeqEntry.      */
/*                                                          */
/************************************************************/
51
52
53 #include "mkbioseq.h"
54
55 static Int4 NumberOfBioChains(MoleculeGraphPtr mgp)
56 {
57 ValNodePtr vnp;
58 Int4 mtype, nbp = 0;
59
60 while (mgp != NULL)
61 {
62 vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_molecule_type);
63
64 if (vnp) mtype = vnp->data.intvalue;
65
66 switch(mtype)
67 {
68 case 1:
69 case 2:
70 case 3:
71 nbp++;
72 break;
73 }
74
75 mgp = mgp->next;
76 }
77
78 return nbp;
79 }
80
81 static MoleculeGraphPtr MakeBioGraphPtr(MoleculeGraphPtr mgp)
82 {
83 MoleculeGraphPtr newbp, bp = NULL, currentbp;
84 ValNodePtr vnp;
85 Int4 mtype;
86
87 while (mgp != NULL)
88 {
89 vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_molecule_type);
90
91 if (vnp) mtype = vnp->data.intvalue;
92
93 switch(mtype)
94 {
95 case 1:
96 case 2:
97 case 3:
98 newbp = MoleculeGraphNew();
99 newbp->id = mgp->id;
100 newbp->descr = mgp->descr;
101 newbp->seq_id = mgp->seq_id;
102 newbp->residue_sequence = mgp->residue_sequence;
103 newbp->inter_residue_bonds = mgp->inter_residue_bonds;
104 if (bp == NULL)
105 {
106 bp = newbp;
107 }
108 else
109 {
110 currentbp->next = newbp;
111 }
112 currentbp = newbp;
113 break;
114 }
115
116 mgp = mgp->next;
117 }
118
119 return bp;
120 }
121
122 static Int4 NumberOfHetChains(MoleculeGraphPtr mgp, MoleculeGraphPtr bp)
123 {
124 ValNodePtr vnp;
125 Int4 mtype, molecule_id, nhet = 0;
126 CharPtr mname;
127
128 while (mgp != NULL)
129 {
130 vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_molecule_type);
131
132 if (vnp) mtype = vnp->data.intvalue;
133
134 switch(mtype)
135 {
136 case 6:
137 if (vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_name))
138 mname = vnp->data.ptrvalue;
139 molecule_id = atoi(mname);
140
141 if (isBiopoly(molecule_id, bp))
142 {
143 nhet++;
144 }
145 break;
146 }
147 mgp = mgp->next;
148 }
149 return nhet;
150 }
151
152 static MoleculeGraphPtr MakeHetGraphPtr(MoleculeGraphPtr mgp, MoleculeGraphPtr bp)
153 {
154 MoleculeGraphPtr newhet, het = NULL, currenthet;
155 ValNodePtr vnp;
156 Int4 mtype, molecule_id;
157 CharPtr mname;
158
159 while (mgp != NULL)
160 {
161 vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_molecule_type);
162
163 if (vnp) mtype = vnp->data.intvalue;
164
165 switch(mtype)
166 {
167 case 6:
168 vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_name);
169
170 if (vnp) mname = vnp->data.ptrvalue;
171
172 molecule_id = atoi(mname);
173 if (isBiopoly(molecule_id, bp))
174 {
175 newhet = MoleculeGraphNew();
176 newhet->id = mgp->id;
177 newhet->descr = mgp->descr;
178 newhet->seq_id = mgp->seq_id;
179 newhet->residue_sequence = mgp->residue_sequence;
180 newhet->inter_residue_bonds = mgp->inter_residue_bonds;
181 if (het == NULL)
182 {
183 het = newhet;
184 }
185 else
186 {
187 currenthet->next = newhet;
188 }
189 currenthet = newhet;
190 }
191 break;
192 }
193
194 mgp = mgp->next;
195 }
196
197 return het;
198 }
199
200
201 SeqEntryPtr LIBCALL MakeBioseqs(BiostrucPtr bsp, BiostrucResidueGraphSetPtr stdDictionary)
202 {
203 ValNodePtr vnp, seq_set, hetval, pvnThePoints;
204 BiostrucHistoryPtr bhp;
205 BiostrucSourcePtr bssp;
206 BiostrucGraphPtr bsgp;
207 BiostrucModelPtr bsmp;
208 BiostrucFeatureSetPtr bsfsp;
209 BiostrucFeaturePtr bsfp;
210 ChemGraphPntrsPtr cgpp;
211 ResiduePtr rs;
212 ResidueGraphPtr rgp;
213 ResiduePntrsPtr rpp;
214 ResidueIntervalPntrPtr ripp;
215 ResidueExplicitPntrsPtr rpp1=NULL, rpp2=NULL;
216 MoleculeGraphPtr bp, het, currenthet, currentbp, mgp;
217 InterResidueBondPtr currentabp, abp;
218 DbtagPtr dtp;
219 SeqEntryPtr pdb_entry;
220 BioseqSetPtr biossp;
221 BioseqPtr bioseqs[MAXNUM], current_bioseq;
222 Int4 DomainNum, molId1, resId1, atmId1, molId2, resId2, atmId2;
223 Int4 nbp, nhet, num_chain, index = 0, chnidx, bpchnidx, bpresidx, hetidx, rescount, bioseq_idx;
224 Int4 ssresidx1, ssresidx2, ssmolidx1, ssmolidx2;
225 CharPtr feature_name, rname;
226 Boolean interchain, bonds, found1, found2;
227 SeqAnnotPtr sap = NULL;
228 SeqIdPtr sip;
229
230 if (!bsp)
231 {
232 return NULL;
233 }
234
235 vnp = ValNodeFindNext(bsp->descr, NULL, BiostrucDescr_history);
236
237 if (vnp)
238 {
239 bhp = (BiostrucHistoryPtr) vnp->data.ptrvalue;
240 bssp = bhp->data_source;
241 }
242
243 bsgp = bsp->chemical_graph;
244 bsmp = bsp->model;
245
246 nbp = NumberOfBioChains(bsgp->molecule_graphs);
247
248 bp = MakeBioGraphPtr(bsgp->molecule_graphs);
249
250 nhet = NumberOfHetChains(bsgp->molecule_graphs, bp);
251
252 het = MakeHetGraphPtr(bsgp->molecule_graphs, bp);
253
254 pdb_entry = CreateSeqEntry(bssp, bsgp, bsmp, bsp->descr, nbp);
255
256 if (IS_Bioseq(pdb_entry))
257 {
258 vnp = ValNodeFindNext(pdb_entry, NULL, 1);
259 bioseqs[index] = (BioseqPtr) vnp->data.ptrvalue;
260 }
261 else
262 {
263 vnp = ValNodeFindNext(pdb_entry, NULL, 2);
264 biossp = (BioseqSetPtr) vnp->data.ptrvalue;
265 seq_set = biossp->seq_set;
266
267 for (num_chain = 0; num_chain < nbp, seq_set; seq_set = seq_set->next, num_chain++, index++)
268 bioseqs[index] = (BioseqPtr) seq_set->data.ptrvalue;
269 }
270
271 dtp = (DbtagPtr)bssp->database_entry_id->data.ptrvalue;
272
273 for (index = 0, currentbp = bp; index < nbp, currentbp != NULL; currentbp = currentbp->next, index++)
274 {
275 current_bioseq = bioseqs[index];
276 if (currentbp->seq_id->choice == '\f')
277 {
278 current_bioseq->id = MakePDBId(bssp, currentbp, dtp);
279 sip = ValNodeNew(NULL);
280 sip->choice = SEQID_GI;
281 sip->data.intvalue = currentbp->seq_id->data.intvalue;
282 current_bioseq->id->next = sip;
283 }
284 else if (currentbp->seq_id->choice == SEQID_LOCAL)
285 current_bioseq->id = MakeLocalID(-99999, currentbp, dtp);
286 current_bioseq->descr = MakeBioseqDescr(currentbp, current_bioseq->descr);
287 current_bioseq->mol = MakeBioseqMol(currentbp);
288 current_bioseq->length = CountNumOfResidues(currentbp);
289
290 if (current_bioseq->mol == Seq_mol_aa)
291 current_bioseq->seq_data_type = Seq_code_iupacaa;
292 else
293 current_bioseq->seq_data_type = Seq_code_iupacna;
294
295 current_bioseq->seq_data = AddSeqData(currentbp, current_bioseq->mol, current_bioseq->length, bsgp, stdDictionary);
296 current_bioseq->annot = AddNstdSeqAnnot(currentbp, current_bioseq->id, bsgp);
297 }
298 /* Add information about Secondary Structure and Domains */
299 for (bsfsp = bsp->features, DomainNum = 0; bsfsp; bsfsp = bsfsp->next)
300 {
301 if (vnp = ValNodeFindNext(bsfsp->descr, NULL, BiostrucFeatureSetDescr_name))
302 feature_name = vnp->data.ptrvalue;
303
304 if ((!StringICmp("NCBI assigned secondary structure", feature_name)) ||
305 (!StringICmp("NCBI Domains", feature_name)))
306 {
307 for (bsfp = bsfsp->features; bsfp; bsfp = bsfp->next)
308 {
309 cgpp = (ChemGraphPntrsPtr)bsfp->Location_location->data.ptrvalue;
310 rpp = (ResiduePntrsPtr)cgpp->data.ptrvalue;
311 ripp = (ResidueIntervalPntrPtr)rpp->data.ptrvalue;
312 chnidx = findChnidx(ripp->molecule_id, nbp, bp);
313 current_bioseq = bioseqs[chnidx-1];
314
315 if (!StringICmp("NCBI Domains", feature_name)) DomainNum++;
316
317 if (current_bioseq->annot)
318 current_bioseq->annot = AddSecDomToSeqAnnot(bsfp, feature_name, current_bioseq->annot, current_bioseq->id, DomainNum);
319 else
320 current_bioseq->annot = AddSecDomToSeqAnnot(bsfp, feature_name, NULL, current_bioseq->id, DomainNum);
321 }
322 }
323 }
324 for (index = 0, currenthet = het; index < nhet, currenthet; currenthet = currenthet->next, index++)
325 {
326 hetval = MakeHetValNode(currenthet, stdDictionary, bsgp->residue_graphs);
327 bioseq_idx = 0;
328 interchain = FALSE;
329
330 for (abp = bsgp->inter_molecule_bonds, bonds = FALSE, rescount = 0; abp; abp = abp->next)
331 {
332 molId1 = abp->atom_id_1->molecule_id;
333 molId2 = abp->atom_id_2->molecule_id;
334 resId1 = abp->atom_id_1->residue_id;
335 resId2 = abp->atom_id_2->residue_id;
336 atmId1 = abp->atom_id_1->atom_id;
337 atmId2 = abp->atom_id_2->atom_id;
338
339 if (isBiopoly(molId1, bp) && isHet(molId2, het))
340 {
341 bpchnidx = molId1 - 1;
342 bpresidx = resId1 - 1;
343 hetidx = getHetIdx(molId2, het);
344 bonds = TRUE;
345 }
346 else if (isBiopoly(molId2, bp) && isHet(molId1, het))
347 {
348 bpchnidx = molId2 - 1;
349 bpresidx = resId2 - 1;
350 hetidx = getHetIdx(molId1, het);
351 bonds = TRUE;
352 }
353
354 if (bonds)
355 {
356 if (hetidx == index)
357 {
358 if (!rescount) pvnThePoints = NULL;
359 ValNodeAddInt(&pvnThePoints, 0, bpresidx);
360 rescount++;
361 if (bioseq_idx >= 0)
362 {
363 if (bioseq_idx != bpchnidx) interchain = TRUE;
364 }
365 bioseq_idx = bpchnidx;
366 }
367 }
368 }
369
370 if (rescount)
371 {
372 if (!interchain)
373 {
374 current_bioseq = bioseqs[bioseq_idx];
375 if (current_bioseq->annot)
376 current_bioseq->annot = AddHetToSeqAnnot(current_bioseq->annot, current_bioseq->id, hetval, pvnThePoints, rescount);
377 else
378 current_bioseq->annot = AddHetToSeqAnnot(NULL, current_bioseq->id, hetval, pvnThePoints, rescount);
379 }
380 if (interchain)
381 {
382 if (IS_Bioseq(pdb_entry))
383 sap = ((BioseqPtr)(pdb_entry->data.ptrvalue))->annot;
384 else
385 sap = ((BioseqSetPtr)(pdb_entry->data.ptrvalue))->annot;
386 if (sap == NULL)
387 {
388 sap = SeqAnnotNew();
389 sap->type = 1;
390 if (IS_Bioseq(pdb_entry))
391 ((BioseqPtr)(pdb_entry->data.ptrvalue))->annot = sap;
392 else
393 ((BioseqSetPtr)(pdb_entry->data.ptrvalue))->annot = sap;
394 }
395 sap = AddHetToSeqAnnot(sap, bioseqs[bioseq_idx]->id, hetval, pvnThePoints, rescount);
396 }
397 }
398 else
399 {
400 current_bioseq = bioseqs[bioseq_idx];
401 vnp = current_bioseq->descr;
402 if (vnp != NULL)
403 {
404 while (vnp->next != NULL) vnp = vnp->next;
405 vnp->next = hetval;
406 }
407 else current_bioseq->descr = hetval;
408 }
409 }
410
411 mgp = bsgp->molecule_graphs;
412 abp = bsgp->inter_molecule_bonds;
413
414 while(1)
415 {
416 if (mgp != NULL)
417 {
418 currentabp = mgp->inter_residue_bonds;
419 mgp = mgp->next;
420 }
421
422 while (currentabp != NULL)
423 {
424 molId1 = currentabp->atom_id_1->molecule_id;
425 molId2 = currentabp->atom_id_2->molecule_id;
426 resId1 = currentabp->atom_id_1->residue_id;
427 resId2 = currentabp->atom_id_2->residue_id;
428 atmId1 = currentabp->atom_id_1->atom_id;
429 atmId2 = currentabp->atom_id_2->atom_id;
430
431 interchain = FALSE;
432 found1 = FALSE;
433 found2 = FALSE;
434
435 if ((getAtomElementIdx(molId1, resId1, atmId1, bsgp, stdDictionary)==16)
436 && (getAtomElementIdx(molId2, resId2, atmId2, bsgp, stdDictionary)==16))
437 {
438 /* Found possible disulfide bonds. */
439
440 if (isBiopoly(molId1, bp) && isBiopoly(molId2, bp))
441 {
442 currentbp = bp;
443 for (index=0; index<findChnidx(molId1, nbp, bp)-1; index++)
444 currentbp = currentbp->next;
445
446 rs = currentbp->residue_sequence;
447
448 while (rs)
449 {
450 if (rs->id == resId1)
451 {
452 rgp = getResGraph(rs->residue_graph, bsgp, stdDictionary);
453 break;
454 }
455
456 rs = rs->next;
457 }
458
459 if (vnp = ValNodeFindNext(rgp->descr, NULL, BiomolDescr_name))
460 rname = vnp->data.ptrvalue;
461
462 if (!StringICmp(rname, "CYS")) found1 = TRUE;
463
464 currentbp = bp;
465 for (index = 0; index < findChnidx(molId2, nbp, bp)-1; index++)
466 currentbp = currentbp->next;
467
468 rs = currentbp->residue_sequence;
469
470 while (rs)
471 {
472 if (rs->id == resId2)
473 {
474 rgp = getResGraph(rs->residue_graph, bsgp, stdDictionary);
475 break;
476 }
477
478 rs = rs->next;
479 }
480
481 if (vnp = ValNodeFindNext(rgp->descr, NULL, BiomolDescr_name))
482 rname = vnp->data.ptrvalue;
483
484 if (!StringICmp(rname, "CYS")) found2 = TRUE;
485
486 if (found1 && found2)
487 {
488 ssresidx1 = resId1 - 1;
489 ssresidx2 = resId2 - 1;
490 ssmolidx1 = molId1 - 1;
491 ssmolidx2 = molId2 - 1;
492 chnidx = findChnidx(molId1, nbp, bp);
493
494 if (ssmolidx1 == ssmolidx2)
495 {
496 current_bioseq = bioseqs[chnidx - 1];
497 if (current_bioseq->annot)
498 current_bioseq->annot = AddDisulToSeqAnnot(current_bioseq->annot, ssresidx1, ssresidx2, current_bioseq->id, current_bioseq->id);
499 else
500 current_bioseq->annot = AddDisulToSeqAnnot(NULL, ssresidx1, ssresidx2, current_bioseq->id, current_bioseq->id);
501 }
502 else
503 {
504 if (IS_Bioseq(pdb_entry))
505 sap = ((BioseqPtr)(pdb_entry->data.ptrvalue))->annot;
506 else
507 sap = ((BioseqSetPtr)(pdb_entry->data.ptrvalue))->annot;
508 if (sap == NULL)
509 {
510 sap = SeqAnnotNew();
511 sap->type = 1;
512 if (IS_Bioseq(pdb_entry))
513 ((BioseqPtr)(pdb_entry->data.ptrvalue))->annot = sap;
514 else
515 ((BioseqSetPtr)(pdb_entry->data.ptrvalue))->annot = sap;
516 }
517 sap = AddDisulToSeqAnnot(sap, ssresidx1, ssresidx2, bioseqs[ssmolidx1]->id, bioseqs[ssmolidx2]->id);
518 }
519 }
520 }
521 }
522
523 currentabp = currentabp->next;
524 }
525
526 if ((currentabp == NULL) && (mgp == NULL) && (abp == NULL)) break;
527
528 else if((currentabp == NULL) && (mgp == NULL) && (abp != NULL))
529 {
530 currentabp = abp;
531 abp = NULL;
532 }
533 }
534
535 return pdb_entry;
536 }
537
538
539 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |