|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/api/acerdapi.c |
source navigation diff markup identifier search freetext search file search |
1 /*
2 * $Id: acerdapi.c,v 1.15 2008/12/02 17:13:14 bollin Exp $
3 *
4 * ===========================================================================
5 *
6 * PUBLIC DOMAIN NOTICE
7 * National Center for Biotechnology Information
8 *
9 * This software/database is a "United States Government Work" under the
10 * terms of the United States Copyright Act. It was written as part of
11 * the author's official duties as a United States Government employee and
12 * thus cannot be copyrighted. This software/database is freely available
13 * to the public for use. The National Library of Medicine and the U.S.
14 * Government have not placed any restriction on its use or reproduction.
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * Please cite the author in any work or product based on this material.
25 *
26 * ===========================================================================
27 *
28 * Authors: Colleen Bollin
29 *
30 */
31
32
33 #include <stdlib.h>
34 #include <ncbi.h>
35 #include <ncbistr.h>
36 #include <seqport.h>
37 #include <sqnutils.h>
38 #include <gather.h>
39 #include <pmfapi.h>
40 #include <alignmgr2.h>
41 #include <explore.h>
42 #include <aceread.h>
43 #include <acerdapi.h>
44
45
46 /* This constructs an ASN.1 SeqGraph that contains the quality scores from the consensus sequence */
47 static SeqGraphPtr SeqGraphFromContig (TContigPtr contig, BioseqPtr bsp)
48 {
49 SeqGraphPtr sgp;
50 ByteStorePtr bs;
51 Uint1 bytes[128];
52 Int2 max = INT2_MIN;
53 Int2 min = INT2_MAX;
54 Int4 q_pos, b_pos;
55 SeqIntPtr sintp;
56
57 if (contig == NULL || contig->num_qual_scores == 0 || contig->qual_scores == NULL
58 || bsp == NULL) {
59 return NULL;
60 }
61
62 sgp = SeqGraphNew ();
63 bs = BSNew (1000);
64 q_pos = 0;
65 while (q_pos < contig->num_qual_scores) {
66 b_pos = 0;
67 while (b_pos < sizeof (bytes) && q_pos < contig->num_qual_scores) {
68 max = MAX (max, (Int2) contig->qual_scores[q_pos]);
69 min = MIN (min, (Int2) contig->qual_scores[q_pos]);
70 bytes[b_pos++] = (Uint1) contig->qual_scores[q_pos++];
71 }
72 BSWrite (bs, (Pointer) bytes, (Int4) b_pos);
73 }
74 sgp->numval = BSLen (bs);
75 BSPutByte (bs, EOF);
76 sgp->title = StringSave ("Phrap Quality");
77 sgp->flags [0] = 0;
78 sgp->compr = 1;
79 sgp->flags [1] = 0;
80 sgp->flags [2] = 3;
81 sgp->axis.intvalue = 0;
82 sgp->min.intvalue = min;
83 sgp->max.intvalue = max;
84 sgp->a = 1.0;
85 sgp->b = 0;
86 sgp->values = (Pointer) bs;
87
88 sintp = SeqIntNew ();
89 sintp->from = 0;
90 sintp->to = bsp->length - 1;
91 sintp->id = SeqIdDup (bsp->id);
92 ValNodeAddPointer (&(sgp->loc), SEQLOC_INT, (Pointer) sintp);
93
94 return sgp;
95 }
96
97
98 NLM_EXTERN SeqEntryPtr MakeSeqEntryFromRead (TContigReadPtr read)
99 {
100 CharPtr seq_data;
101 SeqIdPtr sip;
102 SeqEntryPtr sep = NULL;
103 BioseqPtr bsp;
104 SeqDescrPtr sdp;
105 MolInfoPtr mip;
106
107 if (read == NULL) {
108 return NULL;
109 }
110
111 seq_data = AlignmentStringToSequenceString (read->read_seq, Seq_mol_na);
112 sip = MakeSeqID (read->read_id);
113 sep = SequenceStringToSeqEntry (seq_data, sip, Seq_mol_na);
114 if (sep != NULL && IS_Bioseq (sep)) {
115 bsp = (BioseqPtr) sep->data.ptrvalue;
116 bsp->mol = Seq_mol_rna;
117 if (read->is_complement) {
118 BioseqRevComp (bsp);
119 }
120 /* add molinfo */
121 sdp = bsp->descr;
122 while (sdp != NULL && sdp->choice != Seq_descr_molinfo) {
123 sdp = sdp->next;
124 }
125 if (sdp == NULL) {
126 sdp = SeqDescrNew (bsp->descr);
127 if (bsp->descr == NULL) {
128 bsp->descr = sdp;
129 }
130 sdp->choice = Seq_descr_molinfo;
131 mip = MolInfoNew ();
132 mip->biomol = MOLECULE_TYPE_MRNA;
133 sdp->data.ptrvalue = mip;
134 } else {
135 mip = (MolInfoPtr) sdp->data.ptrvalue;
136 }
137 mip->tech = MI_TECH_tsa;
138 }
139 return sep;
140 }
141
142
143 NLM_EXTERN SeqEntryPtr MakeSeqEntryFromContig (TContigPtr contig)
144 {
145 CharPtr seq_data;
146 SeqIdPtr sip;
147 SeqEntryPtr sep = NULL;
148 BioseqPtr bsp;
149 SeqGraphPtr sgp;
150 SeqAnnotPtr sap;
151 SeqDescrPtr sdp;
152 MolInfoPtr mip;
153
154 if (contig == NULL) {
155 return NULL;
156 }
157
158 seq_data = AlignmentStringToSequenceString (contig->consensus_seq, Seq_mol_na);
159 sip = MakeSeqID (contig->consensus_id);
160 sep = SequenceStringToSeqEntry (seq_data, sip, Seq_mol_na);
161 if (sep != NULL && IS_Bioseq (sep)) {
162 bsp = (BioseqPtr) sep->data.ptrvalue;
163 bsp->mol = Seq_mol_rna;
164 /* add molinfo */
165 sdp = bsp->descr;
166 while (sdp != NULL && sdp->choice != Seq_descr_molinfo) {
167 sdp = sdp->next;
168 }
169 if (sdp == NULL) {
170 sdp = SeqDescrNew (bsp->descr);
171 if (bsp->descr == NULL) {
172 bsp->descr = sdp;
173 }
174 sdp->choice = Seq_descr_molinfo;
175 mip = MolInfoNew ();
176 mip->biomol = MOLECULE_TYPE_MRNA;
177 sdp->data.ptrvalue = mip;
178 } else {
179 mip = (MolInfoPtr) sdp->data.ptrvalue;
180 }
181 mip->tech = MI_TECH_tsa;
182
183 sgp = SeqGraphFromContig (contig, bsp);
184 if (sgp != NULL) {
185 sap = SeqAnnotNew ();
186 sap->type = 3;
187 sap->data = sgp;
188 sap->next = bsp->annot;
189 bsp->annot = sap;
190 }
191 }
192 return sep;
193 }
194
195
196 /* This function compares a string of nucleotide characters to an existing Bioseq */
197 static Boolean DoesSeqStringMatchBsp (CharPtr seq_str, BioseqPtr bsp, Uint1 strand)
198 {
199 Char buf[51];
200 CharPtr cp_s, cp_b;
201 Int4 ctr, pos = 0, i, len, seq_len;
202
203 if (seq_str == NULL || bsp == NULL) return FALSE;
204 cp_s = seq_str;
205 len = sizeof (buf) - 1;
206 seq_len = StringLen (seq_str);
207
208 while (pos < bsp->length) {
209 if (strand == Seq_strand_minus) {
210 ctr = SeqPortStreamInt (bsp, MAX (0, bsp->length - pos - len), bsp->length - pos - 1, Seq_strand_minus,
211 STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL,
212 (Pointer) buf, NULL);
213 } else {
214 ctr = SeqPortStreamInt (bsp, pos, MIN(pos + len - 1, bsp->length - 1), Seq_strand_plus,
215 STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL,
216 (Pointer) buf, NULL);
217 }
218
219 for (i = 0, cp_b = buf; i < ctr && *cp_s != 0; i++, cp_b++) {
220 while (*cp_s == '*') cp_s++;
221 if (*cp_s != *cp_b) return FALSE;
222 cp_s++;
223 }
224 if (ctr < len) {
225 return TRUE;
226 } else {
227 pos = pos + len;
228 }
229 }
230 if (*cp_s != 0) return FALSE;
231 return TRUE;
232 }
233
234
235 static Int4 GetTraceID (SeqIdPtr sip)
236 {
237 DbtagPtr dbtag;
238
239 if (sip == NULL || sip->choice != SEQID_GENERAL) return 0;
240 dbtag = (DbtagPtr) sip->data.ptrvalue;
241 if (dbtag == NULL || StringCmp (dbtag->db, "ti") != 0 || dbtag->tag == NULL) {
242 return 0;
243 }
244 return dbtag->tag->id;
245 }
246
247
248 static Int4 GetTraceIDFromIdList (SeqIdPtr sip)
249 {
250 Int4 ti = 0;
251
252 while (sip != NULL && ti == 0) {
253 ti = GetTraceID (sip);
254 sip = sip->next;
255 }
256 return ti;
257 }
258
259
260
261 /* This function retrieves a sequence. It would be better to use BioseqLockById. */
262 static SeqEntryPtr FetchRead (SeqIdPtr sip)
263 {
264 Uint4 tid = 0;
265 Int4 uid = 0;
266 SeqEntryPtr sep = NULL;
267
268 if (sip == NULL) return NULL;
269
270 tid = GetTraceID (sip);
271 if (tid > 0) {
272 sep = PubSeqSynchronousQueryTI (tid, 0, -1);
273 } else {
274 uid = GetGIForSeqId (sip);
275 if (uid > 0) {
276 sep = PubSeqSynchronousQuery (uid, 0, -1);
277 }
278 }
279
280 return sep;
281 }
282
283
284
285 static SeqIdPairPtr SeqIdPairNew ()
286 {
287 SeqIdPairPtr pair;
288
289 pair = (SeqIdPairPtr) MemNew (sizeof (SeqIdPairData));
290 pair->sip_find = NULL;
291 pair->sip_replace = NULL;
292 return pair;
293 }
294
295
296 static SeqIdPairPtr SeqIdPairFree (SeqIdPairPtr pair)
297 {
298 if (pair != NULL) {
299 pair->sip_find = SeqIdFree (pair->sip_find);
300 pair->sip_replace = SeqIdFree (pair->sip_replace);
301 pair = MemFree (pair);
302 }
303 return pair;
304 }
305
306
307 static int SeqIdPairCompare (SeqIdPairPtr sp1, SeqIdPairPtr sp2)
308 {
309 if (sp1 == NULL || sp2 == NULL) {
310 return 0;
311 }
312 return StringICmp (sp1->buf_find, sp2->buf_find);
313 }
314
315
316 static int LIBCALLBACK SortSeqIdPairList (VoidPtr ptr1, VoidPtr ptr2)
317
318 {
319 ValNodePtr vnp1;
320 ValNodePtr vnp2;
321 int rval = 0;
322
323 if (ptr1 != NULL && ptr2 != NULL) {
324 vnp1 = *((ValNodePtr PNTR) ptr1);
325 vnp2 = *((ValNodePtr PNTR) ptr2);
326 if (vnp1 != NULL && vnp2 != NULL) {
327 rval = SeqIdPairCompare (vnp1->data.ptrvalue, vnp2->data.ptrvalue);
328 }
329 }
330 return rval;
331 }
332
333
334 static ValNodePtr SeqIdPairListFree (ValNodePtr pair_list)
335 {
336 ValNodePtr vnp_next;
337
338 while (pair_list != NULL) {
339 vnp_next = pair_list->next;
340 pair_list->data.ptrvalue = SeqIdPairFree (pair_list->data.ptrvalue);
341 pair_list->next = NULL;
342 pair_list = ValNodeFree (pair_list);
343 pair_list = vnp_next;
344 }
345 return pair_list;
346 }
347
348
349 static SeqIdReplaceListPtr SeqIdReplaceListNew (ValNodePtr id_list)
350 {
351 SeqIdReplaceListPtr replace_list;
352 SeqIdPairPtr pair;
353 Int4 i;
354
355 replace_list = (SeqIdReplaceListPtr) MemNew (sizeof (SeqIdReplaceListData));
356 replace_list->num_ids = ValNodeLen (id_list);
357 replace_list->list = (SeqIdPairPtr) MemNew (sizeof (SeqIdPairData) * replace_list->num_ids);
358 for (i = 0; id_list != NULL; id_list = id_list->next, i++) {
359 pair = (SeqIdPairPtr) id_list->data.ptrvalue;
360 replace_list->list[i].sip_find = SeqIdDup (pair->sip_find);
361 StringCpy (replace_list->list[i].buf_find, pair->buf_find);
362 replace_list->list[i].sip_replace = SeqIdDup (pair->sip_replace);
363 replace_list->list[i].is_complement = pair->is_complement;
364 replace_list->list[i].trim5 = pair->trim5;
365 replace_list->list[i].trim3 = pair->trim3;
366 replace_list->list[i].is_consensus = pair->is_consensus;
367 replace_list->list[i].ti = pair->ti;
368 }
369 return replace_list;
370 }
371
372
373 NLM_EXTERN SeqIdReplaceListPtr SeqIdReplaceListFree (SeqIdReplaceListPtr replace_list)
374 {
375 Int4 i;
376 if (replace_list != NULL) {
377 for (i = 0; i < replace_list->num_ids; i++) {
378 replace_list->list[i].sip_find = SeqIdFree (replace_list->list[i].sip_find);
379 replace_list->list[i].sip_replace = SeqIdFree (replace_list->list[i].sip_replace);
380 }
381 replace_list->list = MemFree (replace_list->list);
382 replace_list = MemFree (replace_list);
383 }
384 return replace_list;
385 }
386
387
388 NLM_EXTERN SeqIdReplaceListPtr ReadSeqIdPairListFromFile (FILE *fp)
389 {
390 ReadBufferData rbd;
391 CharPtr linestring, cp, id2, buf = NULL;
392 Int4 len, buf_len = 0;
393 SeqIdPairPtr pair;
394 ValNodePtr pair_list = NULL, last = NULL, vnp;
395 SeqIdReplaceListPtr replace_list = NULL;
396
397 if (fp == NULL) return NULL;
398
399 rbd.fp = fp;
400 rbd.current_data = NULL;
401
402 linestring = AbstractReadFunction (&rbd);
403 while (linestring != NULL && linestring[0] != EOF) {
404 cp = linestring + StringSpn (linestring, " \t");
405 if (*cp != 0) {
406 len = StringCSpn (cp, " \t");
407 id2 = cp + len + StringSpn (cp + len, " \t");
408 if (*id2 != 0) {
409 if (len + 1 > buf_len) {
410 buf = MemFree (buf);
411 buf_len = len + 1;
412 buf = (CharPtr) MemNew (sizeof (Char) * buf_len);
413 }
414 StringNCpy (buf, cp, len);
415 buf[len] = 0;
416 pair = SeqIdPairNew ();
417 pair->sip_find = MakeSeqID (buf);
418 SeqIdWrite (pair->sip_find, pair->buf_find, PRINTID_REPORT, sizeof (pair->buf_find) - 1);
419 pair->sip_replace = MakeSeqID (id2);
420 vnp = ValNodeNew (NULL);
421 vnp->data.ptrvalue = pair;
422 if (last == NULL) {
423 pair_list = vnp;
424 } else {
425 last->next = vnp;
426 }
427 last = vnp;
428 }
429 }
430 free (linestring);
431 linestring = AbstractReadFunction (&rbd);
432 }
433 pair_list = ValNodeSort (pair_list, SortSeqIdPairList);
434
435 replace_list = SeqIdReplaceListNew (pair_list);
436 pair_list = SeqIdPairListFree (pair_list);
437
438 return replace_list;
439 }
440
441
442 static SeqIdPairPtr FindReplacementInSeqIdReplaceList (SeqIdPtr sip, SeqIdReplaceListPtr pair_list)
443 {
444 Int4 l, r, m;
445 Char buf_find[100];
446 int cmp;
447
448 if (sip == NULL || pair_list == NULL) return NULL;
449
450 SeqIdWrite (sip, buf_find, PRINTID_REPORT, sizeof (buf_find) - 1);
451 l = 0;
452 r = pair_list->num_ids - 1;
453 m = (r + l) / 2;
454
455 while ((cmp = StringICmp (buf_find, pair_list->list[m].buf_find)) != 0 && l <= r) {
456 if (cmp < 0) {
457 r = m - 1;
458 } else {
459 l = m + 1;
460 }
461 m = (r + l) / 2;
462 }
463 if (cmp == 0) {
464 return pair_list->list + m;
465 } else {
466 return NULL;
467 }
468 }
469
470
471
472 static void ReportInvalidReplacement (SeqIdPtr sip, CharPtr reason, char *has_errors)
473 {
474 Char buf[128];
475
476 SeqIdWrite (sip, buf, PRINTID_FASTA_LONG, sizeof (buf) - 1);
477 PrintACEFormatErrorXMLStart (buf, has_errors);
478 printf ("%s", reason);
479 PrintACEFormatErrorXMLEnd ();
480 }
481
482
483 static Boolean OkToReplaceId (SeqIdPairPtr pair, CharPtr seq_str, char *has_errors)
484 {
485 Boolean rval = FALSE;
486 SeqEntryPtr fetched_sep, old_scope;
487 BioseqPtr bsp_replace;
488
489 if (StringHasNoText (seq_str)) {
490 rval = FALSE;
491 }
492
493 if (pair == NULL || pair->sip_replace == NULL) {
494 rval = FALSE;
495 } else if ((fetched_sep = FetchRead (pair->sip_replace)) == NULL) {
496 rval = FALSE;
497 ReportInvalidReplacement (pair->sip_replace, "Unable to fetch far sequence", has_errors);
498 } else {
499 old_scope = SeqEntrySetScope (fetched_sep);
500 bsp_replace = BioseqFind (pair->sip_replace);
501 SeqEntrySetScope (old_scope);
502 if (bsp_replace == NULL) {
503 rval = FALSE;
504 ReportInvalidReplacement (pair->sip_replace, "Unable to locate far sequence after fetch", has_errors);
505 } else if (DoesSeqStringMatchBsp (seq_str, bsp_replace, Seq_strand_plus)) {
506 /* matches */
507 rval = TRUE;
508 pair->ti = GetTraceIDFromIdList (bsp_replace->id);
509 } else if (DoesSeqStringMatchBsp (seq_str, bsp_replace, Seq_strand_minus)) {
510 /* matches on complement */
511 pair->is_complement = TRUE;
512 rval = TRUE;
513 pair->ti = GetTraceIDFromIdList (bsp_replace->id);
514 } else {
515 /* later, are we going to try to find trim lengths? */
516 rval = FALSE;
517 ReportInvalidReplacement (pair->sip_replace, "Replacement does not match local", has_errors);
518 }
519 SeqEntryFree (fetched_sep);
520 }
521 return rval;
522 }
523
524
525 static Boolean UpdateContigReadId (TContigReadPtr read, SeqIdReplaceListPtr pair_list, Boolean no_lookup, Boolean is_srr, char *has_errors)
526 {
527 SeqIdPairPtr pair;
528 SeqIdPtr sip_find;
529 Char id_buf[255];
530 Boolean rval = TRUE;
531
532 if (read == NULL || StringHasNoText (read->read_id)) {
533 rval = FALSE;
534 } else {
535 sip_find = MakeSeqID (read->read_id);
536 pair = FindReplacementInSeqIdReplaceList (sip_find, pair_list);
537 if (pair != NULL && (no_lookup || OkToReplaceId (pair, read->read_seq, has_errors))) {
538 if (pair->is_complement) {
539 if (read->is_complement) {
540 read->is_complement = FALSE;
541 } else {
542 read->is_complement = TRUE;
543 }
544 }
545 if (pair->ti > 0) {
546 read->ti = pair->ti;
547 } else {
548 if (pair->sip_replace->choice == SEQID_LOCAL) {
549 SeqIdWrite (pair->sip_replace, id_buf, PRINTID_REPORT, sizeof (id_buf) - 1);
550 } else {
551 SeqIdWrite (pair->sip_replace, id_buf, PRINTID_FASTA_LONG, sizeof (id_buf) - 1);
552 }
553 if (is_srr) {
554 if (read->srr != NULL) {
555 free (read->srr);
556 }
557 read->srr = malloc (sizeof (Char) * (StringLen (id_buf) + 1));
558 sprintf (read->srr, "%s", id_buf);
559 free (read->read_id);
560 read->read_id = NULL;
561 } else {
562 free (read->read_id);
563 read->read_id = malloc (sizeof (Char) * (StringLen (id_buf) + 1));
564 sprintf (read->read_id, "%s", id_buf);
565 }
566 }
567 read->local = FALSE;
568 }
569 sip_find = SeqIdFree (sip_find);
570 }
571 return rval;
572 }
573
574
575 NLM_EXTERN Boolean UpdateContigIds (TContigPtr contig, SeqIdReplaceListPtr pair_list, Boolean no_lookup, Boolean is_srr, char *has_errors)
576 {
577 Int4 i;
578 SeqIdPairPtr pair;
579 SeqIdPtr sip_find;
580 Char id_buf[255];
581 Boolean rval = TRUE;
582
583 if (contig == NULL) return FALSE;
584 if (pair_list == NULL) return TRUE;
585
586 if (contig->consensus_id != NULL) {
587 sip_find = MakeSeqID (contig->consensus_id);
588 pair = FindReplacementInSeqIdReplaceList (sip_find, pair_list);
589 if (pair != NULL && (no_lookup || OkToReplaceId (pair, contig->consensus_seq, has_errors))) {
590 if (pair->is_complement) {
591 if (contig->is_complement) {
592 contig->is_complement = FALSE;
593 } else {
594 contig->is_complement = TRUE;
595 }
596 }
597 SeqIdWrite (pair->sip_replace, id_buf, PRINTID_FASTA_LONG, sizeof (id_buf) - 1);
598 free (contig->consensus_id);
599 contig->consensus_id = malloc (sizeof (Char) * (StringLen (id_buf) + 1));
600 sprintf (contig->consensus_id, "%s", id_buf);
601 } else {
602 rval = FALSE;
603 }
604 sip_find = SeqIdFree (sip_find);
605 }
606 for (i = 0; i < contig->num_reads; i++) {
607 rval &= UpdateContigReadId (contig->reads[i], pair_list, no_lookup, is_srr, has_errors);
608 }
609 return rval;
610 }
611
612
613 NLM_EXTERN Boolean UpdateAceFileIds (TACEFilePtr afp, FILE *id_file, Boolean no_lookup, Boolean is_srr, char *has_errors)
614 {
615 Boolean rval = TRUE;
616 SeqIdReplaceListPtr pair_list;
617 SeqEntryPtr old_scope;
618 Int4 i;
619
620 if (afp == NULL || id_file == NULL) return FALSE;
621 old_scope = SeqEntrySetScope (NULL);
622 pair_list = ReadSeqIdPairListFromFile (id_file);
623 for (i = 0; i < afp->num_contigs; i++) {
624 rval &= UpdateContigIds (afp->contigs[i], pair_list, no_lookup, is_srr, has_errors);
625 }
626
627 pair_list = SeqIdReplaceListFree (pair_list);
628 SeqEntrySetScope (old_scope);
629 return rval;
630 }
631
632
633 static Boolean ValidateContigReadId (TContigReadPtr read, char *has_errors)
634 {
635 SeqIdPairData pair;
636 Char id_buf[255];
637 Boolean rval = TRUE;
638
639 if (read == NULL || StringHasNoText (read->read_id)) {
640 rval = FALSE;
641 } else if (!read->local) {
642 rval = TRUE;
643 } else {
644 pair.sip_find = NULL;
645 pair.is_complement = FALSE;
646 pair.is_consensus = FALSE;
647 pair.trim3 = 0;
648 pair.trim5 = 0;
649 pair.sip_replace = MakeSeqID (read->read_id);
650 pair.ti = 0;
651 if (OkToReplaceId (&pair, read->read_seq, has_errors)) {
652 if (pair.is_complement && !read->is_complement) {
653 read->is_complement = TRUE;
654 } else if (!pair.is_complement && read->is_complement) {
655 read->is_complement = FALSE;
656 }
657 if (pair.ti > 0) {
658 read->ti = pair.ti;
659 } else {
660 SeqIdWrite (pair.sip_replace, id_buf, PRINTID_FASTA_LONG, sizeof (id_buf) - 1);
661 free (read->read_id);
662 read->read_id = malloc (sizeof (Char) * (StringLen (id_buf) + 1));
663 sprintf (read->read_id, "%s", id_buf);
664 }
665 read->local = FALSE;
666 }
667 pair.sip_replace = SeqIdFree (pair.sip_replace);
668 }
669 return rval;
670 }
671
672
673 static Boolean ValidateContigIds (TContigPtr contig, char *has_errors)
674 {
675 Int4 i;
676 Boolean rval = TRUE;
677
678 if (contig == NULL) return FALSE;
679
680 if (contig->consensus_id != NULL) {
681 /* check consensus later... */
682 }
683 for (i = 0; i < contig->num_reads; i++) {
684 rval &= ValidateContigReadId (contig->reads[i], has_errors);
685 }
686 return rval;
687 }
688
689
690 NLM_EXTERN Boolean ValidateAceFileIds (TACEFilePtr afp, char *has_errors)
691 {
692 Boolean rval = TRUE;
693 SeqEntryPtr old_scope;
694 Int4 i;
695
696 if (afp == NULL) return FALSE;
697 old_scope = SeqEntrySetScope (NULL);
698 for (i = 0; i < afp->num_contigs; i++) {
699 rval &= ValidateContigIds (afp->contigs[i], has_errors);
700 }
701
702 SeqEntrySetScope (old_scope);
703 return rval;
704 }
705
706
707 NLM_EXTERN ValNodePtr GetTransitionsFromGapInfo (TGapInfoPtr gaps, Int4 offset, Int4 seq_offset, Int4 seq_len)
708 {
709 ValNodePtr list = NULL;
710 Int4 i = 0, tiling_pos = offset, seq_pos = 0, diff = 0;
711 Boolean added_gap = FALSE;
712
713 /* add a transition to the list for where a sequence "begins" in the alignment, if not at 0 */
714 if (seq_offset == 0) {
715 if (tiling_pos > 0) {
716 ValNodeAddInt (&list, 0, tiling_pos);
717 }
718 } else {
719 /* if seq_offset causes sequence to "start" in the middle of a between-gap interval, add a transition for it */
720 if (gaps == NULL || gaps->num_gaps == 0) {
721 ValNodeAddInt (&list, 0, tiling_pos + seq_offset);
722 } else {
723 while (seq_pos < seq_offset && i < gaps->num_gaps && !added_gap) {
724 if (seq_pos + gaps->gap_offsets[i] <= seq_offset) {
725 tiling_pos += gaps->gap_offsets[i] + 1;
726 seq_pos += gaps->gap_offsets[i];
727 diff += gaps->gap_offsets[i];
728 i++;
729 } else {
730 ValNodeAddInt (&list, 0, tiling_pos + seq_offset);
731 added_gap = TRUE;
732 }
733 }
734 }
735 }
736
737 if (gaps != NULL) {
738 while (i < gaps->num_gaps) {
739 seq_pos += gaps->gap_offsets[i];
740 if (gaps->gap_offsets[i] > 0) {
741 tiling_pos += gaps->gap_offsets[i];
742 ValNodeAddInt (&list, 0, tiling_pos);
743 }
744 tiling_pos++;
745 if (gaps->num_gaps == i + 1
746 || gaps->gap_offsets[i + 1] > 0) {
747 ValNodeAddInt (&list, 0, tiling_pos);
748 }
749 i++;
750 }
751 }
752 if (seq_pos < seq_len) {
753 ValNodeAddInt (&list, 0, tiling_pos + seq_len - seq_pos);
754 }
755 return list;
756 }
757
758
759 static Boolean ValidateContigAgainstSeqEntry (TContigPtr contig, SeqEntryPtr sep, char *has_errors)
760 {
761 CharPtr seq_data = NULL;
762 SeqIdPtr sip;
763 BioseqPtr bsp;
764 Boolean rval = FALSE;
765
766 if (contig == NULL || sep == NULL) {
767 return FALSE;
768 }
769
770 seq_data = AlignmentStringToSequenceString (contig->consensus_seq, Seq_mol_na);
771 sip = MakeSeqID (contig->consensus_id);
772
773 bsp = BioseqFind (sip);
774 if (bsp == NULL) {
775 PrintACEFormatErrorXML ("not found in supplied SeqEntry", contig->consensus_id, has_errors);
776 } else if (!DoesSeqStringMatchBsp (seq_data, bsp, Seq_strand_plus)) {
777 PrintACEFormatErrorXML ("does not match sequence in supplied SeqEntry", contig->consensus_id, has_errors);
778 } else {
779 rval = TRUE;
780 }
781 seq_data = MemFree (seq_data);
782 return rval;
783 }
784
785
786 NLM_EXTERN Boolean ValidateACEFileAgainstSeqEntry (TACEFilePtr ace_file, SeqEntryPtr sep, char *has_errors)
787 {
788 Boolean rval = TRUE;
789 Int4 i;
790 SeqEntryPtr oldscope;
791
792 if (ace_file == NULL || sep == NULL) {
793 return FALSE;
794 }
795
796 oldscope = SeqEntrySetScope (sep);
797
798 for (i = 0; i < ace_file->num_contigs; i++) {
799 rval |= ValidateContigAgainstSeqEntry (ace_file->contigs[i], sep, has_errors);
800 }
801 SeqEntrySetScope (oldscope);
802 return rval;
803 }
804
805 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |