NCBI C Toolkit Cross Reference

C/api/acerdapi.c


  1 /*
  2  * $Id: acerdapi.c,v 1.15 2008/12/02 17:13:14 bollin Exp $
  3  *
  4  * ===========================================================================
  5  *
  6  *                            PUBLIC DOMAIN NOTICE
  7  *               National Center for Biotechnology Information
  8  *
  9  *  This software/database is a "United States Government Work" under the
 10  *  terms of the United States Copyright Act.  It was written as part of
 11  *  the author's official duties as a United States Government employee and
 12  *  thus cannot be copyrighted.  This software/database is freely available
 13  *  to the public for use. The National Library of Medicine and the U.S.
 14  *  Government have not placed any restriction on its use or reproduction.
 15  *
 16  *  Although all reasonable efforts have been taken to ensure the accuracy
 17  *  and reliability of the software and data, the NLM and the U.S.
 18  *  Government do not and cannot warrant the performance or results that
 19  *  may be obtained by using this software or data. The NLM and the U.S.
 20  *  Government disclaim all warranties, express or implied, including
 21  *  warranties of performance, merchantability or fitness for any particular
 22  *  purpose.
 23  *
 24  *  Please cite the author in any work or product based on this material.
 25  *
 26  * ===========================================================================
 27  *
 28  * Authors:  Colleen Bollin
 29  *
 30  */
 31 
 32 
 33 #include <stdlib.h>
 34 #include <ncbi.h>
 35 #include <ncbistr.h>
 36 #include <seqport.h>
 37 #include <sqnutils.h>
 38 #include <gather.h>
 39 #include <pmfapi.h>
 40 #include <alignmgr2.h>
 41 #include <explore.h>
 42 #include <aceread.h>
 43 #include <acerdapi.h>
 44 
 45 
 46 /* This constructs an ASN.1 SeqGraph that contains the quality scores from the consensus sequence */
 47 static SeqGraphPtr SeqGraphFromContig (TContigPtr contig, BioseqPtr bsp)
 48 {
 49   SeqGraphPtr       sgp;
 50   ByteStorePtr      bs;
 51   Uint1             bytes[128]; 
 52   Int2              max = INT2_MIN;
 53   Int2              min = INT2_MAX;
 54   Int4              q_pos, b_pos;
 55   SeqIntPtr         sintp;
 56 
 57   if (contig == NULL || contig->num_qual_scores == 0 || contig->qual_scores == NULL
 58       || bsp == NULL) {
 59     return NULL;
 60   }
 61 
 62   sgp = SeqGraphNew ();
 63   bs = BSNew (1000);
 64   q_pos = 0;
 65   while (q_pos < contig->num_qual_scores) {
 66     b_pos = 0;
 67     while (b_pos < sizeof (bytes) && q_pos < contig->num_qual_scores) {
 68       max = MAX (max, (Int2) contig->qual_scores[q_pos]);
 69       min = MIN (min, (Int2) contig->qual_scores[q_pos]);
 70       bytes[b_pos++] = (Uint1) contig->qual_scores[q_pos++];
 71     }
 72     BSWrite (bs, (Pointer) bytes, (Int4) b_pos);
 73   }
 74   sgp->numval = BSLen (bs);
 75   BSPutByte (bs, EOF);
 76   sgp->title = StringSave ("Phrap Quality");
 77   sgp->flags [0] = 0;
 78   sgp->compr = 1;
 79   sgp->flags [1] = 0;
 80   sgp->flags [2] = 3;
 81   sgp->axis.intvalue = 0;
 82   sgp->min.intvalue = min;
 83   sgp->max.intvalue = max;
 84   sgp->a = 1.0;
 85   sgp->b = 0;
 86   sgp->values = (Pointer) bs;
 87 
 88   sintp = SeqIntNew ();
 89   sintp->from = 0;
 90   sintp->to = bsp->length - 1;
 91   sintp->id = SeqIdDup (bsp->id);
 92   ValNodeAddPointer (&(sgp->loc), SEQLOC_INT, (Pointer) sintp);
 93 
 94   return sgp;
 95 }
 96 
 97 
 98 NLM_EXTERN SeqEntryPtr MakeSeqEntryFromRead (TContigReadPtr read)
 99 {
100   CharPtr seq_data;
101   SeqIdPtr sip;
102   SeqEntryPtr sep = NULL;
103   BioseqPtr   bsp;
104   SeqDescrPtr sdp;
105   MolInfoPtr  mip;
106 
107   if (read == NULL) {
108     return NULL;
109   }
110 
111   seq_data = AlignmentStringToSequenceString (read->read_seq, Seq_mol_na);
112   sip = MakeSeqID (read->read_id);
113   sep = SequenceStringToSeqEntry (seq_data, sip, Seq_mol_na);
114   if (sep != NULL && IS_Bioseq (sep)) {
115     bsp = (BioseqPtr) sep->data.ptrvalue;
116     bsp->mol = Seq_mol_rna;
117     if (read->is_complement) {
118       BioseqRevComp (bsp);
119     }
120     /* add molinfo */
121     sdp = bsp->descr;
122     while (sdp != NULL && sdp->choice != Seq_descr_molinfo) {
123       sdp = sdp->next;
124     }
125     if (sdp == NULL) {
126       sdp = SeqDescrNew (bsp->descr);
127       if (bsp->descr == NULL) {
128         bsp->descr = sdp;
129       }
130       sdp->choice = Seq_descr_molinfo;
131       mip = MolInfoNew ();
132       mip->biomol = MOLECULE_TYPE_MRNA;
133       sdp->data.ptrvalue = mip;
134     } else {
135       mip = (MolInfoPtr) sdp->data.ptrvalue;
136     }
137     mip->tech = MI_TECH_tsa;
138   }
139   return sep;
140 }
141 
142 
143 NLM_EXTERN SeqEntryPtr MakeSeqEntryFromContig (TContigPtr contig)
144 {
145   CharPtr seq_data;
146   SeqIdPtr sip;
147   SeqEntryPtr sep = NULL;
148   BioseqPtr   bsp;
149   SeqGraphPtr sgp;
150   SeqAnnotPtr sap;
151   SeqDescrPtr sdp;
152   MolInfoPtr  mip;
153 
154   if (contig == NULL) {
155     return NULL;
156   }
157 
158   seq_data = AlignmentStringToSequenceString (contig->consensus_seq, Seq_mol_na);
159   sip = MakeSeqID (contig->consensus_id);
160   sep = SequenceStringToSeqEntry (seq_data, sip, Seq_mol_na);
161   if (sep != NULL && IS_Bioseq (sep)) {
162     bsp = (BioseqPtr) sep->data.ptrvalue;
163     bsp->mol = Seq_mol_rna;
164     /* add molinfo */
165     sdp = bsp->descr;
166     while (sdp != NULL && sdp->choice != Seq_descr_molinfo) {
167       sdp = sdp->next;
168     }
169     if (sdp == NULL) {
170       sdp = SeqDescrNew (bsp->descr);
171       if (bsp->descr == NULL) {
172         bsp->descr = sdp;
173       }
174       sdp->choice = Seq_descr_molinfo;
175       mip = MolInfoNew ();
176       mip->biomol = MOLECULE_TYPE_MRNA;
177       sdp->data.ptrvalue = mip;
178     } else {
179       mip = (MolInfoPtr) sdp->data.ptrvalue;
180     }
181     mip->tech = MI_TECH_tsa;
182 
183     sgp = SeqGraphFromContig (contig, bsp);
184     if (sgp != NULL) {
185       sap = SeqAnnotNew ();
186       sap->type = 3;
187       sap->data = sgp;
188       sap->next = bsp->annot;
189       bsp->annot = sap;
190     }
191   }
192   return sep;
193 }
194 
195 
196 /* This function compares a string of nucleotide characters to an existing Bioseq */
197 static Boolean DoesSeqStringMatchBsp (CharPtr seq_str, BioseqPtr bsp, Uint1 strand)
198 {
199   Char buf[51];
200   CharPtr cp_s, cp_b;
201   Int4 ctr, pos = 0, i, len, seq_len;
202 
203   if (seq_str == NULL || bsp == NULL) return FALSE;
204   cp_s = seq_str;
205   len = sizeof (buf) - 1;
206   seq_len = StringLen (seq_str);
207   
208   while (pos < bsp->length) {
209     if (strand == Seq_strand_minus) {
210       ctr = SeqPortStreamInt (bsp, MAX (0,  bsp->length - pos - len), bsp->length - pos - 1, Seq_strand_minus,
211                             STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL,
212                             (Pointer) buf, NULL);
213     } else {
214       ctr = SeqPortStreamInt (bsp, pos, MIN(pos + len - 1, bsp->length - 1), Seq_strand_plus,
215                             STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL,
216                             (Pointer) buf, NULL);
217     }
218 
219     for (i = 0, cp_b = buf; i < ctr && *cp_s != 0; i++, cp_b++) { 
220       while (*cp_s == '*') cp_s++;
221       if (*cp_s != *cp_b) return FALSE;
222       cp_s++;
223     }
224     if (ctr < len) {
225       return TRUE;
226     } else {
227       pos = pos + len;
228     }
229   }
230   if (*cp_s != 0) return FALSE;
231   return TRUE;                          
232 }
233 
234 
235 static Int4 GetTraceID (SeqIdPtr sip)
236 {
237   DbtagPtr dbtag;
238 
239   if (sip == NULL || sip->choice != SEQID_GENERAL) return 0;
240   dbtag = (DbtagPtr) sip->data.ptrvalue;
241   if (dbtag == NULL || StringCmp (dbtag->db, "ti") != 0 || dbtag->tag == NULL) {
242     return 0;
243   } 
244   return dbtag->tag->id;
245 }
246 
247 
248 static Int4 GetTraceIDFromIdList (SeqIdPtr sip)
249 {
250   Int4 ti = 0;
251 
252   while (sip != NULL && ti == 0) {
253     ti = GetTraceID (sip);
254     sip = sip->next;
255   }
256   return ti;
257 }
258 
259  
260 
261 /* This function retrieves a sequence.  It would be better to use BioseqLockById. */
262 static SeqEntryPtr FetchRead (SeqIdPtr sip)
263 {
264   Uint4       tid = 0;
265   Int4        uid = 0;
266   SeqEntryPtr sep = NULL;
267 
268   if (sip == NULL) return NULL;
269 
270   tid = GetTraceID (sip);
271   if (tid > 0) {
272     sep = PubSeqSynchronousQueryTI (tid, 0, -1);
273   } else {
274     uid = GetGIForSeqId (sip);
275     if (uid > 0) {
276       sep = PubSeqSynchronousQuery (uid, 0, -1);
277     }
278   }
279   
280   return sep;
281 }
282 
283 
284 
285 static SeqIdPairPtr SeqIdPairNew ()
286 {
287   SeqIdPairPtr pair;
288 
289   pair = (SeqIdPairPtr) MemNew (sizeof (SeqIdPairData));
290   pair->sip_find = NULL;
291   pair->sip_replace = NULL;
292   return pair;
293 }
294 
295 
296 static SeqIdPairPtr SeqIdPairFree (SeqIdPairPtr pair)
297 {
298   if (pair != NULL) {
299     pair->sip_find = SeqIdFree (pair->sip_find);
300     pair->sip_replace = SeqIdFree (pair->sip_replace);
301     pair = MemFree (pair);
302   }
303   return pair;
304 }
305 
306 
307 static int SeqIdPairCompare (SeqIdPairPtr sp1, SeqIdPairPtr sp2)
308 {
309   if (sp1 == NULL || sp2 == NULL) {
310     return 0;
311   }
312   return StringICmp (sp1->buf_find, sp2->buf_find);
313 }
314 
315 
316 static int LIBCALLBACK SortSeqIdPairList (VoidPtr ptr1, VoidPtr ptr2)
317 
318 {
319   ValNodePtr  vnp1;
320   ValNodePtr  vnp2;
321   int rval = 0;
322 
323   if (ptr1 != NULL && ptr2 != NULL) {
324     vnp1 = *((ValNodePtr PNTR) ptr1);
325     vnp2 = *((ValNodePtr PNTR) ptr2);
326     if (vnp1 != NULL && vnp2 != NULL) {
327       rval = SeqIdPairCompare (vnp1->data.ptrvalue, vnp2->data.ptrvalue);
328     }
329   }
330   return rval;
331 }
332 
333 
334 static ValNodePtr SeqIdPairListFree (ValNodePtr pair_list)
335 {
336   ValNodePtr vnp_next;
337 
338   while (pair_list != NULL) {
339     vnp_next = pair_list->next;
340     pair_list->data.ptrvalue = SeqIdPairFree (pair_list->data.ptrvalue);
341     pair_list->next = NULL;
342     pair_list = ValNodeFree (pair_list);
343     pair_list = vnp_next;
344   }
345   return pair_list;
346 }
347 
348 
349 static SeqIdReplaceListPtr SeqIdReplaceListNew (ValNodePtr id_list)
350 {
351   SeqIdReplaceListPtr replace_list;
352   SeqIdPairPtr        pair;
353   Int4                i;
354 
355   replace_list = (SeqIdReplaceListPtr) MemNew (sizeof (SeqIdReplaceListData));
356   replace_list->num_ids = ValNodeLen (id_list);
357   replace_list->list = (SeqIdPairPtr) MemNew (sizeof (SeqIdPairData) * replace_list->num_ids);
358   for (i = 0; id_list != NULL; id_list = id_list->next, i++) {
359     pair = (SeqIdPairPtr) id_list->data.ptrvalue;
360     replace_list->list[i].sip_find = SeqIdDup (pair->sip_find);
361     StringCpy (replace_list->list[i].buf_find, pair->buf_find);
362     replace_list->list[i].sip_replace = SeqIdDup (pair->sip_replace);
363     replace_list->list[i].is_complement = pair->is_complement;
364     replace_list->list[i].trim5 = pair->trim5;
365     replace_list->list[i].trim3 = pair->trim3;
366     replace_list->list[i].is_consensus = pair->is_consensus;
367     replace_list->list[i].ti = pair->ti;
368   }
369   return replace_list;
370 }
371 
372 
373 NLM_EXTERN SeqIdReplaceListPtr SeqIdReplaceListFree (SeqIdReplaceListPtr replace_list)
374 {
375   Int4 i;
376   if (replace_list != NULL) {
377     for (i = 0; i < replace_list->num_ids; i++) {
378       replace_list->list[i].sip_find = SeqIdFree (replace_list->list[i].sip_find);
379       replace_list->list[i].sip_replace = SeqIdFree (replace_list->list[i].sip_replace);
380     }
381     replace_list->list = MemFree (replace_list->list);
382     replace_list = MemFree (replace_list);
383   }
384   return replace_list;
385 }
386 
387 
388 NLM_EXTERN SeqIdReplaceListPtr ReadSeqIdPairListFromFile (FILE *fp)
389 {
390   ReadBufferData rbd;
391   CharPtr        linestring, cp, id2, buf = NULL;
392   Int4           len, buf_len = 0;
393   SeqIdPairPtr   pair;
394   ValNodePtr     pair_list = NULL, last = NULL, vnp;
395   SeqIdReplaceListPtr replace_list = NULL;
396   
397   if (fp == NULL) return NULL;
398 
399   rbd.fp = fp;
400   rbd.current_data = NULL;
401 
402   linestring = AbstractReadFunction (&rbd);
403   while (linestring != NULL && linestring[0] != EOF) {
404     cp = linestring + StringSpn (linestring, " \t");
405     if (*cp != 0) {
406       len = StringCSpn (cp, " \t");
407       id2 = cp + len + StringSpn (cp + len, " \t");
408       if (*id2 != 0) {
409         if (len + 1 > buf_len) {
410           buf = MemFree (buf);
411           buf_len = len + 1;
412           buf = (CharPtr) MemNew (sizeof (Char) * buf_len);
413         }
414         StringNCpy (buf, cp, len);
415         buf[len] = 0;
416         pair = SeqIdPairNew ();
417         pair->sip_find = MakeSeqID (buf);
418         SeqIdWrite (pair->sip_find, pair->buf_find, PRINTID_REPORT, sizeof (pair->buf_find) - 1);
419         pair->sip_replace = MakeSeqID (id2);
420         vnp = ValNodeNew (NULL);
421         vnp->data.ptrvalue = pair;
422         if (last == NULL) {
423           pair_list = vnp;
424         } else {
425           last->next = vnp;
426         }
427         last = vnp;
428       }
429     }
430     free (linestring);
431     linestring = AbstractReadFunction (&rbd);     
432   }
433   pair_list = ValNodeSort (pair_list, SortSeqIdPairList);
434 
435   replace_list = SeqIdReplaceListNew (pair_list);
436   pair_list = SeqIdPairListFree (pair_list);
437 
438   return replace_list;
439 }
440 
441 
442 static SeqIdPairPtr FindReplacementInSeqIdReplaceList (SeqIdPtr sip, SeqIdReplaceListPtr pair_list)
443 {
444   Int4         l, r, m;
445   Char         buf_find[100];
446   int          cmp;
447 
448   if (sip == NULL || pair_list == NULL) return NULL;
449 
450   SeqIdWrite (sip, buf_find, PRINTID_REPORT, sizeof (buf_find) - 1);
451   l = 0;
452   r = pair_list->num_ids - 1;
453   m = (r + l) / 2;
454 
455   while ((cmp = StringICmp (buf_find, pair_list->list[m].buf_find)) != 0 && l <= r) {
456     if (cmp < 0) {
457       r = m - 1;
458     } else {
459       l = m + 1;
460     }
461     m = (r + l) / 2;
462   }
463   if (cmp == 0) {
464     return pair_list->list + m;
465   } else {
466     return NULL;
467   }
468 }
469 
470 
471 
472 static void ReportInvalidReplacement (SeqIdPtr sip, CharPtr reason, char *has_errors)
473 {
474   Char         buf[128];
475 
476   SeqIdWrite (sip, buf, PRINTID_FASTA_LONG, sizeof (buf) - 1);
477   PrintACEFormatErrorXMLStart (buf, has_errors);
478   printf ("%s", reason);
479   PrintACEFormatErrorXMLEnd ();
480 }
481 
482 
483 static Boolean OkToReplaceId (SeqIdPairPtr pair, CharPtr seq_str, char *has_errors)
484 {
485   Boolean rval = FALSE;
486   SeqEntryPtr fetched_sep, old_scope;
487   BioseqPtr   bsp_replace;
488 
489   if (StringHasNoText (seq_str)) {
490     rval = FALSE;
491   }
492 
493   if (pair == NULL || pair->sip_replace == NULL) {
494     rval = FALSE;
495   } else if ((fetched_sep = FetchRead (pair->sip_replace)) == NULL) {
496     rval = FALSE;
497     ReportInvalidReplacement (pair->sip_replace, "Unable to fetch far sequence", has_errors);
498   } else {
499     old_scope = SeqEntrySetScope (fetched_sep);
500     bsp_replace = BioseqFind (pair->sip_replace);
501     SeqEntrySetScope (old_scope);
502     if (bsp_replace == NULL) {
503       rval = FALSE;
504       ReportInvalidReplacement (pair->sip_replace, "Unable to locate far sequence after fetch", has_errors);
505     } else if (DoesSeqStringMatchBsp (seq_str, bsp_replace, Seq_strand_plus)) {
506       /* matches */
507       rval = TRUE;
508       pair->ti = GetTraceIDFromIdList (bsp_replace->id);
509     } else if (DoesSeqStringMatchBsp (seq_str, bsp_replace, Seq_strand_minus)) {
510       /* matches on complement */
511       pair->is_complement = TRUE;
512       rval = TRUE;
513       pair->ti = GetTraceIDFromIdList (bsp_replace->id);
514     } else {
515       /* later, are we going to try to find trim lengths? */
516       rval = FALSE;
517       ReportInvalidReplacement (pair->sip_replace, "Replacement does not match local", has_errors);
518     }
519     SeqEntryFree (fetched_sep);
520   }
521   return rval;
522 }
523 
524 
525 static Boolean UpdateContigReadId (TContigReadPtr read, SeqIdReplaceListPtr pair_list, Boolean no_lookup, Boolean is_srr, char *has_errors)
526 {
527   SeqIdPairPtr pair;
528   SeqIdPtr     sip_find;
529   Char         id_buf[255];
530   Boolean      rval = TRUE;
531 
532   if (read == NULL || StringHasNoText (read->read_id)) {
533     rval = FALSE;
534   } else {
535     sip_find = MakeSeqID (read->read_id);
536     pair = FindReplacementInSeqIdReplaceList (sip_find, pair_list);
537     if (pair != NULL && (no_lookup || OkToReplaceId (pair, read->read_seq, has_errors))) {
538       if (pair->is_complement) {
539         if (read->is_complement) {
540           read->is_complement = FALSE;
541         } else {
542           read->is_complement = TRUE;
543         }
544       }
545       if (pair->ti > 0) {
546         read->ti = pair->ti;
547       } else {
548         if (pair->sip_replace->choice == SEQID_LOCAL) {
549           SeqIdWrite (pair->sip_replace, id_buf, PRINTID_REPORT, sizeof (id_buf) - 1);
550         } else {
551           SeqIdWrite (pair->sip_replace, id_buf, PRINTID_FASTA_LONG, sizeof (id_buf) - 1);
552         }
553         if (is_srr) {
554           if (read->srr != NULL) {
555             free (read->srr);
556           }
557           read->srr = malloc (sizeof (Char) * (StringLen (id_buf) + 1));
558           sprintf (read->srr, "%s", id_buf);
559           free (read->read_id);
560           read->read_id = NULL;
561         } else {
562           free (read->read_id);
563           read->read_id = malloc (sizeof (Char) * (StringLen (id_buf) + 1));
564           sprintf (read->read_id, "%s", id_buf);
565         }
566       }
567       read->local = FALSE;
568     }
569     sip_find = SeqIdFree (sip_find);
570   }
571   return rval;
572 }
573 
574 
575 NLM_EXTERN Boolean UpdateContigIds (TContigPtr contig, SeqIdReplaceListPtr pair_list, Boolean no_lookup, Boolean is_srr, char *has_errors)
576 {
577   Int4 i;
578   SeqIdPairPtr pair;
579   SeqIdPtr     sip_find;
580   Char         id_buf[255];
581   Boolean      rval = TRUE;
582 
583   if (contig == NULL) return FALSE;
584   if (pair_list == NULL) return TRUE;
585 
586   if (contig->consensus_id != NULL) {
587     sip_find = MakeSeqID (contig->consensus_id);
588     pair = FindReplacementInSeqIdReplaceList (sip_find, pair_list);
589     if (pair != NULL && (no_lookup || OkToReplaceId (pair, contig->consensus_seq, has_errors))) {
590       if (pair->is_complement) {
591         if (contig->is_complement) {
592           contig->is_complement = FALSE;
593         } else {
594           contig->is_complement = TRUE;
595         }
596       }
597       SeqIdWrite (pair->sip_replace, id_buf, PRINTID_FASTA_LONG, sizeof (id_buf) - 1);
598       free (contig->consensus_id);
599       contig->consensus_id = malloc (sizeof (Char) * (StringLen (id_buf) + 1));
600       sprintf (contig->consensus_id, "%s", id_buf);
601     } else {
602       rval = FALSE;
603     }
604     sip_find = SeqIdFree (sip_find);
605   }
606   for (i = 0; i < contig->num_reads; i++) {
607     rval &= UpdateContigReadId (contig->reads[i], pair_list, no_lookup, is_srr, has_errors);
608   }
609   return rval;
610 }
611 
612 
613 NLM_EXTERN Boolean UpdateAceFileIds (TACEFilePtr afp, FILE *id_file, Boolean no_lookup, Boolean is_srr, char *has_errors)
614 {
615   Boolean    rval = TRUE;
616   SeqIdReplaceListPtr pair_list;
617   SeqEntryPtr old_scope;
618   Int4        i;
619 
620   if (afp == NULL || id_file == NULL) return FALSE;
621   old_scope = SeqEntrySetScope (NULL);
622   pair_list = ReadSeqIdPairListFromFile (id_file);
623   for (i = 0; i < afp->num_contigs; i++) {
624     rval &= UpdateContigIds (afp->contigs[i], pair_list, no_lookup, is_srr, has_errors);
625   }  
626   
627   pair_list = SeqIdReplaceListFree (pair_list);
628   SeqEntrySetScope (old_scope);
629   return rval; 
630 }
631 
632 
633 static Boolean ValidateContigReadId (TContigReadPtr read, char *has_errors)
634 {
635   SeqIdPairData pair;
636   Char          id_buf[255];
637   Boolean       rval = TRUE;
638 
639   if (read == NULL || StringHasNoText (read->read_id)) {
640     rval = FALSE;
641   } else if (!read->local) {
642     rval = TRUE;
643   } else {
644     pair.sip_find = NULL;
645     pair.is_complement = FALSE;
646     pair.is_consensus = FALSE;
647     pair.trim3 = 0;
648     pair.trim5 = 0;
649     pair.sip_replace = MakeSeqID (read->read_id);
650     pair.ti = 0;
651     if (OkToReplaceId (&pair, read->read_seq, has_errors)) {
652       if (pair.is_complement && !read->is_complement) {
653         read->is_complement = TRUE;
654       } else if (!pair.is_complement && read->is_complement) {
655         read->is_complement = FALSE;
656       }
657       if (pair.ti > 0) {
658         read->ti = pair.ti;
659       } else {
660         SeqIdWrite (pair.sip_replace, id_buf, PRINTID_FASTA_LONG, sizeof (id_buf) - 1);
661         free (read->read_id);
662         read->read_id = malloc (sizeof (Char) * (StringLen (id_buf) + 1));
663         sprintf (read->read_id, "%s", id_buf);
664       }
665       read->local = FALSE;
666     }
667     pair.sip_replace = SeqIdFree (pair.sip_replace);
668   }
669   return rval;
670 }
671 
672 
673 static Boolean ValidateContigIds (TContigPtr contig, char *has_errors)
674 {
675   Int4 i;
676   Boolean      rval = TRUE;
677 
678   if (contig == NULL) return FALSE;
679 
680   if (contig->consensus_id != NULL) {
681     /* check consensus later... */
682   }
683   for (i = 0; i < contig->num_reads; i++) {
684     rval &= ValidateContigReadId (contig->reads[i], has_errors);
685   }
686   return rval;
687 }
688 
689 
690 NLM_EXTERN Boolean ValidateAceFileIds (TACEFilePtr afp, char *has_errors)
691 {
692   Boolean    rval = TRUE;
693   SeqEntryPtr old_scope;
694   Int4        i;
695 
696   if (afp == NULL) return FALSE;
697   old_scope = SeqEntrySetScope (NULL);
698   for (i = 0; i < afp->num_contigs; i++) {
699     rval &= ValidateContigIds (afp->contigs[i], has_errors);
700   }  
701   
702   SeqEntrySetScope (old_scope);
703   return rval; 
704 }
705 
706 
707 NLM_EXTERN ValNodePtr GetTransitionsFromGapInfo (TGapInfoPtr gaps, Int4 offset, Int4 seq_offset, Int4 seq_len)
708 {
709   ValNodePtr list = NULL;
710   Int4 i = 0, tiling_pos = offset, seq_pos = 0, diff = 0;
711   Boolean added_gap = FALSE;
712 
713   /* add a transition to the list for where a sequence "begins" in the alignment, if not at 0 */
714   if (seq_offset == 0) {
715     if (tiling_pos > 0) {
716       ValNodeAddInt (&list, 0, tiling_pos);
717     }
718   } else {
719     /* if seq_offset causes sequence to "start" in the middle of a between-gap interval, add a transition for it */
720     if (gaps == NULL || gaps->num_gaps == 0) {
721       ValNodeAddInt (&list, 0, tiling_pos + seq_offset);
722     } else {
723       while (seq_pos < seq_offset && i < gaps->num_gaps && !added_gap) {
724         if (seq_pos + gaps->gap_offsets[i] <= seq_offset) {
725           tiling_pos += gaps->gap_offsets[i] + 1;
726           seq_pos += gaps->gap_offsets[i];
727           diff += gaps->gap_offsets[i];
728           i++;
729         } else {
730           ValNodeAddInt (&list, 0, tiling_pos + seq_offset);
731           added_gap = TRUE;
732         }
733       }
734     }
735   }
736 
737   if (gaps != NULL) {
738     while (i < gaps->num_gaps) {
739       seq_pos += gaps->gap_offsets[i];
740       if (gaps->gap_offsets[i] > 0) {
741         tiling_pos += gaps->gap_offsets[i];
742         ValNodeAddInt (&list, 0, tiling_pos);
743       }
744       tiling_pos++;
745       if (gaps->num_gaps == i + 1
746           || gaps->gap_offsets[i + 1] > 0) {
747         ValNodeAddInt (&list, 0, tiling_pos);
748       }
749       i++;
750     }
751   }
752   if (seq_pos < seq_len) {
753     ValNodeAddInt (&list, 0, tiling_pos + seq_len - seq_pos);
754   }
755   return list;
756 }
757 
758 
759 static Boolean ValidateContigAgainstSeqEntry (TContigPtr contig, SeqEntryPtr sep, char *has_errors)
760 {
761   CharPtr      seq_data = NULL;
762   SeqIdPtr     sip;
763   BioseqPtr    bsp;
764   Boolean      rval = FALSE;
765 
766   if (contig == NULL || sep == NULL) {
767     return FALSE;
768   }
769 
770   seq_data = AlignmentStringToSequenceString (contig->consensus_seq, Seq_mol_na);
771   sip = MakeSeqID (contig->consensus_id);
772   
773   bsp = BioseqFind (sip);
774   if (bsp == NULL) {
775     PrintACEFormatErrorXML ("not found in supplied SeqEntry", contig->consensus_id, has_errors);
776   } else if (!DoesSeqStringMatchBsp (seq_data, bsp, Seq_strand_plus)) {
777     PrintACEFormatErrorXML ("does not match sequence in supplied SeqEntry", contig->consensus_id, has_errors);
778   } else {
779     rval = TRUE;
780   }
781   seq_data = MemFree (seq_data);
782   return rval;
783 }
784 
785 
786 NLM_EXTERN Boolean ValidateACEFileAgainstSeqEntry (TACEFilePtr ace_file, SeqEntryPtr sep, char *has_errors)
787 {
788   Boolean rval = TRUE;
789   Int4    i;
790   SeqEntryPtr oldscope;
791 
792   if (ace_file == NULL || sep == NULL) {
793     return FALSE;
794   }
795 
796   oldscope = SeqEntrySetScope (sep);
797 
798   for (i = 0; i < ace_file->num_contigs; i++) {
799     rval |= ValidateContigAgainstSeqEntry (ace_file->contigs[i], sep, has_errors);
800   }
801   SeqEntrySetScope (oldscope);
802   return rval;
803 }
804 
805 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.