NCBI C Toolkit Cross Reference

C/algo/blast/api/blast_seqalign.c


  1 /* $Id: blast_seqalign.c,v 1.62 2007/02/26 14:52:50 papadopo Exp $
  2 * ===========================================================================
  3 *
  4 *                            PUBLIC DOMAIN NOTICE
  5 *               National Center for Biotechnology Information
  6 *
  7 *  This software/database is a "United States Government Work" under the
  8 *  terms of the United States Copyright Act.  It was written as part of
  9 *  the author's offical duties as a United States Government employee and
 10 *  thus cannot be copyrighted.  This software/database is freely available
 11 *  to the public for use. The National Library of Medicine and the U.S.
 12 *  Government have not placed any restriction on its use or reproduction.
 13 *
 14 *  Although all reasonable efforts have been taken to ensure the accuracy
 15 *  and reliability of the software and data, the NLM and the U.S.
 16 *  Government do not and cannot warrant the performance or results that
 17 *  may be obtained by using this software or data. The NLM and the U.S.
 18 *  Government disclaim all warranties, express or implied, including
 19 *  warranties of performance, merchantability or fitness for any particular
 20 *  purpose.
 21 *
 22 *  Please cite the author in any work or product based on this material.
 23 *
 24 *  Author: Ilya Dondoshansky
 25 * ===========================================================================*/
 26 
 27 /** @file blast_seqalign.c
 28  * Conversion of BLAST results to the SeqAlign form
 29  */
 30 
 31 #ifndef SKIP_DOXYGEN_PROCESSING
 32 static char const rcsid[] = "$Id: blast_seqalign.c,v 1.62 2007/02/26 14:52:50 papadopo Exp $";
 33 #endif /* SKIP_DOXYGEN_PROCESSING */
 34 
 35 #include <algo/blast/api/blast_seqalign.h>
 36 
 37 extern SeqIdPtr GetTheSeqAlignID (SeqIdPtr seq_id);
 38 extern ScorePtr MakeBlastScore (ScorePtr PNTR old, CharPtr scoretype, 
 39                                 Nlm_FloatHi prob, Int4 score);
 40 
 41 /** @addtogroup CToolkitAlgoBlast
 42  *
 43  * @{
 44  */
 45 
 46 SBlastSeqalignArray* 
 47 SBlastSeqalignArrayNew(Int4 size)
 48 {
 49     SBlastSeqalignArray* retval = NULL;
 50 
 51     if (size <= 0)
 52      return retval;
 53 
 54     retval = (SBlastSeqalignArray*) malloc(sizeof(SBlastSeqalignArray));
 55     if (retval)
 56     {
 57         retval->num_queries = size;
 58         retval->array = (SeqAlign**) calloc(size, sizeof(SeqAlign*));
 59         if (retval->array == NULL)
 60         {
 61             sfree(retval);
 62             retval = NULL;
 63         }
 64     }
 65     return retval;
 66 }
 67 
 68 SBlastSeqalignArray* 
 69 SBlastSeqalignArrayFree(SBlastSeqalignArray* seqalign_vec)
 70 {
 71    Int4 index;
 72 
 73    if (seqalign_vec == NULL)
 74       return NULL;
 75 
 76    for (index=0; index<seqalign_vec->num_queries; index++)
 77    {
 78         SeqAlignSetFree(seqalign_vec->array[index]); 
 79    }
 80    sfree(seqalign_vec->array);
 81    sfree(seqalign_vec);
 82    return NULL;
 83 }
 84 
 85 ScorePtr 
 86 GetScoreSetFromBlastHsp(BlastHSP* hsp)
 87 {
 88    ScorePtr     score_set=NULL;
 89    double       prob;
 90    Int4         score;
 91    char*        scoretype;
 92 
 93    score = hsp->score;
 94    if (score > 0)
 95       MakeBlastScore(&score_set, "score", 0.0, score);
 96 
 97    score = hsp->num;
 98    scoretype = "sum_n";
 99    
100    if (score > 1)
101       MakeBlastScore(&score_set, scoretype, 0.0, score);
102    
103    prob = hsp->evalue;
104    if (hsp->num <= 1) {
105       scoretype = "e_value";
106    } else {
107       scoretype = "sum_e";
108    }
109    if (prob >= 0.) {
110       if (prob < 1.0e-180)
111          prob = 0.0;
112       MakeBlastScore(&score_set, scoretype, prob, 0);
113    }
114 
115    /* Calculate bit score from the raw score */
116    if (hsp->bit_score >= 0.)
117       MakeBlastScore(&score_set, "bit_score", hsp->bit_score, 0);
118    
119    if (hsp->num_ident > 0)
120       MakeBlastScore(&score_set, "num_ident", 0.0, hsp->num_ident);
121 
122    if (hsp->comp_adjustment_method > 0) { 
123        MakeBlastScore(&score_set, "comp_adjustment_method",0.0,
124                       hsp->comp_adjustment_method);
125    }
126    return score_set;
127 }
128 
129 /** Fills in the DenseDiag information from a BlastHSP structure.
130  * On the first call to this function *old should be
131  * NULL, after that pass in the head of the DenseDiagPtr chain.
132  * @param old Pointer to previously created DenseDiag [in]
133  * @param hsp HSP structure to get alignment information from [in]
134  * @param reverse Should query and subject sequences be reversed? [in]
135  * @param query_length Length of the query sequence [in]
136  * @param subject_length Length of the subject sequence [in]
137  * @return The appended DenseDiagPtr
138  */
139 static DenseDiagPtr
140 s_HSPToDenseDiag(DenseDiagPtr* old, BlastHSP* hsp, Boolean reverse,
141                  Int4 query_length, Int4 subject_length)
142 {
143         DenseDiagPtr            ddp, new;
144 
145         new = DenseDiagNew();
146         
147         new->dim = 2;   /* Only 2 is supported in spec. */
148         new->len = hsp->query.end - hsp->query.offset;
149         new->starts = (Int4*) calloc(2, sizeof(Int4));
150         new->strands = (Uint1*) calloc(2, sizeof(Uint1));
151         if (reverse)
152         {
153                 if (hsp->subject.frame >= 0)
154                 {
155                         new->strands[0] = Seq_strand_plus;
156                         new->starts[0] = hsp->subject.offset;
157                 }
158                 else
159                 {
160                         new->strands[0] = Seq_strand_minus;
161                         new->starts[0] = subject_length - hsp->subject.end;
162                 }
163                 if (hsp->query.frame >= 0)
164                 {
165                         new->strands[1] = Seq_strand_plus;
166                         new->starts[1] = hsp->query.offset;
167                 }
168                 else
169                 {
170                         new->strands[1] = Seq_strand_minus;
171                         new->starts[1] = query_length - hsp->query.end;
172                 }
173         }
174         else
175         {
176                 if (hsp->query.frame >= 0)
177                 {
178                         new->strands[0] = Seq_strand_plus;
179                         new->starts[0] = hsp->query.offset;
180                 }
181                 else
182                 {
183                         new->strands[0] = Seq_strand_minus;
184                         new->starts[0] = query_length - hsp->query.end;
185                 }
186                 if (hsp->subject.frame >= 0)
187                 {
188                         new->strands[1] = Seq_strand_plus;
189                         new->starts[1] = hsp->subject.offset;
190                 }
191                 else
192                 {
193                         new->strands[1] = Seq_strand_minus;
194                         new->starts[1] = subject_length - hsp->subject.end;
195                 }
196         }
197         new->scores = GetScoreSetFromBlastHsp(hsp);
198 
199 /* Go to the end of the chain, and then attach "new" */
200         if (*old)
201         {
202                 ddp = *old;
203                 while (ddp->next)
204                         ddp = ddp->next;
205                 ddp->next = new;
206         }
207         else
208         {
209                 *old = new;
210         }
211 
212         new->next = NULL;
213 
214         return new;
215 }
216 
217 /** Fills in the StdSeg information from a BlastHSP structure.
218  * On the first call to this function *old should be
219  * NULL, after that pass in the head of the StdSegPtr chain.
220  * @param old Pointer to previously created StdSeg [in]
221  * @param hsp HSP structure to get alignment information from [in]
222  * @param reverse Should query and subject sequences be reversed? [in]
223  * @param query_length Length of the query sequence [in]
224  * @param subject_length Length of the subject sequence [in]
225  * @param sip Query sequence id [in]
226  * @return The appended StdSegPtr
227  */
228 static StdSeg*
229 s_HSPToStdSeg(StdSeg** old, BlastHSP* hsp, Int4 query_length, 
230               Int4 subject_length, SeqIdPtr sip, Boolean reverse)
231 {
232         StdSeg*         ssp,* new;
233         SeqIdPtr                query_sip, subject_sip;
234         SeqIntPtr               seq_int1, seq_int2;
235         SeqLocPtr               slp=NULL;
236 
237         new = StdSegNew();
238 /* Duplicate the id and split it up into query and subject parts */
239         query_sip = SeqIdDup(sip);
240         subject_sip = SeqIdDup(sip->next);
241         
242         new->dim = 2;   /* Only 2 is supported in spec. */
243         seq_int1 = SeqIntNew();
244         if (hsp->query.frame == 0)
245         {
246                 seq_int1->from = hsp->query.offset;
247                 seq_int1->to = hsp->query.end - 1;
248                 seq_int1->strand = Seq_strand_unknown;
249         }
250         else if (hsp->query.frame < 0)
251         {
252                 seq_int1->to = 
253             query_length - CODON_LENGTH*hsp->query.offset + hsp->query.frame;
254                 seq_int1->from = 
255             query_length - CODON_LENGTH*(hsp->query.end) + hsp->query.frame + 1;
256                 seq_int1->strand = Seq_strand_minus;
257         }
258         else if (hsp->query.frame > 0)
259         {
260                 seq_int1->from = CODON_LENGTH*(hsp->query.offset) + hsp->query.frame - 1;
261                 seq_int1->to = CODON_LENGTH*(hsp->query.end) + hsp->query.frame - 2;
262                 seq_int1->strand = Seq_strand_plus;
263         }
264         seq_int1->id = query_sip;
265         seq_int2 = SeqIntNew();
266         if (hsp->subject.frame == 0)
267         {
268                 seq_int2->from = hsp->subject.offset;
269                 seq_int2->to = hsp->subject.end - 1;
270                 seq_int2->strand = Seq_strand_unknown;
271         } 
272         else if (hsp->subject.frame < 0)
273         {
274                 seq_int2->from = subject_length - CODON_LENGTH*(hsp->subject.end) + 
275             hsp->subject.frame + 1;
276                 seq_int2->to = subject_length - CODON_LENGTH*(hsp->subject.offset) +
277             hsp->subject.frame;
278                 seq_int2->strand = Seq_strand_minus;
279         }
280         else if (hsp->subject.frame > 0)
281         {
282                 seq_int2->from = 
283             CODON_LENGTH*(hsp->subject.offset) + hsp->subject.frame - 1;
284                 seq_int2->to = CODON_LENGTH*(hsp->subject.end) + hsp->subject.frame - 2;
285                 seq_int2->strand = Seq_strand_plus;
286         }
287         seq_int2->id = subject_sip;
288 
289         if (reverse)
290         {
291                 ValNodeAddPointer(&slp, SEQLOC_INT, seq_int2); 
292                 ValNodeAddPointer(&slp, SEQLOC_INT, seq_int1); 
293         }
294         else
295         {
296                 ValNodeAddPointer(&slp, SEQLOC_INT, seq_int1); 
297                 ValNodeAddPointer(&slp, SEQLOC_INT, seq_int2); 
298         }
299         new->loc = slp;
300 
301         new->scores = GetScoreSetFromBlastHsp(hsp);
302 
303 /* Go to the end of the chain, and then attach "new" */
304         if (*old)
305         {
306                 ssp = *old;
307                 while (ssp->next)
308                         ssp = ssp->next;
309                 ssp->next = new;
310         }
311         else
312         {
313                 *old = new;
314         }
315 
316         new->next = NULL;
317 
318         return new;
319 }
320 
321 /** Assembles all the components of the Seq-align from a BlastHSPList structure
322  * for an ungapped alignment search.  
323  * @param program_number Type of BLAST program.
324  * @param hsp_list HSP list structure [in]
325  * @param query_id Seq-id of the query sequence [in]
326  * @param subject_id Seq-id of the subject sequence [in]
327  * @param query_length Length of the query sequence [in]
328  * @param subject_length Length of the subject sequence [in]
329  * @param seqalign_ptr Seq-align chain to append to. [in] [out]
330  */
331 static Int2 
332 s_HSPListToSeqAlignUngapped(EBlastProgramType program_number, 
333    BlastHSPList* hsp_list, SeqIdPtr query_id, 
334    SeqIdPtr subject_id, Int4 query_length,
335    Int4 subject_length, SeqAlignPtr* seqalign_ptr)
336 {
337    BlastHSP* hsp;
338    DenseDiagPtr ddp_head=NULL, ddp;
339    SeqIdPtr sip;
340    SeqIdPtr new_sip=NULL;
341    StdSeg* ssp_head=NULL,* ssp;
342    SeqAlignPtr seqalign;
343    Int4 hsp_cnt, index2, hspset_cnt_old;
344    Boolean getdensediag = 
345       (program_number == eBlastTypeBlastn ||
346        program_number == eBlastTypeRpsBlast ||
347        program_number == eBlastTypeBlastp);
348 
349         ddp_head = NULL;
350         ssp_head = NULL;
351         sip = NULL;
352 
353 
354    seqalign = SeqAlignNew();
355    seqalign->type = 2;          /* alignment is diags */
356 
357    hspset_cnt_old = -1;
358    hsp_cnt = hsp_list->hspcnt;
359 
360    for (index2=0; index2<hsp_cnt; index2++) {
361       hsp = hsp_list->hsp_array[index2];
362 
363       sip = GetTheSeqAlignID(query_id);
364       sip->next = SeqIdDup(subject_id);
365 
366       if (getdensediag) {
367          ddp = s_HSPToDenseDiag(&ddp_head, hsp, FALSE, query_length, 
368                                 subject_length);
369          ddp->id = sip;
370       } else {
371          ssp = s_HSPToStdSeg(&ssp_head, hsp, query_length, subject_length, sip,
372                              FALSE);
373          ssp->ids = sip;
374       }
375       sip = NULL; /* This SeqIdPtr is now on the SeqAlign. */
376    }
377 
378    if (getdensediag) {
379       seqalign->segs = ddp_head;
380       seqalign->segtype = 1;  /* DenseDiag */
381    } else {
382       seqalign->segs = ssp_head;
383       seqalign->segtype = 3;  /* StdSeg */
384    }
385 
386    if (new_sip)
387       new_sip = SeqIdFree(new_sip);
388 
389    *seqalign_ptr = seqalign;
390 
391    return 0;
392 }
393 
394 /** Get the current position.
395  * @param pos Current position. On return, assigned next position. [in] [out]
396  *            @todo FIXME: Can it really be negative? If not, then this function
397  *             can be simplified or even gotten rid of.
398  * @param length Length of the next segment. [in]
399  * @return Next position
400  */
401 static Int4 
402 s_GetCurrentPos(Int4* pos, Int4 length)
403 {
404     Int4 val;
405     if(*pos < 0)
406         val = -(*pos + length -1);
407     else
408         val = *pos;
409     *pos += length;
410     return val;
411 }
412 
413 /** Finds a protein frame length.
414  * @param nuc_length Length of nucleotide sequence. [in]
415  * @param frame Frame number. [in]
416  * @return Length of corresponding protein frame.
417  */
418 static Int4
419 s_GetProteinFrameLength(Int4 nuc_length, Int2 frame)
420 {
421     return (nuc_length - (ABS(frame)-1)%CODON_LENGTH) / CODON_LENGTH;
422 }
423 
424 Int2 
425 GapCollectDataForSeqalign(BlastHSP* hsp, GapEditScript* esp, Int4 first, Int4 number,
426                           Int4 query_length, Int4 subject_length,
427                           Boolean translate1, Boolean translate2,
428                           Int4** start_out, Int4** length_out, 
429                           Uint1** strands_out, Int4* start1, Int4* start2)
430 {
431     Int2 frame1, frame2;
432     Int4 begin1, begin2, index, length1, length2;
433     Int4 original_length1, original_length2, i;
434     Int4* length,* start;
435     Uint1 strand1, strand2;
436     Uint1* strands;
437     
438     length1 = hsp->query.end - hsp->query.offset;
439     length2 = hsp->subject.end - hsp->subject.offset;
440     original_length1 = length1 = query_length;
441     original_length2 = length2 = subject_length;
442     frame1 = hsp->query.frame;
443     frame2 = hsp->subject.frame;
444     
445     if (translate1)
446         length1 = s_GetProteinFrameLength(length1, frame1);
447     if (translate2)
448         length2 = s_GetProteinFrameLength(length2, frame2);
449     
450     if (frame1 > 0)
451         strand1 = Seq_strand_plus; 
452     else if (frame1 < 0)
453         strand1 = Seq_strand_minus; 
454     else
455         strand1 = Seq_strand_unknown; 
456     
457     if (frame2 > 0)
458         strand2 = Seq_strand_plus; 
459     else if (frame2 < 0)
460         strand2 = Seq_strand_minus; 
461     else
462         strand2 = Seq_strand_unknown; 
463 
464     start = (Int4 *) calloc((2*number+1), sizeof(Int4));
465     length = (Int4 *) calloc((number+1), sizeof(Int4));
466     strands = (Uint1 *) calloc((2*number+1), sizeof(Uint1));
467 
468     index=0;
469     for (i=first; i<number; i++)
470     {
471         switch(esp->op_type[i]) {
472         case eGapAlignDecline:
473         case eGapAlignSub:
474             if (strand1 != Seq_strand_minus) {
475                 if(translate1 == FALSE)
476                     begin1 = s_GetCurrentPos(start1, esp->num[i]);
477                 else
478                     begin1 = frame1 - 1 + CODON_LENGTH*s_GetCurrentPos(start1, esp->num[i]);
479             } else {
480                 if(translate1 == FALSE)
481                     begin1 = length1 - s_GetCurrentPos(start1, esp->num[i]) - esp->num[i];
482                 else
483                     begin1 = original_length1 - CODON_LENGTH*(s_GetCurrentPos(start1, esp->num[i])+esp->num[i]) + frame1 + 1;
484             }
485                                         
486             if (strand2 != Seq_strand_minus) {
487                 if(translate2 == FALSE)
488                     begin2 = s_GetCurrentPos(start2, esp->num[i]);
489                 else
490                     begin2 = frame2 - 1 + CODON_LENGTH*s_GetCurrentPos(start2, esp->num[i]);
491             } else {
492                 if(translate2 == FALSE)
493                     begin2 = length2 - s_GetCurrentPos(start2, esp->num[i]) - esp->num[i];
494                 else
495                     begin2 = original_length2 - CODON_LENGTH*(s_GetCurrentPos(start2, esp->num[i])+esp->num[i]) + frame2 + 1;
496             }
497             
498             strands[2*index] = strand1;
499             strands[2*index+1] = strand2;
500             start[2*index] = begin1;
501             start[2*index+1] = begin2;
502             
503             break;
504             
505         case eGapAlignDel:
506             begin1 = -1;
507             if (strand2 != Seq_strand_minus) {
508                 if(translate2 == FALSE)
509                     begin2 = s_GetCurrentPos(start2, esp->num[i]);
510                 else
511                     begin2 = frame2 - 1 + CODON_LENGTH*s_GetCurrentPos(start2, esp->num[i]);
512             } else {
513                 if(translate2 == FALSE)
514                     begin2 = length2 - s_GetCurrentPos(start2, esp->num[i]) - esp->num[i];
515                 else
516                     begin2 = original_length2 - CODON_LENGTH*(s_GetCurrentPos(start2, esp->num[i])+esp->num[i]) + frame2 + 1;
517             }
518             
519             if (index > 0)
520                 strands[2*index] = strands[2*(index-1)];
521             else
522                 strands[2*index] = Seq_strand_unknown;
523             strands[2*index+1] = strand2;
524             start[2*index] = begin1;
525             start[2*index+1] = begin2;
526             
527             break;
528             
529         case eGapAlignIns:
530             if (strand1 != Seq_strand_minus) {
531                 if(translate1 == FALSE)
532                     begin1 = s_GetCurrentPos(start1, esp->num[i]);
533                 else
534                     begin1 = frame1 - 1 + CODON_LENGTH*s_GetCurrentPos(start1, esp->num[i]);
535             } else {
536                 if(translate1 == FALSE)
537                     begin1 = length1 - s_GetCurrentPos(start1, esp->num[i]) - esp->num[i];
538                 else
539                     begin1 = original_length1 - CODON_LENGTH*(s_GetCurrentPos(start1, esp->num[i])+esp->num[i]) + frame1 + 1;
540             }
541             begin2 = -1;
542             strands[2*index] = strand1;
543             if (index > 0)
544                 strands[2*index+1] = strands[2*(index-1)+1];
545             else
546                 strands[2*index+1] = Seq_strand_unknown;
547             start[2*index] = begin1;
548             start[2*index+1] = begin2;
549             
550             break;
551         default:
552             break;
553         }
554         length[index] = esp->num[i];
555         index++;
556     }    
557 
558     if (start_out)
559        *start_out = start;
560     else
561        sfree(start);
562     if (length_out)
563        *length_out = length;
564     else
565        sfree(length);
566     if (strands_out)
567        *strands_out = strands;
568     else
569        sfree(strands);
570 
571     return 0;
572 }
573 
574 /** Corrects an editing script if any decline-to-align segments are present.
575  * @param hsp HSP structure, containing the editing script. [in] [out]
576  */
577 static void 
578 s_GapCorrectUASequence(BlastHSP* hsp)
579 {
580     GapEditScript* esp = hsp->gap_info;
581     int index;
582 
583     for (index=0; index<esp->size; index++)
584     {
585         // if GAPALIGN_DECLINE immediately follows an insertion or deletion
586         if (index > 0 && esp->op_type[index] == eGapAlignDecline &&
587            (esp->op_type[index-1] == eGapAlignIns || esp->op_type[index-1] == eGapAlignDel))
588         {
589             /* This is invalid condition and regions should be
590                exchanged */
591             int temp_num = esp->num[index];
592             EGapAlignOpType temp_op = esp->op_type[index];
593 
594             esp->num[index] = esp->num[index-1];
595             esp->op_type[index] = esp->op_type[index-1];
596             esp->num[index-1] = temp_num;
597             esp->op_type[index-1] = temp_op;
598         }
599     }
600     return;
601 }
602 
603 /** Creates and fills a SeqAlign structure, given all necessary information,
604  * with either Dense-seg or Std-seg segments.
605  * @param query_id Seq-id of the query sequence. [in]
606  * @param subject_id Seq-id of the subject sequence [in]
607  * @param translate1 Is query translated? [in]
608  * @param translate2 Is subject translated? [in]
609  * @param numseg Number of segments in the alignment [in]
610  * @param length Array of segment lengths [in]
611  * @param start Array of segment starts [in]
612  * @param strands Array of segment strands [in]
613  * @return Resulting Seq-align.
614  */
615 static SeqAlignPtr 
616 s_GapMakeSeqAlign(SeqIdPtr query_id, SeqIdPtr subject_id, 
617                   Boolean translate1, Boolean translate2, Int4 numseg,
618                   Int4* length, Int4* start, Uint1* strands)
619 {
620     SeqAlignPtr sap;
621     DenseSeg* dsp;
622     StdSeg* sseg,* sseg_head,* sseg_old;
623     SeqLocPtr slp, slp1, slp2;
624     SeqIntPtr seq_int1;
625     Int4 index;
626 
627     sap = SeqAlignNew();
628     
629     sap->dim =2; /**only two dimention alignment**/
630     
631     /**make the Denseg Object for SeqAlign**/
632     if (translate1 == FALSE && translate2 == FALSE) {
633         sap->segtype = SAS_DENSEG; /** use denseg to store the alignment **/
634         sap->type = SAT_PARTIAL;   /**partial for gapped translating search.*/
635         dsp = DenseSegNew();
636         dsp->dim = 2;
637         dsp->numseg = numseg;
638         dsp->ids = SeqIdDup(query_id);
639         dsp->ids->next = SeqIdDup(subject_id);
640         dsp->starts = start;
641         dsp->strands = strands;
642         dsp->lens = length;
643         sap->segs = dsp;
644         sap->next = NULL;
645     } else { /****/
646         sap->type = SAT_PARTIAL; /**partial for gapped translating search. */
647         sap->segtype = SAS_STD;  /**use stdseg to store the alignment**/
648         sseg_head = NULL;
649         sseg_old = NULL;
650 
651         for (index=0; index<numseg; index++) {
652             sseg = StdSegNew();
653             sseg->dim = 2;
654             if (sseg_head == NULL) {
655                 sseg_head = sseg;
656             }
657             sseg->ids = SeqIdDup(query_id);
658             sseg->ids->next = SeqIdDup(subject_id);
659 
660             slp1 = NULL;
661             if (start[2*index] != -1) {
662                 seq_int1 = SeqIntNew();
663                 seq_int1->from = start[2*index];
664                 if (translate1)
665                     seq_int1->to = start[2*index] + CODON_LENGTH*length[index] - 1;
666                 else
667                     seq_int1->to = start[2*index] + length[index] - 1;
668                 seq_int1->strand = strands[2*index];
669 
670                 seq_int1->id = SeqIdDup(query_id);
671 
672                 ValNodeAddPointer(&slp1, SEQLOC_INT, seq_int1);
673             } else {
674                 ValNodeAddPointer(&slp1, SEQLOC_EMPTY, SeqIdDup(query_id));
675             }
676             slp2 = NULL;
677             if (start[2*index+1] != -1) {
678                 seq_int1 = SeqIntNew();
679                 seq_int1->from = start[2*index+1];
680                 if (translate2)
681                     seq_int1->to = start[2*index+1] + CODON_LENGTH*length[index] - 1;
682                 else
683                     seq_int1->to = start[2*index+1] + length[index] - 1;
684                 seq_int1->strand = strands[2*index+1];
685 
686                 seq_int1->id = SeqIdDup(subject_id);
687 
688                 ValNodeAddPointer(&slp2, SEQLOC_INT, seq_int1);
689             } else {
690                 ValNodeAddPointer(&slp2, SEQLOC_EMPTY, SeqIdDup(subject_id));
691             }
692 
693             slp = slp1;
694             slp1->next = slp2;
695 
696             sseg->loc = slp;
697             
698             if (sseg_old)
699                 sseg_old->next = sseg;
700             sseg_old = sseg;
701         }
702         sap->segs = sseg_head;
703         sap->next = NULL;
704         
705         sfree(start);
706         sfree(length);
707         sfree(strands);
708     }
709 
710     return sap;
711 }
712 
713 SeqAlignPtr
714 BlastHSPToSeqAlign(EBlastProgramType program, BlastHSP* hsp, 
715                    SeqIdPtr subject_id, SeqIdPtr query_id,
716                    Int4 query_length, Int4 subject_length)
717 
718 {
719     GapEditScript* esp;
720     Int4 start1, start2;
721     Int4* length,* start;
722     Uint1* strands;
723     Boolean is_disc_align = FALSE;
724     SeqAlignPtr sap, sap_disc, sap_head, sap_tail;
725     Boolean translate1, translate2;
726     int index;
727 
728     is_disc_align = FALSE;
729 
730     esp = hsp->gap_info;
731     for (index=0; index<esp->size; index++)
732     {
733         if(esp->op_type[index] == eGapAlignDecline)
734         {
735            is_disc_align = TRUE;
736            break;
737         }
738     }
739     
740     start1 = hsp->query.offset;
741     start2 = hsp->subject.offset;
742     translate1 = Blast_QueryIsTranslated(program);
743     translate2 = Blast_SubjectIsTranslated(program);
744     
745     /* If no eGapAlignDecline regions exists output seqalign will be
746        regular Den-Seg or Std-seg */
747     if(is_disc_align == FALSE) {
748         /* Please note, that edit_block passed only for data like
749            strand, translate, reverse etc. Real data is taken starting
750            from "curr" and taken only "numseg" segments */
751         
752         GapCollectDataForSeqalign(hsp, hsp->gap_info, 0, esp->size, query_length,
753                                   subject_length, translate1, translate2,
754                                   &start, &length, &strands, &start1, &start2);
755         
756         /* Result of this function will be either den-seg or Std-seg
757            depending on translation options */
758         sap = s_GapMakeSeqAlign(query_id, subject_id, translate1, translate2, 
759                                 esp->size, length, start, strands);
760     } else {
761 
762         /* By request of Steven Altschul - we need to have 
763            the unaligned part being to the left if it is adjacent to the
764            gap (insertion or deletion) - so this function will do
765            shaffeling */
766 
767         s_GapCorrectUASequence(hsp); 
768 
769         sap_disc = SeqAlignNew();
770         sap_disc->dim = 2;
771         sap_disc->type = SAT_PARTIAL; /* ordered segments, over part of seq */
772         sap_disc->segtype = SAS_DISC; /* discontinuous alignment */
773         
774         sap_head = NULL; sap_tail = NULL;
775         for (index=0; index<esp->size; index++)
776         {
777             int numseg=0;
778             Boolean skip_region = FALSE;
779             int index2 = index;
780             int first = index;
781             for (index2=first; index2<esp->size; index2++, numseg++) {
782 
783                 if(esp->op_type[index2] == eGapAlignDecline) {
784                     if(numseg != 0) { /* End of aligned area */
785                         break;
786                     } else {
787                         while (index2<esp->size && esp->op_type[index2] == eGapAlignDecline) {
788                             numseg++;
789                             index2++;
790                         }
791                         skip_region = TRUE;                        
792                         break;
793                     }
794                 }
795             }
796             
797 
798             if(!skip_region) {            
799 
800                GapCollectDataForSeqalign(hsp, esp, first, numseg, query_length,
801                                       subject_length, translate1, translate2,
802                                       &start, &length, &strands, &start1, 
803                                       &start2);
804             
805                 sap = 
806                     s_GapMakeSeqAlign(query_id, subject_id, translate1, 
807                                       translate2, numseg, length, start, 
808                                       strands);
809                 
810                 /* Collecting all seqaligns into single linked list */
811                 if(sap_tail == NULL) {
812                     sap_head = sap_tail = sap;
813                 } else {
814                     sap_tail->next = sap;
815                     sap_tail = sap;
816                 }
817             }
818         }
819         sap_disc->segs = sap_head;
820         sap = sap_disc;
821     }
822 
823     return sap;
824 }
825 
826 SeqAlignPtr
827 OOFBlastHSPToSeqAlign(EBlastProgramType program, BlastHSP* hsp, 
828                       SeqIdPtr query_id, SeqIdPtr subject_id,
829                       Int4 query_length, Int4 subject_length)
830 {
831     Boolean reverse = FALSE;
832     GapEditScript* esp;
833     Int2 frame1, frame2;
834     Int4 start1, start2;
835     Int4 original_length1, original_length2;
836     SeqAlignPtr sap;
837     SeqIntPtr seq_int1, seq_int2;
838     SeqIntPtr seq_int1_last = NULL, seq_int2_last = NULL;
839     SeqIdPtr sip, id1, id2;
840     SeqLocPtr slp, slp1, slp2;
841     StdSeg* sseg,* sseg_head,* sseg_old;
842     Uint1 strand1, strand2;
843     Boolean first_shift;
844     int index;
845 
846     if (program == eBlastTypeBlastx) {
847        reverse = TRUE;
848        start1 = hsp->subject.offset;
849        start2 = hsp->query.offset;
850        frame1 = hsp->subject.frame;
851        frame2 = hsp->query.frame;
852        original_length1 = subject_length;
853        original_length2 = query_length;
854        id1 = subject_id;
855        id2 = query_id;
856     } else { 
857        start1 = hsp->query.offset;
858        start2 = hsp->subject.offset;
859        frame1 = hsp->query.frame;
860        frame2 = hsp->subject.frame;
861        original_length1 = query_length;
862        original_length2 = subject_length;
863        id1 = query_id;
864        id2 = subject_id;
865     }
866  
867     if(frame1 > 0) 
868         strand1 = Seq_strand_plus;
869     else if (frame1 < 0)
870         strand1 = Seq_strand_minus;
871     else
872         strand1 = Seq_strand_unknown;
873     
874     if(frame2 > 0) 
875         strand2 = Seq_strand_plus;
876     else if (frame2 < 0)
877         strand2 = Seq_strand_minus;
878     else
879         strand2 = Seq_strand_unknown;
880     
881     
882     sap = SeqAlignNew();
883     
884     sap->dim =2; /**only two dimention alignment**/
885     
886     sap->type =3; /**partial for gapped translating search. */
887     sap->segtype =3; /**use denseg to store the alignment**/
888     sseg_head = NULL;
889     sseg_old = NULL;
890 
891     first_shift = FALSE;
892 
893     esp = hsp->gap_info;
894 
895     for (index=0; index<esp->size; index++)
896     {
897         slp1 = NULL;
898         slp2 = NULL;
899         
900         switch (esp->op_type[index]) {
901         case eGapAlignDel: /* deletion of three nucleotides. */
902             
903             first_shift = FALSE;
904 
905             seq_int1 = SeqIntNew();
906             seq_int1->from = s_GetCurrentPos(&start1, esp->num[index]);
907             seq_int1->to = start1 - 1;            
908 
909             if(seq_int1->to >= original_length1)
910                 seq_int1->to = original_length1-1;
911             
912             seq_int1->id = SeqIdDup(id1);
913             seq_int1->strand = strand1;
914 
915             ValNodeAddPointer(&slp1, SEQLOC_INT, seq_int1);
916 
917             /* Empty nucleotide piece */
918             ValNodeAddPointer(&slp2, SEQLOC_EMPTY, SeqIdDup(id2));
919             
920             seq_int1_last = seq_int1;
921             /* Keep previous seq_int2_last, in case there is a frame shift
922                immediately after this gap */
923             
924             break;
925 
926         case eGapAlignIns: /* insertion of three nucleotides. */
927 
928             /* If gap is followed after frameshift - we have to
929                add this element for the alignment to be correct */
930             
931             if(first_shift == TRUE) { /* Second frameshift in a row */
932                 /* Protein coordinates */
933                 seq_int1 = SeqIntNew();
934                 seq_int1->from =  s_GetCurrentPos(&start1, 1);
935                 seq_int1->to = start1 - 1;
936 
937                 if(seq_int1->to >= original_length1)
938                     seq_int1->to = original_length1-1;
939                 
940                 seq_int1->id = SeqIdDup(id1);
941                 seq_int1->strand = strand1;
942                 
943                 ValNodeAddPointer(&slp1, SEQLOC_INT, seq_int1);
944                 
945                 /* Nucleotide scale shifted by op_type */
946                 seq_int2 = SeqIntNew();
947 
948                 seq_int2->from = s_GetCurrentPos(&start2, 3);
949                 seq_int2->to = start2 - 1;
950 
951                 if(seq_int2->to >= original_length2) {
952                     seq_int2->to = original_length2 - 1;
953                     seq_int1->to--;
954                 }
955 
956                 /* Transfer to DNA minus strand coordinates */
957                 if(strand2 == Seq_strand_minus) {
958                     int tmp_int;
959                     tmp_int = seq_int2->to;
960                     seq_int2->to = original_length2 - seq_int2->from - 1;
961                     seq_int2->from = original_length2 - tmp_int - 1;
962                 }
963             
964                 seq_int2->id = SeqIdDup(id2);
965                 seq_int2->strand = strand2;
966                 
967                 ValNodeAddPointer(&slp2, SEQLOC_INT, seq_int2);
968 
969                 /* seq_int1_last = seq_int1; 
970                    seq_int2_last = seq_int2; */
971 
972                 /* first_shift = FALSE; */
973 
974                 if (reverse) {
975                     slp = slp2;
976                     slp2->next = slp1;
977                     sip = SeqIdDup(id2);
978                     sip->next = SeqIdDup(id1);
979                 } else {
980                     slp = slp1;
981                     slp1->next = slp2;
982                     sip = SeqIdDup(id1);
983                     sip->next = SeqIdDup(id2);
984                 }
985                 
986                 sseg = StdSegNew();
987                 sseg->dim = 2;
988                 
989                 if (sseg_head == NULL)
990                     sseg_head = sseg;
991                 
992                 sseg->loc = slp;
993                 sseg->ids = sip;
994                 
995                 if (sseg_old)
996                     sseg_old->next = sseg;
997                 
998                 sseg_old = sseg;
999 
1000                 slp1 = NULL;
1001                 slp2 = NULL;
1002             }
1003 
1004             first_shift = FALSE;
1005 
1006             /* Protein piece is empty */
1007             ValNodeAddPointer(&slp1, SEQLOC_EMPTY, SeqIdDup(id1));
1008             
1009             /* Nucleotide scale shifted by 3, protein gapped */
1010             seq_int2 = SeqIntNew();              
1011             seq_int2->from = s_GetCurrentPos(&start2, esp->num[index]*3);
1012             seq_int2->to = start2 - 1;
1013 
1014             if(seq_int2->to >= original_length2) {
1015                 seq_int2->to = original_length2 -1;
1016             }
1017 
1018             /* Transfer to DNA minus strand coordinates */
1019             if(strand2 == Seq_strand_minus) {
1020                 int tmp_int;
1021                 tmp_int = seq_int2->to;
1022                 seq_int2->to = original_length2 - seq_int2->from - 1;
1023                 seq_int2->from = original_length2 - tmp_int - 1;
1024             }
1025 
1026             seq_int2->id = SeqIdDup(id2);
1027             seq_int2->strand = strand2;
1028             
1029             ValNodeAddPointer(&slp2, SEQLOC_INT, seq_int2);
1030             
1031             seq_int1_last = NULL;
1032             seq_int2_last = seq_int2; /* Will be used to adjust "to" value */
1033             
1034             break;
1035 
1036         case eGapAlignSub: /* Substitution. */
1037 
1038             first_shift = FALSE;
1039 
1040             /* Protein coordinates */
1041             seq_int1 = SeqIntNew();
1042             seq_int1->from =  s_GetCurrentPos(&start1, esp->num[index]);
1043             seq_int1->to = start1 - 1;
1044 
1045             if(seq_int1->to >= original_length1)
1046                 seq_int1->to = original_length1-1;
1047             
1048             seq_int1->id = SeqIdDup(id1);
1049             seq_int1->strand = strand1;
1050 
1051             ValNodeAddPointer(&slp1, SEQLOC_INT, seq_int1);
1052            
1053             /* Nucleotide scale shifted by op_type */
1054             seq_int2 = SeqIntNew();
1055 
1056             seq_int2->from = 
1057                s_GetCurrentPos(&start2, esp->num[index]*(Uint1)esp->op_type[index]);
1058             seq_int2->to = start2 - 1;
1059 
1060                 /* Chop off three bases and one residue at a time.
1061                         Why does this happen, seems like a bug?
1062                 */
1063             while (seq_int2->to >= original_length2) {
1064                 seq_int2->to -= 3;
1065                 seq_int1->to--;
1066             }
1067 
1068             /* Transfer to DNA minus strand coordinates */
1069             if(strand2 == Seq_strand_minus) {
1070                 int tmp_int;
1071                 tmp_int = seq_int2->to;
1072                 seq_int2->to = original_length2 - seq_int2->from - 1;
1073                 seq_int2->from = original_length2 - tmp_int - 1;
1074             }
1075             
1076             seq_int2->id = SeqIdDup(id2);
1077             seq_int2->strand = strand2;
1078 
1079             ValNodeAddPointer(&slp2, SEQLOC_INT, seq_int2);
1080 
1081             seq_int1_last = seq_int1; /* Will be used to adjust "to" value */
1082             seq_int2_last = seq_int2; /* Will be used to adjust "to" value */
1083             
1084             break;
1085         case eGapAlignDel2:     /* gap of two nucleotides. */
1086         case eGapAlignDel1: /* Gap of one nucleotide. */
1087         case eGapAlignIns1: /* Insertion of one nucleotide. */
1088         case eGapAlignIns2: /* Insertion of two nucleotides. */
1089 
1090             if(first_shift == TRUE) { /* Second frameshift in a row */
1091                 /* Protein coordinates */
1092                 seq_int1 = SeqIntNew();
1093                 seq_int1->from =  s_GetCurrentPos(&start1, 1);
1094                 seq_int1->to = start1 - 1;
1095 
1096                 if(seq_int1->to >= original_length1)
1097                     seq_int1->to = original_length1-1;
1098                 
1099                 seq_int1->id = SeqIdDup(id1);
1100                 seq_int1->strand = strand1;
1101                 
1102                 ValNodeAddPointer(&slp1, SEQLOC_INT, seq_int1);
1103                 
1104                 /* Nucleotide scale shifted by op_type */
1105                 seq_int2 = SeqIntNew();
1106 
1107                 seq_int2->from = 
1108                    s_GetCurrentPos(&start2, (Uint1)esp->op_type[index]);
1109                 seq_int2->to = start2 - 1;
1110 
1111                 if(seq_int2->to >= original_length2) {
1112                     seq_int2->to = original_length2 -1;
1113                     seq_int1->to--;
1114                 }
1115 
1116                 /* Transfer to DNA minus strand coordinates */
1117                 if(strand2 == Seq_strand_minus) {
1118                     int tmp_int;
1119                     tmp_int = seq_int2->to;
1120                     seq_int2->to = original_length2 - seq_int2->from - 1;
1121                     seq_int2->from = original_length2 - tmp_int - 1;
1122                 }
1123             
1124                 seq_int2->id = SeqIdDup(id2);
1125                 seq_int2->strand = strand2;
1126                 
1127                 ValNodeAddPointer(&slp2, SEQLOC_INT, seq_int2);
1128 
1129                 seq_int1_last = seq_int1; 
1130                 seq_int2_last = seq_int2; 
1131 
1132                 /* first_shift = FALSE; */
1133 
1134                 break;
1135             }
1136             
1137             first_shift = TRUE;
1138 
1139             /* If this substitution is following simple frameshift
1140                we do not need to start new segment, but may continue
1141                old one */
1142             if(seq_int2_last != NULL) {
1143                 s_GetCurrentPos(&start2, esp->num[index]*((Uint1)esp->op_type[index]-3));
1144                 if(strand2 != Seq_strand_minus) {
1145                     seq_int2_last->to = start2 - 1;
1146                 } else {
1147                     /* Transfer to DNA minus strand coordinates */
1148                     seq_int2_last->from = original_length2 - start2;
1149                 }
1150 
1151                 /* Adjustment for multiple shifts - theoretically possible,
1152                    but very unprobable */
1153                 if(seq_int2_last->from > seq_int2_last->to) {
1154                     
1155                     if(strand2 != Seq_strand_minus) {
1156                         seq_int2_last->to += 3;
1157                     } else {
1158                         seq_int2_last->from -= 3;
1159                     }
1160                     
1161                     if(seq_int1_last != 0)
1162                         seq_int1_last++;
1163                 }
1164 
1165             } else if ((Uint1)esp->op_type[index] > 3) {
1166                 /* Protein piece is empty */
1167                 ValNodeAddPointer(&slp1, SEQLOC_EMPTY, SeqIdDup(id1));
1168                 /* Simulating insertion of nucleotides */
1169                 seq_int2 = SeqIntNew();
1170                 seq_int2->from = 
1171                    s_GetCurrentPos(&start2, 
1172                                    esp->num[index]*((Uint1)esp->op_type[index]-3));
1173                 seq_int2->to = start2 - 1;
1174                 
1175                 if(seq_int2->to >= original_length2) {
1176                     seq_int2->to = original_length2 - 1;
1177                 }
1178 
1179                 /* Transfer to DNA minus strand coordinates */
1180                 if(strand2 == Seq_strand_minus) {
1181                     int tmp_int;
1182                     tmp_int = seq_int2->to;
1183                     seq_int2->to = original_length2 - seq_int2->from - 1;
1184                     seq_int2->from = original_length2 - tmp_int - 1;
1185                 }
1186 
1187                 seq_int2->id = SeqIdDup(id2);
1188                 seq_int2->strand = strand2;
1189                 
1190                 ValNodeAddPointer(&slp2, SEQLOC_INT, seq_int2);
1191                 
1192                 seq_int1_last = NULL;
1193                 seq_int2_last = seq_int2; /* Will be used to adjust "to" value */
1194                 break;
1195             } else {
1196                 continue;       /* Main loop */
1197             }
1198             continue;       /* Main loop */
1199             /* break; */
1200         default:
1201             continue;       /* Main loop */
1202             /* break; */
1203         } 
1204 
1205         if (reverse) {
1206             slp = slp2;
1207             slp2->next = slp1;
1208             sip = SeqIdDup(id2);
1209             sip->next = SeqIdDup(id1);
1210         } else {
1211             slp = slp1;
1212             slp1->next = slp2;
1213             sip = SeqIdDup(id1);
1214             sip->next = SeqIdDup(id2);
1215         }
1216 
1217         sseg = StdSegNew();
1218         sseg->dim = 2;
1219         
1220         if (sseg_head == NULL)
1221             sseg_head = sseg;
1222         
1223         sseg->loc = slp;
1224         sseg->ids = sip;
1225         
1226         if (sseg_old)
1227             sseg_old->next = sseg;
1228 
1229         sseg_old = sseg;
1230     }
1231     sap->segs = sseg_head;
1232     sap->next = NULL;
1233     
1234     return sap;
1235 }
1236 
1237 /** Converts a list of gapped HSPs into a list of Seq-align's and appends to a
1238  * previously created Seq-align chain.
1239  * @param program_number Type of BLAST program.
1240  * @param hsp_list HSP list structure [in]
1241  * @param query_id Seq-id of the query sequence [in]
1242  * @param subject_id Seq-id of the subject sequence [in]
1243  * @param query_length Length of the query sequence [in]
1244  * @param subject_length Length of the subject sequence [in]
1245  * @param is_ooframe Is this an alignment with out-of-frame gapping? [in]
1246  * @param head_seqalign Start of the Seq-align chain [in] [out]
1247  */
1248 static Int2 
1249 s_HSPListToSeqAlignGapped(EBlastProgramType program_number, 
1250                           BlastHSPList* hsp_list, SeqIdPtr query_id, 
1251                           SeqIdPtr subject_id, Int4 query_length, 
1252                           Int4 subject_length, Boolean is_ooframe, 
1253                           SeqAlignPtr* head_seqalign)
1254 {
1255    Int2 status = 0;
1256    BlastHSP** hsp_array;
1257    SeqAlignPtr last_seqalign = NULL, seqalign = NULL;
1258    Int4 index;
1259 
1260    *head_seqalign = NULL;
1261 
1262    hsp_array = hsp_list->hsp_array;
1263 
1264    for (index=0; index<hsp_list->hspcnt; index++) { 
1265       if (is_ooframe) {
1266          seqalign = 
1267              OOFBlastHSPToSeqAlign(program_number, hsp_array[index], 
1268                                    query_id, subject_id, query_length, 
1269                                    subject_length);
1270       } else {
1271          /* The following line is needed for negative frames of translated 
1272             query */
1273          seqalign = 
1274              BlastHSPToSeqAlign(program_number, hsp_array[index], subject_id, 
1275                                 query_id, query_length, subject_length);
1276       }
1277       if (index==0) {
1278          *head_seqalign = last_seqalign = seqalign;
1279       } else {
1280          last_seqalign->next = seqalign;
1281          last_seqalign = last_seqalign->next;
1282       }
1283       seqalign->score = GetScoreSetFromBlastHsp(hsp_array[index]);
1284    }
1285 
1286    return status;
1287 }
1288 
1289 Int2 BLAST_ResultsToSeqAlign(EBlastProgramType program_number, 
1290         BlastHSPResults** results_ptr, SeqLocPtr query_slp, 
1291         ReadDBFILE* rdfp, SeqLoc* subject_slp,
1292         Boolean is_gapped, Boolean is_ooframe, 
1293         SBlastSeqalignArray* *seqalign_arr)
1294 {
1295    Int4 query_index, subject_index;
1296    SeqLocPtr slp = query_slp;
1297    SeqIdPtr query_id, subject_id = NULL;
1298    SeqLoc** subject_loc_array = NULL;
1299    BlastHSPResults* results = NULL;
1300    
1301    if (results_ptr == NULL)
1302       return 0;
1303 
1304    results = *results_ptr;
1305    
1306    if (!results || results->num_queries <= 0)
1307       return 0;
1308 
1309    if (!rdfp && !subject_slp)
1310       return -1;
1311 
1312    *seqalign_arr = SBlastSeqalignArrayNew(results->num_queries);
1313    if (*seqalign_arr == NULL)
1314       return -1;
1315    
1316 
1317    if (!rdfp) {
1318       subject_loc_array = 
1319          (SeqLoc**) malloc(ValNodeLen(subject_slp)*sizeof(SeqLoc*));
1320       for (slp = subject_slp, subject_index = 0; slp; slp = slp->next, ++subject_index)
1321          subject_loc_array[subject_index] = slp;
1322    }
1323 
1324    slp = query_slp;
1325    for (query_index = 0; slp && query_index < results->num_queries; 
1326         ++query_index, slp = slp->next) {
1327       SeqAlignPtr head_seqalign = NULL, last_seqalign = NULL;
1328       BlastHitList* hit_list = results->hitlist_array[query_index];
1329       if (!hit_list)
1330          continue;
1331 
1332       query_id = SeqLocId(slp);
1333 
1334       for (subject_index = 0; subject_index < hit_list->hsplist_count;
1335            ++subject_index) {
1336          SeqAlignPtr seqalign = NULL;
1337          Int4 subject_length = 0;
1338          BlastHSPList* hsp_list = hit_list->hsplist_array[subject_index];
1339          if (!hsp_list)
1340             continue;
1341 
1342          /* Sort HSPs with e-values as first priority and scores as 
1343             tie-breakers, since that is the order we want to see them in 
1344             in Seq-aligns. */
1345          Blast_HSPListSortByEvalue(hsp_list);
1346 
1347          if (rdfp) {
1348              /* NB: The following call allocates the SeqId structure. */
1349             readdb_get_descriptor(rdfp, hsp_list->oid, &subject_id, NULL);
1350             subject_length = readdb_get_sequence_length(rdfp, hsp_list->oid);
1351          } else {
1352              /* NB: The following call does not allocate the SeqId structure,
1353                 but returns the existing one. */
1354             subject_id = SeqLocId(subject_loc_array[hsp_list->oid]);
1355             subject_length = SeqLocLen(subject_loc_array[hsp_list->oid]);
1356          }
1357 
1358          if (is_gapped) {
1359             s_HSPListToSeqAlignGapped(program_number, hsp_list, query_id, 
1360                                       subject_id, SeqLocLen(slp), subject_length,
1361                                       is_ooframe, &seqalign);
1362          } else {
1363             s_HSPListToSeqAlignUngapped(program_number, hsp_list, query_id,
1364                                         subject_id, SeqLocLen(slp), 
1365                                         subject_length, &seqalign);
1366          }                      
1367 
1368          if (seqalign)
1369          {
1370              SeqLocPtr subject_loc = NULL;
1371              if (subject_loc_array)
1372                 subject_loc = subject_loc_array[hsp_list->oid];
1373              AdjustOffSetsInSeqAlign(seqalign, slp, subject_loc); 
1374          }
1375 
1376          /* The subject id must be deallocated only in case of a ReadDB 
1377             interface */
1378          if (rdfp)
1379              subject_id = SeqIdSetFree(subject_id);
1380 
1381          if (seqalign) {
1382             if (!last_seqalign) {
1383                head_seqalign = last_seqalign = seqalign;
1384             } else {
1385                last_seqalign->next = seqalign;
1386             }
1387             for ( ; last_seqalign->next; last_seqalign = last_seqalign->next);
1388          }
1389       }
1390       (*seqalign_arr)->array[query_index] = head_seqalign;
1391       results->hitlist_array[query_index] = Blast_HitListFree(results->hitlist_array[query_index]);
1392    }
1393 
1394    results = Blast_HSPResultsFree(results);
1395    *results_ptr = NULL;
1396    sfree(subject_loc_array);
1397 
1398    return 0;
1399 }
1400 /* @} */
1401 
1402 
1403 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.