NCBI C Toolkit Cross Reference

C/tools/blfmtutl.c


  1 static char const rcsid[] = "$Id: blfmtutl.c,v 1.39 2009/09/25 17:41:31 coulouri Exp $";
  2 
  3 /* ===========================================================================
  4 *
  5 *                            PUBLIC DOMAIN NOTICE
  6 *               National Center for Biotechnology Information
  7 *
  8 *  This software/database is a "United States Government Work" under the
  9 *  terms of the United States Copyright Act.  It was written as part of
 10 *  the author's official duties as a United States Government employee and
 11 *  thus cannot be copyrighted.  This software/database is freely available
 12 *  to the public for use. The National Library of Medicine and the U.S.
 13 *  Government have not placed any restriction on its use or reproduction.
 14 *
 15 *  Although all reasonable efforts have been taken to ensure the accuracy
 16 *  and reliability of the software and data, the NLM and the U.S.
 17 *  Government do not and cannot warrant the performance or results that
 18 *  may be obtained by using this software or data. The NLM and the U.S.
 19 *  Government disclaim all warranties, express or implied, including
 20 *  warranties of performance, merchantability or fitness for any particular
 21 *  purpose.
 22 *
 23 *  Please cite the author in any work or product based on this material.
 24 *
 25 * ===========================================================================*/
 26 
 27 /*****************************************************************************
 28 
 29 File name: blfmtutl.c
 30 
 31 Author: Tom Madden
 32 
 33 Contents: Utilities for BLAST formatting
 34 
 35 ******************************************************************************/
 36 /*
 37 * $Revision: 
 38 * $Log: blfmtutl.c,v $
 39 * Revision 1.39  2009/09/25 17:41:31  coulouri
 40 * bump to 2.2.22
 41 *
 42 * Revision 1.38  2009/06/12 18:34:11  coulouri
 43 * bump version
 44 *
 45 * Revision 1.37  2009/02/02 21:37:55  camacho
 46 * Bump version in preparation of upcoming release. JIRA SB-166.
 47 *
 48 * Revision 1.36  2008/10/29 16:51:03  coulouri
 49 * bump version
 50 *
 51 * Revision 1.35  2008/02/29 20:07:33  coulouri
 52 * bump date
 53 *
 54 * Revision 1.34  2008/02/15 21:35:46  coulouri
 55 * bump date
 56 *
 57 * Revision 1.33  2008/01/28 16:34:13  coulouri
 58 * bump date
 59 *
 60 * Revision 1.32  2008/01/25 16:19:22  coulouri
 61 * bump date
 62 *
 63 * Revision 1.31  2007/08/23 19:45:47  coulouri
 64 * bump date
 65 *
 66 * Revision 1.30  2007/08/17 12:42:52  coulouri
 67 * bump date
 68 *
 69 * Revision 1.29  2007/06/24 13:26:54  coulouri
 70 * bump version
 71 *
 72 * Revision 1.28  2007/06/14 17:58:15  papadopo
 73 * allow alignments in denseg form to contain leading or trailing gaps
 74 *
 75 * Revision 1.27  2007/06/04 20:26:18  papadopo
 76 * limit the number of subject sequences cached in the obje manager during tabular output
 77 *
 78 * Revision 1.26  2007/03/23 19:24:41  coulouri
 79 * bump release date
 80 *
 81 * Revision 1.25  2007/03/15 14:29:04  coulouri
 82 * bump release date
 83 *
 84 * Revision 1.24  2007/03/02 15:53:13  coulouri
 85 * prepare for March 11th C toolkit release
 86 *
 87 * Revision 1.23  2006/10/12 19:51:32  coulouri
 88 * bump release date
 89 *
 90 * Revision 1.22  2006/09/23 23:52:12  coulouri
 91 * bump version/date for release
 92 *
 93 * Revision 1.21  2006/05/31 17:17:44  jianye
 94 * always show plus strand for query for dendiag tabular
 95 *
 96 * Revision 1.20  2006/05/27 13:58:07  ucko
 97 * Move use_this_gi_id's declaration *above* other statements in the
 98 * block per C89.
 99 *
100 * Revision 1.19  2006/05/25 16:38:07  jianye
101 * use use_this_gi seqid for subject if present
102 *
103 * Revision 1.18  2006/05/05 13:43:28  coulouri
104 * bump date
105 *
106 * Revision 1.17  2006/04/26 12:42:36  madden
107 * BlastSetUserErrorString and BlastDeleteUserErrorString moved from blastool.c to blfmtutl.c
108 *
109 * Revision 1.16  2006/04/07 19:46:59  coulouri
110 * correction to previous commit
111 *
112 * Revision 1.15  2006/04/07 18:38:19  coulouri
113 * bump version
114 *
115 * Revision 1.14  2006/01/24 18:38:47  papadopo
116 * from Mike Gertz: Fixed a typo in a name in a format string: Aravaind -> Aravind
117 *
118 * Revision 1.13  2005/12/29 19:55:04  madden
119 * Added functions to print tabular output
120 *
121 * Revision 1.12  2005/11/22 13:44:24  coulouri
122 * bump version
123 *
124 * Revision 1.11  2005/10/17 12:47:30  camacho
125 * From Alejandro Schaffer: Updated reference for compositional adjustment
126 *
127 * Revision 1.10  2005/08/05 12:10:48  coulouri
128 * bump version
129 *
130 * Revision 1.9  2005/07/25 12:48:39  camacho
131 * Updated reference for compositional adjustment
132 *
133 * Revision 1.8  2005/06/05 02:54:41  coulouri
134 * bump date
135 *
136 * Revision 1.7  2005/05/20 15:28:03  coulouri
137 * bump date
138 *
139 * Revision 1.6  2005/05/16 17:42:19  papadopo
140 * From Alejandro Schaffer: Print references for composition-based statistics
141 * and for compositional score matrix adjustment, if either method was used.
142 *
143 * Revision 1.5  2005/05/08 13:32:52  coulouri
144 * bump version to 2.2.11
145 *
146 * Revision 1.4  2004/10/19 15:28:59  coulouri
147 * bump version and date
148 *
149 * Revision 1.3  2004/10/04 17:54:14  madden
150 * BlastPrintVersionInfo[Ex] now takes const char* as arg for program
151 *
152 * Revision 1.2  2004/07/22 15:18:45  jianye
153 * correct blast paper url
154 *
155 * Revision 1.1  2004/06/30 12:31:15  madden
156 * Structures and prototypes for blast formatting utilities
157 *
158 */
159 
160 #include <ncbi.h>
161 #include <objcode.h>
162 #include <objseq.h>
163 #include <sequtil.h>
164 #include <readdb.h>
165 #include <ncbithr.h>
166 #include <txalign.h>
167 #include <blfmtutl.h>
168 #include <jzcoll.h>
169 
170 /* the version of BLAST. */
171 #define BLAST_ENGINE_VERSION "2.2.22"
172 #define BLAST_RELEASE_DATE "Sep-27-2009"
173 
174 #define BUFFER_LENGTH 255
175 
176 /*
177         adds the new string to the buffer, separating by a tilde.
178         Checks the size of the buffer for FormatBlastParameters and
179         allocates longer replacement if needed.
180 */
181 
182 Boolean LIBCALL
183 add_string_to_bufferEx(CharPtr buffer, CharPtr *old, Int2Ptr old_length, Boolean add_tilde)
184 
185 {
186         CharPtr new, ptr;
187         Int2 length, new_length;
188 
189         length = (StringLen(*old));
190 
191         if((StringLen(buffer)+length+3) > *old_length)
192         {
193                 new_length = *old_length + 255;
194                 new = MemNew(new_length*sizeof(Char));
195                 if (*old_length > 0 && *old != NULL)
196                 {
197                         MemCpy(new, *old, *old_length);
198                         *old = MemFree(*old);
199                 }
200                 *old = new;
201                 *old_length = new_length;
202         }
203 
204         ptr = *old;
205         ptr += length;
206         if (add_tilde)
207         {
208                 *ptr = '~';
209                 ptr++;
210         }
211 
212         while (*buffer != NULLB)
213         {
214                 *ptr = *buffer;
215                 buffer++; ptr++;
216         }
217 
218         return TRUE;
219 }
220 
221 Boolean LIBCALL
222 add_string_to_buffer(CharPtr buffer, CharPtr *old, Int2Ptr old_length)
223 
224 {
225         return add_string_to_bufferEx(buffer, old, old_length, TRUE);
226 }
227 
228 /*
229         Print the buffer, adding newlines where tildes are found.
230 */
231 
232 Boolean LIBCALL
233 PrintTildeSepLines(CharPtr buffer, Int4 line_length, FILE *outfp)
234 
235 {
236         if (outfp == NULL || buffer == NULL)
237                 return FALSE;
238 
239         asn2ff_set_output(outfp, NULL);
240 
241         ff_StartPrint(0, 0, line_length, NULL);
242         while (*buffer != NULLB)
243         {
244                 if (*buffer != '~')
245                         ff_AddChar(*buffer);
246                 else
247                         NewContLine();
248                 buffer++;
249         }
250         ff_EndPrint();
251 
252         return TRUE;
253 }
254 
255 /*
256         Print the Karlin-Altschul parameters.
257 
258         if gapped is TRUE, then slightly different formatting is used.
259 */
260 
261 Boolean LIBCALL
262 PrintKAParameters(Nlm_FloatHi Lambda, Nlm_FloatHi K, Nlm_FloatHi H, Int4 line_length, FILE *outfp, Boolean gapped)
263 
264 {
265         return PrintKAParametersExtra(Lambda, K, H, 0.0, line_length, outfp, gapped);
266 }
267 
268 Boolean LIBCALL
269 PrintKAParametersExtra(Nlm_FloatHi Lambda, Nlm_FloatHi K, Nlm_FloatHi H, Nlm_FloatHi C, Int4 line_length, FILE *outfp, Boolean gapped)
270 
271 {
272         Char buffer[BUFFER_LENGTH];
273 
274         if (outfp == NULL)
275                 return FALSE;
276 
277         asn2ff_set_output(outfp, NULL);
278 
279         ff_StartPrint(0, 0, line_length, NULL);
280         if (gapped)
281         {
282                 ff_AddString("Gapped");
283                 NewContLine();
284         }
285         
286         if (C == 0.0)
287                 ff_AddString("Lambda     K      H");
288         else
289                 ff_AddString("Lambda     K      H      C");
290         NewContLine();
291         sprintf(buffer, "%#8.3g ", Lambda);
292         ff_AddString(buffer);
293         sprintf(buffer, "%#8.3g ", K);
294         ff_AddString(buffer);
295         sprintf(buffer, "%#8.3g ", H);
296         ff_AddString(buffer);
297         if (C != 0.0)
298         {
299                 sprintf(buffer, "%#8.3g ", C);
300                 ff_AddString(buffer);
301         }
302         NewContLine();
303         ff_EndPrint();
304 
305         return TRUE;
306 
307 }
308 
309 
310 TxDfDbInfoPtr LIBCALL 
311 TxDfDbInfoNew (TxDfDbInfoPtr old)
312 
313 {
314         TxDfDbInfoPtr dbinfo;
315         dbinfo = MemNew(sizeof(TxDfDbInfo));
316         if (old)
317                 old->next = dbinfo;
318         return dbinfo;
319 }
320 
321 TxDfDbInfoPtr LIBCALL 
322 TxDfDbInfoDestruct (TxDfDbInfoPtr dbinfo)
323 
324 {
325         TxDfDbInfoPtr next;
326 
327         if (dbinfo == NULL)
328                 return NULL;
329 
330         while (dbinfo)
331         {
332                 dbinfo->name = MemFree(dbinfo->name);
333                 dbinfo->definition = MemFree(dbinfo->definition);
334                 dbinfo->date = MemFree(dbinfo->date);
335                 next = dbinfo->next;
336                 dbinfo = MemFree(dbinfo);
337                 dbinfo = next;
338         }
339 
340         return dbinfo;
341 }
342 
343 Boolean LIBCALL
344 PrintDbReport(TxDfDbInfoPtr dbinfo, Int4 line_length, FILE *outfp)
345 
346 {
347 
348         if (dbinfo == NULL || outfp == NULL)
349                 return FALSE;
350 
351         asn2ff_set_output(outfp, NULL);
352 
353         ff_StartPrint(2, 2, line_length, NULL);
354 
355         if (dbinfo->subset == FALSE)
356         {
357                 ff_AddString("Database: ");
358                 ff_AddString(dbinfo->definition);
359                 NewContLine();
360                 ff_AddString("  Posted date:  ");
361                 ff_AddString(dbinfo->date);
362                 NewContLine();
363                 ff_AddString("Number of letters in database: "); 
364                 ff_AddString(Nlm_Int8tostr((Int8) dbinfo->total_length, 1));
365                 NewContLine();
366                 ff_AddString("Number of sequences in database:  ");
367                 ff_AddString(Ltostr((long) dbinfo->number_seqs, 1));
368                 NewContLine();
369         }
370         else
371         {
372                 ff_AddString("Subset of the database(s) listed below");
373                 NewContLine();
374                 ff_AddString("   Number of letters searched: "); 
375                 ff_AddString(Nlm_Int8tostr((Int8) dbinfo->total_length, 1));
376                 NewContLine();
377                 ff_AddString("   Number of sequences searched:  ");
378                 ff_AddString(Ltostr((long) dbinfo->number_seqs, 1));
379                 NewContLine();
380         }
381         ff_EndPrint();
382 
383         return TRUE;
384 }
385 
386 /*
387         Prints an acknowledgement of the Blast Query, in the standard
388         BLAST format.
389 */
390 
391 
392 Boolean LIBCALL
393 AcknowledgeBlastQuery(BioseqPtr bsp, Int4 line_length, FILE *outfp, Boolean believe_query, Boolean html)
394 
395 {
396         Char buffer[BUFFER_LENGTH];
397 
398         if (bsp == NULL || outfp == NULL)
399                 return FALSE;
400         
401         asn2ff_set_output(outfp, NULL);
402 
403         ff_StartPrint(0, 0, line_length, NULL);
404         if (html)
405                 ff_AddString("<b>Query=</b> ");
406         else
407                 ff_AddString("Query= ");
408         if (bsp->id && (bsp->id->choice != SEQID_LOCAL || believe_query))
409         {
410                 SeqIdWrite(bsp->id, buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
411                 if (StringNCmp(buffer, "lcl|", 4) == 0)
412                         ff_AddString(buffer+4);
413                 else
414                         ff_AddString(buffer);
415                 ff_AddChar(' ');
416         }
417         ff_AddString(BioseqGetTitle(bsp));
418         NewContLine();
419         TabToColumn(10);
420         ff_AddChar('(');
421         ff_AddString(Ltostr((long) BioseqGetLen(bsp), 1));
422         ff_AddString(" letters)");
423         NewContLine();
424         ff_EndPrint();
425 
426         return TRUE;
427 }
428 
429 /*
430         return the version of BLAST as a char. string.
431 */
432 CharPtr LIBCALL
433 BlastGetReleaseDate (void)
434 
435 {
436         return BLAST_RELEASE_DATE;
437 }
438 
439 
440 /*
441         return the version of BLAST as a char. string.
442 */
443 CharPtr LIBCALL
444 BlastGetVersionNumber (void)
445 
446 {
447         return BLAST_ENGINE_VERSION;
448 }
449 
450 Boolean BlastPrintVersionInfo (const char* program, Boolean html, FILE *outfp)
451 
452 {
453         return BlastPrintVersionInfoEx(program, html, BlastGetVersionNumber(), BlastGetReleaseDate(), outfp);
454 }
455 
456 Boolean BlastPrintVersionInfoEx (const char* program, Boolean html, CharPtr version, CharPtr date, FILE *outfp)
457 
458 {
459         CharPtr ret_buffer;
460 
461 
462         if (outfp == NULL)
463                 return FALSE;
464 
465         ret_buffer = StringSave(program);
466         Nlm_StrUpper(ret_buffer);
467         if (html)
468                 fprintf(outfp, "<b>%s %s [%s]</b>\n", ret_buffer, version, date);
469         else
470                 fprintf(outfp, "%s %s [%s]\n", ret_buffer, version, date);
471         ret_buffer = MemFree(ret_buffer);
472 
473         return TRUE;
474 }
475 
476 /* 
477         Returns a reference for the header.
478         The newlines are represented by tildes, use PrintTildeSepLines
479         to print this.
480 */
481 
482 CharPtr LIBCALL
483 BlastGetReference(Boolean html)
484 
485 {
486         CharPtr ret_buffer;
487         Int2 ret_buffer_length;
488 
489         ret_buffer = NULL;
490         ret_buffer_length = 0;
491 
492         
493         if (html) {
494                 add_string_to_bufferEx("<b><a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids=9254694&dopt=Citation\">Reference</a>:</b>", &ret_buffer, &ret_buffer_length, TRUE);
495                 add_string_to_bufferEx("Altschul, Stephen F., Thomas L. Madden, Alejandro A. Sch&auml;ffer, ", &ret_buffer, &ret_buffer_length, TRUE);
496         } else
497                 add_string_to_bufferEx("Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ", &ret_buffer, &ret_buffer_length, TRUE);
498         add_string_to_bufferEx("Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ", &ret_buffer, &ret_buffer_length, TRUE);
499         add_string_to_bufferEx("\"Gapped BLAST and PSI-BLAST: a new generation of protein database search", &ret_buffer, &ret_buffer_length, TRUE);
500         add_string_to_bufferEx("programs\",  Nucleic Acids Res. 25:3389-3402.", &ret_buffer, &ret_buffer_length, TRUE);
501         
502         return ret_buffer;
503 }
504 
505 Boolean LIBCALL
506 MegaBlastPrintReference(Boolean html, Int4 line_length, FILE *outfp)
507 
508 {
509         CharPtr ret_buffer;
510         Int2 ret_buffer_length;
511 
512         ret_buffer = NULL;
513         ret_buffer_length = 0;
514 
515         if (outfp == NULL)
516                 return FALSE;
517         
518         if (html) {
519            add_string_to_bufferEx("<b><a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids=10890397&dopt=Citation\">Reference</a>:</b>", &ret_buffer, &ret_buffer_length, TRUE);
520            add_string_to_bufferEx("Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000),", &ret_buffer, &ret_buffer_length, TRUE);
521         } else
522            add_string_to_bufferEx("Reference: Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000), ", &ret_buffer, &ret_buffer_length, TRUE);
523         add_string_to_bufferEx("\"A greedy algorithm for aligning DNA sequences\", ", 
524                                &ret_buffer, &ret_buffer_length, TRUE);
525         add_string_to_bufferEx("J Comput Biol 2000; 7(1-2):203-14.", 
526                                &ret_buffer, &ret_buffer_length, TRUE);
527         
528         PrintTildeSepLines(ret_buffer, line_length, outfp);
529         ret_buffer = MemFree(ret_buffer);
530         return TRUE;
531 }
532 
533 Boolean LIBCALL
534 BlastPrintReference(Boolean html, Int4 line_length, FILE *outfp)
535 
536 {
537         CharPtr ret_buffer;
538         
539         if (outfp == NULL)
540                 return FALSE;
541         
542         ret_buffer = BlastGetReference(html);
543         PrintTildeSepLines(ret_buffer, line_length, outfp);
544         ret_buffer = MemFree(ret_buffer);
545 
546         return TRUE;
547 }
548 
549 /* 
550         Returns a reference for the header.
551         The newlines are represented by tildes, use PrintTildeSepLines
552         to print this.
553 */
554 
555 
556 /* 
557         Returns a reference for composition-based statistics to use
558         in the header.
559         The newlines are represented by tildes, use PrintTildeSepLines
560         to print this.
561 */
562 
563 CharPtr LIBCALL
564 CBStatisticsGetReference(Boolean html, Boolean firstRound, Boolean moreRounds)
565 
566 {
567         CharPtr ret_buffer;
568         Int2 ret_buffer_length;
569 
570         ret_buffer = NULL;
571         ret_buffer_length = 0;
572 
573         
574         if (firstRound) {
575           if (html) {
576             add_string_to_bufferEx("<b><a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids=11452024&dopt=Citation\">Reference for composition-based statistics</a>:</b>", &ret_buffer, &ret_buffer_length, TRUE);
577             add_string_to_bufferEx("Sch&auml;ffer, Alejandro A., L. Aravind, Thomas L. Madden, ", &ret_buffer, &ret_buffer_length, TRUE);
578         } else
579           add_string_to_bufferEx("Reference for composition-based statistics:", &ret_buffer, &ret_buffer_length, TRUE);
580           add_string_to_bufferEx("Schaffer, Alejandro A., L. Aravind, Thomas L. Madden,", &ret_buffer, &ret_buffer_length, TRUE);
581         }
582         else {
583           if (html) {
584             add_string_to_bufferEx("<b><a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids=11452024&dopt=Citation\">Reference for composition-based statistics </a></b>", &ret_buffer, &ret_buffer_length, TRUE);
585             add_string_to_bufferEx("starting in round 2:", &ret_buffer, &ret_buffer_length, TRUE);
586 
587             add_string_to_bufferEx("Sch&auml;ffer, Alejandro A., L. Aravind, Thomas L. Madden, ", &ret_buffer, &ret_buffer_length, TRUE);
588           } else {
589             add_string_to_bufferEx("Reference for composition-based statistics starting in round 2:", &ret_buffer, &ret_buffer_length, TRUE);
590             add_string_to_bufferEx("Schaffer, Alejandro A., L. Aravind, Thomas L. Madden,", &ret_buffer, &ret_buffer_length, TRUE);
591           }
592         }
593         add_string_to_bufferEx("Sergei Shavirin, John L. Spouge, Yuri I. Wolf,  ", &ret_buffer, &ret_buffer_length, TRUE);
594         add_string_to_bufferEx("Eugene V. Koonin, and Stephen F. Altschul (2001), ", &ret_buffer, &ret_buffer_length, TRUE);
595         add_string_to_bufferEx("\"Improving the accuracy of PSI-BLAST protein database searches with ", &ret_buffer, &ret_buffer_length, TRUE);
596         add_string_to_bufferEx("composition-based statistics and other refinements\",  Nucleic Acids Res. 29:2994-3005.", &ret_buffer, &ret_buffer_length, TRUE);
597         return ret_buffer;
598 }
599 
600 /*print the reference for composition-based statistics when they are used*/
601 Boolean LIBCALL
602 CBStatisticsPrintReference(Boolean html, Int4 line_length, 
603                            Boolean firstRound, Boolean moreRounds, FILE *outfp)
604 
605 {
606         CharPtr ret_buffer;
607         
608         if (outfp == NULL)
609                 return FALSE;
610 
611         if (!(firstRound || moreRounds))
612           return FALSE;
613         
614         ret_buffer = CBStatisticsGetReference(html,firstRound, moreRounds);
615         PrintTildeSepLines(ret_buffer, line_length, outfp);
616         ret_buffer = MemFree(ret_buffer);
617 
618         return TRUE;
619 }
620 
621 /* 
622         Returns a reference for the header.
623         The newlines are represented by tildes, use PrintTildeSepLines
624         to print this.
625 */
626 
627 CharPtr LIBCALL
628 CAdjustmentGetReference(Boolean html)
629 
630 {
631         CharPtr ret_buffer;
632         Int2 ret_buffer_length;
633 
634         ret_buffer = NULL;
635         ret_buffer_length = 0;
636 
637         if (html) {
638           add_string_to_bufferEx("<b><a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids=16218944&dopt=Citation\">Reference for compositional score matrix adjustment</a>:</b>", &ret_buffer, &ret_buffer_length, TRUE);
639           add_string_to_bufferEx("Altschul, Stephen F., John C. Wootton, E. Michael Gertz, Richa Agarwala,", &ret_buffer, &ret_buffer_length, TRUE);
640           add_string_to_bufferEx("Aleksandr Morgulis, Alejandro A. Sch&auml;ffer, and Yi-Kuo Yu (2005) \"Protein database", &ret_buffer, &ret_buffer_length, TRUE);
641           add_string_to_bufferEx("searches using compositionally adjusted substitution matrices\", FEBS J. 272:5101-5109.", &ret_buffer, &ret_buffer_length, TRUE);     
642         }
643         else {
644           add_string_to_bufferEx("Reference for compositional score matrix adjustment: Altschul, Stephen F., ", &ret_buffer, &ret_buffer_length, TRUE);
645           add_string_to_bufferEx("John C. Wootton, E. Michael Gertz, Richa Agarwala, Aleksandr Morgulis,", &ret_buffer, &ret_buffer_length, TRUE);
646           add_string_to_bufferEx("Alejandro A. Schaffer, and Yi-Kuo Yu (2005) \"Protein database searches", &ret_buffer, &ret_buffer_length, TRUE);
647           add_string_to_bufferEx("using compositionally adjusted substitution matrices\", FEBS J. 272:5101-5109.", &ret_buffer, &ret_buffer_length, TRUE);      
648         }
649         return ret_buffer;
650 }
651 
652 /*print the reference for composition-based statistics when they are used*/
653 Boolean LIBCALL
654 CAdjustmentPrintReference(Boolean html, Int4 line_length, FILE *outfp)
655 
656 {
657         CharPtr ret_buffer;
658         
659         if (outfp == NULL)
660                 return FALSE;
661 
662         ret_buffer = CAdjustmentGetReference(html);
663         PrintTildeSepLines(ret_buffer, line_length, outfp);
664         ret_buffer = MemFree(ret_buffer);
665 
666         return TRUE;
667 }
668 
669 /* 
670         Returns a reference for the header.
671         The newlines are represented by tildes, use PrintTildeSepLines
672         to print this.
673 */
674 
675 
676 
677 CharPtr LIBCALL
678 BlastGetPhiReference(Boolean html)
679 
680 {
681         CharPtr ret_buffer;
682         Int2 ret_buffer_length;
683 
684         ret_buffer = NULL;
685         ret_buffer_length = 0;
686 
687         
688         if (html) {
689                 add_string_to_bufferEx("<b><a http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids=9705509&dopt=Citation\">Reference</a>:</b>", &ret_buffer, &ret_buffer_length, TRUE);
690                 add_string_to_bufferEx("Zhang, Zheng, Alejandro A. Sch&auml;ffer, Webb Miller, Thomas L. Madden, ", &ret_buffer, &ret_buffer_length, TRUE);
691         } else
692                 add_string_to_bufferEx("Reference: Zhang, Zheng, Alejandro A. Schaffer, Webb Miller, Thomas L. Madden, ", &ret_buffer, &ret_buffer_length, TRUE);
693         add_string_to_bufferEx("David J. Lipman, Eugene V. Koonin, and Stephen F. Altschul (1998), ", &ret_buffer, &ret_buffer_length, TRUE);
694         add_string_to_bufferEx("\"Protein sequence similarity searches using patterns as seeds\", ", &ret_buffer, &ret_buffer_length, TRUE);
695         add_string_to_bufferEx("Nucleic Acids Res. 26:3986-3990.", &ret_buffer, &ret_buffer_length, TRUE);
696         
697         return ret_buffer;
698 }
699 
700 Boolean LIBCALL
701 BlastPrintPhiReference(Boolean html, Int4 line_length, FILE *outfp)
702 
703 {
704         CharPtr ret_buffer;
705         
706         if (outfp == NULL)
707                 return FALSE;
708         
709         ret_buffer = BlastGetPhiReference(html);
710         PrintTildeSepLines(ret_buffer, line_length, outfp);
711         ret_buffer = MemFree(ret_buffer);
712 
713         return TRUE;
714 }
715 
716 /*
717         Counts the number of SeqAligns present.
718 */
719 
720 static Int4
721 GetSeqAlignCount(SeqAlignPtr sap)
722 
723 {
724         Int4 count = 0;
725         SeqIdPtr last_id=NULL, id;
726 
727         while (sap)
728         {
729                 id = TxGetSubjectIdFromSeqAlign(sap);
730                 if (last_id)
731                 {
732                         if(SeqIdComp(id, last_id) != SIC_YES)
733                                 count++;
734                 }
735                 else
736                 {
737                         count = 1;
738                 }
739                 last_id = id;
740                 sap = sap->next;
741         }
742 
743         return count;
744 
745 }
746 
747 /*
748         Duplicates a SeqAlignPtr, up to the number of unique
749         records specified.
750 */
751 
752 static SeqAlignPtr
753 GetPrivateSeqAlign(SeqAlignPtr sap, Int4 number, Int4Ptr number_returned)
754 
755 {
756         Int4 count=0;
757         SeqIdPtr last_id=NULL, id;
758         SeqAlignPtr new_head=NULL, var;
759 
760         last_id = TxGetSubjectIdFromSeqAlign(sap);
761 
762         while (count<number && sap)
763         {
764                 count++;
765                 while (sap)
766                 {
767                         id = TxGetSubjectIdFromSeqAlign(sap);
768                         if(SeqIdComp(id, last_id) != SIC_YES)
769                         {
770                                 last_id = id;
771                                 break;
772                         }
773                         if (new_head == NULL)
774                         {
775                                 new_head = AsnIoMemCopy(sap, (AsnReadFunc) SeqAlignAsnRead, (AsnWriteFunc) SeqAlignAsnWrite);
776                                 var = new_head;
777                         }
778                         else
779                         {
780                                 var->next = AsnIoMemCopy(sap, (AsnReadFunc) SeqAlignAsnRead, (AsnWriteFunc) SeqAlignAsnWrite);
781                                 var = var->next;
782                         }
783                         last_id = id;
784                         sap = sap->next;
785                 }
786         }
787 
788         *number_returned = count;
789 
790         return new_head;
791 }
792 
793 /*
794         Duplicate a SeqAlignPtr, keeping on the number of unique db
795         hits specified.
796 */
797 
798 BlastPruneSapStructPtr LIBCALL
799 BlastPruneHitsFromSeqAlign(SeqAlignPtr sap, Int4 number, BlastPruneSapStructPtr prune)
800 
801 {
802         if (prune == NULL)
803         {
804                 prune = MemNew(sizeof(BlastPruneSapStruct));
805         }
806         else
807         {
808                 if (prune->number == number)
809                         return prune;
810                 if (prune->allocated)
811                         prune->sap = SeqAlignSetFree(prune->sap);
812                 prune->sap = NULL;
813                 prune->allocated = FALSE;
814                 prune->original_number = 0;
815                 prune->number = 0;
816         }
817 
818         prune->original_number = GetSeqAlignCount(sap);
819 
820         if (prune->original_number < number)
821         {
822                 prune->number = prune->original_number;
823                 prune->sap = sap;
824                 prune->allocated = FALSE;
825         }
826         else
827         {
828                 prune->sap = GetPrivateSeqAlign(sap, number, &(prune->number));
829                 prune->allocated = TRUE;
830         }
831 
832         return prune;
833 }
834 
835 BlastPruneSapStructPtr LIBCALL
836 BlastPruneSapStructDestruct(BlastPruneSapStructPtr prune)
837 
838 {
839         if (prune == NULL)
840                 return NULL;
841 
842         if (prune->allocated)
843         {
844                 prune->sap = SeqAlignSetFree(prune->sap);
845         }
846         prune = MemFree(prune);
847 
848         return prune;
849 }
850 
851 
852 void PrintTabularOutputHeader(CharPtr blast_database, BioseqPtr query_bsp,
853                               SeqLocPtr query_slp, CharPtr blast_program,
854                               Int4 iteration, Boolean believe_query,
855                               FILE *outfp)
856 {
857    Char buffer[BUFFER_LENGTH+1];
858    Boolean unlock_bioseq = FALSE;
859 
860    asn2ff_set_output(outfp, NULL);
861    
862    ff_StartPrint(0, 0, BUFFER_LENGTH, NULL);
863 
864    if (blast_program) {
865       CharPtr program = StringSave(blast_program);
866       Nlm_StrUpper(program);
867       sprintf(buffer, "# %s %s [%s]", program, BlastGetVersionNumber(),
868               BlastGetReleaseDate());
869       MemFree(program);
870       ff_AddString(buffer);
871       NewContLine();
872    }
873 
874    if (iteration > 0) {
875       ff_AddString("# Iteration: ");
876       ff_AddString(Ltostr((long) iteration, 1));
877       NewContLine();
878    }
879 
880    if (query_bsp || query_slp) {
881       CharPtr title;
882       const CharPtr str = "# Query: ";
883       Int4 string_length = StrLen(str);
884 
885       ff_AddString(str);
886 
887       if (!query_bsp) {
888          Int4 num_queries = ValNodeLen(query_slp);
889          if (num_queries > 1) {
890             /* Multiple queries: just print the number, without deflines. */
891             sprintf(buffer, "%ld sequences", (long)num_queries);
892             ff_AddString(buffer);
893          } else {
894             query_bsp = BioseqLockById(SeqLocId(query_slp));
895             unlock_bioseq = TRUE;
896          }
897       }
898       if (query_bsp) {
899          if (query_bsp->id && believe_query) {
900             SeqIdWrite(query_bsp->id, buffer, PRINTID_FASTA_LONG, 
901                        BUFFER_LENGTH);
902             if (StringNCmp(buffer, "lcl|", 4) == 0) {
903                ff_AddString(buffer+4);
904             } else {
905                ff_AddString(buffer);
906             }
907             string_length += StrLen(buffer);
908             ff_AddChar(' ');
909             string_length++; /* to account for the space above. */
910          }
911 
912          if ((title = BioseqGetTitle(query_bsp)) != NULL) { 
913             /* We do this to keep the entire title on one line 
914                (of length BUFFER_LENGTH). */
915             StrNCpy(buffer, title, BUFFER_LENGTH - string_length);
916             buffer[BUFFER_LENGTH - string_length] = NULLB;
917             ff_AddString(buffer);
918          }
919 
920          if (unlock_bioseq)
921             BioseqUnlock(query_bsp);
922       }
923       NewContLine();
924    }
925    if (blast_database) {
926       ff_AddString("# Database: ");
927       ff_AddString(blast_database);
928       NewContLine();
929    }
930    if (getenv("PRINT_SEQUENCES")) {
931          ff_AddString("# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score, query seq., subject seq.");
932    } else {
933          ff_AddString("# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score");
934    }
935 
936    ff_EndPrint();
937 }
938 
939 static Int4
940 BlastBioseqGetNumIdentical(BioseqPtr q_bsp, BioseqPtr s_bsp, Int4 q_start,
941                      Int4 s_start, Int4 length,
942                      Uint1 q_strand, Uint1 s_strand)
943 {
944    SeqLocPtr q_slp, s_slp;
945    SeqPortPtr q_spp, s_spp;
946    Int4 i, ident = 0;
947    Uint1 q_res, s_res;
948 
949    if (!q_bsp || !s_bsp)
950       return 0;
951 
952    q_slp = SeqLocIntNew(q_start, q_start+length-1, q_strand, q_bsp->id);
953    s_slp = SeqLocIntNew(s_start, s_start+length-1, s_strand, s_bsp->id);
954    if (ISA_na(q_bsp->mol))
955       q_spp = SeqPortNewByLoc(q_slp, Seq_code_ncbi4na);
956    else
957       q_spp = SeqPortNewByLoc(q_slp, Seq_code_ncbistdaa);
958    if (ISA_na(s_bsp->mol))
959       s_spp = SeqPortNewByLoc(s_slp, Seq_code_ncbi4na);
960    else
961       s_spp = SeqPortNewByLoc(s_slp, Seq_code_ncbistdaa);
962 
963    for (i=0; i<length; i++) {
964       while ((q_res = SeqPortGetResidue(q_spp)) != SEQPORT_EOF &&
965              !IS_residue(q_res));
966       while ((s_res = SeqPortGetResidue(s_spp)) != SEQPORT_EOF &&
967              !IS_residue(s_res));
968       if (q_res == SEQPORT_EOF || s_res == SEQPORT_EOF)
969          break;
970       else if (q_res == s_res)
971          ident++;
972    }
973 
974    SeqLocFree(q_slp);
975    SeqLocFree(s_slp);
976    SeqPortFree(q_spp);
977    SeqPortFree(s_spp);
978 
979    return ident;
980 }
981 /* 
982    Function to print results in tab-delimited format, given a SeqAlign list.
983    q_shift and s_shift are the offsets in query and subject in case of a
984    subsequence search 
985 */
986 void BlastPrintTabulatedResults(SeqAlignPtr seqalign, BioseqPtr query_bsp,
987                                 SeqLocPtr query_slp, Int4 num_alignments, 
988                                 CharPtr blast_program, Boolean is_ungapped, 
989                                 Boolean believe_query, Int4 q_shift, 
990                                 Int4 s_shift, FILE *fp,
991                                 Boolean print_query_info)
992 {
993    BlastPrintTabulatedResultsEx(seqalign, query_bsp, query_slp, num_alignments,
994                                 blast_program, is_ungapped, believe_query,
995                                 q_shift, s_shift, fp, NULL, print_query_info);
996 }
997 
998 void BlastPrintTabulatedResultsEx(SeqAlignPtr seqalign, BioseqPtr query_bsp,
999                                 SeqLocPtr query_slp, Int4 num_alignments, 
1000                                 CharPtr blast_program, Boolean is_ungapped, 
1001                                 Boolean believe_query, Int4 q_shift, 
1002                                 Int4 s_shift, FILE *fp, 
1003                                 int *num_formatted, Boolean print_query_info)
1004 {
1005    BlastPrintTabularResults(seqalign, query_bsp, query_slp, num_alignments,
1006       blast_program, is_ungapped, FALSE, believe_query,
1007       q_shift, s_shift, fp, num_formatted, print_query_info);
1008 }
1009 
1010 void BlastPrintTabularResults(SeqAlignPtr seqalign, BioseqPtr query_bsp,
1011         SeqLocPtr query_slp, Int4 num_alignments, CharPtr blast_program, 
1012         Boolean is_ungapped, Boolean is_ooframe, Boolean believe_query, 
1013         Int4 q_shift, Int4 s_shift, FILE *fp, int *num_formatted, 
1014         Boolean print_query_info)
1015 {
1016    SeqAlignPtr sap, sap_tmp = NULL;
1017    FloatHi perc_ident, bit_score, evalue;
1018    Int4 numseg, num_gap_opens, num_mismatches, num_ident, score;
1019    Int4 number, align_length, index, i, j;
1020    Int4 q_start, q_end, s_start, s_end;
1021    Char bit_score_buff[10];
1022    CharPtr eval_buff;
1023    Boolean is_translated;
1024    SeqIdPtr query_id, old_query_id = NULL, subject_id, old_subject_id = NULL;
1025    BioseqPtr subject_bsp=NULL;
1026    Char query_buffer[BUFFER_LENGTH+1], subject_buffer[BUFFER_LENGTH+1];
1027    DenseSegPtr dsp;
1028    StdSegPtr ssp = NULL;
1029    DenseDiagPtr ddp = NULL;
1030    AlignSumPtr asp = NULL;
1031    CharPtr defline, title;
1032    SeqLocPtr slp;
1033    Int4 alignments_count;
1034    Int4 objmgr_count = 0;
1035 
1036    is_translated = (StringCmp(blast_program, "blastn") &&
1037                     StringCmp(blast_program, "blastp"));
1038    
1039    if (is_translated) {
1040       asp = MemNew(sizeof(AlignSum));
1041       asp->matrix = load_default_matrix();
1042       asp->is_aa = TRUE;
1043       asp->ooframe = is_ooframe;
1044    }
1045 
1046    if (is_ungapped)
1047       sap_tmp = SeqAlignNew();
1048 
1049    slp = query_slp;
1050    if (query_bsp)
1051       query_id = query_bsp->id;
1052 
1053    /* Evalue buffer is dynamically allocated to avoid compiler warnings 
1054       in calls to ScoreAndEvalueToBuffers. */
1055    eval_buff = Malloc(10);
1056 
1057    for (sap = seqalign; sap; sap = sap->next) {
1058       if (query_slp)
1059          query_id = TxGetQueryIdFromSeqAlign(sap);
1060       if (SeqIdComp(query_id, old_query_id) != SIC_YES) {
1061          if (old_query_id && num_formatted)
1062             (*num_formatted)++;
1063          alignments_count = num_alignments;
1064          /* New query: find the corresponding SeqLoc */
1065          while (slp && SeqIdComp(query_id, SeqLocId(slp)) != SIC_YES)
1066             slp = slp->next;
1067          if (slp != NULL) {
1068             query_id = old_query_id = SeqLocId(slp);
1069             /* Print new query information */
1070             if (print_query_info)
1071                PrintTabularOutputHeader(NULL, NULL, slp, NULL, 0, 
1072                                         believe_query, fp);
1073          } else if (query_bsp)
1074             old_query_id = query_bsp->id;
1075          defline = (CharPtr) Malloc(BUFFER_LENGTH+1);
1076          SeqIdWrite(query_id, defline, PRINTID_FASTA_LONG, BUFFER_LENGTH);
1077          if (StringNCmp(defline, "lcl|", 4))
1078             StringCpy(query_buffer, defline);
1079          else if (!believe_query) {
1080             if (slp) {
1081                BioseqUnlock(query_bsp);
1082                query_bsp = BioseqLockById(query_id);
1083             }
1084             if ((title = StringSave(BioseqGetTitle(query_bsp))) != NULL) {
1085                defline = MemFree(defline);
1086                defline = StringTokMT(title, " ", &title);
1087                StringNCpy_0(query_buffer, defline, BUFFER_LENGTH);
1088                defline = MemFree(defline);
1089             } else
1090                StringCpy(query_buffer, defline+4);
1091             defline = MemFree(defline);
1092          } else
1093             StringCpy(query_buffer, defline+4);
1094       } else
1095          query_id = old_query_id;      
1096 
1097       subject_id = TxGetSubjectIdFromSeqAlign(sap);
1098 
1099       if (SeqIdComp(subject_id, old_subject_id) != SIC_YES) {
1100          /* New subject sequence has been found in the seqalign list */
1101          if (--alignments_count < 0)
1102             continue;
1103          BioseqUnlock(subject_bsp);
1104 
1105          /* object manager cache is limited in size */
1106          if (++objmgr_count > 8000) {
1107             objmgr_count = 0;
1108             ObjMgrFreeCache(OBJ_MAX);
1109          }
1110 
1111          subject_bsp = BioseqLockById(subject_id);
1112       
1113          if (!subject_bsp || !subject_bsp->id)
1114             continue;
1115          if (subject_bsp->id->choice != SEQID_GENERAL ||
1116              StringCmp(((DbtagPtr)subject_id->data.ptrvalue)->db, "BL_ORD_ID")) {
1117             SeqIdPtr use_this_gi_id = GetUseThisGi(sap); 
1118             defline = (CharPtr) Malloc(BUFFER_LENGTH+1);
1119             if (use_this_gi_id) {
1120                 BlastDefLinePtr bdlp, actual_bdlp;
1121                 bdlp=FDGetDeflineAsnFromBioseq(subject_bsp);
1122                 actual_bdlp=getBlastDefLineForSeqId(bdlp, use_this_gi_id);
1123                 
1124                 SeqIdWrite(actual_bdlp->seqid, defline, PRINTID_FASTA_LONG, BUFFER_LENGTH);
1125                 BlastDefLineSetFree(bdlp);
1126             } else {
1127                 SeqIdWrite(subject_bsp->id, defline, PRINTID_FASTA_LONG, BUFFER_LENGTH);
1128             }
1129             if (StringNCmp(defline, "lcl|", 4))
1130                StringCpy(subject_buffer, defline);
1131             else
1132                StringCpy(subject_buffer, defline+4);
1133          } else {
1134             defline = StringSave(BioseqGetTitle(subject_bsp));
1135             defline = StringTokMT(defline, " \t", &title);
1136             StringCpy(subject_buffer, defline);
1137          }
1138          defline = MemFree(defline);
1139       }
1140       
1141       perc_ident = 0;
1142       align_length = 0;
1143       num_gap_opens = 0;
1144       num_mismatches = 0;
1145 
1146       GetScoreAndEvalue(sap, &score, &bit_score, &evalue, &number);
1147 
1148       /* Do not allow knocking off digit in evalue buffer, so parsers are 
1149          not confused. */
1150       ScoreAndEvalueToBuffers(bit_score, evalue, 
1151                               bit_score_buff, &eval_buff, 0);
1152 
1153       /* Loop on segments within this seqalign (in ungapped case) */
1154       while (TRUE) {
1155          if (sap->segtype == SAS_DENSEG) {
1156             Boolean get_num_ident = TRUE;
1157             dsp = (DenseSegPtr) sap->segs;
1158             numseg = dsp->numseg;
1159             /* Query Bioseq is needed for calculating number of identities.
1160                NB: even if number of identities is already filled in the 
1161                seqalign score list, that is not enough here, because we need to
1162                know number of identities in each segment in order to calculate
1163                number of mismatches correctly. */
1164             if (!query_bsp) {
1165                query_bsp = BioseqLockById(query_id);
1166             }
1167 
1168             for (i=0; i<numseg; i++) {
1169                align_length += dsp->lens[i];
1170                if (dsp->starts[2*i] != -1 && dsp->starts[2*i+1] != -1) {
1171                   if (get_num_ident) {
1172                      num_ident = BlastBioseqGetNumIdentical(query_bsp, subject_bsp, 
1173                                     dsp->starts[2*i], dsp->starts[2*i+1], 
1174                                     dsp->lens[i], dsp->strands[2*i], 
1175                                     dsp->strands[2*i+1]);
1176                      perc_ident += num_ident;
1177                      num_mismatches += dsp->lens[i] - num_ident;
1178                   }
1179                } else {
1180                   num_gap_opens++;
1181                }
1182             }
1183             perc_ident = perc_ident / align_length * 100;
1184 
1185             /* compute half the sequence offsets (account for
1186                leading gaps in the alignment) */
1187             if (dsp->starts[0] == -1) {
1188                 i = 1; j = 0;
1189             }
1190             else if (dsp->starts[1] == -1) {
1191                 i = 0; j = 1;
1192             }
1193             else {
1194                 i = j = 0;
1195             }
1196             if (dsp->strands[0] != dsp->strands[1]) {
1197                q_end = dsp->starts[2*i] + dsp->lens[i];
1198                s_end = dsp->starts[2*j+1] + 1;
1199             } else {
1200                q_start = dsp->starts[2*i] + 1;
1201                s_start = dsp->starts[2*j+1] + 1;
1202             }
1203 
1204             /* compute half the sequence offsets (account for
1205                trailing gaps in the alignment) */
1206             if (dsp->starts[2*numseg-2] == -1) {
1207                 i = numseg-1; j = numseg;
1208             }
1209             else if (dsp->starts[2*numseg-1] == -1) {
1210                 i = numseg; j = numseg-1;
1211             }
1212             else {
1213                 i = j = numseg;
1214             }
1215             if (dsp->strands[0] != dsp->strands[1]) {
1216                q_start = dsp->starts[2*i-2] + 1;
1217                s_start = dsp->starts[2*j-1] + dsp->lens[j-1];
1218             } else {
1219                q_end = dsp->starts[2*i-2] + dsp->lens[i-1];
1220                s_end = dsp->starts[2*j-1] + dsp->lens[j-1];
1221             }
1222 
1223          } else if (sap->segtype == SAS_STD) {
1224             if (!ssp)
1225                ssp = (StdSegPtr) sap->segs;
1226             
1227             if (is_ungapped) {
1228                sap_tmp->segtype = SAS_STD;
1229                sap_tmp->segs = ssp;
1230                GetScoreAndEvalue(sap_tmp, &score, &bit_score, &evalue, &number);
1231                ScoreAndEvalueToBuffers(bit_score, evalue, 
1232                                        bit_score_buff, &eval_buff, 0);
1233                find_score_in_align(sap_tmp, 1, asp);
1234             } else
1235                find_score_in_align(sap, 1, asp);
1236             
1237             if (asp->m_frame < 0)
1238                q_start = SeqLocStop(ssp->loc) + 1;
1239             else
1240                q_start = SeqLocStart(ssp->loc) + 1;
1241             
1242             if (asp->t_frame < 0)
1243                s_start = SeqLocStop(ssp->loc->next) + 1;
1244             else
1245                s_start = SeqLocStart(ssp->loc->next) + 1;
1246             
1247             if (!is_ungapped) {
1248                for (index=1; ssp->next; index++)
1249                   ssp = ssp->next;
1250                num_gap_opens = index / 2;
1251             } else 
1252                num_gap_opens = 0;
1253 
1254             if (asp->m_frame < 0)
1255                q_end = SeqLocStart(ssp->loc) + 1;
1256             else
1257                q_end = SeqLocStop(ssp->loc) + 1;
1258             
1259             if (asp->t_frame < 0)
1260                s_end = SeqLocStart(ssp->loc->next) + 1;
1261             else
1262                s_end = SeqLocStop(ssp->loc->next) + 1;
1263             
1264             align_length = asp->totlen;
1265             num_mismatches = asp->totlen - asp->gaps - asp->identical;
1266             perc_ident = ((FloatHi) 100*asp->identical)/ (asp->totlen);
1267          } else if (sap->segtype == SAS_DENDIAG) {
1268             if (!ddp)
1269                ddp = (DenseDiagPtr) sap->segs;
1270             sap_tmp->segtype = SAS_DENDIAG;
1271             sap_tmp->segs = ddp;
1272             GetScoreAndEvalue(sap_tmp, &score, &bit_score, &evalue, &number);
1273             ScoreAndEvalueToBuffers(bit_score, evalue, 
1274                                     bit_score_buff, &eval_buff, 0);
1275 
1276             align_length = ddp->len;
1277             /*always show plus strand for query*/
1278             if (ddp->strands[0] == Seq_strand_minus &&
1279                 ddp->strands[1] == Seq_strand_plus) { 
1280                 ddp->strands[0] = Seq_strand_plus;
1281                 ddp->strands[1] = Seq_strand_minus;   
1282             }
1283             if (ddp->strands[0] == Seq_strand_minus) {
1284                q_start = ddp->starts[0] + align_length;
1285                q_end = ddp->starts[0] + 1;
1286             } else {
1287                q_start = ddp->starts[0] + 1;
1288                q_end = ddp->starts[0] + align_length;
1289             }
1290 
1291             if (ddp->strands[1] == Seq_strand_minus) {
1292                s_start = ddp->starts[1] + align_length;
1293                s_end = ddp->starts[1] + 1;
1294             } else {
1295                s_start = ddp->starts[1] + 1;
1296                s_end = ddp->starts[1] + align_length;
1297             }
1298             num_gap_opens = 0;
1299             /* Query Bioseq is needed for calculating number of identities.
1300                NB: even if number of identities is already filled in the 
1301                seqalign score list, that is not enough here, because we need to
1302                know number of identities in each segment in order to calculate
1303                number of mismatches correctly. */
1304             if (!query_bsp) {
1305                query_bsp = BioseqLockById(query_id);
1306             }
1307 
1308             num_ident = BlastBioseqGetNumIdentical(query_bsp, subject_bsp, 
1309                            ddp->starts[0], ddp->starts[1], align_length, 
1310                            ddp->strands[0], ddp->strands[1]);
1311             num_mismatches = align_length - num_ident;
1312             perc_ident = ((FloatHi)num_ident) / align_length * 100;
1313          }
1314          if (!is_translated) {
1315             /* Adjust coordinates if query and/or subject is a subsequence */
1316             q_start += q_shift;
1317             q_end += q_shift;
1318             s_start += s_shift;
1319             s_end += s_shift;
1320          }
1321          
1322          if (perc_ident >= 99.995 && perc_ident < 100.00)
1323             perc_ident = 99.99;
1324          
1325          fprintf(fp, 
1326                  "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%s\t%s\n",
1327                  query_buffer, subject_buffer, perc_ident, align_length, 
1328                  num_mismatches, num_gap_opens, q_start, 
1329                  q_end, s_start, s_end, eval_buff, bit_score_buff);
1330          old_subject_id = subject_id;
1331          if (sap->segtype == SAS_DENSEG)
1332             break;
1333          else if (sap->segtype == SAS_DENDIAG) {
1334             if ((ddp = ddp->next) == NULL)
1335                break;
1336          } else if (sap->segtype == SAS_STD) {
1337             if ((ssp = ssp->next) == NULL)
1338                break;
1339          }
1340       }
1341    }
1342 
1343    eval_buff = MemFree(eval_buff);
1344 
1345    if (is_ungapped)
1346       sap_tmp = MemFree(sap_tmp);
1347 
1348    if (is_translated) {
1349       free_default_matrix(asp->matrix);
1350       MemFree(asp);
1351    }
1352 
1353    BioseqUnlock(subject_bsp);
1354    if (query_slp)
1355       BioseqUnlock(query_bsp);
1356 }
1357 
1358 
1359 
/* Mutex serializing installation and deletion of the BLAST user error string. */
1361 TNlmMutex err_message_mutex=NULL;
1362 
1363 #define BLAST_ERROR_BULEN 50
1364 /*
1365         The following functions fill a the Error user string with
1366         text to identify BLAST and the entry being worked on.
1367         The SeqIdPtr is used to make a FASTA id, which is appended
1368         to string.
1369 
1370         A Uint1 is returned, which allows Nlm_ErrUserDelete to delete
1371         this error string when it's done.
1372 */
1373 
1374 Uint1
1375 BlastSetUserErrorString(CharPtr string, SeqIdPtr sip, Boolean use_id)
1376 
1377 {
1378         BioseqPtr bsp;
1379         Char buffer[2*BLAST_ERROR_BULEN+1], textid[BLAST_ERROR_BULEN+1];
1380         CharPtr buf_start, ptr, title;
1381         Int2 length=0, index;
1382         Uint1 retval=0;
1383 
1384         buffer[0] = NULLB;
1385         ptr = buf_start = &buffer[0];
1386 
1387         if (string)
1388                 StringNCpy_0(ptr, string, BLAST_ERROR_BULEN);
1389 
1390         if (sip != NULL)
1391         {
1392             bsp = BioseqLockById(sip);
1393             if(bsp)
1394             {
1395                 if (use_id)
1396                         sip = bsp->id;
1397                 else
1398                         title = BioseqGetTitle(bsp);
1399             }
1400 
1401             if (string)
1402             {
1403                 length = StringLen(string);
1404                 if (length > BLAST_ERROR_BULEN)
1405                         length = BLAST_ERROR_BULEN;
1406             }
1407 
1408             ptr += length;
1409 
1410             if (use_id)
1411             {
1412                 SeqIdWrite(sip, textid, PRINTID_FASTA_LONG, BLAST_ERROR_BULEN-1);
1413                 StringNCpy_0(ptr, textid, BLAST_ERROR_BULEN-1);
1414             }
1415             else if (title)
1416             {
1417                 for (index=0; index<BLAST_ERROR_BULEN-1; index++)
1418                 {
1419                         if (title[index] == NULLB || title[index] == ' ')
1420                         {
1421                                 break;
1422                         }
1423                         *ptr = title[index];
1424                         ptr++;
1425                 }
1426                 *ptr = NULLB;
1427             }
1428             BioseqUnlock(bsp);
1429             StringCpy(ptr+StringLen(ptr), ":");
1430         }
1431         NlmMutexLockEx(&err_message_mutex);
1432         retval = Nlm_ErrUserInstall (buf_start, 0);
1433         NlmMutexUnlock(err_message_mutex);
1434 
1435         return retval;
1436 }
1437 
1438 void
1439 BlastDeleteUserErrorString(Uint1 err_id)
1440 
1441 {
1442         NlmMutexLockEx(&err_message_mutex);
1443         Nlm_ErrUserDelete(err_id);
1444         NlmMutexUnlock(err_message_mutex);
1445         return;
1446 }
1447 
1448 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.