|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/tools/blfmtutl.c |
source navigation diff markup identifier search freetext search file search |
1 static char const rcsid[] = "$Id: blfmtutl.c,v 1.39 2009/09/25 17:41:31 coulouri Exp $";
2
3 /* ===========================================================================
4 *
5 * PUBLIC DOMAIN NOTICE
6 * National Center for Biotechnology Information
7 *
8 * This software/database is a "United States Government Work" under the
9 * terms of the United States Copyright Act. It was written as part of
10 * the author's official duties as a United States Government employee and
11 * thus cannot be copyrighted. This software/database is freely available
12 * to the public for use. The National Library of Medicine and the U.S.
13 * Government have not placed any restriction on its use or reproduction.
14 *
15 * Although all reasonable efforts have been taken to ensure the accuracy
16 * and reliability of the software and data, the NLM and the U.S.
17 * Government do not and cannot warrant the performance or results that
18 * may be obtained by using this software or data. The NLM and the U.S.
19 * Government disclaim all warranties, express or implied, including
20 * warranties of performance, merchantability or fitness for any particular
21 * purpose.
22 *
23 * Please cite the author in any work or product based on this material.
24 *
25 * ===========================================================================*/
26
27 /*****************************************************************************
28
29 File name: blfmtutl.c
30
31 Author: Tom Madden
32
33 Contents: Utilities for BLAST formatting
34
35 ******************************************************************************/
36 /*
37 * $Revision:
38 * $Log: blfmtutl.c,v $
39 * Revision 1.39 2009/09/25 17:41:31 coulouri
40 * bump to 2.2.22
41 *
42 * Revision 1.38 2009/06/12 18:34:11 coulouri
43 * bump version
44 *
45 * Revision 1.37 2009/02/02 21:37:55 camacho
46 * Bump version in preparation of upcoming release. JIRA SB-166.
47 *
48 * Revision 1.36 2008/10/29 16:51:03 coulouri
49 * bump version
50 *
51 * Revision 1.35 2008/02/29 20:07:33 coulouri
52 * bump date
53 *
54 * Revision 1.34 2008/02/15 21:35:46 coulouri
55 * bump date
56 *
57 * Revision 1.33 2008/01/28 16:34:13 coulouri
58 * bump date
59 *
60 * Revision 1.32 2008/01/25 16:19:22 coulouri
61 * bump date
62 *
63 * Revision 1.31 2007/08/23 19:45:47 coulouri
64 * bump date
65 *
66 * Revision 1.30 2007/08/17 12:42:52 coulouri
67 * bump date
68 *
69 * Revision 1.29 2007/06/24 13:26:54 coulouri
70 * bump version
71 *
72 * Revision 1.28 2007/06/14 17:58:15 papadopo
73 * allow alignments in denseg form to contain leading or trailing gaps
74 *
75 * Revision 1.27 2007/06/04 20:26:18 papadopo
76 * limit the number of subject sequences cached in the obje manager during tabular output
77 *
78 * Revision 1.26 2007/03/23 19:24:41 coulouri
79 * bump release date
80 *
81 * Revision 1.25 2007/03/15 14:29:04 coulouri
82 * bump release date
83 *
84 * Revision 1.24 2007/03/02 15:53:13 coulouri
85 * prepare for March 11th C toolkit release
86 *
87 * Revision 1.23 2006/10/12 19:51:32 coulouri
88 * bump release date
89 *
90 * Revision 1.22 2006/09/23 23:52:12 coulouri
91 * bump version/date for release
92 *
93 * Revision 1.21 2006/05/31 17:17:44 jianye
94 * always show plus strand for query for dendiag tabular
95 *
96 * Revision 1.20 2006/05/27 13:58:07 ucko
97 * Move use_this_gi_id's declaration *above* other statements in the
98 * block per C89.
99 *
100 * Revision 1.19 2006/05/25 16:38:07 jianye
101 * use use_this_gi seqid for subject if present
102 *
103 * Revision 1.18 2006/05/05 13:43:28 coulouri
104 * bump date
105 *
106 * Revision 1.17 2006/04/26 12:42:36 madden
107 * BlastSetUserErrorString and BlastDeleteUserErrorString moved from blastool.c to blfmtutl.c
108 *
109 * Revision 1.16 2006/04/07 19:46:59 coulouri
110 * correction to previous commit
111 *
112 * Revision 1.15 2006/04/07 18:38:19 coulouri
113 * bump version
114 *
115 * Revision 1.14 2006/01/24 18:38:47 papadopo
116 * from Mike Gertz: Fixed a typo in a name in a format string: Aravaind -> Aravind
117 *
118 * Revision 1.13 2005/12/29 19:55:04 madden
119 * Added functions to print tabular output
120 *
121 * Revision 1.12 2005/11/22 13:44:24 coulouri
122 * bump version
123 *
124 * Revision 1.11 2005/10/17 12:47:30 camacho
125 * From Alejandro Schaffer: Updated reference for compositional adjustment
126 *
127 * Revision 1.10 2005/08/05 12:10:48 coulouri
128 * bump version
129 *
130 * Revision 1.9 2005/07/25 12:48:39 camacho
131 * Updated reference for compositional adjustment
132 *
133 * Revision 1.8 2005/06/05 02:54:41 coulouri
134 * bump date
135 *
136 * Revision 1.7 2005/05/20 15:28:03 coulouri
137 * bump date
138 *
139 * Revision 1.6 2005/05/16 17:42:19 papadopo
140 * From Alejandro Schaffer: Print references for composition-based statistics
141 * and for compositional score matrix adjustment, if either method was used.
142 *
143 * Revision 1.5 2005/05/08 13:32:52 coulouri
144 * bump version to 2.2.11
145 *
146 * Revision 1.4 2004/10/19 15:28:59 coulouri
147 * bump version and date
148 *
149 * Revision 1.3 2004/10/04 17:54:14 madden
150 * BlastPrintVersionInfo[Ex] now takes const char* as arg for program
151 *
152 * Revision 1.2 2004/07/22 15:18:45 jianye
153 * correct blast paper url
154 *
155 * Revision 1.1 2004/06/30 12:31:15 madden
156 * Structures and prototypes for blast formatting utilities
157 *
158 */
159
160 #include <ncbi.h>
161 #include <objcode.h>
162 #include <objseq.h>
163 #include <sequtil.h>
164 #include <readdb.h>
165 #include <ncbithr.h>
166 #include <txalign.h>
167 #include <blfmtutl.h>
168 #include <jzcoll.h>
169
170 /* the version of BLAST. */
171 #define BLAST_ENGINE_VERSION "2.2.22"
172 #define BLAST_RELEASE_DATE "Sep-27-2009"
173
174 #define BUFFER_LENGTH 255
175
176 /*
177 adds the new string to the buffer, separating by a tilde.
178 Checks the size of the buffer for FormatBlastParameters and
179 allocates longer replacement if needed.
180 */
181
182 Boolean LIBCALL
183 add_string_to_bufferEx(CharPtr buffer, CharPtr *old, Int2Ptr old_length, Boolean add_tilde)
184
185 {
186 CharPtr new, ptr;
187 Int2 length, new_length;
188
189 length = (StringLen(*old));
190
191 if((StringLen(buffer)+length+3) > *old_length)
192 {
193 new_length = *old_length + 255;
194 new = MemNew(new_length*sizeof(Char));
195 if (*old_length > 0 && *old != NULL)
196 {
197 MemCpy(new, *old, *old_length);
198 *old = MemFree(*old);
199 }
200 *old = new;
201 *old_length = new_length;
202 }
203
204 ptr = *old;
205 ptr += length;
206 if (add_tilde)
207 {
208 *ptr = '~';
209 ptr++;
210 }
211
212 while (*buffer != NULLB)
213 {
214 *ptr = *buffer;
215 buffer++; ptr++;
216 }
217
218 return TRUE;
219 }
220
221 Boolean LIBCALL
222 add_string_to_buffer(CharPtr buffer, CharPtr *old, Int2Ptr old_length)
223
224 {
225 return add_string_to_bufferEx(buffer, old, old_length, TRUE);
226 }
227
228 /*
229 Print the buffer, adding newlines where tildes are found.
230 */
231
232 Boolean LIBCALL
233 PrintTildeSepLines(CharPtr buffer, Int4 line_length, FILE *outfp)
234
235 {
236 if (outfp == NULL || buffer == NULL)
237 return FALSE;
238
239 asn2ff_set_output(outfp, NULL);
240
241 ff_StartPrint(0, 0, line_length, NULL);
242 while (*buffer != NULLB)
243 {
244 if (*buffer != '~')
245 ff_AddChar(*buffer);
246 else
247 NewContLine();
248 buffer++;
249 }
250 ff_EndPrint();
251
252 return TRUE;
253 }
254
255 /*
256 Print the Karlin-Altschul parameters.
257
258 if gapped is TRUE, then slightly different formatting is used.
259 */
260
261 Boolean LIBCALL
262 PrintKAParameters(Nlm_FloatHi Lambda, Nlm_FloatHi K, Nlm_FloatHi H, Int4 line_length, FILE *outfp, Boolean gapped)
263
264 {
265 return PrintKAParametersExtra(Lambda, K, H, 0.0, line_length, outfp, gapped);
266 }
267
268 Boolean LIBCALL
269 PrintKAParametersExtra(Nlm_FloatHi Lambda, Nlm_FloatHi K, Nlm_FloatHi H, Nlm_FloatHi C, Int4 line_length, FILE *outfp, Boolean gapped)
270
271 {
272 Char buffer[BUFFER_LENGTH];
273
274 if (outfp == NULL)
275 return FALSE;
276
277 asn2ff_set_output(outfp, NULL);
278
279 ff_StartPrint(0, 0, line_length, NULL);
280 if (gapped)
281 {
282 ff_AddString("Gapped");
283 NewContLine();
284 }
285
286 if (C == 0.0)
287 ff_AddString("Lambda K H");
288 else
289 ff_AddString("Lambda K H C");
290 NewContLine();
291 sprintf(buffer, "%#8.3g ", Lambda);
292 ff_AddString(buffer);
293 sprintf(buffer, "%#8.3g ", K);
294 ff_AddString(buffer);
295 sprintf(buffer, "%#8.3g ", H);
296 ff_AddString(buffer);
297 if (C != 0.0)
298 {
299 sprintf(buffer, "%#8.3g ", C);
300 ff_AddString(buffer);
301 }
302 NewContLine();
303 ff_EndPrint();
304
305 return TRUE;
306
307 }
308
309
310 TxDfDbInfoPtr LIBCALL
311 TxDfDbInfoNew (TxDfDbInfoPtr old)
312
313 {
314 TxDfDbInfoPtr dbinfo;
315 dbinfo = MemNew(sizeof(TxDfDbInfo));
316 if (old)
317 old->next = dbinfo;
318 return dbinfo;
319 }
320
321 TxDfDbInfoPtr LIBCALL
322 TxDfDbInfoDestruct (TxDfDbInfoPtr dbinfo)
323
324 {
325 TxDfDbInfoPtr next;
326
327 if (dbinfo == NULL)
328 return NULL;
329
330 while (dbinfo)
331 {
332 dbinfo->name = MemFree(dbinfo->name);
333 dbinfo->definition = MemFree(dbinfo->definition);
334 dbinfo->date = MemFree(dbinfo->date);
335 next = dbinfo->next;
336 dbinfo = MemFree(dbinfo);
337 dbinfo = next;
338 }
339
340 return dbinfo;
341 }
342
343 Boolean LIBCALL
344 PrintDbReport(TxDfDbInfoPtr dbinfo, Int4 line_length, FILE *outfp)
345
346 {
347
348 if (dbinfo == NULL || outfp == NULL)
349 return FALSE;
350
351 asn2ff_set_output(outfp, NULL);
352
353 ff_StartPrint(2, 2, line_length, NULL);
354
355 if (dbinfo->subset == FALSE)
356 {
357 ff_AddString("Database: ");
358 ff_AddString(dbinfo->definition);
359 NewContLine();
360 ff_AddString(" Posted date: ");
361 ff_AddString(dbinfo->date);
362 NewContLine();
363 ff_AddString("Number of letters in database: ");
364 ff_AddString(Nlm_Int8tostr((Int8) dbinfo->total_length, 1));
365 NewContLine();
366 ff_AddString("Number of sequences in database: ");
367 ff_AddString(Ltostr((long) dbinfo->number_seqs, 1));
368 NewContLine();
369 }
370 else
371 {
372 ff_AddString("Subset of the database(s) listed below");
373 NewContLine();
374 ff_AddString(" Number of letters searched: ");
375 ff_AddString(Nlm_Int8tostr((Int8) dbinfo->total_length, 1));
376 NewContLine();
377 ff_AddString(" Number of sequences searched: ");
378 ff_AddString(Ltostr((long) dbinfo->number_seqs, 1));
379 NewContLine();
380 }
381 ff_EndPrint();
382
383 return TRUE;
384 }
385
386 /*
387 Prints an acknowledgement of the Blast Query, in the standard
388 BLAST format.
389 */
390
391
392 Boolean LIBCALL
393 AcknowledgeBlastQuery(BioseqPtr bsp, Int4 line_length, FILE *outfp, Boolean believe_query, Boolean html)
394
395 {
396 Char buffer[BUFFER_LENGTH];
397
398 if (bsp == NULL || outfp == NULL)
399 return FALSE;
400
401 asn2ff_set_output(outfp, NULL);
402
403 ff_StartPrint(0, 0, line_length, NULL);
404 if (html)
405 ff_AddString("<b>Query=</b> ");
406 else
407 ff_AddString("Query= ");
408 if (bsp->id && (bsp->id->choice != SEQID_LOCAL || believe_query))
409 {
410 SeqIdWrite(bsp->id, buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
411 if (StringNCmp(buffer, "lcl|", 4) == 0)
412 ff_AddString(buffer+4);
413 else
414 ff_AddString(buffer);
415 ff_AddChar(' ');
416 }
417 ff_AddString(BioseqGetTitle(bsp));
418 NewContLine();
419 TabToColumn(10);
420 ff_AddChar('(');
421 ff_AddString(Ltostr((long) BioseqGetLen(bsp), 1));
422 ff_AddString(" letters)");
423 NewContLine();
424 ff_EndPrint();
425
426 return TRUE;
427 }
428
429 /*
430 return the version of BLAST as a char. string.
431 */
432 CharPtr LIBCALL
433 BlastGetReleaseDate (void)
434
435 {
436 return BLAST_RELEASE_DATE;
437 }
438
439
440 /*
441 return the version of BLAST as a char. string.
442 */
443 CharPtr LIBCALL
444 BlastGetVersionNumber (void)
445
446 {
447 return BLAST_ENGINE_VERSION;
448 }
449
450 Boolean BlastPrintVersionInfo (const char* program, Boolean html, FILE *outfp)
451
452 {
453 return BlastPrintVersionInfoEx(program, html, BlastGetVersionNumber(), BlastGetReleaseDate(), outfp);
454 }
455
456 Boolean BlastPrintVersionInfoEx (const char* program, Boolean html, CharPtr version, CharPtr date, FILE *outfp)
457
458 {
459 CharPtr ret_buffer;
460
461
462 if (outfp == NULL)
463 return FALSE;
464
465 ret_buffer = StringSave(program);
466 Nlm_StrUpper(ret_buffer);
467 if (html)
468 fprintf(outfp, "<b>%s %s [%s]</b>\n", ret_buffer, version, date);
469 else
470 fprintf(outfp, "%s %s [%s]\n", ret_buffer, version, date);
471 ret_buffer = MemFree(ret_buffer);
472
473 return TRUE;
474 }
475
476 /*
477 Returns a reference for the header.
478 The newlines are represented by tildes, use PrintTildeSepLines
479 to print this.
480 */
481
482 CharPtr LIBCALL
483 BlastGetReference(Boolean html)
484
485 {
486 CharPtr ret_buffer;
487 Int2 ret_buffer_length;
488
489 ret_buffer = NULL;
490 ret_buffer_length = 0;
491
492
493 if (html) {
494 add_string_to_bufferEx("<b><a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids=9254694&dopt=Citation\">Reference</a>:</b>", &ret_buffer, &ret_buffer_length, TRUE);
495 add_string_to_bufferEx("Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schäffer, ", &ret_buffer, &ret_buffer_length, TRUE);
496 } else
497 add_string_to_bufferEx("Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ", &ret_buffer, &ret_buffer_length, TRUE);
498 add_string_to_bufferEx("Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ", &ret_buffer, &ret_buffer_length, TRUE);
499 add_string_to_bufferEx("\"Gapped BLAST and PSI-BLAST: a new generation of protein database search", &ret_buffer, &ret_buffer_length, TRUE);
500 add_string_to_bufferEx("programs\", Nucleic Acids Res. 25:3389-3402.", &ret_buffer, &ret_buffer_length, TRUE);
501
502 return ret_buffer;
503 }
504
505 Boolean LIBCALL
506 MegaBlastPrintReference(Boolean html, Int4 line_length, FILE *outfp)
507
508 {
509 CharPtr ret_buffer;
510 Int2 ret_buffer_length;
511
512 ret_buffer = NULL;
513 ret_buffer_length = 0;
514
515 if (outfp == NULL)
516 return FALSE;
517
518 if (html) {
519 add_string_to_bufferEx("<b><a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids=10890397&dopt=Citation\">Reference</a>:</b>", &ret_buffer, &ret_buffer_length, TRUE);
520 add_string_to_bufferEx("Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000),", &ret_buffer, &ret_buffer_length, TRUE);
521 } else
522 add_string_to_bufferEx("Reference: Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000), ", &ret_buffer, &ret_buffer_length, TRUE);
523 add_string_to_bufferEx("\"A greedy algorithm for aligning DNA sequences\", ",
524 &ret_buffer, &ret_buffer_length, TRUE);
525 add_string_to_bufferEx("J Comput Biol 2000; 7(1-2):203-14.",
526 &ret_buffer, &ret_buffer_length, TRUE);
527
528 PrintTildeSepLines(ret_buffer, line_length, outfp);
529 ret_buffer = MemFree(ret_buffer);
530 return TRUE;
531 }
532
533 Boolean LIBCALL
534 BlastPrintReference(Boolean html, Int4 line_length, FILE *outfp)
535
536 {
537 CharPtr ret_buffer;
538
539 if (outfp == NULL)
540 return FALSE;
541
542 ret_buffer = BlastGetReference(html);
543 PrintTildeSepLines(ret_buffer, line_length, outfp);
544 ret_buffer = MemFree(ret_buffer);
545
546 return TRUE;
547 }
548
549 /*
550 Returns a reference for the header.
551 The newlines are represented by tildes, use PrintTildeSepLines
552 to print this.
553 */
554
555
556 /*
557 Returns a reference for composition-based statistics to use
558 in the header.
559 The newlines are represented by tildes, use PrintTildeSepLines
560 to print this.
561 */
562
563 CharPtr LIBCALL
564 CBStatisticsGetReference(Boolean html, Boolean firstRound, Boolean moreRounds)
565
566 {
567 CharPtr ret_buffer;
568 Int2 ret_buffer_length;
569
570 ret_buffer = NULL;
571 ret_buffer_length = 0;
572
573
574 if (firstRound) {
575 if (html) {
576 add_string_to_bufferEx("<b><a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids=11452024&dopt=Citation\">Reference for composition-based statistics</a>:</b>", &ret_buffer, &ret_buffer_length, TRUE);
577 add_string_to_bufferEx("Schäffer, Alejandro A., L. Aravind, Thomas L. Madden, ", &ret_buffer, &ret_buffer_length, TRUE);
578 } else
579 add_string_to_bufferEx("Reference for composition-based statistics:", &ret_buffer, &ret_buffer_length, TRUE);
580 add_string_to_bufferEx("Schaffer, Alejandro A., L. Aravind, Thomas L. Madden,", &ret_buffer, &ret_buffer_length, TRUE);
581 }
582 else {
583 if (html) {
584 add_string_to_bufferEx("<b><a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids=11452024&dopt=Citation\">Reference for composition-based statistics </a></b>", &ret_buffer, &ret_buffer_length, TRUE);
585 add_string_to_bufferEx("starting in round 2:", &ret_buffer, &ret_buffer_length, TRUE);
586
587 add_string_to_bufferEx("Schäffer, Alejandro A., L. Aravind, Thomas L. Madden, ", &ret_buffer, &ret_buffer_length, TRUE);
588 } else {
589 add_string_to_bufferEx("Reference for composition-based statistics starting in round 2:", &ret_buffer, &ret_buffer_length, TRUE);
590 add_string_to_bufferEx("Schaffer, Alejandro A., L. Aravind, Thomas L. Madden,", &ret_buffer, &ret_buffer_length, TRUE);
591 }
592 }
593 add_string_to_bufferEx("Sergei Shavirin, John L. Spouge, Yuri I. Wolf, ", &ret_buffer, &ret_buffer_length, TRUE);
594 add_string_to_bufferEx("Eugene V. Koonin, and Stephen F. Altschul (2001), ", &ret_buffer, &ret_buffer_length, TRUE);
595 add_string_to_bufferEx("\"Improving the accuracy of PSI-BLAST protein database searches with ", &ret_buffer, &ret_buffer_length, TRUE);
596 add_string_to_bufferEx("composition-based statistics and other refinements\", Nucleic Acids Res. 29:2994-3005.", &ret_buffer, &ret_buffer_length, TRUE);
597 return ret_buffer;
598 }
599
600 /*print the reference for composition-based statistics when they are used*/
601 Boolean LIBCALL
602 CBStatisticsPrintReference(Boolean html, Int4 line_length,
603 Boolean firstRound, Boolean moreRounds, FILE *outfp)
604
605 {
606 CharPtr ret_buffer;
607
608 if (outfp == NULL)
609 return FALSE;
610
611 if (!(firstRound || moreRounds))
612 return FALSE;
613
614 ret_buffer = CBStatisticsGetReference(html,firstRound, moreRounds);
615 PrintTildeSepLines(ret_buffer, line_length, outfp);
616 ret_buffer = MemFree(ret_buffer);
617
618 return TRUE;
619 }
620
621 /*
622 Returns a reference for the header.
623 The newlines are represented by tildes, use PrintTildeSepLines
624 to print this.
625 */
626
627 CharPtr LIBCALL
628 CAdjustmentGetReference(Boolean html)
629
630 {
631 CharPtr ret_buffer;
632 Int2 ret_buffer_length;
633
634 ret_buffer = NULL;
635 ret_buffer_length = 0;
636
637 if (html) {
638 add_string_to_bufferEx("<b><a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids=16218944&dopt=Citation\">Reference for compositional score matrix adjustment</a>:</b>", &ret_buffer, &ret_buffer_length, TRUE);
639 add_string_to_bufferEx("Altschul, Stephen F., John C. Wootton, E. Michael Gertz, Richa Agarwala,", &ret_buffer, &ret_buffer_length, TRUE);
640 add_string_to_bufferEx("Aleksandr Morgulis, Alejandro A. Schäffer, and Yi-Kuo Yu (2005) \"Protein database", &ret_buffer, &ret_buffer_length, TRUE);
641 add_string_to_bufferEx("searches using compositionally adjusted substitution matrices\", FEBS J. 272:5101-5109.", &ret_buffer, &ret_buffer_length, TRUE);
642 }
643 else {
644 add_string_to_bufferEx("Reference for compositional score matrix adjustment: Altschul, Stephen F., ", &ret_buffer, &ret_buffer_length, TRUE);
645 add_string_to_bufferEx("John C. Wootton, E. Michael Gertz, Richa Agarwala, Aleksandr Morgulis,", &ret_buffer, &ret_buffer_length, TRUE);
646 add_string_to_bufferEx("Alejandro A. Schaffer, and Yi-Kuo Yu (2005) \"Protein database searches", &ret_buffer, &ret_buffer_length, TRUE);
647 add_string_to_bufferEx("using compositionally adjusted substitution matrices\", FEBS J. 272:5101-5109.", &ret_buffer, &ret_buffer_length, TRUE);
648 }
649 return ret_buffer;
650 }
651
652 /*print the reference for composition-based statistics when they are used*/
653 Boolean LIBCALL
654 CAdjustmentPrintReference(Boolean html, Int4 line_length, FILE *outfp)
655
656 {
657 CharPtr ret_buffer;
658
659 if (outfp == NULL)
660 return FALSE;
661
662 ret_buffer = CAdjustmentGetReference(html);
663 PrintTildeSepLines(ret_buffer, line_length, outfp);
664 ret_buffer = MemFree(ret_buffer);
665
666 return TRUE;
667 }
668
669 /*
670 Returns a reference for the header.
671 The newlines are represented by tildes, use PrintTildeSepLines
672 to print this.
673 */
674
675
676
677 CharPtr LIBCALL
678 BlastGetPhiReference(Boolean html)
679
680 {
681 CharPtr ret_buffer;
682 Int2 ret_buffer_length;
683
684 ret_buffer = NULL;
685 ret_buffer_length = 0;
686
687
688 if (html) {
689 add_string_to_bufferEx("<b><a http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids=9705509&dopt=Citation\">Reference</a>:</b>", &ret_buffer, &ret_buffer_length, TRUE);
690 add_string_to_bufferEx("Zhang, Zheng, Alejandro A. Schäffer, Webb Miller, Thomas L. Madden, ", &ret_buffer, &ret_buffer_length, TRUE);
691 } else
692 add_string_to_bufferEx("Reference: Zhang, Zheng, Alejandro A. Schaffer, Webb Miller, Thomas L. Madden, ", &ret_buffer, &ret_buffer_length, TRUE);
693 add_string_to_bufferEx("David J. Lipman, Eugene V. Koonin, and Stephen F. Altschul (1998), ", &ret_buffer, &ret_buffer_length, TRUE);
694 add_string_to_bufferEx("\"Protein sequence similarity searches using patterns as seeds\", ", &ret_buffer, &ret_buffer_length, TRUE);
695 add_string_to_bufferEx("Nucleic Acids Res. 26:3986-3990.", &ret_buffer, &ret_buffer_length, TRUE);
696
697 return ret_buffer;
698 }
699
700 Boolean LIBCALL
701 BlastPrintPhiReference(Boolean html, Int4 line_length, FILE *outfp)
702
703 {
704 CharPtr ret_buffer;
705
706 if (outfp == NULL)
707 return FALSE;
708
709 ret_buffer = BlastGetPhiReference(html);
710 PrintTildeSepLines(ret_buffer, line_length, outfp);
711 ret_buffer = MemFree(ret_buffer);
712
713 return TRUE;
714 }
715
716 /*
717 Counts the number of SeqAligns present.
718 */
719
720 static Int4
721 GetSeqAlignCount(SeqAlignPtr sap)
722
723 {
724 Int4 count = 0;
725 SeqIdPtr last_id=NULL, id;
726
727 while (sap)
728 {
729 id = TxGetSubjectIdFromSeqAlign(sap);
730 if (last_id)
731 {
732 if(SeqIdComp(id, last_id) != SIC_YES)
733 count++;
734 }
735 else
736 {
737 count = 1;
738 }
739 last_id = id;
740 sap = sap->next;
741 }
742
743 return count;
744
745 }
746
747 /*
748 Duplicates a SeqAlignPtr, up to the number of unique
749 records specified.
750 */
751
752 static SeqAlignPtr
753 GetPrivateSeqAlign(SeqAlignPtr sap, Int4 number, Int4Ptr number_returned)
754
755 {
756 Int4 count=0;
757 SeqIdPtr last_id=NULL, id;
758 SeqAlignPtr new_head=NULL, var;
759
760 last_id = TxGetSubjectIdFromSeqAlign(sap);
761
762 while (count<number && sap)
763 {
764 count++;
765 while (sap)
766 {
767 id = TxGetSubjectIdFromSeqAlign(sap);
768 if(SeqIdComp(id, last_id) != SIC_YES)
769 {
770 last_id = id;
771 break;
772 }
773 if (new_head == NULL)
774 {
775 new_head = AsnIoMemCopy(sap, (AsnReadFunc) SeqAlignAsnRead, (AsnWriteFunc) SeqAlignAsnWrite);
776 var = new_head;
777 }
778 else
779 {
780 var->next = AsnIoMemCopy(sap, (AsnReadFunc) SeqAlignAsnRead, (AsnWriteFunc) SeqAlignAsnWrite);
781 var = var->next;
782 }
783 last_id = id;
784 sap = sap->next;
785 }
786 }
787
788 *number_returned = count;
789
790 return new_head;
791 }
792
793 /*
794 Duplicate a SeqAlignPtr, keeping on the number of unique db
795 hits specified.
796 */
797
798 BlastPruneSapStructPtr LIBCALL
799 BlastPruneHitsFromSeqAlign(SeqAlignPtr sap, Int4 number, BlastPruneSapStructPtr prune)
800
801 {
802 if (prune == NULL)
803 {
804 prune = MemNew(sizeof(BlastPruneSapStruct));
805 }
806 else
807 {
808 if (prune->number == number)
809 return prune;
810 if (prune->allocated)
811 prune->sap = SeqAlignSetFree(prune->sap);
812 prune->sap = NULL;
813 prune->allocated = FALSE;
814 prune->original_number = 0;
815 prune->number = 0;
816 }
817
818 prune->original_number = GetSeqAlignCount(sap);
819
820 if (prune->original_number < number)
821 {
822 prune->number = prune->original_number;
823 prune->sap = sap;
824 prune->allocated = FALSE;
825 }
826 else
827 {
828 prune->sap = GetPrivateSeqAlign(sap, number, &(prune->number));
829 prune->allocated = TRUE;
830 }
831
832 return prune;
833 }
834
835 BlastPruneSapStructPtr LIBCALL
836 BlastPruneSapStructDestruct(BlastPruneSapStructPtr prune)
837
838 {
839 if (prune == NULL)
840 return NULL;
841
842 if (prune->allocated)
843 {
844 prune->sap = SeqAlignSetFree(prune->sap);
845 }
846 prune = MemFree(prune);
847
848 return prune;
849 }
850
851
852 void PrintTabularOutputHeader(CharPtr blast_database, BioseqPtr query_bsp,
853 SeqLocPtr query_slp, CharPtr blast_program,
854 Int4 iteration, Boolean believe_query,
855 FILE *outfp)
856 {
857 Char buffer[BUFFER_LENGTH+1];
858 Boolean unlock_bioseq = FALSE;
859
860 asn2ff_set_output(outfp, NULL);
861
862 ff_StartPrint(0, 0, BUFFER_LENGTH, NULL);
863
864 if (blast_program) {
865 CharPtr program = StringSave(blast_program);
866 Nlm_StrUpper(program);
867 sprintf(buffer, "# %s %s [%s]", program, BlastGetVersionNumber(),
868 BlastGetReleaseDate());
869 MemFree(program);
870 ff_AddString(buffer);
871 NewContLine();
872 }
873
874 if (iteration > 0) {
875 ff_AddString("# Iteration: ");
876 ff_AddString(Ltostr((long) iteration, 1));
877 NewContLine();
878 }
879
880 if (query_bsp || query_slp) {
881 CharPtr title;
882 const CharPtr str = "# Query: ";
883 Int4 string_length = StrLen(str);
884
885 ff_AddString(str);
886
887 if (!query_bsp) {
888 Int4 num_queries = ValNodeLen(query_slp);
889 if (num_queries > 1) {
890 /* Multiple queries: just print the number, without deflines. */
891 sprintf(buffer, "%ld sequences", (long)num_queries);
892 ff_AddString(buffer);
893 } else {
894 query_bsp = BioseqLockById(SeqLocId(query_slp));
895 unlock_bioseq = TRUE;
896 }
897 }
898 if (query_bsp) {
899 if (query_bsp->id && believe_query) {
900 SeqIdWrite(query_bsp->id, buffer, PRINTID_FASTA_LONG,
901 BUFFER_LENGTH);
902 if (StringNCmp(buffer, "lcl|", 4) == 0) {
903 ff_AddString(buffer+4);
904 } else {
905 ff_AddString(buffer);
906 }
907 string_length += StrLen(buffer);
908 ff_AddChar(' ');
909 string_length++; /* to account for the space above. */
910 }
911
912 if ((title = BioseqGetTitle(query_bsp)) != NULL) {
913 /* We do this to keep the entire title on one line
914 (of length BUFFER_LENGTH). */
915 StrNCpy(buffer, title, BUFFER_LENGTH - string_length);
916 buffer[BUFFER_LENGTH - string_length] = NULLB;
917 ff_AddString(buffer);
918 }
919
920 if (unlock_bioseq)
921 BioseqUnlock(query_bsp);
922 }
923 NewContLine();
924 }
925 if (blast_database) {
926 ff_AddString("# Database: ");
927 ff_AddString(blast_database);
928 NewContLine();
929 }
930 if (getenv("PRINT_SEQUENCES")) {
931 ff_AddString("# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score, query seq., subject seq.");
932 } else {
933 ff_AddString("# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score");
934 }
935
936 ff_EndPrint();
937 }
938
939 static Int4
940 BlastBioseqGetNumIdentical(BioseqPtr q_bsp, BioseqPtr s_bsp, Int4 q_start,
941 Int4 s_start, Int4 length,
942 Uint1 q_strand, Uint1 s_strand)
943 {
944 SeqLocPtr q_slp, s_slp;
945 SeqPortPtr q_spp, s_spp;
946 Int4 i, ident = 0;
947 Uint1 q_res, s_res;
948
949 if (!q_bsp || !s_bsp)
950 return 0;
951
952 q_slp = SeqLocIntNew(q_start, q_start+length-1, q_strand, q_bsp->id);
953 s_slp = SeqLocIntNew(s_start, s_start+length-1, s_strand, s_bsp->id);
954 if (ISA_na(q_bsp->mol))
955 q_spp = SeqPortNewByLoc(q_slp, Seq_code_ncbi4na);
956 else
957 q_spp = SeqPortNewByLoc(q_slp, Seq_code_ncbistdaa);
958 if (ISA_na(s_bsp->mol))
959 s_spp = SeqPortNewByLoc(s_slp, Seq_code_ncbi4na);
960 else
961 s_spp = SeqPortNewByLoc(s_slp, Seq_code_ncbistdaa);
962
963 for (i=0; i<length; i++) {
964 while ((q_res = SeqPortGetResidue(q_spp)) != SEQPORT_EOF &&
965 !IS_residue(q_res));
966 while ((s_res = SeqPortGetResidue(s_spp)) != SEQPORT_EOF &&
967 !IS_residue(s_res));
968 if (q_res == SEQPORT_EOF || s_res == SEQPORT_EOF)
969 break;
970 else if (q_res == s_res)
971 ident++;
972 }
973
974 SeqLocFree(q_slp);
975 SeqLocFree(s_slp);
976 SeqPortFree(q_spp);
977 SeqPortFree(s_spp);
978
979 return ident;
980 }
981 /*
982 Function to print results in tab-delimited format, given a SeqAlign list.
983 q_shift and s_shift are the offsets in query and subject in case of a
984 subsequence search
985 */
986 void BlastPrintTabulatedResults(SeqAlignPtr seqalign, BioseqPtr query_bsp,
987 SeqLocPtr query_slp, Int4 num_alignments,
988 CharPtr blast_program, Boolean is_ungapped,
989 Boolean believe_query, Int4 q_shift,
990 Int4 s_shift, FILE *fp,
991 Boolean print_query_info)
992 {
993 BlastPrintTabulatedResultsEx(seqalign, query_bsp, query_slp, num_alignments,
994 blast_program, is_ungapped, believe_query,
995 q_shift, s_shift, fp, NULL, print_query_info);
996 }
997
998 void BlastPrintTabulatedResultsEx(SeqAlignPtr seqalign, BioseqPtr query_bsp,
999 SeqLocPtr query_slp, Int4 num_alignments,
1000 CharPtr blast_program, Boolean is_ungapped,
1001 Boolean believe_query, Int4 q_shift,
1002 Int4 s_shift, FILE *fp,
1003 int *num_formatted, Boolean print_query_info)
1004 {
1005 BlastPrintTabularResults(seqalign, query_bsp, query_slp, num_alignments,
1006 blast_program, is_ungapped, FALSE, believe_query,
1007 q_shift, s_shift, fp, num_formatted, print_query_info);
1008 }
1009
1010 void BlastPrintTabularResults(SeqAlignPtr seqalign, BioseqPtr query_bsp,
1011 SeqLocPtr query_slp, Int4 num_alignments, CharPtr blast_program,
1012 Boolean is_ungapped, Boolean is_ooframe, Boolean believe_query,
1013 Int4 q_shift, Int4 s_shift, FILE *fp, int *num_formatted,
1014 Boolean print_query_info)
1015 {
1016 SeqAlignPtr sap, sap_tmp = NULL;
1017 FloatHi perc_ident, bit_score, evalue;
1018 Int4 numseg, num_gap_opens, num_mismatches, num_ident, score;
1019 Int4 number, align_length, index, i, j;
1020 Int4 q_start, q_end, s_start, s_end;
1021 Char bit_score_buff[10];
1022 CharPtr eval_buff;
1023 Boolean is_translated;
1024 SeqIdPtr query_id, old_query_id = NULL, subject_id, old_subject_id = NULL;
1025 BioseqPtr subject_bsp=NULL;
1026 Char query_buffer[BUFFER_LENGTH+1], subject_buffer[BUFFER_LENGTH+1];
1027 DenseSegPtr dsp;
1028 StdSegPtr ssp = NULL;
1029 DenseDiagPtr ddp = NULL;
1030 AlignSumPtr asp = NULL;
1031 CharPtr defline, title;
1032 SeqLocPtr slp;
1033 Int4 alignments_count;
1034 Int4 objmgr_count = 0;
1035
1036 is_translated = (StringCmp(blast_program, "blastn") &&
1037 StringCmp(blast_program, "blastp"));
1038
1039 if (is_translated) {
1040 asp = MemNew(sizeof(AlignSum));
1041 asp->matrix = load_default_matrix();
1042 asp->is_aa = TRUE;
1043 asp->ooframe = is_ooframe;
1044 }
1045
1046 if (is_ungapped)
1047 sap_tmp = SeqAlignNew();
1048
1049 slp = query_slp;
1050 if (query_bsp)
1051 query_id = query_bsp->id;
1052
1053 /* Evalue buffer is dynamically allocated to avoid compiler warnings
1054 in calls to ScoreAndEvalueToBuffers. */
1055 eval_buff = Malloc(10);
1056
1057 for (sap = seqalign; sap; sap = sap->next) {
1058 if (query_slp)
1059 query_id = TxGetQueryIdFromSeqAlign(sap);
1060 if (SeqIdComp(query_id, old_query_id) != SIC_YES) {
1061 if (old_query_id && num_formatted)
1062 (*num_formatted)++;
1063 alignments_count = num_alignments;
1064 /* New query: find the corresponding SeqLoc */
1065 while (slp && SeqIdComp(query_id, SeqLocId(slp)) != SIC_YES)
1066 slp = slp->next;
1067 if (slp != NULL) {
1068 query_id = old_query_id = SeqLocId(slp);
1069 /* Print new query information */
1070 if (print_query_info)
1071 PrintTabularOutputHeader(NULL, NULL, slp, NULL, 0,
1072 believe_query, fp);
1073 } else if (query_bsp)
1074 old_query_id = query_bsp->id;
1075 defline = (CharPtr) Malloc(BUFFER_LENGTH+1);
1076 SeqIdWrite(query_id, defline, PRINTID_FASTA_LONG, BUFFER_LENGTH);
1077 if (StringNCmp(defline, "lcl|", 4))
1078 StringCpy(query_buffer, defline);
1079 else if (!believe_query) {
1080 if (slp) {
1081 BioseqUnlock(query_bsp);
1082 query_bsp = BioseqLockById(query_id);
1083 }
1084 if ((title = StringSave(BioseqGetTitle(query_bsp))) != NULL) {
1085 defline = MemFree(defline);
1086 defline = StringTokMT(title, " ", &title);
1087 StringNCpy_0(query_buffer, defline, BUFFER_LENGTH);
1088 defline = MemFree(defline);
1089 } else
1090 StringCpy(query_buffer, defline+4);
1091 defline = MemFree(defline);
1092 } else
1093 StringCpy(query_buffer, defline+4);
1094 } else
1095 query_id = old_query_id;
1096
1097 subject_id = TxGetSubjectIdFromSeqAlign(sap);
1098
1099 if (SeqIdComp(subject_id, old_subject_id) != SIC_YES) {
1100 /* New subject sequence has been found in the seqalign list */
1101 if (--alignments_count < 0)
1102 continue;
1103 BioseqUnlock(subject_bsp);
1104
1105 /* object manager cache is limited in size */
1106 if (++objmgr_count > 8000) {
1107 objmgr_count = 0;
1108 ObjMgrFreeCache(OBJ_MAX);
1109 }
1110
1111 subject_bsp = BioseqLockById(subject_id);
1112
1113 if (!subject_bsp || !subject_bsp->id)
1114 continue;
1115 if (subject_bsp->id->choice != SEQID_GENERAL ||
1116 StringCmp(((DbtagPtr)subject_id->data.ptrvalue)->db, "BL_ORD_ID")) {
1117 SeqIdPtr use_this_gi_id = GetUseThisGi(sap);
1118 defline = (CharPtr) Malloc(BUFFER_LENGTH+1);
1119 if (use_this_gi_id) {
1120 BlastDefLinePtr bdlp, actual_bdlp;
1121 bdlp=FDGetDeflineAsnFromBioseq(subject_bsp);
1122 actual_bdlp=getBlastDefLineForSeqId(bdlp, use_this_gi_id);
1123
1124 SeqIdWrite(actual_bdlp->seqid, defline, PRINTID_FASTA_LONG, BUFFER_LENGTH);
1125 BlastDefLineSetFree(bdlp);
1126 } else {
1127 SeqIdWrite(subject_bsp->id, defline, PRINTID_FASTA_LONG, BUFFER_LENGTH);
1128 }
1129 if (StringNCmp(defline, "lcl|", 4))
1130 StringCpy(subject_buffer, defline);
1131 else
1132 StringCpy(subject_buffer, defline+4);
1133 } else {
1134 defline = StringSave(BioseqGetTitle(subject_bsp));
1135 defline = StringTokMT(defline, " \t", &title);
1136 StringCpy(subject_buffer, defline);
1137 }
1138 defline = MemFree(defline);
1139 }
1140
1141 perc_ident = 0;
1142 align_length = 0;
1143 num_gap_opens = 0;
1144 num_mismatches = 0;
1145
1146 GetScoreAndEvalue(sap, &score, &bit_score, &evalue, &number);
1147
1148 /* Do not allow knocking off digit in evalue buffer, so parsers are
1149 not confused. */
1150 ScoreAndEvalueToBuffers(bit_score, evalue,
1151 bit_score_buff, &eval_buff, 0);
1152
1153 /* Loop on segments within this seqalign (in ungapped case) */
1154 while (TRUE) {
1155 if (sap->segtype == SAS_DENSEG) {
1156 Boolean get_num_ident = TRUE;
1157 dsp = (DenseSegPtr) sap->segs;
1158 numseg = dsp->numseg;
1159 /* Query Bioseq is needed for calculating number of identities.
1160 NB: even if number of identities is already filled in the
1161 seqalign score list, that is not enough here, because we need to
1162 know number of identities in each segment in order to calculate
1163 number of mismatches correctly. */
1164 if (!query_bsp) {
1165 query_bsp = BioseqLockById(query_id);
1166 }
1167
1168 for (i=0; i<numseg; i++) {
1169 align_length += dsp->lens[i];
1170 if (dsp->starts[2*i] != -1 && dsp->starts[2*i+1] != -1) {
1171 if (get_num_ident) {
1172 num_ident = BlastBioseqGetNumIdentical(query_bsp, subject_bsp,
1173 dsp->starts[2*i], dsp->starts[2*i+1],
1174 dsp->lens[i], dsp->strands[2*i],
1175 dsp->strands[2*i+1]);
1176 perc_ident += num_ident;
1177 num_mismatches += dsp->lens[i] - num_ident;
1178 }
1179 } else {
1180 num_gap_opens++;
1181 }
1182 }
1183 perc_ident = perc_ident / align_length * 100;
1184
1185 /* compute half the sequence offsets (account for
1186 leading gaps in the alignment) */
1187 if (dsp->starts[0] == -1) {
1188 i = 1; j = 0;
1189 }
1190 else if (dsp->starts[1] == -1) {
1191 i = 0; j = 1;
1192 }
1193 else {
1194 i = j = 0;
1195 }
1196 if (dsp->strands[0] != dsp->strands[1]) {
1197 q_end = dsp->starts[2*i] + dsp->lens[i];
1198 s_end = dsp->starts[2*j+1] + 1;
1199 } else {
1200 q_start = dsp->starts[2*i] + 1;
1201 s_start = dsp->starts[2*j+1] + 1;
1202 }
1203
1204 /* compute half the sequence offsets (account for
1205 trailing gaps in the alignment) */
1206 if (dsp->starts[2*numseg-2] == -1) {
1207 i = numseg-1; j = numseg;
1208 }
1209 else if (dsp->starts[2*numseg-1] == -1) {
1210 i = numseg; j = numseg-1;
1211 }
1212 else {
1213 i = j = numseg;
1214 }
1215 if (dsp->strands[0] != dsp->strands[1]) {
1216 q_start = dsp->starts[2*i-2] + 1;
1217 s_start = dsp->starts[2*j-1] + dsp->lens[j-1];
1218 } else {
1219 q_end = dsp->starts[2*i-2] + dsp->lens[i-1];
1220 s_end = dsp->starts[2*j-1] + dsp->lens[j-1];
1221 }
1222
1223 } else if (sap->segtype == SAS_STD) {
1224 if (!ssp)
1225 ssp = (StdSegPtr) sap->segs;
1226
1227 if (is_ungapped) {
1228 sap_tmp->segtype = SAS_STD;
1229 sap_tmp->segs = ssp;
1230 GetScoreAndEvalue(sap_tmp, &score, &bit_score, &evalue, &number);
1231 ScoreAndEvalueToBuffers(bit_score, evalue,
1232 bit_score_buff, &eval_buff, 0);
1233 find_score_in_align(sap_tmp, 1, asp);
1234 } else
1235 find_score_in_align(sap, 1, asp);
1236
1237 if (asp->m_frame < 0)
1238 q_start = SeqLocStop(ssp->loc) + 1;
1239 else
1240 q_start = SeqLocStart(ssp->loc) + 1;
1241
1242 if (asp->t_frame < 0)
1243 s_start = SeqLocStop(ssp->loc->next) + 1;
1244 else
1245 s_start = SeqLocStart(ssp->loc->next) + 1;
1246
1247 if (!is_ungapped) {
1248 for (index=1; ssp->next; index++)
1249 ssp = ssp->next;
1250 num_gap_opens = index / 2;
1251 } else
1252 num_gap_opens = 0;
1253
1254 if (asp->m_frame < 0)
1255 q_end = SeqLocStart(ssp->loc) + 1;
1256 else
1257 q_end = SeqLocStop(ssp->loc) + 1;
1258
1259 if (asp->t_frame < 0)
1260 s_end = SeqLocStart(ssp->loc->next) + 1;
1261 else
1262 s_end = SeqLocStop(ssp->loc->next) + 1;
1263
1264 align_length = asp->totlen;
1265 num_mismatches = asp->totlen - asp->gaps - asp->identical;
1266 perc_ident = ((FloatHi) 100*asp->identical)/ (asp->totlen);
1267 } else if (sap->segtype == SAS_DENDIAG) {
1268 if (!ddp)
1269 ddp = (DenseDiagPtr) sap->segs;
1270 sap_tmp->segtype = SAS_DENDIAG;
1271 sap_tmp->segs = ddp;
1272 GetScoreAndEvalue(sap_tmp, &score, &bit_score, &evalue, &number);
1273 ScoreAndEvalueToBuffers(bit_score, evalue,
1274 bit_score_buff, &eval_buff, 0);
1275
1276 align_length = ddp->len;
1277 /*always show plus strand for query*/
1278 if (ddp->strands[0] == Seq_strand_minus &&
1279 ddp->strands[1] == Seq_strand_plus) {
1280 ddp->strands[0] = Seq_strand_plus;
1281 ddp->strands[1] = Seq_strand_minus;
1282 }
1283 if (ddp->strands[0] == Seq_strand_minus) {
1284 q_start = ddp->starts[0] + align_length;
1285 q_end = ddp->starts[0] + 1;
1286 } else {
1287 q_start = ddp->starts[0] + 1;
1288 q_end = ddp->starts[0] + align_length;
1289 }
1290
1291 if (ddp->strands[1] == Seq_strand_minus) {
1292 s_start = ddp->starts[1] + align_length;
1293 s_end = ddp->starts[1] + 1;
1294 } else {
1295 s_start = ddp->starts[1] + 1;
1296 s_end = ddp->starts[1] + align_length;
1297 }
1298 num_gap_opens = 0;
1299 /* Query Bioseq is needed for calculating number of identities.
1300 NB: even if number of identities is already filled in the
1301 seqalign score list, that is not enough here, because we need to
1302 know number of identities in each segment in order to calculate
1303 number of mismatches correctly. */
1304 if (!query_bsp) {
1305 query_bsp = BioseqLockById(query_id);
1306 }
1307
1308 num_ident = BlastBioseqGetNumIdentical(query_bsp, subject_bsp,
1309 ddp->starts[0], ddp->starts[1], align_length,
1310 ddp->strands[0], ddp->strands[1]);
1311 num_mismatches = align_length - num_ident;
1312 perc_ident = ((FloatHi)num_ident) / align_length * 100;
1313 }
1314 if (!is_translated) {
1315 /* Adjust coordinates if query and/or subject is a subsequence */
1316 q_start += q_shift;
1317 q_end += q_shift;
1318 s_start += s_shift;
1319 s_end += s_shift;
1320 }
1321
1322 if (perc_ident >= 99.995 && perc_ident < 100.00)
1323 perc_ident = 99.99;
1324
1325 fprintf(fp,
1326 "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%s\t%s\n",
1327 query_buffer, subject_buffer, perc_ident, align_length,
1328 num_mismatches, num_gap_opens, q_start,
1329 q_end, s_start, s_end, eval_buff, bit_score_buff);
1330 old_subject_id = subject_id;
1331 if (sap->segtype == SAS_DENSEG)
1332 break;
1333 else if (sap->segtype == SAS_DENDIAG) {
1334 if ((ddp = ddp->next) == NULL)
1335 break;
1336 } else if (sap->segtype == SAS_STD) {
1337 if ((ssp = ssp->next) == NULL)
1338 break;
1339 }
1340 }
1341 }
1342
1343 eval_buff = MemFree(eval_buff);
1344
1345 if (is_ungapped)
1346 sap_tmp = MemFree(sap_tmp);
1347
1348 if (is_translated) {
1349 free_default_matrix(asp->matrix);
1350 MemFree(asp);
1351 }
1352
1353 BioseqUnlock(subject_bsp);
1354 if (query_slp)
1355 BioseqUnlock(query_bsp);
1356 }
1357
1358
1359
1360 /* Mutex serializing the Nlm_ErrUserInstall / Nlm_ErrUserDelete calls made by
   BlastSetUserErrorString and BlastDeleteUserErrorString in this file. */
1361 TNlmMutex err_message_mutex=NULL;
1362
/* Size constant for the error user string: the buffer built by
   BlastSetUserErrorString holds 2*BLAST_ERROR_BULEN+1 characters, split
   between the caller's prefix and the sequence id / title portion. */
1363 #define BLAST_ERROR_BULEN 50
1364 /*
1365 The following functions fill the Error user string with
1366 text to identify BLAST and the entry being worked on.
1367 The SeqIdPtr is used to make a FASTA id, which is appended
1368 to string.
1369
1370 A Uint1 is returned, which allows Nlm_ErrUserDelete to delete
1371 this error string when it's done.
1372 */
1373
1374 Uint1
1375 BlastSetUserErrorString(CharPtr string, SeqIdPtr sip, Boolean use_id)
1376
1377 {
1378 BioseqPtr bsp;
1379 Char buffer[2*BLAST_ERROR_BULEN+1], textid[BLAST_ERROR_BULEN+1];
1380 CharPtr buf_start, ptr, title;
1381 Int2 length=0, index;
1382 Uint1 retval=0;
1383
1384 buffer[0] = NULLB;
1385 ptr = buf_start = &buffer[0];
1386
1387 if (string)
1388 StringNCpy_0(ptr, string, BLAST_ERROR_BULEN);
1389
1390 if (sip != NULL)
1391 {
1392 bsp = BioseqLockById(sip);
1393 if(bsp)
1394 {
1395 if (use_id)
1396 sip = bsp->id;
1397 else
1398 title = BioseqGetTitle(bsp);
1399 }
1400
1401 if (string)
1402 {
1403 length = StringLen(string);
1404 if (length > BLAST_ERROR_BULEN)
1405 length = BLAST_ERROR_BULEN;
1406 }
1407
1408 ptr += length;
1409
1410 if (use_id)
1411 {
1412 SeqIdWrite(sip, textid, PRINTID_FASTA_LONG, BLAST_ERROR_BULEN-1);
1413 StringNCpy_0(ptr, textid, BLAST_ERROR_BULEN-1);
1414 }
1415 else if (title)
1416 {
1417 for (index=0; index<BLAST_ERROR_BULEN-1; index++)
1418 {
1419 if (title[index] == NULLB || title[index] == ' ')
1420 {
1421 break;
1422 }
1423 *ptr = title[index];
1424 ptr++;
1425 }
1426 *ptr = NULLB;
1427 }
1428 BioseqUnlock(bsp);
1429 StringCpy(ptr+StringLen(ptr), ":");
1430 }
1431 NlmMutexLockEx(&err_message_mutex);
1432 retval = Nlm_ErrUserInstall (buf_start, 0);
1433 NlmMutexUnlock(err_message_mutex);
1434
1435 return retval;
1436 }
1437
1438 void
1439 BlastDeleteUserErrorString(Uint1 err_id)
1440
1441 {
1442 NlmMutexLockEx(&err_message_mutex);
1443 Nlm_ErrUserDelete(err_id);
1444 NlmMutexUnlock(err_message_mutex);
1445 return;
1446 }
1447
1448 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |