00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 #ifndef SKIP_DOXYGEN_PROCESSING
00035 static char const rcsid[] = "$Id: blastxml_format.cpp 169810 2009-09-03 13:17:44Z madden $";
00036 #endif
00037
00038 #include <ncbi_pch.hpp>
00039 #include <objmgr/object_manager.hpp>
00040 #include <objects/seqloc/Seq_interval.hpp>
00041 #include <objmgr/util/sequence.hpp>
00042 #include <objects/seqloc/Seq_id.hpp>
00043
00044 #include <objects/seqalign/Dense_diag.hpp>
00045 #include <objects/seqalign/Dense_seg.hpp>
00046 #include <objects/seqalign/Std_seg.hpp>
00047
00048 #include <algo/blast/format/blastxml_format.hpp>
00049 #include <algo/blast/format/blastfmtutil.hpp>
00050 #include <objtools/align_format/showdefline.hpp>
00051 #include <objtools/align_format/align_format_util.hpp>
00052
00053 #include <serial/objostrxml.hpp>
00054
00055 #include <algo/blast/api/version.hpp>
00056
00057 #include <algorithm>
00058
00059 BEGIN_NCBI_SCOPE
00060 USING_SCOPE(objects);
00061 USING_SCOPE(blast);
00062 USING_SCOPE(align_format);
00063
// File-scope masked-query-regions object with external linkage.
// NOTE(review): nothing in the visible part of this file reads or writes it
// (the functions below take their mask data through parameters) -- confirm
// whether another translation unit depends on it before removing it.
00064 ncbi::TMaskedQueryRegions mask;
00065
00066
00067
00068 struct SRangeStartSort {
00069 bool operator()(CRange<int>* const& range1, CRange<int>* const& range2)
00070 {
00071 return (range1->GetFrom() < range2->GetFrom());
00072 }
00073 };
00074
00075
00076
00077 class CRangeVector : public vector<CRange<int>* >
00078 {
00079 public:
00080
00081 ~CRangeVector()
00082 {
00083 for (iterator pItem = begin(); pItem != end(); ++pItem)
00084 delete *pItem;
00085 }
00086 };
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096 static bool s_SerializeAndSplitBy(const CSerialObject &object,
00097 const char *tag,
00098 string &start_part,
00099 string &end_part,
00100 bool add_reference_dtdi = false,
00101 bool add_xml_versioni = false );
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111 static void
00112 s_MaskQuerySeq(CAlnVec& alnvec, string& query_seq,
00113 const ncbi::TMaskedQueryRegions& mask_info,
00114 CDisplaySeqalign::SeqLocCharOption mask_char,
00115 int query_frame)
00116 {
00117 const int kNumSegs = alnvec.GetNumSegs();
00118 vector<CRange<int> > segs_v;
00119 for (int index = 0; index < kNumSegs; ++index) {
00120 CRange<int> range(alnvec.GetAlnStart(index),
00121 alnvec.GetAlnStop(index));
00122 segs_v.push_back(range);
00123 }
00124
00125 CRangeVector masks_v;
00126 int aln_stop = query_seq.size() - 1;
00127 ITERATE(ncbi::TMaskedQueryRegions, mask_iter, mask_info) {
00128 if ((*mask_iter)->GetFrame() != query_frame)
00129 continue;
00130 int start =
00131 alnvec.GetAlnPosFromSeqPos(0,
00132 (*mask_iter)->GetInterval().GetFrom());
00133 int stop =
00134 alnvec.GetAlnPosFromSeqPos(0,
00135 (*mask_iter)->GetInterval().GetTo());
00136
00137 if (query_frame < 0) {
00138 int tmp = start;
00139 start = stop;
00140 stop = tmp;
00141 }
00142 if (start >= 0) {
00143 if (stop < 0)
00144 stop = aln_stop;
00145 CRange<int>* range = new CRange<int>(start, stop);
00146 masks_v.push_back(range);
00147 }
00148 }
00149
00150 sort(masks_v.begin(), masks_v.end(), SRangeStartSort());
00151
00152
00153 int mask_index = 0;
00154 for (int seg_index = 0;
00155 seg_index < (int) segs_v.size() && mask_index < (int) masks_v.size();
00156 ++seg_index) {
00157 if (segs_v[seg_index].Empty())
00158 continue;
00159 int seg_start = segs_v[seg_index].GetFrom();
00160 int seg_stop = segs_v[seg_index].GetTo();
00161 int mask_pos;
00162 while (mask_index < (int) masks_v.size() &&
00163 (mask_pos = max(seg_start, masks_v[mask_index]->GetFrom()))
00164 <= seg_stop) {
00165 int mask_stop = min(seg_stop, masks_v[mask_index]->GetTo());
00166
00167 for ( ; mask_pos <= mask_stop; ++mask_pos) {
00168 if( query_seq[mask_pos] == '-' ) continue;
00169 if (mask_char == CDisplaySeqalign::eX) {
00170 query_seq[mask_pos] = 'X';
00171 } else if (mask_char == CDisplaySeqalign::eN){
00172 query_seq[mask_pos]='N';
00173 } else if (mask_char == CDisplaySeqalign::eLowerCase) {
00174 query_seq[mask_pos] =
00175 tolower((unsigned char)query_seq[mask_pos]);
00176 }
00177 }
00178
00179
00180 if (mask_pos < seg_stop)
00181 ++mask_index;
00182 else
00183 break;
00184 }
00185 }
00186 }
00187
00188
00189
00190
00191
00192
00193
00194
/// Compute the translation frame of an aligned nucleotide range.
///
/// @param plus_strand  true for the plus strand, false for the minus strand.
/// @param start        Start coordinate of the range; only used for the
///                     plus strand.
/// @param end          End coordinate of the range; only used for the
///                     minus strand.
/// @param seq_length   Length of the sequence; only used for the minus
///                     strand.
/// @return (start - 1) % 3 + 1 for the plus strand,
///         -((seq_length - end) % 3 + 1) for the minus strand.
static int
s_GetTranslationFrame(bool plus_strand, int start, int end, int seq_length)
{
    if (!plus_strand) {
        const int offset_from_end = seq_length - end;
        return -(offset_from_end % 3 + 1);
    }

    const int offset_from_start = start - 1;
    return offset_from_start % 3 + 1;
}
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217 static void
00218 s_SeqAlignSetToXMLHsps(list<CRef<CHsp> >& xhsp_list,
00219 const CSeq_align_set& alnset, CScope* scope,
00220 const CBlastFormattingMatrix* matrix,
00221 const ncbi::TMaskedQueryRegions* mask_info,
00222 int master_gentice_code, int slave_genetic_code)
00223 {
00224 int index = 1;
00225 ITERATE(CSeq_align_set::Tdata, iter, alnset.Get()) {
00226 CRef<CHsp> xhsp(new CHsp());
00227 const CSeq_align& kAlign = *(*iter);
00228 xhsp->SetNum(index);
00229 ++index;
00230 bool query_is_na, subject_is_na;
00231 int query_length, subject_length;
00232
00233 int score, num_ident;
00234 double bit_score;
00235 double evalue;
00236 int sum_n;
00237 list<int> use_this_gi;
00238 CBlastFormatUtil::GetAlnScores(kAlign, score, bit_score, evalue, sum_n,
00239 num_ident, use_this_gi);
00240 xhsp->SetBit_score(bit_score);
00241 xhsp->SetScore(score);
00242 xhsp->SetEvalue(evalue);
00243
00244
00245 try {
00246 const CBioseq_Handle& kQueryBioseqHandle =
00247 scope->GetBioseqHandle(kAlign.GetSeq_id(0));
00248 query_is_na = kQueryBioseqHandle.IsNa();
00249 query_length = kQueryBioseqHandle.GetBioseqLength();
00250 const CBioseq_Handle& kSubjBioseqHandle =
00251 scope->GetBioseqHandle(kAlign.GetSeq_id(1));
00252 subject_is_na = kSubjBioseqHandle.IsNa();
00253 subject_length = kSubjBioseqHandle.GetBioseqLength();
00254 } catch (const CException&) {
00255
00256
00257
00258 xhsp->SetQuery_from(0);
00259 xhsp->SetQuery_to(0);
00260 xhsp->SetHit_from(0);
00261 xhsp->SetHit_to(0);
00262 xhsp->SetIdentity(num_ident);
00263
00264 xhsp->SetQseq(NcbiEmptyString);
00265 xhsp->SetHseq(NcbiEmptyString);
00266 xhsp_list.push_back(xhsp);
00267 continue;
00268 }
00269
00270 CRef<CSeq_align> final_aln(0);
00271
00272
00273
00274
00275 const bool kTranslated = kAlign.GetSegs().IsStd();
00276
00277 if (kTranslated) {
00278 CRef<CSeq_align> densegAln = kAlign.CreateDensegFromStdseg();
00279
00280
00281 if (query_is_na && subject_is_na)
00282 final_aln = densegAln->CreateTranslatedDensegFromNADenseg();
00283 else
00284 final_aln = densegAln;
00285 } else if (kAlign.GetSegs().IsDendiag()) {
00286 final_aln = CBlastFormatUtil::CreateDensegFromDendiag(kAlign);
00287 }
00288
00289 const CDense_seg& kDenseg = (final_aln ? final_aln->GetSegs().GetDenseg() :
00290 kAlign.GetSegs().GetDenseg());
00291
00292 CRef<CAlnVec> aln_vec;
00293
00294
00295
00296
00297 if (!kTranslated && kDenseg.IsSetStrands() &&
00298 kDenseg.GetStrands().front() == eNa_strand_minus) {
00299 CRef<CDense_seg> reversed_ds(new CDense_seg);
00300 reversed_ds->Assign(kDenseg);
00301 reversed_ds->Reverse();
00302 aln_vec.Reset(new CAlnVec(*reversed_ds, *scope));
00303 } else {
00304 aln_vec.Reset(new CAlnVec(kDenseg, *scope));
00305 }
00306
00307
00308 aln_vec->SetGenCode(slave_genetic_code);
00309 aln_vec->SetGenCode(master_gentice_code, 0);
00310
00311 int align_length, num_gaps, num_gap_opens;
00312 CBlastFormatUtil::GetAlignLengths(*aln_vec, align_length, num_gaps,
00313 num_gap_opens);
00314
00315 int q_start, q_end, s_start, s_end, q_frame=0, s_frame=0;
00316
00317 q_start = aln_vec->GetSeqStart(0) + 1;
00318 q_end = aln_vec->GetSeqStop(0) + 1;
00319 s_start = aln_vec->GetSeqStart(1) + 1;
00320 s_end = aln_vec->GetSeqStop(1) + 1;
00321
00322 if (!kTranslated && query_is_na && subject_is_na) {
00323 q_frame = s_frame = 1;
00324
00325
00326 if (aln_vec->IsNegativeStrand(1)) {
00327 s_frame = -1;
00328 int tmp = s_start;
00329 s_start = s_end;
00330 s_end = tmp;
00331 }
00332 } else if (kTranslated) {
00333 if (query_is_na)
00334 q_frame = s_GetTranslationFrame(aln_vec->IsPositiveStrand(0),
00335 q_start, q_end, query_length);
00336 if (subject_is_na)
00337 s_frame = s_GetTranslationFrame(aln_vec->IsPositiveStrand(1),
00338 s_start, s_end, subject_length);
00339 }
00340
00341 xhsp->SetQuery_frame(q_frame);
00342 xhsp->SetHit_frame(s_frame);
00343
00344 xhsp->SetQuery_from(q_start);
00345 xhsp->SetQuery_to(q_end);
00346 xhsp->SetHit_from(s_start);
00347 xhsp->SetHit_to(s_end);
00348
00349
00350
00351
00352
00353 string query_seq;
00354 string subject_seq;
00355 string middle_seq;
00356 aln_vec->SetGapChar('-');
00357 aln_vec->GetWholeAlnSeqString(0, query_seq);
00358
00359
00360
00361
00362
00363
00364 const bool kIsBlastn =
00365 (query_is_na && subject_is_na && !kTranslated);
00366
00367 aln_vec->GetWholeAlnSeqString(1, subject_seq);
00368
00369 num_ident = 0;
00370 int num_positives = 0;
00371 middle_seq = query_seq;
00372
00373
00374
00375
00376
00377 const unsigned int kMaxOffset = min(query_seq.size(),
00378 subject_seq.size());
00379 for (unsigned int i = 0; i < kMaxOffset; ++i) {
00380 if (query_seq[i] == subject_seq[i]) {
00381 ++num_ident;
00382 ++num_positives;
00383 if (kIsBlastn)
00384 middle_seq[i] = '|';
00385 } else if (matrix &&
00386 (*matrix)(query_seq[i], subject_seq[i]) > 0 &&
00387 !kIsBlastn) {
00388 ++num_positives;
00389 middle_seq[i] = kIsBlastn ? ' ' : '+';
00390 } else {
00391 middle_seq[i] = ' ';
00392 }
00393 }
00394
00395 xhsp->SetIdentity(num_ident);
00396 xhsp->SetGaps(num_gaps);
00397 xhsp->SetAlign_len(align_length);
00398
00399
00400
00401 if (mask_info) {
00402 const CDisplaySeqalign::SeqLocCharOption kMaskCharOpt =
00403 (kIsBlastn ? CDisplaySeqalign::eN : CDisplaySeqalign::eX);
00404 s_MaskQuerySeq(*aln_vec, query_seq, *mask_info, kMaskCharOpt,
00405 q_frame);
00406 }
00407
00408 xhsp->SetQseq(query_seq);
00409 xhsp->SetHseq(subject_seq);
00410 xhsp->SetMidline(middle_seq);
00411 xhsp->SetPositive(num_positives);
00412
00413
00414 xhsp_list.push_back(xhsp);
00415 }
00416 }
00417
00418
00419
00420
00421
00422
00423
00424
00425
00426 static void
00427 s_SeqAlignToXMLHit(CRef<CHit>& hit, const CSeq_align& align_in, CScope* scope,
00428 const CBlastFormattingMatrix* matrix,
00429 const ncbi::TMaskedQueryRegions* mask_info,
00430 bool ungapped, int master_gentice_code, int slave_genetic_code)
00431 {
00432 _ASSERT(align_in.GetSegs().IsDisc());
00433 const CSeq_align_set& kAlignSet = align_in.GetSegs().GetDisc();
00434
00435
00436 if (kAlignSet.Get().empty())
00437 return;
00438
00439
00440 hit.Reset(new CHit());
00441
00442 const CSeq_id& kSeqId = kAlignSet.Get().front()->GetSeq_id(1);
00443
00444 try {
00445 const CBioseq_Handle& kSubjBioseqHandle = scope->GetBioseqHandle(kSeqId);
00446
00447
00448 list<int> use_this_gi;
00449 string seqid;
00450 string defline;
00451
00452
00453 CShowBlastDefline::GetBioseqHandleDeflineAndId(kSubjBioseqHandle,
00454 use_this_gi, seqid,
00455 defline, true);
00456 if (defline == NcbiEmptyString)
00457 defline = "No definition line";
00458
00459 hit->SetId(seqid);
00460 hit->SetDef(defline);
00461
00462
00463 CSeq_id_Handle idh =
00464 sequence::GetId(kSubjBioseqHandle, sequence::eGetId_Best);
00465 string accession = CAlignFormatUtil::GetLabel(idh.GetSeqId());
00466 hit->SetAccession(accession);
00467
00468 int length = sequence::GetLength(kSeqId, scope);
00469 hit->SetLen(length);
00470 } catch (const CException&) {
00471
00472
00473
00474 hit->SetId(kSeqId.AsFastaString());
00475 hit->SetDef("Unknown");
00476 hit->SetAccession("Unknown");
00477 hit->SetLen(0);
00478 };
00479
00480
00481
00482
00483
00484
00485 if (ungapped) {
00486 CRef<CSeq_align_set> expanded_align_set =
00487 CDisplaySeqalign::PrepareBlastUngappedSeqalign(kAlignSet);
00488
00489 s_SeqAlignSetToXMLHsps(hit->SetHsps(), *expanded_align_set, scope,
00490 matrix, mask_info, master_gentice_code, slave_genetic_code);
00491 } else {
00492 s_SeqAlignSetToXMLHsps(hit->SetHsps(), kAlignSet, scope, matrix,
00493 mask_info, master_gentice_code, slave_genetic_code);
00494 }
00495 }
00496
00497
00498
00499
00500 static const CSeq_id*
00501 s_GetSubjectId(const CSeq_align& align)
00502 {
00503 if (align.GetSegs().IsDenseg()) {
00504 return align.GetSegs().GetDenseg().GetIds()[1];
00505 } else if (align.GetSegs().IsDendiag()) {
00506 return align.GetSegs().GetDendiag().front()->GetIds()[1];
00507 } else if (align.GetSegs().IsStd()) {
00508 return align.GetSegs().GetStd().front()->GetIds()[1];
00509 }
00510
00511 return NULL;
00512 }
00513
00514
00515
00516
00517
00518
00519
00520
00521
00522 static void
00523 s_SeqAlignSetToXMLHits(list <CRef<CHit> >& hits, const CSeq_align_set& alnset,
00524 CScope* scope, const CBlastFormattingMatrix* matrix,
00525 const ncbi::TMaskedQueryRegions* mask_info,
00526 bool ungapped, int master_gentice_code, int slave_genetic_code,
00527 CNcbiOstream *out_stream)
00528 {
00529
00530 if (alnset.Get().empty())
00531 return;
00532
00533 CSeq_align_set::Tdata::const_iterator iter = alnset.Get().begin();
00534
00535 int index = 1;
00536 bool incremental_output = (bool)out_stream;
00537 while (iter != alnset.Get().end()) {
00538 CRef<CHit> new_hit;
00539
00540
00541
00542
00543
00544 if ((*iter)->GetSegs().IsDisc()) {
00545 s_SeqAlignToXMLHit(new_hit, *(*iter), scope, matrix, mask_info,
00546 ungapped, master_gentice_code, slave_genetic_code);
00547 ++iter;
00548 } else {
00549 CSeq_align_set one_subject_alnset;
00550 CConstRef<CSeq_id> current_id(s_GetSubjectId(*(*iter)));
00551 for ( ; iter != alnset.Get().end(); ++iter) {
00552 CConstRef<CSeq_id> next_id(s_GetSubjectId(*(*iter)));
00553 if (!current_id->Match(*next_id)) {
00554 break;
00555 }
00556 one_subject_alnset.Set().push_back(*iter);
00557 }
00558 CSeq_align disc_align_wrap;
00559 disc_align_wrap.SetSegs().SetDisc(one_subject_alnset);
00560 s_SeqAlignToXMLHit(new_hit, disc_align_wrap, scope, matrix,
00561 mask_info, ungapped, master_gentice_code, slave_genetic_code);
00562 }
00563
00564 if (new_hit) {
00565 new_hit->SetNum(index);
00566 ++index;
00567 if( !incremental_output ) hits.push_back(new_hit);
00568 else
00569 {
00570 CNcbiOstrstream one_hit_os;
00571 auto_ptr<CObjectOStreamXml> xml_one_hit_os (new CObjectOStreamXml (one_hit_os,false));
00572 xml_one_hit_os->SetEncoding(eEncoding_Ascii);
00573 xml_one_hit_os->SetReferenceDTD(false);
00574 xml_one_hit_os->Write( &(*new_hit), new_hit->GetThisTypeInfo() );
00575
00576 string out_str = string(CNcbiOstrstreamToString(one_hit_os));
00577 string::size_type start_xml_pos = out_str.find("<?xml");
00578 if( start_xml_pos != string::npos ) {
00579 string::size_type end_xml_pos = out_str.find_first_of("\n\r");
00580 out_str.erase(0,end_xml_pos+1);
00581 }
00582 *out_stream << out_str ;
00583 }
00584
00585 }
00586 }
00587 }
00588
00589
00590
00591
00592
00593
00594
00595
00596
00597
00598
00599
00600
00601 static void
00602 s_BlastXMLAddIteration(CBlastOutput& bxmlout, const CSeq_align_set* alnset,
00603 const CSeq_loc& seqloc, CScope* scope,
00604 const CBlastFormattingMatrix* matrix,
00605 const ncbi::TMaskedQueryRegions* mask_info,
00606 int index, CStatistics& stat, bool is_ungapped,
00607 int master_gentice_code, int slave_genetic_code,
00608 const vector<string>& messages,
00609 CNcbiOstream *out_stream)
00610 {
00611 bool incremental_output = (bool) out_stream;
00612 list<CRef<CIteration> >& iterations = bxmlout.SetIterations();
00613
00614 CRef<CIteration> one_query_iter(new CIteration());
00615
00616 one_query_iter->SetIter_num(index + 1);
00617
00618 string query_def = NcbiEmptyString;
00619
00620
00621
00622 try {
00623 CBioseq_Handle bh = scope->GetBioseqHandle(seqloc);
00624
00625 const CBioseq& kQueryBioseq = *bh.GetBioseqCore();
00626 one_query_iter->SetQuery_ID(
00627 CBlastFormatUtil::GetSeqIdString(kQueryBioseq));
00628 query_def = sequence::GetTitle(bh);
00629 } catch (const CException&) {
00630 const CSeq_id& kSeqId = sequence::GetId(seqloc, scope);
00631 one_query_iter->SetQuery_ID(kSeqId.AsFastaString());
00632 };
00633
00634 if (query_def == NcbiEmptyString)
00635 query_def = "No definition line";
00636 one_query_iter->SetQuery_def(query_def);
00637
00638 one_query_iter->SetQuery_len(sequence::GetLength(seqloc, scope));
00639 one_query_iter->SetStat(stat);
00640 if (messages.size() > 0 && !messages[index].empty())
00641 one_query_iter->SetMessage(messages[index]);
00642
00643 string serial_xml_start, serial_xml_end;
00644 if( incremental_output ) {
00645
00646 bool split_res = s_SerializeAndSplitBy( *one_query_iter, "</Iteration_query-len>",
00647 serial_xml_start, serial_xml_end);
00648 *out_stream << serial_xml_start << "\n<Iteration_hits>\n";
00649 }
00650
00651
00652 if (alnset) {
00653 s_SeqAlignSetToXMLHits(one_query_iter->SetHits(), *alnset,
00654 scope, matrix, mask_info, is_ungapped,
00655 master_gentice_code, slave_genetic_code,
00656 out_stream);
00657 }
00658
00659 if( incremental_output ) *out_stream << "</Iteration_hits>" << serial_xml_end;
00660 else
00661 iterations.push_back(one_query_iter);
00662 }
00663
00664
00665
00666
00667
00668 static void
00669 s_SetBlastXMLParameters(CBlastOutput& bxmlout, const IBlastXMLReportData* data)
00670 {
00671 CParameters& params = bxmlout.SetParam();
00672 string matrix_name = data->GetMatrixName();
00673 if (matrix_name != NcbiEmptyString)
00674 params.SetMatrix(matrix_name);
00675 params.SetExpect(data->GetEvalueThreshold());
00676 params.SetGap_open(data->GetGapOpeningCost());
00677 params.SetGap_extend(data->GetGapExtensionCost());
00678
00679 int val;
00680 if ((val = data->GetMatchReward()) != 0)
00681 params.SetSc_match(val);
00682
00683 if ((val = data->GetMismatchPenalty()) != 0)
00684 params.SetSc_mismatch(val);
00685
00686 string str;
00687 if ((str = data->GetPHIPattern()) != NcbiEmptyString)
00688 params.SetPattern(str);
00689
00690 if ((str = data->GetFilterString()) != NcbiEmptyString)
00691 params.SetFilter(str);
00692 }
00693
00694
00695
00696
00697
00698 static void
00699 s_BlastXMLGetStatistics(vector<CRef<CStatistics> >& stat_vec,
00700 const IBlastXMLReportData* data)
00701 {
00702 int db_numseq = data->GetDbNumSeqs();
00703 Int8 db_length = data->GetDbLength();
00704
00705 for (unsigned int index = 0; index < data->GetNumQueries(); ++index) {
00706 CRef<CStatistics> stat(new CStatistics());
00707 stat->SetDb_num(db_numseq);
00708 stat->SetDb_len((int)db_length);
00709 stat->SetHsp_len(data->GetLengthAdjustment(index));
00710 stat->SetEff_space((double)data->GetEffectiveSearchSpace(index));
00711 stat->SetKappa(data->GetKappa(index));
00712 stat->SetLambda(data->GetLambda(index));
00713 stat->SetEntropy(data->GetEntropy(index));
00714 stat_vec.push_back(stat);
00715 }
00716 }
00717
00718
00719
00720
00721
00722 static CReference::EPublication
00723 s_GetBlastPublication(EProgram program)
00724 {
00725 CReference::EPublication publication = CReference::eMaxPublications;
00726
00727 switch (program) {
00728 case eMegablast:
00729 publication = CReference::eMegaBlast; break;
00730 case ePHIBlastp: case ePHIBlastn:
00731 publication = CReference::ePhiBlast; break;
00732 case ePSIBlast:
00733 publication = CReference::eCompBasedStats; break;
00734 default:
00735 publication = CReference::eGappedBlast; break;
00736 }
00737 return publication;
00738 }
00739
00740
00741
00742
00743
00744
00745 void
00746 BlastXML_FormatReport(CBlastOutput& bxmlout, const IBlastXMLReportData* data, CNcbiOstream *out_stream)
00747 {
00748 bool incremental_output = (bool)out_stream;
00749 string program_name = data->GetBlastProgramName();
00750 bxmlout.SetProgram(program_name);
00751 bxmlout.SetVersion(CBlastFormatUtil::BlastGetVersion(program_name));
00752 EProgram blast_task = data->GetBlastTask();
00753 bxmlout.SetReference(CReference::GetString(s_GetBlastPublication(blast_task)));
00754 bxmlout.SetDb(data->GetDatabaseName());
00755
00756 const CSeq_loc* kSeqLoc = data->GetQuery(0);
00757 if (!kSeqLoc)
00758 NCBI_THROW(CException, eUnknown, "Query Seq-loc is not available");
00759
00760 CRef<CScope> scope(data->GetScope(0));
00761
00762 string query_def = NcbiEmptyString;
00763
00764
00765
00766 try {
00767 CBioseq_Handle bh = scope->GetBioseqHandle(*kSeqLoc);
00768
00769 const CBioseq& kQueryBioseq = *bh.GetBioseqCore();
00770 bxmlout.SetQuery_ID(CBlastFormatUtil::GetSeqIdString(kQueryBioseq));
00771 query_def = sequence::GetTitle(bh);
00772 } catch (const CException&) {
00773 const CSeq_id& seqid = sequence::GetId(*kSeqLoc, scope);
00774 bxmlout.SetQuery_ID(seqid.AsFastaString());
00775 };
00776
00777 if (query_def == NcbiEmptyString)
00778 query_def = "No definition line";
00779
00780 bxmlout.SetQuery_def(query_def);
00781
00782 bxmlout.SetQuery_len(sequence::GetLength(*kSeqLoc, scope));
00783
00784 s_SetBlastXMLParameters(bxmlout, data);
00785
00786 auto_ptr< CBlastFormattingMatrix > matrix(data->GetMatrix());
00787
00788 vector<CRef<CStatistics> > stat_vec;
00789 s_BlastXMLGetStatistics(stat_vec, data);
00790
00791 string serial_xml_start, serial_xml_end;
00792 if( incremental_output ) {
00793 bool add_dtd_reference = true, add_xml_version = true;
00794 bool dummy_res = s_SerializeAndSplitBy( bxmlout, "</BlastOutput_param>",
00795 serial_xml_start, serial_xml_end,
00796 add_dtd_reference, add_xml_version );
00797
00798 *out_stream << serial_xml_start << "\n<BlastOutput_iterations>" ;
00799 }
00800
00801 for (unsigned int index = 0; index < data->GetNumQueries(); ++index) {
00802
00803 const CSeq_loc* seqloc = data->GetQuery(index);
00804 if (!seqloc) {
00805 string message =
00806 "Unable to retrieve query " + NStr::IntToString(index);
00807 NCBI_THROW(CException, eUnknown, message);
00808 }
00809 s_BlastXMLAddIteration(bxmlout, data->GetAlignment(index), *seqloc,
00810 data->GetScope(index), matrix.get(),
00811 data->GetMaskLocations(index), index,
00812 *stat_vec[index], !data->GetGappedMode(),
00813 data->GetMasterGeneticCode(), data->GetSlaveGeneticCode(),
00814 data->GetMessages(),
00815 out_stream);
00816 }
00817 if(incremental_output) *out_stream << "\n</BlastOutput_iterations>" << serial_xml_end << endl;
00818 }
00819
00820
00821
00822
00823 static bool s_SerializeAndSplitBy(const CSerialObject &object,
00824 const char *tag,
00825 string &start_part,
00826 string &end_part,
00827 bool add_reference_dtd,
00828 bool add_xml_version )
00829 {
00830 bool res_code = false;
00831 TTypeInfo typeInfo = object.GetThisTypeInfo();
00832 string breake_by_tag = tag;
00833 start_part="<NOT SET>";
00834 end_part="</NOT SET>";
00835 CNcbiOstrstream one_iter_ss_os;
00836 {
00837 auto_ptr<CObjectOStreamXml> xml_one_iter_os(new CObjectOStreamXml (one_iter_ss_os,false));
00838 xml_one_iter_os->SetEncoding(eEncoding_Ascii);
00839 xml_one_iter_os->SetVerifyData( eSerialVerifyData_No );
00840 xml_one_iter_os->SetReferenceDTD(add_reference_dtd);
00841 if( add_xml_version )
00842 xml_one_iter_os->Write(&object, typeInfo );
00843 else
00844 xml_one_iter_os->WriteObject(&object, typeInfo );
00845 }
00846 string out_str = string(CNcbiOstrstreamToString(one_iter_ss_os));
00847 string::size_type iterations_insert_point = out_str.find( breake_by_tag );
00848 if( iterations_insert_point != string::npos ){
00849 iterations_insert_point += breake_by_tag.length();
00850 start_part = out_str.substr(0,iterations_insert_point);
00851 end_part = out_str.substr(iterations_insert_point);
00852 res_code = true;
00853 }
00854 else {
00855 start_part = out_str;
00856 }
00857 return res_code;
00858 }
00859
00860 END_NCBI_SCOPE
00861
00862