NCBI C++ ToolKit
cuHitsDistributor.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cuHitsDistributor.cpp 58284 2013-05-28 18:34:25Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Charlie Liu
27  *
28  * File Description:
29  *
30  * Distribute PSI-BLAST hits among CDs
31  *
32  * ===========================================================================
33  */
34 
35 #include <ncbi_pch.hpp>
38 
40 BEGIN_SCOPE(cd_utils)
41 
// Fills the members gi, from and to from the Seq-align referenced by seqAlign,
// always querying index 1 of the alignment.
// NOTE(review): the signature line is missing from this listing. Because
// addBatch constructs `GiFootPrint gfp(*lit)` and then reads gfp.gi, this is
// presumably the GiFootPrint constructor taking the CRef< CSeq_align > - confirm
// against the header.
43 {
44  const CSeq_id& seqId = seqAlign->GetSeq_id(1);
// gi stays ZERO_GI unless the Seq-id at index 1 is of the Gi variant.
45  gi = ZERO_GI;
46  if (seqId.IsGi())
47  gi = seqId.GetGi();
// Start and stop positions are taken from the same index as the Seq-id.
48  from = seqAlign->GetSeqStart(1);
49  to = seqAlign->GetSeqStop(1);
50 }
51 
53 {
54 
55 }
56 
58 {
59 }
60 
62 {
63  if (!seqAlignSet.Empty())
64  {
65  m_batches.push_back(seqAlignSet);
66  if (seqAlignSet->IsSet())
67  {
68  list< CRef< CSeq_align > >& seqAlignList = seqAlignSet->Set();
69  list< CRef< CSeq_align > >::iterator lit = seqAlignList.begin();
70  //cdLog::Printf(0, "This batch has %d hits.\n", seqAlignList.size());
71  for(; lit != seqAlignList.end(); lit++)
72  {
73  GiFootPrint gfp(*lit);
74  if (gfp.gi > ZERO_GI)
75  m_hitTable[gfp].push_back(&(*lit));
76  else
77  ERR_POST("A SeqAlign without a GI is detected."); //should not happen normally for BLAST hits
78  }
79  }
80  else
81  ERR_POST("No hit for this Blast.");
82  }
83  else
84  ERR_POST("No hit for this Blast.");
85 }
86 
// For every footprint in m_hitTable that has more than one hit, keeps the hit
// with the smallest "e_value" score and clears all the others, then erases the
// cleared entries from every batch in m_batches.
// NOTE(review): the signature line and the declaration of `fit` (presumably an
// iterator initialised from m_hitTable.begin()) are missing from this listing.
88 {
90  for (; fit != m_hitTable.end(); fit++)
91  {
92  vector< CRef< CSeq_align >* >& hits = fit->second;
93  if (hits.size() <= 1)
94  continue; // a footprint with at most one hit has nothing to resolve
95  CRef< CSeq_align >* seqAlignRef = (hits[0]);
96  double min_evalue, evalue;
97  int min_id = 0;
// The first hit seeds the minimum; if it has no e_value the whole footprint is
// skipped and none of its hits are removed.
98  if (!(*seqAlignRef)->GetNamedScore("e_value", min_evalue))
99  {
100  ERR_POST("Can't get evalue from SeqAlign. Something is wrong");
101  continue;
102  }
// Scan the remaining hits. The comparison is strict, so on a tie the earlier
// hit stays selected. A hit without an e_value is never selected and is
// therefore cleared by the loop that follows.
103  for (int i = 1; i < (int) hits.size(); i++)
104  {
105  seqAlignRef = hits[i];
106  if (!(*seqAlignRef)->GetNamedScore("e_value", evalue))
107  {
108  ERR_POST("Can't get evalue from SeqAlign. Something is wrong");
109  continue;
110  }
111  if (evalue < min_evalue)
112  {
113  min_evalue = evalue;
114  min_id = i;
115  }
116  }
// Clear every hit except the selected one. Reset() is applied to the CRef that
// the pointer refers to, which is what the purge loop further down tests with
// Empty().
117  for (int i = 0; i < (int) hits.size(); i++)
118  {
119  seqAlignRef = hits[i];
120  if (i != min_id)
121  {
122  /*
123  cdLog::Printf(0, "Remove (%d:%d-%d)from %s\n", fit->first.gi, fit->first.from,
124  fit->first.to, (*seqAlignRef)->GetSeq_id(0).AsFastaString().c_str());*/
125  seqAlignRef->Reset();
126  }
127  /*
128  else
129  cdLog::Printf(0, "Keep (%d:%d-%d)for %s\n", fit->first.gi, fit->first.from,
130  fit->first.to, (*seqAlignRef)->GetSeq_id(0).AsFastaString().c_str());*/
131  }
132  }
133  //remove all Empty CRef from m_batches
// NOTE(review): m_hitTable holds pointers to these list elements (addBatch
// pushes &(*lit)). Erasing an element here leaves the corresponding pointer in
// m_hitTable dangling - confirm nothing dereferences m_hitTable afterwards.
134  for (unsigned int b =0; b < m_batches.size(); b++)
135  {
136  list< CRef< CSeq_align > >& seqAlignList = m_batches[b]->Set();
137  list< CRef< CSeq_align > >::iterator lit = seqAlignList.begin();
// num counts the erased entries; it is only referenced by the commented-out
// log line at the end of this loop body.
138  int num = 0;
139  while(lit != seqAlignList.end())
140  {
141  if ( lit->Empty() )
142  {
// erase() returns the iterator following the removed element, so the iterator
// is not advanced again in this branch.
143  lit = seqAlignList.erase(lit);
144  num++;
145  }
146  else
147  lit++;
148  }
149  //ERR_POST("Number of hitremoved from this batch. It now has %d hits.\n",num, seqAlignList.size());
150  }
151 }
152 
153 void HitDistributor::dump(string filename)
154 {
155  CNcbiOfstream outStream(filename.c_str(), IOS_BASE::out | IOS_BASE::binary);
156  string err;
157  if (!outStream) {
158  err = "Cannot open file for writing";
159  return;
160  }
162  for (; fit != m_hitTable.end(); fit++)
163  {
164  vector< CRef< CSeq_align >* >& hits = fit->second;
165  const GiFootPrint& gfp = fit->first;
166  outStream<<"GI-Footprint"<<gfp.gi<<':'<<gfp.from<<'-'<<gfp.to<<endl;
167  for (unsigned int i = 0; i < hits.size(); i++)
168  {
169  if (!WriteASNToStream(outStream, **hits[i], false,&err))
170  LOG_POST("Failed to write to "<<filename<<" because of "<<err);
171  }
172  }
173 }
174 
175 END_SCOPE(cd_utils)
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
void dump(string filename)
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:73
std::ofstream out("events_result.xml")
main entry point for tests
virtual void Reset(void)
Reset the whole object.
Definition: Seq_align_.cpp:332
Tdata & Set(void)
Assign a value to data member.
int i
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:101
const_iterator end() const
Definition: map.hpp:152
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:185
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:702
bool IsSet(void) const
Check if a value has been assigned to data member.
#define ZERO_GI
Definition: ncbimisc.hpp:1119
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:198
static bool WriteASNToStream(ncbi::CNcbiOstream &os, const ASNClass &ASNobject, bool isBinary, std::string *err, ncbi::EFixNonPrint fixNonPrint=ncbi::eFNP_Default)
FootprintToHitMap m_hitTable
CRef –.
Definition: ncbiobj.hpp:616
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
vector< CRef< CSeq_align_set > > m_batches
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:70
const_iterator begin() const
Definition: map.hpp:151
void addBatch(CRef< CSeq_align_set > seqAlignSet)
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
Definition: ncbistre.hpp:326
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:98
unsigned int
Definition: types.hpp:1153
Modified on Wed Sep 28 19:26:56 2016 by modify_doxy.py rev. 506947