NCBI C++ ToolKit
pairwise_aln_app.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

00001 /*  $Id: pairwise_aln_app.cpp 33815 2007-05-04 17:18:18Z kazimird $
00002 * ===========================================================================
00003 *
00004 *                            PUBLIC DOMAIN NOTICE
00005 *               National Center for Biotechnology Information
00006 *
00007 *  This software/database is a "United States Government Work" under the
00008 *  terms of the United States Copyright Act.  It was written as part of
00009 *  the author's official duties as a United States Government employee and
00010 *  thus cannot be copyrighted.  This software/database is freely available
00011 *  to the public for use. The National Library of Medicine and the U.S.
00012 *  Government have not placed any restriction on its use or reproduction.
00013 *
00014 *  Although all reasonable efforts have been taken to ensure the accuracy
00015 *  and reliability of the software and data, the NLM and the U.S.
00016 *  Government do not and cannot warrant the performance or results that
00017 *  may be obtained by using this software or data. The NLM and the U.S.
00018 *  Government disclaim all warranties, express or implied, including
00019 *  warranties of performance, merchantability or fitness for any particular
00020 *  purpose.
00021 *
00022 *  Please cite the author in any work or product based on this material.
00023 *
00024 * ===========================================================================
00025 *
00026 * Author:  Kamen Todorov
00027 *
00028 * File Description:
00029 *   Demo of extracting a pairwise alignment from a file with Seq-align(s).
00030 *
00031 * ===========================================================================
00032 */
00033 
00034 #include <ncbi_pch.hpp>
00035 #include <corelib/ncbistd.hpp>
00036 #include <corelib/ncbiapp.hpp>
00037 #include <corelib/ncbienv.hpp>
00038 #include <corelib/ncbiargs.hpp>
00039 #include <connect/ncbi_core_cxx.hpp>
00040 
00041 #include <common/test_assert.h>
00042 
00043 #include <serial/objistr.hpp>
00044 #include <serial/iterator.hpp>
00045 
00046 /// Obj Manager
00047 #include <objmgr/object_manager.hpp>
00048 #include <objmgr/scope.hpp>
00049 #include <objtools/data_loaders/genbank/gbloader.hpp>
00050 
00051 /// Aln Manager
00052 #include <objtools/alnmgr/aln_asn_reader.hpp>
00053 #include <objtools/alnmgr/pairwise_aln.hpp>
00054 #include <objtools/alnmgr/aln_container.hpp>
00055 #include <objtools/alnmgr/aln_tests.hpp>
00056 #include <objtools/alnmgr/aln_stats.hpp>
00057 
00058 
00059 using namespace ncbi;
00060 using namespace objects;
00061 
00062 
00063 /// Types we use here:
00064 // typedef CSeq_align::TDim TDim;
00065 // typedef vector<const CSeq_align*> TAlnVector;
00066 // typedef const CSeq_id* TSeqIdPtr;
00067 // typedef vector<TSeqIdPtr> TSeqIdVector;
00068 // typedef SCompareOrdered<TSeqIdPtr> TComp;
00069 // typedef CAlnSeqIdVector<TAlnVector, TComp> TAlnSeqIdVector;
00070 // typedef CSeqIdAlnBitmap<TAlnSeqIdVector> TSeqIdAlnBitmap;
00071 // typedef CAlnStats<TAlnVector, TSeqIdVector, TAlnSeqIdVector> TAlnStats;
00072 // typedef TAlnStats::TBaseWidths TBaseWidths;
00073 // typedef TAlnStats::TAnchorRows TAnchorRows;
00074 
00075 
00076 class CPairwiseAlnApp : public CNcbiApplication
00077 {
00078 public:
00079     virtual void Init         (void);
00080     virtual int  Run          (void);
00081     CScope&      GetScope     (void) const;
00082     void         LoadInputAlns(void);
00083     bool         InsertAln    (const CSeq_align* aln) {
00084         aln->Validate(true);
00085         m_AlnContainer.insert(*aln);
00086         return true;
00087     }
00088 
00089 private:
00090     mutable CRef<CObjectManager> m_ObjMgr;
00091     mutable CRef<CScope>         m_Scope;
00092     CAlnContainer                m_AlnContainer;
00093     int                          m_QueryRow;
00094     int                          m_SubjectRow;
00095 };
00096 
00097 
00098 void CPairwiseAlnApp::Init(void)
00099 {
00100     auto_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
00101 
00102     arg_desc->AddDefaultKey
00103         ("in", "InputFileName",
00104          "Name of file to read from (standard input by default)",
00105          CArgDescriptions::eInputFile, "-");
00106 
00107     arg_desc->AddDefaultKey
00108         ("b", "bin_obj_type",
00109          "This forces the input file to be read in binary ASN.1 mode\n"
00110          "and specifies the type of the top-level ASN.1 object.\n",
00111          CArgDescriptions::eString, "");
00112 
00113     arg_desc->AddDefaultKey
00114         ("q", "QueryRow",
00115          "Query (anchor) row (zero-based)",
00116          CArgDescriptions::eInteger, "0");
00117 
00118     arg_desc->AddDefaultKey
00119         ("s", "SubjectRow",
00120          "Subject row (zero-based)",
00121          CArgDescriptions::eInteger, "1");
00122 
00123     // Program description
00124     string prog_description = "Alignment build application.\n";
00125     arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
00126                               prog_description, false);
00127 
00128     SetupArgDescriptions(arg_desc.release());
00129 }
00130 
00131 
00132 void CPairwiseAlnApp::LoadInputAlns(void)
00133 {
00134     const CArgs& args = GetArgs();
00135     string sname = args["in"].AsString();
00136     
00137     /// get the asn type of the top-level object
00138     string asn_type = args["b"].AsString();
00139     bool binary = !asn_type.empty();
00140     auto_ptr<CObjectIStream> in
00141         (CObjectIStream::Open(binary?eSerial_AsnBinary:eSerial_AsnText, sname));
00142     
00143     CAlnAsnReader reader;
00144     reader.Read(in.get(),
00145                 bind1st(mem_fun(&CPairwiseAlnApp::InsertAln), this),
00146                 asn_type);
00147 }
00148 
00149 
00150 CScope& CPairwiseAlnApp::GetScope(void) const
00151 {
00152     if (!m_Scope) {
00153         m_ObjMgr = CObjectManager::GetInstance();
00154         CGBDataLoader::RegisterInObjectManager(*m_ObjMgr);
00155         
00156         m_Scope = new CScope(*m_ObjMgr);
00157         m_Scope->AddDefaults();
00158     }
00159     return *m_Scope;
00160 }
00161 
00162 
00163 int CPairwiseAlnApp::Run(void)
00164 {
00165     // Setup application registry, error log, and MT-lock for CONNECT library
00166     CONNECT_Init(&GetConfig());
00167 
00168     m_QueryRow = GetArgs()["q"].AsInteger();
00169     m_SubjectRow = GetArgs()["s"].AsInteger();
00170 
00171     LoadInputAlns();
00172 
00173 //     /// Create a vector of alignments
00174 //     TAlnVector aln_vector(m_AlnContainer.size());
00175 //     aln_vector.assign(m_AlnContainer.begin(), m_AlnContainer.end());
00176 
00177 
00178 //     /// Create a comparison functor
00179 //     TComp comp;
00180 
00181 
00182 //     /// Create a vector of seq-ids per seq-align
00183 //     TAlnSeqIdVector aln_seq_id_vector(aln_vector, comp);
00184 
00185 
00186 //     /// Create an alignment bitmap to obtain statistics.
00187 //     TSeqIdAlnBitmap id_aln_bitmap(aln_seq_id_vector, GetScope());
00188 //     id_aln_bitmap.Dump(cout);
00189 
00190 //     /// Determine anchor row for each alignment
00191 //     TBaseWidths base_widths;
00192 //     bool translated = id_aln_bitmap.GetTranslatedAlnCount();
00193 //     if (translated) {
00194 //         base_widths.resize(id_aln_bitmap.GetAlnCount());
00195 //         for (size_t aln_idx = 0;  aln_idx < aln_seq_id_vector.size();  ++aln_idx) {
00196 //             const TSeqIdVector& ids = aln_seq_id_vector[aln_idx];
00197 //             base_widths[aln_idx].resize(ids.size());
00198 //             for (size_t row = 0; row < ids.size(); ++row)   {
00199 //                 CBioseq_Handle bioseq_handle = m_Scope->GetBioseqHandle(*ids[row]);
00200 //                 if (bioseq_handle.IsProtein()) {
00201 //                     base_widths[aln_idx][row] = 3;
00202 //                 } else if (bioseq_handle.IsNucleotide()) {
00203 //                     base_widths[aln_idx][row] = 1;
00204 //                 } else {
00205 //                     string err_str =
00206 //                         string("Cannot determine molecule type for seq-id: ")
00207 //                         + ids[row]->AsFastaString();
00208 //                     NCBI_THROW(CSeqalignException, eInvalidSeqId, err_str);
00209 //                 }
00210 //             }
00211 //         }
00212 //     }
00213 
00214 
00215 //     /// Determine anchor rows;
00216 //     TAnchorRows anchor_rows;
00217 //     bool anchored = id_aln_bitmap.IsQueryAnchored();
00218 //     if (anchored) {
00219 //         TSeqIdPtr anchor_id = id_aln_bitmap.GetAnchorHandle().GetSeqId();
00220 //         anchor_rows.resize(id_aln_bitmap.GetAlnCount(), -1);
00221 //         for (size_t aln_idx = 0;  aln_idx < anchor_rows.size();  ++aln_idx) {
00222 //             const TSeqIdVector& ids = aln_seq_id_vector[aln_idx];
00223 //             for (size_t row = 0; row < ids.size(); ++row)   {
00224 //                 if ( !(comp(ids[row], anchor_id) ||
00225 //                        comp(anchor_id, ids[row])) ) {
00226 //                     anchor_rows[aln_idx] = row;
00227 //                     break;
00228 //                 }
00229 //             }
00230 //             _ASSERT(anchor_rows[aln_idx] >= 0);
00231 //         }
00232 //     }
00233 
00234 
00235 //     /// Store all retrieved statistics in the aln hints
00236 //     TAlnStats aln_stats(aln_vector,
00237 //                         aln_seq_id_vector,
00238 //                         anchored ? &anchor_rows : 0,
00239 //                         translated ? &base_widths : 0);
00240 //     aln_stats.Dump(cout);
00241 
00242 
00243 //     /// Construct pairwise alignmenst based on the aln hints
00244 //     for (size_t aln_idx = 0;  
00245 //          aln_idx < aln_stats.GetAlnCount();
00246 //          ++aln_idx) {
00247 
00248 //         CPairwiseAln 
00249 //             pairwise_aln(*aln_stats.GetAlnVector()[aln_idx],
00250 //                          m_QueryRow,
00251 //                          m_SubjectRow,
00252 //                          aln_stats.GetBaseWidthForAlnRow(aln_idx, m_QueryRow),
00253 //                          aln_stats.GetBaseWidthForAlnRow(aln_idx, m_SubjectRow));
00254 
00255 //         pairwise_aln.Dump(cout);
00256 //     }
00257 //     cout << endl;
00258 
00259 
00260     return 0;
00261 }
00262 
00263 
00264 int main(int argc, const char* argv[])
00265 {
00266     return CPairwiseAlnApp().AppMain(argc, argv, 0, eDS_Default, 0);
00267 }
Modified on Thu Nov 20 15:23:50 2014 by modify_doxy.py rev. 426318