src/app/oligofar/caligntest.cpp

Go to the documentation of this file.
00001 #include <ncbi_pch.hpp>
00002 #include "caligntest.hpp"
00003 #include "aligners.hpp"
00004 
00005 #include "cprogressindicator.hpp"
00006 #include "ialigner.hpp"
00007 #include "string-util.hpp"
00008 
00009 #include <iostream>
00010 #include <iomanip>
00011 #include <fstream>
00012 #include <sstream>
00013 
00014 #ifndef _WIN32
00015 #include <sys/resource.h>
00016 #else
00017 #define strtoll( a, b, c ) strtoui64( a, b, c )
00018 #endif
00019 
00020 USING_OLIGOFAR_SCOPES;
00021 
00022 CAlignTest::CAlignTest( int argc, char ** argv ) : 
00023     CApp( argc, argv ),
00024     m_algo( eAlignment_fast ),
00025     m_flags( 0 ),
00026     m_offsetQuery( 0 ),
00027     m_offsetSubject( 0 ),
00028     m_xDropoff( 2 ),
00029     m_identityScore( 1 ),
00030     m_mismatchScore( -1 ),
00031     m_gapOpeningScore( -3 ),
00032     m_gapExtentionScore( -1.5 )
00033 {}
00034                                                    
00035 void CAlignTest::Help( const char * arg )
00036 {
00037     cout << "usage: [-hV] [-r h|s|f] [-c +|-] [-q rc] [-s rc] [-R +|-] [-Q qoffset] [-S soffset] query subj\n"
00038         << "where:\n"
00039         << "  --algorithm=h|s|f       -r h|s|f      algorithm to use (HSP, Smith-Waterman, Fast) [" << char( m_algo ) << "]\n"
00040         << "  --colorspace=yes|no     -c yes|no     align in colorspace [" << (m_flags & fAlign_colorspace ? "yes" : "no") << "]\n"
00041         << "  --query-strand=+|-      -q +|-        use query strand [" << (m_flags & fQuery_reverse ? "-":"+" ) << "]\n"
00042         << "  --subject-strand=+|-    -s +|-        use subject strand [" << (m_flags & fSubject_reverse ? "-":"+" ) << "]\n"
00043         << "  --query-offset=off      -Q off        use position off on query as seeding position [" << m_offsetQuery << "]\n"
00044         << "  --subject-offset=off    -S off        use position off on subject as seeding position [" << m_offsetQuery << "]\n"
00045         << "  --score-identity=val    -I val        identity score [" << m_identityScore << "]\n"
00046         << "  --score-mismatch=val    -M val        mismatch score [" << m_mismatchScore << "]\n"
00047         << "  --score-gap-opening=val -G val        gap opening score [" << m_gapOpeningScore << "]\n"
00048         << "  --score-gap-extention=v -g val        gap extention score [" << m_gapExtentionScore << "]\n"
00049         << "  --x-dropoff=val         -X val        maximal gap width for -rs [" << m_xDropoff << "]\n"
00050         ;
00051 }
00052 
00053 const option * CAlignTest::GetLongOptions() const
00054 {
00055     static struct option opt[] = {
00056         {"help", 0, 0, 'h'},
00057         {"version", 0, 0, 'V'},
00058         {"algorithm", 1, 0, 'r'},
00059         {"colorspace", 1, 0, 'c'},
00060         {"query-strand", 1, 0, 'q'},
00061         {"subject-strand", 1, 0, 's'},
00062         {"query-offset", 1, 0, 'Q'},
00063         {"subject-offset", 1, 0, 'S'},
00064         {"score-identity", 1, 0, 'I'},
00065         {"score-mismatch", 1, 0, 'M'},
00066         {"score-gap-opening", 1, 0, 'G'},
00067         {"score-gap-extention", 1, 0, 'g'},
00068         {"x-dropoff", 1, 0, 'X'},
00069         {0,0,0,0}
00070     };
00071     return opt;
00072 }
00073 
00074 const char * CAlignTest::GetOptString() const
00075 {
00076     return "hVr:c:q:s:Q:S:I:M:G:g:X:";
00077 }
00078 
00079 int CAlignTest::ParseArg( int opt, const char * arg, int longindex )
00080 {
00081     switch( opt ) {
00082     case 'r': m_algo = *arg == 'h' ? eAlignment_HSP : *arg == 's' ? eAlignment_SW : *arg == 'f' ? eAlignment_fast : ((cerr << "Warning: unknown alignment algorithm " << arg << endl), m_algo ); break;
00083     case 'c': SetFlags( fAlign_colorspace, arg ); break;
00084     case 'q': SetFlags( fQuery_reverse, arg ); break;
00085     case 's': SetFlags( fSubject_reverse, arg ); break;
00086     case 'Q': m_offsetQuery = Convert( arg ); break;
00087     case 'S': m_offsetSubject = Convert( arg ); break;
00088     case 'X': m_xDropoff = Convert( arg ); break;
00089     case 'I': m_identityScore = Convert( arg ); break;
00090     case 'M': m_mismatchScore = Convert( arg ); break;
00091     case 'G': m_gapOpeningScore = Convert( arg ); break;
00092     case 'g': m_gapExtentionScore = Convert( arg ); break;
00093     default: return CApp::ParseArg( opt, arg, longindex );
00094     }
00095     return 0;
00096 }
00097 
00098 int CAlignTest::Execute()
00099 {
00100     CScoreTbl scoreTbl( m_identityScore, m_mismatchScore, m_gapOpeningScore, m_gapExtentionScore );
00101     auto_ptr<IAligner> aligner( CreateAligner( m_algo, &scoreTbl ) );
00102     CAlignerBase::SetPrintDebug( true );
00103 
00104     if( m_flags & fAlign_colorspace )
00105         THROW( logic_error, "Colorspace option is not immplemented" );
00106 
00107     if( GetArgIndex() + 2 > GetArgCount() )
00108         THROW( runtime_error, "Two sequences are required!" );
00109 
00110     if( m_offsetQuery < 0 || m_offsetSubject < 0 )
00111         THROW( runtime_error, "Offset should never be negative" );
00112 
00113     const char * qs = GetArg( GetArgIndex() );
00114     const char * ss = GetArg( GetArgIndex() + 1 );
00115     int ql = strlen( qs );
00116     int sl = strlen( ss );
00117 
00118     cerr << DISPLAY( qs ) << DISPLAY( ql ) << endl;
00119     cerr << DISPLAY( ss ) << DISPLAY( sl ) << endl;
00120 
00121     if( ql == 0 || sl == 0 ) 
00122         THROW( runtime_error, "Sequences should have at least one base!" );
00123 
00124     vector<char> query;
00125     vector<char> subject;
00126     query.reserve( ql );
00127     subject.reserve( sl );
00128 
00129     while( *qs ) query.push_back( CNcbi8naBase( CIupacnaBase( *qs++ ) ) );
00130     while( *ss ) subject.push_back( CNcbi8naBase( CIupacnaBase( *ss++ ) ) );
00131 
00132     qs = &query[0];
00133     ss = &subject[0];
00134 
00135     qs += m_offsetQuery; ql -= m_offsetQuery;
00136     ss += m_offsetSubject; sl -= m_offsetSubject;
00137 
00138     if( ql <= 0 || sl <= 0 ) 
00139         THROW( runtime_error, "Offset is too big" );
00140 
00141     if( GetFlags( fQuery_reverse ) ) { qs = qs + ql; ql = -ql; }
00142     if( GetFlags( fSubject_reverse ) ) { ss = ss + sl; sl = -sl; }
00143 
00144     int aflags = CAlignerBase::fComputePicture | CAlignerBase::fComputeScore | CAlignerBase::fPictureSubjectStrand;
00145 
00146     aligner->SetBestPossibleQueryScore( min( ql, sl ) * m_identityScore );
00147     aligner->Align( CSeqCoding::eCoding_ncbi8na, qs, ql, CSeqCoding::eCoding_ncbi8na, ss, sl, aflags );
00148 
00149     const CAlignerBase& abase = aligner->GetAlignerBase();
00150 
00151     cout << ( GetFlags( fQuery_reverse ) ? 3 : 5 ) << "'=" << abase.GetQueryString() << "=" << ( GetFlags( fQuery_reverse ) ? 5 : 3 ) << "'\n";
00152     cout << "   " << abase.GetAlignmentString() << "  "
00153         << "  i=" << abase.GetIdentityCount() 
00154         << ", m=" << abase.GetMismatchCount() 
00155         << ", g=" << abase.GetIndelCount() 
00156         << ", s=" << abase.GetRawScore() << "/" << abase.GetBestQueryScore() << "=" << abase.GetScore() << "%\n";
00157     cout << "5'=" << abase.GetSubjectString() << "=3'\n";
00158 
00159     return 0;
00160 }
00161 
00162 IAligner * CAlignTest::NewAligner( EAlignmentAlgo algo, int xdropoff ) const
00163 {
00164 //    if( xdropoff == 0 ) return new CAligner_HSP();
00165     switch( algo ) {
00166     case eAlignment_fast: return new CAligner_fast();
00167     case eAlignment_HSP: return new CAligner_HSP();
00168     case eAlignment_SW:
00169         do {
00170             auto_ptr<CAligner_SW> swalign( new CAligner_SW() );
00171             swalign->SetMatrix().resize( 2*xdropoff + 1 );
00172             return swalign.release();
00173         } while(0); break;
00174     default: THROW( logic_error, "Bad value for m_algo" );
00175     }
00176 }
00177 
00178 IAligner * CAlignTest::CreateAligner( EAlignmentAlgo algo, CScoreTbl * scoreTbl ) const 
00179 {
00180     auto_ptr<IAligner> aligner( NewAligner( algo, m_xDropoff ) );
00181     ASSERT( aligner.get() );
00182     aligner->SetScoreTbl( *scoreTbl );
00183     return aligner.release();
00184 }
00185 
00186 

Generated on Wed Dec 9 04:09:53 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Wed Dec 09 08:17:54 2009 by modify_doxy.py rev. 173732