00001 #include <ncbi_pch.hpp>
00002 #include "caligntest.hpp"
00003 #include "aligners.hpp"
00004
00005 #include "cprogressindicator.hpp"
00006 #include "ialigner.hpp"
00007 #include "string-util.hpp"
00008
00009 #include <iostream>
00010 #include <iomanip>
00011 #include <fstream>
00012 #include <sstream>
00013
// Platform-specific setup: non-Windows builds include <sys/resource.h>,
// Windows builds instead alias strtoll() to strtoui64().
// NOTE(review): the alias targets an *unsigned* conversion, and the MSVC CRT
// spells its 64-bit parsers with a leading underscore (_strtoi64 /
// _strtoui64) -- confirm that strtoui64 is declared somewhere for Windows
// builds and that negative inputs are not expected through this macro.
#ifndef _WIN32
#include <sys/resource.h>
#else
#define strtoll( a, b, c ) strtoui64( a, b, c )
#endif
00019
00020 USING_OLIGOFAR_SCOPES;
00021
/// Construct the alignment test application with its default settings.
///
/// The command line is forwarded unchanged to the CApp base class; every
/// option member starts at the default that Help() reports and that
/// ParseArg() may later override.
///
/// @param argc  Argument count, passed through to CApp.
/// @param argv  Argument vector, passed through to CApp.
CAlignTest::CAlignTest( int argc, char ** argv ) :
    CApp( argc, argv ),
    m_algo( eAlignment_fast ),      // default algorithm: fast aligner
    m_flags( 0 ),                   // no colorspace / reverse-strand flags set
    m_offsetQuery( 0 ),             // seeding position on the query
    m_offsetSubject( 0 ),           // seeding position on the subject
    m_xDropoff( 2 ),                // handed to NewAligner() as xdropoff
    m_identityScore( 1 ),
    m_mismatchScore( -1 ),
    m_gapOpeningScore( -3 ),
    m_gapExtentionScore( -1.5 )     // NOTE(review): fractional default; member type is not visible here
{}
00034
00035 void CAlignTest::Help( const char * arg )
00036 {
00037 cout << "usage: [-hV] [-r h|s|f] [-c +|-] [-q rc] [-s rc] [-R +|-] [-Q qoffset] [-S soffset] query subj\n"
00038 << "where:\n"
00039 << " --algorithm=h|s|f -r h|s|f algorithm to use (HSP, Smith-Waterman, Fast) [" << char( m_algo ) << "]\n"
00040 << " --colorspace=yes|no -c yes|no align in colorspace [" << (m_flags & fAlign_colorspace ? "yes" : "no") << "]\n"
00041 << " --query-strand=+|- -q +|- use query strand [" << (m_flags & fQuery_reverse ? "-":"+" ) << "]\n"
00042 << " --subject-strand=+|- -s +|- use subject strand [" << (m_flags & fSubject_reverse ? "-":"+" ) << "]\n"
00043 << " --query-offset=off -Q off use position off on query as seeding position [" << m_offsetQuery << "]\n"
00044 << " --subject-offset=off -S off use position off on subject as seeding position [" << m_offsetQuery << "]\n"
00045 << " --score-identity=val -I val identity score [" << m_identityScore << "]\n"
00046 << " --score-mismatch=val -M val mismatch score [" << m_mismatchScore << "]\n"
00047 << " --score-gap-opening=val -G val gap opening score [" << m_gapOpeningScore << "]\n"
00048 << " --score-gap-extention=v -g val gap extention score [" << m_gapExtentionScore << "]\n"
00049 << " --x-dropoff=val -X val maximal gap width for -rs [" << m_xDropoff << "]\n"
00050 ;
00051 }
00052
00053 const option * CAlignTest::GetLongOptions() const
00054 {
00055 static struct option opt[] = {
00056 {"help", 0, 0, 'h'},
00057 {"version", 0, 0, 'V'},
00058 {"algorithm", 1, 0, 'r'},
00059 {"colorspace", 1, 0, 'c'},
00060 {"query-strand", 1, 0, 'q'},
00061 {"subject-strand", 1, 0, 's'},
00062 {"query-offset", 1, 0, 'Q'},
00063 {"subject-offset", 1, 0, 'S'},
00064 {"score-identity", 1, 0, 'I'},
00065 {"score-mismatch", 1, 0, 'M'},
00066 {"score-gap-opening", 1, 0, 'G'},
00067 {"score-gap-extention", 1, 0, 'g'},
00068 {"x-dropoff", 1, 0, 'X'},
00069 {0,0,0,0}
00070 };
00071 return opt;
00072 }
00073
00074 const char * CAlignTest::GetOptString() const
00075 {
00076 return "hVr:c:q:s:Q:S:I:M:G:g:X:";
00077 }
00078
00079 int CAlignTest::ParseArg( int opt, const char * arg, int longindex )
00080 {
00081 switch( opt ) {
00082 case 'r': m_algo = *arg == 'h' ? eAlignment_HSP : *arg == 's' ? eAlignment_SW : *arg == 'f' ? eAlignment_fast : ((cerr << "Warning: unknown alignment algorithm " << arg << endl), m_algo ); break;
00083 case 'c': SetFlags( fAlign_colorspace, arg ); break;
00084 case 'q': SetFlags( fQuery_reverse, arg ); break;
00085 case 's': SetFlags( fSubject_reverse, arg ); break;
00086 case 'Q': m_offsetQuery = Convert( arg ); break;
00087 case 'S': m_offsetSubject = Convert( arg ); break;
00088 case 'X': m_xDropoff = Convert( arg ); break;
00089 case 'I': m_identityScore = Convert( arg ); break;
00090 case 'M': m_mismatchScore = Convert( arg ); break;
00091 case 'G': m_gapOpeningScore = Convert( arg ); break;
00092 case 'g': m_gapExtentionScore = Convert( arg ); break;
00093 default: return CApp::ParseArg( opt, arg, longindex );
00094 }
00095 return 0;
00096 }
00097
00098 int CAlignTest::Execute()
00099 {
00100 CScoreTbl scoreTbl( m_identityScore, m_mismatchScore, m_gapOpeningScore, m_gapExtentionScore );
00101 auto_ptr<IAligner> aligner( CreateAligner( m_algo, &scoreTbl ) );
00102 CAlignerBase::SetPrintDebug( true );
00103
00104 if( m_flags & fAlign_colorspace )
00105 THROW( logic_error, "Colorspace option is not immplemented" );
00106
00107 if( GetArgIndex() + 2 > GetArgCount() )
00108 THROW( runtime_error, "Two sequences are required!" );
00109
00110 if( m_offsetQuery < 0 || m_offsetSubject < 0 )
00111 THROW( runtime_error, "Offset should never be negative" );
00112
00113 const char * qs = GetArg( GetArgIndex() );
00114 const char * ss = GetArg( GetArgIndex() + 1 );
00115 int ql = strlen( qs );
00116 int sl = strlen( ss );
00117
00118 cerr << DISPLAY( qs ) << DISPLAY( ql ) << endl;
00119 cerr << DISPLAY( ss ) << DISPLAY( sl ) << endl;
00120
00121 if( ql == 0 || sl == 0 )
00122 THROW( runtime_error, "Sequences should have at least one base!" );
00123
00124 vector<char> query;
00125 vector<char> subject;
00126 query.reserve( ql );
00127 subject.reserve( sl );
00128
00129 while( *qs ) query.push_back( CNcbi8naBase( CIupacnaBase( *qs++ ) ) );
00130 while( *ss ) subject.push_back( CNcbi8naBase( CIupacnaBase( *ss++ ) ) );
00131
00132 qs = &query[0];
00133 ss = &subject[0];
00134
00135 qs += m_offsetQuery; ql -= m_offsetQuery;
00136 ss += m_offsetSubject; sl -= m_offsetSubject;
00137
00138 if( ql <= 0 || sl <= 0 )
00139 THROW( runtime_error, "Offset is too big" );
00140
00141 if( GetFlags( fQuery_reverse ) ) { qs = qs + ql; ql = -ql; }
00142 if( GetFlags( fSubject_reverse ) ) { ss = ss + sl; sl = -sl; }
00143
00144 int aflags = CAlignerBase::fComputePicture | CAlignerBase::fComputeScore | CAlignerBase::fPictureSubjectStrand;
00145
00146 aligner->SetBestPossibleQueryScore( min( ql, sl ) * m_identityScore );
00147 aligner->Align( CSeqCoding::eCoding_ncbi8na, qs, ql, CSeqCoding::eCoding_ncbi8na, ss, sl, aflags );
00148
00149 const CAlignerBase& abase = aligner->GetAlignerBase();
00150
00151 cout << ( GetFlags( fQuery_reverse ) ? 3 : 5 ) << "'=" << abase.GetQueryString() << "=" << ( GetFlags( fQuery_reverse ) ? 5 : 3 ) << "'\n";
00152 cout << " " << abase.GetAlignmentString() << " "
00153 << " i=" << abase.GetIdentityCount()
00154 << ", m=" << abase.GetMismatchCount()
00155 << ", g=" << abase.GetIndelCount()
00156 << ", s=" << abase.GetRawScore() << "/" << abase.GetBestQueryScore() << "=" << abase.GetScore() << "%\n";
00157 cout << "5'=" << abase.GetSubjectString() << "=3'\n";
00158
00159 return 0;
00160 }
00161
00162 IAligner * CAlignTest::NewAligner( EAlignmentAlgo algo, int xdropoff ) const
00163 {
00164
00165 switch( algo ) {
00166 case eAlignment_fast: return new CAligner_fast();
00167 case eAlignment_HSP: return new CAligner_HSP();
00168 case eAlignment_SW:
00169 do {
00170 auto_ptr<CAligner_SW> swalign( new CAligner_SW() );
00171 swalign->SetMatrix().resize( 2*xdropoff + 1 );
00172 return swalign.release();
00173 } while(0); break;
00174 default: THROW( logic_error, "Bad value for m_algo" );
00175 }
00176 }
00177
00178 IAligner * CAlignTest::CreateAligner( EAlignmentAlgo algo, CScoreTbl * scoreTbl ) const
00179 {
00180 auto_ptr<IAligner> aligner( NewAligner( algo, m_xDropoff ) );
00181 ASSERT( aligner.get() );
00182 aligner->SetScoreTbl( *scoreTbl );
00183 return aligner.release();
00184 }
00185
00186