/*  $Id: adapter_search.cpp 60914 2013-12-12 15:38:06Z astashya $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 */

#include <ncbi_pch.hpp>
#include <corelib/ncbidbg.hpp>
#include <corelib/ncbimisc.hpp>
#include <algo/sequence/adapter_search.hpp>

#include <cmath>
#include <algorithm>
#include <functional>
#include <vector>
#include <set>
#include <queue>

USING_NCBI_SCOPE;

string NAdapterSearch::s_AsIUPAC(
        TWord w, 
        size_t mer_size)
{
    string s("");
    s.resize(mer_size);
    static const char* alphabet = "ACGT";
    for(size_t i = 0; i < mer_size; i++) {
        s[mer_size - 1 - i] = alphabet[w & 3];
        w >>= 2;
    }
    return s;
}

string NAdapterSearch::s_AsIUPAC(
        const TWords& words, 
        size_t mer_size)
{
    if(words.size() == 0) {
        return "";
    }

    string iupac;
    iupac.resize(words.size() - 1);
    static const char* alphabet = "ACGT";
    for(size_t i = 0; i < words.size() - 1; i++) {
        iupac[i] = alphabet[words[i] >> (mer_size * 2 - 2)];
    }
    iupac += s_AsIUPAC(words.back(), mer_size);
    return iupac;
}


double NAdapterSearch::s_GetWordComplexity(TWord word)
{
    vector<Uint1> counts(64, 0);
    Uint8 w2 = word | (word << (MER_LENGTH * 2));
    for(size_t i = 0; i < MER_LENGTH; i++) {
        counts[w2 & 0x3F] += 1; //counting trinucleotides (6 bits each)
        w2 >>= 2;
    }
    size_t sumsq(0);
    for(size_t i = 0; i < 64; i++) {
        size_t c = counts[i];
        sumsq += c*c;
    }
    return (144 - sumsq)/132.0; //normalize to [0..1]
}



void NAdapterSearch::s_Translate(
        const char* const seq,
        size_t seq_size, 
        bool revcomp, TWords& words)
{
    if(seq_size < MER_LENGTH) {
        words.resize(0);
        return;
    } else {
        words.resize(seq_size - (MER_LENGTH - 1));
    }

    //(A,C,G,T,*) -> (0, 1, 2, 3, 0)
    static const Uint1 char2letter[] = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 
        0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 
        0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    // mask of set bits; spanning MER_LENGTH-1 letters
    TWord mask = (1 << ((MER_LENGTH - 1) * 2)) - 1;

    if(!revcomp) {
        TWord w0 = 0;
        for(size_t i = 0; i < MER_LENGTH; i++) {
            Uint1 c = char2letter[static_cast<int>(seq[i])];
            w0 = (w0 << 2) | c;
        }
        words[0] = w0;
        
        int k = MER_LENGTH - 1;
        for(size_t i = 1; i < words.size(); i++) {
            Uint1 c = char2letter[static_cast<int>(seq[i + k])];
            words[i] = ((words[i - 1] & mask) << 2) | c;
        }
    } else {
        TWord w0 = 0;
        for(size_t i = 0; i < MER_LENGTH; i++) {
            Uint1 c = 3 - char2letter[static_cast<int>(seq[seq_size - 1 - i])];
            w0 = (w0 << 2) | c;
        }
        words[0] = w0;

        int k = seq_size - MER_LENGTH;
        for(size_t i = 1; i < words.size(); i++) {
            Uint1 c = 3 - char2letter[static_cast<int>(seq[k - i])];
            words[i] = ((words[i - 1] & mask) << 2) | c;
        }
    }
}


void NAdapterSearch
   ::CPairedEndAdapterDetector
   ::CConsensusPattern
   ::AddExemplar(
        TWords::const_iterator begin, 
        TWords::const_iterator end)
{
    size_t words_size = end - begin;
    size_t len = min(words_size, m_len);

    // Store position-specific 10-mer counts; 
    // (10-mer is in upper bits of 12-mer)
    for(size_t i = 0; i < len; i++) {
        x_IncrCount(i, *(begin + i) >> 4);
    }

    // Can get two more 10-mers out of last 12-mer
    if(words_size > 0 && words_size < m_len) {
        size_t len = min<size_t>(2, m_len - words_size);
        TWord w = *(begin + words_size);
        for(size_t i = 0; i < len; i++) {
            size_t pos = words_size + i;
            TWord w2 = (w >> (2 - i*2)) & 0xFFFFF;
            x_IncrCount(pos, w2);
        }
    }
}


NAdapterSearch::TWord NAdapterSearch
     ::CPairedEndAdapterDetector
     ::CConsensusPattern
     ::x_NextWord(size_t pos, TWord prev_word) const
{
    TWord best_word = 0;
    size_t best_count = 0;
    for(Uint1 letter = 0; letter < 4; letter++) {
        TWord word = ((prev_word << 2) & 0xFFFFF) | letter;
        TCount count = x_GetCount(pos + 1, word);
        if(count > best_count) {
            best_count = count;
            best_word = word;
        }
    }
    return best_word;
}


string NAdapterSearch
     ::CPairedEndAdapterDetector
     ::CConsensusPattern
     ::InferConsensus(const SParams& params) const
{
    // Find the best (most-frequent) and 2nd-best word at position 0
    size_t top_sup(0);
    size_t secondbest_sup(0);
    size_t best_word(0);
    for(size_t word = 0; word < NMERS10; word++) {
        size_t sup = m_counts[word];
        if(sup > top_sup && !NAdapterSearch::s_IsLowComplexity(word)) {
            secondbest_sup = top_sup;
            top_sup = sup;
            best_word = word;
        }
    }

    if(!params.HaveInitialSupport(top_sup, secondbest_sup)) {
        return "";
    } 

    TWords words(m_len);
    words[0] = best_word;

    ERR_POST(Info 
          << "Seed: " 
          << s_AsIUPAC(best_word,10) 
          << "; overrepresentation: " 
          << top_sup 
          << "/" 
          << secondbest_sup);

    // Extend the sequence from the starting word
    for(size_t i = 1; i < words.size(); i++) {
        TWord last_word = words[i - 1];
        TCount last_sup = x_GetCount(i - 1, last_word);

        TWord curr_word = x_NextWord(i - 1, last_word);
        TCount curr_sup = x_GetCount(i,     curr_word);

        if(params.HaveContinuedSupport(top_sup, last_sup, curr_sup)) {
            words[i] = curr_word;
        } else {
            words.resize(i);
            break;
        }
    }

    return s_AsIUPAC(words, 10);
}

///////////////////////////////////////////////////////////////////////

pair<size_t, size_t> NAdapterSearch
    ::CPairedEndAdapterDetector
    ::s_FindAdapterStartPos(
        const TWords& fwd_read, 
        const TWords& rev_read)
{
    return pair<size_t, size_t>(
        find(fwd_read.begin(), fwd_read.end(), rev_read.back()) 
            - fwd_read.begin() + MER_LENGTH,

        find(rev_read.begin(), rev_read.end(), fwd_read.front()) 
            - rev_read.begin());
}


void NAdapterSearch
    ::CPairedEndAdapterDetector
    ::AddExemplar(
        const char* spot,
        size_t spot_len)
{
    size_t len = spot_len / 2;
    const char* fwd_read = spot;
    const char* rev_read = spot+len;

    TWords fwd_words;
    TWords rev_words;
    NAdapterSearch::s_Translate(fwd_read, len, false, fwd_words);
    NAdapterSearch::s_Translate(rev_read, len, true,  rev_words); 
        // note: translating in rev_read reverse-orientation,
        // such that translations are in consistent orientation
        // so we can check for overlap

    pair<size_t, size_t> pos = s_FindAdapterStartPos(fwd_words, rev_words);

    bool is_consistent = (pos.first + pos.second == len); 
        // this is a likely true-positive rather than a random match

    size_t adapter_len = len - pos.first; // remaining portion of the read
    if(is_consistent && adapter_len >= MER_LENGTH) {
        m_cons5.AddExemplar(fwd_words.begin() + pos.first, fwd_words.end());

        // We translated R-read in rev-comp above for the purpose of 
        // detecting self-overlap within a spot, but here we'll need to do it 
        // again without rev-comp for the purpose of consensus-finding. 
        // We don't need to translate the whole read, just the adapter part.
        TWords words2;
        s_Translate(rev_read + pos.first, adapter_len, false, words2);
        m_cons3.AddExemplar(words2.begin(), words2.end());
    }
}

///////////////////////////////////////////////////////////////////////////

void NAdapterSearch::CUnpairedAdapterDetector::AddExemplar(
        const char* seq,
        size_t len)
{
    TWords words;
    s_Translate(seq, len, false, words);
    ITERATE(TWords, it, words) {
        m_counts[*it] += 1;
    }        
}


string NAdapterSearch::CUnpairedAdapterDetector::InferAdapterSeq() const
{
    TWord seed = x_FindAdapterSeed();
    if(!seed) {
        return "";  
    }

    // subsequent words will need to satisfy minimum support
    // relative to the originating seed.
    size_t top_count = m_counts[seed];

    // Get sequence of overlapping words starting at the seed
    TWords words;
    words.push_back(seed);
    x_ExtendSeed(words, top_count, false);  // extend left
    reverse(words.begin(), words.end());    // fix the order left->right
    x_ExtendSeed(words, top_count, true);   // extend right

    // The non-biological sequence following a read is often a homopolymer 
    // (usually A+), so statistically it looks like part of the adapter.
    // We'll truncate the homopolymer suffix if last word is a homopolymer

    string seq = s_AsIUPAC(words);
    if(NAdapterSearch::s_IsHomopolymer(words.back())) {
        char c = seq[seq.size() - 1]; 
        while(seq.size() > 0 && seq[seq.size() - 1] == c) {
            seq.resize(seq.size() - 1); 
            words.pop_back();
        }   
    }   
    return seq;
}


NAdapterSearch::TWord NAdapterSearch
    ::CUnpairedAdapterDetector
    ::x_FindAdapterSeed() const
{
    typedef pair<TCount, TWord> TPair; 
    typedef priority_queue< TPair, vector<TPair>, greater<TPair> > TTopWords;
    TTopWords top_words;
    
    // Collect most frequent words. The count must be large enough such that 
    // that most of these words are biological (not read-specific). The idea
    // is to check whether most frequent word (potentially adapter-specific)
    // is overrepresented relative to biological seqs
    for(TWord w = 0; w < m_counts.size(); w++) {
        Uint8 count = m_counts[w];
        if(count > m_params.min_support && !NAdapterSearch::s_IsLowComplexity(w)) {
            top_words.push(TPair(count, w));
            while(top_words.size() > m_params.top_n) {
                top_words.pop();
            }
        }
    }

    // Calculate geometric mean of counts
    double sumlogs(0);
    TWord word(0); // last word and occ will be the most frequent one
    TCount sup(0);
    size_t n = top_words.size();
    for(; !top_words.empty(); top_words.pop()) {
        sup = top_words.top().first;
        word = top_words.top().second;
        sumlogs += log(sup + 1.0);
    }

    size_t gmean = n == 0 ? 0 : exp(sumlogs / n) - 1;

    ERR_POST(Info 
          << "Seed: "
          << s_AsIUPAC(word) 
          << "; overrepresentation: " 
          << sup 
          << "/" 
          << static_cast<size_t>(gmean));

    return m_params.HaveInitialSupport(sup, gmean) ? word : 0;
}


NAdapterSearch::TWord NAdapterSearch::CUnpairedAdapterDetector::x_GetAdjacent(
        TWord w, 
        bool right) const
{
    TCount best_count(0);
    TWord best_word(0);

    for(Uint1 letter = 0; letter < 4; letter++) {
        TWord w2 = right ? ((w << 2) & 0xFFFFFF) | letter
                         :  (w >> 2)             | (letter << 22);

        TCount count = m_counts[w2];
        if(count > best_count) {
            best_count = count;
            best_word = w2;
        }
    }
    return best_word;
}


void NAdapterSearch::CUnpairedAdapterDetector::x_ExtendSeed(
        TWords& words,
        size_t top_sup,
        bool right) const
{
    while(true) {
        TWord prev_word = words.back();
        TWord curr_word = x_GetAdjacent(prev_word, right);
        TCount prev_sup = m_counts[prev_word];
        TCount curr_sup = m_counts[curr_word];

        if(!m_params.HaveContinuedSupport(top_sup, prev_sup, curr_sup)
           || find(words.begin(), words.end(), curr_word) != words.end()) 
        {
            // Stop extending if support is too low, or
            // this word is already in the sequence (to prevent loops)
            break;
        } else {
            words.push_back(curr_word);
        }
    }
}


///////////////////////////////////////////////////////////////////////

struct SOpLess_Second
{
    bool operator()(const NAdapterSearch::CSimpleUngappedAligner::TRange& a, 
                    const NAdapterSearch::CSimpleUngappedAligner::TRange& b) const
    {
        return a.second < b.second;
    }
};

NAdapterSearch::CSimpleUngappedAligner::TRange
  NAdapterSearch
::CSimpleUngappedAligner
::GetSeqRange(TPos pos) const
{
    TRanges::const_iterator it = lower_bound(
            m_subseq_ranges.begin(),
            m_subseq_ranges.end(),
            TRange(pos, pos),
            SOpLess_Second());

    return it == m_subseq_ranges.end() ? TRange(NULLPOS, NULLPOS) : *it;
}

const NAdapterSearch::CSimpleUngappedAligner::SMatch& 
  NAdapterSearch
::CSimpleUngappedAligner
::x_GetBetterOf(const SMatch& a, const SMatch& b) const
{
    if(a.len > 0 && b.len == 0) {
        return a;
    } else if(a.len == 0 && b.len > 0) {
        return b;
    } else {
        TRange ar = GetSeqRange((a.second - a.first) / 2);
        TRange br = GetSeqRange((b.second - b.first) / 2);

        // calculate unaligned length 
        // note ::len is doubled on antidiag
        TPos a_un = (ar.second - ar.first) - a.len/2;
        TPos b_un = (br.second - br.first) - b.len/2;

#if 0
        LOG_POST((a.second + a.first) / 2 
                << " " << (a.second - a.first) / 2 
                << " " << a.len 
                << " " << a_un 
                << " " << m_seq.substr(ar.first, ar.second - ar.first));
        LOG_POST((b.second + b.first) / 2 
                << " " << (b.second - b.first) / 2 
                << " " << b.len 
                << " " << b_un 
                << " " << m_seq.substr(br.first, br.second - br.first));
        LOG_POST("\n");
#endif
       
        // score is proportional to the length, and penalized
        // proportonally to the log of unaligned length 
        double a_score = a.len - log(1.0 + a_un)*5;
        double b_score = b.len - log(1.0 + b_un)*5;
        return a_score < b_score ? b : a;
    }
}


void NAdapterSearch::CSimpleUngappedAligner::s_IndexWord(
        TWord word,
        TPos pos,
        TPositions& vec_index, 
        TCoordSet& coordset)
{
    TWords approx_words;
    s_PermuteMismatches(word, approx_words);

    ITERATE(TWords, it2, approx_words) {
        TWord w = *it2;

        // store word->pos in vec-index;
        // if encountered multiplicity, move
        // the values to coordset and mark as
        // MULTIPOS in vec-index

        TPos& pos_ref = vec_index[w];
        if(pos_ref == pos || pos_ref == NULLPOS) {
            pos_ref = pos;
        } else {
            if(pos_ref != MULTIPOS) {
                coordset.insert(TCoordSet::value_type(w, pos_ref));
                pos_ref = MULTIPOS;
            }
            coordset.insert(TCoordSet::value_type(w, pos));
        }
    }
}


void NAdapterSearch::CSimpleUngappedAligner::s_CoordSetToMapIndex(
        const TCoordSet& coordset,
        TMapIndex& map_index,
        TPositions& positions)
{
     map_index.clear();
     positions.resize(coordset.size());
     TPositions::iterator dest = positions.begin();

     ITERATE(TCoordSet, it, coordset) {
        TWord w = it->first;
        TPos pos = it->second;
       
        *dest = pos; 
        if(map_index.find(w) == map_index.end()) {
            map_index[w].first = dest;
        }
        dest++;
        map_index[w].second = dest;
     }
}


void NAdapterSearch::CSimpleUngappedAligner::Init(const char* seq, size_t len)
{
    m_seq.resize(len);
    m_seq.assign(seq, len);

    m_subseq_ranges.clear();

    m_vec_index.resize(1 << (MER_LENGTH*2));
    fill(m_vec_index.begin(), m_vec_index.end(), (TPos)NULLPOS);

    m_positions.clear();
    m_map_index.clear();

    // process each '-'-delimited substring.
    TCoordSet coordset;
    const char* endend = seq+len;
    for(const char *begin = seq,    *end = find(begin, endend, '-');
                    begin < endend;
                    begin = end + 1, end = find(begin, endend, '-'))
    {
        TRange r(begin - seq, end - seq);
        m_subseq_ranges.push_back(r);

        TWords words;
        s_Translate(begin, r.second - r.first, false, words);

        for(size_t i = 0; i < words.size(); i++) {
            TPos pos = r.first + i;
            s_IndexWord(words[i], pos, m_vec_index, coordset);
        }
    }

    s_CoordSetToMapIndex(coordset, m_map_index, m_positions);
}


void NAdapterSearch::CSimpleUngappedAligner::s_PermuteMismatches(
        TWord w, 
        TWords& words)
{
    words.resize(240); // nCr(6,2)*4^2
    TWords::iterator dest = words.begin();
    for(size_t i1 = 3; i1 < 9; i1++) {

        for(Uint1 c1 = 0; c1 < 4; c1++) {
            TWord w1 = s_Put(w, i1, c1);

            for(size_t i2 = i1+1; i2 < 9; i2++) {

                for(Uint1 c2 = 0; c2 < 4; c2++) {
                    *(dest++) = s_Put(w1, i2, c2);
                }
            }
        }
    }
}


bool NAdapterSearch::CSimpleUngappedAligner::s_Merge(
        SMatch& m1, 
        const SMatch& m2)
{
    if(m1.first == NULLPOS) {
        m1 = m2;
        return true;
    } else if(m1.first == m2.first
           && m1.second + m1.len + 2 >= m2.second)
    {
        // on same diag and [almost-]overlapping on antidiag 
        // - extend the length of m1 to the end of m2
        m1.len = m2.len + m2.second - m1.second;
        return true;
    } else {
        return false;
    }
}


NAdapterSearch::CSimpleUngappedAligner::SMatch 
NAdapterSearch::CSimpleUngappedAligner::x_CreateMatch(
        TPos q_pos,
        TPos s_pos) const
{
    SMatch m;
    m.first  = q_pos - s_pos;
    m.second = q_pos + s_pos;
    m.len = MER_LENGTH * 2; // lengths are doubled on antidiag
    return m;
}


void NAdapterSearch::CSimpleUngappedAligner::x_Extend(
        SMatch& seg, 
        const char* q_seq, 
        const size_t query_len,
        const bool direction, //true iff forward
        const int match_score,
        const int mismatch_score,
        const int dropoff_score) const
{   
    int delta_pos = direction ? 1 : -1; 

    // Create a point at the first or last position of
    // the initial segment, depending on direction
    typedef pair<TPos, TPos> TPoint;
    TPoint p(seg.first  + (direction ? seg.len - 1 : 0),
             seg.second + (direction ? seg.len - 1 : 0)); 

    Int8 score(0), max_score(0);
    TPoint best_p = p;

    // bounds are [min, max)
    TRange q_bounds(0, query_len);
    TRange s_bounds = GetSeqRange(seg.second);

    // the initial point is apriori matched, so advance
    p.first  += delta_pos;
    p.second += delta_pos;
    while(score + dropoff_score > max_score
          && p.first  >= q_bounds.first 
          && p.first  <  q_bounds.second 
          && p.second >= s_bounds.first
          && p.second <  s_bounds.second)
    {   
        score += (q_seq[p.first] == m_seq[p.second]) ? match_score 
                                                     : mismatch_score;
#if 0
        LOG_POST(direction 
                 << " " << q_seq[p.first] 
                 << " " << m_seq[p.second] 
                 << " " << score 
                 << " " << best_p.first);
#endif
        
        if(score > max_score) {
            max_score = score;
            best_p = p;
        }   

        p.first  += delta_pos;
        p.second += delta_pos;
    }   

    if(direction) {
        // starting coordinates are the same; length increased
        seg.len = best_p.first - seg.first + 1;
    } else {
        // starting coordinates are at best_p; 
        // length is distance to the starting coords + original length
        SMatch seg2;
        seg2.first  = best_p.first;
        seg2.second = best_p.second;
        seg2.len    = seg.len + seg.first - best_p.first;
        seg = seg2;
    }   
} 


NAdapterSearch::CSimpleUngappedAligner::SMatch 
NAdapterSearch::CSimpleUngappedAligner::FindSingleBest(
        const char* query,
        size_t len) const
{
    SMatch best_match;
 
    typedef map<TPos, SMatch> TMatches;
    TMatches matches; //diag -> match

    TWords words;
    s_Translate(query, len, false, words);

    for(size_t i = 0; i < words.size(); i++) {
        TWord w = words[i];
       
        TPosRange r(m_vec_index.begin() + w,
                    m_vec_index.begin() + w + 1);

        if(*r.first == NULLPOS) {
            continue;
        } else if(*r.first == MULTIPOS) {
            r = m_map_index.find(w)->second;
        }

        // For each matched position in db for this word
        // create a Match and try to merge it with existing
        // overlapping Match on this diag, if any. If can't merge,
        // we'll never be able to merge anything into the old mMatch
        // because following matches on this diag will be even
        // farther on the antidiag, so we can drop the old match
        // (but keep it it is best so far), and replace it with the
        // new one.
        for(TPositions::const_iterator it = r.first; it != r.second; ++it) {
            SMatch new_match = x_CreateMatch(i, *it);
            SMatch& old_match = matches[new_match.first];

            if(!s_Merge(old_match, new_match)) {
                best_match = x_GetBetterOf(best_match, old_match);
                old_match = new_match;
            }
        }
    }

    ITERATE(TMatches, it, matches) {
        best_match = x_GetBetterOf(best_match, it->second);
    }

    // Convert (diag,antidiag) representation to (query,target)
    SMatch match;
    match.len            = best_match.len / 2;
    match.first          = (best_match.first + best_match.second) / 2;
    match.second         = best_match.second - match.first;

    // Do ungapped extension both ways.
    x_Extend(match, query, len, true);
    x_Extend(match, query, len, false);

    return match;
}
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066
0067
0068
0069
0070
0071
0072
0073
0074
0075
0076
0077
0078
0079
0080
0081
0082
0083
0084
0085
0086
0087
0088
0089
0090
0091
0092
0093
0094
0095
0096
0097
0098
0099
0100
0101
0102
0103
0104
0105
0106
0107
0108
0109
0110
0111
0112
0113
0114
0115
0116
0117
0118
0119
0120
0121
0122
0123
0124
0125
0126
0127
0128
0129
0130
0131
0132
0133
0134
0135
0136
0137
0138
0139
0140
0141
0142
0143
0144
0145
0146
0147
0148
0149
0150
0151
0152
0153
0154
0155
0156
0157
0158
0159
0160
0161
0162
0163
0164
0165
0166
0167
0168
0169
0170
0171
0172
0173
0174
0175
0176
0177
0178
0179
0180
0181
0182
0183
0184
0185
0186
0187
0188
0189
0190
0191
0192
0193
0194
0195
0196
0197
0198
0199
0200
0201
0202
0203
0204
0205
0206
0207
0208
0209
0210
0211
0212
0213
0214
0215
0216
0217
0218
0219
0220
0221
0222
0223
0224
0225
0226
0227
0228
0229
0230
0231
0232
0233
0234
0235
0236
0237
0238
0239
0240
0241
0242
0243
0244
0245
0246
0247
0248
0249
0250
0251
0252
0253
0254
0255
0256
0257
0258
0259
0260
0261
0262
0263
0264
0265
0266
0267
0268
0269
0270
0271
0272
0273
0274
0275
0276
0277
0278
0279
0280
0281
0282
0283
0284
0285
0286
0287
0288
0289
0290
0291
0292
0293
0294
0295
0296
0297
0298
0299
0300
0301
0302
0303
0304
0305
0306
0307
0308
0309
0310
0311
0312
0313
0314
0315
0316
0317
0318
0319
0320
0321
0322
0323
0324
0325
0326
0327
0328
0329
0330
0331
0332
0333
0334
0335
0336
0337
0338
0339
0340
0341
0342
0343
0344
0345
0346
0347
0348
0349
0350
0351
0352
0353
0354
0355
0356
0357
0358
0359
0360
0361
0362
0363
0364
0365
0366
0367
0368
0369
0370
0371
0372
0373
0374
0375
0376
0377
0378
0379
0380
0381
0382
0383
0384
0385
0386
0387
0388
0389
0390
0391
0392
0393
0394
0395
0396
0397
0398
0399
0400
0401
0402
0403
0404
0405
0406
0407
0408
0409
0410
0411
0412
0413
0414
0415
0416
0417
0418
0419
0420
0421
0422
0423
0424
0425
0426
0427
0428
0429
0430
0431
0432
0433
0434
0435
0436
0437
0438
0439
0440
0441
0442
0443
0444
0445
0446
0447
0448
0449
0450
0451
0452
0453
0454
0455
0456
0457
0458
0459
0460
0461
0462
0463
0464
0465
0466
0467
0468
0469
0470
0471
0472
0473
0474
0475
0476
0477
0478
0479
0480
0481
0482
0483
0484
0485
0486
0487
0488
0489
0490
0491
0492
0493
0494
0495
0496
0497
0498
0499
0500
0501
0502
0503
0504
0505
0506
0507
0508
0509
0510
0511
0512
0513
0514
0515
0516
0517
0518
0519
0520
0521
0522
0523
0524
0525
0526
0527
0528
0529
0530
0531
0532
0533
0534
0535
0536
0537
0538
0539
0540
0541
0542
0543
0544
0545
0546
0547
0548
0549
0550
0551
0552
0553
0554
0555
0556
0557
0558
0559
0560
0561
0562
0563
0564
0565
0566
0567
0568
0569
0570
0571
0572
0573
0574
0575
0576
0577
0578
0579
0580
0581
0582
0583
0584
0585
0586
0587
0588
0589
0590
0591
0592
0593
0594
0595
0596
0597
0598
0599
0600
0601
0602
0603
0604
0605
0606
0607
0608
0609
0610
0611
0612
0613
0614
0615
0616
0617
0618
0619
0620
0621
0622
0623
0624
0625
0626
0627
0628
0629
0630
0631
0632
0633
0634
0635
0636
0637
0638
0639
0640
0641
0642
0643
0644
0645
0646
0647
0648
0649
0650
0651
0652
0653
0654
0655
0656
0657
0658
0659
0660
0661
0662
0663
0664
0665
0666
0667
0668
0669
0670
0671
0672
0673
0674
0675
0676
0677
0678
0679
0680
0681
0682
0683
0684
0685
0686
0687
0688
0689
0690
0691
0692
0693
0694
0695
0696
0697
0698
0699
0700
0701
0702
0703
0704
0705
0706
0707
0708
0709
0710
0711
0712
0713
0714
0715
0716
0717
0718
0719
0720
0721
0722
0723
0724
0725
0726
0727
0728
0729
0730
0731
0732
0733
0734
0735
0736
0737
0738
0739
0740
0741
0742
0743
0744
0745
0746
0747
0748
0749
0750
0751
0752
0753
0754
0755
0756
0757
0758
0759
0760
0761
0762
0763
0764
0765
0766
0767
0768
0769
0770
0771
0772
0773
0774
0775
0776
0777
0778
0779
0780
0781
0782
0783
0784
0785
0786
0787
0788
0789
0790