NCBI C++ ToolKit
blast_kappa.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_kappa.c 74807 2016-09-27 14:54:05Z rackerst $
2  * ==========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Alejandro Schaffer, Mike Gertz (ported to algo/blast by Tom Madden)
27  *
28  */
29 
30 /** @file blast_kappa.c
31  * Utilities for doing Smith-Waterman alignments and adjusting the scoring
32  * system for each match in blastpgp
33  */
34 
35 #include <float.h>
45 #include "blast_psi_priv.h"
46 #include "blast_gapalign_priv.h"
47 #include "blast_hits_priv.h"
48 #include "blast_posit.h"
51 
52 #ifdef _OPENMP
53 #include <omp.h>
54 #endif
55 
61 
62 /* Define KAPPA_PRINT_DIAGNOSTICS to turn on printing of
63  * diagnostic information from some routines. */
64 
65 /** Compile-time option; if set to a true value, then blastp runs
66  that use Blast_RedoAlignmentCore to compute the traceback will not
67  SEG the subject sequence */
68 #ifndef KAPPA_BLASTP_NO_SEG_SEQUENCE
69 #define KAPPA_BLASTP_NO_SEG_SEQUENCE 0
70 #endif
71 
72 
73 /** Compile-time option; if set to a true value, then blastp runs
74  that use Blast_RedoAlignmentCore to compute the traceback will not
75  SEG the subject sequence */
76 #ifndef KAPPA_TBLASTN_NO_SEG_SEQUENCE
77 #define KAPPA_TBLASTN_NO_SEG_SEQUENCE 0
78 #endif
79 
80 
81 /**
82  * Given a list of HSPs with (possibly) high-precision scores, rescale
83  * the scores to have standard precision and set the scale-independent
84  * bit scores. This routine does *not* resort the list; it is assumed
85  * that the list is already sorted according to e-values that have been
86  * computed using the initial, higher-precision scores.
87  *
88  * @param hsp_list the HSP list
89  * @param logK Karlin-Altschul statistical parameter [in]
90  * @param lambda Karlin-Altschul statistical parameter [in]
91  * @param scoreDivisor the value by which reported scores are to be
92  */
93 static void
95  double lambda,
96  double logK,
97  double scoreDivisor)
98 {
99  int hsp_index;
100  for(hsp_index = 0; hsp_index < hsp_list->hspcnt; hsp_index++) {
101  BlastHSP * hsp = hsp_list->hsp_array[hsp_index];
102 
103  hsp->score = BLAST_Nint(((double) hsp->score) / scoreDivisor);
104  /* Compute the bit score using the newly computed scaled score. */
105  hsp->bit_score = (hsp->score*lambda*scoreDivisor - logK)/NCBIMATH_LN2;
106  }
107 }
108 
109 
110 /**
111  * Adjusts the E-values in a BLAST_HitList to be composites of
112  * a composition-based P-value and a score/alignment-based P-value
113  *
114  * @param hsp_list the hitlist whose E-values need to be adjusted
115  * @param comp_p_value P-value from sequence composition
116  * @param seqSrc a source of sequence data
117  * @param subject_length length of database sequence
118  * @param query_context info about this query context; needed when
119  * multiple queries are being used
120  * @param LambdaRatio the ratio between the observed value of Lambda
121  * and the predicted value of lambda (used to print
122  * diagnostics)
123  * @param subject_id the subject id of this sequence (used to print
124  * diagnostics)
125  **/
126 static void
128  BlastHSPList *hsp_list,
129  double comp_p_value,
130  const BlastSeqSrc* seqSrc,
131  Int4 subject_length,
132  const BlastContextInfo * query_context,
133  double LambdaRatio,
134  int subject_id)
135 {
136  /* Smallest observed evalue after adjustment */
137  double best_evalue = DBL_MAX;
138 
139  /* True length of the query */
140  int query_length = query_context->query_length;
141  /* Length adjustment to compensate for edge effects */
142  int length_adjustment = query_context->length_adjustment;
143 
144  /* Effective lengths of the query, subject, and database */
145  double query_eff = MAX((query_length - length_adjustment), 1);
146  double subject_eff = MAX((subject_length - length_adjustment), 1.0);
147  double dblen_eff = (double) query_context->eff_searchsp / query_eff;
148 
149  /* Scale factor to convert the database E-value to the sequence E-value */
150  double db_to_sequence_scale = subject_eff / dblen_eff;
151 
152  int hsp_index;
153  for (hsp_index = 0; hsp_index < hsp_list->hspcnt; hsp_index++) {
154  /* for all HSPs */
155  double align_p_value; /* P-value for the alignment score */
156  double combined_p_value; /* combination of two P-values */
157 
158  /* HSP for this iteration */
159  BlastHSP * hsp = hsp_list->hsp_array[hsp_index];
160 #ifdef KAPPA_PRINT_DIAGNOSTICS
161  /* Original E-value, saved if diagnostics are printed. */
162  double old_e_value = hsp->evalue;
163 #endif
164  hsp->evalue *= db_to_sequence_scale;
165 
166  align_p_value = BLAST_KarlinEtoP(hsp->evalue);
167  combined_p_value = Blast_Overall_P_Value(comp_p_value,align_p_value);
168  hsp->evalue = BLAST_KarlinPtoE(combined_p_value);
169  hsp->evalue /= db_to_sequence_scale;
170 
171  if (hsp->evalue < best_evalue) {
172  best_evalue = hsp->evalue;
173  }
174 
175 #ifdef KAPPA_PRINT_DIAGNOSTICS
176  if (seqSrc){
177  int sequence_gi; /*GI of a sequence*/
178  Blast_GiList* gi_list; /*list of GI's for a sequence*/
179  gi_list = BlastSeqSrcGetGis(seqSrc, (void *) (&subject_id));
180  if ((gi_list) && (gi_list->num_used > 0)) {
181  sequence_gi = gi_list->data[0];
182  } else {
183  sequence_gi = (-1);
184  }
185  printf("GI %d Lambda ratio %e comp. p-value %e; "
186  "adjust E-value of query length %d match length "
187  "%d from %e to %e\n",
188  sequence_gi, LambdaRatio, comp_p_value,
189  query_length, subject_length, old_e_value, hsp->evalue);
190  Blast_GiListFree(gi_list);
191  }
192 #endif
193  } /* end for all HSPs */
194 
195  hsp_list->best_evalue = best_evalue;
196 
197  /* suppress unused parameter warnings if diagnostics are not printed */
198  (void) seqSrc;
199  (void) query_length;
200  (void) LambdaRatio;
201  (void) subject_id;
202 }
203 
204 
205 /**
206  * Remove from a hitlist all HSPs that are completely contained in an
207  * HSP that occurs earlier in the list and that:
208  * - is on the same strand; and
209  * - has equal or greater score. T
210  * The hitlist should be sorted by some measure of significance before
211  * this routine is called.
212  * @param hsp_array array to be reaped
213  * @param hspcnt length of hsp_array
214  */
215 static void
216 s_HitlistReapContained(BlastHSP * hsp_array[], Int4 * hspcnt)
217 {
218  Int4 iread; /* iteration index used to read the hitlist */
219  Int4 iwrite; /* iteration index used to write to the hitlist */
220  Int4 old_hspcnt; /* number of HSPs in the hitlist on entry */
221 
222  old_hspcnt = *hspcnt;
223 
224  for (iread = 1; iread < *hspcnt; iread++) {
225  /* for all HSPs in the hitlist */
226  Int4 ireadBack; /* iterator over indices less than iread */
227  BlastHSP *hsp1; /* an HSP that is a candidate for deletion */
228 
229  hsp1 = hsp_array[iread];
230  for (ireadBack = 0; ireadBack < iread && hsp1 != NULL; ireadBack++) {
231  /* for all HSPs before hsp1 in the hitlist and while hsp1
232  * has not been deleted */
233  BlastHSP *hsp2; /* an HSP that occurs earlier in hsp_array
234  * than hsp1 */
235  hsp2 = hsp_array[ireadBack];
236 
237  if( hsp2 == NULL ) { /* hsp2 was deleted in a prior iteration. */
238  continue;
239  }
240  if (hsp2->query.frame == hsp1->query.frame &&
241  hsp2->subject.frame == hsp1->subject.frame) {
242  /* hsp1 and hsp2 are in the same query/subject frame. */
243  if (CONTAINED_IN_HSP
244  (hsp2->query.offset, hsp2->query.end, hsp1->query.offset,
245  hsp2->subject.offset, hsp2->subject.end,
246  hsp1->subject.offset) &&
248  (hsp2->query.offset, hsp2->query.end, hsp1->query.end,
249  hsp2->subject.offset, hsp2->subject.end,
250  hsp1->subject.end) &&
251  hsp1->score <= hsp2->score) {
252  hsp1 = hsp_array[iread] = Blast_HSPFree(hsp_array[iread]);
253  }
254  } /* end if hsp1 and hsp2 are in the same query/subject frame */
255  } /* end for all HSPs before hsp1 in the hitlist */
256  } /* end for all HSPs in the hitlist */
257 
258  /* Condense the hsp_array, removing any NULL items. */
259  iwrite = 0;
260  for (iread = 0; iread < *hspcnt; iread++) {
261  if (hsp_array[iread] != NULL) {
262  hsp_array[iwrite++] = hsp_array[iread];
263  }
264  }
265  *hspcnt = iwrite;
266  /* Fill the remaining memory in hsp_array with NULL pointers. */
267  for ( ; iwrite < old_hspcnt; iwrite++) {
268  hsp_array[iwrite] = NULL;
269  }
270 }
271 
272 
273 /** A callback used to free an EditScript that has been stored in a
274  * BlastCompo_Alignment. */
275 static void s_FreeEditScript(void * edit_script)
276 {
277  if (edit_script != NULL)
278  GapEditScriptDelete(edit_script);
279 }
280 
281 
282 /**
283  * Converts a list of objects of type BlastCompo_Alignment to an
284  * new object of type BlastHSPList and returns the result. Conversion
285  * in this direction is lossless. The list passed to this routine is
286  * freed to ensure that there is no aliasing of fields between the
287  * list of BlastCompo_Alignments and the new hitlist.
288  *
289  * @param hsp_list The hsp_list to populate
290  * @param alignments A list of distinct alignments; freed before return [in]
291  * @param oid Ordinal id of a database sequence [in]
292  * @param queryInfo information about all queries in this search [in]
293  * @param frame query frame
294  * @return Allocated and filled BlastHSPList structure.
295  */
296 static int
298  BlastCompo_Alignment ** alignments,
299  int oid,
300  const BlastQueryInfo* queryInfo,
301  int frame)
302 {
303  int status = 0; /* return code for any routine called */
304  static const int unknown_value = 0; /* dummy constant to use when a
305  parameter value is not known */
306  BlastCompo_Alignment * align; /* an alignment in the list */
307 
308  if (hsp_list == NULL) {
309  return -1;
310  }
311  hsp_list->oid = oid;
312 
313  for (align = *alignments; NULL != align; align = align->next) {
314  BlastHSP * new_hsp = NULL;
315  GapEditScript * editScript = align->context;
316  align->context = NULL;
317 
318  status = Blast_HSPInit(align->queryStart, align->queryEnd,
319  align->matchStart, align->matchEnd,
320  unknown_value, unknown_value,
321  align->queryIndex,
322  frame, (Int2) align->frame, align->score,
323  &editScript, &new_hsp);
324  switch (align->matrix_adjust_rule) {
325  case eDontAdjustMatrix:
327  break;
330  break;
331  default:
333  break;
334  }
335  if (status != 0)
336  break;
337  /* At this point, the subject and possibly the query sequence have
338  * been filtered; since it is not clear that num_ident of the
339  * filtered sequences, rather than the original, is desired,
340  * explicitly leave num_ident blank. */
341  new_hsp->num_ident = 0;
342 
343  status = Blast_HSPListSaveHSP(hsp_list, new_hsp);
344  if (status != 0)
345  break;
346  }
347  if (status == 0) {
349  Blast_HSPListSortByScore(hsp_list);
350  } else {
351  hsp_list = Blast_HSPListFree(hsp_list);
352  }
353  return 0;
354 }
355 
356 Int4 s_GetSubjectLength(Int4 total_subj_length, EBlastProgramType program_number)
357 {
358  return ((program_number == eBlastTypeRpsTblastn) ?
359  (GET_NUCL_LENGTH(total_subj_length) - 1 ) /3 : total_subj_length);
360 }
361 
362 
363 /**
364  * Adding evalues to a list of HSPs and remove those that do not have
365  * sufficiently good (low) evalue.
366  *
367  * @param *pbestScore best (highest) score in the list
368  * @param *pbestEvalue best (lowest) evalue in the list
369  * @param hsp_list the list
370  * @param seqSrc a source of sequence data
371  * @param subject_length length of the subject sequence
372  * @param program_number the type of BLAST search being performed
373  * @param queryInfo information about the queries
374  * @param context_index the index of the query corresponding to
375  * the HSPs in hsp_list
376  * @param sbp the score block for this search
377  * @param hitParams parameters used to assign evalues and
378  * decide whether to save hits.
379  * @param pvalueForThisPair composition p-value
380  * @param LambdaRatio lambda ratio, if available
381  * @param subject_id index of subject
382  *
383  * @return 0 on success; -1 on failure (can fail because some methods
384  * of generating evalues use auxiliary structures)
385  */
386 static int
387 s_HitlistEvaluateAndPurge(int * pbestScore, double *pbestEvalue,
388  BlastHSPList * hsp_list,
389  const BlastSeqSrc* seqSrc,
390  int subject_length,
391  EBlastProgramType program_number,
392  const BlastQueryInfo* queryInfo,
393  int context_index,
394  BlastScoreBlk* sbp,
395  const BlastHitSavingParameters* hitParams,
396  double pvalueForThisPair,
397  double LambdaRatio,
398  int subject_id)
399 {
400  int status = 0;
401  *pbestEvalue = DBL_MAX;
402  *pbestScore = 0;
403  if (hitParams->do_sum_stats) {
404  status = BLAST_LinkHsps(program_number, hsp_list, queryInfo,
405  subject_length, sbp,
406  hitParams->link_hsp_params, TRUE);
407  } else {
408 
409 
410  status =
411  Blast_HSPListGetEvalues(program_number, queryInfo,
412  s_GetSubjectLength(subject_length, program_number),
413  hsp_list, TRUE, FALSE, sbp,
414  0.0, /* use a non-zero gap decay
415  only when linking HSPs */
416  1.0); /* Use scaling factor equal to
417  1, because both scores and
418  Lambda are scaled, so they
419  will cancel each other. */
420  }
421  if (eBlastTypeBlastp == program_number ||
422  eBlastTypeBlastx == program_number) {
423  if ((0 <= pvalueForThisPair) && (pvalueForThisPair <= 1)) {
424  s_AdjustEvaluesForComposition(hsp_list, pvalueForThisPair, seqSrc,
425  subject_length,
426  &queryInfo->contexts[context_index],
427  LambdaRatio, subject_id);
428  }
429  }
430  if (status == 0) {
431  Blast_HSPListReapByEvalue(hsp_list, hitParams->options);
432  if (hsp_list->hspcnt > 0) {
433  *pbestEvalue = hsp_list->best_evalue;
434  *pbestScore = hsp_list->hsp_array[0]->score;
435  }
436  }
437  return status == 0 ? 0 : -1;
438 }
439 
440 /** Compute the number of identities for the HSPs in the hsp_list
441  * @note Should work for blastp and tblastn now.
442  *
443  * @param query_blk the query sequence data [in]
444  * @param query_info structure describing the query_blk structure [in]
445  * @param seq_src source of subject sequence data [in]
446  * @param hsp_list list of HSPs to be processed [in|out]
447  * @param scoring_options scoring options [in]
448  * @gen_code_string Genetic code for tblastn [in]
449  */
450 static void
452  const BlastQueryInfo* query_info,
453  BLAST_SequenceBlk* subject_blk,
454  const BlastSeqSrc* seq_src,
455  BlastHSPList* hsp_list,
456  const BlastScoringOptions* scoring_options,
457  const Uint1* gen_code_string,
458  const BlastScoreBlk* sbp)
459 {
460  Uint1* query = NULL;
461  Uint1* query_nomask = NULL;
462  Uint1* subject = NULL;
463  const EBlastProgramType program_number = scoring_options->program_number;
464  const Boolean kIsOutOfFrame = scoring_options->is_ooframe;
465  const EBlastEncoding encoding = Blast_TracebackGetEncoding(program_number);
466  BlastSeqSrcGetSeqArg seq_arg;
467  Int2 status = 0;
468  int i;
469  SBlastTargetTranslation* target_t = NULL;
470 
471  if ( !hsp_list) return;
472 
473  /* Initialize the subject */
474  if (seq_src){
475  memset((void*) &seq_arg, 0, sizeof(seq_arg));
476  seq_arg.oid = hsp_list->oid;
477  seq_arg.encoding = encoding;
478  seq_arg.check_oid_exclusion = TRUE;
479  status = BlastSeqSrcGetSequence(seq_src, (void*) &seq_arg);
480  ASSERT(status == 0);
481  (void)status; /* to pacify compiler warning */
482 
483  if (program_number == eBlastTypeTblastn) {
484  subject_blk = seq_arg.seq;
486  subject_blk,
487  gen_code_string,
489  kIsOutOfFrame,
490  &target_t
491  );
492  } else {
493  subject = seq_arg.seq->sequence;
494  }
495  } else {
496  subject = subject_blk->sequence;
497  }
498 
499  for (i = 0; i < hsp_list->hspcnt; i++) {
500  BlastHSP* hsp = hsp_list->hsp_array[i];
501 
502  /* Initialize the query */
503  if (program_number == eBlastTypeBlastx && kIsOutOfFrame) {
504  Int4 context = hsp->context - hsp->context % CODON_LENGTH;
505  Int4 context_offset = query_info->contexts[context].query_offset;
506  query = query_blk->oof_sequence + CODON_LENGTH + context_offset;
507  query_nomask = query_blk->oof_sequence + CODON_LENGTH + context_offset;
508  } else {
509  query = query_blk->sequence +
510  query_info->contexts[hsp->context].query_offset;
511  query_nomask = query_blk->sequence_nomask +
512  query_info->contexts[hsp->context].query_offset;
513  }
514 
515  /* Translate subject if needed. */
516  if (program_number == eBlastTypeTblastn) {
517  const Uint1* target_sequence = Blast_HSPGetTargetTranslation(target_t, hsp, NULL);
518  status = Blast_HSPGetNumIdentitiesAndPositives(query, target_sequence, hsp, scoring_options, 0, sbp);
519  }
520  else
521  status = Blast_HSPGetNumIdentitiesAndPositives(query_nomask, subject, hsp, scoring_options, 0, sbp);
522 
523  ASSERT(status == 0);
524  }
525  target_t = BlastTargetTranslationFree(target_t);
526  if (seq_src) {
527  BlastSeqSrcReleaseSequence(seq_src, (void*) &seq_arg);
528  BlastSequenceBlkFree(seq_arg.seq);
529  }
530 }
531 
532 
533 /**
534  * A callback routine: compute lambda for the given score
535  * probabilities.
536  * (@sa calc_lambda_type).
537  */
538 static double
539 s_CalcLambda(double probs[], int min_score, int max_score, double lambda0)
540 {
541 
542  int i; /* loop index */
543  int score_range; /* range of possible scores */
544  double avg; /* expected score of aligning two characters */
545  Blast_ScoreFreq freq; /* score frequency data */
546 
547  score_range = max_score - min_score + 1;
548  avg = 0.0;
549  for (i = 0; i < score_range; i++) {
550  avg += (min_score + i) * probs[i];
551  }
552  freq.score_min = min_score;
553  freq.score_max = max_score;
554  freq.obs_min = min_score;
555  freq.obs_max = max_score;
556  freq.sprob0 = probs;
557  freq.sprob = &probs[-min_score];
558  freq.score_avg = avg;
559 
560  return Blast_KarlinLambdaNR(&freq, lambda0);
561 }
562 
563 
564 /** Fill a two-dimensional array with the frequency ratios that
565  * underlie a position specific score matrix (PSSM).
566  *
567  * @param returnRatios a two-dimensional array with BLASTAA_SIZE
568  * columns
569  * @param numPositions the number of rows in returnRatios
570  * @param query query sequence data, of length numPositions
571  * @param matrixName the name of the position independent matrix
572  * corresponding to this PSSM
573  * @param startNumerator position-specific data used to generate the
574  * PSSM
575  * @return 0 on success; -1 if the named matrix isn't known, or if
576  * there was a memory error
577  * @todo find out what start numerator is.
578  */
579 static int
580 s_GetPosBasedStartFreqRatios(double ** returnRatios,
581  Int4 numPositions,
582  Uint1 * query,
583  const char *matrixName,
584  double **startNumerator)
585 {
586  Int4 i,j; /* loop indices */
587  SFreqRatios * stdFreqRatios = NULL; /* frequency ratios for the
588  named matrix. */
589  double *standardProb; /* probabilities of each
590  letter*/
591  const double kPosEpsilon = 0.0001; /* values below this cutoff
592  are treated specially */
593 
594  stdFreqRatios = _PSIMatrixFrequencyRatiosNew(matrixName);
595  if (stdFreqRatios == NULL) {
596  return -1;
597  }
598  for (i = 0; i < numPositions; i++) {
599  for (j = 0; j < BLASTAA_SIZE; j++) {
600  returnRatios[i][j] = stdFreqRatios->data[query[i]][j];
601  }
602  }
603  stdFreqRatios = _PSIMatrixFrequencyRatiosFree(stdFreqRatios);
604 
605  standardProb = BLAST_GetStandardAaProbabilities();
606  if(standardProb == NULL) {
607  return -1;
608  }
609  /*reverse multiplication done in posit.c*/
610  for (i = 0; i < numPositions; i++) {
611  for (j = 0; j < BLASTAA_SIZE; j++) {
612  if ((standardProb[query[i]] > kPosEpsilon) &&
613  (standardProb[j] > kPosEpsilon) &&
614  (j != eStopChar) && (j != eXchar) &&
615  (startNumerator[i][j] > kPosEpsilon)) {
616  returnRatios[i][j] = startNumerator[i][j] / standardProb[j];
617  }
618  }
619  }
620  sfree(standardProb);
621 
622  return 0;
623 }
624 
625 
626 /**
627  * Fill a two-dimensional array with the frequency ratios that underlie the
628  * named score matrix.
629  *
630  * @param returnRatios a two-dimensional array of size
631  * BLASTAA_SIZE x BLASTAA_SIZE
632  * @param matrixName the name of a matrix
633  * @return 0 on success; -1 if the named matrix isn't known, or if
634  * there was a memory error
635  */
636 static int
637 s_GetStartFreqRatios(double ** returnRatios,
638  const char *matrixName)
639 {
640  /* Loop indices */
641  int i,j;
642  /* Frequency ratios for the matrix */
643  SFreqRatios * stdFreqRatios = NULL;
644 
645  stdFreqRatios = _PSIMatrixFrequencyRatiosNew(matrixName);
646  if (stdFreqRatios == NULL) {
647  return -1;
648  }
649  for (i = 0; i < BLASTAA_SIZE; i++) {
650  for (j = 0; j < BLASTAA_SIZE; j++) {
651  returnRatios[i][j] = stdFreqRatios->data[i][j];
652  }
653  }
654  stdFreqRatios = _PSIMatrixFrequencyRatiosFree(stdFreqRatios);
655 
656  return 0;
657 }
658 
659 
660 /** SCALING_FACTOR is a multiplicative factor used to get more bits of
661  * precision in the integer matrix scores. It cannot be arbitrarily
662  * large because we do not want total alignment scores to exceed
663  * -(BLAST_SCORE_MIN) */
664 #define SCALING_FACTOR 32
665 
666 
667 /**
668  * Produce a scaled-up version of the position-specific matrix
669  * with a given set of position-specific residue frequencies.
670  *
671  * @param fillPosMatrix is the matrix to be filled
672  * @param matrixName name of the standard substitution matrix [in]
673  * @param posFreqs PSSM's frequency ratios [in]
674  * @param query Query sequence data [in]
675  * @param queryLength Length of the query sequence above [in]
676  * @param sbp stores various parameters of the search
677  * @param scale_factor amount by which ungapped parameters should be
678  * scaled.
679  * @return 0 on success; -1 on failure
680  */
681 static int
682 s_ScalePosMatrix(int ** fillPosMatrix,
683  const char * matrixName,
684  double ** posFreqs,
685  Uint1 * query,
686  int queryLength,
687  BlastScoreBlk* sbp,
688  double scale_factor)
689 {
690  /* Data used by scaling routines */
691  Kappa_posSearchItems *posSearch = NULL;
692  /* A reduced collection of search parameters used by PSI-blast */
693  Kappa_compactSearchItems *compactSearch = NULL;
694  /* Representation of a PSSM internal to PSI-blast */
695  _PSIInternalPssmData* internal_pssm = NULL;
696  /* return code */
697  int status = 0;
698 
699  posSearch = Kappa_posSearchItemsNew(queryLength, matrixName,
700  fillPosMatrix, posFreqs);
701  compactSearch = Kappa_compactSearchItemsNew(query, queryLength, sbp);
702  /* Copy data into new structures */
703  internal_pssm = _PSIInternalPssmDataNew(queryLength, BLASTAA_SIZE);
704  if (posSearch == NULL || compactSearch == NULL || internal_pssm == NULL) {
705  status = -1;
706  goto cleanup;
707  }
708  _PSICopyMatrix_int(internal_pssm->pssm, posSearch->posMatrix,
709  internal_pssm->ncols, internal_pssm->nrows);
710  _PSICopyMatrix_int(internal_pssm->scaled_pssm,
711  posSearch->posPrivateMatrix,
712  internal_pssm->ncols, internal_pssm->nrows);
713  _PSICopyMatrix_double(internal_pssm->freq_ratios,
714  posSearch->posFreqs, internal_pssm->ncols,
715  internal_pssm->nrows);
716  status = _PSIConvertFreqRatiosToPSSM(internal_pssm, query, sbp,
717  compactSearch->standardProb);
718  if (status != 0) {
719  goto cleanup;
720  }
721  /* Copy data from new structures to posSearchItems */
722  _PSICopyMatrix_int(posSearch->posMatrix, internal_pssm->pssm,
723  internal_pssm->ncols, internal_pssm->nrows);
725  internal_pssm->scaled_pssm,
726  internal_pssm->ncols, internal_pssm->nrows);
727  _PSICopyMatrix_double(posSearch->posFreqs,
728  internal_pssm->freq_ratios,
729  internal_pssm->ncols, internal_pssm->nrows);
730  status = Kappa_impalaScaling(posSearch, compactSearch, (double)
731  scale_factor, FALSE, sbp);
732 cleanup:
733  internal_pssm = _PSIInternalPssmDataFree(internal_pssm);
734  posSearch = Kappa_posSearchItemsFree(posSearch);
735  compactSearch = Kappa_compactSearchItemsFree(compactSearch);
736 
737  return status;
738 }
739 
740 
741 /**
742  * Convert an array of HSPs to a list of BlastCompo_Alignment objects.
743  * The context field of each BlastCompo_Alignment is set to point to the
744  * corresponding HSP.
745  *
746  * @param self the array of alignment to be filled
747  * @param numAligns number of alignments
748  * @param hsp_array an array of HSPs
749  * @param hspcnt the length of hsp_array
750  * @param init_context the initial context to process
751  * @param queryInfo information about the concatenated query
752  * @param localScalingFactor the amount by which this search is scaled
753  *
754  * @return the new list of alignments; or NULL if there is an out-of-memory
755  * error (or if the original array is empty)
756  */
757 static int
759  int *numAligns,
760  BlastHSP * hsp_array[], Int4 hspcnt,
761  int init_context,
762  const BlastQueryInfo* queryInfo,
763  double localScalingFactor)
764 {
765  BlastCompo_Alignment * tail[6]; /* last element in aligns */
766  int hsp_index; /* loop index */
767  int frame_index;
768 
769  for (frame_index = 0; frame_index < 6; frame_index++) {
770  tail[frame_index] = NULL;
771  numAligns[frame_index] = 0;
772  }
773 
774  for (hsp_index = 0; hsp_index < hspcnt; hsp_index++) {
775  BlastHSP * hsp = hsp_array[hsp_index]; /* current HSP */
776  BlastCompo_Alignment * new_align; /* newly-created alignment */
777  frame_index = hsp->context - init_context;
778  ASSERT(frame_index < 6 && frame_index >= 0);
779  /* Incoming alignments will have coordinates of the query
780  portion relative to a particular query context; they must
781  be shifted for used in the composition_adjustment library.
782  */
783  new_align =
784  BlastCompo_AlignmentNew((int) (hsp->score * localScalingFactor),
786  hsp->query.offset, hsp->query.end, hsp->context,
787  hsp->subject.offset, hsp->subject.end,
788  hsp->subject.frame, hsp);
789  if (new_align == NULL) /* out of memory */
790  return -1;
791  if (tail[frame_index] == NULL) { /* if the list aligns is empty; */
792  /* make new_align the first element in the list */
793  self[frame_index] = new_align;
794  } else {
795  /* otherwise add new_align to the end of the list */
796  tail[frame_index]->next = new_align;
797  }
798  tail[frame_index] = new_align;
799  numAligns[frame_index]++;
800  }
801  return 0;
802 }
803 
804 
805 /**
806  * Redo a S-W alignment using an x-drop alignment. The result will
807  * usually be the same as the S-W alignment. The call to ALIGN_EX
808  * attempts to force the endpoints of the alignment to match the
809  * optimal endpoints determined by the Smith-Waterman algorithm.
810  * ALIGN_EX is used, so that if the data structures for storing BLAST
811  * alignments are changed, the code will not break
812  *
813  * @param query the query data
814  * @param queryStart start of the alignment in the query sequence
815  * @param queryEnd end of the alignment in the query sequence,
816  * as computed by the Smith-Waterman algorithm
817  * @param subject the subject (database) sequence
818  * @param matchStart start of the alignment in the subject sequence
819  * @param matchEnd end of the alignment in the query sequence,
820  * as computed by the Smith-Waterman algorithm
821  * @param gap_align parameters for a gapped alignment
822  * @param scoringParams Settings for gapped alignment.[in]
823  * @param score score computed by the Smith-Waterman algorithm
824  * @param queryAlignmentExtent length of the alignment in the query sequence,
825  * as computed by the x-drop algorithm
826  * @param matchAlignmentExtent length of the alignment in the subject
827  * sequence, as computed by the x-drop algorithm
828  * @param newScore alignment score computed by the x-drop
829  * algorithm
830  */
831 static void
833  Int4 queryStart,
834  Int4 queryEnd,
836  Int4 matchStart,
837  Int4 matchEnd,
838  BlastGapAlignStruct* gap_align,
839  const BlastScoringParameters* scoringParams,
840  Int4 score,
841  Int4 * queryAlignmentExtent,
842  Int4 * matchAlignmentExtent,
843  Int4 * newScore)
844 {
845  Int4 XdropAlignScore; /* alignment score obtained using X-dropoff
846  * method rather than Smith-Waterman */
847  Int4 doublingCount = 0; /* number of times X-dropoff had to be
848  * doubled */
849  Int4 gap_x_dropoff_orig = gap_align->gap_x_dropoff;
850 
853  do {
854  XdropAlignScore =
855  ALIGN_EX(&(query->data[queryStart]) - 1,
856  &(subject->data[matchStart]) - 1,
857  queryEnd - queryStart + 1, matchEnd - matchStart + 1,
858  queryAlignmentExtent,
859  matchAlignmentExtent, gap_align->fwd_prelim_tback,
860  gap_align, scoringParams, queryStart - 1, FALSE, FALSE,
861  NULL);
862 
863  gap_align->gap_x_dropoff *= 2;
864  doublingCount++;
865  if((XdropAlignScore < score) && (doublingCount < 3)) {
867  }
868  } while((XdropAlignScore < score) && (doublingCount < 3));
869 
870  gap_align->gap_x_dropoff = gap_x_dropoff_orig;
871  *newScore = XdropAlignScore;
872 }
873 
874 
875 /**
876  * BLAST-specific information that is associated with a
877  * BlastCompo_MatchingSequence.
878  */
879 typedef struct
881  EBlastProgramType prog_number; /**< identifies the type of blast
882  search being performed. The type
883  of search determines how sequence
884  data should be obtained. */
885  const BlastSeqSrc* seq_src; /**< BLAST sequence data source */
886  BlastSeqSrcGetSeqArg seq_arg; /**< argument to GetSequence method
887  of the BlastSeqSrc (@todo this
888  structure was designed to be
889  allocated on the stack, i.e.: in
890  Kappa_MatchingSequenceInitialize) */
892 
893 
894 /** Release the resources associated with a matching sequence. */
895 static void
897 {
898  if (self != NULL) {
899  if (self->index >=0) {
900  BlastKappa_SequenceInfo * local_data = self->local_data;
901  if (self->length > 0) {
903  &local_data->seq_arg);
904  BlastSequenceBlkFree(local_data->seq_arg.seq);
905  }
906  free(self->local_data);
907  }
908  self->local_data = NULL;
909  }
910 }
911 
912 /**
913  * Do a simple gapped extension to the right from the beginning of query and
914  * subject ranges examining only matches and mismatches. The extension stops
915  * when there are more than max_shift mismatches or mismatches or gaps are not
916  * followed by two identical matches. This is a simplified version of the
917  * Danielle and Jean Thierry-Miegs' jumper
918  * alignment implemented in NCBI Magic
919  * http://www.ncbi.nlm.nih.gov/IEB/Research/Acembly/Download/Downloads.html
920  *
921  * @param query_seq Query sequence [in]
922  * @param query_len Query length [in]
923  * @param subject_seq Subject sequence [in]
924  * @param subject_len Subject length [in]
925  * @param max_shift Maximum number of mismatches or gaps, extension stops if
926  * this number is reached [in]
927  * @param query_ext_len Extension length on the query [out]
928  * @param subject_ext_len Extension length on the subject [out]
929  * @param align_len Alignment length [out]
930  * @return Number of identical residues
931  */
932 static int s_ExtendRight(Uint1* query_seq, int query_len,
933  Uint1* subject_seq, int subject_len,
934  int max_shift,
935  int* query_ext_len, int* subject_ext_len,
936  int* align_len)
937 {
938  int num_identical = 0;
939  int q_pos, s_pos;
940  int gaps_in_query = 0;
941  int gaps_in_subject = 0;
942  q_pos = 0;
943  s_pos = 0;
944  while (q_pos < query_len && s_pos < subject_len) {
945  int n;
946  int match = 0;
947 
948  while (q_pos < query_len && s_pos < subject_len
949  && query_seq[q_pos] == subject_seq[s_pos]) {
950 
951  num_identical++;
952  q_pos++;
953  s_pos++;
954  }
955 
956  /* try to skip mismatches or gaps */
957  for (n=1; n < max_shift && q_pos + n + 1 < query_len
958  && s_pos + n + 1 < subject_len && !match; n++) {
959 
960  /* mismatches */
961  if (query_seq[q_pos + n] == subject_seq[s_pos + n]
962  && query_seq[q_pos + n + 1] == subject_seq[s_pos + n + 1]) {
963 
964  /* we have already checked that two positions behind mismatches
965  match so we can advance further */
966  q_pos += n + 2;
967  s_pos += n + 2;
968  num_identical += 2;
969  match = 1;
970  }
971 
972  /* gap in subject */
973  if (!match && query_seq[q_pos + n] == subject_seq[s_pos]
974  && query_seq[q_pos + n + 1] == subject_seq[s_pos + 1]) {
975 
976  q_pos += n + 2;
977  s_pos += 2;
978  num_identical += 2;
979  gaps_in_subject += n;
980  match = 1;
981  }
982 
983  /* gap in query */
984  if (!match && query_seq[q_pos] == subject_seq[s_pos + n]
985  && query_seq[q_pos + 1] == subject_seq[s_pos + n + 1]) {
986 
987  q_pos += 2;
988  s_pos += n + 2;
989  num_identical += 2;
990  gaps_in_query += n;
991  match = 1;
992  }
993  }
994 
995  if (match) {
996  continue;
997  }
998 
999  /* exit the loop */
1000  break;
1001  }
1002  *query_ext_len = q_pos;
1003  *subject_ext_len = s_pos;
1004  *align_len = q_pos > s_pos ? q_pos + gaps_in_query : s_pos + gaps_in_subject;
1005 
1006  return num_identical;
1007 }
1008 
1009 
1010 /**
1011  * Extend left from the end of the sequence and subject ranges and count
1012  * identities. The extension stops when there are more than max_shift
1013  * mismatches or mismatches or gaps are not followed by two identical matches.
1014  * See description for s_ExtendRight for more details.
1015  *
1016  * @param query_seq Query sequence [in]
1017  * @param query_len Query length [in]
1018  * @param subject_seq Subject sequence [in]
1019  * @param subject_len Subject length [in]
1020  * @param max_shift Maximum number of mismatches or gaps, extension stops if
1021  * this number is reached [in]
1022  * @param query_ext_len Extension length on the query [out]
1023  * @param subject_ext_len Extension length on the subject [out]
1024  * @param align_len Alignment length [out]
1025  * @return Number of identical residues
1026  */
1027 static int s_ExtendLeft(Uint1* query_seq, int query_len,
1028  Uint1* subject_seq, int subject_len,
1029  int max_shift,
1030  int* query_ext_len, int* subject_ext_len,
1031  int* align_len)
1032 {
1033  int q_pos = query_len - 1;
1034  int s_pos = subject_len - 1;
1035  int num_identical = 0;
1036  int gaps_in_query = 0;
1037  int gaps_in_subject = 0;
1038  while (q_pos >= 0 && s_pos >= 0) {
1039  int n;
1040  int match = 0;
1041 
1042  /* process identies */
1043  while (q_pos > 0 && s_pos > 0 && query_seq[q_pos] == subject_seq[s_pos]) {
1044  num_identical++;
1045  q_pos--;
1046  s_pos--;
1047  }
1048 
1049  /* try to skip mismatches or gaps */
1050  for (n=1;n < max_shift && q_pos - n - 1 > 0 && s_pos - n - 1 > 0
1051  && !match; n++) {
1052 
1053  /* mismatch */
1054  if (query_seq[q_pos - n] == subject_seq[s_pos - n]
1055  && query_seq[q_pos - n - 1] == subject_seq[s_pos - n - 1]) {
1056  q_pos -= n + 2;
1057  s_pos -= n + 2;
1058  num_identical += 2;
1059  match = 1;
1060  }
1061 
1062  /* gap in subject */
1063  if (!match && query_seq[q_pos - n] == subject_seq[s_pos]
1064  && query_seq[q_pos - n - 1] == subject_seq[s_pos - 1]) {
1065  q_pos -= n + 2;
1066  s_pos -= 2;
1067  num_identical += 2;
1068  gaps_in_subject += n;
1069  match = 1;
1070  }
1071 
1072  /* gap in query */
1073  if (!match && query_seq[q_pos] == subject_seq[s_pos - n]
1074  && query_seq[q_pos - 1] == subject_seq[s_pos - n - 1]) {
1075  q_pos -= 2;
1076  s_pos -= n + 2;
1077  num_identical += 2;
1078  gaps_in_query += n;
1079  match = 1;
1080  }
1081  }
1082 
1083  if (match) {
1084  continue;
1085  }
1086 
1087  break;
1088  }
1089  *query_ext_len = query_len - q_pos - 1;
1090  *subject_ext_len = subject_len - s_pos - 1;
1091  *align_len += *query_ext_len > *subject_ext_len ?
1092  *query_ext_len + gaps_in_query : *subject_ext_len + gaps_in_subject;
1093 
1094  return num_identical;
1095 }
1096 
1097 
1098 /**
1099  * Get hash for a word of word_size residues assuming 28-letter alphabet
1100  *
1101  * @param data Sequence [in]
1102  * @param word_size Word size [in]
1103  * @return Hash value
1104  */
1105 static Uint8 s_GetHash(const Uint1* data, int word_size)
1106 {
1107  Uint8 hash = 0;
1108  int k;
1109  for (k=0;k < word_size;k++) {
1110  hash <<= 5;
1111  hash += (Int8)data[k];
1112  }
1113  return hash;
1114 }
1115 
1116 
1117 /**
1118  * Find a local number of identical residues in two aligned sequences by
1119  * finding word matches and doing a simple gapped extensions from the word hits
1120  *
1121  * @param query_seq Query sequence [in]
1122  * @param query_hashes Array of query words with index of each word
1123  * corresponding to word position in the query [in]
1124  * @param query_len Query length [in]
1125  * @param subject_seq Subject sequence [in]
1126  * @param subject_len Subject length [in]
1127  * @param max_shift Maximum number of local mismatches or gaps for extensions
1128  * [in]
1129  * @return Number of identical residues
1130  */
1131 static int s_FindNumIdentical(Uint1* query_seq,
1132  const Uint8* query_hashes,
1133  int query_len,
1134  Uint1* subject_seq,
1135  int subject_len,
1136  int max_shift)
1137 {
1138  int word_size = 8; /* word size for k-mer matching */
1139  Uint8 hash = 0;
1140  Uint8 mask = NCBI_CONST_UINT8(0xFFFFFFFFFF); /* mask for computing hash
1141  values */
1142  int query_from = 0;
1143  int subject_from = 0;
1144 
1145  int s_pos; /* position in the subject sequence */
1146  int num_identical = 0; /* number of identical residues found */
1147  Boolean match = FALSE;
1148 
1149  /* if query or subject length is smaller than word size, exit */
1150  if (!query_seq || !query_hashes || !subject_seq
1151  || query_len < word_size || subject_len < word_size) {
1152 
1153  return 0;
1154  }
1155 
1156  /* for each subject position */
1157  for (s_pos = 0; s_pos < subject_len - word_size; s_pos++) {
1158  int q_pos;
1159 
1160  /* find word hash */
1161  if (s_pos == 0 || match) {
1162  hash = s_GetHash(&subject_seq[s_pos], word_size);
1163  }
1164  else {
1165  hash <<= 5;
1166  hash &= mask;
1167  hash += subject_seq[s_pos + word_size - 1];
1168  }
1169 
1170  /* find matching query word; index of hash is position of the word
1171  the query */
1172  for (q_pos = query_from;q_pos < query_len - word_size; q_pos++) {
1173  if (query_hashes[q_pos] == hash) {
1174  break;
1175  }
1176  }
1177 
1178  /* if match */
1179  if (q_pos < query_len - word_size) {
1180  int query_start = q_pos;
1181  int subject_start = s_pos;
1182 
1183  int query_left_len, query_right_len;
1184  int subject_left_len, subject_right_len;
1185  int align_len_left=0, align_len_right=0;
1186 
1187  match = TRUE;
1188  num_identical += word_size;
1189 
1190  /* extend left from word match */
1191  num_identical += s_ExtendLeft(query_seq + query_start - 1,
1192  query_start - query_from,
1193  subject_seq + subject_start - 1,
1194  subject_start - subject_from,
1195  max_shift,
1196  &query_left_len, &subject_left_len,
1197  &align_len_left);
1198 
1199  /* extend right from word match */
1200  num_identical += s_ExtendRight(query_seq + query_start + word_size,
1201  query_len - query_start - word_size,
1202  subject_seq + subject_start + word_size,
1203  subject_len - subject_start - word_size,
1204  max_shift,
1205  &query_right_len, &subject_right_len,
1206  &align_len_right);
1207 
1208 
1209  /* disregard already matched and extended words when matching
1210  further positions */
1211 
1212  query_from = query_start + word_size + query_right_len;
1213  subject_from = subject_start + word_size + subject_right_len;
1214  /* s_pos will be incremented in the loop */
1215  s_pos = subject_from - 1;
1216  }
1217  else {
1218  match = FALSE;
1219  }
1220  }
1221 
1222  return num_identical;
1223 }
1224 
1225 /**
1226  * Test whether the aligned parts of two sequences that
1227  * have a high-scoring gapless alignment are nearly identical.
1228  *
1229  * First extend from the left end of the query and subject ranges and stop if
1230  * there are too manu mismatches. Then extend from the right end. Then for the
1231  * remaining protion of ths sequences find matching words and extend left and
1232  * right from the word hit. Repeat the last steo until the whole alignment
1233  * ranges are processed.
1234  *
1235  * @params seqData Subject sequence [in]
1236  * @params seqOffse Starting offset of the subject sequence in alignment data
1237  * [in]
1238  * @params queryData Query sequence [in]
1239  * @params queryOffset Starting offset of the query sequence in alignment data
1240  * [in]
1241  * @param query_words Array of query words with word index corresponding to
1242  * word's position in the query [in]
1243  * @param align Alignment data [in]
1244  * @return True if sequence parts are nearly identical, false otherwise
1245  */
1246 static Boolean
1248  const int seqOffset,
1249  const BlastCompo_SequenceData* queryData,
1250  const int queryOffset,
1251  const Uint8* query_words,
1252  const BlastCompo_Alignment* align)
1253 {
1254  int qStart = align->queryStart - queryOffset;
1255  /* align->queryEnd points to one position past alignment end */
1256  int qEnd = align->queryEnd - queryOffset - 1;
1257  int sStart = align->matchStart - seqOffset;
1258  int sEnd = align->matchEnd - seqOffset - 1;
1259  const double kMinFractionNearIdentical = 0.96;
1260  int max_shift = 8;
1261 
1262  int query_len = qEnd - qStart + 1;
1263  int subject_len = sEnd - sStart + 1;
1264  int align_len = MIN(query_len, subject_len);
1265 
1266  int query_left_len = 0;
1267  int subject_left_len = 0;
1268  int query_right_len = 0;
1269  int subject_right_len = 0;
1270  int align_left_len = 0;
1271  int align_right_len = 0;
1272 
1273  double fraction_identical;
1274 
1275  /* first find number of identies going from the beginning of the query
1276  and subject ranges */
1277  int num_identical = s_ExtendRight(queryData->data + qStart, query_len,
1278  seqData->data + sStart, subject_len,
1279  max_shift,
1280  &query_right_len, &subject_right_len,
1281  &align_right_len);
1282 
1283  /* if the whole query range was processed return near identical status */
1284  if (query_right_len >= query_len || subject_right_len >= subject_len) {
1285  fraction_identical = (double)num_identical / (double)align_len;
1286  ASSERT(fraction_identical - 1.0 < 1e-10);
1287  return fraction_identical > kMinFractionNearIdentical;
1288  }
1289 
1290  /* find the number of identies going from the end of the query and subject
1291  ranges */
1292  num_identical += s_ExtendLeft(queryData->data + qStart + query_right_len,
1293  query_len - query_right_len,
1294  seqData->data + sStart + subject_right_len,
1295  subject_len - subject_right_len,
1296  max_shift,
1297  &query_left_len, &subject_left_len,
1298  &align_left_len);
1299 
1300  /* if the whole alignment ranges where covered, return the near identical
1301  status */
1302  if (query_left_len + query_right_len >= query_len
1303  || subject_left_len + subject_right_len >= subject_len) {
1304 
1305  fraction_identical = (double)num_identical / (double)(align_len);
1306  ASSERT(fraction_identical - 1.0 < 1e-10);
1307  return fraction_identical > kMinFractionNearIdentical;
1308  }
1309 
1310  /* find the number of identical matches in the middle portion of the
1311  alignment ranges */
1312  num_identical += s_FindNumIdentical(queryData->data + qStart + query_right_len,
1313  query_words + qStart + query_right_len,
1314  query_len - query_left_len - query_right_len,
1315  seqData->data + sStart + subject_right_len,
1316  subject_len - subject_left_len - subject_right_len,
1317  max_shift);
1318 
1319  fraction_identical = (double)num_identical / (double)align_len;
1320  ASSERT(fraction_identical - 1.0 < 1e-10);
1321  if (fraction_identical > kMinFractionNearIdentical) {
1322  return TRUE;
1323  }
1324  else {
1325  return FALSE;
1326  }
1327 }
1328 
1329 
1330 /**
1331  * Initialize a new matching sequence, obtaining information about the
1332  * sequence from the search.
1333  *
1334  * @param self object to be initialized
1335  * @param seqSrc A pointer to a source from which sequence data
1336  * may be obtained
1337  * @param program_number identifies the type of blast search being
1338  * performed.
1339  * @param default_db_genetic_code default genetic code to use when
1340  * subject sequences are translated and there is
1341  * no other guidance on what code to use
1342  * @param subject_index index of the matching sequence in the database
1343  */
1344 static int
1346  EBlastProgramType program_number,
1347  const BlastSeqSrc* seqSrc,
1348  Int4 default_db_genetic_code,
1349  Int4 subject_index)
1350 {
1351  BlastKappa_SequenceInfo * seq_info; /* BLAST-specific sequence
1352  information */
1353  self->length = 0;
1354  self->local_data = NULL;
1355 
1356  seq_info = malloc(sizeof(BlastKappa_SequenceInfo));
1357  if (seq_info != NULL) {
1358  self->local_data = seq_info;
1359 
1360  seq_info->seq_src = seqSrc;
1361  seq_info->prog_number = program_number;
1362 
1363  memset((void*) &seq_info->seq_arg, 0, sizeof(seq_info->seq_arg));
1364  seq_info->seq_arg.oid = self->index = subject_index;
1365  seq_info->seq_arg.check_oid_exclusion = TRUE;
1366 
1367  if( program_number == eBlastTypeTblastn ) {
1369  } else {
1371  }
1372  if (BlastSeqSrcGetSequence(seqSrc, &seq_info->seq_arg) >= 0) {
1373  self->length =
1374  BlastSeqSrcGetSeqLen(seqSrc, (void*) &seq_info->seq_arg);
1375 
1376  /* If the subject is translated and the BlastSeqSrc implementation
1377  * doesn't provide a genetic code string, use the default genetic
1378  * code for all subjects (as in the C toolkit) */
1379  if (Blast_SubjectIsTranslated(program_number) &&
1380  seq_info->seq_arg.seq->gen_code_string == NULL) {
1381  seq_info->seq_arg.seq->gen_code_string =
1382  GenCodeSingletonFind(default_db_genetic_code);
1383  ASSERT(seq_info->seq_arg.seq->gen_code_string);
1384  }
1385  } else {
1386  self->length = 0;
1387  }
1388  }
1389  if (self->length == 0) {
1390  /* Could not obtain the required data */
1392  return -1;
1393  } else {
1394  return 0;
1395  }
1396 }
1397 
1398 
1399 /** NCBIstdaa encoding for 'X' character */
1400 #define BLASTP_MASK_RESIDUE 21
1401 /** Default instructions and mask residue for SEG filtering */
1402 #define BLASTP_MASK_INSTRUCTIONS "S 10 1.8 2.1"
1403 
1404 
1405 /**
1406  * Filter low complexity regions from the sequence data; uses the SEG
1407  * algorithm.
1408  *
1409  * @param seqData data to be filtered
1410  * @param program_name type of search being performed
1411  * @return 0 for success; -1 for out-of-memory
1412  */
1413 static int
1415  EBlastProgramType program_name,
1416  Boolean* is_seq_biased)
1417 {
1418  int status = 0;
1419  BlastSeqLoc* mask_seqloc = NULL;
1420  SBlastFilterOptions* filter_options = NULL;
1421 
1422  status = BlastFilteringOptionsFromString(program_name,
1424  &filter_options, NULL);
1425  if (status == 0) {
1426  status = BlastSetUp_Filter(program_name, seqData->data,
1427  seqData->length, 0, filter_options,
1428  &mask_seqloc, NULL);
1429  filter_options = SBlastFilterOptionsFree(filter_options);
1430  }
1431  if (is_seq_biased) {
1432  *is_seq_biased = (mask_seqloc != NULL);
1433  }
1434  if (status == 0) {
1435  Blast_MaskTheResidues(seqData->data, seqData->length,
1436  FALSE, mask_seqloc, FALSE, 0);
1437  }
1438  if (mask_seqloc != NULL) {
1439  mask_seqloc = BlastSeqLocFree(mask_seqloc);
1440  }
1441  return status;
1442 }
1443 
1444 
1445 /**
1446  * Obtain a string of translated data
1447  *
1448  * @param self the sequence from which to obtain the data [in]
1449  * @param range the range and translation frame to get [in]
1450  * @param seqData the resulting data [out]
1451  * @param queryData the query sequence [in]
1452  * @param queryOffset offset for align if there are multiple queries
1453  * @param align information about the alignment between query and subject
1454  * @param shouldTestIdentical did alignment pass a preliminary test in
1455  * redo_alignment.c that indicates the sequence
1456  * pieces may be near identical
1457  *
1458  * @return 0 on success; -1 on failure
1459  */
1460 static int
1462  const BlastCompo_SequenceRange * range,
1463  BlastCompo_SequenceData * seqData,
1464  const BlastCompo_SequenceRange * q_range,
1465  BlastCompo_SequenceData * queryData,
1466  const Uint8* query_words,
1467  const BlastCompo_Alignment *align,
1468  const Boolean shouldTestIdentical,
1469  const ECompoAdjustModes compo_adjust_mode,
1470  const Boolean isSmithWaterman,
1471  Boolean* subject_maybe_biased)
1472 {
1473  int status = 0;
1474  BlastKappa_SequenceInfo * local_data; /* BLAST-specific
1475  information associated
1476  with the sequence */
1477  Uint1 * translation_buffer; /* a buffer for the translated,
1478  amino-acid sequence */
1479  Int4 translated_length; /* length of the translated sequence */
1480  int translation_frame; /* frame in which to translate */
1481  Uint1 * na_sequence; /* the nucleotide sequence */
1482  int translation_start; /* location in na_sequence to start
1483  translating */
1484  int num_nucleotides; /* the number of nucleotides to be translated */
1485 
1486  local_data = self->local_data;
1487  na_sequence = local_data->seq_arg.seq->sequence_start;
1488 
1489  /* Initialize seqData to nil, in case this routine fails */
1490  seqData->buffer = NULL;
1491  seqData->data = NULL;
1492  seqData->length = 0;
1493 
1494  translation_frame = range->context;
1495  if (translation_frame > 0) {
1496  translation_start = 3 * range->begin;
1497  } else {
1498  translation_start =
1499  self->length - 3 * range->end + translation_frame + 1;
1500  }
1501  num_nucleotides =
1502  3 * (range->end - range->begin) + ABS(translation_frame) - 1;
1503 
1504  status = Blast_GetPartialTranslation(na_sequence + translation_start,
1505  num_nucleotides,
1506  (Int2) translation_frame,
1507  local_data->seq_arg.seq->gen_code_string,
1508  &translation_buffer,
1509  &translated_length,
1510  NULL);
1511  if (status == 0) {
1512  seqData->buffer = translation_buffer;
1513  seqData->data = translation_buffer + 1;
1514  seqData->length = translated_length;
1515 
1516  if ( !(KAPPA_TBLASTN_NO_SEG_SEQUENCE) ) {
1517  if (compo_adjust_mode
1518  && (!subject_maybe_biased || *subject_maybe_biased)) {
1519 
1520  if ( (!shouldTestIdentical)
1521  || (shouldTestIdentical
1522  && (!s_TestNearIdentical(seqData, range->begin,
1523  queryData, q_range->begin,
1524  query_words, align)))) {
1525 
1526  status = s_DoSegSequenceData(seqData, eBlastTypeTblastn,
1527  subject_maybe_biased);
1528  if (status != 0) {
1529  free(seqData->buffer);
1530  seqData->buffer = NULL;
1531  seqData->data = NULL;
1532  seqData->length = 0;
1533  }
1534  }
1535  }
1536  }
1537  }
1538  return status;
1539 }
1540 
1541 
1542 /**
1543  * Get a string of protein data from a protein sequence.
1544  *
1545  * @param self a protein sequence [in]
1546  * @param range the range to get [in]
1547  * @param seqData the resulting data [out]
1548  * @param queryData the query sequence [in]
1549  * @param queryOffset offset for align if there are multiple queries
1550  * @param align information about the alignment
1551  * between query and subject [in]
1552  * @param shouldTestIdentical did alignment pass a preliminary test in
1553  * redo_alignment.c that indicates the sequence
1554  * pieces may be near identical [in]
1555  *
1556  * @return 0 on success; -1 on failure
1557  */
1558 static int
1560  const BlastCompo_SequenceRange * range,
1561  BlastCompo_SequenceData * seqData,
1562  const BlastCompo_SequenceRange * q_range,
1563  BlastCompo_SequenceData * queryData,
1564  const Uint8* query_words,
1565  const BlastCompo_Alignment *align,
1566  const Boolean shouldTestIdentical,
1567  const ECompoAdjustModes compo_adjust_mode,
1568  const Boolean isSmithWaterman,
1569  Boolean* subject_maybe_biased)
1570 
1571 {
1572  int status = 0; /* return status */
1573  Int4 idx; /* loop index */
1574  Uint1 *origData; /* the unfiltered data for the sequence */
1575  /* BLAST-specific sequence information */
1576  BlastKappa_SequenceInfo * local_data = self->local_data;
1577  BLAST_SequenceBlk * seq = self->local_data;
1578 
1579  if (self->local_data == NULL)
1580  return -1;
1581 
1582  seqData->data = NULL;
1583  seqData->length = 0;
1584  /* Copy the entire sequence (necessary for SEG filtering.) */
1585  seqData->buffer = calloc((self->length + 2), sizeof(Uint1));
1586  if (seqData->buffer == NULL) {
1587  return -1;
1588  }
1589  /* First and last characters of the buffer MUST be '\0', which is
1590  * true here because the buffer was allocated using calloc. */
1591  seqData->data = seqData->buffer + 1;
1592  seqData->length = self->length;
1593 
1594  origData = (self->index >= 0) ? local_data->seq_arg.seq->sequence
1595  : seq->sequence;
1596  if((self->index < 0) && (align->frame != 0)) {
1597  int i=0, offsets =0;
1598  int f = GET_SEQ_FRAME(align->frame);
1599  int nucl_length = GET_NUCL_LENGTH(self->length);
1600  seqData->length = GET_TRANSLATED_LENGTH(nucl_length, f);
1601  for(; i < f; i++) {
1602  offsets = GET_TRANSLATED_LENGTH(nucl_length, i) +1;
1603  origData += offsets;
1604  }
1605  }
1606  /* Copy the sequence data */
1607  for (idx = 0; idx < seqData->length; idx++) {
1608  seqData->data[idx] = origData[idx];
1609  }
1610 
1611  if ( !(KAPPA_BLASTP_NO_SEG_SEQUENCE) ) {
1612  if (compo_adjust_mode
1613  && (!subject_maybe_biased || *subject_maybe_biased)) {
1614 
1615  if ( (!shouldTestIdentical)
1616  || (shouldTestIdentical
1617  && (!s_TestNearIdentical(seqData, 0, queryData,
1618  q_range->begin, query_words,
1619  align)))) {
1620 
1621  status = s_DoSegSequenceData(seqData, eBlastTypeBlastp,
1622  subject_maybe_biased);
1623  }
1624  }
1625  }
1626  /* Fit the data to the range. */
1627  seqData ->data = &seqData->data[range->begin - 1];
1628  *seqData->data++ = '\0';
1629  seqData ->length = range->end - range->begin;
1630 
1631  if (status != 0) {
1632  free(seqData->buffer);
1633  seqData->buffer = NULL;
1634  seqData->data = NULL;
1635  }
1636  return status;
1637 }
1638 
1639 
1640 /**
1641  * Obtain the sequence data that lies within the given range.
1642  *
1643  * @param self sequence information [in]
1644  * @param range range specifying the range of data [in]
1645  * @param seqData the sequence data obtained [out]
1646  * @param seqData the resulting data [out]
1647  * @param queryData the query sequence [in]
1648  * @param queryOffset offset for align if there are multiple queries
1649  * @param align information about the alignment between query and subject
1650  * @param shouldTestIdentical did alignment pass a preliminary test in
1651  * redo_alignment.c that indicates the sequence
1652  * pieces may be near identical
1653  *
1654  * @return 0 on success; -1 on failure
1655  */
1656 static int
1658  const BlastCompo_SequenceRange * s_range,
1659  BlastCompo_SequenceData * seqData,
1661  const BlastCompo_SequenceRange * q_range,
1662  BlastCompo_SequenceData * queryData,
1663  const Uint8* query_words,
1664  const BlastCompo_Alignment *align,
1665  const Boolean shouldTestIdentical,
1666  const ECompoAdjustModes compo_adjust_mode,
1667  const Boolean isSmithWaterman,
1668  Boolean* subject_maybe_biased)
1669 {
1670  Int4 idx;
1671  BlastKappa_SequenceInfo * seq_info = self->local_data;
1672  Uint1 *origData = query->data + q_range->begin;
1673  /* Copy the query sequence (necessary for SEG filtering.) */
1674  queryData->length = q_range->end - q_range->begin;
1675  queryData->buffer = calloc((queryData->length + 2), sizeof(Uint1));
1676  queryData->data = queryData->buffer + 1;
1677 
1678  for (idx = 0; idx < queryData->length; idx++) {
1679  /* Copy the sequence data, replacing occurrences of amino acid
1680  * number 24 (Selenocysteine) with number 3 (Cysteine). */
1681  queryData->data[idx] = (origData[idx] != 24) ? origData[idx] : 3;
1682  }
1683  if (seq_info && seq_info->prog_number == eBlastTypeTblastn) {
1684  /* The sequence must be translated. */
1685  return s_SequenceGetTranslatedRange(self, s_range, seqData,
1686  q_range, queryData, query_words,
1687  align, shouldTestIdentical,
1688  compo_adjust_mode, isSmithWaterman,
1689  subject_maybe_biased);
1690  } else {
1691  return s_SequenceGetProteinRange(self, s_range, seqData,
1692  q_range, queryData, query_words,
1693  align, shouldTestIdentical,
1694  compo_adjust_mode, isSmithWaterman,
1695  subject_maybe_biased);
1696  }
1697 }
1698 
1699 
1700 /** Data and data-structures needed to perform a gapped alignment */
1702  const BlastScoringParameters*
1703  scoringParams; /**< scoring parameters for a
1704  gapped alignment */
1705  BlastGapAlignStruct * gap_align; /**< additional parameters for a
1706  gapped alignment */
1707  BlastScoreBlk* sbp; /**< the score block for this search */
1708  double localScalingFactor; /**< the amount by which this
1709  search has been scaled */
1710  EBlastProgramType prog_number; /**< the type of search being
1711  performed */
1713 
1714 
1715 /**
1716  * Reads a BlastGapAlignStruct that has been used to compute a
1717  * traceback, and return a BlastCompo_Alignment representing the
1718  * alignment. The BlastGapAlignStruct is in coordinates local to the
1719  * ranges being aligned; the resulting alignment is in coordinates w.r.t.
1720  * the whole query and subject.
1721  *
1722  * @param gap_align the BlastGapAlignStruct
1723  * @param *edit_script the edit script from the alignment; on exit
1724  * NULL. The edit_script is usually
1725  * gap_align->edit_script, but we don't want
1726  * an implicit side effect on the gap_align.
1727  * @param query_range the range of the query used in this alignment
1728  * @param subject_range the range of the subject used in this alignment
1729  * @param matrix_adjust_rule the rule used to compute the scoring matrix
1730  *
1731  * @return the new alignment on success or NULL on error
1732  */
1733 static BlastCompo_Alignment *
1735  GapEditScript ** edit_script,
1736  BlastCompo_SequenceRange * query_range,
1737  BlastCompo_SequenceRange * subject_range,
1738  EMatrixAdjustRule matrix_adjust_rule)
1739 {
1740  /* parameters to BlastCompo_AlignmentNew */
1741  int queryStart, queryEnd, queryIndex, matchStart, matchEnd, frame;
1742  BlastCompo_Alignment * obj; /* the new alignment */
1743 
1744  /* In the composition_adjustment library, the query start/end are
1745  indices into the concatenated query, and so must be shifted. */
1746  queryStart = gap_align->query_start + query_range->begin;
1747  queryEnd = gap_align->query_stop + query_range->begin;
1748  queryIndex = query_range->context;
1749  matchStart = gap_align->subject_start + subject_range->begin;
1750  matchEnd = gap_align->subject_stop + subject_range->begin;
1751  frame = subject_range->context;
1752 
1753  obj = BlastCompo_AlignmentNew(gap_align->score, matrix_adjust_rule,
1754  queryStart, queryEnd, queryIndex,
1755  matchStart, matchEnd, frame,
1756  *edit_script);
1757  if (obj != NULL) {
1758  *edit_script = NULL;
1759  }
1760  return obj;
1761 }
1762 
1763 
1764 /** A callback used when performing SmithWaterman alignments:
1765  * Calculate the traceback for one alignment by performing an x-drop
1766  * alignment in the forward direction, possibly increasing the x-drop
1767  * parameter until the desired score is attained.
1768  *
1769  * The start, end and score of the alignment should be obtained
1770  * using the Smith-Waterman algorithm before this routine is called.
1771  *
1772  * @param *pnewAlign the new alignment
1773  * @param *pqueryEnd on entry, the end of the alignment in the
1774  * query, as computed by the Smith-Waterman
1775  * algorithm. On exit, the end as computed by
1776  * the x-drop algorithm
1777  * @param *pmatchEnd like as *pqueryEnd, but for the subject
1778  * sequence
1779  * @param queryStart the starting point in the query
1780  * @param matchStart the starting point in the subject
1781  * @param score the score of the alignment, as computed by
1782  * the Smith-Waterman algorithm
1783  * @param query query sequence data
1784  * @param query_range range of this query in the concatenated
1785  * query
1786  * @param ccat_query_length total length of the concatenated query
1787  * @param subject subject sequence data
1788  * @param subject_range range of subject_data in the translated
1789  * query, in amino acid coordinates
1790  * @param full_subject_length length of the full subject sequence
1791  * @param gapping_params parameters used to compute gapped
1792  * alignments
1793  * @param matrix_adjust_rule the rule used to compute the scoring matrix
1794  *
1795  * @returns 0 (posts a fatal error if it fails)
1796  * @sa new_xdrop_align_type
1797  */
1798 static int
1800  Int4 * pqueryEnd, Int4 *pmatchEnd,
1801  Int4 queryStart, Int4 matchStart, Int4 score,
1803  BlastCompo_SequenceRange * query_range,
1804  Int4 ccat_query_length,
1806  BlastCompo_SequenceRange * subject_range,
1807  Int4 full_subject_length,
1808  BlastCompo_GappingParams * gapping_params,
1809  EMatrixAdjustRule matrix_adjust_rule)
1810 {
1811  Int4 newScore;
1812  /* Extent of the alignment as computed by an x-drop alignment
1813  * (usually the same as (queryEnd - queryStart) and (matchEnd -
1814  * matchStart)) */
1815  Int4 queryExtent, matchExtent;
1816  BlastCompo_Alignment * obj = NULL; /* the new object */
1817  /* BLAST-specific parameters needed compute an X-drop alignment */
1818  BlastKappa_GappingParamsContext * context = gapping_params->context;
1819  /* Auxiliarly structure for computing gapped alignments */
1820  BlastGapAlignStruct * gap_align = context->gap_align;
1821  /* Scoring parameters for gapped alignments */
1822  const BlastScoringParameters* scoringParams = context->scoringParams;
1823  /* A structure containing the traceback of a gapped alignment */
1824  GapEditScript* editScript = NULL;
1825 
1826  /* suppress unused parameter warnings; this is a callback
1827  function, so these parameter cannot be deleted */
1828  (void) ccat_query_length;
1829  (void) full_subject_length;
1830 
1831  gap_align->gap_x_dropoff = gapping_params->x_dropoff;
1832 
1833  s_SWFindFinalEndsUsingXdrop(query, queryStart, *pqueryEnd,
1834  subject, matchStart, *pmatchEnd,
1835  gap_align, scoringParams,
1836  score, &queryExtent, &matchExtent,
1837  &newScore);
1838  *pqueryEnd = queryStart + queryExtent;
1839  *pmatchEnd = matchStart + matchExtent;
1840 
1841  editScript =
1843  gap_align->fwd_prelim_tback);
1844  if (editScript != NULL) {
1845  /* Shifted values of the endpoints */
1846  Int4 aqueryStart = queryStart + query_range->begin;
1847  Int4 aqueryEnd = *pqueryEnd + query_range->begin;
1848  Int4 amatchStart = matchStart + subject_range->begin;
1849  Int4 amatchEnd = *pmatchEnd + subject_range->begin;
1850 
1851  obj = BlastCompo_AlignmentNew(newScore, matrix_adjust_rule,
1852  aqueryStart, aqueryEnd,
1853  query_range->context,
1854  amatchStart, amatchEnd,
1855  subject_range->context, editScript);
1856  if (obj == NULL) {
1857  GapEditScriptDelete(editScript);
1858  }
1859  }
1860  *pnewAlign = obj;
1861 
1862  return obj != NULL ? 0 : -1;
1863 }
1864 
1865 
1866 /**
1867  * A callback: calculate the traceback for one alignment by
1868  * performing an x-drop alignment in both directions
1869  *
1870  * @param in_align the existing alignment, without traceback
1871  * @param matrix_adjust_rule the rule used to compute the scoring matrix
1872  * @param query_data query sequence data
1873  * @param query_range range of this query in the concatenated
1874  * query
1875  * @param ccat_query_length total length of the concatenated query
1876  * @param subject_data subject sequence data
1877  * @param subject_range range of subject_data in the translated
1878  * query, in amino acid coordinates
1879  * @param full_subject_length length of the full subject sequence
1880  * @param gapping_params parameters used to compute gapped
1881  * alignments
1882  * @sa redo_one_alignment_type
1883  */
1884 static BlastCompo_Alignment *
1886  EMatrixAdjustRule matrix_adjust_rule,
1887  BlastCompo_SequenceData * query_data,
1888  BlastCompo_SequenceRange * query_range,
1889  int ccat_query_length,
1890  BlastCompo_SequenceData * subject_data,
1891  BlastCompo_SequenceRange * subject_range,
1892  int full_subject_length,
1893  BlastCompo_GappingParams * gapping_params)
1894 {
1895  int status; /* return code */
1896  Int4 q_start, s_start; /* starting point in query and subject */
1897  /* BLAST-specific parameters needed to compute a gapped alignment */
1898  BlastKappa_GappingParamsContext * context = gapping_params->context;
1899  /* Auxiliary structure for computing gapped alignments */
1900  BlastGapAlignStruct* gapAlign = context->gap_align;
1901  /* The preliminary gapped HSP that were are recomputing */
1902  BlastHSP * hsp = in_align->context;
1903  Boolean fence_hit = FALSE;
1904 
1905  /* suppress unused parameter warnings; this is a callback
1906  function, so these parameter cannot be deleted */
1907  (void) ccat_query_length;
1908  (void) full_subject_length;
1909 
1910  /* Use the starting point supplied by the HSP. */
1911  q_start = hsp->query.gapped_start - query_range->begin;
1912  s_start = hsp->subject.gapped_start - subject_range->begin;
1913 
1914  gapAlign->gap_x_dropoff = gapping_params->x_dropoff;
1915 
1916  /*
1917  * Previously, last argument was NULL which could cause problems for
1918  * tblastn.
1919  */
1920  status =
1922  query_data->data,
1923  subject_data->data, gapAlign,
1924  context->scoringParams,
1925  q_start, s_start,
1926  query_data->length,
1927  subject_data->length,
1928  &fence_hit);
1929  if (status == 0) {
1930  return s_NewAlignmentFromGapAlign(gapAlign, &gapAlign->edit_script,
1931  query_range, subject_range,
1932  matrix_adjust_rule);
1933  } else {
1934  return NULL;
1935  }
1936 }
1937 
1938 
1939 /**
1940  * A BlastKappa_SavedParameters holds the value of certain search
1941  * parameters on entry to RedoAlignmentCore. These values are
1942  * restored on exit.
1943  */
1945  Int4 gap_open; /**< a penalty for the existence of a gap */
1946  Int4 gapExtend; /**< a penalty for each residue in the
1947  gap */
1948  double scale_factor; /**< the original scale factor */
1949  Int4 **origMatrix; /**< The original matrix values */
1950  double original_expect_value; /**< expect value on entry */
1951  /** copy of the original gapped Karlin-Altschul block
1952  * corresponding to the first context */
1954  Int4 num_queries; /**< Number of queries in this search */
1956 
1957 
1958 /**
1959  * Release the data associated with a BlastKappa_SavedParameters and
1960  * delete the object
1961  * @param searchParams the object to be deleted [in][out]
1962  */
1963 static void
1965 {
1966  /* for convenience, remove one level of indirection from searchParams */
1967  BlastKappa_SavedParameters *sp = *searchParams;
1968 
1969  if (sp != NULL) {
1970  if (sp->kbp_gap_orig != NULL) {
1971  int i;
1972  for (i = 0; i < sp->num_queries; i++) {
1973  if (sp->kbp_gap_orig[i] != NULL)
1975  }
1976  free(sp->kbp_gap_orig);
1977  }
1978  if (sp->origMatrix != NULL)
1980  }
1981  sfree(*searchParams);
1982  *searchParams = NULL;
1983 }
1984 
1985 
1986 /**
1987  * Create a new instance of BlastKappa_SavedParameters
1988  *
1989  * @param rows number of rows in the scoring matrix
1990  * @param numQueries number of queries in this search
1991  * @param compo_adjust_mode if >0, use composition-based statistics
1992  * @param positionBased if true, the search is position-based
1993  */
1996  Int4 numQueries,
1997  ECompoAdjustModes compo_adjust_mode,
1998  Boolean positionBased)
1999 {
2000  int i;
2001  BlastKappa_SavedParameters *sp; /* the new object */
2002  sp = malloc(sizeof(BlastKappa_SavedParameters));
2003 
2004  if (sp == NULL) {
2005  goto error_return;
2006  }
2007  sp->kbp_gap_orig = NULL;
2008  sp->origMatrix = NULL;
2009 
2010  sp->kbp_gap_orig = calloc(numQueries, sizeof(Blast_KarlinBlk*));
2011  if (sp->kbp_gap_orig == NULL) {
2012  goto error_return;
2013  }
2014  sp->num_queries = numQueries;
2015  for (i = 0; i < numQueries; i++) {
2016  sp->kbp_gap_orig[i] = NULL;
2017  }
2018  if (compo_adjust_mode != eNoCompositionBasedStats) {
2019  if (positionBased) {
2021  } else {
2023  }
2024  if (sp->origMatrix == NULL)
2025  goto error_return;
2026  }
2027  return sp;
2028 error_return:
2029  s_SavedParametersFree(&sp);
2030  return NULL;
2031 }
2032 
2033 
2034 /**
2035  * Record the initial value of the search parameters that are to be
2036  * adjusted.
2037  *
2038  * @param searchParams holds the recorded values [out]
2039  * @param sbp a score block [in]
2040  * @param scoring gapped alignment parameters [in]
2041  * @param query_length length of the concatenated query [in]
2042  * @param compo_adjust_mode composition adjustment mode [in]
2043  * @param positionBased is this search position-based [in]
2044  */
2045 static int
2047  BlastScoreBlk* sbp,
2048  const BlastScoringParameters* scoring,
2049  int query_length,
2050  ECompoAdjustModes compo_adjust_mode,
2051  Boolean positionBased)
2052 {
2053  int i;
2054 
2055  searchParams->gap_open = scoring->gap_open;
2056  searchParams->gapExtend = scoring->gap_extend;
2057  searchParams->scale_factor = scoring->scale_factor;
2058 
2059  for (i = 0; i < searchParams->num_queries; i++) {
2060  if (sbp->kbp_gap[i] != NULL) {
2061  /* There is a kbp_gap for query i and it must be copied */
2062  searchParams->kbp_gap_orig[i] = Blast_KarlinBlkNew();
2063  if (searchParams->kbp_gap_orig[i] == NULL) {
2064  return -1;
2065  }
2066  Blast_KarlinBlkCopy(searchParams->kbp_gap_orig[i],
2067  sbp->kbp_gap[i]);
2068  }
2069  }
2070 
2071  if (compo_adjust_mode != eNoCompositionBasedStats) {
2072  Int4 **matrix; /* scoring matrix */
2073  int j; /* iteration index */
2074  int rows; /* number of rows in matrix */
2075  if (positionBased) {
2076  matrix = sbp->psi_matrix->pssm->data;
2077  rows = query_length;
2078  } else {
2079  matrix = sbp->matrix->data;
2080  rows = BLASTAA_SIZE;
2081  }
2082 
2083  for (i = 0; i < rows; i++) {
2084  for (j = 0; j < BLASTAA_SIZE; j++) {
2085  searchParams->origMatrix[i][j] = matrix[i][j];
2086  }
2087  }
2088  }
2089  return 0;
2090 }
2091 
2092 
2093 /**
2094  * Rescale the search parameters in the search object and options
2095  * object to obtain more precision.
2096  *
2097  * @param sbp score block to be rescaled
2098  * @param sp scoring parameters to be rescaled
2099  * @param num_queries number of queries in this search
2100  * @param scale_factor amount by which to scale this search
2101  */
2102 static void
2105  int num_queries,
2106  double scale_factor)
2107 {
2108  int i;
2109  for (i = 0; i < num_queries; i++) {
2110  if (sbp->kbp_gap[i] != NULL) {
2111  Blast_KarlinBlk * kbp = sbp->kbp_gap[i];
2112  kbp->Lambda /= scale_factor;
2113  kbp->logK = log(kbp->K);
2114  }
2115  }
2116 
2117  sp->gap_open = BLAST_Nint(sp->gap_open * scale_factor);
2118  sp->gap_extend = BLAST_Nint(sp->gap_extend * scale_factor);
2119  sp->scale_factor = scale_factor;
2120 }
2121 
2122 
2123 /**
2124  * Restore the parameters that were adjusted to their original values.
2125  *
2126  * @param sbp the score block to be restored
2127  * @param scoring the scoring parameters to be restored
2128  * @param searchParams the initial recorded values of the parameters
2129  * @param query_length the concatenated query length
2130  * @param positionBased is this search position-based
2131  * @param compo_adjust_mode mode of composition adjustment
2132  */
2133 static void
2135  BlastScoringParameters* scoring,
2136  const BlastKappa_SavedParameters * searchParams,
2137  int query_length,
2138  Boolean positionBased,
2139  ECompoAdjustModes compo_adjust_mode)
2140 {
2141  int i;
2142 
2143  scoring->gap_open = searchParams->gap_open;
2144  scoring->gap_extend = searchParams->gapExtend;
2145  scoring->scale_factor = searchParams->scale_factor;
2146 
2147  for (i = 0; i < searchParams->num_queries; i++) {
2148  if (sbp->kbp_gap[i] != NULL) {
2149  Blast_KarlinBlkCopy(sbp->kbp_gap[i],
2150  searchParams->kbp_gap_orig[i]);
2151  }
2152  }
2153  if(compo_adjust_mode != eNoCompositionBasedStats) {
2154  int j; /* iteration index */
2155  Int4 ** matrix; /* matrix to be restored */
2156  int rows; /* number of rows in the matrix */
2157 
2158  if (positionBased) {
2159  matrix = sbp->psi_matrix->pssm->data;
2160  rows = query_length;
2161  } else {
2162  matrix = sbp->matrix->data;
2163  rows = BLASTAA_SIZE;
2164  }
2165  for (i = 0; i < rows; i++) {
2166  for (j = 0; j < BLASTAA_SIZE; j++) {
2167  matrix[i][j] = searchParams->origMatrix[i][j];
2168  }
2169  }
2170  }
2171 }
2172 
2173 
2174 /**
2175  * Initialize an object of type Blast_MatrixInfo.
2176  *
2177  * @param self object being initialized
2178  * @param queryBlk the query sequence data
2179  * @param sbp score block for this search
2180  * @param scale_factor amount by which ungapped parameters should be
2181  * scaled
2182  * @param matrixName name of the matrix
2183  */
2184 static int
2186  BLAST_SequenceBlk* queryBlk,
2187  BlastScoreBlk* sbp,
2188  double scale_factor,
2189  const char * matrixName)
2190 {
2191  int status = 0; /* return status */
2192  int lenName; /* length of matrixName as a string */
2193 
2194  /* copy the matrix name (strdup is not standard C) */
2195  lenName = strlen(matrixName);
2196  if (NULL == (self->matrixName = malloc(lenName + 1))) {
2197  return -1;
2198  }
2199  memcpy(self->matrixName, matrixName, lenName + 1);
2200 
2201  if (self->positionBased) {
2202  status = s_GetPosBasedStartFreqRatios(self->startFreqRatios,
2203  queryBlk->length,
2204  queryBlk->sequence,
2205  matrixName,
2206  sbp->psi_matrix->freq_ratios);
2207  if (status == 0) {
2208  status = s_ScalePosMatrix(self->startMatrix, matrixName,
2209  sbp->psi_matrix->freq_ratios,
2210  queryBlk->sequence,
2211  queryBlk->length, sbp, scale_factor);
2212  self->ungappedLambda = sbp->kbp_psi[0]->Lambda / scale_factor;
2213  }
2214  } else {
2215  self->ungappedLambda = sbp->kbp_ideal->Lambda / scale_factor;
2216  status = s_GetStartFreqRatios(self->startFreqRatios, matrixName);
2217  if (status == 0) {
2218  Blast_Int4MatrixFromFreq(self->startMatrix, self->cols,
2219  self->startFreqRatios,
2220  self->ungappedLambda);
2221  }
2222  }
2223  return status;
2224 }
2225 
2226 
2227 /* Create an array of 8-mers for a sequence, such that index of each 8-mer
2228  is the same as its position in the query */
2229 static int
2230 s_CreateWordArray(const Uint1* seq_data, Int4 seq_len, Uint8** words)
2231 {
2232  int word_size = 8; /* word size for k-mer matching */
2233  Uint8* query_hashes; /* list of hashes for query words */
2234  Uint8 mask = NCBI_CONST_UINT8(0xFFFFFFFFFF); /* mask for computing hash
2235  values */
2236  int i;
2237 
2238  /* if query or subject length is smaller than word size, exit */
2239  if (!seq_data || !words || seq_len < word_size) {
2240  return -1;
2241  }
2242 
2243  query_hashes = (Uint8*)calloc((seq_len - word_size + 1),
2244  sizeof(Uint8));
2245  *words = query_hashes;
2246 
2247  if (!query_hashes) {
2248  return -1;
2249  }
2250 
2251 
2252  /* find query word hashes */
2253  query_hashes[0] = s_GetHash(&seq_data[0], word_size);
2254  for (i = 1; i < seq_len - word_size; i++) {
2255  query_hashes[i] = query_hashes[i - 1];
2256  query_hashes[i] <<= 5;
2257  query_hashes[i] &= mask;
2258  query_hashes[i] += (Uint8)seq_data[i + word_size - 1];
2259  }
2260 
2261  return 0;
2262 }
2263 
2264 
2266  int num_queries)
2267 {
2268  int i;
2269 
2270  if (!query_info) {
2271  return;
2272  }
2273 
2274  for (i = 0;i < num_queries;i++) {
2275  if ((*query_info)[i].words) {
2276  free((*query_info)[i].words);
2277  }
2278  }
2279 
2280  free(*query_info);
2281  *query_info = NULL;
2282 }
2283 
2284 /**
2285  * Save information about all queries in an array of objects of type
2286  * BlastCompo_QueryInfo.
2287  *
2288  * @param query_data query sequence data
2289  * @param blast_query_info information about all queries, as an
2290  * internal blast data structure
2291  *
2292  * @return the new array on success, or NULL on error
2293  */
2294 static BlastCompo_QueryInfo *
2295 s_GetQueryInfo(Uint1 * query_data, const BlastQueryInfo * blast_query_info, Boolean skip)
2296 {
2297  int i; /* loop index */
2299  compo_query_info; /* the new array */
2300  int num_queries; /* the number of queries/elements in
2301  compo_query_info */
2302 
2303  num_queries = blast_query_info->last_context + 1;
2304  compo_query_info = calloc(num_queries, sizeof(BlastCompo_QueryInfo));
2305  if (compo_query_info != NULL) {
2306  for (i = 0; i < num_queries; i++) {
2307  BlastCompo_QueryInfo * query_info = &compo_query_info[i];
2308  const BlastContextInfo * query_context = &blast_query_info->contexts[i];
2309 
2310  query_info->eff_search_space =
2311  (double) query_context->eff_searchsp;
2312  query_info->origin = query_context->query_offset;
2313  query_info->seq.data = &query_data[query_info->origin];
2314  query_info->seq.length = query_context->query_length;
2315  query_info->words = NULL;
2316 
2317  s_CreateWordArray(query_info->seq.data, query_info->seq.length,
2318  &query_info->words);
2319  if (! skip) {
2321  query_info->seq.data,
2322  query_info->seq.length);
2323  }
2324  }
2325  }
2326  return compo_query_info;
2327 }
2328 
2329 
2330 /**
2331  * Create a new object of type BlastCompo_GappingParams. The new
2332  * object contains the parameters needed by the composition adjustment
2333  * library to compute a gapped alignment.
2334  *
2335  * @param context the data structures needed by callback functions
2336  * that perform the gapped alignments.
2337  * @param extendParams parameters used for a gapped extension
2338  * @param num_queries the number of queries in the concatenated query
2339  */
2340 static BlastCompo_GappingParams *
2342  const BlastExtensionParameters* extendParams,
2343  int num_queries)
2344 {
2345  int i;
2346  double min_lambda = DBL_MAX; /* smallest gapped Lambda */
2347  const BlastScoringParameters * scoring = context->scoringParams;
2348  const BlastExtensionOptions * options = extendParams->options;
2349  /* The new object */
2350  BlastCompo_GappingParams * gapping_params = NULL;
2351 
2352  gapping_params = malloc(sizeof(BlastCompo_GappingParams));
2353  if (gapping_params == NULL)
2354  return NULL;
2355 
2356  gapping_params->gap_open = scoring->gap_open;
2357  gapping_params->gap_extend = scoring->gap_extend;
2358  gapping_params->context = context;
2359 
2360  for (i = 0; i < num_queries; i++) {
2361  if (context->sbp->kbp_gap[i] != NULL &&
2362  context->sbp->kbp_gap[i]->Lambda < min_lambda) {
2363  min_lambda = context->sbp->kbp_gap[i]->Lambda;
2364  }
2365  }
2366  gapping_params->x_dropoff = (Int4)
2367  MAX(options->gap_x_dropoff_final*NCBIMATH_LN2 / min_lambda,
2368  extendParams->gap_x_dropoff_final);
2369  context->gap_align->gap_x_dropoff = gapping_params->x_dropoff;
2370 
2371  return gapping_params;
2372 }
2373 
2374 
2375 /** Callbacks used by the Blast_RedoOneMatch* routines */
2376 static const Blast_RedoAlignCallbacks
2380 };
2381 
2382 
2383 /* Bit score per alignment position threshold for preliminaru near identical
2384  test */
2385 #define NEAR_IDENTICAL_BITS_PER_POSITION (1.74)
2386 
2387 /**
2388  * Read the parameters required for the Blast_RedoOneMatch* functions from
2389  * the corresponding parameters in standard BLAST datatypes. Return a new
2390  * object representing these parameters.
2391  */
2392 static Blast_RedoAlignParams *
2394  BLAST_SequenceBlk * queryBlk,
2395  const BlastQueryInfo* queryInfo,
2396  const BlastHitSavingParameters* hitParams,
2397  const BlastExtensionParameters* extendParams)
2398 {
2399  int status = 0; /* status code */
2400  int rows; /* number of rows in the scoring matrix */
2401  int cutoff_s; /* cutoff score for saving an alignment */
2402  double cutoff_e; /* cutoff evalue for saving an alignment */
2404  gapping_params = NULL; /* parameters needed to compute a gapped
2405  alignment */
2407  scaledMatrixInfo; /* information about the scoring matrix */
2408  /* does this kind of search translate the database sequence */
2409  int subject_is_translated = (context->prog_number == eBlastTypeTblastn) || (context->prog_number == eBlastTypeRpsTblastn);
2410  int query_is_translated = context->prog_number == eBlastTypeBlastx;
2411  /* is this a positiion-based search */
2412  Boolean positionBased = (Boolean) (context->sbp->psi_matrix != NULL);
2413  /* will BLAST_LinkHsps be called to assign e-values */
2414  Boolean do_link_hsps = (hitParams->do_sum_stats);
2415  ECompoAdjustModes compo_adjust_mode =
2417 
2418  /* per position bit score cutoff for testing whether sequences are
2419  near identical */
2420  double near_identical_cutoff_bits = NEAR_IDENTICAL_BITS_PER_POSITION;
2421 
2422  /* score block is already scaled by context->localScalingFactor */
2423  double near_identical_cutoff=0;
2424  Int4 index;
2425  for (index = queryInfo->first_context;
2426  index <= queryInfo->last_context; ++index) {
2427 
2428  if ((queryInfo->contexts[index].is_valid)) {
2429  near_identical_cutoff =
2430  (near_identical_cutoff_bits * NCBIMATH_LN2)
2431  / context->sbp->kbp_gap[index]->Lambda;
2432  break;
2433  }
2434  }
2435 
2436  if (do_link_hsps) {
2437  ASSERT(hitParams->link_hsp_params != NULL);
2438  cutoff_s =
2439  (int) (hitParams->cutoff_score_min * context->localScalingFactor);
2440  } else {
2441  /* There is no cutoff score; we consider e-values instead */
2442  cutoff_s = 1;
2443  }
2444  cutoff_e = hitParams->options->expect_value;
2445  rows = positionBased ? queryInfo->max_length : BLASTAA_SIZE;
2446  scaledMatrixInfo = Blast_MatrixInfoNew(rows, BLASTAA_SIZE, positionBased);
2447  status = s_MatrixInfoInit(scaledMatrixInfo, queryBlk, context->sbp,
2448  context->localScalingFactor,
2449  context->scoringParams->options->matrix);
2450  if (status != 0) {
2451  return NULL;
2452  }
2453  gapping_params = s_GappingParamsNew(context, extendParams,
2454  queryInfo->last_context + 1);
2455  if (gapping_params == NULL) {
2456  return NULL;
2457  } else {
2458  return
2459  Blast_RedoAlignParamsNew(&scaledMatrixInfo, &gapping_params,
2460  compo_adjust_mode, positionBased,
2461  query_is_translated,
2462  subject_is_translated,
2463  queryInfo->max_length, cutoff_s, cutoff_e,
2464  do_link_hsps, &redo_align_callbacks,
2465  near_identical_cutoff);
2466  }
2467 }
2468 
2469 
2470 /**
2471  * Convert an array of BlastCompo_Heap objects to a BlastHSPResults structure.
2472  *
2473  * @param results BLAST core external results structure (pre-SeqAlign)
2474  * [out]
2475  * @param heaps an array of BlastCompo_Heap objects
2476  * @param hitlist_size size of each list in the results structure above [in]
2477  */
2478 static void
2480  BlastCompo_Heap heaps[],
2481  Int4 hitlist_size)
2482 {
2483  int query_index; /* loop index */
2484  int num_queries; /* Number of queries in this search */
2485 
2486  num_queries = results->num_queries;
2487  for (query_index = 0; query_index < num_queries; query_index++) {
2488  BlastHSPList* hsp_list;
2489  BlastHitList* hitlist;
2490  BlastCompo_Heap * heap = &heaps[query_index];
2491 
2492  results->hitlist_array[query_index] = Blast_HitListNew(hitlist_size);
2493  hitlist = results->hitlist_array[query_index];
2494 
2495  while (NULL != (hsp_list = BlastCompo_HeapPop(heap))) {
2496  Blast_HitListUpdate(hitlist, hsp_list);
2497  }
2498  }
2500 }
2501 
2502 
2503 /** Remove all matches from a BlastCompo_Heap. */
2504 static void s_ClearHeap(BlastCompo_Heap * self)
2505 {
2506  BlastHSPList* hsp_list = NULL; /* an element of the heap */
2507 
2508  while (NULL != (hsp_list = BlastCompo_HeapPop(self))) {
2509  hsp_list = Blast_HSPListFree(hsp_list);
2510  }
2511 }
2512 
2513 extern void
2515  BlastSeqSrc* seq_src,
2516  const BlastHSPList** hsp_list,
2517  Int4 num_hsplists);
2518 
2519 /**
2520  * Free a BlastGapAlignStruct copy created by s_BlastGapAlignStruct_Copy
2521  *
2522  * @param copy Pointer to BlastGapAlignStruct to be freed
2523  */
2525 {
2526  {
2527  while (copy->state_struct != NULL) {
2528  GapStateArrayStruct* cur = copy->state_struct;
2529  copy->state_struct = copy->state_struct->next;
2530  if (cur->state_array) {
2531  sfree(cur->state_array);
2532  }
2533  if (cur) {
2534  sfree(cur);
2535  }
2536  }
2537  }
2538  {
2539  if (copy->edit_script != NULL) {
2540  if (copy->edit_script->op_type) {
2541  sfree(copy->edit_script->op_type);
2542  }
2543  if (copy->edit_script->num) {
2544  sfree(copy->edit_script->num);
2545  }
2546  sfree(copy->edit_script);
2547  }
2548  }
2549  {
2550  if (copy->fwd_prelim_tback != NULL) {
2551  if (copy->fwd_prelim_tback->edit_ops) {
2553  }
2554  sfree(copy->fwd_prelim_tback);
2555  }
2556  }
2557  {
2558  if (copy->rev_prelim_tback != NULL) {
2559  if (copy->rev_prelim_tback->edit_ops) {
2561  }
2562  sfree(copy->rev_prelim_tback);
2563  }
2564  }
2565  {
2566  if (copy->greedy_align_mem != NULL) {
2567  sfree(copy->greedy_align_mem);
2568  }
2569  }
2570  {
2571  if (copy->dp_mem != NULL) {
2572  sfree(copy->dp_mem);
2573  }
2574  }
2575  {
2576  if (copy->sbp != NULL) {
2577  sfree(copy->sbp);
2578  }
2579  }
2580  sfree(copy);
2581 }
2582 
2583 /**
2584  * Create a "deep" copy of a BlastGapAlignStruct structure.
2585  *
2586  * Non-pointer structure members are copied. Pointers to data which will
2587  * only be read are copied. For data which will be changing, memory for copies
2588  * will be allocated and new pointers will be assigned to them. The process
2589  * repeats down the structure hierarchy until all pointers are dealt with.
2590  *
2591  * @param orig Pointer to BlastGapAlignStruct structure to be copied
2592  * @param sbp Pointer to BlastScoreBlk structure, required to set copy->sbp
2593  *
2594  * @return Pointer to copy of original BlastGapAlignStruct structure
2595  */
2597  BlastGapAlignStruct* orig,
2598  BlastScoreBlk* sbp
2599 )
2600 {
2602  (BlastGapAlignStruct*) calloc(1, sizeof(BlastGapAlignStruct));
2603 
2604  // Copy plain old data (ints, doubles, booleans, ...).
2605  // Any pointer members will be processed separately.
2606  memcpy(copy, orig, sizeof(BlastGapAlignStruct));
2607 
2608  {
2609  GapStateArrayStruct* o = orig->state_struct;
2610  if (o != NULL) {
2612  1,
2613  sizeof(GapStateArrayStruct)
2614  );
2615  copy->state_struct = c;
2616  memcpy(c, o, sizeof(GapStateArrayStruct));
2617  c->state_array = (Uint1*) calloc(c->length, sizeof(Uint1));
2618  int i;
2619  for (i = 0; i < c->length; ++i) {
2620  c->state_array[i] = o->state_array[i];
2621  }
2622  while (o->next != NULL) {
2623  c->next = (GapStateArrayStruct*)
2624  calloc(1, sizeof(GapStateArrayStruct));
2625  c = c->next;
2626  o = o->next;
2627  memcpy(c, o, sizeof(GapStateArrayStruct));
2628  c->state_array = (Uint1*) calloc(c->length, sizeof(Uint1));
2629  int i;
2630  for (i = 0; i < c->length; ++i) {
2631  c->state_array[i] = o->state_array[i];
2632  }
2633  }
2634  }
2635  }
2636  {
2637  GapEditScript* o = orig->edit_script;
2638  if (o != NULL) {
2639  GapEditScript* c = (GapEditScript*) calloc(
2640  1,
2641  sizeof(GapEditScript)
2642  );
2643  copy->edit_script = c;
2644  memcpy(c, o, sizeof(GapEditScript));
2645  c->op_type = (EGapAlignOpType*) calloc(
2646  o->size,
2647  sizeof(EGapAlignOpType)
2648  );
2649  c->num = (Int4*) calloc(o->size, sizeof(Int4));
2650  int i;
2651  for (i = 0; i < o->size; ++i) {
2652  c->op_type[i] = o->op_type[i];
2653  c->num[i] = o->num[i];
2654  }
2655  }
2656  }
2657  {
2659  if (o != NULL) {
2660  GapPrelimEditBlock* c = (GapPrelimEditBlock*) calloc(
2661  1,
2662  sizeof(GapPrelimEditBlock)
2663  );
2664  copy->fwd_prelim_tback = c;
2665  memcpy(c, o, sizeof(GapPrelimEditBlock));
2666  c->edit_ops = calloc(
2667  o->num_ops_allocated,
2668  sizeof(GapPrelimEditScript)
2669  );
2670  int i;
2671  for (i = 0; i < o->num_ops_allocated; ++i) {
2672  c->edit_ops[i].op_type = o->edit_ops[i].op_type;
2673  c->edit_ops[i].num = o->edit_ops[i].num;
2674  }
2675  }
2676  }
2677  {
2679  if (o != NULL) {
2680  GapPrelimEditBlock* c = (GapPrelimEditBlock*) calloc(
2681  1,
2682  sizeof(GapPrelimEditBlock)
2683  );
2684  copy->rev_prelim_tback = c;
2685  memcpy(c, o, sizeof(GapPrelimEditBlock));
2686  c->edit_ops = calloc(
2687  o->num_ops_allocated,
2688  sizeof(GapPrelimEditScript)
2689  );
2690  int i;
2691  for (i = 0; i < o->num_ops_allocated; ++i) {
2692  c->edit_ops[i].op_type = o->edit_ops[i].op_type;
2693  c->edit_ops[i].num = o->edit_ops[i].num;
2694  }
2695  }
2696  }
2697  {
2698  SGreedyAlignMem* o = orig->greedy_align_mem;
2699  if (o != NULL) {
2700  SGreedyAlignMem* c = (SGreedyAlignMem*) calloc(
2701  1,
2702  sizeof(SGreedyAlignMem)
2703  );
2704  copy->greedy_align_mem = c;
2705  memcpy(c, o, sizeof(SGreedyAlignMem));
2706  }
2707  }
2708  {
2709  BlastGapDP* o = orig->dp_mem;
2710  if (o != NULL) {
2711  BlastGapDP* c = (BlastGapDP*) calloc(
2712  orig->dp_mem_alloc,
2713  sizeof(BlastGapDP)
2714  );
2715  copy->dp_mem = c;
2716  memcpy(c, o, orig->dp_mem_alloc * sizeof(BlastGapDP));
2717  }
2718  }
2719  {
2720  copy->sbp = sbp;
2721  }
2722 
2723  return copy;
2724 }
2725 
2726 /**
2727  * Free a BlastScoreBlk copy created by s_BlastScoreBlk_Copy
2728  *
2729  * BlastScoreBlk* pointer "bsb_ptr" should be passed as (&bsb_ptr);
2730  * this function will set bsb_ptr to NULL before returning.
2731  *
2732  * @param copy Pointer to (pointer to BlastScoreBlk to be freed)
2733  */
2734 static
2736 {
2737  BlastScoreBlkFree(*copy);
2738  *copy = NULL;
2739 }
2740 
2741 /**
2742  * Create a "deep" copy of a BlastScoreBlk structure.
2743  *
2744  * Non-pointer structure members are copied. Pointers to data which will
2745  * only be read are copied. For data which will be changing, memory for copies
2746  * will be allocated and new pointers will be assigned to them. The process
2747  * repeats down the structure hierarchy until all pointers are dealt with.
2748  *
2749  * @param program The program type
2750  * @param orig Pointer to BlastScoreBlk structure to be copied
2751  * @param alphabet_code Alphabet code
2752  * @param number_of_contexts Number of contexts
2753  *
2754  * @return Pointer to copy of original BlastScoreBlk structure
2755  */
2756 static
2758  EBlastProgramType program,
2759  BlastScoreBlk* orig,
2760  Uint1 alphabet_code,
2761  Int4 number_of_contexts
2762 )
2763 {
2765  orig->alphabet_code,
2766  orig->number_of_contexts
2767  );
2768  if (copy == NULL) {
2769  return NULL;
2770  }
2771 
2772  copy->alphabet_start = orig->alphabet_start;
2773  copy->name = strdup(orig->name);
2774  copy->comments = orig->comments;
2775  /* Deep-copy orig->matrix */
2776  if (orig->matrix != NULL) {
2777  if (copy->matrix == NULL) {
2778  return BlastScoreBlkFree(copy);
2779  }
2780  SBlastScoreMatrix* m = copy->matrix;
2781  if (m->data != NULL && orig->matrix->data != NULL) {
2782  int i;
2783  for (i = 0; i < orig->matrix->ncols; ++i) {
2784  memcpy(
2785  m->data[i],
2786  orig->matrix->data[i],
2787  m->nrows * sizeof(int)
2788  );
2789  }
2790  }
2791  if (m->freqs != NULL && orig->matrix->freqs != NULL) {
2792  memcpy(
2793  m->freqs,
2794  orig->matrix->freqs,
2795  m->ncols * sizeof(double)
2796  );
2797  }
2798  m->lambda = orig->matrix->lambda;
2799  }
2800  /* Deep-copy orig->psi_matrix */
2801  if (orig->psi_matrix != NULL
2802  && orig->psi_matrix->pssm != NULL) {
2804  if (copy->psi_matrix == NULL) {
2805  return BlastScoreBlkFree(copy);
2806  }
2807  SPsiBlastScoreMatrix* pm = copy->psi_matrix;
2808  SBlastScoreMatrix* m = pm->pssm;
2809  if (m->data != NULL && orig->psi_matrix->pssm->data != NULL) {
2810  int i;
2811  for (i = 0; i < orig->psi_matrix->pssm->ncols; ++i) {
2812  memcpy(
2813  m->data[i],
2814  orig->psi_matrix->pssm->data[i],
2815  m->nrows * sizeof(int)
2816  );
2817  }
2818  }
2819  if (m->freqs != NULL
2820  && orig->psi_matrix->pssm->freqs != NULL) {
2821  memcpy(
2822  m->freqs,
2823  orig->psi_matrix->pssm->freqs,
2824  m->ncols * sizeof(double)
2825  );
2826  }
2827  m->lambda = orig->psi_matrix->pssm->lambda;
2828  if (pm->freq_ratios != NULL
2829  && orig->psi_matrix->freq_ratios != NULL) {
2830  int i;
2831  for (i = 0; i < orig->psi_matrix->pssm->ncols; ++i) {
2832  memcpy(
2833  pm->freq_ratios[i],
2834  orig->psi_matrix->freq_ratios[i],
2835  orig->psi_matrix->pssm->nrows * sizeof(double)
2836  );
2837  }
2838  }
2839  if (orig->psi_matrix->kbp != NULL) {
2840  memcpy(pm->kbp, orig->psi_matrix->kbp, sizeof(Blast_KarlinBlk));
2841  }
2842  }
2845  copy->loscore = orig->loscore;
2846  copy->hiscore = orig->hiscore;
2847  copy->penalty = orig->penalty;
2848  copy->reward = orig->reward;
2849  copy->read_in_matrix = orig->read_in_matrix;
2850  if (Blast_QueryIsPssm(program)) {
2851  copy->kbp = copy->kbp_psi;
2852  copy->kbp_gap = copy->kbp_gap_psi;
2853  } else {
2854  copy->kbp = copy->kbp_std;
2855  copy->kbp_gap = copy->kbp_gap_std;
2856  }
2857  if (orig->gbp != NULL) {
2858  memcpy(copy->gbp, orig->gbp, sizeof(Blast_GumbelBlk));
2859  }
2860  int ctx;
2861  for (ctx = 0; ctx < orig->number_of_contexts; ++ctx) {
2862  if (orig->sfp != NULL && orig->sfp[ctx] != NULL) {
2863  copy->sfp[ctx] = Blast_ScoreFreqNew(
2864  orig->sfp[ctx]->score_min,
2865  orig->sfp[ctx]->score_max
2866  );
2867  if (copy->sfp[ctx] == NULL) {
2868  return BlastScoreBlkFree(copy);
2869  }
2870  copy->sfp[ctx]->obs_min = orig->sfp[ctx]->obs_min;
2871  copy->sfp[ctx]->obs_max = orig->sfp[ctx]->obs_max;
2872  copy->sfp[ctx]->score_avg = orig->sfp[ctx]->score_avg;
2873  int r = orig->sfp[ctx]->score_max - orig->sfp[ctx]->score_min + 1;
2874  memcpy(
2875  copy->sfp[ctx]->sprob0,
2876  orig->sfp[ctx]->sprob0,
2877  r * sizeof(double)
2878  );
2879  }
2880  if (orig->kbp_std != NULL && orig->kbp_std[ctx] != NULL) {
2881  copy->kbp_std[ctx] = Blast_KarlinBlkNew();
2882  if (Blast_KarlinBlkCopy(copy->kbp_std[ctx], orig->kbp_std[ctx]) != 0) {
2883  return BlastScoreBlkFree(copy);
2884  }
2885  }
2886  if (orig->kbp_gap_std != NULL && orig->kbp_gap_std[ctx] != NULL) {
2887  copy->kbp_gap_std[ctx] = Blast_KarlinBlkNew();
2888  if (Blast_KarlinBlkCopy(copy->kbp_gap_std[ctx], orig->kbp_gap_std[ctx]) != 0) {
2889  return BlastScoreBlkFree(copy);
2890  }
2891  }
2892  if (orig->kbp_psi != NULL && orig->kbp_psi[ctx] != NULL) {
2893  copy->kbp_psi[ctx] = Blast_KarlinBlkNew();
2894  if (Blast_KarlinBlkCopy(copy->kbp_psi[ctx], orig->kbp_psi[ctx]) != 0) {
2895  return BlastScoreBlkFree(copy);
2896  }
2897  }
2898  if (orig->kbp_gap_psi != NULL && orig->kbp_gap_psi[ctx] != NULL) {
2899  copy->kbp_gap_psi[ctx] = Blast_KarlinBlkNew();
2900  if (Blast_KarlinBlkCopy(copy->kbp_gap_psi[ctx], orig->kbp_gap_psi[ctx]) != 0) {
2901  return BlastScoreBlkFree(copy);
2902  }
2903  }
2904  if (Blast_QueryIsPssm(program)) {
2905  copy->kbp[ctx] = copy->kbp_psi[ctx];
2906  copy->kbp_gap[ctx] = copy->kbp_gap_psi[ctx];
2907  } else {
2908  copy->kbp[ctx] = copy->kbp_std[ctx];
2909  copy->kbp_gap[ctx] = copy->kbp_gap_std[ctx];
2910  }
2911  }
2912  if (orig->kbp_ideal != NULL) {
2913  copy->kbp_ideal = Blast_KarlinBlkNew();
2914  if (Blast_KarlinBlkCopy(copy->kbp_ideal, orig->kbp_ideal) != 0) {
2915  return BlastScoreBlkFree(copy);
2916  }
2917  }
2918  copy->ambiguous_res = (Uint1*) calloc(orig->ambig_size, sizeof(Uint1));
2919  if (orig->ambiguous_res != NULL) {
2920  memcpy(copy->ambiguous_res, orig->ambiguous_res, orig->ambig_size);
2921  }
2922  copy->ambig_size = orig->ambig_size;
2923  copy->ambig_occupy = orig->ambig_occupy;
2924  copy->round_down = orig->round_down;
2925 
2926  return copy;
2927 }
2928 
2929 /**
2930  * Recompute alignments for each match found by the gapped BLAST
2931  * algorithm. Single-thread adapter to Blast_RedoAlignmentCore_MT.
2932  */
2933 Int2
2935  BLAST_SequenceBlk * queryBlk,
2936  const BlastQueryInfo* queryInfo,
2937  BlastScoreBlk* sbp,
2938  BLAST_SequenceBlk * subjectBlk,
2939  const BlastSeqSrc* seqSrc,
2940  Int4 default_db_genetic_code,
2941  BlastHSPList * thisMatch,
2942  BlastHSPStream* hsp_stream,
2943  BlastScoringParameters* scoringParams,
2944  const BlastExtensionParameters* extendParams,
2945  const BlastHitSavingParameters* hitParams,
2946  const PSIBlastOptions* psiOptions,
2947  BlastHSPResults* results)
2948 {
2950  program_number,
2951  1, /* number of threads */
2952  queryBlk,
2953  queryInfo,
2954  sbp,
2955  subjectBlk,
2956  seqSrc,
2957  default_db_genetic_code,
2958  thisMatch,
2959  hsp_stream,
2960  scoringParams,
2961  extendParams,
2962  hitParams,
2963  psiOptions,
2964  results
2965  );
2966 }
2967 
2968 /**
2969  * Recompute alignments for each match found by the gapped BLAST
2970  * algorithm.
2971  */
2972 Int2
2974  Uint4 num_threads,
2975  BLAST_SequenceBlk * queryBlk,
2976  const BlastQueryInfo* queryInfo,
2977  BlastScoreBlk* sbp,
2978  BLAST_SequenceBlk * subjectBlk,
2979  const BlastSeqSrc* seqSrc,
2980  Int4 default_db_genetic_code,
2981  BlastHSPList * thisMatch,
2982  BlastHSPStream* hsp_stream,
2983  BlastScoringParameters* scoringParams,
2984  const BlastExtensionParameters* extendParams,
2985  const BlastHitSavingParameters* hitParams,
2986  const PSIBlastOptions* psiOptions,
2987  BlastHSPResults* results)
2988 {
2989  int status_code = 0; /* return value code */
2990  /* the factor by which to scale the scoring system in order to
2991  * obtain greater precision */
2992  double localScalingFactor;
2993  /* forbidden ranges for each database position (used in
2994  * Smith-Waterman alignments) */
2995  Blast_ForbiddenRanges forbidden = {0,};
2996  /* a collection of alignments for each query sequence with
2997  * sequences from the database */
2998  BlastCompo_Heap* redoneMatches = NULL;
2999  /* stores all fields needed for computing a compositionally
3000  * adjusted score matrix using Newton's method */
3001  Blast_CompositionWorkspace** NRrecord_tld = NULL;
3002  /* loop index */
3003  int query_index;
3004  /* number of queries in the concatenated query */
3005  int numQueries = queryInfo->num_queries;
3006  /* number of contexts in the concatenated query */
3007  int numContexts = queryInfo->last_context + 1;
3008  /* number of contexts within a query */
3009  int numFrames = (program_number == eBlastTypeBlastx) ? 6:1;
3010  /* keeps track of gapped alignment params */
3011  BlastGapAlignStruct* gapAlign = NULL;
3012  /* the values of the search parameters that will be recorded, altered
3013  * in the search structure in this routine, and then restored before
3014  * the routine exits. */
3015  BlastKappa_SavedParameters *savedParams = NULL;
3016  /* All alignments above this value will be reported, no matter how many. */
3017  double inclusion_ethresh;
3018 
3019  BlastHSPResults* local_results = NULL;
3020 
3021  BlastCompo_QueryInfo** query_info_tld = NULL;
3022  int* numContexts_tld = NULL;
3023  int* compositionTestIndex_tld = NULL;
3024  Blast_RedoAlignParams** redo_align_params_tld = NULL;
3025  BLAST_SequenceBlk** subjectBlk_tld = NULL;
3026  Boolean positionBased = (Boolean) (sbp->psi_matrix != NULL);
3027  ECompoAdjustModes compo_adjust_mode =
3029  Boolean smithWaterman =
3030  (Boolean) (extendParams->options->eTbackExt == eSmithWatermanTbck);
3031 
3032  /* which test function do we use to see if a composition-adjusted
3033  p-value is desired; value needs to be passed in eventually*/
3034  int compositionTestIndex = extendParams->options->unifiedP;
3035  Uint1* genetic_code_string = GenCodeSingletonFind(default_db_genetic_code);
3036 
3037  ASSERT(program_number == eBlastTypeBlastp ||
3038  program_number == eBlastTypeTblastn ||
3039  program_number == eBlastTypeBlastx ||
3040  program_number == eBlastTypePsiBlast ||
3041  program_number == eBlastTypeRpsBlast ||
3042  program_number == eBlastTypeRpsTblastn);
3043 
3044  if (0 == strcmp(scoringParams->options->matrix, "BLOSUM62_20") &&
3045  compo_adjust_mode == eNoCompositionBasedStats) {
3046  return -1; /* BLOSUM62_20 only makes sense if
3047  * compo_adjust_mode is on */
3048  }
3049  if (positionBased) {
3050  /* Position based searches can only use traditional
3051  * composition based stats */
3052  if ((int) compo_adjust_mode > 1) {
3053  compo_adjust_mode = eCompositionBasedStats;
3054  }
3055  /* A position-based search can only have one query */
3056  ASSERT(queryInfo->num_queries == 1);
3057  ASSERT(queryBlk->length == (Int4)sbp->psi_matrix->pssm->ncols);
3058  }
3059 
3060  if ((int) compo_adjust_mode > 1 &&
3061  !Blast_FrequencyDataIsAvailable(scoringParams->options->matrix)) {
3062  return -1; /* Unsupported matrix */
3063  }
3064  /*****************/
3065  inclusion_ethresh = (psiOptions /* this can be NULL for CBl2Seq */
3066  ? psiOptions->inclusion_ethresh
3068  ASSERT(inclusion_ethresh != 0.0);
3069 
3070  int actual_num_threads = 1;
3071 #ifdef _OPENMP
3072  actual_num_threads = num_threads;
3073 #endif
3074 
3075  /* Initialize savedParams */
3076  savedParams =
3077  s_SavedParametersNew(queryInfo->max_length, numContexts,
3078  compo_adjust_mode, positionBased);
3079  if (savedParams == NULL) {
3080  status_code = -1;
3081  goto function_cleanup;
3082  }
3083  status_code =
3084  s_RecordInitialSearch(savedParams, sbp, scoringParams,
3085  queryInfo->max_length, compo_adjust_mode,
3086  positionBased);
3087  if (status_code != 0) {
3088  goto function_cleanup;
3089  }
3090 
3091  if (compo_adjust_mode != eNoCompositionBasedStats) {
3092  if((0 == strcmp(scoringParams->options->matrix, "BLOSUM62_20"))) {
3093  localScalingFactor = SCALING_FACTOR / 10;
3094  } else {
3095  localScalingFactor = SCALING_FACTOR;
3096  }
3097  } else {
3098  localScalingFactor = 1.0;
3099  }
3100  s_RescaleSearch(sbp, scoringParams, numContexts, localScalingFactor);
3101  status_code =
3102  BLAST_GapAlignStructNew(scoringParams, extendParams,
3103  (seqSrc) ? BlastSeqSrcGetMaxSeqLen(seqSrc)
3104  : subjectBlk->length,
3105  sbp, &gapAlign);
3106  if (status_code != 0) {
3107  return (Int2) status_code;
3108  }
3109 
3110  if(smithWaterman) {
3111  status_code =
3112  Blast_ForbiddenRangesInitialize(&forbidden, queryInfo->max_length);
3113  if (status_code != 0) {
3114  goto function_cleanup;
3115  }
3116  }
3117  redoneMatches = calloc(numQueries, sizeof(BlastCompo_Heap));
3118  if (redoneMatches == NULL) {
3119  status_code = -1;
3120  goto function_cleanup;
3121  }
3122  for (query_index = 0; query_index < numQueries; query_index++) {
3123  status_code =
3124  BlastCompo_HeapInitialize(&redoneMatches[query_index],
3125  hitParams->options->hitlist_size,
3126  inclusion_ethresh);
3127  if (status_code != 0) {
3128  goto function_cleanup;
3129  }
3130  }
3131 
3132  BlastCompo_Heap** redoneMatches_tld =
3133  (BlastCompo_Heap**) calloc(
3134  actual_num_threads,
3135  sizeof(BlastCompo_Heap*)
3136  );
3137  BlastCompo_Alignment*** alignments_tld =
3138  (BlastCompo_Alignment***) calloc(
3139  actual_num_threads,
3140  sizeof(BlastCompo_Alignment**)
3141  );
3142  BlastCompo_Alignment*** incoming_align_set_tld =
3143  (BlastCompo_Alignment***) calloc(
3144  actual_num_threads,
3145  sizeof(BlastCompo_Alignment**)
3146  );
3147  BlastKappa_SavedParameters** savedParams_tld =
3148  (BlastKappa_SavedParameters**) calloc(
3149  actual_num_threads,
3151  );
3152  BlastScoreBlk** sbp_tld =
3153  (BlastScoreBlk**) calloc(
3154  actual_num_threads,
3155  sizeof(BlastScoreBlk*)
3156  );
3157  BlastKappa_GappingParamsContext* gapping_params_context_tld =
3159  actual_num_threads,
3161  );
3162  Int4*** matrix_tld =
3163  (Int4***) calloc(
3164  actual_num_threads,
3165  sizeof(Int4**)
3166  );
3167 
3168  NRrecord_tld =
3169  (Blast_CompositionWorkspace**) calloc(
3170  actual_num_threads,
3172  );
3173 
3174  subjectBlk_tld =
3175  (BLAST_SequenceBlk**) calloc(
3176  actual_num_threads,
3177  sizeof(BLAST_SequenceBlk*)
3178  );
3179  redo_align_params_tld =
3180  (Blast_RedoAlignParams**) calloc(
3181  actual_num_threads,
3182  sizeof(Blast_RedoAlignParams*)
3183  );
3184  int* status_code_tld =
3185  (int*) calloc(
3186  actual_num_threads,
3187  sizeof(int)
3188  );
3189  BlastSeqSrc** seqsrc_tld =
3190  (BlastSeqSrc**) calloc(
3191  actual_num_threads,
3192  sizeof(BlastSeqSrc*)
3193  );
3194  BlastGapAlignStruct** gap_align_tld =
3195  (BlastGapAlignStruct**) calloc(
3196  actual_num_threads,
3197  sizeof(BlastGapAlignStruct*)
3198  );
3199  BlastScoringParameters** score_params_tld =
3200  (BlastScoringParameters**) calloc(
3201  actual_num_threads,
3202  sizeof(BlastScoringParameters*)
3203  );
3204  BlastHitSavingParameters** hit_params_tld =
3205  (BlastHitSavingParameters**) calloc(
3206  actual_num_threads,
3207  sizeof(BlastHitSavingParameters*)
3208  );
3209  BlastHSPResults** results_tld =
3210  (BlastHSPResults**) calloc(
3211  actual_num_threads,
3212  sizeof(BlastHSPResults*)
3213  );
3214  query_info_tld =
3215  (BlastCompo_QueryInfo**) calloc(
3216  actual_num_threads,
3217  sizeof(BlastCompo_QueryInfo*)
3218  );
3219  numContexts_tld =
3220  (int*) calloc(
3221  actual_num_threads,
3222  sizeof(int)
3223  );
3224  compositionTestIndex_tld =
3225  (int*) calloc(
3226  actual_num_threads,
3227  sizeof(int)
3228  );
3229 
3230  int i;
3231  for (i = 0; i < actual_num_threads; ++i) {
3232  query_info_tld[i] = s_GetQueryInfo(
3233  queryBlk->sequence,
3234  queryInfo,
3235  (program_number == eBlastTypeBlastx)
3236  );
3237  if (query_info_tld[i] == NULL) {
3238  status_code = -1;
3239  goto function_cleanup;
3240  }
3241 
3242  sbp_tld[i] = s_BlastScoreBlk_Copy(
3243  program_number,
3244  sbp,
3245  sbp->alphabet_code,
3246  sbp->number_of_contexts
3247  );
3248 
3249  numContexts_tld[i] = numContexts;
3250  compositionTestIndex_tld[i] = compositionTestIndex;
3251  seqsrc_tld[i] = BlastSeqSrcCopy(seqSrc);
3252  gap_align_tld[i] =
3253  s_BlastGapAlignStruct_Copy(gapAlign, sbp_tld[i]);
3254  score_params_tld[i] = scoringParams;
3255  hit_params_tld[i] = (BlastHitSavingParameters*) hitParams;
3256  results_tld[i] =
3257  Blast_HSPResultsNew(queryInfo->num_queries);
3258  subjectBlk_tld[i] = subjectBlk;
3259 
3260  redoneMatches_tld[i] =
3261  (BlastCompo_Heap*) calloc(numQueries, sizeof(BlastCompo_Heap));
3262  if (redoneMatches_tld[i] == NULL) {
3263  status_code = -1;
3264  goto function_cleanup;
3265  }
3266  for (query_index = 0; query_index < numQueries; query_index++) {
3267  status_code =
3268  BlastCompo_HeapInitialize(&redoneMatches_tld[i][query_index],
3269  hitParams->options->hitlist_size,
3270  inclusion_ethresh);
3271  if (status_code != 0) {
3272  goto function_cleanup;
3273  }
3274  }
3275 
3276  alignments_tld[i] = (BlastCompo_Alignment**) calloc(
3277  numContexts,
3278  sizeof(BlastCompo_Alignment*)
3279  );
3280  incoming_align_set_tld[i] = (BlastCompo_Alignment**) calloc(
3281  numFrames,
3282  sizeof(BlastCompo_Alignment*)
3283  );
3284 
3285  savedParams_tld[i] = s_SavedParametersNew(
3286  queryInfo->max_length,
3287  numContexts,
3288  compo_adjust_mode,
3289  positionBased
3290  );
3291  if (savedParams_tld[i] == NULL) {
3292  status_code = -1;
3293  goto function_cleanup;
3294  }
3295  status_code = s_RecordInitialSearch(
3296  savedParams_tld[i],
3297  sbp,
3298  scoringParams,
3299  queryInfo->max_length,
3300  compo_adjust_mode,
3301  positionBased
3302  );
3303  if (status_code != 0) {
3304  goto function_cleanup;
3305  }
3306 
3307  if ((int) compo_adjust_mode > 1 && !positionBased) {
3308  NRrecord_tld[i] = Blast_CompositionWorkspaceNew();
3309  status_code = Blast_CompositionWorkspaceInit(
3310  NRrecord_tld[i],
3311  scoringParams->options->matrix
3312  );
3313  if (status_code != 0) {
3314  goto function_cleanup;
3315  }
3316  }
3317 
3318  gapping_params_context_tld[i].gap_align = gap_align_tld[i];
3319  gapping_params_context_tld[i].scoringParams = score_params_tld[i];
3320  gapping_params_context_tld[i].sbp = sbp_tld[i];
3321  gapping_params_context_tld[i].localScalingFactor = localScalingFactor;
3322  gapping_params_context_tld[i].prog_number = program_number;
3323 
3324  redo_align_params_tld[i] =
3326  &gapping_params_context_tld[i],
3327  queryBlk,
3328  queryInfo,
3329  hitParams,
3330  extendParams
3331  );
3332  if (redo_align_params_tld[i] == NULL) {
3333  status_code = -1;
3334  goto function_cleanup;
3335  }
3336 
3337  if (positionBased) {
3338  matrix_tld[i] = sbp_tld[i]->psi_matrix->pssm->data;
3339  } else {
3340  matrix_tld[i] = sbp_tld[i]->matrix->data;
3341  }
3342  /**** Validate parameters *************/
3343  if (matrix_tld[i] == NULL) {
3344  goto function_cleanup;
3345  }
3346  }
3347 
3348  /*
3349  * There are two use cases here.
3350  * (1) hsp_stream == NULL, so single match is passed in thisMatch.
3351  * Also, seqSrc == NULL and subjectBlk are != NULL.
3352  * (2) hsp_stream != NULL, so one or more matches are taken from
3353  * hsp_stream, and thisMatch is (probably) NULL.
3354  * Also, seqSrc != NULL, subjectBlk and thisMatch are == NULL.
3355  */
3356  struct BlastHSPListLinkedList {
3358  struct BlastHSPListLinkedList* next;
3359  };
3360  typedef struct BlastHSPListLinkedList BlastHSPListLinkedList;
3361 
3362  BlastHSPList** theseMatches = NULL;
3363  int numMatches = 0;
3364  if (hsp_stream == NULL) {
3365  theseMatches = (BlastHSPList**) calloc(1, sizeof(BlastHSPList*));
3366  *theseMatches = thisMatch;
3367  numMatches = 1;
3368  } else {
3369  BlastHSPList* localMatch = NULL;
3370  BlastHSPListLinkedList* head = NULL;
3371  BlastHSPListLinkedList* tail = NULL;
3372  /*
3373  * Collect matches from stream into linked list, counting them
3374  * along the way.
3375  */
3376  while (BlastHSPStreamRead(hsp_stream, &localMatch)
3377  != kBlastHSPStream_Eof) {
3378  BlastHSPListLinkedList* entry =
3379  (BlastHSPListLinkedList*) calloc(
3380  1,
3381  sizeof(BlastHSPListLinkedList)
3382  );
3383  entry->match = localMatch;
3384  if (head == NULL) {
3385  head = entry;
3386  } else {
3387  tail->next = entry;
3388  }
3389  tail = entry;
3390  ++numMatches;
3391  }
3392  /*
3393  * Convert linked list of matches into array.
3394  */
3395  theseMatches =
3396  (BlastHSPList**) calloc(numMatches, sizeof(BlastHSPList*));
3397  int i;
3398  for (i = 0; i < numMatches; ++i) {
3399  theseMatches[i] = head->match;
3400  BlastHSPListLinkedList* here = head;
3401  head = head->next;
3402  sfree(here);
3403  }
3404  }
3405 
3406  Boolean interrupt = FALSE;
3407 #pragma omp parallel \
3408  default(none) num_threads(actual_num_threads) \
3409  if(actual_num_threads>1) \
3410  shared(interrupt, seqsrc_tld, score_params_tld, hit_params_tld, \
3411  gap_align_tld, results_tld, \
3412  redoneMatches_tld, \
3413  stderr, \
3414  numQueries, numMatches, theseMatches, \
3415  numFrames, program_number, subjectBlk_tld, positionBased, \
3416  default_db_genetic_code, localScalingFactor, queryInfo, \
3417  sbp, smithWaterman, compositionTestIndex_tld, forbidden, \
3418  NRrecord_tld, actual_num_threads, sbp_tld, \
3419  matrix_tld, query_info_tld, numContexts_tld, \
3420  genetic_code_string, queryBlk, compo_adjust_mode, \
3421  alignments_tld, incoming_align_set_tld, savedParams_tld, \
3422  scoringParams, redo_align_params_tld, \
3423  status_code_tld)
3424  {
3425  int b;
3426 #pragma omp for schedule(static)
3427  for (b = 0; b < numMatches; ++b) {
3428 #pragma omp flush(interrupt)
3429  if (!interrupt) {
3430  BlastCompo_Alignment** alignments = NULL;
3431  BlastCompo_Alignment** incoming_align_set = NULL;
3432  Blast_CompositionWorkspace* NRrecord = NULL;
3433  BlastCompo_QueryInfo* query_info = NULL;
3434 
3435  int numAligns[6];
3436  Blast_KarlinBlk* kbp = NULL;
3437  BlastCompo_MatchingSequence matchingSeq = {0,};
3438  BlastHSPList* hsp_list = NULL;
3439  BlastCompo_Alignment* incoming_aligns = NULL;
3440  Blast_RedoAlignParams* redo_align_params;
3441  double best_evalue;
3442  Int4 best_score;
3443  int query_index;
3444  int context_index;
3445  int frame_index;
3446  void* discarded_aligns = NULL;
3447  BlastSeqSrc* seqSrc;
3448  BlastScoringParameters* scoringParams;
3449  BlastHitSavingParameters* hitParams;
3450  BlastCompo_Heap* redoneMatches;
3451  BlastScoreBlk* sbp;
3452  BLAST_SequenceBlk* subjectBlk;
3453  int numContexts;
3454  int compositionTestIndex;
3455  /* existing alignments for a match */
3456  Int4** matrix; /* score matrix */
3457  int* pStatusCode;
3458 
3459  double pvalueForThisPair = (-1); /* p-value for this match
3460  for composition; -1 == no adjustment*/
3461  double LambdaRatio; /*lambda ratio*/
3462 
3463  int tid = 0;
3464 #ifdef _OPENMP
3465  tid = omp_get_thread_num();
3466 #endif
3467  seqSrc = seqsrc_tld[tid];
3468  scoringParams = score_params_tld[tid];
3469  hitParams = hit_params_tld[tid];
3470  redoneMatches = redoneMatches_tld[tid];
3471  alignments = alignments_tld[tid];
3472  incoming_align_set = incoming_align_set_tld[tid];
3473  NRrecord = NRrecord_tld[tid];
3474  sbp = sbp_tld[tid];
3475  redo_align_params = redo_align_params_tld[tid];
3476  matrix = matrix_tld[tid];
3477  pStatusCode = &status_code_tld[tid];
3478  query_info = query_info_tld[tid];
3479  numContexts = numContexts_tld[tid];
3480  compositionTestIndex = compositionTestIndex_tld[tid];
3481  subjectBlk = subjectBlk_tld[tid];
3482 
3483  BlastHSPList* localMatch = theseMatches[b];
3484 
3485  if (localMatch->hsp_array == NULL) {
3486  if (seqSrc) {
3487  continue;
3488  }
3489 #pragma omp critical(intrpt)
3490  interrupt = TRUE;
3491 #pragma omp flush(interrupt)
3492  continue;
3493  }
3494 
3496  localMatch->best_evalue,
3497  redoneMatches,
3498  numQueries
3499  )) {
3500  Blast_HSPListFree(localMatch);
3501  if (seqSrc) {
3502  continue;
3503  }
3504 #pragma omp critical(intrpt)
3505  interrupt = TRUE;
3506 #pragma omp flush(interrupt)
3507  continue;
3508  }
3509 
3510  query_index = localMatch->query_index;
3511  context_index = query_index * numFrames;
3512  /* Get the sequence for this match */
3513  if (seqSrc && BlastSeqSrcGetSupportsPartialFetching(seqSrc)) {
3515  program_number,
3516  (BlastSeqSrc*) seqSrc,
3517  (const BlastHSPList**)&localMatch,
3518  1
3519  );
3520  }
3521 
3522  if (subjectBlk) {
3523  matchingSeq.length = subjectBlk->length;
3524  matchingSeq.index = -1;
3525  matchingSeq.local_data = subjectBlk;
3526  } else {
3527  *pStatusCode = s_MatchingSequenceInitialize(
3528  &matchingSeq,
3529  program_number,
3530  seqSrc,
3531  default_db_genetic_code,
3532  localMatch->oid
3533  );
3534  if (*pStatusCode != 0) {
3535  /*
3536  * some sequences may have been excluded by membit filtering
3537  * so this is not really an exception
3538  */
3539  *pStatusCode = 0;
3540  goto match_loop_cleanup;
3541  }
3542  }
3543  *pStatusCode = s_ResultHspToDistinctAlign(
3544  incoming_align_set, /* o */
3545  numAligns, /* o */
3546  localMatch->hsp_array, /* i */
3547  localMatch->hspcnt, /* i */
3548  context_index, /* i */
3549  queryInfo, /* i */
3550  localScalingFactor /* i */
3551  );
3552  if (*pStatusCode != 0) {
3553  goto match_loop_cleanup;
3554  }
3555 
3556  hsp_list = Blast_HSPListNew(0);
3557  for (frame_index = 0;
3558  frame_index < numFrames;
3559  frame_index++, context_index++) {
3560  incoming_aligns = incoming_align_set[frame_index];
3561  if (!incoming_aligns) {
3562  continue;
3563  }
3564  /*
3565  * All alignments in thisMatch should be to the same query
3566  */
3567  kbp = sbp->kbp_gap[context_index];
3568  if (smithWaterman) {
3569  *pStatusCode =
3571  alignments,
3572  redo_align_params,
3573  incoming_aligns,
3574  numAligns[frame_index],
3575  kbp->Lambda,
3576  kbp->logK,
3577  &matchingSeq,
3578  query_info,
3579  numQueries,
3580  matrix,
3581  BLASTAA_SIZE,
3582  NRrecord,
3583  &forbidden,
3584  redoneMatches,
3585  &pvalueForThisPair,
3586  compositionTestIndex,
3587  &LambdaRatio
3588  );
3589  } else {
3590  *pStatusCode =
3592  alignments, // thread-local
3593  redo_align_params, // thread-local
3594  incoming_aligns, // thread-local
3595  numAligns[frame_index], // local
3596  kbp->Lambda, // thread-local
3597  &matchingSeq, // thread-local
3598  -1, // const
3599  query_info, // thread-local
3600  numContexts, // thread-local
3601  matrix, // thread-local
3602  BLASTAA_SIZE, // const
3603  NRrecord, // thread-local
3604  &pvalueForThisPair, // local
3605  compositionTestIndex, // thread-local
3606  &LambdaRatio // local
3607  );
3608  }
3609 
3610  if (*pStatusCode != 0) {
3611  goto match_loop_cleanup;
3612  }
3613 
3614  if (alignments[context_index] != NULL) {
3615  Int2 qframe = frame_index;
3616  if (program_number == eBlastTypeBlastx) {
3617  if (qframe < 3) {
3618  qframe++;
3619  } else {
3620  qframe = 2 - qframe;
3621  }
3622  }
3623  *pStatusCode =
3625  &alignments[context_index],
3626  matchingSeq.index,
3627  queryInfo, qframe);
3628  if (*pStatusCode) {
3629  goto match_loop_cleanup;
3630  }
3631  }
3632  BlastCompo_AlignmentsFree(&incoming_aligns, NULL);
3633  incoming_align_set[frame_index] = NULL;
3634  }
3635 
3636  if (hsp_list->hspcnt > 1) {
3638  &hsp_list->hspcnt);
3639  }
3640  *pStatusCode =
3641  s_HitlistEvaluateAndPurge(&best_score, &best_evalue,
3642  hsp_list,
3643  seqSrc,
3644  matchingSeq.length,
3645  program_number,
3646  queryInfo, context_index,
3647  sbp, hitParams,
3648  pvalueForThisPair, LambdaRatio,
3649  matchingSeq.index);
3650  if (*pStatusCode != 0) {
3651  goto query_loop_cleanup;
3652  }
3653  if (best_evalue <= hitParams->options->expect_value) {
3654  /* The best alignment is significant */
3655  s_HSPListNormalizeScores(hsp_list, kbp->Lambda, kbp->logK,
3656  localScalingFactor);
3658  queryBlk,
3659  queryInfo,
3660  subjectBlk,
3661  seqSrc,
3662  hsp_list,
3663  scoringParams->options,
3664  genetic_code_string,
3665  sbp
3666  );
3667  if (!seqSrc) {
3668  goto query_loop_cleanup;
3669  }
3671  &redoneMatches[query_index],
3672  best_evalue,
3673  best_score,
3674  localMatch->oid
3675  )) {
3676  *pStatusCode =
3678  &redoneMatches[query_index],
3679  hsp_list,
3680  best_evalue,
3681  best_score,
3682  localMatch->oid,
3683  &discarded_aligns
3684  );
3685  if (*pStatusCode == 0) {
3686  hsp_list = NULL;
3687  }
3688  } else {
3689  hsp_list = Blast_HSPListFree(hsp_list);
3690  }
3691 
3692  if (*pStatusCode) {
3693  goto query_loop_cleanup;
3694  }
3695  if (discarded_aligns != NULL) {
3696  Blast_HSPListFree(discarded_aligns);
3697  }
3698  }
3699 query_loop_cleanup:
3700 match_loop_cleanup:
3701  if (seqSrc) {
3702  localMatch = Blast_HSPListFree(localMatch);
3703  } else {
3704  Blast_HSPListSwap(localMatch, hsp_list);
3705  localMatch->oid = hsp_list->oid;
3706  }
3707  hsp_list = Blast_HSPListFree(hsp_list);
3708 
3709  if (*pStatusCode != 0) {
3710  for (context_index = 0;
3711  context_index < numContexts;
3712  context_index++) {
3714  &alignments[context_index],
3716  );
3717  }
3718  }
3719  s_MatchingSequenceRelease(&matchingSeq);
3720  BlastCompo_AlignmentsFree(&incoming_aligns, NULL);
3721  if (*pStatusCode != 0 || !seqSrc) {
3722 #pragma omp critical(intrpt)
3723  interrupt = TRUE;
3724 #pragma omp flush(interrupt)
3725  continue;
3726  }
3727 
3728  } /* end of if(!interrupt) */
3729  }
3730 #pragma omp barrier
3731 
3732  /*
3733  * end of omp parallel section
3734  */
3735  }
3736 
3737 function_cleanup:
3738 
3739  for (i = 0; i < actual_num_threads; ++i) {
3740  if (status_code_tld[i] != 0) {
3741  status_code = status_code_tld[i];
3742  }
3743  }
3744  for (i = 0; i < actual_num_threads; ++i) {
3745  if (seqSrc && status_code == 0) {
3747  results_tld[i],
3748  redoneMatches_tld[i],
3749  hitParams->options->hitlist_size
3750  );
3751  if (redoneMatches_tld[i] != NULL) {
3752  int qi;
3753  for (qi = 0; qi < numQueries; ++qi) {
3754  sfree(redoneMatches_tld[i][qi].array);
3755  sfree(redoneMatches_tld[i][qi].heapArray);
3756  }
3757  s_ClearHeap(redoneMatches_tld[i]);
3758  }
3759  } else {
3760  if (redoneMatches_tld[i] != NULL) {
3761  int qi;
3762  for (qi = 0; qi < numQueries; ++qi) {
3763  sfree(redoneMatches_tld[i][qi].array);
3764  sfree(redoneMatches_tld[i][qi].heapArray);
3765  }
3766  s_ClearHeap(redoneMatches_tld[i]);
3767  }
3768  }
3769  sfree(redoneMatches_tld[i]);
3770  }
3771  if (redoneMatches != NULL) {
3772  int qi;
3773  for (qi = 0; qi < numQueries; ++qi) {
3774  sfree(redoneMatches[qi].array);
3775  sfree(redoneMatches[qi].heapArray);
3776  }
3777  s_ClearHeap(redoneMatches);
3778  }
3779 
3780  if (hsp_stream != NULL) {
3781  /* Reduce results from all threads and continue with business as usual */
3782  SThreadLocalDataArray* thread_data =
3783  SThreadLocalDataArrayNew(actual_num_threads);
3784  int i;
3785  for (i = 0; i < actual_num_threads; ++i) {
3786  SThreadLocalData* tdi = thread_data->tld[i];
3787  BlastHSPResults* rdi = results_tld[i];
3788  tdi->hit_params = hit_params_tld[i];
3789  hit_params_tld[i] = NULL;
3790  tdi->results =
3791  (BlastHSPResults*) calloc(1, sizeof(BlastHSPResults));
3792  tdi->results->num_queries = rdi->num_queries;
3793  tdi->results->hitlist_array =
3794  (BlastHitList**) calloc(
3795  tdi->results->num_queries,
3796  sizeof(BlastHitList*)
3797  );
3798  int j;
3799  for (j = 0; j < tdi->results->num_queries; ++j) {
3800  tdi->results->hitlist_array[j] = rdi->hitlist_array[j];
3801  rdi->hitlist_array[j] = NULL;
3802  }
3803  }
3804  local_results = SThreadLocalDataArrayConsolidateResults(thread_data);
3805  ASSERT(local_results);
3806 
3807  /* post-traceback pipes */
3808  BlastHSPStreamTBackClose(hsp_stream, local_results);
3809 
3810  for (i = 0; i < local_results->num_queries; ++i) {
3811  results->hitlist_array[i] = local_results->hitlist_array[i];
3812  local_results->hitlist_array[i] = NULL;
3813  }
3814  for (i = 0; i < actual_num_threads; ++i) {
3815  thread_data->tld[i]->hit_params = NULL;
3816  int j;
3817  for (j = 0; j < local_results->num_queries; ++j) {
3818  thread_data->tld[i]->results->hitlist_array[j] =
3820  thread_data->tld[i]->results->hitlist_array[j]
3821  );
3822  }
3823  sfree(thread_data->tld[i]->results->hitlist_array);
3824  sfree(thread_data->tld[i]->results);
3825  thread_data->tld[i] = SThreadLocalDataFree(thread_data->tld[i]);
3826  results_tld[i] = Blast_HSPResultsFree(results_tld[i]);
3827  s_FreeBlastCompo_QueryInfoArray(&query_info_tld[i], numContexts);
3828  }
3829  sfree(thread_data->tld);
3830  sfree(thread_data);
3831  Blast_HSPResultsFree(local_results);
3832  }
3833 
3834  if (redoneMatches != NULL) {
3835  for (query_index = 0; query_index < numQueries; query_index++) {
3836  BlastCompo_HeapRelease(&redoneMatches[query_index]);
3837  }
3838  sfree(redoneMatches);
3839  redoneMatches = NULL;
3840  }
3841  if (smithWaterman) {
3842  Blast_ForbiddenRangesRelease(&forbidden);
3843  }
3844  if (gapAlign != NULL) {
3845  gapAlign = BLAST_GapAlignStructFree(gapAlign);
3846  }
3847  s_RestoreSearch(sbp, scoringParams, savedParams, queryBlk->length,
3848  positionBased, compo_adjust_mode);
3849  s_SavedParametersFree(&savedParams);
3850 
3851  for (i = 0; i < actual_num_threads; ++i) {
3852  s_BlastScoreBlk_Free(&sbp_tld[i]);
3853  gap_align_tld[i]->sbp = NULL;
3854  s_BlastGapAlignStruct_Free(gap_align_tld[i]);
3855  Blast_RedoAlignParamsFree(&redo_align_params_tld[i]);
3856  sfree(alignments_tld[i]);
3857  sfree(incoming_align_set_tld[i]);
3858  Blast_CompositionWorkspaceFree(&NRrecord_tld[i]);
3859  s_SavedParametersFree(&savedParams_tld[i]);
3860  BlastSeqSrcFree(seqsrc_tld[i]);
3861  }
3862  sfree(alignments_tld);
3863  sfree(compositionTestIndex_tld);
3864  sfree(gap_align_tld);
3865  sfree(gapping_params_context_tld);
3866  sfree(hit_params_tld);
3867  sfree(incoming_align_set_tld);
3868  sfree(matrix_tld);
3869  sfree(NRrecord_tld);
3870  sfree(numContexts_tld);
3871  sfree(query_info_tld);
3872  sfree(redo_align_params_tld);
3873  sfree(redoneMatches_tld);
3874  sfree(results_tld);
3875  sfree(savedParams_tld);
3876  sfree(sbp_tld);
3877  sfree(score_params_tld);
3878  sfree(seqsrc_tld);
3879  sfree(status_code_tld);
3880  sfree(subjectBlk_tld);
3881  sfree(theseMatches);
3882 
3883  return (Int2) status_code;
3884 }
3885 
3886 
3887 
size_t nrows
number of rows
Definition: blast_stat.h:143
static void s_SavedParametersFree(BlastKappa_SavedParameters **searchParams)
Release the data associated with a BlastKappa_SavedParameters and delete the object.
Definition: blast_kappa.c:1964
Structures and API used for saving BLAST hits.
double ** posFreqs
PSSM's frequency ratios [alias].
Definition: blast_posit.h:62
Data structure to support MT traceback: this encapsulates the data that each thread modifies...
static void s_BlastGapAlignStruct_Free(BlastGapAlignStruct *copy)
Free a BlastGapAlignStruct copy created by s_BlastGapAlignStruct_Copy.
Definition: blast_kappa.c:2524
int BlastHSPStreamRead(BlastHSPStream *hsp_stream, BlastHSPList **hsp_list)
Invokes the user-specified read function for this BlastHSPStream implementation.
GapPrelimEditScript * edit_ops
array of edit operations
Definition: gapinfo.h:74
#define GET_TRANSLATED_LENGTH(l, f)
BlastHSPResults * SThreadLocalDataArrayConsolidateResults(SThreadLocalDataArray *array)
Extracts a single, consolidated BlastHSPResults structure from its input for single threaded processi...
Int2 BlastFilteringOptionsFromString(EBlastProgramType program_number, const char *instructions, SBlastFilterOptions **filtering_options, Blast_Message **blast_message)
Produces SBlastFilterOptions from a string that has been traditionally supported in blast...
Definition: blast_filter.c:436
Boolean do_sum_stats
TRUE if sum stats will be used.
CS_CONTEXT * ctx
Definition: t0006.c:12
static int s_ResultHspToDistinctAlign(BlastCompo_Alignment **self, int *numAligns, BlastHSP *hsp_array[], Int4 hspcnt, int init_context, const BlastQueryInfo *queryInfo, double localScalingFactor)
Convert an array of HSPs to a list of BlastCompo_Alignment objects.
Definition: blast_kappa.c:758
void _PSICopyMatrix_int(int **dest, int **src, unsigned int ncols, unsigned int nrows)
Copies src matrix into dest matrix, both of which must be int matrices with dimensions ncols by nrows...
EBlastEncoding
Different types of sequence encodings for sequence retrieval from the BLAST database.
static int s_GetStartFreqRatios(double **returnRatios, const char *matrixName)
Fill a two-dimensional array with the frequency ratios that underlie the named score matrix...
Definition: blast_kappa.c:637
GapEditScript * GapEditScriptDelete(GapEditScript *esp)
Free edit script structure.
Definition: gapinfo.c:75
Declares a "heap" data structure that is used to store computed alignments when composition adjustmen...
BlastScoringOptions * options
User-provided values for these params.
Private interface to support the multi-threaded traceback.
Declarations for several linear algebra routines.
Within the composition adjustment module, an object of type BlastCompo_Alignment represents a distinc...
static BlastCompo_Alignment * s_RedoOneAlignment(BlastCompo_Alignment *in_align, EMatrixAdjustRule matrix_adjust_rule, BlastCompo_SequenceData *query_data, BlastCompo_SequenceRange *query_range, int ccat_query_length, BlastCompo_SequenceData *subject_data, BlastCompo_SequenceRange *subject_range, int full_subject_length, BlastCompo_GappingParams *gapping_params)
A callback: calculate the traceback for one alignment by performing an x-drop alignment in both direc...
Definition: blast_kappa.c:1885
int BlastCompo_HeapInitialize(BlastCompo_Heap *self, int heapThreshold, double ecutoff)
Initialize a new BlastCompo_Heap; parameters to this function correspond directly to fields in the Bl...
Definition: compo_heap.c:414
Structure used to pass data into the scaling routines.
Definition: blast_posit.h:73
Uint1 * gen_code_string
for nucleotide subject sequences (tblast[nx]), the genetic code used to create a translated protein s...
Definition: blast_def.h:272
double Blast_KarlinLambdaNR(Blast_ScoreFreq *sfp, double initialLambdaGuess)
Calculates the parameter Lambda given an initial guess for its value.
Definition: blast_stat.c:2566
int Blast_GetPartialTranslation(const Uint1 *nucl_seq, Int4 nucl_length, Int2 frame, const Uint1 *genetic_code, Uint1 **translation_buffer_ptr, Int4 *protein_length, Uint1 **mixed_seq_ptr)
Get one frame translation - needed when only parts of subject sequences are translated.
Definition: blast_util.c:1139
Header file for composition-based statistics.
BlastHSPList * Blast_HSPListFree(BlastHSPList *hsp_list)
Deallocate memory for an HSP list structure as well as all it's components.
Definition: blast_hits.c:1517
BlastScoreBlk * sbp
the score block for this search
Definition: blast_kappa.c:1707
BlastCompo_SequenceRange - a struct whose instances represent a range of data in a sequence...
BlastHitSavingParameters * hit_params
Hit saving parameters.
double ** data
The actual frequency ratios.
unsigned NCBI_INT8_TYPE Uint8
Unsigned 8 byte sized integer.
Definition: ncbitype.h:146
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
Int4 score
This HSP's raw score.
Definition: blast_hits.h:127
signed int Int4
Alias for signed int.
Definition: ncbitype.h:120
Smith-waterman finds optimal scores, then ALIGN_EX to find alignment.
SBlastScoreMatrix * matrix
scoring matrix data
Definition: blast_stat.h:185
unsigned int Uint4
Alias for unsigned int.
Definition: ncbitype.h:121
Int2 frame
Translation frame.
Definition: blast_hits.h:97
Int2 Blast_HSPListSaveHSP(BlastHSPList *hsp_list, BlastHSP *hsp)
Saves HSP information into a BlastHSPList structure.
Definition: blast_hits.c:1729
Int2 BLAST_GappedAlignmentWithTraceback(EBlastProgramType program, const Uint1 *query, const Uint1 *subject, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 q_start, Int4 s_start, Int4 query_length, Int4 subject_length, Boolean *fence_hit)
Perform a gapped alignment with traceback.
SThreadLocalData * SThreadLocalDataFree(SThreadLocalData *tld)
Deallocate the SThreadLocalData structure passed in.
All filtering options.
Uint1 * oof_sequence
Mixed-frame protein representation of a nucleotide sequence for out-of-frame alignment.
Definition: blast_def.h:259
EBlastProgramType prog_number
identifies the type of blast search being performed.
Definition: blast_kappa.c:881
#define NEAR_IDENTICAL_BITS_PER_POSITION
Definition: blast_kappa.c:2385
Functions to do gapped alignment with traceback.
static int s_HSPListFromDistinctAlignments(BlastHSPList *hsp_list, BlastCompo_Alignment **alignments, int oid, const BlastQueryInfo *queryInfo, int frame)
Converts a list of objects of type BlastCompo_Alignment to an new object of type BlastHSPList and ret...
Definition: blast_kappa.c:297
SGreedyAlignMem * greedy_align_mem
Preallocated memory for the greedy gapped extension.
void * context
traceback info for a gapped alignment
const BlastSeqSrc * seq_src
BLAST sequence data source.
Definition: blast_kappa.c:885
double lambda
derived value of the matrix lambda -RMH-
Definition: blast_stat.h:145
int num_queries
Number of query sequences.
BLAST_SequenceBlk * seq
Sequence to return, if NULL, it should allocated by GetSeqBlkFnPtr (using BlastSeqBlkNew or BlastSetU...
Definition: blast_seqsrc.h:280
size_t ncols
number of columns
Definition: blast_stat.h:142
Int4 ** origMatrix
The original matrix values.
Definition: blast_kappa.c:1949
Complete type definition of Blast Sequence Source ADT.
Definition: blast_seqsrc.c:43
BLAST filtering functions.
int score
the score of this alignment
BlastLinkHSPParameters * link_hsp_params
Parameters for linking HSPs with sum statistics; linking is not done if NULL.
BlastHSPResults * results
Structure to store results from this thread.
Blast_KarlinBlk ** kbp_psi
K-A parameters for position-based alignments.
Definition: blast_stat.h:212
Int4 num
Number of operations.
Definition: gapinfo.h:67
Uint1 alphabet_code
NCBI alphabet code.
Definition: blast_stat.h:180
#define KAPPA_TBLASTN_NO_SEG_SEQUENCE
Compile-time option; if set to a true value, then blastp runs that use Blast_RedoAlignmentCore to com...
Definition: blast_kappa.c:77
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Definition: blast_hits.h:157
Uint4 ncols
number of columns (query_length)
EBlastTbackExt eTbackExt
type of traceback extension.
Uint8 * words
list words in the query, needed for testing whether the query and a subject are nearly identical ...
void * local_data
holds any sort of data that is necessary for callbacks to access the sequence
SThreadLocalDataArray * SThreadLocalDataArrayNew(Uint4 num_threads)
Allocate a new SThreadLocalDataArray structure.
BLAST-specific information that is associated with a BlastCompo_MatchingSequence. ...
Definition: blast_kappa.c:879
int ** scaled_pssm
scaled PSSM (scores)
Kappa_posSearchItems * Kappa_posSearchItemsFree(Kappa_posSearchItems *posSearch)
Deallocates the Kappa_posSearchItems structure.
Definition: blast_posit.c:75
Structure used for scoring calculations.
Definition: blast_stat.h:177
void Blast_Int4MatrixFromFreq(int **matrix, int size, double **freq, double Lambda)
Compute an integer-valued amino-acid score matrix from a set of score frequencies.
Preliminary version of GapEditBlock, used directly by the low- level dynamic programming routines...
Definition: gapinfo.h:73
Int4 BlastSeqSrcGetMaxSeqLen(const BlastSeqSrc *seq_src)
Get the length of the longest sequence in the sequence source.
Definition: blast_seqsrc.c:193
#define MIN(a, b)
returns smaller of a and b.
Definition: ncbi_std.h:110
int Blast_RedoOneMatch(BlastCompo_Alignment **alignments, Blast_RedoAlignParams *params, BlastCompo_Alignment *incoming_aligns, int hspcnt, double Lambda, BlastCompo_MatchingSequence *matchingSeq, int ccat_query_length, BlastCompo_QueryInfo query_info[], int numQueries, int **matrix, int alphsize, Blast_CompositionWorkspace *NRrecord, double *pvalueThisPair, int compositionTestIndex, double *LambdaRatio)
Recompute all alignments for one query/subject pair using composition-based statistics or composition...
struct BlastCompo_Alignment * next
the next alignment in the list
Blast_KarlinBlk ** kbp_gap
K-A parameters for gapped alignments.
Definition: blast_stat.h:208
long BLAST_Nint(double x)
Nearest integer.
Definition: ncbi_math.c:437
double * BLAST_GetStandardAaProbabilities(void)
Get the standard amino acid probabilities.
Definition: blast_util.c:1321
const Uint1 * Blast_HSPGetTargetTranslation(SBlastTargetTranslation *target_t, const BlastHSP *hsp, Int4 *translated_length)
Returns a buffer with a protein translated from nucleotide.
Definition: blast_hits.c:1122
Int2 Blast_HSPListReapByEvalue(BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options)
Discard the HSPs above the e-value threshold from the HSP list.
Definition: blast_hits.c:1939
Int2 BlastTargetTranslationNew(BLAST_SequenceBlk *subject_blk, const Uint1 *gen_code_string, EBlastProgramType program_number, Boolean is_ooframe, SBlastTargetTranslation **target)
Sets up structure for target translation.
Definition: blast_util.c:1266
for(len=0;yy_str[len];++len)
BlastHitList ** hitlist_array
Array of results for individual query sequences.
Definition: blast_hits.h:185
Structure used to pass data into the scaling routines.
Definition: blast_posit.h:56
BlastHitSavingOptions * options
The original (unparsed) options.
Int4 ALIGN_EX(const Uint1 *A, const Uint1 *B, Int4 M, Int4 N, Int4 *a_offset, Int4 *b_offset, GapPrelimEditBlock *edit_block, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 query_offset, Boolean reversed, Boolean reverse_sequence, Boolean *fence_hit)
Low level function to perform dynamic programming gapped extension with traceback.
#define ASSERT
macro for assert.
Definition: ncbi_std.h:105
Boolean Blast_SubjectIsTranslated(EBlastProgramType p)
Returns true if the subject is translated.
Definition: blast_program.c:63
BlastContextInfo * contexts
Information per context.
Int4 gap_open
a penalty for the existence of a gap
Definition: blast_kappa.c:1945
signed NCBI_INT8_TYPE Int8
Signed 8 byte sized integer.
Definition: ncbitype.h:143
static void s_SWFindFinalEndsUsingXdrop(BlastCompo_SequenceData *query, Int4 queryStart, Int4 queryEnd, BlastCompo_SequenceData *subject, Int4 matchStart, Int4 matchEnd, BlastGapAlignStruct *gap_align, const BlastScoringParameters *scoringParams, Int4 score, Int4 *queryAlignmentExtent, Int4 *matchAlignmentExtent, Int4 *newScore)
Redo a S-W alignment using an x-drop alignment.
Definition: blast_kappa.c:832
Boolean matrix_only_scoring
Score ungapped/gapped alignment only using the matrix parameters and with raw scores.
Definition: blast_stat.h:189
Options used in protein BLAST only (PSI, PHI, RPS and translated BLAST) Some of these possibly should...
#define NULL
Definition: ncbistd.hpp:225
int ** posPrivateMatrix
Scaled PSSM [alias].
Definition: blast_posit.h:60
EBlastEncoding Blast_TracebackGetEncoding(EBlastProgramType program_number)
Get the subject sequence encoding type for the traceback, given a program number. ...
static int s_SequenceGetTranslatedRange(const BlastCompo_MatchingSequence *self, const BlastCompo_SequenceRange *range, BlastCompo_SequenceData *seqData, const BlastCompo_SequenceRange *q_range, BlastCompo_SequenceData *queryData, const Uint8 *query_words, const BlastCompo_Alignment *align, const Boolean shouldTestIdentical, const ECompoAdjustModes compo_adjust_mode, const Boolean isSmithWaterman, Boolean *subject_maybe_biased)
Obtain a string of translated data.
Definition: blast_kappa.c:1461
Edit script: linked list of correspondencies between two sequences.
Definition: gapinfo.h:57
int Blast_RedoOneMatchSmithWaterman(BlastCompo_Alignment **alignments, Blast_RedoAlignParams *params, BlastCompo_Alignment *incoming_aligns, int hspcnt, double Lambda, double logK, BlastCompo_MatchingSequence *matchingSeq, BlastCompo_QueryInfo query_info[], int numQueries, int **matrix, int alphsize, Blast_CompositionWorkspace *NRrecord, Blast_ForbiddenRanges *forbidden, BlastCompo_Heap *significantMatches, double *pvalueThisPair, int compositionTestIndex, double *LambdaRatio)
Recompute all alignments for one query/subject pair using the Smith-Waterman algorithm and possibly a...
Int4 last_context
Index of the last element of the context array.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Definition: blast_hits.h:153
Uint1 * sequence_start
Start of sequence, usually one byte before sequence as that byte is a NULL sentinel byte...
Definition: blast_def.h:244
Blast_KarlinBlk ** kbp_gap_orig
copy of the original gapped Karlin-Altschul block corresponding to the first context ...
Definition: blast_kappa.c:1953
EBlastProgramType prog_number
the type of search being performed
Definition: blast_kappa.c:1710
void Blast_ForbiddenRangesRelease(Blast_ForbiddenRanges *self)
Release the storage associated with the fields of self, but do not delete self.
Private interfaces to support the multi-threaded traceback in conjunction with the BlastHSPStream...
#define MAX(a, b)
returns larger of a and b.
Definition: ncbi_std.h:115
void * context
a pointer to any additional gapping parameters that may be needed by the calling routine.
Blast_GumbelBlk * gbp
Gumbel parameters for FSC.
Definition: blast_stat.h:209
Int4 hitlist_size
Maximal number of database sequences to return results for.
Defines the interface to interact with the genetic code singleton object.
Int2 comp_adjustment_method
which mode of composition adjustment was used; relevant only for blastp and tblastn ...
Definition: blast_hits.h:139
Blast_KarlinBlk * Blast_KarlinBlkFree(Blast_KarlinBlk *kbp)
Deallocates the KarlinBlk.
Definition: blast_stat.c:956
int ** Nlm_Int4MatrixNew(int nrows, int ncols)
Create and return a new Int4 matrix.
BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)
Deallocate all BlastSeqLoc objects in a chain.
Definition: blast_filter.c:737
int frame
the subject frame
double scale_factor
multiplier for all cutoff scores
BlastHSP * Blast_HSPFree(BlastHSP *hsp)
Deallocate memory for an HSP structure.
Definition: blast_hits.c:105
int i
double eff_search_space
effective search space of searches involving this query
Int4 query_stop
query end offseet of current alignment
Blast_KarlinBlk ** kbp_std
K-A parameters for ungapped alignments.
Definition: blast_stat.h:212
Int4 hiscore
Max.
Definition: blast_stat.h:198
Int2 Blast_HSPInit(Int4 query_start, Int4 query_end, Int4 subject_start, Int4 subject_end, Int4 query_gapped_start, Int4 subject_gapped_start, Int4 query_context, Int2 query_frame, Int2 subject_frame, Int4 score, GapEditScript **gap_edit, BlastHSP **ret_hsp)
Allocates BlastHSP and inits with information from input.
Definition: blast_hits.c:126
#define KAPPA_BLASTP_NO_SEG_SEQUENCE
Compile-time option; if set to a true value, then blastp runs that use Blast_RedoAlignmentCore to com...
Definition: blast_kappa.c:69
Int4 gapped_start
Where the gapped extension started.
Definition: blast_hits.h:100
Int4 dp_mem_alloc
current number of structures allocated
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:612
struct BlastKappa_SavedParameters BlastKappa_SavedParameters
A BlastKappa_SavedParameters holds the value of certain search parameters on entry to RedoAlignmentCo...
BlastScoreBlk * BlastScoreBlkNew(Uint1 alphabet, Int4 number_of_contexts)
Allocates and initializes BlastScoreBlk.
Definition: blast_stat.c:884
void BlastHSPStreamTBackClose(BlastHSPStream *hsp_stream, BlastHSPResults *results)
Closes the BlastHSPStream structure after traceback.
int queryEnd
one past the end of the alignment in the query
void Blast_CompositionWorkspaceFree(Blast_CompositionWorkspace **NRrecord)
Free memory associated with a record of type Blast_CompositionWorkspace.
Int8 eff_searchsp
Effective search space for this context.
static int s_NewAlignmentUsingXdrop(BlastCompo_Alignment **pnewAlign, Int4 *pqueryEnd, Int4 *pmatchEnd, Int4 queryStart, Int4 matchStart, Int4 score, BlastCompo_SequenceData *query, BlastCompo_SequenceRange *query_range, Int4 ccat_query_length, BlastCompo_SequenceData *subject, BlastCompo_SequenceRange *subject_range, Int4 full_subject_length, BlastCompo_GappingParams *gapping_params, EMatrixAdjustRule matrix_adjust_rule)
A callback used when performing SmithWaterman alignments: Calculate the traceback for one alignment b...
Definition: blast_kappa.c:1799
void BlastCompo_AlignmentsFree(BlastCompo_Alignment **palign, void(*free_context)(void *))
Recursively free all alignments in the singly linked list whose head is *palign.
unsigned char Uint1
Alias for unsigned char.
Definition: ncbitype.h:117
Structure holding all information about an HSP.
Definition: blast_hits.h:126
#define GET_NUCL_LENGTH(l)
BlastGapDP * dp_mem
scratch structures for dynamic programming
Holds score frequencies used in calculation of Karlin-Altschul parameters for an ungapped search...
Definition: blast_stat.h:128
Boolean check_oid_exclusion
Check whether an OID is excluded due to overlapping filtering.
Definition: blast_seqsrc.h:275
double * sprob
arrays for frequency of given score, shifted down by score_min.
Definition: blast_stat.h:135
Various auxiliary BLAST utility functions.
int Kappa_impalaScaling(Kappa_posSearchItems *posSearch, Kappa_compactSearchItems *compactSearch, double scalingFactor, Boolean doBinarySearch, BlastScoreBlk *sbp)
Copied from posit2.c.
Definition: blast_posit.c:393
GapEditScript * Blast_PrelimEditBlockToGapEditScript(GapPrelimEditBlock *rev_prelim_tback, GapPrelimEditBlock *fwd_prelim_tback)
Convert the initial list of traceback actions from a non-OOF gapped alignment into a blast edit scrip...
BlastCompo_SequenceData - represents a string of amino acids or nucleotides.
BlastScoreBlk * BlastScoreBlkFree(BlastScoreBlk *sbp)
Deallocates BlastScoreBlk as well as all associated structures.
Definition: blast_stat.c:965
Int4 context
Context number of query.
Definition: blast_hits.h:133
static BlastScoreBlk * s_BlastScoreBlk_Copy(EBlastProgramType program, BlastScoreBlk *orig, Uint1 alphabet_code, Int4 number_of_contexts)
Create a "deep" copy of a BlastScoreBlk structure.
Definition: blast_kappa.c:2757
double * standardProb
Array of standard residue probabilities, as those returned by BLAST_GetStandardAaProbabilities.
Definition: blast_posit.h:98
Stores the frequency ratios along with their bit scale factor.
Structure supporting the gapped alignment.
Int4 oid
The ordinal id of the subject sequence this HSP list is for.
Definition: blast_hits.h:154
Scoring matrix data used in PSI-BLAST.
Definition: blast_stat.h:149
Int4 gap_x_dropoff_final
X-dropoff value for the final gapped extension (raw)
void Blast_MaskTheResidues(Uint1 *buffer, Int4 length, Boolean is_na, const BlastSeqLoc *mask_loc, Boolean reverse, Int4 offset)
Masks the letters in buffer.
Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence pro...
Don't use composition based statistics.
Int2 Blast_HSPListGetEvalues(EBlastProgramType program_number, const BlastQueryInfo *query_info, Int4 subject_length, BlastHSPList *hsp_list, Boolean gapped_calculation, Boolean RPS_prelim, const BlastScoreBlk *sbp, double gap_decay_rate, double scaling_factor)
Calculate the expected values for all HSPs in a hit list, without using the sum statistics.
Definition: blast_hits.c:1786
Structure to keep memory for state structure.
Definition: gapinfo.h:81
ListNode * comments
Comments about scoring matrix.
Definition: blast_stat.h:184
void Blast_ReadAaComposition(Blast_AminoAcidComposition *composition, int alphsize, const Uint1 *sequence, int length)
Compute the true amino acid composition of a sequence, ignoring ambiguity characters and other nonsta...
Int4 loscore
Min.
Definition: blast_stat.h:197
Private interface for blast_gapalign.c.
Blast_KarlinBlk ** kbp_gap_psi
K-A parameters for psi alignments.
Definition: blast_stat.h:212
int origin
origin of the query in a concatenated query
Int4 size
Size of above arrays.
Definition: gapinfo.h:60
A BlastKappa_SavedParameters holds the value of certain search parameters on entry to RedoAlignmentCo...
Definition: blast_kappa.c:1944
BlastCompo_SequenceData seq
sequence data for the query
Int2 Blast_HitListUpdate(BlastHitList *hit_list, BlastHSPList *hsp_list)
Insert a new HSP list into the hit list.
Definition: blast_hits.c:3127
#define NCBI_CONST_UINT8(v)
Definition: ncbi_std.h:194
double BLAST_KarlinPtoE(double p)
Convert a P-value to an E-value.
Definition: blast_stat.c:4158
static string query
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
Definition: blast_hits.c:1533
Int4 length_adjustment
Length adjustment for boundary conditions.
double original_expect_value
expect value on entry
Definition: blast_kappa.c:1950
Blast_KarlinBlk * Blast_KarlinBlkNew(void)
Callocs a Blast_KarlinBlk.
Definition: blast_stat.c:2860
BlastSeqSrcGetSeqArg seq_arg
argument to GetSequence method of the BlastSeqSrc (
Definition: blast_kappa.c:886
Used to hold a set of positions, mostly used for filtering.
Definition: blast_def.h:204
#define ABS(a)
returns absolute value of a (|a|)
Definition: ncbi_std.h:120
Int4 index
index of this sequence in the database
static BlastCompo_GappingParams * s_GappingParamsNew(BlastKappa_GappingParamsContext *context, const BlastExtensionParameters *extendParams, int num_queries)
Create a new object of type BlastCompo_GappingParams.
Definition: blast_kappa.c:2341
static void s_RestoreSearch(BlastScoreBlk *sbp, BlastScoringParameters *scoring, const BlastKappa_SavedParameters *searchParams, int query_length, Boolean positionBased, ECompoAdjustModes compo_adjust_mode)
Restore the parameters that were adjusted to their original values.
Definition: blast_kappa.c:2134
A BlastCompo_MatchingSequence represents a subject sequence to be aligned with the query...
BlastSeg query
Query sequence info.
Definition: blast_hits.h:131
int gap_extend
penalty for extending a gapped alignment by one residue
double evalue
This HSP's e-value.
Definition: blast_hits.h:130
Boolean is_ooframe
Should out-of-frame gapping be used in a translated search?
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
BlastHitList * Blast_HitListFree(BlastHitList *hitlist)
Deallocate memory for the hit list.
Definition: blast_hits.c:3023
Definitions used to get joint probabilities for a scoring matrix.
Int4 num_queries
Number of queries in this search.
Definition: blast_kappa.c:1954
Kappa_compactSearchItems * Kappa_compactSearchItemsFree(Kappa_compactSearchItems *compactSearch)
Deallocates the Kappa_compactSearchItems structure.
Definition: blast_posit.c:140
Int4 obs_max
highest observed (actual) scores
Definition: blast_stat.h:132
int queryStart
the start of the alignment in the query
static const Blast_RedoAlignCallbacks redo_align_callbacks
Callbacks used by the Blast_RedoOneMatch* routines.
Definition: blast_kappa.c:2377
Scoring matrix used in BLAST.
Definition: blast_stat.h:139
Internal representation of a PSSM in various stages of its creation and its dimensions.
int begin
the starting index of the range
GapStateArrayStruct * state_struct
Structure to keep extension state information.
Utilities for dealing with BLAST HSPs in the core of BLAST.
static int s_SequenceGetRange(const BlastCompo_MatchingSequence *self, const BlastCompo_SequenceRange *s_range, BlastCompo_SequenceData *seqData, const BlastCompo_SequenceData *query, const BlastCompo_SequenceRange *q_range, BlastCompo_SequenceData *queryData, const Uint8 *query_words, const BlastCompo_Alignment *align, const Boolean shouldTestIdentical, const ECompoAdjustModes compo_adjust_mode, const Boolean isSmithWaterman, Boolean *subject_maybe_biased)
Obtain the sequence data that lies within the given range.
Definition: blast_kappa.c:1657
Information about target translations.
Definition: blast_def.h:311
A version of GapEditScript used to store initial results from the gapped alignment routines...
Definition: gapinfo.h:65
Int2 Blast_RedoAlignmentCore_MT(EBlastProgramType program_number, Uint4 num_threads, BLAST_SequenceBlk *queryBlk, const BlastQueryInfo *queryInfo, BlastScoreBlk *sbp, BLAST_SequenceBlk *subjectBlk, const BlastSeqSrc *seqSrc, Int4 default_db_genetic_code, BlastHSPList *thisMatch, BlastHSPStream *hsp_stream, BlastScoringParameters *scoringParams, const BlastExtensionParameters *extendParams, const BlastHitSavingParameters *hitParams, const PSIBlastOptions *psiOptions, BlastHSPResults *results)
Recompute alignments for each match found by the gapped BLAST algorithm.
Definition: blast_kappa.c:2973
int queryIndex
index of the query in a concatenated query
Int4 cutoff_score_min
smallest cutoff score across all contexts
static void s_HitlistReapContained(BlastHSP *hsp_array[], Int4 *hspcnt)
Remove from a hitlist all HSPs that are completely contained in an HSP that occurs earlier in the lis...
Definition: blast_kappa.c:216
Int4 obs_min
lowest observed (actual) scores
Definition: blast_stat.h:131
int matchStart
the start of the alignment in the subject
Boolean read_in_matrix
If TRUE, matrix is read in, otherwise produce one from penalty and reward above.
Definition: blast_stat.h:202
Prototypes for portable math library (ported from C Toolkit)
#define CODON_LENGTH
Codons are always of length 3.
Definition: blast_def.h:63
static int s_MatrixInfoInit(Blast_MatrixInfo *self, BLAST_SequenceBlk *queryBlk, BlastScoreBlk *sbp, double scale_factor, const char *matrixName)
Initialize an object of type Blast_MatrixInfo.
Definition: blast_kappa.c:2185
SBlastScoreMatrix * pssm
position-specific score matrix
Definition: blast_stat.h:150
static int match(register const unsigned char *eptr, register const uschar *ecode, const unsigned char *mstart, int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb, int flags, unsigned int rdepth)
Definition: pcre_exec.c:431
Auxiliary structure for dynamic programming gapped extension.
Int4 score_min
lowest allowed scores
Definition: blast_stat.h:129
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm. ...
Definition: blast_program.h:72
#define Boolean
Definition: ncbistd.hpp:136
#define CONTAINED_IN_HSP(a, b, c, d, e, f)
TRUE if c is between a and b; f between d and e.
int gap_open
penalty for opening a gap
Scoring parameters block Contains scoring-related information that is actually used for the blast sea...
static int s_ScalePosMatrix(int **fillPosMatrix, const char *matrixName, double **posFreqs, Uint1 *query, int queryLength, BlastScoreBlk *sbp, double scale_factor)
Produce a scaled-up version of the position-specific matrix with a given set of position-specific res...
Definition: blast_kappa.c:682
#define GET_SEQ_FRAME(f)
static void s_FreeEditScript(void *edit_script)
A callback used to free an EditScript that has been stored in a BlastCompo_Alignment.
Definition: blast_kappa.c:275
double best_evalue
Smallest e-value for HSPs in this list.
Definition: blast_hits.h:162
Collected information about a query.
Int2 ambig_occupy
How many occupied?
Definition: blast_stat.h:219
#define BLASTP_MASK_INSTRUCTIONS
Default instructions and mask residue for SEG filtering.
Definition: blast_kappa.c:1402
double expect_value
The expect value cut-off threshold for an HSP, or a combined hit if sum statistics is used...
SFreqRatios * _PSIMatrixFrequencyRatiosNew(const char *matrix_name)
Retrive the matrix's frequency ratios.
Blast_ScoreFreq ** sfp
score frequencies for scoring matrix.
Definition: blast_stat.h:205
GapPrelimEditBlock * rev_prelim_tback
traceback from left extensions
int ** pssm
PSSM (scores)
static Uint8 s_GetHash(const Uint1 *data, int word_size)
Get hash for a word of word_size residues assuming 28-letter alphabet.
Definition: blast_kappa.c:1105
void BlastCompo_HeapRelease(BlastCompo_Heap *self)
Release the storage associated with the fields of a BlastCompo_Heap.
Definition: compo_heap.c:432
Kappa_posSearchItems * Kappa_posSearchItemsNew(unsigned int queryLength, const char *matrix_name, int **posPrivateMatrix, double **posFreqs)
Allocates a new Kappa_posSearchItems structure.
Definition: blast_posit.c:44
static void s_FillResultsFromCompoHeaps(BlastHSPResults *results, BlastCompo_Heap heaps[], Int4 hitlist_size)
Convert an array of BlastCompo_Heap objects to a BlastHSPResults structure.
Definition: blast_kappa.c:2479
Blast_KarlinBlk * kbp_ideal
Ideal values (for query with average database composition).
Definition: blast_stat.h:216
Work arrays used to perform composition-based matrix adjustment.
BlastSeqSrc * BlastSeqSrcCopy(const BlastSeqSrc *seq_src)
Copy function: needed to guarantee thread safety.
Definition: blast_seqsrc.c:138
BlastScoreBlk * sbp
Pointer to the scoring information block.
static Boolean s_TestNearIdentical(const BlastCompo_SequenceData *seqData, const int seqOffset, const BlastCompo_SequenceData *queryData, const int queryOffset, const Uint8 *query_words, const BlastCompo_Alignment *align)
Test whether the aligned parts of two sequences that have a high-scoring gapless alignment are nearly...
Definition: blast_kappa.c:1247
void _PSICopyMatrix_double(double **dest, double **src, unsigned int ncols, unsigned int nrows)
Copies src matrix into dest matrix, both of which must be double matrices with dimensions ncols by nr...
static void s_AdjustEvaluesForComposition(BlastHSPList *hsp_list, double comp_p_value, const BlastSeqSrc *seqSrc, Int4 subject_length, const BlastContextInfo *query_context, double LambdaRatio, int subject_id)
Adjusts the E-values in a BLAST_HitList to be composites of a composition-based P-value and a score/a...
Definition: blast_kappa.c:127
void Blast_HSPListSortByScore(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by score.
Definition: blast_hits.c:1349
Structure to hold the Karlin-Altschul parameters.
Definition: blast_stat.h:66
Int4 subject_stop
subject end offset of current alignment
ECompoAdjustModes
An collection of constants that specify all permissible modes of composition adjustment.
Int4 penalty
penalty for mismatch in blastn.
Definition: blast_stat.h:199
Uint1 * buffer
if non-nil, points to memory that must be freed when this instance of BlastCompo_SequenceData is dele...
void Blast_HSPListSwap(BlastHSPList *list1, BlastHSPList *list2)
Swaps the two HSP lists via structure assignment.
Definition: blast_hits.c:1589
Parameters used to compute gapped alignments.
Int4 * num
Array of number of operations.
Definition: gapinfo.h:59
Int4 length
length of this matching sequence
Data and data-structures needed to perform a gapped alignment.
Definition: blast_kappa.c:1701
Int2 BLAST_GapAlignStructNew(const BlastScoringParameters *score_params, const BlastExtensionParameters *ext_params, Uint4 max_subject_length, BlastScoreBlk *sbp, BlastGapAlignStruct **gap_align_ptr)
Initializes the BlastGapAlignStruct structure.
struct BlastKappa_SequenceInfo BlastKappa_SequenceInfo
BLAST-specific information that is associated with a BlastCompo_MatchingSequence. ...
int Blast_FrequencyDataIsAvailable(const char *matrix_name)
Retrieve the background letter probabilities implicitly used in constructing the score matrix matrix_...
Int4 unifiedP
Indicates unified P values to be used in blastp or tblastn.
double gap_x_dropoff_final
X-dropoff value for the final gapped extension (in bits)
_PSIInternalPssmData * _PSIInternalPssmDataNew(Uint4 query_length, Uint4 alphabet_size)
Allocates a new _PSIInternalPssmData structure.
BlastSeqSrc * BlastSeqSrcFree(BlastSeqSrc *seq_src)
Frees the BlastSeqSrc structure by invoking the destructor function set by the user-defined construct...
Definition: blast_seqsrc.c:112
double scale_factor
the original scale factor
Definition: blast_kappa.c:1948
double logK
natural log of K value used in statistics
Definition: blast_stat.h:69
static string subject
EMatrixAdjustRule
An collection of constants that specify all rules that may be used to generate a compositionally adju...
int end
one beyond the last item in the range
static int s_CreateWordArray(const Uint1 *seq_data, Int4 seq_len, Uint8 **words)
Definition: blast_kappa.c:2230
Int4 subject_start
subject start offset current alignment
static void s_RescaleSearch(BlastScoreBlk *sbp, BlastScoringParameters *sp, int num_queries, double scale_factor)
Rescale the search parameters in the search object and options object to obtain more precision...
Definition: blast_kappa.c:2103
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
Boolean BlastSeqSrcGetSupportsPartialFetching(const BlastSeqSrc *seq_src)
Find if the Blast Sequence Source supports partial fetching.
Definition: blast_seqsrc.c:251
Callbacks used by Blast_RedoOneMatch and Blast_RedoOneMatchSmithWaterman routines.
SPsiBlastScoreMatrix * psi_matrix
PSSM and associated data.
Definition: blast_stat.h:186
Default implementation of BlastHSPStream.
struct BlastKappa_GappingParamsContext BlastKappa_GappingParamsContext
Data and data-structures needed to perform a gapped alignment.
Int4 s_GetSubjectLength(Int4 total_subj_length, EBlastProgramType program_number)
Definition: blast_kappa.c:356
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it...
Composition-based statistics as in NAR 29:2994-3005, 2001.
void BLAST_SetupPartialFetching(EBlastProgramType program_number, BlastSeqSrc *seq_src, const BlastHSPList **hsp_list, Int4 num_hsplists)
Attempts to set up partial fetching, if it fails (e.g.
ncbi::TMaskedQueryRegions mask
static int s_DoSegSequenceData(BlastCompo_SequenceData *seqData, EBlastProgramType program_name, Boolean *is_seq_biased)
Filter low complexity regions from the sequence data; uses the SEG algorithm.
Definition: blast_kappa.c:1414
double localScalingFactor
the amount by which this search has been scaled
Definition: blast_kappa.c:1708
char * name
name of scoring matrix.
Definition: blast_stat.h:183
BLAST_SequenceBlk * BlastSequenceBlkFree(BLAST_SequenceBlk *seq_blk)
Deallocate memory for a sequence block.
Definition: blast_util.c:245
An instance of Blast_ForbiddenRanges is used by the Smith-Waterman algorithm to represent ranges in t...
Private interface for Position Iterated BLAST API, contains the PSSM generation engine.
Blast_CompositionWorkspace * Blast_CompositionWorkspaceNew(void)
Create a new Blast_CompositionWorkspace object, allocating memory for all its component arrays...
static int s_RecordInitialSearch(BlastKappa_SavedParameters *searchParams, BlastScoreBlk *sbp, const BlastScoringParameters *scoring, int query_length, ECompoAdjustModes compo_adjust_mode, Boolean positionBased)
Record the initial value of the search parameters that are to be adjusted.
Definition: blast_kappa.c:2046
SBlastTargetTranslation * BlastTargetTranslationFree(SBlastTargetTranslation *target_t)
Free SBlastTargetTranslation.
Definition: blast_util.c:1246
Int4 end
End of hsp.
Definition: blast_hits.h:99
Uint1 * state_array
array to be used.
Definition: gapinfo.h:84
Int4 query_length
Length of this query, strand or frame.
double score_avg
average score, must be negative for local alignment.
Definition: blast_stat.h:133
The structure to contain all BLAST results, for multiple queries.
Definition: blast_hits.h:183
int BlastCompo_HeapInsert(BlastCompo_Heap *self, void *alignments, double eValue, int score, int subject_index, void **discardedAligns)
Try to insert a collection of alignments into a heap.
Definition: compo_heap.c:330
int x_dropoff
for x-drop algorithms, once a path falls below the best score by this (positive) amount, the path is no longer searched
Int4 num_ident
Number of identical base pairs in this HSP.
Definition: blast_hits.h:128
char * matrix
Name of the matrix containing all scores: needed for finding neighboring words.
Int4 compositionBasedStats
mode of compositional adjustment to use; if zero then compositional adjustment is not used ...
Uint1 * sequence_nomask
Start of query sequence without masking.
Definition: blast_def.h:256
const BlastScoringParameters * scoringParams
scoring parameters for a gapped alignment
Definition: blast_kappa.c:1703
GapPrelimEditBlock * fwd_prelim_tback
traceback from right extensions
double inclusion_ethresh
Minimum evalue for inclusion in PSSM calculation.
static int s_FindNumIdentical(Uint1 *query_seq, const Uint8 *query_hashes, int query_len, Uint1 *subject_seq, int subject_len, int max_shift)
Find a local number of identical residues in two aligned sequences by finding word matches and doing ...
Definition: blast_kappa.c:1131
Int4 gap_x_dropoff
X-dropoff parameter to use.
const int kBlastHSPStream_Eof
Return value when the end of the stream is reached (applicable to read method only) ...
int Blast_CompositionWorkspaceInit(Blast_CompositionWorkspace *NRrecord, const char *matrixName)
Initialize the fields of a Blast_CompositionWorkspace for a specific underlying scoring matrix...
Boolean is_valid
Determine if this context is valid or not.
The context related information.
void Nlm_Int4MatrixFree(int ***mat)
Free a matrix created by Nlm_DenseMatrixNew or Nlm_LtriangMatrixNew.
Boolean round_down
Score must be rounded down to nearest even score if odd.
Definition: blast_stat.h:221
Uint4 nrows
number of rows (alphabet_size)
static BlastGapAlignStruct * s_BlastGapAlignStruct_Copy(BlastGapAlignStruct *orig, BlastScoreBlk *sbp)
Create a "deep" copy of a BlastGapAlignStruct structure.
Definition: blast_kappa.c:2596
static BlastCompo_Alignment * s_NewAlignmentFromGapAlign(BlastGapAlignStruct *gap_align, GapEditScript **edit_script, BlastCompo_SequenceRange *query_range, BlastCompo_SequenceRange *subject_range, EMatrixAdjustRule matrix_adjust_rule)
Reads a BlastGapAlignStruct that has been used to compute a traceback, and return a BlastCompo_Alignm...
Definition: blast_kappa.c:1734
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1153
#define SCALING_FACTOR
SCALING_FACTOR is a multiplicative factor used to get more bits of precision in the integer matrix sc...
Definition: blast_kappa.c:664
double lambda(size_t dimMatrix_, const Int4 *const *scoreMatrix_, const double *q_)
int strcmp(const char *str1, const char *str2)
Definition: odbc_utils.hpp:160
Uint4 max_length
Length of the longest among the concatenated queries.
int Blast_ForbiddenRangesInitialize(Blast_ForbiddenRanges *self, int capacity)
Initialize a new, empty Blast_ForbiddenRanges.
Int4 score
Return value: alignment score.
Int4 num_queries
Number of query sequences.
Definition: blast_hits.h:184
A parameter block for the Blast_RedoOneMatch and Blast_RedoOneMatchSmithWaterman routines.
double ** freq_ratios
frequency ratios
double bit_score
Bit score, calculated from score.
Definition: blast_hits.h:129
Kappa_compactSearchItems * Kappa_compactSearchItemsNew(const Uint1 *query, unsigned int queryLength, BlastScoreBlk *sbp)
Creates a new Kappa_compactSearchItems structure.
Definition: blast_posit.c:101
Blast_ScoreFreq * Blast_ScoreFreqNew(Int4 score_min, Int4 score_max)
Creates a new structure to keep track of score frequencies for a scoring system.
Definition: blast_stat.c:2112
struct GapStateArrayStruct * next
Next link in the list.
Definition: gapinfo.h:85
Boolean complexity_adjusted_scoring
Use cross_match-like complexity adjustment on raw scores.
Definition: blast_stat.h:195
Int2 Blast_KarlinBlkCopy(Blast_KarlinBlk *kbp_to, Blast_KarlinBlk *kbp_from)
Copies contents of one Karlin block to another.
Definition: blast_stat.c:2870
const double kPosEpsilon
minimum return value of s_computeRelativeEntropy
Uint1 * sequence
Sequence used for search (could be translation).
Definition: blast_def.h:243
BlastCompo_Alignment * BlastCompo_AlignmentNew(int score, EMatrixAdjustRule whichRule, int queryIndex, int queryStart, int queryEnd, int matchStart, int matchEnd, int frame, void *context)
Create a new BlastCompo_Alignment; parameters to this function correspond directly to fields of Blast...
Uint1 * ambiguous_res
Array of ambiguous res.
Definition: blast_stat.h:218
Uint1 Boolean
bool replacment for C
Definition: ncbi_std.h:92
EGapAlignOpType * op_type
Array of type of operation.
Definition: gapinfo.h:58
double * sprob0
arrays for frequency of given score
Definition: blast_stat.h:134
double K
K value used in statistics.
Definition: blast_stat.h:68
Int4 query_start
query start offset of current alignment
Information about a amino-acid substitution matrix.
BlastHitList * Blast_HitListNew(Int4 hitlist_size)
Allocate memory for a hit list of a given size.
Definition: blast_hits.c:3009
Computed values used as parameters for gapped alignments.
Structure to hold a sequence.
Definition: blast_def.h:242
The structure to contain all BLAST results for one query sequence.
Definition: blast_hits.h:169
static void s_FreeBlastCompo_QueryInfoArray(BlastCompo_QueryInfo **query_info, int num_queries)
Definition: blast_kappa.c:2265
Int4 gap_extend
Penalty for each gap residue (scaled version)
int BlastCompo_HeapWouldInsert(BlastCompo_Heap *self, double eValue, int score, int subject_index)
Return true if self may insert a match that had the given eValue, score and subject_index.
Definition: compo_heap.c:252
yy_size_t n
static int s_ExtendRight(Uint1 *query_seq, int query_len, Uint1 *subject_seq, int subject_len, int max_shift, int *query_ext_len, int *subject_ext_len, int *align_len)
Do a simple gapped extension to the right from the beginning of query and subject ranges examining on...
Definition: blast_kappa.c:932
Structures and functions prototypes used for BLAST gapped extension.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
int ** data
actual scoring matrix data, stored in row-major form
Definition: blast_stat.h:140
Int4 length
length of the state_array.
Definition: gapinfo.h:82
Int4 first_context
Index of the first element of the context array.
static double s_CalcLambda(double probs[], int min_score, int max_score, double lambda0)
A callback routine: compute lambda for the given score probabilities.
Definition: blast_kappa.c:539
Blast_KarlinBlk * kbp
Karlin-Altschul block associated with this PSSM.
Definition: blast_stat.h:153
static void s_MatchingSequenceRelease(BlastCompo_MatchingSequence *self)
Release the resources associated with a matching sequence.
Definition: blast_kappa.c:896
#define PSI_INCLUSION_ETHRESH
Defaults for PSI-BLAST and DELTA-BLAST options.
Port of posit.h structures and impalaScaling for implementing composition based statistics for PSI-BL...
EGapAlignOpType op_type
Type of operation.
Definition: gapinfo.h:66
Blast_KarlinBlk ** kbp
Karlin-Altschul parameters.
Definition: blast_stat.h:207
static void s_ClearHeap(BlastCompo_Heap *self)
Remove all matches from a BlastCompo_Heap.
Definition: blast_kappa.c:2504
EBlastProgramType program_number
indicates blastn, blastp, etc.
double ** freq_ratios
PSSM's frequency ratios, dimensions are specified in pssm data above.
Definition: blast_stat.h:151
Uint1 * data
amino acid or nucleotide data
static Blast_RedoAlignParams * s_GetAlignParams(BlastKappa_GappingParamsContext *context, BLAST_SequenceBlk *queryBlk, const BlastQueryInfo *queryInfo, const BlastHitSavingParameters *hitParams, const BlastExtensionParameters *extendParams)
Read the parameters required for the Blast_RedoOneMatch* functions from the corresponding parameters ...
Definition: blast_kappa.c:2393
static int s_ExtendLeft(Uint1 *query_seq, int query_len, Uint1 *subject_seq, int subject_len, int max_shift, int *query_ext_len, int *subject_ext_len, int *align_len)
Extend left from the end of the sequence and subject ranges and count identities. ...
Definition: blast_kappa.c:1027
int length
the length of data.
A BlastCompo_Heap represents a collection of alignments between one query sequence and several matchi...
Definition: compo_heap.h:82
EBlastEncoding encoding
Encoding of sequence, i.e.
Definition: blast_seqsrc.h:259
Definition: testodbc.c:30
Int2 alphabet_start
numerical value of 1st letter.
Definition: blast_stat.h:182
BlastHSPResults * Blast_HSPResultsFree(BlastHSPResults *results)
Deallocate memory for BLAST results.
Definition: blast_hits.c:3251
GapEditScript * edit_script
The traceback (gap) information.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
Blast_MatrixInfo * Blast_MatrixInfoNew(int rows, int cols, int positionBased)
Create a Blast_MatrixInfo object.
Definitions used to redo a set of alignments, using either composition matrix adjustment or the Smith...
#define BLASTAA_SIZE
Size of aminoacid alphabet.
Int2 Blast_RedoAlignmentCore(EBlastProgramType program_number, BLAST_SequenceBlk *queryBlk, const BlastQueryInfo *queryInfo, BlastScoreBlk *sbp, BLAST_SequenceBlk *subjectBlk, const BlastSeqSrc *seqSrc, Int4 default_db_genetic_code, BlastHSPList *thisMatch, BlastHSPStream *hsp_stream, BlastScoringParameters *scoringParams, const BlastExtensionParameters *extendParams, const BlastHitSavingParameters *hitParams, const PSIBlastOptions *psiOptions, BlastHSPResults *results)
Recompute alignments for each match found by the gapped BLAST algorithm.
Definition: blast_kappa.c:2934
Headers for computing a "composition" p-value of a match, and for computing a unified p-value combini...
BlastGapAlignStruct * BLAST_GapAlignStructFree(BlastGapAlignStruct *gap_align)
Deallocates memory in the BlastGapAlignStruct structure.
The query related information.
Int2 BlastSetUp_Filter(EBlastProgramType program_number, Uint1 *sequence, Int4 length, Int4 offset, const SBlastFilterOptions *filter_options, BlastSeqLoc **seqloc_retval, Blast_Message **blast_message)
Runs seg filtering functions, according to the filtering options, returns BlastSeqLoc*.
All auxiliary memory needed for the greedy extension algorithm.
Definition: greedy_align.h:89
static int s_SequenceGetProteinRange(const BlastCompo_MatchingSequence *self, const BlastCompo_SequenceRange *range, BlastCompo_SequenceData *seqData, const BlastCompo_SequenceRange *q_range, BlastCompo_SequenceData *queryData, const Uint8 *query_words, const BlastCompo_Alignment *align, const Boolean shouldTestIdentical, const ECompoAdjustModes compo_adjust_mode, const Boolean isSmithWaterman, Boolean *subject_maybe_biased)
Get a string of protein data from a protein sequence.
Definition: blast_kappa.c:1559
static BlastKappa_SavedParameters * s_SavedParametersNew(Int4 rows, Int4 numQueries, ECompoAdjustModes compo_adjust_mode, Boolean positionBased)
Create a new instance of BlastKappa_SavedParameters.
Definition: blast_kappa.c:1995
Boolean Blast_QueryIsPssm(EBlastProgramType p)
Returns true if the query is PSSM.
Definition: blast_program.c:46
static void s_BlastScoreBlk_Free(BlastScoreBlk **copy)
Free a BlastScoreBlk copy created by s_BlastScoreBlk_Copy.
Definition: blast_kappa.c:2735
BlastExtensionOptions * options
The original (unparsed) options.
int ** posMatrix
PSSM.
Definition: blast_posit.h:58
Structure used as the second argument to functions satisfying the GetSeqBlkFnPtr signature, associated with index-based implementations of the BlastSeqSrc interface.
Definition: blast_seqsrc.h:253
double Blast_Overall_P_Value(double p_comp, double p_alignment)
This function implements the method of Fisher, R.
Int4 hspcnt
Number of HSPs saved.
Definition: blast_hits.h:158
Int2 Blast_HSPResultsReverseOrder(BlastHSPResults *results)
Reverse order of HSP lists in each hit list in the BLAST results.
Definition: blast_hits.c:3305
double BLAST_KarlinEtoP(double x)
Convert an E-value to a P-value.
Definition: blast_stat.c:4172
void Blast_RedoAlignParamsFree(Blast_RedoAlignParams **pparams)
Free a set of Blast_RedoAlignParams.
int context
integer identifier for this window, can indicate a translation frame or an index into a set of sequen...
Int4 score_max
highest allowed scores
Definition: blast_stat.h:130
int BlastCompo_EarlyTermination(double evalue, BlastCompo_Heap significantMatches[], int numQueries)
Return true if a heuristic determines that it is unlikely to be worthwhile to redo a query-subject pa...
#define strdup
Definition: ncbi_ansi_ext.h:70
static int s_MatchingSequenceInitialize(BlastCompo_MatchingSequence *self, EBlastProgramType program_number, const BlastSeqSrc *seqSrc, Int4 default_db_genetic_code, Int4 subject_index)
Initialize a new matching sequence, obtaining information about the sequence from the search...
Definition: blast_kappa.c:1345
Int4 num_ops_allocated
size of allocated array
Definition: gapinfo.h:75
static int s_HitlistEvaluateAndPurge(int *pbestScore, double *pbestEvalue, BlastHSPList *hsp_list, const BlastSeqSrc *seqSrc, int subject_length, EBlastProgramType program_number, const BlastQueryInfo *queryInfo, int context_index, BlastScoreBlk *sbp, const BlastHitSavingParameters *hitParams, double pvalueForThisPair, double LambdaRatio, int subject_id)
Adding evalues to a list of HSPs and remove those that do not have sufficiently good (low) evalue...
Definition: blast_kappa.c:387
int matchEnd
one past the end of the alignment in the subject
static int s_GetPosBasedStartFreqRatios(double **returnRatios, Int4 numPositions, Uint1 *query, const char *matrixName, double **startNumerator)
Fill a two-dimensional array with the frequency ratios that underlie a position specific score matrix...
Definition: blast_kappa.c:580
Int4 offset
Start of hsp.
Definition: blast_hits.h:98
Uint1 * GenCodeSingletonFind(Uint4 gen_code_id)
Returns the genetic code string for the requested genetic code id.
Definition: _hash_fun.h:40
void GapPrelimEditBlockReset(GapPrelimEditBlock *edit_block)
Reset a preliminary edit block without freeing it.
Definition: gapinfo.c:213
Int4 gapExtend
a penalty for each residue in the gap
Definition: blast_kappa.c:1946
Int2 BlastSeqSrcGetSequence(const BlastSeqSrc *seq_src, BlastSeqSrcGetSeqArg *getseq_arg)
Retrieve an individual sequence.
Definition: blast_seqsrc.c:271
void BlastSeqSrcReleaseSequence(const BlastSeqSrc *seq_src, BlastSeqSrcGetSeqArg *getseq_arg)
Deallocate individual sequence.
Definition: blast_seqsrc.c:289
Int4 BlastSeqSrcGetSeqLen(const BlastSeqSrc *seq_src, void *oid)
Retrieve sequence length (number of residues/bases)
Definition: blast_seqsrc.c:281
SBlastFilterOptions * SBlastFilterOptionsFree(SBlastFilterOptions *filter_options)
Frees SBlastFilterOptions and all subservient structures.
BlastSeg subject
Subject sequence info.
Definition: blast_hits.h:132
EMatrixAdjustRule matrix_adjust_rule
how the score matrix was computed
void * BlastCompo_HeapPop(BlastCompo_Heap *self)
Remove and return the element in the BlastCompo_Heap with largest (worst) evalue; ties are broken acc...
Definition: compo_heap.c:444
Definition: testodbc.c:30
Int4 oid
Oid in BLAST database, index in an array of sequences, etc [in].
Definition: blast_seqsrc.h:255
Structure to hold the Gumbel parameters (for FSC).
Definition: blast_stat.h:94
Int4 gap_open
Extra penalty for starting a gap (scaled version)
Options used for gapped extension These include: a.
Int4 reward
reward for match in blastn.
Definition: blast_stat.h:200
Blast_RedoAlignParams * Blast_RedoAlignParamsNew(Blast_MatrixInfo **pmatrix_info, BlastCompo_GappingParams **pgapping_params, ECompoAdjustModes compo_adjust_mode, int positionBased, int subject_is_translated, int query_is_translated, int ccat_query_length, int cutoff_s, double cutoff_e, int do_link_hsps, const Blast_RedoAlignCallbacks *callbacks, double near_identical_cutoff)
Create new Blast_RedoAlignParams object.
Int2 ambig_size
size of array above.
Definition: blast_stat.h:219
EGapAlignOpType
Operation types within the edit script.
Definition: gapinfo.h:44
Blast_KarlinBlk ** kbp_gap_std
K-A parameters for std (not position-based) alignments.
Definition: blast_stat.h:212
double Lambda
Lambda value used in statistics.
Definition: blast_stat.h:67
SPsiBlastScoreMatrix * SPsiBlastScoreMatrixNew(size_t ncols)
Allocates a new SPsiBlastScoreMatrix structure of dimensions ncols by BLASTAA_SIZE.
Definition: blast_stat.c:805
signed short Int2
Alias for signed short.
Definition: ncbitype.h:118
double * freqs
array of assumed matrix background frequencies -RMH-
Definition: blast_stat.h:144
BlastGapAlignStruct * gap_align
additional parameters for a gapped alignment
Definition: blast_kappa.c:1705
static void s_ComputeNumIdentities(const BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, BLAST_SequenceBlk *subject_blk, const BlastSeqSrc *seq_src, BlastHSPList *hsp_list, const BlastScoringOptions *scoring_options, const Uint1 *gen_code_string, const BlastScoreBlk *sbp)
Compute the number of identities for the HSPs in the hsp_list.
Definition: blast_kappa.c:451
Blast_AminoAcidComposition composition
the composition of the query
Int2 Blast_HSPGetNumIdentitiesAndPositives(const Uint1 *query, const Uint1 *subject, BlastHSP *hsp, const BlastScoringOptions *score_options, Int4 *align_length_ptr, const BlastScoreBlk *sbp)
Calculate number of identities and positives in an HSP and set the BlastHSP::num_ident and BlastHSP::...
Definition: blast_hits.c:941
Int4 length
Length of sequence.
Definition: blast_def.h:246
static void s_HSPListNormalizeScores(BlastHSPList *hsp_list, double lambda, double logK, double scoreDivisor)
Given a list of HSPs with (possibly) high-precision scores, rescale the scores to have standard preci...
Definition: blast_kappa.c:94
Int4 number_of_contexts
Used by sfp and kbp, how large are these.
Definition: blast_stat.h:217
Int4 query_index
Index of the query which this HSPList corresponds to.
Definition: blast_hits.h:155
int _PSIConvertFreqRatiosToPSSM(_PSIInternalPssmData *internal_pssm, const Uint1 *query, const BlastScoreBlk *sbp, const double *std_probs)
Converts the PSSM's frequency ratios obtained in the previous stage to a PSSM of scores.
#define NCBIMATH_LN2
Natural log(2)
Definition: ncbi_math.h:161
BlastHSPResults * Blast_HSPResultsNew(Int4 num_queries)
Initialize the results structure.
Definition: blast_hits.c:3231
SFreqRatios * _PSIMatrixFrequencyRatiosFree(SFreqRatios *freq_ratios)
Deallocate the frequency ratios structure.
_PSIInternalPssmData * _PSIInternalPssmDataFree(_PSIInternalPssmData *pssm_data)
Deallocates the _PSIInternalPssmData structure.
static BlastCompo_QueryInfo * s_GetQueryInfo(Uint1 *query_data, const BlastQueryInfo *blast_query_info, Boolean skip)
Save information about all queries in an array of objects of type BlastCompo_QueryInfo.
Definition: blast_kappa.c:2295
Modified on Tue Jul 25 19:55:56 2017 by modify_doxy.py rev. 533848