00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #ifndef SKIP_DOXYGEN_PROCESSING
00036 static char const rcsid[] =
00037 "$Id: blast_setup.c 134303 2008-07-17 17:42:49Z camacho $";
00038 #endif
00039
00040 #include <algo/blast/core/blast_setup.h>
00041 #include <algo/blast/core/blast_util.h>
00042 #include <algo/blast/core/blast_filter.h>
00043
00044
00045 Int2
00046 Blast_ScoreBlkKbpGappedCalc(BlastScoreBlk * sbp,
00047 const BlastScoringOptions * scoring_options,
00048 EBlastProgramType program,
00049 const BlastQueryInfo * query_info,
00050 Blast_Message** error_return)
00051 {
00052 Int4 index = 0;
00053
00054 if (sbp == NULL || scoring_options == NULL) {
00055 Blast_PerrorWithLocation(error_return, BLASTERR_INVALIDPARAM, -1);
00056 return 1;
00057 }
00058
00059
00060
00061
00062 for (index = query_info->first_context;
00063 index <= query_info->last_context; index++) {
00064 Int2 retval = 0;
00065
00066 if ( !query_info->contexts[index].is_valid ) {
00067 continue;
00068 }
00069
00070 sbp->kbp_gap_std[index] = Blast_KarlinBlkNew();
00071
00072
00073 if (program == eBlastTypeBlastn) {
00074 retval =
00075 Blast_KarlinBlkNuclGappedCalc(sbp->kbp_gap_std[index],
00076 scoring_options->gap_open, scoring_options->gap_extend,
00077 scoring_options->reward, scoring_options->penalty,
00078 sbp->kbp_std[index], &(sbp->round_down), error_return);
00079 } else {
00080 retval =
00081 Blast_KarlinBlkGappedCalc(sbp->kbp_gap_std[index],
00082 scoring_options->gap_open, scoring_options->gap_extend,
00083 sbp->name, error_return);
00084 }
00085 if (retval) {
00086 return retval;
00087 }
00088
00089
00090
00091 if (program != eBlastTypeBlastn) {
00092 sbp->kbp_gap_psi[index] = Blast_KarlinBlkNew();
00093 Blast_KarlinBlkCopy(sbp->kbp_gap_psi[index],
00094 sbp->kbp_gap_std[index]);
00095 }
00096 }
00097
00098
00099 sbp->kbp_gap = Blast_QueryIsPssm(program) ?
00100 sbp->kbp_gap_psi : sbp->kbp_gap_std;
00101
00102 return 0;
00103 }
00104
00105
00106
00107
00108
00109
00110
00111 static Int2
00112 s_PHIScoreBlkFill(BlastScoreBlk* sbp, const BlastScoringOptions* options,
00113 Blast_Message** blast_message, GET_MATRIX_PATH get_path)
00114 {
00115 Blast_KarlinBlk* kbp;
00116 char buffer[1024];
00117 Int2 status = 0;
00118
00119 sbp->read_in_matrix = TRUE;
00120 if ((status = Blast_ScoreBlkMatrixFill(sbp, get_path)) != 0)
00121 return status;
00122 kbp = sbp->kbp_gap_std[0] = Blast_KarlinBlkNew();
00123
00124 sbp->kbp_gap = sbp->kbp_gap_std;
00125
00126
00127
00128 kbp->H = 1.0;
00129
00130
00131
00132 sbp->sfp[0] = Blast_ScoreFreqNew(sbp->loscore, sbp->hiscore);
00133
00134
00135 status = Blast_ScoreBlkKbpIdealCalc(sbp);
00136 if (status)
00137 return status;
00138
00139 if (0 == strcmp("BLOSUM62", options->matrix)) {
00140 kbp->paramC = 0.50;
00141 if ((11 == options->gap_open) && (1 == options->gap_extend)) {
00142 kbp->Lambda = 0.270;
00143 kbp->K = 0.047;
00144 } else if ((9 == options->gap_open) && (2 == options->gap_extend)) {
00145 kbp->Lambda = 0.285;
00146 kbp->K = 0.075;
00147 } else if ((8 == options->gap_open) && (2 == options->gap_extend)) {
00148 kbp->Lambda = 0.265;
00149 kbp->K = 0.046;
00150 } else if ((7 == options->gap_open) && (2 == options->gap_extend)) {
00151 kbp->Lambda = 0.243;
00152 kbp->K = 0.032;
00153 } else if ((12 == options->gap_open) && (1 == options->gap_extend)) {
00154 kbp->Lambda = 0.281;
00155 kbp->K = 0.057;
00156 } else if ((10 == options->gap_open) && (1 == options->gap_extend)) {
00157 kbp->Lambda = 0.250;
00158 kbp->K = 0.033;
00159 } else {
00160 status = -1;
00161 }
00162 } else if (0 == strcmp("PAM30", options->matrix)) {
00163 kbp->paramC = 0.30;
00164 if ((9 == options->gap_open) && (1 == options->gap_extend)) {
00165 kbp->Lambda = 0.295;
00166 kbp->K = 0.13;
00167 } else if ((7 == options->gap_open) && (2 == options->gap_extend)) {
00168 kbp->Lambda = 0.306;
00169 kbp->K = 0.15;
00170 return status;
00171 } else if ((6 == options->gap_open) && (2 == options->gap_extend)) {
00172 kbp->Lambda = 0.292;
00173 kbp->K = 0.13;
00174 } else if ((5 == options->gap_open) && (2 == options->gap_extend)) {
00175 kbp->Lambda = 0.263;
00176 kbp->K = 0.077;
00177 } else if ((10 == options->gap_open) && (1 == options->gap_extend)) {
00178 kbp->Lambda = 0.309;
00179 kbp->K = 0.15;
00180 } else if ((8 == options->gap_open) && (1 == options->gap_extend)) {
00181 kbp->Lambda = 0.270;
00182 kbp->K = 0.070;
00183 return status;
00184 } else {
00185 status = -1;
00186 }
00187 } else if (0 == strcmp("PAM70", options->matrix)) {
00188 kbp->paramC = 0.35;
00189 if ((10 == options->gap_open) && (1 == options->gap_extend)) {
00190 kbp->Lambda = 0.291;
00191 kbp->K = 0.089;
00192 } else if ((8 == options->gap_open) && (2 == options->gap_extend)) {
00193 kbp->Lambda = 0.303;
00194 kbp->K = 0.13;
00195 } else if ((7 == options->gap_open) && (2 == options->gap_extend)) {
00196 kbp->Lambda = 0.287;
00197 kbp->K = 0.095;
00198 } else if ((6 == options->gap_open) && (2 == options->gap_extend)) {
00199 kbp->Lambda = 0.269;
00200 kbp->K = 0.079;
00201 } else if ((11 == options->gap_open) && (1 == options->gap_extend)) {
00202 kbp->Lambda = 0.307;
00203 kbp->K = 0.13;
00204 } else if ((9 == options->gap_open) && (1 == options->gap_extend)) {
00205 kbp->Lambda = 0.269;
00206 kbp->K = 0.058;
00207 } else {
00208 status = -1;
00209 }
00210 } else if (0 == strcmp("BLOSUM80", options->matrix)) {
00211 kbp->paramC = 0.40;
00212 if ((10 == options->gap_open) && (1 == options->gap_extend)) {
00213 kbp->Lambda = 0.300;
00214 kbp->K = 0.072;
00215 } else if ((8 == options->gap_open) && (2 == options->gap_extend)) {
00216 kbp->Lambda = 0.308;
00217 kbp->K = 0.089;
00218 } else if ((7 == options->gap_open) && (2 == options->gap_extend)) {
00219 kbp->Lambda = 0.295;
00220 kbp->K = 0.077;
00221 } else if ((6 == options->gap_open) && (2 == options->gap_extend)) {
00222 kbp->Lambda = 0.271;
00223 kbp->K = 0.051;
00224 } else if ((11 == options->gap_open) && (1 == options->gap_extend)) {
00225 kbp->Lambda = 0.314;
00226 kbp->K = 0.096;
00227 return status;
00228 } else if ((9 == options->gap_open) && (1 == options->gap_extend)) {
00229 kbp->Lambda = 0.277;
00230 kbp->K = 0.046;
00231 } else {
00232 status = -1;
00233 }
00234 } else if (0 == strcmp("BLOSUM45", options->matrix)) {
00235 kbp->paramC = 0.60;
00236 if ((14 == options->gap_open) && (2 == options->gap_extend)) {
00237 kbp->Lambda = 0.199;
00238 kbp->K = 0.040;
00239 } else if ((13 == options->gap_open) && (3 == options->gap_extend)) {
00240 kbp->Lambda = 0.209;
00241 kbp->K = 0.057;
00242 } else if ((12 == options->gap_open) && (3 == options->gap_extend)) {
00243 kbp->Lambda = 0.203;
00244 kbp->K = 0.049;
00245 } else if ((11 == options->gap_open) && (3 == options->gap_extend)) {
00246 kbp->Lambda = 0.193;
00247 kbp->K = 0.037;
00248 } else if ((10 == options->gap_open) && (3 == options->gap_extend)) {
00249 kbp->Lambda = 0.182;
00250 kbp->K = 0.029;
00251 } else if ((15 == options->gap_open) && (2 == options->gap_extend)) {
00252 kbp->Lambda = 0.206;
00253 kbp->K = 0.049;
00254 } else if ((13 == options->gap_open) && (2 == options->gap_extend)) {
00255 kbp->Lambda = 0.190;
00256 kbp->K = 0.032;
00257 } else if ((12 == options->gap_open) && (2 == options->gap_extend)) {
00258 kbp->Lambda = 0.177;
00259 kbp->K = 0.023;
00260 } else if ((19 == options->gap_open) && (1 == options->gap_extend)) {
00261 kbp->Lambda = 0.209;
00262 kbp->K = 0.049;
00263 } else if ((18 == options->gap_open) && (1 == options->gap_extend)) {
00264 kbp->Lambda = 0.202;
00265 kbp->K = 0.041;
00266 } else if ((17 == options->gap_open) && (1 == options->gap_extend)) {
00267 kbp->Lambda = 0.195;
00268 kbp->K = 0.034;
00269 } else if ((16 == options->gap_open) && (1 == options->gap_extend)) {
00270 kbp->Lambda = 0.183;
00271 kbp->K = 0.024;
00272 } else {
00273 status = -1;
00274 }
00275 } else {
00276 status = -2;
00277 }
00278
00279 if (status == -1) {
00280 sprintf(buffer, "The combination %d for gap opening cost and %d for "
00281 "gap extension is not supported in PHI-BLAST with matrix %s\n",
00282 options->gap_open, options->gap_extend, options->matrix);
00283 } else if (status == -2) {
00284 sprintf(buffer, "Matrix %s not allowed in PHI-BLAST\n", options->matrix);
00285 }
00286 if (status)
00287 Blast_MessageWrite(blast_message, eBlastSevWarning, kBlastMessageNoContext, buffer);
00288 else {
00289
00290 sbp->kbp_std[0] = (Blast_KarlinBlk*)
00291 BlastMemDup(sbp->kbp_gap_std[0], sizeof(Blast_KarlinBlk));
00292 sbp->kbp = sbp->kbp_std;
00293 }
00294
00295 return status;
00296 }
00297
00298 Int2
00299 Blast_ScoreBlkMatrixInit(EBlastProgramType program_number,
00300 const BlastScoringOptions* scoring_options,
00301 BlastScoreBlk* sbp,
00302 GET_MATRIX_PATH get_path)
00303 {
00304 Int2 status = 0;
00305
00306 if ( !sbp || !scoring_options ) {
00307 return 1;
00308 }
00309
00310 if (program_number == eBlastTypeBlastn) {
00311
00312 BLAST_ScoreSetAmbigRes(sbp, 'N');
00313 sbp->penalty = scoring_options->penalty;
00314 sbp->reward = scoring_options->reward;
00315 if (scoring_options->matrix && *scoring_options->matrix != NULLB) {
00316
00317 sbp->read_in_matrix = TRUE;
00318 sbp->name = strdup(scoring_options->matrix);
00319
00320 } else {
00321 char buffer[50];
00322 sbp->read_in_matrix = FALSE;
00323 sprintf(buffer, "blastn matrix:%ld %ld",
00324 (long) sbp->reward, (long) sbp->penalty);
00325 sbp->name = strdup(buffer);
00326 }
00327
00328 } else {
00329 sbp->read_in_matrix = TRUE;
00330 BLAST_ScoreSetAmbigRes(sbp, 'X');
00331 sbp->name = BLAST_StrToUpper(scoring_options->matrix);
00332 }
00333 status = Blast_ScoreBlkMatrixFill(sbp, get_path);
00334 if (status) {
00335 return status;
00336 }
00337
00338 return status;
00339 }
00340
00341 Int2
00342 BlastSetup_ScoreBlkInit(BLAST_SequenceBlk* query_blk,
00343 const BlastQueryInfo* query_info,
00344 const BlastScoringOptions* scoring_options,
00345 EBlastProgramType program_number,
00346 BlastScoreBlk* *sbpp,
00347 double scale_factor,
00348 Blast_Message* *blast_message,
00349 GET_MATRIX_PATH get_path)
00350 {
00351 BlastScoreBlk* sbp;
00352 Int2 status=0;
00353 ASSERT(blast_message);
00354
00355 if (sbpp == NULL)
00356 return 1;
00357
00358 if (program_number == eBlastTypeBlastn)
00359 sbp = BlastScoreBlkNew(BLASTNA_SEQ_CODE, query_info->last_context + 1);
00360 else
00361 sbp = BlastScoreBlkNew(BLASTAA_SEQ_CODE, query_info->last_context + 1);
00362
00363 if (!sbp) {
00364 Blast_PerrorWithLocation(blast_message, BLASTERR_MEMORY, -1);
00365 return 1;
00366 }
00367
00368 *sbpp = sbp;
00369 sbp->scale_factor = scale_factor;
00370
00371 status = Blast_ScoreBlkMatrixInit(program_number, scoring_options, sbp, get_path);
00372 if (status) {
00373 Blast_Perror(blast_message, status, -1);
00374 return status;
00375 }
00376
00377
00378 if (Blast_ProgramIsPhiBlast(program_number)) {
00379 status = s_PHIScoreBlkFill(sbp, scoring_options, blast_message, get_path);
00380 } else {
00381 status = Blast_ScoreBlkKbpUngappedCalc(program_number, sbp, query_blk->sequence,
00382 query_info, blast_message);
00383
00384 if (scoring_options->gapped_calculation) {
00385 status =
00386 Blast_ScoreBlkKbpGappedCalc(sbp, scoring_options, program_number,
00387 query_info, blast_message);
00388 } else {
00389 ASSERT(sbp->kbp_gap == NULL);
00390 }
00391 }
00392
00393 return status;
00394 }
00395
00396 Int2
00397 BlastSetup_Validate(const BlastQueryInfo* query_info,
00398 const BlastScoreBlk* score_blk)
00399 {
00400 int index;
00401 Boolean valid_context_found = FALSE;
00402 ASSERT(query_info);
00403
00404 for (index = query_info->first_context;
00405 index <= query_info->last_context;
00406 index++) {
00407 if (query_info->contexts[index].is_valid) {
00408 valid_context_found = TRUE;
00409 } else if (score_blk) {
00410 ASSERT(score_blk->kbp[index] == NULL);
00411 ASSERT(score_blk->sfp[index] == NULL);
00412 if (score_blk->kbp_gap) {
00413 ASSERT(score_blk->kbp_gap[index] == NULL);
00414 }
00415 }
00416 }
00417
00418 if (valid_context_found) {
00419 return 0;
00420 } else {
00421 return 1;
00422 }
00423 }
00424
00425 Int2 BLAST_MainSetUp(EBlastProgramType program_number,
00426 const QuerySetUpOptions *qsup_options,
00427 const BlastScoringOptions *scoring_options,
00428 BLAST_SequenceBlk *query_blk,
00429 const BlastQueryInfo *query_info,
00430 double scale_factor,
00431 BlastSeqLoc **lookup_segments,
00432 BlastMaskLoc **mask,
00433 BlastScoreBlk **sbpp,
00434 Blast_Message **blast_message,
00435 GET_MATRIX_PATH get_path)
00436 {
00437 Boolean mask_at_hash = FALSE;
00438 Int2 status = 0;
00439 BlastMaskLoc *filter_maskloc = NULL;
00440
00441 SBlastFilterOptions* filter_options = qsup_options->filtering_options;
00442 Boolean filter_options_allocated = FALSE;
00443
00444 ASSERT(blast_message);
00445
00446 if (mask)
00447 *mask = NULL;
00448
00449 if (filter_options == NULL && qsup_options->filter_string)
00450 {
00451 status = BlastFilteringOptionsFromString(program_number,
00452 qsup_options->filter_string,
00453 &filter_options,
00454 blast_message);
00455 if (status) {
00456 filter_options = SBlastFilterOptionsFree(filter_options);
00457 return status;
00458 }
00459 filter_options_allocated = TRUE;
00460 }
00461 ASSERT(filter_options);
00462
00463 status = BlastSetUp_GetFilteringLocations(query_blk,
00464 query_info,
00465 program_number,
00466 filter_options,
00467 & filter_maskloc,
00468 blast_message);
00469
00470 if (status) {
00471 if (filter_options_allocated)
00472 filter_options = SBlastFilterOptionsFree(filter_options);
00473 return status;
00474 }
00475
00476 mask_at_hash = SBlastFilterOptionsMaskAtHash(filter_options);
00477
00478 if (filter_options_allocated) {
00479 filter_options = SBlastFilterOptionsFree(filter_options);
00480 }
00481
00482
00483 if (!mask_at_hash) {
00484 BlastSetUp_MaskQuery(query_blk, query_info, filter_maskloc,
00485 program_number);
00486 }
00487
00488 if (program_number == eBlastTypeBlastx && scoring_options->is_ooframe) {
00489 BLAST_CreateMixedFrameDNATranslation(query_blk, query_info);
00490 }
00491
00492
00493
00494
00495
00496 if (lookup_segments) {
00497 BLAST_ComplementMaskLocations(program_number, query_info,
00498 filter_maskloc, lookup_segments);
00499 }
00500
00501 if (mask)
00502 {
00503 if (Blast_QueryIsTranslated(program_number)) {
00504
00505
00506 BlastMaskLocProteinToDNA(filter_maskloc, query_info);
00507 }
00508 *mask = filter_maskloc;
00509 filter_maskloc = NULL;
00510 }
00511 else
00512 filter_maskloc = BlastMaskLocFree(filter_maskloc);
00513
00514 status = BlastSetup_ScoreBlkInit(query_blk, query_info, scoring_options,
00515 program_number, sbpp, scale_factor,
00516 blast_message, get_path);
00517 if (status) {
00518 return status;
00519 }
00520
00521 if ( (status = BlastSetup_Validate(query_info, *sbpp) != 0)) {
00522 if (*blast_message == NULL) {
00523 Blast_Perror(blast_message, BLASTERR_INVALIDQUERIES, -1);
00524 }
00525 return 1;
00526 }
00527
00528 return status;
00529 }
00530
00531
00532
00533
00534
00535
00536
00537
00538 static Int8 s_GetEffectiveSearchSpaceForContext(
00539 const BlastEffectiveLengthsOptions* eff_len_options,
00540 int context_index, Blast_Message **blast_message)
00541 {
00542 Int8 retval = 0;
00543
00544 if (eff_len_options->num_searchspaces == 0) {
00545 retval = 0;
00546 } else if (eff_len_options->num_searchspaces == 1) {
00547 if (context_index != 0) {
00548 Blast_MessageWrite(blast_message, eBlastSevWarning, context_index,
00549 "One search space is being used for multiple sequences");
00550 }
00551 retval = eff_len_options->searchsp_eff[0];
00552 } else if (eff_len_options->num_searchspaces > 1) {
00553 ASSERT(context_index < eff_len_options->num_searchspaces);
00554 retval = eff_len_options->searchsp_eff[context_index];
00555 } else {
00556 abort();
00557 }
00558 return retval;
00559 }
00560
00561 Int2 BLAST_CalcEffLengths (EBlastProgramType program_number,
00562 const BlastScoringOptions* scoring_options,
00563 const BlastEffectiveLengthsParameters* eff_len_params,
00564 const BlastScoreBlk* sbp, BlastQueryInfo* query_info,
00565 Blast_Message * *blast_message)
00566 {
00567 double alpha=0, beta=0;
00568 Int4 index;
00569 Int4 db_num_seqs;
00570 Int8 db_length;
00571 Blast_KarlinBlk* *kbp_ptr;
00572 const BlastEffectiveLengthsOptions* eff_len_options = eff_len_params->options;
00573
00574 if (!query_info || !sbp)
00575 return -1;
00576
00577
00578
00579
00580 if (eff_len_options->db_length > 0)
00581 db_length = eff_len_options->db_length;
00582 else
00583 db_length = eff_len_params->real_db_length;
00584
00585
00586
00587
00588
00589
00590
00591 if (db_length == 0 &&
00592 !BlastEffectiveLengthsOptions_IsSearchSpaceSet(eff_len_options)) {
00593 return 0;
00594 }
00595
00596 if (Blast_SubjectIsTranslated(program_number))
00597 db_length = db_length/3;
00598
00599 if (eff_len_options->dbseq_num > 0)
00600 db_num_seqs = eff_len_options->dbseq_num;
00601 else
00602 db_num_seqs = eff_len_params->real_num_seqs;
00603
00604
00605 if (Blast_ProgramIsPhiBlast(program_number))
00606 {
00607 for (index = query_info->first_context;
00608 index <= query_info->last_context;
00609 index++) {
00610 Int8 effective_search_space = db_length - (db_num_seqs*(query_info->contexts[index].length_adjustment));
00611 query_info->contexts[index].eff_searchsp = effective_search_space;
00612 }
00613
00614 return 0;
00615 }
00616
00617
00618
00619 kbp_ptr = (scoring_options->gapped_calculation ? sbp->kbp_gap_std : sbp->kbp);
00620
00621 for (index = query_info->first_context;
00622 index <= query_info->last_context;
00623 index++) {
00624 Blast_KarlinBlk *kbp;
00625 Int4 length_adjustment = 0;
00626 Int4 query_length;
00627
00628
00629 Int8 effective_search_space =
00630 s_GetEffectiveSearchSpaceForContext(eff_len_options, index,
00631 blast_message);
00632
00633 kbp = kbp_ptr[index];
00634
00635 if (query_info->contexts[index].is_valid &&
00636 ((query_length = query_info->contexts[index].query_length) > 0) ) {
00637
00638
00639
00640
00641
00642 if (program_number == eBlastTypeBlastn) {
00643 Blast_GetNuclAlphaBeta(scoring_options->reward,
00644 scoring_options->penalty,
00645 scoring_options->gap_open,
00646 scoring_options->gap_extend,
00647 sbp->kbp_std[index],
00648 scoring_options->gapped_calculation,
00649 &alpha, &beta);
00650 } else {
00651 BLAST_GetAlphaBeta(sbp->name, &alpha, &beta,
00652 scoring_options->gapped_calculation,
00653 scoring_options->gap_open,
00654 scoring_options->gap_extend,
00655 sbp->kbp_std[index]);
00656 }
00657 BLAST_ComputeLengthAdjustment(kbp->K, kbp->logK,
00658 alpha/kbp->Lambda, beta,
00659 query_length, db_length,
00660 db_num_seqs, &length_adjustment);
00661
00662 if (effective_search_space == 0) {
00663
00664
00665
00666
00667
00668
00669 Int8 effective_db_length = db_length;
00670
00671 if (eff_len_options->db_length == 0)
00672 effective_db_length -= (Int8)db_num_seqs * length_adjustment;
00673
00674 effective_search_space = effective_db_length *
00675 (query_length - length_adjustment);
00676 }
00677 }
00678 query_info->contexts[index].eff_searchsp = effective_search_space;
00679 query_info->contexts[index].length_adjustment = length_adjustment;
00680 }
00681 return 0;
00682 }
00683
00684 void
00685 BLAST_GetSubjectTotals(const BlastSeqSrc* seqsrc,
00686 Int8* total_length,
00687 Int4* num_seqs)
00688 {
00689 ASSERT(total_length && num_seqs);
00690
00691 *total_length = -1;
00692 *num_seqs = -1;
00693
00694 if ( !seqsrc ) {
00695 return;
00696 }
00697
00698 *total_length = BlastSeqSrcGetTotLenStats(seqsrc);
00699 if (*total_length <= 0)
00700 *total_length = BlastSeqSrcGetTotLen(seqsrc);
00701
00702 if (*total_length > 0) {
00703 *num_seqs = BlastSeqSrcGetNumSeqsStats(seqsrc);
00704 if (*num_seqs <= 0)
00705 *num_seqs = BlastSeqSrcGetNumSeqs(seqsrc);
00706 } else {
00707
00708
00709 Int4 oid = 0;
00710 if ( (*total_length = BlastSeqSrcGetSeqLen(seqsrc, (void*) &oid)) < 0) {
00711 *total_length = -1;
00712 *num_seqs = -1;
00713 return;
00714 }
00715 *num_seqs = 1;
00716 }
00717 }
00718
00719 Int2
00720 BLAST_GapAlignSetUp(EBlastProgramType program_number,
00721 const BlastSeqSrc* seq_src,
00722 const BlastScoringOptions* scoring_options,
00723 const BlastEffectiveLengthsOptions* eff_len_options,
00724 const BlastExtensionOptions* ext_options,
00725 const BlastHitSavingOptions* hit_options,
00726 BlastQueryInfo* query_info,
00727 BlastScoreBlk* sbp,
00728 BlastScoringParameters** score_params,
00729 BlastExtensionParameters** ext_params,
00730 BlastHitSavingParameters** hit_params,
00731 BlastEffectiveLengthsParameters** eff_len_params,
00732 BlastGapAlignStruct** gap_align)
00733 {
00734 Int2 status = 0;
00735 Uint4 max_subject_length;
00736 Int8 total_length;
00737 Int4 num_seqs;
00738
00739 BLAST_GetSubjectTotals(seq_src, &total_length, &num_seqs);
00740
00741
00742
00743 BlastEffectiveLengthsParametersNew(eff_len_options, total_length, num_seqs,
00744 eff_len_params);
00745
00746 if ((status = BLAST_CalcEffLengths(program_number, scoring_options,
00747 *eff_len_params, sbp, query_info, NULL)) != 0)
00748 return status;
00749
00750 BlastScoringParametersNew(scoring_options, sbp, score_params);
00751
00752 BlastExtensionParametersNew(program_number, ext_options, sbp,
00753 query_info, ext_params);
00754
00755 BlastHitSavingParametersNew(program_number, hit_options, sbp, query_info,
00756 (Int4)(total_length/num_seqs), hit_params);
00757
00758
00759
00760 max_subject_length = BlastSeqSrcGetMaxSeqLen(seq_src);
00761
00762 if ((status = BLAST_GapAlignStructNew(*score_params, *ext_params,
00763 max_subject_length, sbp, gap_align)) != 0) {
00764 return status;
00765 }
00766
00767 return status;
00768 }
00769
00770 Int2 BLAST_OneSubjectUpdateParameters(EBlastProgramType program_number,
00771 Uint4 subject_length,
00772 const BlastScoringOptions* scoring_options,
00773 BlastQueryInfo* query_info,
00774 const BlastScoreBlk* sbp,
00775 BlastHitSavingParameters* hit_params,
00776 BlastInitialWordParameters* word_params,
00777 BlastEffectiveLengthsParameters* eff_len_params)
00778 {
00779 Int2 status = 0;
00780 eff_len_params->real_db_length = subject_length;
00781 if ((status = BLAST_CalcEffLengths(program_number, scoring_options,
00782 eff_len_params, sbp, query_info, NULL)) != 0)
00783 return status;
00784
00785 BlastHitSavingParametersUpdate(program_number, sbp, query_info, subject_length,
00786 hit_params);
00787
00788 if (word_params) {
00789
00790 BlastInitialWordParametersUpdate(program_number, hit_params, sbp, query_info,
00791 subject_length, word_params);
00792
00793 BlastLinkHSPParametersUpdate(word_params, hit_params, scoring_options->gapped_calculation);
00794 }
00795 return status;
00796 }
00797
00798 void
00799 BlastSeqLoc_RestrictToInterval(BlastSeqLoc* *mask, Int4 from, Int4 to)
00800 {
00801 BlastSeqLoc* head_loc = NULL, *last_loc = NULL, *next_loc, *seqloc;
00802
00803 to = MAX(to, 0);
00804
00805
00806
00807 if (mask == NULL || *mask == NULL || (from == 0 && to == 0))
00808 return;
00809
00810 for (seqloc = *mask; seqloc; seqloc = next_loc) {
00811 next_loc = seqloc->next;
00812 seqloc->ssr->left = MAX(0, seqloc->ssr->left - from);
00813 seqloc->ssr->right = MIN(seqloc->ssr->right, to) - from;
00814
00815
00816 if (seqloc->ssr->left > seqloc->ssr->right) {
00817
00818 if (last_loc)
00819 last_loc->next = seqloc->next;
00820 seqloc = BlastSeqLocNodeFree(seqloc);
00821 } else if (!head_loc) {
00822
00823 head_loc = last_loc = seqloc;
00824 } else {
00825
00826 last_loc->next = seqloc;
00827 last_loc = last_loc->next;
00828 }
00829 }
00830 *mask = head_loc;
00831 }
00832
00833 Int2
00834 Blast_SetPHIPatternInfo(EBlastProgramType program,
00835 const SPHIPatternSearchBlk * pattern_blk,
00836 const BLAST_SequenceBlk * query,
00837 const BlastSeqLoc * lookup_segments,
00838 BlastQueryInfo * query_info,
00839 Blast_Message** blast_message)
00840 {
00841 const Boolean kIsNa = (program == eBlastTypePhiBlastn);
00842 Int4 num_patterns = 0;
00843
00844 ASSERT(Blast_ProgramIsPhiBlast(program));
00845 ASSERT(query_info && pattern_blk);
00846
00847 query_info->pattern_info = SPHIQueryInfoNew();
00848
00849
00850 num_patterns = PHIGetPatternOccurrences(pattern_blk, query, lookup_segments, kIsNa,
00851 query_info);
00852 if (num_patterns == 0)
00853 {
00854 char buffer[512];
00855 sprintf(buffer, "The pattern %s was not found in the query.", pattern_blk->pattern);
00856 if (blast_message)
00857 Blast_MessageWrite(blast_message, eBlastSevWarning, kBlastMessageNoContext, buffer);
00858 return -1;
00859 }
00860 else if (num_patterns == INT4_MAX)
00861 {
00862 char buffer[512];
00863 sprintf(buffer, "The pattern (%s) may not cover the entire query.", pattern_blk->pattern);
00864 if (blast_message)
00865 Blast_MessageWrite(blast_message, eBlastSevWarning, kBlastMessageNoContext, buffer);
00866 return -1;
00867 }
00868 else if (num_patterns < 0)
00869 {
00870 return -1;
00871 }
00872
00873
00874
00875 query_info->pattern_info->probability = pattern_blk->patternProbability;
00876
00877
00878 query_info->pattern_info->pattern =
00879 (char*) (char *) BlastMemDup(pattern_blk->pattern, 1+strlen(pattern_blk->pattern));
00880
00881
00882
00883 query_info->contexts[0].length_adjustment =
00884 pattern_blk->minPatternMatchLength;
00885
00886 return 0;
00887 }
00888
00889