NCBI C++ ToolKit
sls_alp_data.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

00001 /* $Id: sls_alp_data.cpp 62325 2014-04-01 19:20:49Z boratyng $
00002 * ===========================================================================
00003 *
00004 *                            PUBLIC DOMAIN NOTICE
00005 *               National Center for Biotechnology Information
00006 *
00007 *  This software/database is a "United States Government Work" under the
00008 *  terms of the United States Copyright Act.  It was written as part of
00009 *  the author's official duties as a United States Government employee and
00010 *  thus cannot be copyrighted.  This software/database is freely available
00011 *  to the public for use. The National Library of Medicine and the U.S.
00012 *  Government have not placed any restriction on its use or reproduction.
00013 *
00014 *  Although all reasonable efforts have been taken to ensure the accuracy
00015 *  and reliability of the software and data, the NLM and the U.S.
00016 *  Government do not and cannot warrant the performance or results that
00017 *  may be obtained by using this software or data. The NLM and the U.S.
00018 *  Government disclaim all warranties, express or implied, including
00019 *  warranties of performance, merchantability or fitness for any particular
00020 *  purpose.
00021 *
00022 *  Please cite the author in any work or product based on this material.
00023 *
00024 * ===========================================================================*/
00025 
00026 /*****************************************************************************
00027 
00028 File name: sls_alp_data.cpp
00029 
00030 Author: Sergey Sheetlin
00031 
00032 Contents: Input data for the ascending ladder points simulation
00033 
00034 ******************************************************************************/
00035 
00036 
00037 #include <ncbi_pch.hpp>
00038 
00039 #include <ncbi_pch.hpp>
00040 #include "sls_alp_data.hpp"
00041 
00042 USING_NCBI_SCOPE;
00043 USING_SCOPE(blast);
00044 USING_SCOPE(Sls);
00045 
00046 
00047 alp_data::alp_data(//constructor
00048                    CConstRef<CGumbelParamsOptions>& options_,
00049                    CRef<CGumbelParamsRandDiagnostics>& rand_params_)
00050 {
00051 
00052 
00053         bool ee_error_flag=false;
00054         error ee_error("",0);
00055 
00056         d_smatr=NULL;
00057         d_RR1=NULL;
00058         d_RR1_sum=NULL;
00059         d_RR1_sum_elements=NULL;
00060 
00061         d_RR2=NULL;
00062         d_RR2_sum=NULL;
00063         d_RR2_sum_elements=NULL;
00064 
00065         d_is=NULL;
00066         d_r_i_dot=NULL;
00067         d_r_dot_j=NULL;
00068 
00069         d_rand_all=NULL;
00070 
00071         d_rand_object=NULL;
00072 
00073 
00074         if(!options_)
00075         {
00076                 throw error("Unexpected error\n",4);
00077         };
00078 
00079 
00080         try
00081         {
00082         try
00083         {
00084                 
00085                 d_sentinels_flag=false;
00086 
00087 
00088                 d_memory_size_in_MB=0;
00089 
00090                 #ifndef NCBI_OS_MSWIN
00091 
00092                 #else
00093                         _CrtMemCheckpoint( &d_s1 );
00094                 #endif
00095 
00096 
00097                 Int4 number_of_AA_RR1;
00098                 Int4 number_of_AA_RR2;
00099 
00100                 
00101 
00102                 Int4 i,j;
00103                 d_number_of_AA_smatr=options_->GetNumResidues();
00104 
00105                 if(d_number_of_AA_smatr<=0)
00106                 {
00107                         throw error("Error - number of letters in the scoring matrix file must be greater than 0\n",3);
00108                 };
00109 
00110                 get_memory_for_matrix(d_number_of_AA_smatr,d_smatr);
00111 
00112 
00113                 for(i=0;i<d_number_of_AA_smatr;i++)
00114                 {
00115                         for(j=0;j<d_number_of_AA_smatr;j++)
00116                         {
00117                                 d_smatr[i][j]=(options_->GetScoreMatrix())[i][j];
00118                         };
00119                 };
00120 
00121                 d_smatr_symmetric_flag=false;
00122 
00123 
00124                 read_RR(
00125                 options_->GetSeq1ResidueProbs(),
00126                 d_RR1,
00127                 d_RR1_sum,
00128                 d_RR1_sum_elements,
00129                 number_of_AA_RR1);
00130 
00131 
00132                 read_RR(
00133                 options_->GetSeq2ResidueProbs(),
00134                 d_RR2,
00135                 d_RR2_sum,
00136                 d_RR2_sum_elements,
00137                 number_of_AA_RR2);
00138 
00139 
00140                 if(number_of_AA_RR1==d_number_of_AA_smatr)
00141                 {
00142                         d_number_of_AA=d_number_of_AA_smatr;
00143                 }
00144                 else
00145                 {
00146                         throw error("Number of letters is different for the scoring matrix and probabilities array\n",3);
00147                 };
00148 
00149                 if(number_of_AA_RR2!=d_number_of_AA_smatr)
00150                 {
00151                         throw error("Number of letters is different for the scoring matrix and probabilities array\n",3);
00152                 };
00153 
00154 
00155                 d_open=options_->GetGapOpening()+options_->GetGapExtension();
00156 
00157                 d_epen=options_->GetGapExtension();
00158 
00159                 d_max_time=options_->GetMaxCalcTime();
00160 
00161                 d_max_mem=options_->GetMaxCalcMemory();
00162 
00163                 d_eps_lambda=options_->GetLambdaAccuracy();
00164 
00165                 d_eps_K=options_->GetKAccuracy();
00166 
00167                 d_out_file_name="test.out";
00168                 d_minimum_realizations_number=40;
00169 
00170                 d_rand_all=new struct_for_randomization;
00171                 alp_data::assert_mem(d_rand_all);
00172                 d_memory_size_in_MB+=sizeof(struct_for_randomization)/mb_bytes;
00173 
00174 
00175                 //randomization
00176                 Uint4 random_factor = 0;
00177 
00178                 CRef<CGumbelParamsRandDiagnostics> AdvancedParams_tmp
00179                     = rand_params_;
00180 
00181                 if(AdvancedParams_tmp.Empty())
00182                 {
00183                         random_factor=(Uint4)time(NULL);
00184                         #ifndef NCBI_OS_MSWIN //UNIX program
00185                                 struct timeval tv;
00186                                 struct timezone tz;
00187                                 gettimeofday(&tv, &tz);
00188                                 random_factor+=tv.tv_usec*10000000;
00189                         #else
00190                                 struct _timeb timebuffer;
00191                                 char *timeline;
00192                                 _ftime( &timebuffer );
00193                                 timeline = ctime( & ( timebuffer.time ) );
00194                                 random_factor+=timebuffer.millitm*10000000;
00195                         #endif
00196 
00197                         d_rand_flag=false;
00198 
00199                 }
00200                 else
00201                 {
00202                         d_rand_flag=true;
00203                         if(d_rand_flag)
00204                         {
00205                             random_factor=AdvancedParams_tmp->GetRandomSeed();
00206 
00207                             Int4 size=AdvancedParams_tmp->GetFirstStagePrelimReNumbers().size();
00208                                 d_rand_all->d_first_stage_preliminary_realizations_numbers_ALP.resize(size);
00209 
00210                                 Int4 i;
00211                                 for(i=0;i<size;i++)
00212                                 {
00213                                     d_rand_all->d_first_stage_preliminary_realizations_numbers_ALP[i]=AdvancedParams_tmp->GetFirstStagePrelimReNumbers()[i];
00214                                 };
00215 
00216 
00217                                 size=AdvancedParams_tmp->GetPrelimReNumbers().size();
00218                                 d_rand_all->d_preliminary_realizations_numbers_ALP.resize(size);
00219                                 for(i=0;i<size;i++)
00220                                 {
00221                                     d_rand_all->d_preliminary_realizations_numbers_ALP[i]=AdvancedParams_tmp->GetPrelimReNumbers()[i];
00222                                 };
00223 
00224 
00225                                 size=AdvancedParams_tmp->GetPrelimReNumbersKilling().size();
00226                                 d_rand_all->d_preliminary_realizations_numbers_killing.resize(size);
00227                                 for(i=0;i<size;i++)
00228                                 {
00229                                     d_rand_all->d_preliminary_realizations_numbers_killing[i]=AdvancedParams_tmp->GetPrelimReNumbersKilling()[i];
00230                                 };
00231 
00232 
00233 
00234                                 d_rand_all->d_total_realizations_number_with_ALP=AdvancedParams_tmp->GetTotalReNumber();
00235                                 d_rand_all->d_total_realizations_number_with_killing=AdvancedParams_tmp->GetTotalReNumberKilling();
00236 
00237 
00238                         };
00239                 };
00240 
00241 
00242                 d_random_factor=random_factor;
00243 
00244                 d_rand_object=new CRandom;
00245                 d_rand_object->SetSeed(d_random_factor);
00246 
00247                 
00248 
00249 
00250                 d_is=new importance_sampling(
00251                         this,
00252                         d_open,
00253                         d_epen,
00254                         d_number_of_AA,
00255                         d_smatr,
00256                         d_RR1,
00257                         d_RR2);
00258 
00259                 alp_data::assert_mem(d_is);
00260 
00261                 d_memory_size_in_MB+=sizeof(*d_is)/mb_bytes;
00262 
00263                 d_r_i_dot=new double[d_number_of_AA];
00264                 alp_data::assert_mem(d_r_i_dot);
00265                 d_r_dot_j=new double[d_number_of_AA];
00266                 alp_data::assert_mem(d_r_dot_j);
00267                 Int4 k;
00268                 for(k=0;k<d_number_of_AA;k++)
00269                 {
00270                         d_r_i_dot[k]=0;
00271                         if(d_RR1[k]!=0)
00272                         {
00273                                 Int4 i;
00274                                 for(i=0;i<d_number_of_AA;i++)
00275                                 {
00276                                         if(d_RR2[i]!=0)
00277                                         {
00278                                                 d_r_i_dot[k]+=d_is->d_exp_s[k][i]*d_RR2[i];
00279                                         };
00280                                 };
00281                         };
00282                 };
00283 
00284                 for(k=0;k<d_number_of_AA;k++)
00285                 {
00286                         d_r_dot_j[k]=0;
00287                         if(d_RR2[k]!=0)
00288                         {
00289                                 Int4 i;
00290                                 for(i=0;i<d_number_of_AA;i++)
00291                                 {
00292                                         if(d_RR1[i]!=0)
00293                                         {
00294                                                 d_r_dot_j[k]+=d_is->d_exp_s[i][k]*d_RR1[i];
00295                                         };
00296                                 };
00297                         };
00298                 };
00299 
00300 
00301                 d_memory_size_in_MB+=(double)(sizeof(double)*d_number_of_AA*2.0)/mb_bytes;
00302 
00303                 double tmp_size1=kMax_I4;
00304 
00305                 double tmp_size=Tmin((double)(tmp_size1),
00306                         (
00307 
00308                         (double)mb_bytes*d_max_mem/(double)d_minimum_realizations_number
00309                         )
00310                         /(
00311                         (double)(sizeof(double)*12)+(double)(sizeof(Int4)*17)
00312                         )
00313                         );
00314 
00315                 d_dim1_tmp=(Int4)tmp_size;
00316                 d_dim2_tmp=(Int4)tmp_size;
00317         }
00318         catch (error er)
00319         {
00320                 ee_error_flag=true;
00321                 ee_error=er;                
00322         };
00323         }
00324         catch (...)
00325         { 
00326                 ee_error_flag=true;
00327                 ee_error=error("Internal error in the program\n",4);
00328         };
00329 
00330 
00331         if(ee_error_flag)
00332         {
00333                 this->~alp_data();
00334                 throw error(ee_error.st,ee_error.error_code);
00335         };
00336 
00337 };
00338 
00339 alp_data::alp_data(//constructor
00340 Int4 rand_,//randomization number
00341 Int4 open_,//gap opening penalty
00342 Int4 epen_,//gap extension penalty
00343 string smatr_file_name_,//scoring matrix file name
00344 string RR1_file_name_,//probabilities1 file name
00345 string RR2_file_name_,//probabilities2 file name
00346 double max_time_,//maximum allowed calculation time in seconds
00347 double max_mem_,//maximum allowed memory usage in MB
00348 double eps_lambda_,//relative error for lambda calculation
00349 double eps_K_,//relative error for K calculation
00350 string out_file_name_)//output file name
00351 {
00352 
00353         ifstream frand;
00354         bool ee_error_flag=false;
00355         error ee_error("",0);
00356 
00357         d_smatr=NULL;
00358         d_RR1=NULL;
00359         d_RR1_sum=NULL;
00360         d_RR1_sum_elements=NULL;
00361 
00362         d_RR2=NULL;
00363         d_RR2_sum=NULL;
00364         d_RR2_sum_elements=NULL;
00365 
00366         d_is=NULL;
00367         d_r_i_dot=NULL;
00368         d_r_dot_j=NULL;
00369 
00370         d_rand_all=NULL;
00371 
00372 
00373 
00374         try
00375         {
00376         try
00377         {
00378                 d_sentinels_flag=false;
00379 
00380 
00381                 d_memory_size_in_MB=0;
00382 
00383                 #ifndef NCBI_OS_MSWIN //UNIX program
00384 
00385                 #else
00386                         _CrtMemCheckpoint( &d_s1 );
00387                 #endif
00388 
00389 
00390                 Int4 number_of_AA_RR1;
00391                 Int4 number_of_AA_RR2;
00392 
00393                 read_smatr(
00394                 smatr_file_name_,
00395                 d_smatr,
00396                 d_number_of_AA_smatr);
00397                 
00398                 
00399 
00400                 read_RR(
00401                 RR1_file_name_,
00402                 d_RR1,
00403                 d_RR1_sum,
00404                 d_RR1_sum_elements,
00405                 number_of_AA_RR1);
00406 
00407 
00408                 read_RR(
00409                 RR2_file_name_,
00410                 d_RR2,
00411                 d_RR2_sum,
00412                 d_RR2_sum_elements,
00413                 number_of_AA_RR2);
00414 
00415 
00416                 if(number_of_AA_RR1==d_number_of_AA_smatr)
00417                 {
00418                         d_number_of_AA=d_number_of_AA_smatr;
00419                 }
00420                 else
00421                 {
00422                         throw error("Number of letters is different in the files "+smatr_file_name_+" and "+RR1_file_name_+"\n",3);
00423                 };
00424 
00425                 if(number_of_AA_RR2!=d_number_of_AA_smatr)
00426                 {
00427                         throw error("Number of letters is different in the files "+smatr_file_name_+" and "+RR2_file_name_+"\n",3);
00428                 };
00429 
00430                 Int4 t;
00431                 for(t=0;t<number_of_AA_RR1;t++)
00432                 {
00433                         if(d_RR1[t]!=d_RR2[t])
00434                         {
00435                                 d_smatr_symmetric_flag=false;
00436                                 break;
00437                         };
00438                 };
00439 
00440 
00441                 check_out_file(out_file_name_);
00442 
00443                 d_open=open_+epen_;
00444                 d_epen=epen_;
00445                 d_max_time=max_time_;
00446                 d_max_mem=max_mem_;
00447                 d_eps_lambda=eps_lambda_;
00448                 d_eps_K=eps_K_;
00449                 d_out_file_name=out_file_name_;
00450                 d_minimum_realizations_number=40;
00451 
00452                 d_rand_all=new struct_for_randomization;
00453                 alp_data::assert_mem(d_rand_all);
00454                 d_memory_size_in_MB+=sizeof(struct_for_randomization)/mb_bytes;
00455 
00456                 //randomization
00457                 Uint4 random_factor=rand_;
00458 
00459 
00460                 if((Int4)random_factor<0)
00461                 {
00462                         random_factor=(Uint4)time(NULL);
00463                         #ifndef NCBI_OS_MSWIN //UNIX program
00464                                 struct timeval tv;
00465                                 struct timezone tz;
00466                                 gettimeofday(&tv, &tz);
00467                                 random_factor+=tv.tv_usec*10000000;
00468                         #else
00469                                 struct _timeb timebuffer;
00470                                 char *timeline;
00471                                 _ftime( &timebuffer );
00472                                 timeline = ctime( & ( timebuffer.time ) );
00473                                 random_factor+=timebuffer.millitm*10000000;
00474                         #endif
00475 
00476                         d_rand_flag=false;
00477 
00478                 }
00479                 else
00480                 {
00481                         d_rand_flag=true;
00482                         if(d_rand_flag)
00483                         {
00484                                 string rand_st="rand_"+alp_data::long_to_string(random_factor)+".out";
00485                                 frand.open(rand_st.data(),ios::in);
00486                                 if(!frand)
00487                                 {
00488                                         d_rand_flag=false;
00489                                 }
00490                                 else
00491                                 {
00492 
00493                                         Int4 i,size;
00494 
00495 
00496                                         
00497                                         frand>>d_rand_all->d_random_factor;
00498 
00499 
00500                                         if((Int4)random_factor!=d_rand_all->d_random_factor)
00501                                         {
00502                                                 throw error("Unexpected error in randomization seed\n",3);
00503                                         };
00504 
00505 
00506 
00507                                         frand>>size;
00508                                         for(i=0;i<size;i++)
00509                                         {
00510                                                 Int4 tmp;
00511                                                 frand>>tmp;
00512                                                 d_rand_all->d_first_stage_preliminary_realizations_numbers_ALP.push_back(tmp);
00513                                         };
00514 
00515                                         frand>>size;
00516                                         for(i=0;i<size;i++)
00517                                         {
00518                                                 Int4 tmp;
00519                                                 frand>>tmp;
00520                                                 d_rand_all->d_preliminary_realizations_numbers_ALP.push_back(tmp);
00521                                         };
00522 
00523                                         frand>>size;
00524                                         for(i=0;i<size;i++)
00525                                         {
00526                                                 Int4 tmp;
00527                                                 frand>>tmp;
00528                                                 d_rand_all->d_preliminary_realizations_numbers_killing.push_back(tmp);
00529                                         };
00530 
00531 
00532                                         frand>>d_rand_all->d_total_realizations_number_with_ALP;
00533                                         frand>>d_rand_all->d_total_realizations_number_with_killing;
00534 
00535                                         frand.close();
00536                                 };
00537                         };
00538                 };
00539 
00540 
00541                 d_random_factor=random_factor;
00542 
00543                 d_rand_object=new CRandom;
00544                 d_rand_object->SetSeed(d_random_factor);
00545 
00546                 
00547 
00548 
00549                 d_is=new importance_sampling(
00550                         this,
00551                         d_open,
00552                         d_epen,
00553                         d_number_of_AA,
00554                         d_smatr,
00555                         d_RR1,
00556                         d_RR2);
00557 
00558                 alp_data::assert_mem(d_is);
00559 
00560                 d_memory_size_in_MB+=sizeof(*d_is)/mb_bytes;
00561 
00562                 d_r_i_dot=new double[d_number_of_AA];
00563                 alp_data::assert_mem(d_r_i_dot);
00564                 d_r_dot_j=new double[d_number_of_AA];
00565                 alp_data::assert_mem(d_r_dot_j);
00566                 Int4 k;
00567                 for(k=0;k<d_number_of_AA;k++)
00568                 {
00569                         d_r_i_dot[k]=0;
00570                         if(d_RR1[k]!=0)
00571                         {
00572                                 Int4 i;
00573                                 for(i=0;i<d_number_of_AA;i++)
00574                                 {
00575                                         if(d_RR2[i]!=0)
00576                                         {
00577                                                 d_r_i_dot[k]+=d_is->d_exp_s[k][i]*d_RR2[i];
00578                                         };
00579                                 };
00580                         };
00581                 };
00582 
00583                 for(k=0;k<d_number_of_AA;k++)
00584                 {
00585                         d_r_dot_j[k]=0;
00586                         if(d_RR2[k]!=0)
00587                         {
00588                                 Int4 i;
00589                                 for(i=0;i<d_number_of_AA;i++)
00590                                 {
00591                                         if(d_RR1[i]!=0)
00592                                         {
00593                                                 d_r_dot_j[k]+=d_is->d_exp_s[i][k]*d_RR1[i];
00594                                         };
00595                                 };
00596                         };
00597                 };
00598 
00599 
00600                 d_memory_size_in_MB+=(double)(sizeof(double)*d_number_of_AA*2.0)/mb_bytes;
00601 
00602                 double tmp_size1=kMax_I4;
00603 
00604                 double tmp_size=Tmin((double)(tmp_size1),
00605                         (
00606 
00607                         (double)mb_bytes*d_max_mem/(double)d_minimum_realizations_number
00608                         )
00609                         /(
00610                         (double)(sizeof(double)*12)+(double)(sizeof(Int4)*17)
00611                         )
00612                         );
00613 
00614                 d_dim1_tmp=(Int4)tmp_size;
00615                 d_dim2_tmp=(Int4)tmp_size;
00616         }
00617         catch (error er)
00618         {
00619                 ee_error_flag=true;
00620                 ee_error=er;                
00621         };
00622         }
00623         catch (...)
00624         { 
00625                 ee_error_flag=true;
00626                 ee_error=error("Internal error in the program\n",4);
00627         };
00628 
00629         if(frand.is_open())
00630         {
00631                 frand.close();
00632         };
00633 
00634         if(ee_error_flag)
00635         {
00636                 this->~alp_data();
00637                 throw error(ee_error.st,ee_error.error_code);
00638         };
00639 
00640 };
00641 
00642 Int4 alp_data::random_long(
00643 double value_,
00644 Int4 dim_)
00645 {
00646         if(value_<0||value_>1.0||dim_<=0)
00647         {
00648                 throw error("Unexpected error",4);
00649         };
00650 
00651         if(dim_==1)
00652         {
00653                 return 0;
00654         };
00655 
00656         Int4 tmp=(Int4)floor(value_*(double)dim_);
00657         tmp=Tmin(tmp,dim_-1);
00658         return tmp;
00659 };
00660 
00661 
00662 alp_data::~alp_data()//destructor
00663 {
00664         delete d_rand_object;
00665 
00666         delete[]d_RR1;d_RR1=NULL;
00667         delete[]d_RR1_sum;d_RR1_sum=NULL;
00668         delete[]d_RR1_sum_elements;d_RR1_sum_elements=NULL;
00669 
00670         delete[]d_RR2;d_RR2=NULL;
00671         delete[]d_RR2_sum;d_RR2_sum=NULL;
00672         delete[]d_RR2_sum_elements;d_RR2_sum_elements=NULL;
00673 
00674 
00675         d_memory_size_in_MB-=(double)(2.0*sizeof(double)+sizeof(Int4))*(double)d_number_of_AA/mb_bytes;
00676 
00677         delete_memory_for_matrix(d_number_of_AA_smatr,d_smatr);
00678 
00679         delete d_is;d_is=NULL;
00680 
00681         d_memory_size_in_MB-=sizeof(*d_is)/mb_bytes;
00682 
00683         delete[]d_r_i_dot;d_r_i_dot=NULL;
00684         delete[]d_r_dot_j;d_r_dot_j=NULL;
00685         d_memory_size_in_MB-=(double)(sizeof(double)*d_number_of_AA*2.0)/mb_bytes;
00686 
00687         delete d_rand_all;d_rand_all=NULL;
00688         d_memory_size_in_MB-=sizeof(struct_for_randomization)/mb_bytes;
00689 
00690 
00691 };
00692 
00693 void alp_data::check_out_file(
00694         string out_file_name_)
00695 {
00696         bool ee_error_flag=false;
00697         error ee_error("",0);
00698         ifstream f;
00699         char *str_ch=NULL;
00700 
00701         try
00702         {
00703         try
00704         {
00705                 f.open(out_file_name_.data(),ios::in);
00706                 if(!f)
00707                 {
00708                         return;
00709                 };
00710 
00711                 bool symmetric_case_flag;
00712                 
00713                 string str;
00714                 getline(f,str);
00715                 str_ch=new char[str.length()+1];
00716                 if(!str_ch)
00717                 {
00718                         throw error("Memory allocation error\n",41);
00719                 };
00720 
00721                 Int4 k;
00722                 for(k=0;k<(Int4)str.length();k++)
00723                 {
00724                         str_ch[k]=str[k];
00725                 };
00726                 str_ch[str.length()]='\0';
00727 
00728 
00729                 char str_for_test0[]="number of realizations with killing";
00730                 char *test_flag0= strstr(str_ch,str_for_test0);
00731 
00732                 if(!test_flag0)
00733                 {
00734                         throw error("The output file "+out_file_name_+" exists and does not have correct format;\nplease delete the file and rerun the program\n",3);
00735                 };
00736 
00737                 char str_for_test[]="0.5*";
00738 
00739                 char*test_flag= strstr(str_ch,str_for_test);
00740                 if(test_flag)
00741                 {
00742                         symmetric_case_flag=true;
00743                 }
00744                 else
00745                 {
00746                         symmetric_case_flag=false;
00747                 };
00748 
00749 
00750                 
00751 
00752                 if(symmetric_case_flag)
00753                 {
00754                         if(!d_smatr_symmetric_flag)
00755                         {
00756                                 throw error("The output file "+out_file_name_+" exists and corresponds to symmetric case; \ncurrent calculation uses non-symmetric parameters;\nplease define another output file name\n",3);
00757                         };
00758                 };
00759 
00760                 if(!symmetric_case_flag)
00761                 {
00762                         if(d_smatr_symmetric_flag)
00763                         {
00764                                 throw error("The output file "+out_file_name_+" exists and corresponds to non-symmetric case; \ncurrent calculation uses symmetric parameters;\nplease define another output file name\n",3);
00765                         };
00766                 };
00767 
00768                 f.close();
00769         }
00770         catch (error er)
00771         {
00772                 ee_error_flag=true;
00773                 ee_error=er;                
00774         };
00775         }
00776         catch (...)
00777         { 
00778                 ee_error_flag=true;
00779                 ee_error=error("Internal error in the program\n",4);
00780         };
00781 
00782         delete[]str_ch;str_ch=NULL;
00783 
00784         if(f.is_open())
00785         {
00786                 f.close();
00787         };
00788 
00789         if(ee_error_flag)
00790         {
00791                 throw error(ee_error.st,ee_error.error_code);
00792         };
00793 
00794 };
00795 
00796 
00797 double alp_data::get_allocated_memory_in_MB()
00798 {
00799 
00800         #ifndef NCBI_OS_MSWIN //UNIX program
00801 
00802                 return 0;
00803 
00804         #else
00805                 _CrtMemCheckpoint( &d_s2 );
00806 
00807                 _CrtMemDifference( &d_s3, &d_s1, &d_s2);
00808 
00809                 double total=0;
00810                 int use;
00811                 for (use = 0; use < _MAX_BLOCKS; use++)
00812                 {
00813                         total+=d_s3.lSizes[use];
00814                 }
00815 
00816                 total/=(double)1048576;
00817                 return total;
00818 
00819         #endif
00820 
00821 };
00822 
00823 // Kludge: limit optimization by ICC 10.x to avoid undesired references to
00824 // __svml_exp2 (problematic to use from DLLs on x86_64 or at all on ia32).
00825 #if defined(NCBI_COMPILER_ICC)  &&  defined(__OPTIMIZE__) \
00826     &&  NCBI_COMPILER_VERSION >= 1000  &&  NCBI_COMPILER_VERSION < 1100
00827 #  pragma optimization_level 1
00828 #endif
00829 double importance_sampling::lambda_equation(double x_,void* func_number_)
00830 {
00831         data_for_lambda_equation *data=(data_for_lambda_equation*)func_number_;
00832         Int4 d_number_of_AA=data->d_number_of_AA;
00833         Int4** d_smatr=data->d_smatr;
00834         double *d_RR1=data->d_RR1;
00835         double *d_RR2=data->d_RR2;
00836 
00837         double res=0;
00838         Int4 i,j;
00839 
00840         for(i=0;i<d_number_of_AA;i++)
00841         {
00842                 for(j=0;j<d_number_of_AA;j++)
00843                 {
00844                         res+=d_RR1[i]*d_RR2[j]*exp(x_*d_smatr[i][j]);
00845                 };
00846         };
00847 
00848         return res-1.0;
00849 };
00850 
00851 void alp_data::read_smatr(
00852 string smatr_file_name_,
00853 Int4 **&smatr_,
00854 Int4 &number_of_AA_smatr_)
00855 {
00856         bool ee_error_flag=false;
00857         error ee_error("",0);
00858         ifstream f;
00859 
00860         try
00861         {
00862         try
00863         {
00864 
00865                 Int4 i,j;
00866                 f.open(smatr_file_name_.data(),ios::in);
00867                 if(!f)
00868                 {
00869                         throw error("Error - file "+smatr_file_name_+" is not found\n",3);
00870                 };
00871 
00872                 f>>number_of_AA_smatr_;
00873 
00874                 if(number_of_AA_smatr_<=0)
00875                 {
00876                         throw error("Error - number of letters in the scoring matrix file must be greater than 0\n",3);
00877                 };
00878 
00879                 get_memory_for_matrix(number_of_AA_smatr_,smatr_);
00880 
00881 
00882                 for(i=0;i<number_of_AA_smatr_;i++)
00883                 {
00884                         for(j=0;j<number_of_AA_smatr_;j++)
00885                         {
00886                                 f>>smatr_[i][j];
00887                         };
00888                 };
00889 
00890                 f.close();
00891 
00892                 bool flag=true;
00893                 for(i=0;i<number_of_AA_smatr_;i++)
00894                 {
00895                         for(j=0;j<i;j++)
00896                         {
00897                                 if(smatr_[i][j]!=smatr_[j][i])
00898                                 {
00899                                         flag=false;
00900                                 };
00901                         };
00902                 };
00903 
00904                 d_smatr_symmetric_flag=flag;
00905 
00906                 d_smatr_symmetric_flag=false;
00907 
00908         }
00909         catch (error er)
00910         {
00911                 ee_error_flag=true;
00912                 ee_error=er;                
00913         };
00914         }
00915         catch (...)
00916         { 
00917                 ee_error_flag=true;
00918                 ee_error=error("Internal error in the program\n",4);
00919         };
00920 
00921         //memory release
00922         if(f.is_open())
00923         {
00924                 f.close();
00925         };
00926 
00927         if(ee_error_flag)
00928         {
00929                 throw error(ee_error.st,ee_error.error_code);
00930         };
00931 
00932 };
00933 
00934 void alp_data::read_RR(
00935 string RR_file_name_,
00936 double *&RR_,
00937 double *&RR_sum_,
00938 Int4 *&RR_sum_elements_,
00939 Int4 &number_of_AA_RR_)
00940 {
00941         bool ee_error_flag=false;
00942         error ee_error("",0);
00943         ifstream f;
00944 
00945         try
00946         {
00947         try
00948         {
00949 
00950                 Int4 i;
00951                 f.open(RR_file_name_.data(),ios::in);
00952                 if(!f)
00953                 {
00954                         throw error("Error - file "+RR_file_name_+" is not found\n",3);
00955                 };
00956 
00957                 f>>number_of_AA_RR_;
00958 
00959                 if(number_of_AA_RR_<=0)
00960                 {
00961                         throw error("Error - number of letters in the probabilities file must be greater than 0\n",3);
00962                 };
00963                 
00964                 RR_=new double[number_of_AA_RR_];
00965                 assert_mem(RR_);
00966 
00967                 RR_sum_=new double[number_of_AA_RR_];
00968                 assert_mem(RR_sum_);
00969 
00970                 RR_sum_elements_=new Int4 [number_of_AA_RR_];
00971                 assert_mem(RR_sum_elements_);
00972 
00973                 d_memory_size_in_MB+=(double)(2.0*sizeof(double)+sizeof(Int4))*(double)number_of_AA_RR_/mb_bytes;
00974 
00975 
00976                 for(i=0;i<number_of_AA_RR_;i++)
00977                 {
00978                         f>>RR_[i];
00979 
00980                         if(RR_[i]<0)
00981                         {
00982                                 throw error("Error - input letter's probability number "+long_to_string(i+1)+" is negative\n",3);
00983                         };
00984 
00985                         if(RR_[i]>1.0)
00986                         {
00987                                 throw error("Error - input letter's probability number "+long_to_string(i+1)+" is greater than 1.0\n",3);
00988                         };
00989 
00990 
00991                         if(i!=0)
00992                         {
00993                                 RR_sum_[i]=RR_sum_[i-1]+RR_[i];
00994                         }
00995                         else
00996                         {
00997                                 RR_sum_[i]=RR_[i];
00998                         };
00999                         RR_sum_elements_[i]=i;
01000                 };
01001 
01002                 if(fabs(RR_sum_[number_of_AA_RR_-1]-1.0)>0.000000000001)
01003                 {
01004                         //cout<<"Warning: sum of probabilities in the file "<<RR_file_name_<<" is not equal to 1\n\n";
01005                 };
01006 
01007 
01008                 f.close();
01009         }
01010         catch (error er)
01011         {
01012                 ee_error_flag=true;
01013                 ee_error=er;                
01014         };
01015         }
01016         catch (...)
01017         { 
01018                 ee_error_flag=true;
01019                 ee_error=error("Internal error in the program\n",4);
01020         };
01021 
01022         //memory release
01023         if(f.is_open())
01024         {
01025                 f.close();
01026         };
01027 
01028         if(ee_error_flag)
01029         {
01030                 throw error(ee_error.st,ee_error.error_code);
01031         };
01032 
01033 };
01034 
01035 void alp_data::read_RR(
01036 const vector<double> &vector_,
01037 double *&RR_,
01038 double *&RR_sum_,
01039 Int4 *&RR_sum_elements_,
01040 Int4 &number_of_AA_RR_)
01041 {
01042         bool ee_error_flag=false;
01043         error ee_error("",0);
01044 
01045         try
01046         {
01047         try
01048         {
01049 
01050                 Int4 i;
01051 
01052                 number_of_AA_RR_=vector_.size();
01053 
01054                 if(number_of_AA_RR_<=0)
01055                 {
01056                         throw error("Error - number of letters in the probabilities file must be greater than 0\n",3);
01057                 };
01058                 
01059                 RR_=new double[number_of_AA_RR_];
01060                 assert_mem(RR_);
01061 
01062                 RR_sum_=new double[number_of_AA_RR_];
01063                 assert_mem(RR_sum_);
01064 
01065                 RR_sum_elements_=new Int4 [number_of_AA_RR_];
01066                 assert_mem(RR_sum_elements_);
01067 
01068                 d_memory_size_in_MB+=(double)(2.0*sizeof(double)+sizeof(Int4))*(double)number_of_AA_RR_/mb_bytes;
01069 
01070 
01071                 for(i=0;i<number_of_AA_RR_;i++)
01072                 {
01073                         RR_[i]=vector_[i];
01074 
01075                         if(RR_[i]<0)
01076                         {
01077                                 throw error("Error - input letter's probability number "+long_to_string(i+1)+" is negative\n",3);
01078                         };
01079 
01080                         if(RR_[i]>1.0)
01081                         {
01082                                 throw error("Error - input letter's probability number "+long_to_string(i+1)+" is greater than 1.0\n",3);
01083                         };
01084 
01085 
01086                         if(i!=0)
01087                         {
01088                                 RR_sum_[i]=RR_sum_[i-1]+RR_[i];
01089                         }
01090                         else
01091                         {
01092                                 RR_sum_[i]=RR_[i];
01093                         };
01094                         RR_sum_elements_[i]=i;
01095                 };
01096 
01097                 if(fabs(RR_sum_[number_of_AA_RR_-1]-1.0)>0.000000000001)
01098                 {
01099                         //cout<<"Warning: sum of probabilities in the file "<<RR_file_name_<<" is not equal to 1\n\n";
01100                 };
01101 
01102 
01103         }
01104         catch (error er)
01105         {
01106                 ee_error_flag=true;
01107                 ee_error=er;                
01108         };
01109         }
01110         catch (...)
01111         { 
01112                 ee_error_flag=true;
01113                 ee_error=error("Internal error in the program\n",4);
01114         };
01115 
01116         if(ee_error_flag)
01117         {
01118                 throw error(ee_error.st,ee_error.error_code);
01119         };
01120 
01121 };
01122 
01123 
01124 string alp_data::long_to_string(//convert interer ot string
01125 Int4 number_)
01126 {
01127         string res_="";
01128         string tmp_string;
01129         if(number_>0)
01130         {
01131                 tmp_string="";
01132         }
01133         else
01134         {
01135                 if(number_==0)
01136                 {
01137                         tmp_string="";
01138                 }
01139                 else
01140                 {
01141                         tmp_string="-";
01142                 };
01143         };
01144         number_=abs(number_);
01145         do{
01146                 Int4 reminder=number_%10;
01147                 number_=(number_-reminder)/10;
01148                 res_=digit_to_string(reminder)+res_;
01149                 if (number_==0)
01150                 {
01151                         break;
01152                 };
01153         }
01154         while (true);
01155 
01156         return tmp_string+res_;
01157 };
01158 
01159 char alp_data::digit_to_string(//convert interer ot string
01160 Int4 digit_)
01161 {
01162         switch(digit_)
01163         {
01164         case 0:return '0';
01165         case 1:return '1';
01166         case 2:return '2';
01167         case 3:return '3';
01168         case 4:return '4';
01169         case 5:return '5';
01170         case 6:return '6';
01171         case 7:return '7';
01172         case 8:return '8';
01173         case 9:return '9';
01174         default:return '?';
01175         };
01176 };
01177 
01178 
01179 
01180 
01181 void alp_data::assert_mem(void *pointer_)
01182 {
01183         if(!pointer_)
01184         {
01185                 throw error("Memory allocation error\n",41);
01186         };
01187 };
01188 
01189 double alp_data::round(//returns nearest integer to x_
01190 const double &x_)
01191 {
01192         double x_floor=floor(x_);
01193         double x_ceil=ceil(x_);
01194         if(fabs(x_-x_floor)<0.5)
01195         {
01196                 return x_floor;
01197         };
01198         return x_ceil;
01199 };
01200 
01201 
01202 
01203 importance_sampling::importance_sampling(
01204 alp_data *alp_data_,
01205 Int4 open_,
01206 Int4 epen_,
01207 Int4 number_of_AA_,
01208 Int4 **smatr_,
01209 double *RR1_,
01210 double *RR2_)
01211 {
01212         d_elements=NULL;
01213         d_elements_values=NULL;
01214 
01215         d_exp_s=NULL;
01216 
01217 
01218         d_alp_data=alp_data_;
01219         if(!d_alp_data)
01220         {
01221                 throw error("Unexpected error",4);
01222         };
01223 
01224         bool ee_error_flag=false;
01225         error ee_error("",0);
01226 
01227         try
01228         {
01229         try
01230         {
01231 
01232 
01233 
01234                 {
01235 
01236                         //calculation of the importance sampling theta
01237 
01238                         data_for_lambda_equation tmp_ptr;
01239                         tmp_ptr.d_number_of_AA=number_of_AA_;
01240                         tmp_ptr.d_RR1=RR1_;
01241                         tmp_ptr.d_RR2=RR2_;
01242                         tmp_ptr.d_smatr=smatr_;
01243 
01244                         //calculate maximum of smatr_ elements
01245                         Int4 smatr_max=smatr_[0][0];
01246                         Int4 smatr_max_i=0;
01247                         Int4 smatr_max_j=0;
01248                         Int4 smatr_min=smatr_[0][0];
01249 
01250                         Int4 smatr_pos_max=kMin_I4;
01251                         Int4 smatr_neg_min=kMax_I4;
01252 
01253                         double eps=0.00001;
01254                         double threshold=DBL_MIN*10.0;
01255 
01256                         double aver_score=0;
01257                         Int4 i,j;
01258                         for(i=0;i<number_of_AA_;i++)
01259                         {
01260                                 for(j=0;j<number_of_AA_;j++)
01261                                 {
01262                                         if(RR1_[j]*RR2_[i]<=threshold)
01263                                         {
01264                                                 continue;
01265                                         };
01266 
01267                                         aver_score+=RR1_[i]*RR2_[j]*smatr_[i][j];
01268 
01269                                         if(smatr_max<smatr_[i][j])
01270                                         {
01271                                                 smatr_max=smatr_[i][j];
01272                                                 smatr_max_i=i;
01273                                                 smatr_max_j=j;
01274                                         };
01275                                         smatr_min=alp_data::Tmin(smatr_min,smatr_[i][j]);
01276                                         
01277 
01278                                         if(smatr_[i][j]>0)
01279                                         {
01280                                                 smatr_pos_max=alp_data::Tmax(smatr_pos_max,smatr_[i][j]);
01281                                         };
01282 
01283                                         if(smatr_[i][j]<0)
01284                                         {
01285                                                 smatr_neg_min=alp_data::Tmin(smatr_neg_min,smatr_[i][j]);
01286                                         };
01287 
01288                                 };
01289                         };
01290 
01291                         if(aver_score>=-threshold)
01292                         {
01293                                 throw error("Error - sum[i,j] RR1[i]*RR2[j]*smatr[i][j]>=0; the program cannot continue the calculation\n",3);
01294                         };
01295 
01296                         if(smatr_max<=0)
01297                         {
01298                                 throw error("Error - at least one element of the scoring matrix must be positive\n",3);
01299                         };
01300 
01301                         
01302 
01303                         double a=eps;
01304 
01305                         while(importance_sampling::lambda_equation(a,(void*)(&tmp_ptr))>0)
01306                         {
01307                                 a/=2.0;
01308 
01309                                 if(a<threshold*100.0)
01310                                 {
01311                                         throw error("Error - the input parameters correspond to non-logarithmic regime\n",3);
01312                                 };
01313                         };
01314 
01315                         if(a<threshold*100.0)
01316                         {
01317                                 throw error("Error - the input parameters define the regime which is too close to the critical regime\n",3);
01318                         };
01319 
01320                         eps=a/10.0;
01321 
01322 
01323                         double tmp_pr=RR1_[smatr_max_i]*RR2_[smatr_max_j];
01324                         double b=(log(1+10*eps)-log(tmp_pr))/(double)smatr_max;
01325 
01326                         
01327                         Int4 n_partition=2;
01328                         std::vector<double> res_lambda;
01329                         
01330                         
01331                         alp_reg::find_tetta_general(
01332                         importance_sampling::lambda_equation,
01333                         (void*)(&tmp_ptr),
01334                         a,
01335                         b,
01336                         n_partition,
01337                         eps,
01338                         res_lambda);
01339 
01340                         sort(res_lambda.begin(),res_lambda.end());
01341 
01342                         if(res_lambda.size()==0)
01343                         {
01344                                 throw error("Error - the program is not able to find the ungapped lambda\n",3);
01345                         };
01346 
01347                         d_lambda=res_lambda[res_lambda.size()-1];
01348                         d_ungap_lambda=d_lambda;
01349 
01350                         //cout<<"\nUngapped lambda is "<<d_ungap_lambda<<endl;
01351 
01352                         d_lambda*=1.07;
01353                 };
01354 
01355 
01356                 
01357                 d_is_number_of_AA=number_of_AA_;
01358 
01359                 d_elements=new q_elem[number_of_AA_*number_of_AA_];
01360                 alp_data::assert_mem(d_elements);
01361 
01362                 d_elements_values=new double[number_of_AA_*number_of_AA_];
01363                 alp_data::assert_mem(d_elements_values);
01364 
01365 
01366 
01367                 d_alp_data->get_memory_for_matrix(d_is_number_of_AA,d_exp_s);
01368 
01369                 Int4 ind=0;
01370                 double sum=0;
01371                 Int4 a,b;
01372                 for(a=0;a<number_of_AA_;a++)
01373                 {
01374                         for(b=0;b<number_of_AA_;b++)
01375                         {
01376                                 d_exp_s[a][b]=exp(d_lambda*smatr_[a][b]);
01377                                 d_elements_values[ind]=RR1_[a]*RR2_[b]*d_exp_s[a][b];
01378                                 sum+=d_elements_values[ind];
01379                                 ind++;
01380                         };
01381                 };
01382 
01383 
01384                 for(a=0;a<number_of_AA_;a++)
01385                 {
01386                         for(b=0;b<number_of_AA_;b++)
01387                         {
01388                                 d_exp_s[a][b]/=sum;
01389                         };
01390                 };
01391 
01392 
01393                 for(ind=0;ind<number_of_AA_*number_of_AA_;ind++)
01394                 {
01395                         d_elements_values[ind]/=sum;
01396                 };
01397 
01398                 
01399                 for(ind=1;ind<number_of_AA_*number_of_AA_;ind++)
01400                 {
01401                         d_elements_values[ind]=d_elements_values[ind-1]+d_elements_values[ind];
01402                 };
01403 
01404                 
01405                 ind=0;
01406                 for(a=0;a<number_of_AA_;a++)
01407                 {
01408                         for(b=0;b<number_of_AA_;b++)
01409                         {
01410                                 q_elem elem_tmp;
01411 
01412                                 elem_tmp.d_a=a;
01413                                 elem_tmp.d_b=b;
01414 
01415                                 d_elements[ind]=elem_tmp;
01416                                 d_elements_values[ind]=d_elements_values[ind];
01417 
01418                                 ind++;
01419 
01420                         };
01421                 };
01422 
01423 
01424 
01425                 d_mu=exp(-fabs(d_lambda)*open_);
01426                 d_nu=exp(-fabs(d_lambda)*epen_);
01427 
01428                 double tmp=1+d_mu-d_nu;
01429 
01430                 d_eta=(1-d_nu)*(1-d_nu)/(tmp*tmp);
01431                 d_mu_SI=1-d_nu;
01432                 d_mu_IS=d_mu*(1-d_nu)/(tmp*tmp);
01433                 d_mu_DS=d_mu/tmp;
01434                 d_mu_SD=(1-d_nu)*(1-d_nu)/tmp;
01435                 d_mu_ID=d_mu*(1-d_nu)/tmp;
01436 
01437 
01438                 d_for_D[0]=d_nu;                                d_for_D_states[0]='D';
01439                 d_for_D[1]=d_for_D[0]+d_mu_SD;        d_for_D_states[1]='S';
01440                 d_for_D[2]=d_for_D[1]+d_mu_ID;        d_for_D_states[2]='I';
01441 
01442                 d_for_I[0]=d_nu;                                d_for_I_states[0]='I';
01443                 d_for_I[1]=d_for_I[0]+d_mu_SI;        d_for_I_states[1]='S';
01444 
01445                 d_for_S[0]=d_eta;                                d_for_S_states[0]='S';
01446                 d_for_S[1]=d_for_S[0]+d_mu_DS;        d_for_S_states[1]='D';
01447                 d_for_S[2]=d_for_S[1]+d_mu_IS;        d_for_S_states[2]='I';
01448 
01449                 d_alp_data->d_memory_size_in_MB+=sizeof(double)*number_of_AA_/mb_bytes;
01450                 d_alp_data->d_memory_size_in_MB+=sizeof(q_elem)*number_of_AA_/mb_bytes;
01451         }
01452         catch (error er)
01453         {
01454                 ee_error_flag=true;
01455                 ee_error=er;                
01456         };
01457         }
01458         catch (...)
01459         { 
01460                 ee_error_flag=true;
01461                 ee_error=error("Internal error in the program\n",4);
01462         };
01463 
01464         //memory release
01465 
01466         if(ee_error_flag)
01467         {
01468                 this->~importance_sampling();
01469                 throw error(ee_error.st,ee_error.error_code);
01470         };
01471 
01472 };
01473 
01474 importance_sampling::~importance_sampling()
01475 {
01476         delete []d_elements;d_elements=NULL;
01477         delete []d_elements_values;d_elements_values=NULL;
01478 
01479         if(d_alp_data)
01480         {
01481                 d_alp_data->delete_memory_for_matrix(d_is_number_of_AA,d_exp_s);
01482                 d_alp_data->d_memory_size_in_MB-=sizeof(double)*d_is_number_of_AA/mb_bytes;
01483                 d_alp_data->d_memory_size_in_MB-=sizeof(q_elem)*d_is_number_of_AA/mb_bytes;
01484         };
01485 
01486 };
01487 
01488 void alp_data::get_current_time(
01489 double &seconds_)
01490 {
01491 #ifndef NCBI_OS_MSWIN //UNIX program
01492          struct timeval tv;
01493      struct timezone tz;
01494      time_t t;
01495 
01496      gettimeofday(&tv, &tz);
01497      t = tv.tv_sec;
01498      localtime(&t);
01499 
01500      seconds_=(double)(t)+(double)(tv.tv_usec) * 0.000001;
01501 
01502 #else
01503 
01504    struct _timeb timebuffer;
01505 
01506    _ftime( &timebuffer );
01507 
01508         seconds_=timebuffer.time+(double)(timebuffer.millitm)/1000.0;
01509 
01510 #endif
01511 };
01512 
01513 
Modified on Thu Aug 28 19:16:41 2014 by modify_doxy.py rev. 426318