src/util/tables/raw_scoremat.c

Go to the documentation of this file.
00001 /*  $Id: raw_scoremat.c 138208 2008-08-22 14:43:58Z ucko $
00002  * ===========================================================================
00003  *
00004  *                            PUBLIC DOMAIN NOTICE
00005  *               National Center for Biotechnology Information
00006  *
00007  *  This software/database is a "United States Government Work" under the
00008  *  terms of the United States Copyright Act.  It was written as part of
00009  *  the author's official duties as a United States Government employee and
00010  *  thus cannot be copyrighted.  This software/database is freely available
00011  *  to the public for use. The National Library of Medicine and the U.S.
00012  *  Government have not placed any restriction on its use or reproduction.
00013  *
00014  *  Although all reasonable efforts have been taken to ensure the accuracy
00015  *  and reliability of the software and data, the NLM and the U.S.
00016  *  Government do not and cannot warrant the performance or results that
00017  *  may be obtained by using this software or data. The NLM and the U.S.
00018  *  Government disclaim all warranties, express or implied, including
00019  *  warranties of performance, merchantability or fitness for any particular
00020  *  purpose.
00021  *
00022  *  Please cite the author in any work or product based on this material.
00023  *
00024  * ===========================================================================
00025  *
00026  * Author:  Aaron Ucko
00027  *
00028  * File Description:
00029  *   Protein alignment score matrices; shared between the two toolkits.
00030  *
00031  */
00032 
00033 #include <util/tables/raw_scoremat.h>
00034 
00035 #include <ctype.h>
00036 #include <string.h>
00037 
00038 #include "sm_blosum45.c"
00039 #include "sm_blosum50.c"
00040 #include "sm_blosum62.c"
00041 #include "sm_blosum80.c"
00042 #include "sm_blosum90.c"
00043 #include "sm_pam30.c"
00044 #include "sm_pam70.c"
00045 #include "sm_pam250.c"
00046 
00047 static const char kNCBIstdaa[] = "-ABCDEFGHIKLMNPQRSTVWXYZU*OJ";
00048 
00049 
00050 int NCBISM_GetIndex(const SNCBIPackedScoreMatrix* sm, int aa)
00051 {
00052     const char *p;
00053 
00054     /* Translate to NCBIeaa */
00055     if (aa >= 0  &&  aa < sizeof(kNCBIstdaa)) {
00056         aa = kNCBIstdaa[aa];
00057     } else if (islower((unsigned char) aa)) {
00058         aa = toupper((unsigned char) aa);
00059     }
00060 
00061     p = strchr(sm->symbols, aa);
00062     return p ? p - sm->symbols : -1;
00063 }
00064 
00065 
00066 TNCBIScore NCBISM_GetScore(const SNCBIPackedScoreMatrix* sm,
00067                            int aa1, int aa2)
00068 {
00069     int i1, i2;
00070     i1 = NCBISM_GetIndex(sm, aa1);
00071     i2 = NCBISM_GetIndex(sm, aa2);
00072     if (i1 >=0  &&  i2 >= 0) {
00073         return sm->scores[i1 * strlen(sm->symbols) + i2];
00074     } else {
00075         return sm->defscore;
00076     }
00077 }
00078 
00079 
00080 void NCBISM_Unpack(const SNCBIPackedScoreMatrix* psm,
00081                    SNCBIFullScoreMatrix* fsm)
00082 {
00083     const char* sym;
00084     int         dim, i, j, aa1, aa2;
00085 
00086     sym = psm->symbols;
00087     dim = strlen(sym);
00088     /* fill with default */
00089     memset(&fsm->s, psm->defscore, NCBI_FSM_DIM * NCBI_FSM_DIM);
00090     for (i = 0;  i < dim;  ++i) {
00091         aa1 = sym[i];
00092         /* get core (NCBIeaa x NCBIeaa) */
00093         for (j = 0;  j < dim;  ++j) {
00094             aa2 = sym[j];
00095             fsm->s[aa1][aa2] = psm->scores[i * dim + j];
00096         }
00097         /* extend horizontally */
00098         for (aa2 = 0;  aa2 < sizeof(kNCBIstdaa);  ++aa2) {
00099             fsm->s[aa1][aa2] = fsm->s[aa1][(int)kNCBIstdaa[aa2]];
00100         }
00101         for (aa2 = 'a';  aa2 <= 'z';  ++aa2) {
00102             fsm->s[aa1][aa2] = fsm->s[aa1][toupper((unsigned char) aa2)];
00103         }
00104     }
00105     /* extend vertically */
00106     for (aa1 = 0;  aa1 < sizeof(kNCBIstdaa);  ++aa1) {
00107         memcpy(fsm->s[aa1], fsm->s[(int)kNCBIstdaa[aa1]], NCBI_FSM_DIM);
00108     }
00109     for (aa1 = 'a';  aa1 <= 'z';  ++aa1) {
00110         memcpy(fsm->s[aa1], fsm->s[toupper((unsigned char) aa1)], NCBI_FSM_DIM);
00111     }
00112 }
00113 
/* Case-insensitive prefix test.
 *
 * str  string to examine.
 * pfx  expected prefix; it is compared as written against the lowercased
 *      characters of str, so it has to be supplied in lowercase.
 *
 * Returns 1 if str begins with pfx (ignoring the case of str), else 0.
 * An empty pfx matches every string.
 */
static
int /* bool */ s_NCBISM_StartsWith(const char* str, const char* pfx)
{
    size_t k = 0;

    while (pfx[k] != '\0') {
        /* A str shorter than pfx stops here: its NUL never equals a
           non-NUL prefix character. */
        if (tolower((unsigned char) str[k]) != pfx[k]) {
            return 0;
        }
        ++k;
    }
    return 1;
}
00124 
00125 const SNCBIPackedScoreMatrix* NCBISM_GetStandardMatrix(const char* name)
00126 {
00127     switch (name[0]) {
00128     case 'B': case 'b':
00129         if ( !s_NCBISM_StartsWith(name, "blosum") ) {
00130             return NULL;
00131         }
00132         switch (name[6]) {
00133         case '4': return strcmp(name + 6, "45") ? NULL : &NCBISM_Blosum45;
00134         case '5': return strcmp(name + 6, "50") ? NULL : &NCBISM_Blosum50;
00135         case '6': return strcmp(name + 6, "62") ? NULL : &NCBISM_Blosum62;
00136         case '8': return strcmp(name + 6, "80") ? NULL : &NCBISM_Blosum80;
00137         case '9': return strcmp(name + 6, "90") ? NULL : &NCBISM_Blosum90;
00138         default:  return NULL;
00139         }
00140 
00141     case 'P': case 'p':
00142         if ( !s_NCBISM_StartsWith(name, "pam") ) {
00143             return NULL;
00144         }
00145         switch (name[3]) {
00146         case '2': return strcmp(name + 3, "250") ? NULL : &NCBISM_Pam250;
00147         case '3': return strcmp(name + 3, "30")  ? NULL : &NCBISM_Pam30;
00148         case '7': return strcmp(name + 3, "70")  ? NULL : &NCBISM_Pam70;
00149         }
00150 
00151     default:
00152         return NULL;
00153     }
00154 }
00155 
00156 

Generated on Wed Dec 9 05:27:06 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Wed Dec 09 08:18:15 2009 by modify_doxy.py rev. 173732