NCBI C Toolkit Cross Reference

C/tools/bandalgn.h


  1 /* ===========================================================================
  2 *
  3 *                            PUBLIC DOMAIN NOTICE
  4 *               National Center for Biotechnology Information
  5 *
  6 *  This software/database is a "United States Government Work" under the
  7 *  terms of the United States Copyright Act.  It was written as part of
  8 *  the author's official duties as a United States Government employee and
  9 *  thus cannot be copyrighted.  This software/database is freely available
 10 *  to the public for use. The National Library of Medicine and the U.S.
 11 *  Government have not placed any restriction on its use or reproduction.
 12 *
 13 *  Although all reasonable efforts have been taken to ensure the accuracy
 14 *  and reliability of the software and data, the NLM and the U.S.
 15 *  Government do not and cannot warrant the performance or results that
 16 *  may be obtained by using this software or data. The NLM and the U.S.
 17 *  Government disclaim all warranties, express or implied, including
 18 *  warranties of performance, merchantability or fitness for any particular
 19 *  purpose.
 20 *
 21 *  Please cite the author in any work or product based on this material.
 22 *
 23 * ===========================================================================*/
 24 /*****************************************************************************
 25 
 26 File name: bandalgn.h
 27 
 28 Author: Gennadiy Savchuk, Jinqhui Zhang, Tom Madden
 29 
 30 Contents: prototypes to perform a global gapped alignment on two sequences.
 31 
 32 ****************************************************************************/
 33 /* $Revision: 6.7 $ 
 34 * $Log: bandalgn.h,v $
 35 * Revision 6.7  2000/07/26 17:26:25  lewisg
 36 * fix code for c++ inclusion
 37 *
 38 * Revision 6.6  2000/02/10 22:47:07  vakatov
 39 * DLL'zation for MSVC on PC, Win-NT
 40 *
 41 * Revision 6.5  1999/03/17 16:49:10  madden
 42 * Removed comment within comment
 43 *
 44 * Revision 6.4  1998/01/16 21:29:45  chappey
 45 * Remove function CC_GetExtremes and use now SeqAlignStart, SeqAlignStop in salsap.c
 46 *
 47  * Revision 6.3  1997/10/22 14:41:39  chappey
 48  * added CC_GetAlignExtremes, ChangeGlobalBandMatrix, CC_ExtendSeqAlign
 49  *
 50  * Revision 6.2  1997/10/02 16:18:20  tatiana
 51  * *** empty log message ***
 52  *
 53  * Revision 6.1  1997/10/02 15:17:15  tatiana
 54  * global align utility added
 55  *
 56  * Revision 6.0  1997/08/25 18:52:15  madden
 57  * Revision changed to 6.0
 58  *
 59  * Revision 1.3  1997/06/23 16:16:13  tatiana
 60  * GlobalBandAlign struct changed to use SeqLocs instead of SeqIds
 61  *
 62  * Revision 1.2  1997/03/05  17:31:21  savchuk
 63  * definition of data_t has been moved from bandalgn.c
 64  *
 65  * Revision 1.1  1997/01/22  14:11:05  madden
 66  * Initial revision
 67  *
 68 */
 69 
 70 #ifndef _G_BAND_H_
 71 #define _G_BAND_H_
 72 
 73 #include <ncbi.h>
 74 #include <gapxdrop.h>
 75 #include <blastkar.h>
 76 #include <seqport.h>
 77 #include <blast.h>
 78 
 79 
 80 #undef NLM_EXTERN
 81 #ifdef NLM_IMPORT
 82 #define NLM_EXTERN NLM_IMPORT
 83 #else
 84 #define NLM_EXTERN extern
 85 #endif
 86 
 87 #ifdef __cplusplus
 88 extern "C" {
 89 #endif
 90 
 91 
 92 #define MININT -999999   /* large negative marker; the REPP macros in this header also store it in the script. NOTE(review): expansion is not parenthesized */
 93 #define MAXINT 9999999   /* large positive bound; note it has one more digit than MININT */
 94 #define SMBAND 400       /* not referenced elsewhere in this header; presumably a band-size threshold -- TODO confirm in bandalgn.c */
 95 /* Should this be removed? */
 96 #define BND_DIGIT ((FloatHi)1.0)
 97 
 98 #define PSU_MATRIX_SIZE 128   /* presumably the dimension of the scoring matrix -- TODO confirm */
 99 
100 #ifdef notdef   /* notdef is not defined anywhere in this header, so the enum below is compiled out here */
101 enum {v,h};
102 #endif
103 
104 typedef struct DP {   /* one node holding six Int4 values; the meaning of each field is defined by the alignment code, not by this header */
105   Int4 CC, DD, CP, DPDP, FF, FP;
106 } PNTR dp_ptr, dp_node;   /* dp_node is the struct type, dp_ptr a pointer to it (assuming PNTR is the toolkit's pointer declarator) */
107 
108 typedef struct {   /* working state for the alignment routines; the _gap/_DEL/_INS/_REP/_REPP and gap_/DEL_/INS_/REP_/REPP_ macros below reach it through a variable named data */
109   dp_ptr CD;
110   Int4 IP;
111   Int4Ptr MP[4];                /* save crossing points */
112                                 /* 0: rep, 1: del, 2: ins (index 3 is not described here) */
113   Uint1Ptr MT[4];
114   Int4Ptr FP;                   /* forward dividing points */
115   Uint1Ptr FT;
116   Int4Ptr PNTR w;         /* w = W */
117 
118   Int4 g, zzh, m, rr;          /* g = G, zzh = H, m = g+zzh; per the gap macros below a k-symbol indel costs g + zzh*k */
119 
120   Int4 leggA, leggB, reggA, reggB, leghA, leghB, reghA, reghB;   /* presumably left/right end-gap penalties for sequences A and B -- TODO confirm in bandalgn.c */
121   Int4Ptr sapp, sapp0;          /* sapp: current script append ptr; sapp0: presumably the start of the script -- TODO confirm */
122   Int4  last;                   /* Last script op appended: set negative by the DEL macros, positive by the INS macros, 0 by the REP/REPP macros */
123   Int1Ptr PNTR state;
124 } data_t;
125 
126 #define gap(k) ((k) <= 0 ? 0 : (g + zzh * (k))) /* k-symbol indel cost: 0 when k <= 0, else g + zzh*k; reads variables g and zzh from the expansion site; k is evaluated twice */
127 
128 /* k-symbol indel cost, same formula, reading g and zzh through a pointer named data at the expansion site */
129 #define _gap(k) ((k) <= 0 ? 0 : (data->g + data->zzh * (k)))
130 
131 /* k-symbol indel cost, same formula, reading g and zzh from a struct (not a pointer) named data at the expansion site */
132 #define gap_(k) ((k) <= 0 ? 0 : (data.g + data.zzh * (k)))
133 
134 /* Append "Delete k" op: if the last op was a delete (last < 0), grow the previous script entry by subtracting k; otherwise append -k. last receives the resulting entry. Uses bare last/sapp from the expansion site; the expansion already ends in ';'. Not used. NOTE(review): named _BND_DEL while the insert counterpart is BND_INS. */
135 #define _BND_DEL(k) \
136 last = (last < 0) ? (sapp[-1] -= (k)) : (*sapp++ = -(k));
137 
138 /* Append "Delete k" op: same merge-or-append logic as _BND_DEL, operating on data->last and data->sapp (data is a pointer at the expansion site) */
139 #define _DEL(k) \
140 data->last = (data->last < 0) ? (data->sapp[-1] -= (k)) : (*data->sapp++ = -(k));
141 
142 /* Append "Delete k" op: same merge-or-append logic, operating on data.last and data.sapp (data is a struct at the expansion site) */
143 #define DEL_(k) \
144 data.last = (data.last < 0) ? (data.sapp[-1] -= (k)) : (*data.sapp++ = -(k));
145 
146 /* Append "Insert k" op: if the last op was an insert (last > 0), grow the previous script entry by adding k; otherwise append k. last receives the resulting entry. Uses bare last/sapp from the expansion site; the expansion already ends in ';'. Not used. */
147 #define BND_INS(k) \
148 last = (last > 0) ? (sapp[-1] += (k)) : (*sapp++ = (k));
149 
150 /* Append "Insert k" op: same merge-or-append logic as BND_INS, operating on data->last and data->sapp (data is a pointer at the expansion site) */
151 #define _INS(k) \
152 data->last = (data->last > 0) ? (data->sapp[-1] += (k)) : (*data->sapp++ = (k));
153 
154 /* Append "Insert k" op: same merge-or-append logic, operating on data.last and data.sapp (data is a struct at the expansion site) */
155 #define INS_(k) \
156 data.last = (data.last > 0) ? (data.sapp[-1] += (k)) : (*data.sapp++ = (k));
157 
158 /* Append "Replace" op: writes 0 as the next script entry and sets last to 0; uses bare last/sapp from the expansion site. Expands to a braced block. */
159 #define REP \
160 {last = *sapp++ = 0;}
161 
162 /* Append "Replace" op: same as REP, operating on data->last and data->sapp (data is a pointer at the expansion site) */
163 #define _REP \
164 {data->last = *data->sapp++ = 0;}
165 
166 /* Append "Replace" op: same as REP, operating on data.last and data.sapp (data is a struct at the expansion site) */
167 #define REP_ \
168 {data.last = *data.sapp++ = 0;}
169 /* REPP variants: write MININT (instead of 0) as the next script entry, then set last to 0. The three forms differ only in how last/sapp are reached: bare names, data->, or data. */
170 #define REPP \
171 {*sapp++ = MININT; last = 0;}
172 
173 #define _REPP \
174 {*data->sapp++ = MININT; data->last = 0;}
175 
176 #define REPP_ \
177 {*data.sapp++ = MININT; data.last = 0;}
178 
179 /***********************************************************
180 *
181 *       PSUGapOptions are used for the series of banded alignments 
182 *       (global and local) with various end gap penalties. These 
183 *       options work for DNA-DNA, protein-protein and DNA-protein 
184 *       alignment. gshift is the penalty for a frame shift, which 
185 *       only applies to DNA-protein alignment.
186 *       The matrix is set only for DNA-protein and protein-
187 *       protein alignment.
188 *
189 ***********************************************************/ 
190 typedef struct psu_gapped_options {
191   Int4 gopen;         /* gap open penalty */
192   Int4 gext;          /* gap extend penalty */
193   Int4 gshift;        /* frame-shift penalty, only applies to DNA-protein alignment */
194   /* low and up are used to calculate start_diag and width, which are calculated
195      differently in local and global alignments. 
196   */
197   Int4 low, up;
198   Int4 start_diag;    /* start diagonal of band */
199   Int4 width;         /* width for band alignment */
200   Int4 lg1_ext;       /* the left end gap extension penalty for the first sequence */
201   Int4 rg1_ext;       /* the right end gap extension penalty for the first sequence */
202   Int4 lg2_ext;       /* the left end gap extension penalty for the second sequence */
203   Int4 rg2_ext;       /* the right end gap extension penalty for the second sequence */
204   Int4 lg1_open;      /* presumably the end gap open penalties, named like the _ext fields above */
205   Int4 lg2_open;      /* (l/r = left/right end, 1/2 = first/second sequence) -- TODO confirm */
206   Int4 rg1_open;
207   Int4 rg2_open;
208 } PSUGapOptions, PNTR PSUGapOptionsPtr;
209 
210 /*
211         Functions to create and delete the PSUGapOptions, as well as
212         set default values. search_type is presumably one of the
213         G_BAND_xxx or L_BAND_xxx constants defined below -- TODO confirm. */
214 NLM_EXTERN PSUGapOptionsPtr PSUGapOptionsDelete(PSUGapOptionsPtr options);
215 NLM_EXTERN PSUGapOptionsPtr PSUGapOptionsCreate(Uint1 search_type);
216 
217 
218 
219 /* Search choices for global banded alignments are (global search type): */
220 #define G_BAND_LINEAR           0  /*global banded alignment in linear space*/
221 #define G_BAND_QUADRATIC        1  /*global banded alignment in quadratic space*/
222 #define G_BAND_LGAP             2  /*global banded alignment in linear space with 
223                                     options for the four end gap penalties*/
224 #define G_BAND_QGAP             3  /*global banded alignment in quadratic space, 
225                                     with options for the four end gap penalties */
226 #define G_BAND_L3GAP            4  /*global banded alignment in linear space, 
227                                      with THREE gap penalties and options for 
228                                      setting end gap penalties. Not sure if it 
229                                      works ?*/
230 #define G_BAND_Q3GAP            5   /*same as 4 except it runs in quadratic space 
231                                      it WORKS! */
232 
233 /* Search choices for local banded alignments are (local search type): */
234 #define L_BAND_LINEAR           10  /*local banded alignment in linear space*/
235 #define L_BAND_QUADRATIC        11  /*local banded alignment in quadratic space*/
236 #define L_BAND_LGAP                     12  /*local banded alignment in linear space with 
237                                     options for the four end gap penalties*/
238 #define L_BAND_QGAP                     13  /*local banded alignment in quadratic space, 
239                                     with options for the four end gap penalties */
240 #define L_BAND_L3GAP            14  /*local banded alignment in linear space, 
241                                      with THREE gap penalties and options for 
242                                      setting end gap penalties. Not sure if it 
243                                      works ?*/
244 #define L_BAND_Q3GAP            15   /*same as 14 except it runs in quadratic space 
245                                      it WORKS! */
246                                           
247 
248 /*************************************************************************
249 *
250 *       The structure that is passed to the global band functions declared
251 *       below (GlobalBandToSeqAlign, GlobalBandToEditBlock).
252 *****************************************************************************/
253 typedef struct global_band_struct {
254         /* The two sequences to be aligned. */
255         Uint1Ptr        seq1,
256                         seq2;
257         Int4            seq1_length,    /* length of sequence 1. */
258                         seq2_length;    /* length of sequence 2. */
259         /* used to identify sequence in GlobalBandToSeqAlign if filled in. */
260         SeqLocPtr       seqloc1,                /* SeqLoc for the first sequence. */
261                         seqloc2;                /* SeqLoc for the second. */
262 
263         Uint1           search_type; /* one of the global search types (G_BAND_xxx) above */
264 
265         Int4Ptr PNTR matrix;  /* scoring matrix, provided by and deleted by caller */
266         PSUGapOptionsPtr        options;        /* parameters for search. */
267         /* GapXEditBlockPtr filled in by TracebackToGapXEditBlock */
268         /* A SeqAlign can be made from this. */
269         GapXEditBlockPtr edit_block;
270         Int4 score;             /* score of the alignment */
271         Int4 alignment_length;  /* length of the alignment. */
272 } GlobalBandStruct, PNTR GlobalBandStructPtr;
273 
274 /*
275         Deletes the GlobalBandStruct, including the options.
276         Does not delete the sequences, the scoring matrix, or the ID's.
277 */
278 NLM_EXTERN GlobalBandStructPtr GlobalBandStructDelete(GlobalBandStructPtr gbsp);
279 
280 /*
281         Creates the GlobalBandStructPtr, needed to run GlobalBandToEditBlock,
282         with the default values.
283 */
284 NLM_EXTERN GlobalBandStructPtr GlobalBandStructCreate(Uint1 search_type);
285 
286 /*
287         Performs a global alignment, producing a SeqAlign.
288 */
289 NLM_EXTERN SeqAlignPtr GlobalBandToSeqAlign(GlobalBandStructPtr gbsp);
290 
291 /*
292         Performs a global alignment, producing an EditBlock, which
293         can be made into a SeqAlign.
294 */
295 NLM_EXTERN Boolean GlobalBandToEditBlock(GlobalBandStructPtr gbsp);
296 
297 
298 /*************************************************************************
299 *
300 *       The structure that is passed to the local band functions declared
301 *       below (LocalBandToSeqAlign, LocalBandToEditBlock).
302 *****************************************************************************/
303 typedef struct local_band_struct {
304         /* The two sequences to be aligned. */
305         Uint1Ptr        seq1, seq2;
306         Int4            seq1_length,    /* length of sequence 1. */
307                                 seq2_length;    /* length of sequence 2. */
308         /* presumably used to identify the sequences in LocalBandToSeqAlign if filled in -- TODO confirm. */
309         SeqIdPtr        seqloc1,                /* first sequence. NOTE(review): typed SeqIdPtr although named seqloc; GlobalBandStruct uses SeqLocPtr -- confirm intended type */
310                                 seqloc2;                /* second sequence (same note). */
311 
312         Uint1           search_type; /* one of the local search types (L_BAND_xxx) above */
313 
314         Int4Ptr PNTR matrix;  /* scoring matrix provided by and deleted by caller */
315         PSUGapOptionsPtr        options;        /* parameters for search. */
316         /* GapXEditBlockPtr filled in by TracebackToGapXEditBlock */
317         /* A SeqAlign can be made from this. */
318         GapXEditBlockPtr edit_block;
319         Int4            score;          /* score of the alignment */
320         Int4            seq1_start,     /* start of sequence one's alignment. */
321                                 seq2_start,     /* start of sequence two's alignment. */
322                                 seq1_end,       /* end of sequence one's alignment. */
323                                 seq2_end;       /* end of sequence two's alignment. */
324 } LocalBandStruct, PNTR LocalBandStructPtr;
325 
326 
327 /*
328         Deletes the LocalBandStruct, including the options.
329         Does not delete the sequences, the scoring matrix, or the ID's.
330 */
331 NLM_EXTERN LocalBandStructPtr LocalBandStructDelete(LocalBandStructPtr gbsp);
332 
333 /*
334         Creates the LocalBandStructPtr, needed to run LocalBandToEditBlock,
335         with the default values.
336 */
337 NLM_EXTERN LocalBandStructPtr LocalBandStructCreate(Uint1 search_type);
338 
339 /*
340         Performs a local alignment, producing a SeqAlign.
341 */
342 NLM_EXTERN SeqAlignPtr LocalBandToSeqAlign(LocalBandStructPtr lbsp);
343 
344 /*
345         Performs a local alignment, producing an EditBlock, which
346         can be made into a SeqAlign.
347 */
348 NLM_EXTERN Boolean LocalBandToEditBlock(LocalBandStructPtr lbsp);
349 
350 
351 /*********************************************************
352 *
353 *       Int4 gband_linear_gap(Seq1, Seq2, M, N, matrix, option, S, Slen)
354 *       computes the global alignment with flexible end gap 
355 *       penalties for DNA-DNA and protein-protein alignment. 
356 *       The alignment is computed in linear space.
357 *       This function was originally from the g_band2.c file.
358 *
359 *       Aligns the two sequences Seq1 and Seq2 (called A and B here).
360 *       A, B start with index 1
361 *       M, N are the lengths of A, B; matrix is the scoring matrix
362 *       option sets the options of the alignment, 
363 *       which include penalties for end gaps
364 *       S is the script that contains the alignment results
365 *       Slen stores the length of the alignment (the size of S)
366 *       Returns the score of the alignment.
367 *
368 ***********************************************************/
369 /* All gband_xxx prototypes below share this parameter list. They are declared with plain extern and LIBCALL rather than NLM_EXTERN. */
370 extern Int4 LIBCALL gband_linear(Uint1Ptr Seq1, Uint1Ptr Seq2,
371                                  Int4 M, Int4 N,
372                                  Int4Ptr PNTR matrix,
373                                  PSUGapOptionsPtr option,
374                                  Int4Ptr S, Int4Ptr Slen);
375 
376 extern Int4 LIBCALL gband_quadratic(Uint1Ptr Seq1, Uint1Ptr Seq2,
377                                     Int4 M, Int4 N,
378                                     Int4Ptr PNTR matrix,
379                                     PSUGapOptionsPtr option,
380                                     Int4Ptr S, Int4Ptr Slen);
381 
382 extern Int4 LIBCALL gband_linear_gap(Uint1Ptr Seq1, Uint1Ptr Seq2,
383                                      Int4 M, Int4 N, 
384                                      Int4Ptr PNTR matrix,
385                                      PSUGapOptionsPtr option,
386                                      Int4Ptr S, Int4Ptr Slen);
387 /* NOTE(review): the name says "linear" but the q-prefixed search type (G_BAND_QGAP) is described above as quadratic space -- confirm which this implements. */
388 extern Int4 LIBCALL gband_linear_qgap(Uint1Ptr Seq1, Uint1Ptr Seq2,
389                                       Int4 M, Int4 N,
390                                       Int4Ptr PNTR matrix,
391                                       PSUGapOptionsPtr option,
392                                       Int4Ptr S, Int4Ptr Slen);
393 
394 extern Int4 LIBCALL gband_l3gap(Uint1Ptr Seq1, Uint1Ptr Seq2,
395                                 Int4 M, Int4 N,
396                                 Int4Ptr PNTR matrix,
397                                 PSUGapOptionsPtr option,
398                                 Int4Ptr S, Int4Ptr Slen);
399 
400 extern Int4 LIBCALL gband_q3gap(Uint1Ptr Seq1, Uint1Ptr Seq2,
401                                 Int4 M, Int4 N,
402                                 Int4Ptr PNTR matrix,
403                                 PSUGapOptionsPtr option,
404                                 Int4Ptr S, Int4Ptr Slen);
405 /* Local counterpart. NOTE(review): declared without LIBCALL, unlike the gband_xxx prototypes. psi/psj/pei/pej are presumably outputs for the start and end of the alignment on A and B, and align_type presumably one of the L_BAND_xxx values -- TODO confirm in bandalgn.c. */
406 extern Int4 BAND_LOCAL_ALIGN(Uint1Ptr A, Uint1Ptr B,
407                                      Int4 M, Int4 N, 
408                                      Int4Ptr PNTR matrix,
409                                      PSUGapOptionsPtr options,
410                                      Int4Ptr S,
411                                      Int4Ptr psi, Int4Ptr psj,
412                                      Int4Ptr pei, Int4Ptr pej,
413                                      Int4 align_type);
414 
415 
416 /**********************************************************************
417 *               Global Alignment utility functions
418 **********************************************************************/
419 /* NOTE(review): the spellings SetGlobaltOptions and CreatBandStruct are the exported names, so fixing them requires updating callers. gopen/gext are Int2 in SetGlobaltOptions although PSUGapOptions declares them as Int4. */
420 NLM_EXTERN void SetGlobaltOptions(GlobalBandStructPtr gbsp, Int4 lg1_ext, Int4 rg1_ext, Int4 lg2_ext, Int4 rg2_ext, Int4 lg1_open, Int4 lg2_open, Int4 rg1_open, Int4 rg2_open, Int2 gopen, Int2 gext);
421 
422 NLM_EXTERN GlobalBandStructPtr CreatBandStruct(SeqLocPtr slp1, SeqLocPtr slp2, Int4Ptr PNTR W, Boolean is_prot, Int2 method);
423                 
424 NLM_EXTERN void SetLowUpFromBlast(PSUGapOptionsPtr opt, Boolean is_prot, Int2 type, Int2 width, SeqLocPtr slp1, SeqLocPtr slp2);
425 
426 NLM_EXTERN SeqAlignPtr GlobalBandByLoc(GlobalBandStructPtr gbsp, SeqLocPtr slp1, SeqLocPtr slp2,  Boolean is_prot, Int2 band_method);
427 
428 NLM_EXTERN SeqAlignPtr ExtendSeqAlign(SeqAlignPtr seqalign, Int4 start1, Int4 start2, Int4 stop1, Int4 stop2, Int4 x1, Int4 y1, Int4 x2, Int4 y2);
429 /* Same parameter list as ExtendSeqAlign plus a strand for each sequence. */
430 NLM_EXTERN SeqAlignPtr CC_ExtendSeqAlign(SeqAlignPtr sap, Int4 start1, Int4 start2, Int4 stop1, Int4 stop2, Int4 x1, Int4 y1, Int4 x2, Int4 y2, Uint1 strand1, Uint1 strand2);
431 
432 NLM_EXTERN void GetAlignExtremes(SeqAlignPtr seqalign, Int4Ptr xx1, Int4Ptr yy1, Int4Ptr xx2, Int4Ptr yy2);
433 
434 NLM_EXTERN Int2 ChangeGlobalBandMatrix(GlobalBandStructPtr gbsp, Boolean is_prot, CharPtr matrix_name, Int4 penalty, Int4 reward);
435 
436 
437 #ifdef __cplusplus
438 }
439 #endif
440 
441 #undef NLM_EXTERN
442 #ifdef NLM_EXPORT
443 #define NLM_EXTERN NLM_EXPORT
444 #else
445 #define NLM_EXTERN
446 #endif
447 
448 #endif
449 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.