|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/tools/bandalgn.h |
source navigation diff markup identifier search freetext search file search |
1 /* ===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================*/
24 /*****************************************************************************
25
26 File name: bandalgn.h
27
28 Author: Gennadiy Savchuk, Jinqhui Zhang, Tom Madden
29
30 Contents: prototypes to perform a global gapped alignment on two sequences.
31
32 ****************************************************************************/
33 /* $Revision: 6.7 $
34 * $Log: bandalgn.h,v $
35 * Revision 6.7 2000/07/26 17:26:25 lewisg
36 * fix code for c++ inclusion
37 *
38 * Revision 6.6 2000/02/10 22:47:07 vakatov
39 * DLL'zation for MSVC on PC, Win-NT
40 *
41 * Revision 6.5 1999/03/17 16:49:10 madden
42 * Removed comment within comment
43 *
44 * Revision 6.4 1998/01/16 21:29:45 chappey
45 * Remove function CC_GetExtremes and use now SeqAlignStart, SeqAlignStop in salsap.c
46 *
47 * Revision 6.3 1997/10/22 14:41:39 chappey
48 * added CC_GetAlignExtremes, ChangeGlobalBandMatrix, CC_ExtendSeqAlign
49 *
50 * Revision 6.2 1997/10/02 16:18:20 tatiana
51 * *** empty log message ***
52 *
53 * Revision 6.1 1997/10/02 15:17:15 tatiana
54 * global align utility added
55 *
56 * Revision 6.0 1997/08/25 18:52:15 madden
57 * Revision changed to 6.0
58 *
59 * Revision 1.3 1997/06/23 16:16:13 tatiana
60 * GlobalBandAlign struct changed to use SeqLocs instead of SeqIds
61 *
62 * Revision 1.2 1997/03/05 17:31:21 savchuk
63 * definition of data_t has been moved from bandalgn.c
64 *
65 * Revision 1.1 1997/01/22 14:11:05 madden
66 * Initial revision
67 *
68 */
69
70 #ifndef _G_BAND_H_
71 #define _G_BAND_H_
72
73 #include <ncbi.h>
74 #include <gapxdrop.h>
75 #include <blastkar.h>
76 #include <seqport.h>
77 #include <blast.h>
78
79
80 #undef NLM_EXTERN
81 #ifdef NLM_IMPORT
82 #define NLM_EXTERN NLM_IMPORT
83 #else
84 #define NLM_EXTERN extern
85 #endif
86
87 #ifdef __cplusplus
88 extern "C" {
89 #endif
90
91
/*
 * Sentinel values used by the banded-alignment code.  MININT is also the
 * marker that the REPP macro family in this header writes into the edit
 * script.  The expansion is parenthesized so that it stays a single operand
 * wherever it is substituted.
 */
#define MININT (-999999)
#define MAXINT 9999999

/* NOTE(review): meaning of SMBAND is not documented in this header. */
#define SMBAND 400

/* Should this be removed? */
#define BND_DIGIT ((FloatHi)1.0)

/* NOTE(review): presumably the dimension of the scoring matrix - confirm. */
#define PSU_MATRIX_SIZE 128

/* Disabled: only compiled if "notdef" is defined. */
#ifdef notdef
enum {v,h};
#endif
103
104 typedef struct DP {
105 Int4 CC, DD, CP, DPDP, FF, FP;
106 } PNTR dp_ptr, dp_node;
107
108 typedef struct {
109 dp_ptr CD;
110 Int4 IP;
111 Int4Ptr MP[4]; /* save crossing points */
112 /* 0: rep, 1: del, 2: ins */
113 Uint1Ptr MT[4];
114 Int4Ptr FP; /* forward dividing points */
115 Uint1Ptr FT;
116 Int4Ptr PNTR w; /* w = W */
117
118 Int4 g, zzh, m, rr; /* g = G, zzh = H, m = g+zzh */
119
120 Int4 leggA, leggB, reggA, reggB, leghA, leghB, reghA, reghB;
121 Int4Ptr sapp, sapp0; /* Current script append ptr */
122 Int4 last; /* Last script op appended */
123 Int1Ptr PNTR state;
124 } data_t;
125
/*
 * Cost of a k-symbol indel: 0 when k <= 0, otherwise g + zzh * k.
 * The three spellings differ only in where g and zzh are looked up:
 *
 *   gap(k)    variables g and zzh in the caller's scope
 *   _gap(k)   data->g and data->zzh  ("data" is a pointer)
 *   gap_(k)   data.g  and data.zzh   ("data" is a struct)
 *
 * k is expanded twice, so pass an expression without side effects.
 */
#define gap(k) \
    (((k) > 0) ? (g + zzh * (k)) : 0)

#define _gap(k) \
    (((k) > 0) ? (data->g + data->zzh * (k)) : 0)

#define gap_(k) \
    (((k) > 0) ? (data.g + data.zzh * (k)) : 0)
133
/*
 * Append a "Delete k" op to the edit script.
 *
 * If the previous op was already a delete (last < 0), k is subtracted from
 * the previous script entry (sapp[-1]); otherwise a new entry -k is stored
 * and sapp advances.  Either way "last" receives the value written.
 *
 *   _BND_DEL(k)  uses caller-scope last/sapp (marked "Not used" originally)
 *   _DEL(k)      uses data->last / data->sapp
 *   DEL_(k)      uses data.last  / data.sapp
 *
 * Each expansion already ends in ';'.
 */
#define _BND_DEL(k) \
    last = (last >= 0) ? (*sapp++ = -(k)) : (sapp[-1] -= (k));

#define _DEL(k) \
    data->last = (data->last >= 0) ? (*data->sapp++ = -(k)) : (data->sapp[-1] -= (k));

#define DEL_(k) \
    data.last = (data.last >= 0) ? (*data.sapp++ = -(k)) : (data.sapp[-1] -= (k));
145
/*
 * Append an "Insert k" op to the edit script.
 *
 * If the previous op was already an insert (last > 0), k is added to the
 * previous script entry (sapp[-1]); otherwise a new entry k is stored and
 * sapp advances.  Either way "last" receives the value written.
 *
 *   BND_INS(k)  uses caller-scope last/sapp (marked "Not used" originally)
 *   _INS(k)     uses data->last / data->sapp
 *   INS_(k)     uses data.last  / data.sapp
 *
 * Each expansion already ends in ';'.
 */
#define BND_INS(k) \
    last = (last <= 0) ? (*sapp++ = (k)) : (sapp[-1] += (k));

#define _INS(k) \
    data->last = (data->last <= 0) ? (*data->sapp++ = (k)) : (data->sapp[-1] += (k));

#define INS_(k) \
    data.last = (data.last <= 0) ? (*data.sapp++ = (k)) : (data.sapp[-1] += (k));
157
/*
 * Append a "Replace" op: store 0 as the next script entry, advance sapp and
 * set last to 0.
 *
 *   REP    uses caller-scope last/sapp
 *   _REP   uses data->last / data->sapp
 *   REP_   uses data.last  / data.sapp
 *
 * Each expansion is a brace-enclosed block.
 */
#define REP \
    {*sapp = 0; ++sapp; last = 0;}

#define _REP \
    {*data->sapp = 0; ++data->sapp; data->last = 0;}

#define REP_ \
    {*data.sapp = 0; ++data.sapp; data.last = 0;}

/*
 * Same as the REP family, except that the script entry written is MININT
 * instead of 0; last is still reset to 0.
 */
#define REPP \
    {*sapp = MININT; ++sapp; last = 0;}

#define _REPP \
    {*data->sapp = MININT; ++data->sapp; data->last = 0;}

#define REPP_ \
    {*data.sapp = MININT; ++data.sapp; data.last = 0;}
178
179 /***********************************************************
180 *
181 * PSUGapOptions are used for the serise banded alignment
182 * (global and local) with various end gap penalty. This
183 * option works for DNA-DNA, protein-protein and DNA-protein
184 * alignment. gshift is the penalty for frame shift, which
185 * only works for DNA-protein
186 * matrix was set only for DNA-protein and protein-
187 * protein alignment
188 *
189 ***********************************************************/
190 typedef struct psu_gapped_options {
191 Int4 gopen; /* gap open */
192 Int4 gext; /* gap extend penalties */
193 Int4 gshift; /* frame-shift penalty, only applies to DNA-protein alignment */
194 /* low and up are used to calculate start_diag and width, which are calculated
195 differently in local and global alignments.
196 */
197 Int4 low, up;
198 Int4 start_diag; /* start diagonal of band */
199 Int4 width; /* width for band alignment */
200 Int4 lg1_ext; /*the left end gap extension penalty for the first sequence */
201 Int4 rg1_ext; /*the right end gap ext. penalty for the first sequence */
202 Int4 lg2_ext; /*the left end gap extension penalty for the second sequence */
203 Int4 rg2_ext; /*the right end gap ext. penalty for the second sequence */
204 Int4 lg1_open;
205 Int4 lg2_open;
206 Int4 rg1_open;
207 Int4 rg2_open;
208 } PSUGapOptions, PNTR PSUGapOptionsPtr;
209
210 /*
211 Functions to create and delte the PSUGapOptions, as well as
212 set default values.
213 */
214 NLM_EXTERN PSUGapOptionsPtr PSUGapOptionsDelete(PSUGapOptionsPtr options);
215 NLM_EXTERN PSUGapOptionsPtr PSUGapOptionsCreate(Uint1 search_type);
216
217
218
/* Search choices for global banded alignments (global search type). */

/* global banded alignment in linear space */
#define G_BAND_LINEAR       0
/* global banded alignment in quadratic space */
#define G_BAND_QUADRATIC    1
/* global banded alignment in linear space with options for the four end
   gap penalties */
#define G_BAND_LGAP         2
/* global banded alignment in quadratic space with options for the four end
   gap penalties */
#define G_BAND_QGAP         3
/* global banded alignment in linear space, with THREE gap penalties and
   options for setting end gap penalties.  Not sure if it works? */
#define G_BAND_L3GAP        4
/* same as G_BAND_L3GAP (4) except it runs in quadratic space; it WORKS! */
#define G_BAND_Q3GAP        5

/* Search choices for local banded alignments (local search type). */

/* local banded alignment in linear space */
#define L_BAND_LINEAR       10
/* local banded alignment in quadratic space */
#define L_BAND_QUADRATIC    11
/* local banded alignment in linear space with options for the four end
   gap penalties */
#define L_BAND_LGAP         12
/* local banded alignment in quadratic space with options for the four end
   gap penalties */
#define L_BAND_QGAP         13
/* local banded alignment in linear space, with THREE gap penalties and
   options for setting end gap penalties.  Not sure if it works? */
#define L_BAND_L3GAP        14
/* same as L_BAND_L3GAP (14) except it runs in quadratic space; it WORKS!
   (the original comment said "same as 4", evidently carried over from the
   global list) */
#define L_BAND_Q3GAP        15
246
247
248 /*************************************************************************
249 *
250 * The structure that is passed in during the call to Nlm_GlobalBand
251 *
252 *****************************************************************************/
253 typedef struct global_band_struct {
254 /* The two sequences to be aligned. */
255 Uint1Ptr seq1,
256 seq2;
257 Int4 seq1_length, /* length of sequence 1. */
258 seq2_length; /* length of sequence 2. */
259 /* used to identify sequence in GlobalBandToSeqAlign if filled in. */
260 SeqLocPtr seqloc1, /* SeqLoc for the first sequence. */
261 seqloc2; /* SeqLoc for the second. */
262
263 Uint1 search_type; /* as in global search_type above */
264
265 Int4Ptr PNTR matrix; /* scoring matrix, provided by and deleted by caller */
266 PSUGapOptionsPtr options; /* parameters for search. */
267 /* GapXEditBlockPtr filled in by TracebackToGapXEditBlock */
268 /* A SeqAlign can be made from this. */
269 GapXEditBlockPtr edit_block;
270 Int4 score; /* score of the alignment */
271 Int4 alignment_length; /* length of the alignment. */
272 } GlobalBandStruct, PNTR GlobalBandStructPtr;
273
274 /*
275 Deletes the GlobalBandStruct, including the options.
276 Does not delete the sequence matrix, or the ID's.
277 */
278 NLM_EXTERN GlobalBandStructPtr GlobalBandStructDelete(GlobalBandStructPtr gbsp);
279
280 /*
281 Creates the GlobalBandStructPtr, needed to run GlobalBandToEditScript,
282 with the default values.
283 */
284 NLM_EXTERN GlobalBandStructPtr GlobalBandStructCreate(Uint1 search_type);
285
286 /*
287 Performs a global alignment, producing a SeqAlign.
288 */
289 NLM_EXTERN SeqAlignPtr GlobalBandToSeqAlign(GlobalBandStructPtr gbsp);
290
291 /*
292 Performs a global alignment, producing an EditBlock, which
293 can be made into a SeqAlign.
294 */
295 NLM_EXTERN Boolean GlobalBandToEditBlock(GlobalBandStructPtr gbsp);
296
297
298 /*************************************************************************
299 *
300 * The structure that is passed in during the call to Nlm_GlobalBand
301 *
302 *****************************************************************************/
303 typedef struct local_band_struct {
304 /* The two sequences to be aligned. */
305 Uint1Ptr seq1, seq2;
306 Int4 seq1_length, /* length of sequence 1. */
307 seq2_length; /* length of sequence 2. */
308 /* used to identify sequence in GlobalBandToSeqAlign if filled in. */
309 SeqIdPtr seqloc1, /* SeqLoc for the first sequence. */
310 seqloc2; /* SeqLoc for the second. */
311
312 Uint1 search_type; /* as in local search_type above */
313
314 Int4Ptr PNTR matrix; /* scoring matrix provided by and deleted by caller */
315 PSUGapOptionsPtr options; /* parameters for search. */
316 /* GapXEditBlockPtr filled in by TracebackToGapXEditBlock */
317 /* A SeqAlign can be made from this. */
318 GapXEditBlockPtr edit_block;
319 Int4 score; /* score of the alignment */
320 Int4 seq1_start, /* start of sequence one's alignment. */
321 seq2_start, /* start of sequence two's alignment. */
322 seq1_end, /* end of sequence one's alignment. */
323 seq2_end; /* end of sequence two's alignment. */
324 } LocalBandStruct, PNTR LocalBandStructPtr;
325
326
327 /*
328 Deletes the LocalBandStruct, including the options.
329 Does not delete the sequence matrix, or the ID's.
330 */
331 NLM_EXTERN LocalBandStructPtr LocalBandStructDelete(LocalBandStructPtr gbsp);
332
333 /*
334 Creates the LocalBandStructPtr, needed to run LocalBandToEditScript,
335 with the default values.
336 */
337 NLM_EXTERN LocalBandStructPtr LocalBandStructCreate(Uint1 search_type);
338
339 /*
340 Performs a global alignment, producing a SeqAlign.
341 */
342 NLM_EXTERN SeqAlignPtr LocalBandToSeqAlign(LocalBandStructPtr lbsp);
343
344 /*
345 Performs a global alignment, producing an EditBlock, which
346 can be made into a SeqAlign.
347 */
348 NLM_EXTERN Boolean LocalBandToEditBlock(LocalBandStructPtr lbsp);
349
350
351 /*********************************************************
352 *
353 * Int4 gband_linear_gap(A, B, M, N, option, S, Slen)
354 * compute the global alignment with flexible end gap
355 * penalty for DNA-DNA and protein protein alignment.
356 * The alignment is computed with linear space
357 * This function was originally from the g_band2.c file
358 *
359 * align the two sequences A, B
360 * A, B starts with index 1
361 * M, N is the length of A, B
362 * option sets the option of the alignment,
363 * which includes penalties for end gaps
364 * S is the script that contains the alignment results
365 * Slen stores the length of the alignment (the size of S)
366 * return the score of the alignment
367 *
368 ***********************************************************/
369
370 extern Int4 LIBCALL gband_linear(Uint1Ptr Seq1, Uint1Ptr Seq2,
371 Int4 M, Int4 N,
372 Int4Ptr PNTR matrix,
373 PSUGapOptionsPtr option,
374 Int4Ptr S, Int4Ptr Slen);
375
376 extern Int4 LIBCALL gband_quadratic(Uint1Ptr Seq1, Uint1Ptr Seq2,
377 Int4 M, Int4 N,
378 Int4Ptr PNTR matrix,
379 PSUGapOptionsPtr option,
380 Int4Ptr S, Int4Ptr Slen);
381
382 extern Int4 LIBCALL gband_linear_gap(Uint1Ptr Seq1, Uint1Ptr Seq2,
383 Int4 M, Int4 N,
384 Int4Ptr PNTR matrix,
385 PSUGapOptionsPtr option,
386 Int4Ptr S, Int4Ptr Slen);
387
388 extern Int4 LIBCALL gband_linear_qgap(Uint1Ptr Seq1, Uint1Ptr Seq2,
389 Int4 M, Int4 N,
390 Int4Ptr PNTR matrix,
391 PSUGapOptionsPtr option,
392 Int4Ptr S, Int4Ptr Slen);
393
394 extern Int4 LIBCALL gband_l3gap(Uint1Ptr Seq1, Uint1Ptr Seq2,
395 Int4 M, Int4 N,
396 Int4Ptr PNTR matrix,
397 PSUGapOptionsPtr option,
398 Int4Ptr S, Int4Ptr Slen);
399
400 extern Int4 LIBCALL gband_q3gap(Uint1Ptr Seq1, Uint1Ptr Seq2,
401 Int4 M, Int4 N,
402 Int4Ptr PNTR matrix,
403 PSUGapOptionsPtr option,
404 Int4Ptr S, Int4Ptr Slen);
405
406 extern Int4 BAND_LOCAL_ALIGN(Uint1Ptr A, Uint1Ptr B,
407 Int4 M, Int4 N,
408 Int4Ptr PNTR matrix,
409 PSUGapOptionsPtr options,
410 Int4Ptr S,
411 Int4Ptr psi, Int4Ptr psj,
412 Int4Ptr pei, Int4Ptr pej,
413 Int4 align_type);
414
415
416 /**********************************************************************
417 * Global Alignment utility functions
418 **********************************************************************/
419
420 NLM_EXTERN void SetGlobaltOptions(GlobalBandStructPtr gbsp, Int4 lg1_ext, Int4 rg1_ext, Int4 lg2_ext, Int4 rg2_ext, Int4 lg1_open, Int4 lg2_open, Int4 rg1_open, Int4 rg2_open, Int2 gopen, Int2 gext);
421
422 NLM_EXTERN GlobalBandStructPtr CreatBandStruct(SeqLocPtr slp1, SeqLocPtr slp2, Int4Ptr PNTR W, Boolean is_prot, Int2 method);
423
424 NLM_EXTERN void SetLowUpFromBlast(PSUGapOptionsPtr opt, Boolean is_prot, Int2 type, Int2 width, SeqLocPtr slp1, SeqLocPtr slp2);
425
426 NLM_EXTERN SeqAlignPtr GlobalBandByLoc(GlobalBandStructPtr gbsp, SeqLocPtr slp1, SeqLocPtr slp2, Boolean is_prot, Int2 band_method);
427
428 NLM_EXTERN SeqAlignPtr ExtendSeqAlign(SeqAlignPtr seqalign, Int4 start1, Int4 start2, Int4 stop1, Int4 stop2, Int4 x1, Int4 y1, Int4 x2, Int4 y2);
429
430 NLM_EXTERN SeqAlignPtr CC_ExtendSeqAlign(SeqAlignPtr sap, Int4 start1, Int4 start2, Int4 stop1, Int4 stop2, Int4 x1, Int4 y1, Int4 x2, Int4 y2, Uint1 strand1, Uint1 strand2);
431
432 NLM_EXTERN void GetAlignExtremes(SeqAlignPtr seqalign, Int4Ptr xx1, Int4Ptr yy1, Int4Ptr xx2, Int4Ptr yy2);
433
434 NLM_EXTERN Int2 ChangeGlobalBandMatrix(GlobalBandStructPtr gbsp, Boolean is_prot, CharPtr matrix_name, Int4 penalty, Int4 reward);
435
436
437 #ifdef __cplusplus
438 }
439 #endif
440
441 #undef NLM_EXTERN
442 #ifdef NLM_EXPORT
443 #define NLM_EXTERN NLM_EXPORT
444 #else
445 #define NLM_EXTERN
446 #endif
447
448 #endif
449 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |