|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross Reference: C/tools/dotseq.h |
source navigation diff markup identifier search freetext search file search |
1 /* ===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information (NCBI)
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government do not place any restriction on its use or reproduction.
12 * We would, however, appreciate having the NCBI and the author cited in
13 * any work or product based on this material.
14 *
15 * Although all reasonable efforts have been taken to ensure the accuracy
16 * and reliability of the software and data, the NLM and the U.S.
17 * Government do not and cannot warrant the performance or results that
18 * may be obtained by using this software or data. The NLM and the U.S.
19 * Government disclaim all warranties, express or implied, including
20 * warranties of performance, merchantability or fitness for any particular
21 * purpose.
22
23 * ===========================================================================
24 *
25 * File Name: dotseq.h
26 *
27 * Author: Fasika Aklilu
28 *
29 * Version Creation Date: 8/9/01
30 *
31 * $Revision: 6.4 $
32 *
33 * File Description: computes local alignments for dot matrix
34 *
35 * Modifications:
36 * --------------------------------------------------------------------------
37 * Date Name Description of modification
38 * ------- ---------- -----------------------------------------------------
39
40 $Revision: 6.4 $
41 $Log: dotseq.h,v $
42 Revision 6.4 2001/08/09 16:33:18 aklilu
43 added revision
44
45 Revision 6.3 2000/07/26 18:23:10 sicotte
46 added DOT_SPI_FindBestAlnByDotPlotEx, to return rejected alignments
47
48
49 */
50
51 #ifndef _DOTSEQ_
52 #define _DOTSEQ_
53
54 #ifdef __cplusplus
55 extern "C" {
56 #endif
57
58 /****************************************************************************
59
60 INCLUDE SECTION
61 ***************************************************************************/
62
63 #include <tofasta.h>
64 #include <seqport.h>
65 #include <sequtil.h>
66 #include <sqnutils.h>
67 #include <blastpri.h>
68 #include <explore.h>
69 #include <seqmgr.h>
70 #include <lookup.h>
71 #include <jsavlt.h>
72
73 /****************************************************************************
74
75 DEFINES SECTION
76 ***************************************************************************/
77
78
79 #define UNDEFINED 25 /* amino acids undefined in BLOSUM62 (NOTE(review): how the value 25 is used is not visible in this header -- confirm in the implementation) */
80 #define MAX_TRIM 200 /* NOTE(review): not referenced in this header; presumably an upper limit applied when trimming -- confirm in the implementation */
81
82
83
84
85 /****************************************************************************
86
87 DATA STRUCTURE SECTION
88 ***************************************************************************/
89
90
91 typedef struct hs_diag { /* one hit; element type of the hitlist arrays in DOTMainData and DOTData */
92 Int4 q_start; /* left most value on the graph (NOTE(review): presumably on the query axis -- confirm) */
93 Int4 s_start; /* left most value on the graph (NOTE(review): presumably on the subject axis -- confirm) */
94 Int4 length; /* NOTE(review): presumably the length of the hit -- confirm */
95 Int4 score; /* NOTE(review): presumably the score of the hit -- confirm */
96 Int4 rdmKey; /* NOTE(review): purpose not visible in this header -- confirm in the implementation */
97 } DOTDiag, PNTR DOTDiagPtr;
98
99
100
101 /* coordinates of old diags - used in the history binary tree */
102
103 typedef struct hist {
104 Int4 diag_constant; /* NOTE(review): presumably a value identifying the diagonal -- confirm in the implementation */
105 Int4 q_stop; /* NOTE(review): presumably the query position at which the old diag stops -- confirm */
106 } DOTHist, PNTR DOTHistPtr;
107
108
109
110 /* main struct: holds the query/subject sequences and their ranges, the scoring matrix, the binary tree used to collect hits, and the output hit list */
111
112 typedef struct mainseqinfo {
113 Int4Ptr PNTR matrix; /* dna matrix (same type as the return value of DOT_DNAScoringMatrix) */
114 Int2 maxscore; /* highest matrix score */
115 Int2 minscore; /* lowest matrix score */
116 Boolean is_na; /* NOTE(review): presumably TRUE when the sequences are nucleic acid -- confirm */
117 Int4 qlen; /* length of query sequence */
118 Int4 slen; /* length of subject sequence */
119 Uint1 qstrand; /* strand of query */
120 Uint1 sstrand; /* strand of subject */
121 BioseqPtr qbsp; /* query bioseq */
122 BioseqPtr sbsp; /* subject bioseq */
123 SeqLocPtr qslp; /* query seqloc pointer */
124 SeqLocPtr sslp; /* subject seqloc pointer */
125 Int4 q_start; /* left position on query bioseq */
126 Int4 q_stop; /* right position on query bioseq */
127 Int4 s_start; /* left position on subject bioseq */
128 Int4 s_stop; /* right position on subject bioseq */
129 Uint1Ptr qseq; /* query sequence buffer */
130 Uint1Ptr sseq; /* subject sequence buffer */
131 CharPtr qname; /* query accession */
132 CharPtr sname; /* subject accession */
133 /* hash value */
134 Int4 word_size; /* size of hash table index */
135 /* binary tree data */
136 Int4 cutoff_score; /* cutoff to store hits */
137 Avl_TreePtr tree; /* binary tree for collecting hits */
138 Boolean first_pass; /* NOTE(review): meaning not visible in this header -- confirm in the implementation */
139 Int4Ptr score_array; /* array by score for threshold ramp */
140 Int4 unique; /* binary tree variable */
141 Int4 tree_limit; /* upper limit for size of binary tree */
142 /* sorted diag data */
143 DOTDiagPtr PNTR hitlist; /* dotseq output data -- array of hits */
144 Int4 index; /* total number of stored hits in array */
145 } DOTMainData, PNTR DOTMainDataPtr;
146
147
148 typedef struct dotdata { /* strands, coordinate ranges and a hit list for an x/y pair (NOTE(review): presumably x = query and y = subject -- confirm) */
149 Uint1 xstrand; /* strand on the x axis */
150 Uint1 ystrand; /* strand on the y axis */
151 Int4 xstart; /* NOTE(review): presumably the left end of the x range -- confirm */
152 Int4 xstop; /* NOTE(review): presumably the right end of the x range -- confirm */
153 Int4 ystart; /* NOTE(review): presumably the left end of the y range -- confirm */
154 Int4 ystop; /* NOTE(review): presumably the right end of the y range -- confirm */
155 Int4 index; /* NOTE(review): presumably the number of entries in hitlist, as for DOTMainData.index -- confirm */
156 DOTDiagPtr PNTR hitlist; /* array of hits (same type as DOTMainData.hitlist) */
157 } DOTData, PNTR DOTDataPtr;
158
159 /* information for the history binary tree */
160
161 typedef struct info{
162 DOTMainDataPtr mip; /* the main data struct this information belongs to */
163 Uint1Ptr qseq; /* NOTE(review): presumably the query sequence buffer, as DOTMainData.qseq -- confirm */
164 Uint1Ptr sseq; /* NOTE(review): presumably the subject sequence buffer, as DOTMainData.sseq -- confirm */
165 Int4 q_pos; /* NOTE(review): presumably the current position on the query -- confirm */
166 Int4 s_pos; /* NOTE(review): presumably the current position on the subject -- confirm */
167 Int4 wordsize; /* NOTE(review): presumably the same quantity as DOTMainData.word_size -- confirm */
168 Avl_TreePtr tree; /* binary tree (same type as DOTMainData.tree) */
169 Boolean first_pass; /* NOTE(review): meaning not visible in this header -- confirm in the implementation */
170 } DOTInfo, PNTR DOTInfoPtr;
171
172
173
174 /****************************************************************************
175
176 FUNCTION DECLARATIONS
177 ***************************************************************************/
178
179 /* Function: Compute all matches between two sequences. Input: 2 bioseqptrs.
180 Returns: DOTMainDataPtr with hitlist structure with start/stops in bioseq coordinates
181 */
182 DOTMainDataPtr DOT_CreateAndStore (DOTMainDataPtr mip, BioseqPtr qbsp, BioseqPtr sbsp, Int4 q_start, Int4 q_stop, Int4 s_start, Int4 s_stop, Int4 word_size, Int4 tree_limit, Boolean initialize);
183 /* Function: Compute all matches between two sequences. Input: 2 seqlocptrs (can specify plus or minus strand in slp).
184 Returns: Filled DOTMainDataPtr with hitlist structure with start/stops in bioseq coordinates
185 */
186 DOTMainDataPtr DOT_CreateAndStorebyLoc (SeqLocPtr slp1, SeqLocPtr slp2, Int4 word_size, Int4 tree_limit);
187 Int2 DOT_BuildHitList(DOTMainDataPtr mip, Boolean do_sort, Boolean do_countscore);
188 Boolean DOT_GetSeqs (DOTMainDataPtr mip, Boolean is_zoom);
189 Int2 DOT_FreeMainInfo(DOTMainDataPtr mip);
190 Int2 DOT_FreeMainInfoPtrEx (DOTMainDataPtr mip);
191 Int2 DOT_FreeHitsArray (DOTDiagPtr PNTR hitlist, Int4 index);
192 Boolean DOT_GetSeqs (DOTMainDataPtr mip, Boolean is_zoom);
193 extern DOTMainDataPtr DOT_InitMainInfo (DOTMainDataPtr mip, BioseqPtr qbsp, BioseqPtr sbsp, Int4 word_size, Int4 tree_limit, Int4 qstart, Int4 qstop, Int4 sstart, Int4 sstop);
194 SeqAlignPtr DOT_SPI_FindBestAlnByDotPlot(SeqLocPtr slp1, SeqLocPtr slp2, Int4 wordsize, Int4 num_hits);
195 extern Uint2 DOT_AttachSeqAnnotToSeqEntry (Uint2 entityID, SeqAnnotPtr sap, BioseqPtr bsp);
196 extern Int4Ptr PNTR DOT_DNAScoringMatrix(Int4 mismatch, Int4 reward,Int4 alsize);
197
198 #ifdef __cplusplus
199 }
200 #endif
201
202 #endif /* ndef _DOTSEQ_ */
203 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |