|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross Reference: C/api/aceread.h |
source navigation diff markup identifier search freetext search file search |
1 #ifndef API_ACEREAD__H
2 #define API_ACEREAD__H
3
4 /*
5 * $Id: aceread.h,v 1.12 2008/12/02 18:58:24 bollin Exp $
6 *
7 * ===========================================================================
8 *
9 * PUBLIC DOMAIN NOTICE
10 * National Center for Biotechnology Information
11 *
12 * This software/database is a "United States Government Work" under the
13 * terms of the United States Copyright Act. It was written as part of
14 * the author's official duties as a United States Government employee and
15 * thus cannot be copyrighted. This software/database is freely available
16 * to the public for use. The National Library of Medicine and the U.S.
17 * Government have not placed any restriction on its use or reproduction.
18 *
19 * Although all reasonable efforts have been taken to ensure the accuracy
20 * and reliability of the software and data, the NLM and the U.S.
21 * Government do not and cannot warrant the performance or results that
22 * may be obtained by using this software or data. The NLM and the U.S.
23 * Government disclaim all warranties, express or implied, including
24 * warranties of performance, merchantability or fitness for any particular
25 * purpose.
26 *
27 * Please cite the author in any work or product based on this material.
28 *
29 * ===========================================================================
30 *
31 * Authors: Colleen Bollin
32 *
33 */
34
35 #include <util/creaders/creaders_export.h>
36
37 #ifdef __cplusplus
38 extern "C" {
39 #endif
40
41 /* defines from ncbistd.h */
/* Empty fallbacks for FAR, PASCAL and EXPORT, each defined only when the
 * including translation unit has not already defined it. Every macro is
 * guarded exactly once; the second, identical PASCAL/EXPORT guard pair that
 * used to follow was a no-op and has been dropped. */
42 #ifndef FAR
43 #define FAR
44 #endif
45 #ifndef PASCAL
46 #define PASCAL
47 #endif
48 #ifndef EXPORT
49 #define EXPORT
50 #endif
58
/* Calling-convention macro used on the ReadFrom* parsing callbacks and on the
 * FReadFromStringFunction pointer type in this header: expands to __stdcall
 * when WIN32 is defined and to nothing otherwise. */
59 #if defined (WIN32)
60 # define ASSEMBLY_CALLBACK __stdcall
61 #else
62 # define ASSEMBLY_CALLBACK
63 #endif
64
/* Gap description: an int count and a pointer to int offsets.
 * NOTE(review): gap_offsets presumably holds num_gaps entries; how each
 * offset is encoded is not visible in this header - confirm in aceread.c. */
65 typedef struct gapinfo {
66 int num_gaps;
67 int *gap_offsets;
68 } SGapInfo, * TGapInfoPtr;
69
/* Prototypes operating on SGapInfo / TGapInfoPtr:
 *   - GapInfoNew / GapInfoFree: create and release a TGapInfoPtr.
 *   - GapInfoFromSequenceString / RemoveGapCharsFromSequenceString: take a
 *     sequence string and a gap_chars string.
 *   - SeqPosFromTilingPos / TilingPosFromSeqPos: take an int position plus a
 *     TGapInfoPtr and return an int.
 * NOTE(review): presumably gap_chars lists the characters treated as gaps and
 * RemoveGapCharsFromSequenceString edits seq_str in place (it returns void
 * and takes a non-const char *) - confirm against the implementation. */
70 extern TGapInfoPtr GapInfoNew (void);
71 extern void GapInfoFree (TGapInfoPtr g);
72 extern TGapInfoPtr GapInfoFromSequenceString (char *seq_str, char *gap_chars);
73 extern void RemoveGapCharsFromSequenceString (char *seq_str, char *gap_chars);
74 extern int SeqPosFromTilingPos (int tiling_pos, TGapInfoPtr gap_info);
75 extern int TilingPosFromSeqPos (int seq_pos, TGapInfoPtr gap_info);
76
/* Record for a single read. Carries identifiers (read_id, ti, srr), the read
 * sequence with its length, a complement marker, four start/stop coordinate
 * pairs (cons_, read_, read_assem_, tiling_), gap information, the valid and
 * local ints, a free-text tag and optional quality scores.
 * NOTE(review): the coordinate system of each start/stop pair and the meaning
 * of valid/local are not documented in this header - confirm in aceread.c.
 * NOTE(review): qual_scores presumably has num_qual_scores entries - confirm. */
77 typedef struct SContigRead {
78 char * read_id;
79 int ti;
80 char * srr;
81 char * read_seq;
82 int read_len;
83 char is_complement;
84 int cons_start;
85 int cons_stop;
86 int read_start;
87 int read_stop;
88 int read_assem_start;
89 int read_assem_stop;
90 int tiling_start;
91 int tiling_stop;
92 TGapInfoPtr gaps;
93 int valid;
94 int local;
95 char * tag; /* notes, comments, annotation for the read */
96 /* quality scores - these are optional, used when recalculating consensus sequence */
97 int * qual_scores;
98 int num_qual_scores;
99 } SContigRead, * TContigReadPtr;
100
/* Create / release a TContigReadPtr.
 * NOTE(review): presumably ContigReadFree also releases the members the read
 * owns - confirm against the implementation. */
101 extern TContigReadPtr ContigReadNew (void);
102 extern void ContigReadFree (TContigReadPtr r);
103
/* Segmented consensus/read alignment: a segment count, three int arrays
 * (cons_starts, read_starts, lens) and a complement marker.
 * The revision log (1.11) describes the pairwise consensus-read alignment
 * work as checked in unfinished.
 * NOTE(review): each array presumably has numseg entries - confirm. */
104 typedef struct SConsensusReadAln {
105 int numseg;
106 int *cons_starts;
107 int *read_starts;
108 int *lens;
109 char is_complement;
110 } SConsensusReadAln, * TConsensusReadAlnPtr;
111
/* Prototypes for TConsensusReadAlnPtr: construction from a segment count,
 * release, and derivation from a consensus string plus a read.
 * Unlike the other *Free functions in this header, ConsensusReadAlnFree
 * returns a TConsensusReadAlnPtr rather than void.
 * NOTE(review): the returned pointer is presumably NULL - confirm. */
112 extern TConsensusReadAlnPtr ConsensusReadAlnNew (int numseg);
113 extern TConsensusReadAlnPtr ConsensusReadAlnFree (TConsensusReadAlnPtr a);
114 extern TConsensusReadAlnPtr GetConsensusReadAln (char *consensus_seq, TContigReadPtr read);
115
116
/* Base segment: a read identifier plus a consensus start/stop pair.
 * NOTE(review): whether cons_start/cons_stop are 0- or 1-based and inclusive
 * is not stated here - confirm. */
117 typedef struct SBaseSeg {
118 char * read_id;
119 int cons_start;
120 int cons_stop;
121 } SBaseSeg, * TBaseSegPtr;
122
/* Create / release a TBaseSegPtr. */
123 extern TBaseSegPtr BaseSegNew (void);
124 extern void BaseSegFree (TBaseSegPtr b);
125
/* One contig: consensus identifier and sequence, two consensus lengths
 * (assem and seq), a complement marker, quality scores, gap information, a
 * list of reads, a list of base segments and a free-text tag.
 * NOTE(review): reads / base_segs presumably hold num_reads / num_base_segs
 * pointers, and qual_scores num_qual_scores ints - confirm.
 * NOTE(review): the difference between consensus_assem_len and
 * consensus_seq_len is not documented here - confirm. */
126 typedef struct SContig {
127 char * consensus_id;
128 char * consensus_seq;
129 int consensus_assem_len;
130 int consensus_seq_len;
131 char is_complement;
132 int num_qual_scores;
133 int * qual_scores;
134 TGapInfoPtr gaps;
135 int num_reads;
136 TContigReadPtr * reads;
137 int num_base_segs;
138 TBaseSegPtr *base_segs;
139 char * tag; /* notes, comments, annotation for the contig */
140 } SContig, * TContigPtr;
141
/* Create / release a TContigPtr.
 * NOTE(review): presumably ContigFree also releases the reads and base
 * segments held by the contig - confirm against the implementation. */
142 extern TContigPtr ContigNew (void);
143 extern void ContigFree (TContigPtr c);
144
/* Top-level container: a contig count and a pointer to TContigPtr values.
 * NOTE(review): contigs presumably holds num_contigs entries - confirm. */
145 typedef struct SACEFile {
146 int num_contigs;
147 TContigPtr * contigs;
148 } SACEFile, * TACEFilePtr;
149
/* Create / release a TACEFilePtr. Both are declared with
 * NCBI_CREADERS_EXPORT, unlike the New/Free pairs of the other structures
 * in this header. */
150 extern NCBI_CREADERS_EXPORT TACEFilePtr ACEFileNew (void);
151 extern NCBI_CREADERS_EXPORT void ACEFileFree (TACEFilePtr afp);
152
/* Reads input through a caller-supplied line-reading callback and returns a
 * TACEFilePtr. Declared with NCBI_CREADERS_EXPORT.
 * FReadLineFunction is not declared in the lines of this header shown here;
 * it must be provided by a header included before this one.
 * NOTE(review): the wording of the has_errors comment below ("starts false
 * if errors have already been reported") reads as inverted - confirm the
 * intended in/out contract against the implementation.
 * NOTE(review): the return value on failure (presumably NULL) and who frees
 * the result (presumably the caller, via ACEFileFree) are not stated here. */
153 extern NCBI_CREADERS_EXPORT TACEFilePtr ReadACEFile (
154 FReadLineFunction readfunc, /* function for reading lines of
155 * alignment file
156 */
157 void * fileuserdata, /* data to be passed back each time
158 * readfunc is invoked
159 */
160 char make_qual_scores, /* false if ignoring
161 * known-bad qual scores
162 */
163 char * has_errors /* starts false if errors have already been reported
164 * set to true if errors are encountered
165 */
166 );
167
168
/* Reads input through a caller-supplied line-reading callback and returns a
 * TACEFilePtr. Declared with NCBI_CREADERS_EXPORT.
 * This function is declared a second time later in this header.
 * NOTE(review): presumably parses MAQ-format assembly output, as the name
 * suggests - confirm against the implementation. */
169 extern NCBI_CREADERS_EXPORT TACEFilePtr ReadMAQFile (
170 FReadLineFunction readfunc, /* function for reading lines of
171 * alignment file
172 */
173 void * fileuserdata /* data to be passed back each time
174 * readfunc is invoked
175 */
176 );
177
178
/* Output and conversion prototypes:
 *   - WriteACEFile takes a FILE * and a TACEFilePtr.
 *   - AlignmentFileFromContig takes a contig and returns a TAlignmentFilePtr.
 *   - TraceArchiveGapStringFromACESequence takes a sequence string and
 *     returns a char *.
 * FILE and TAlignmentFilePtr are not declared in the lines of this header
 * shown here; the includer must supply <stdio.h> and the declaring header.
 * NOTE(review): the char * returned by TraceArchiveGapStringFromACESequence
 * is presumably heap-allocated and owned by the caller - confirm. */
179 extern void WriteACEFile (FILE *fp, TACEFilePtr afp);
180
181 extern TAlignmentFilePtr AlignmentFileFromContig (TContigPtr contig);
182
183 extern char * TraceArchiveGapStringFromACESequence (char *seq_str);
184
/* Builds a TContigReadPtr from a string; despite the name, the return type
 * is a read pointer, not a contig pointer. consensus_id is a char ** that
 * the caller passes alongside the string.
 * NOTE(review): the six *_col ints presumably select columns within str, and
 * consensus_id is presumably an output - confirm against the implementation. */
185 extern TContigReadPtr
186 ReadContigFromString
187 (char *str,
188 char **consensus_id,
189 int id_col,
190 int seq_col,
191 int contig_id_col,
192 int strand_col,
193 int start_col,
194 int interpret_n_col);
/* Four string-to-read parsers sharing one signature, (char *str,
 * char **consensus_id) -> TContigReadPtr, and the ASSEMBLY_CALLBACK calling
 * convention; this matches the FReadFromStringFunction pointer type declared
 * in this header.
 * NOTE(review): each presumably parses one line of the format named in the
 * function - confirm against the implementation. */
195 extern TContigReadPtr ASSEMBLY_CALLBACK ReadFromMAQString (char *str, char **consensus_id);
196 extern TContigReadPtr ASSEMBLY_CALLBACK ReadFromElandMostCompressed (char *str, char **consensus_id);
197 extern TContigReadPtr ASSEMBLY_CALLBACK ReadFromElandSanger (char *str, char **consensus_id);
198 extern TContigReadPtr ASSEMBLY_CALLBACK ReadFromElandStandalone (char *str, char **consensus_id);
199
/* FReadFromStringFunction: pointer to an ASSEMBLY_CALLBACK function taking
 * (char *str, char **consensus_id) and returning a TContigReadPtr.
 * ReadAssemblyFile: takes a line-reading callback with its user data, plus a
 * FReadFromStringFunction used to turn a string into a read, and returns a
 * TACEFilePtr. It is not declared with NCBI_CREADERS_EXPORT. */
200 typedef TContigReadPtr (ASSEMBLY_CALLBACK *FReadFromStringFunction) (char *str, char **consensus_id);
201 extern TACEFilePtr ReadAssemblyFile
202 (FReadLineFunction readfunc, /* function for reading lines of
203 * alignment file
204 */
205 void * fileuserdata, /* data to be passed back each time
206 * readfunc is invoked
207 */
208 FReadFromStringFunction makeread_func); /* function to transform a string into a read */
209
/* Second declaration of ReadMAQFile. It now carries NCBI_CREADERS_EXPORT so
 * that it agrees with the exported declaration earlier in this header; a
 * redeclaration with different DLL linkage can be diagnosed as inconsistent
 * linkage on Windows toolchains. Parameters and return type are unchanged. */
210 extern NCBI_CREADERS_EXPORT TACEFilePtr ReadMAQFile
211 (FReadLineFunction readfunc, /* function for reading lines of
212 * alignment file
213 */
214 void * fileuserdata); /* data to be passed back each time
215 * readfunc is invoked
216 */
217
/* Takes a line-reading callback with its user data and returns a
 * TACEFilePtr. It is not declared with NCBI_CREADERS_EXPORT.
 * NOTE(review): presumably parses Eland standalone output, as the name
 * suggests - confirm against the implementation. */
218 extern TACEFilePtr ReadElandStandaloneFile
219 (FReadLineFunction readfunc, /* function for reading lines of
220 * alignment file
221 */
222 void * fileuserdata); /* data to be passed back each time
223 * readfunc is invoked
224 */
225
226
/* Takes a TACEFilePtr, three strings (subref, center_name, description), an
 * int taxid and a FILE *; returns nothing.
 * NOTE(review): presumably writes the whole ACE file as a trace assembly to
 * fp - confirm against the implementation. */
227 extern void
228 WriteTraceAssemblyFromAceFile
229 (TACEFilePtr afp,
230 char * subref,
231 char * center_name,
232 int taxid,
233 char * description,
234 FILE * fp);
235
/* Takes descriptive strings, a taxid, contig/read counts as int, base counts
 * as unsigned int, and a FILE *; returns nothing. Per the revision log
 * (1.12) the assembly_type argument was added for the assembly type.
 * NOTE(review): presumably writes the opening part of a trace assembly
 * document to fp, to be closed by WriteTraceAssemblyTrailer - confirm. */
236 extern void
237 WriteTraceAssemblyHeader
238 (char * assembly_type,
239 char * subref,
240 char * center_name,
241 int taxid,
242 char * description,
243 char * assembly,
244 int num_contigs,
245 unsigned int num_conbases,
246 int num_reads,
247 unsigned int num_readbases,
248 FILE * fp);
249
/* Three writers that take a FILE * (plus a contig or a read for the last
 * two) and return nothing. Note the argument order differs: the FILE * is
 * last in WriteTraceAssemblyFromContig and first in WriteTraceArchiveRead.
 * NOTE(review): output format is not visible in this header - confirm. */
250 extern void WriteTraceAssemblyTrailer (FILE *fp);
251
252
253 extern void WriteTraceAssemblyFromContig (TContigPtr contig, FILE *fp);
254
255 extern void WriteTraceArchiveRead (FILE *fp, TContigReadPtr read);
256
/* Takes a TACEFilePtr and a FILE *; returns nothing.
 * NOTE(review): presumably writes FASTA for every contig in afp to fp -
 * confirm against the implementation. */
257 extern void
258 WriteFASTAFromAceFile
259 (TACEFilePtr afp,
260 FILE *fp);
261
/* Error-reporting prototypes. The revision log says error messages were
 * changed to XML format (1.5) and that a wrapping header for XML errors
 * during ACE read was added (1.6).
 * NOTE(review): presumably has_errors is an in/out flag that these functions
 * read and set, and output goes to a standard stream - neither is visible in
 * this header; confirm against the implementation. */
262 extern void PrintACEFormatErrorXMLStart (char *id, char *has_errors);
263 extern void PrintACEFormatErrorXMLEnd (void);
264 extern void PrintACEFormatErrorXML (char *msg, char *id, char *has_errors);
265
/* Takes a TACEFilePtr and two line-reading callbacks, each with its own user
 * data, and returns an int. The revision log (1.7) mentions functions for
 * reading read FASTA files and quality scores for the read sequences.
 * NOTE(review): the meaning of the int result is not stated here - confirm. */
266 extern int AddReadQualScores (TACEFilePtr afp, FReadLineFunction readfunc, void *userdata, FReadLineFunction fasta_readfunc, void *fasta_userdata);
267
/* Consensus recalculation for one contig and for a whole ACE file. The
 * revision log describes recalculating the consensus sequence and its
 * quality scores from the reads (1.7) and allowing correction of just Ns in
 * consensus sequences (1.8).
 * NOTE(review): only_ns presumably restricts changes to N positions, and the
 * int result of ReplaceConsensusSequenceFromTraces is undocumented - confirm. */
268 extern int ReplaceConsensusSequenceFromTraces (TContigPtr contig, char only_ns);
269 extern void RecalculateConsensusSequences (TACEFilePtr ace_file, char only_ns);
270
/* Per-contig writers: each takes a TContigPtr and a FILE * and returns
 * nothing.
 * NOTE(review): output layout is not visible in this header - confirm. */
271 extern void WriteFASTAFromContig (TContigPtr contig, FILE *fp);
272 extern void WriteContigQualScores (TContigPtr contig, FILE *out);
273
/* ProcessContigFunc: pointer to a function taking a TContigPtr and a void *
 * and returning char.
 * ProcessLargeACEFileForContigFastaAndQualScores: takes a line-reading
 * callback with user data, make_qual_scores, has_errors, and a
 * ProcessContigFunc with its own data pointer; returns char. The revision
 * log (1.9, 1.10) mentions changes for handling large ACE files.
 * NOTE(review): presumably process_func is invoked once per contig with
 * process_data, and both char results are boolean - confirm. */
274 typedef char (*ProcessContigFunc) (TContigPtr, void *);
275
276 extern char
277 ProcessLargeACEFileForContigFastaAndQualScores
278 (FReadLineFunction readfunc,
279 void * userdata,
280 char make_qual_scores,
281 char * has_errors,
282 ProcessContigFunc process_func,
283 void * process_data);
284
285
286 #ifdef __cplusplus
287 }
288 #endif
289
290 /*
291 * ==========================================================================
292 *
293 * $Log: aceread.h,v $
294 * Revision 1.12 2008/12/02 18:58:24 bollin
295 * Added argument to WriteTraceAssemblyHeader for assembly type.
296 *
297 * Revision 1.11 2008/12/02 18:41:39 bollin
298 * Checking in unfinished work on creating pairwise denseg alignment for consensus-read comparison. Unfinished.
299 *
300 * Revision 1.10 2008/11/26 18:30:02 bollin
301 * Changes to make aceread_tst more efficient when handling large ACE files,
302 * added TSA field tags for assembly and taxid.
303 *
304 * Revision 1.9 2008/11/19 15:21:48 bollin
305 * Changes for handling large files.
306 *
307 * Revision 1.8 2008/11/14 20:16:12 bollin
308 * Allow correction of just Ns in consensus sequences.
309 *
310 * Revision 1.7 2008/11/07 18:28:00 bollin
311 * Added functions for reading read FASTA files and quality scores for the read
312 * sequences.
313 * Also added functions for recalculating the consensus sequence and consensus
314 * sequence quality scores based on the reads and read quality scores.
315 *
316 * Revision 1.6 2008/08/13 15:35:30 bollin
317 * Added wrapping header for XML errors during ACE read.
318 *
319 * Revision 1.5 2008/08/13 14:37:23 bollin
320 * Changed error messages to use XML format, removed some unused functions.
321 *
322 * Revision 1.4 2008/08/13 12:30:01 bollin
323 * Changes to allow use of srr numbers in XML and suppress lookups. Also fixes segfault.
324 *
325 * Revision 1.3 2008/07/22 19:40:25 kans
326 * use brackets instead of quotes on includes, put void in parentheses for no argument prototypes
327 *
328 * Revision 1.2 2008/07/22 18:45:09 bollin
329 * Added function declarations
330 *
331 * Revision 1.1 2008/07/22 18:10:33 bollin
332 * New files for parsing ACE format files.
333 *
334 *
335 * ==========================================================================
336 */
337
338 #endif /* API_ACEREAD__H */
339 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |