NCBI C Toolkit Cross Reference

C/api/aceread.h


  1 #ifndef API_ACEREAD__H
  2 #define API_ACEREAD__H
  3 
  4 /*
  5  * $Id: aceread.h,v 1.12 2008/12/02 18:58:24 bollin Exp $
  6  *
  7  * ===========================================================================
  8  *
  9  *                            PUBLIC DOMAIN NOTICE
 10  *               National Center for Biotechnology Information
 11  *
 12  *  This software/database is a "United States Government Work" under the
 13  *  terms of the United States Copyright Act.  It was written as part of
 14  *  the author's official duties as a United States Government employee and
 15  *  thus cannot be copyrighted.  This software/database is freely available
 16  *  to the public for use. The National Library of Medicine and the U.S.
 17  *  Government have not placed any restriction on its use or reproduction.
 18  *
 19  *  Although all reasonable efforts have been taken to ensure the accuracy
 20  *  and reliability of the software and data, the NLM and the U.S.
 21  *  Government do not and cannot warrant the performance or results that
 22  *  may be obtained by using this software or data. The NLM and the U.S.
 23  *  Government disclaim all warranties, express or implied, including
 24  *  warranties of performance, merchantability or fitness for any particular
 25  *  purpose.
 26  *
 27  *  Please cite the author in any work or product based on this material.
 28  *
 29  * ===========================================================================
 30  *
 31  * Authors:  Colleen Bollin
 32  *
 33  */
 34 
 35 #include <util/creaders/creaders_export.h>
 36 
 37 #ifdef __cplusplus
 38 extern "C" {
 39 #endif
 40 
 41 /* defines from ncbistd.h
     * Each decoration macro below gets an empty fallback, but only when the
     * including translation unit has not already defined it.  A second,
     * identical pair of PASCAL/EXPORT guards used to follow this group; it
     * could never take effect after the first pair and has been removed.
     */
 42 #ifndef FAR
 43 #define FAR
 44 #endif
 45 #ifndef PASCAL
 46 #define PASCAL
 47 #endif
 48 #ifndef EXPORT
 49 #define EXPORT
 50 #endif
 58 
 59 #if defined (WIN32) /* ASSEMBLY_CALLBACK decorates the ReadFrom... parsers and FReadFromStringFunction below */
 60 #    define ASSEMBLY_CALLBACK __stdcall /* when WIN32 is defined: __stdcall */
 61 #else
 62 #    define ASSEMBLY_CALLBACK /* otherwise: expands to nothing */
 63 #endif
 64 
 65 typedef struct gapinfo { /* gap record: an int count paired with an int array */
 66     int num_gaps;        /* presumably the element count of gap_offsets -- TODO confirm */
 67     int *gap_offsets;    /* NOTE(review): units and ownership are not shown in this header */
 68 } SGapInfo, * TGapInfoPtr;
 69 
    /* Allocation and release of an SGapInfo (bodies live outside this header). */
 70 extern TGapInfoPtr GapInfoNew (void);
 71 extern void GapInfoFree (TGapInfoPtr g);
    /* Both take a sequence string plus a string of gap characters; the first
     * returns a TGapInfoPtr, the second returns nothing.  Neither string is
     * const-qualified, so callers should assume seq_str may be written to.
     * NOTE(review): confirm which arguments are actually modified.
     */
 72 extern TGapInfoPtr GapInfoFromSequenceString (char *seq_str, char *gap_chars);
 73 extern void RemoveGapCharsFromSequenceString (char *seq_str, char *gap_chars);
    /* Convert between a "tiling" position and a "sequence" position using
     * gap_info.  NOTE(review): coordinate base (0- or 1-based) is not visible
     * here -- check the implementation before relying on it.
     */
 74 extern int SeqPosFromTilingPos (int tiling_pos, TGapInfoPtr gap_info);
 75 extern int TilingPosFromSeqPos (int seq_pos, TGapInfoPtr gap_info);
 76 
 77 typedef struct SContigRead { /* one read; SContig below holds an array of pointers to these */
 78     char * read_id;          /* identifier string for the read */
 79     int    ti;               /* presumably a Trace Archive TI number -- TODO confirm */
 80     char * srr;              /* presumably the "srr number" mentioned in the rev 1.4 log entry -- confirm */
 81     char * read_seq;         /* read sequence string */
 82     int    read_len;         /* presumably the length of read_seq -- TODO confirm (gapped or ungapped?) */
 83     char   is_complement;    /* char used as a flag; nonzero presumably means reverse strand */
    /* Four start/stop coordinate pairs follow.  NOTE(review): the header does
     * not say which coordinate system or base each pair uses -- confirm in
     * the implementation before doing arithmetic across them.
     */
 84     int    cons_start;
 85     int    cons_stop;
 86     int    read_start;
 87     int    read_stop;
 88     int    read_assem_start;
 89     int    read_assem_stop;
 90     int    tiling_start;
 91     int    tiling_stop;
 92     TGapInfoPtr gaps;        /* gap record for this read (see SGapInfo) */
 93     int    valid;            /* NOTE(review): flag semantics not documented in this header */
 94     int    local;            /* NOTE(review): flag semantics not documented in this header */
 95     char * tag; /* notes, comments, annotation for the read */
 96     /* quality scores - these are optional, used when recalculating consensus sequence */
 97     int  * qual_scores;
 98     int    num_qual_scores;  /* presumably the element count of qual_scores -- TODO confirm */
 99 } SContigRead, * TContigReadPtr;
100 
    /* Allocation and release of an SContigRead.  NOTE(review): whether
     * ContigReadFree also releases the owned strings/arrays is not visible here.
     */
101 extern TContigReadPtr ContigReadNew (void);
102 extern void ContigReadFree (TContigReadPtr r);
103 
104 typedef struct SConsensusReadAln { /* segmented consensus-vs-read alignment; the rev 1.11 log calls this work unfinished */
105     int numseg;         /* segment count; presumably the length of each array below -- TODO confirm */
106     int *cons_starts;   /* per-segment start on the consensus (presumably) */
107     int *read_starts;   /* per-segment start on the read (presumably) */
108     int *lens;          /* per-segment length (presumably) */
109     char is_complement; /* char used as a flag */
110 } SConsensusReadAln, * TConsensusReadAlnPtr;
111 
    /* ConsensusReadAlnNew takes the segment count up front.
     * Unlike the other ...Free functions in this header, which return void,
     * ConsensusReadAlnFree returns a TConsensusReadAlnPtr.
     * NOTE(review): presumably it returns NULL so callers can write
     * a = ConsensusReadAlnFree (a); -- confirm.
     */
112 extern TConsensusReadAlnPtr ConsensusReadAlnNew (int numseg);
113 extern TConsensusReadAlnPtr ConsensusReadAlnFree (TConsensusReadAlnPtr a);
    /* Builds an alignment from a consensus sequence string and one read. */
114 extern TConsensusReadAlnPtr GetConsensusReadAln (char *consensus_seq, TContigReadPtr read);
115 
116 
117 typedef struct SBaseSeg { /* ties a read id to a consensus interval; SContig holds an array of these */
118     char * read_id;       /* identifier string of the read */
119     int    cons_start;    /* NOTE(review): coordinate base and inclusiveness not stated here */
120     int    cons_stop;
121 } SBaseSeg, * TBaseSegPtr;
122 
    /* Allocation and release of an SBaseSeg (bodies live outside this header). */
123 extern TBaseSegPtr BaseSegNew (void);
124 extern void BaseSegFree (TBaseSegPtr b);
125 
126 typedef struct SContig { /* one contig: consensus data plus arrays of reads and base segments */
127     char  * consensus_id;        /* identifier string for the consensus */
128     char  * consensus_seq;       /* consensus sequence string */
129     int     consensus_assem_len; /* NOTE(review): presumably the gapped (assembly) length -- confirm */
130     int     consensus_seq_len;   /* NOTE(review): presumably the ungapped sequence length -- confirm */
131     char    is_complement;       /* char used as a flag */
132     int     num_qual_scores;     /* presumably the element count of qual_scores -- TODO confirm */
133     int   * qual_scores;
134     TGapInfoPtr gaps;            /* gap record for the consensus (see SGapInfo) */
135     int     num_reads;           /* presumably the element count of reads -- TODO confirm */
136     TContigReadPtr * reads;      /* array of pointers to SContigRead */
137     int     num_base_segs;       /* presumably the element count of base_segs -- TODO confirm */
138     TBaseSegPtr *base_segs;      /* array of pointers to SBaseSeg */
139     char  * tag; /* notes, comments, annotation for the contig */
140 } SContig, * TContigPtr;
141 
    /* Allocation and release of an SContig.  NOTE(review): whether ContigFree
     * also frees the reads and base segments it points to is not visible here.
     */
142 extern TContigPtr ContigNew (void);
143 extern void ContigFree (TContigPtr c);
144    
145 typedef struct SACEFile {  /* top-level container: an array of contig pointers */
146     int        num_contigs; /* presumably the element count of contigs -- TODO confirm */
147     TContigPtr * contigs;
148 } SACEFile, * TACEFilePtr;
149 
    /* Allocation and release of an SACEFile.  These two carry
     * NCBI_CREADERS_EXPORT; the other New/Free pairs in this header do not.
     */
150 extern NCBI_CREADERS_EXPORT TACEFilePtr ACEFileNew (void);
151 extern NCBI_CREADERS_EXPORT void ACEFileFree (TACEFilePtr afp);
152 
153 extern NCBI_CREADERS_EXPORT TACEFilePtr ReadACEFile ( /* reads an ACE file line by line through readfunc; returns a TACEFilePtr */
154   FReadLineFunction    readfunc,      /* function for reading lines of
155                                        * alignment file (the FReadLineFunction
                                           * type is not declared in this header)
156                                        */
157   void *               fileuserdata,  /* data to be passed back each time
158                                        * readfunc is invoked
159                                        */
160   char                 make_qual_scores, /* false if ignoring
161                                           * known-bad qual scores
162                                           */
163   char *               has_errors        /* in/out error flag: set to true if errors are encountered.
164                                           * NOTE(review): the original note said it "starts false if
165                                           * errors have already been reported", which reads inverted --
                                              * confirm the expected initial value against the implementation.
                                              */
166 );
167 
168 
169 extern NCBI_CREADERS_EXPORT TACEFilePtr ReadMAQFile ( /* reads MAQ-format input line by line; returns a TACEFilePtr */
170  FReadLineFunction    readfunc,      /* function for reading lines of
171                                        * alignment file
172                                        */
173  void *               fileuserdata  /* data to be passed back each time
174                                        * readfunc is invoked
175                                        */
176 );
177 
178 
179 extern void WriteACEFile (FILE *fp, TACEFilePtr afp); /* writes afp to the stream fp */
    /* NOTE(review): FILE needs <stdio.h>, and TAlignmentFilePtr /
     * FReadLineFunction are declared elsewhere; this header includes only
     * creaders_export.h, so it appears to rely on the includer (or that
     * header) to provide them -- confirm.
     */
180 
181 extern TAlignmentFilePtr AlignmentFileFromContig (TContigPtr contig); /* builds a TAlignmentFilePtr from one contig */
182 
183 extern char * TraceArchiveGapStringFromACESequence (char *seq_str); /* returns a char *; NOTE(review): presumably newly allocated and caller-freed -- confirm */
184 
185 extern TContigReadPtr /* parses one text line into a read; the *_col arguments select columns */
186 ReadContigFromString 
187 (char  *str,            /* the line to parse (not const-qualified) */
188  char **consensus_id,   /* out-parameter; NOTE(review): presumably receives the contig id found on the line -- confirm ownership */
189  int    id_col,
190  int    seq_col, 
191  int    contig_id_col,
192  int    strand_col,
193  int    start_col,
194  int    interpret_n_col); /* NOTE(review): column numbering base and this flag's meaning are not documented here */
    /* Format-specific parsers.  All four share the (str, consensus_id)
     * signature of FReadFromStringFunction and carry ASSEMBLY_CALLBACK, so
     * each can be passed as the makeread_func argument of ReadAssemblyFile.
     */
195 extern TContigReadPtr ASSEMBLY_CALLBACK ReadFromMAQString (char *str, char **consensus_id);
196 extern TContigReadPtr ASSEMBLY_CALLBACK ReadFromElandMostCompressed (char *str, char **consensus_id);
197 extern TContigReadPtr ASSEMBLY_CALLBACK ReadFromElandSanger (char *str, char **consensus_id);
198 extern TContigReadPtr ASSEMBLY_CALLBACK ReadFromElandStandalone (char *str, char **consensus_id);
199 
    /* Callback type: turns one input string into a read. */
200 typedef TContigReadPtr (ASSEMBLY_CALLBACK *FReadFromStringFunction) (char *str, char **consensus_id);
201 extern TACEFilePtr ReadAssemblyFile /* generic reader: lines come from readfunc, reads are built by makeread_func */
202 (FReadLineFunction    readfunc,      /* function for reading lines of
203                                        * alignment file
204                                        */
205  void *               fileuserdata,  /* data to be passed back each time
206                                        * readfunc is invoked
207                                        */
208  FReadFromStringFunction makeread_func); /* function to transform a string into a read */
209 
210 /* ReadMAQFile (readfunc, fileuserdata) is declared once, earlier in this
     * header, with NCBI_CREADERS_EXPORT.  The second prototype that used to
     * sit here repeated the same signature without the export decoration,
     * which is redundant and gives the function inconsistent linkage
     * decoration, so it has been dropped.
     */
217 
218 extern TACEFilePtr ReadElandStandaloneFile /* same parameter list as ReadMAQFile, for Eland standalone input */
219 (FReadLineFunction    readfunc,      /* function for reading lines of
220                                        * alignment file
221                                        */
222  void *               fileuserdata);  /* data to be passed back each time
223                                        * readfunc is invoked
224                                        */
225 
226 
227 extern void /* writes a whole SACEFile as a trace assembly to fp */
228 WriteTraceAssemblyFromAceFile 
229 (TACEFilePtr afp,
230  char      * subref,
231  char      * center_name, 
232  int         taxid,
233  char      * description,
234  FILE      * fp);
235 
    /* Header / trailer / per-contig / per-read writers.  NOTE(review):
     * presumably meant to be called in that order when streaming an assembly
     * contig by contig rather than through WriteTraceAssemblyFromAceFile --
     * confirm.
     */
236 extern void
237 WriteTraceAssemblyHeader
238 (char * assembly_type,           /* added in revision 1.12 per the log at the end of this file */
239  char * subref,
240  char * center_name,
241  int    taxid,
242  char * description,
243  char * assembly,
244  int    num_contigs,
245  unsigned int    num_conbases,   /* note: the two base counts are unsigned, the other counts are int */
246  int    num_reads,
247  unsigned int    num_readbases,
248  FILE * fp);
249 
250 extern void WriteTraceAssemblyTrailer (FILE *fp);
251 
252 
253 extern void WriteTraceAssemblyFromContig (TContigPtr contig, FILE *fp);
254 
255 extern void WriteTraceArchiveRead (FILE *fp, TContigReadPtr read); /* note: FILE * comes first here, last in the functions above */
256 
    /* Writes the contents of afp to fp in FASTA form. */
257 extern void
258 WriteFASTAFromAceFile
259 (TACEFilePtr afp,
260  FILE        *fp);
261 
262 extern void PrintACEFormatErrorXMLStart (char *id, char *has_errors); /* XML-format error reporting (see rev 1.5 and 1.6 log entries) */
263 extern void PrintACEFormatErrorXMLEnd (void);
264 extern void PrintACEFormatErrorXML (char *msg, char *id, char *has_errors); /* NOTE(review): has_errors is a char * flag, presumably updated by the call -- confirm */
265 
    /* Attaches read quality scores to afp.  Takes two independent
     * line-reader/userdata pairs; the second pair is named for FASTA input.
     * NOTE(review): meaning of the int return value is not documented here.
     */
266 extern int AddReadQualScores (TACEFilePtr afp, FReadLineFunction readfunc, void *userdata, FReadLineFunction fasta_readfunc, void *fasta_userdata);
267 
    /* Consensus recalculation, for one contig or for every contig in a file.
     * only_ns is a char flag; per the rev 1.8 log it allows correcting just
     * the Ns in consensus sequences.
     */
268 extern int ReplaceConsensusSequenceFromTraces (TContigPtr contig, char only_ns);
269 extern void RecalculateConsensusSequences (TACEFilePtr ace_file, char only_ns);
270 
    /* Per-contig output of FASTA and of quality scores to a stream. */
271 extern void WriteFASTAFromContig (TContigPtr contig, FILE *fp);
272 extern void WriteContigQualScores (TContigPtr contig, FILE *out);
273 
274 typedef char (*ProcessContigFunc) (TContigPtr, void *); /* per-contig callback: receives a contig and an opaque pointer, returns a char */
275 
    /* Large-file entry point (see the rev 1.9 and 1.10 log entries): reads
     * through readfunc and hands contigs to process_func together with
     * process_data.  make_qual_scores and has_errors have the same types as
     * the like-named ReadACEFile parameters.
     * NOTE(review): the meaning of the char return value, and of the value
     * process_func returns, is not documented in this header -- confirm.
     */
276 extern char
277 ProcessLargeACEFileForContigFastaAndQualScores
278 (FReadLineFunction    readfunc,
279  void *               userdata,
280  char                 make_qual_scores,
281  char *               has_errors,
282  ProcessContigFunc    process_func,
283  void *               process_data);
284 
285 
286 #ifdef __cplusplus
287 }
288 #endif
289 
290 /*
291  * ==========================================================================
292  *
293  * $Log: aceread.h,v $
294  * Revision 1.12  2008/12/02 18:58:24  bollin
295  * Added argument to WriteTraceAssemblyHeader for assembly type.
296  *
297  * Revision 1.11  2008/12/02 18:41:39  bollin
298  * Checking in unfinished work on creating pairwise denseg alignment for consensus-read comparison.  Unfinished.
299  *
300  * Revision 1.10  2008/11/26 18:30:02  bollin
301  * Changes to make aceread_tst more efficient when handling large ACE files,
302  * added TSA field tags for assembly and taxid.
303  *
304  * Revision 1.9  2008/11/19 15:21:48  bollin
305  * Changes for handling large files.
306  *
307  * Revision 1.8  2008/11/14 20:16:12  bollin
308  * Allow correction of just Ns in consensus sequences.
309  *
310  * Revision 1.7  2008/11/07 18:28:00  bollin
311  * Added functions for reading read FASTA files and quality scores for the read
312  * sequences.
313  * Also added functions for recalculating the consensus sequence and consensus
314  * sequence quality scores based on the reads and read quality scores.
315  *
316  * Revision 1.6  2008/08/13 15:35:30  bollin
317  * Added wrapping header for XML errors during ACE read.
318  *
319  * Revision 1.5  2008/08/13 14:37:23  bollin
320  * Changed error messages to use XML format, removed some unused functions.
321  *
322  * Revision 1.4  2008/08/13 12:30:01  bollin
323  * Changes to allow use of srr numbers in XML and suppress lookups.  Also fixes segfault.
324  *
325  * Revision 1.3  2008/07/22 19:40:25  kans
326  * use brackets instead of quotes on includes, put void in parentheses for no argument prototypes
327  *
328  * Revision 1.2  2008/07/22 18:45:09  bollin
329  * Added function declarations
330  *
331  * Revision 1.1  2008/07/22 18:10:33  bollin
332  * New files for parsing ACE format files.
333  *
334  *
335  * ==========================================================================
336  */
337 
338 #endif /* API_ACEREAD__H */
339 

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.