|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/demo/debruijn.c |
source navigation diff markup identifier search freetext search file search |
1 /* $Id: debruijn.c,v 1.4 2004/02/09 21:24:59 ucko Exp $
2
3 * ===========================================================================
4 *
5 * PUBLIC DOMAIN NOTICE
6 * National Center for Biotechnology Information
7 *
8 * This software/database is a "United States Government Work" under the
9 * terms of the United States Copyright Act. It was written as part of
10 * the author's official duties as a United States Government employee and
11 * thus cannot be copyrighted. This software/database is freely available
12 * to the public for use. The National Library of Medicine and the U.S.
13 * Government have not placed any restriction on its use or reproduction.
14 *
15 * Although all reasonable efforts have been taken to ensure the accuracy
16 * and reliability of the software and data, the NLM and the U.S.
17 * Government do not and cannot warrant the performance or results that
18 * may be obtained by using this software or data. The NLM and the U.S.
19 * Government disclaim all warranties, express or implied, including
20 * warranties of performance, merchantability or fitness for any particular
21 * purpose.
22 *
23 * Please cite the author in any work or product based on this material.
24 *
25 * ===========================================================================
26
27 */
28
/* RCS identification string embedded in the object file. */
static const char rcsid[] =
    "$Id: debruijn.c,v 1.4 2004/02/09 21:24:59 ucko Exp $";
30
31 /*
32 * example driver for de Bruijn sequences.
33 *
34 * this code generates all n-mers over a protein
35 * or dna alphabet. useful for creating test sequences.
36 */
37
38 #include <ncbi.h>
39 #include <algo/blast/core/lookup_util.h>
40
41 static Args myargs[] = {
42 { "word size",
43 NULL, NULL, NULL, FALSE, 'n', ARG_INT, 0.0, 0, NULL },
44 { "alphabet\n"
45 "(supply 'ncbistdaa' or 'ncbi2na' for standard\n"
46 "alphabets, or supply your own alphabet)\n",
47 NULL, NULL, NULL, FALSE, 'a', ARG_STRING, 0.0, 0, NULL },
48 };
49
50 Uint1 ncbistdaa[] = "-abcdefghiklmnpqrstvwxyzu*";
51 Uint1 ncbi2na[] = "acgt";
52
53 Int2 Main(void)
54 {
55 Int4 i;
56 Int4 n, k;
57 Uint1 *output;
58 Int4 outputsize;
59 Uint1 *alphabet=NULL;
60
61 if ( ! GetArgs("debruijn", sizeof(myargs)/sizeof(myargs[0]), myargs) )
62 return 1;
63
64 n = myargs[0].intvalue;
65
66 if (n < 1)
67 {
68 fprintf(stderr,"n must be greater than one.\n");
69 return 1;
70 }
71
72 alphabet = myargs[1].strvalue;
73
74 if (strcmp("ncbistdaa", myargs[1].strvalue) == 0)
75 alphabet = ncbistdaa;
76
77 if (strcmp("ncbi2na", myargs[1].strvalue) == 0)
78 alphabet = ncbi2na;
79
80 k = strlen(alphabet);
81
82 /* output array needs:
83 * k^n bytes - to store the de Bruijn sequence
84 * n-1 bytes - to unwrap (see below)
85 * 1 byte - for the terminating NUL
86 */
87
88 outputsize = iexp(k,n) + (n-1);
89 output = (char *) malloc(outputsize + 1);
90
91 /* compute the (n,k) de Bruijn sequence */
92 debruijn(n,k,output,alphabet);
93
94 /* We don't want a true cyclical de Bruijn sequence; we want
95 * all words in a straight line- copy the first n-1 letters
96 * to the end.
97 */
98
99 for(i=0;i<(n-1);i++)
100 output[outputsize-n+1+i] = output[i];
101
102 /* Terminate the string. */
103
104 output[outputsize] = '\0';
105
106 puts(output);
107
108 free(output);
109 }
110 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |