NCBI C Toolkit Cross Reference

C/demo/debruijn.c


  1 /* $Id: debruijn.c,v 1.4 2004/02/09 21:24:59 ucko Exp $
  2 
  3 * ===========================================================================
  4 *
  5 *                            PUBLIC DOMAIN NOTICE
  6 *               National Center for Biotechnology Information
  7 *
  8 *  This software/database is a "United States Government Work" under the
  9 *  terms of the United States Copyright Act.  It was written as part of
 10 *  the author's official duties as a United States Government employee and
 11 *  thus cannot be copyrighted.  This software/database is freely available
 12 *  to the public for use. The National Library of Medicine and the U.S.
 13 *  Government have not placed any restriction on its use or reproduction.
 14 *
 15 *  Although all reasonable efforts have been taken to ensure the accuracy
 16 *  and reliability of the software and data, the NLM and the U.S.
 17 *  Government do not and cannot warrant the performance or results that
 18 *  may be obtained by using this software or data. The NLM and the U.S.
 19 *  Government disclaim all warranties, express or implied, including
 20 *  warranties of performance, merchantability or fitness for any particular
 21 *  purpose.
 22 *
 23 *  Please cite the author in any work or product based on this material.
 24 *
 25 * ===========================================================================
 26 
 27 */
 28 
 29 static char const rcsid[] = "$Id: debruijn.c,v 1.4 2004/02/09 21:24:59 ucko Exp $";
 30 
 31 /*
 32  * example driver for de Bruijn sequences.
 33  *
 34  * this code generates all n-mers over a protein
 35  * or dna alphabet. useful for creating test sequences.
 36  */
 37 
 38 #include <ncbi.h>
 39 #include <algo/blast/core/lookup_util.h>
 40 
 41 static Args myargs[] = {
 42   { "word size",
 43     NULL, NULL, NULL, FALSE, 'n', ARG_INT, 0.0, 0, NULL },
 44   { "alphabet\n"
 45     "(supply 'ncbistdaa' or 'ncbi2na' for standard\n"
 46     "alphabets, or supply your own alphabet)\n",
 47     NULL, NULL, NULL, FALSE, 'a', ARG_STRING, 0.0, 0, NULL },
 48 };
 49 
 50 Uint1 ncbistdaa[] = "-abcdefghiklmnpqrstvwxyzu*";
 51 Uint1 ncbi2na[] = "acgt";
 52 
 53 Int2 Main(void)
 54 {
 55   Int4 i;
 56   Int4 n, k;
 57   Uint1 *output;
 58   Int4 outputsize;
 59   Uint1 *alphabet=NULL;
 60   
 61   if ( ! GetArgs("debruijn", sizeof(myargs)/sizeof(myargs[0]), myargs) )
 62    return 1;
 63   
 64   n = myargs[0].intvalue;
 65 
 66   if (n < 1)
 67     {
 68       fprintf(stderr,"n must be greater than one.\n");
 69       return 1;
 70     }
 71 
 72   alphabet = myargs[1].strvalue;
 73 
 74   if (strcmp("ncbistdaa", myargs[1].strvalue) == 0)
 75     alphabet = ncbistdaa;
 76 
 77   if (strcmp("ncbi2na", myargs[1].strvalue) == 0)
 78     alphabet = ncbi2na;
 79 
 80   k = strlen(alphabet);
 81   
 82   /* output array needs:
 83    * k^n bytes - to store the de Bruijn sequence
 84    * n-1 bytes - to unwrap (see below)
 85    * 1   byte  - for the terminating NUL
 86    */
 87 
 88   outputsize = iexp(k,n) + (n-1);
 89   output = (char *) malloc(outputsize + 1);
 90 
 91   /* compute the (n,k) de Bruijn sequence */  
 92   debruijn(n,k,output,alphabet);
 93   
 94   /* We don't want a true cyclical de Bruijn sequence; we want
 95    * all words in a straight line- copy the first n-1 letters
 96    * to the end.
 97    */
 98 
 99   for(i=0;i<(n-1);i++)
100     output[outputsize-n+1+i] = output[i];
101 
102   /* Terminate the string. */
103 
104   output[outputsize] = '\0';
105 
106   puts(output);
107 
108   free(output);
109 }
110 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.