NCBI C Toolkit Cross Reference

C/demo/dst.c


  1 /*
  2 * ===========================================================================
  3 *
  4 *                            PUBLIC DOMAIN NOTICE
  5 *               National Center for Biotechnology Information
  6 *
  7 *  This software/database is a "United States Government Work" under the
  8 *  terms of the United States Copyright Act.  It was written as part of
  9 *  the author's official duties as a United States Government employee and
 10 *  thus cannot be copyrighted.  This software/database is freely available
 11 *  to the public for use. The National Library of Medicine and the U.S.
 12 *  Government have not placed any restriction on its use or reproduction.
 13 *
 14 *  Although all reasonable efforts have been taken to ensure the accuracy
 15 *  and reliability of the software and data, the NLM and the U.S.
 16 *  Government do not and cannot warrant the performance or results that
 17 *  may be obtained by using this software or data. The NLM and the U.S.
 18 *  Government disclaim all warranties, express or implied, including
 19 *  warranties of performance, merchantability or fitness for any particular
 20 *  purpose.
 21 *
 22 *  Please cite the author in any work or product based on this material.
 23 *
 24 * ===========================================================================
 25 *
 26 * File Name: dst.c
 27 *
 28 * Author(s): John Kuzio
 29 *
 30 * Version Creation Date: 98-01-01
 31 *
 32 * $Revision: 6.7 $
 33 *
 34 * File Description: a dust utility
 35 *
 36 * Modifications:
 37 * --------------------------------------------------------------------------
 38 * Date       Name        Description of modification
 39 * --------------------------------------------------------------------------
 40 * $Log: dst.c,v $
 41 * Revision 6.7  1998/12/18 16:24:54  kuzio
 42 * big GIs
 43 *
 44 * Revision 6.6  1998/09/16 18:19:28  kuzio
 45 * cvs logging
 46 *
 47 *
 48 *
 49 *
 50 * ==========================================================================
 51 */
 52 
 53 #include <ncbi.h>
 54 #include <accentr.h>
 55 #include <gather.h>
 56 #include <tofasta.h>
 57 #include <urkutil.h>
 58 #include <urkdust.h>
 59 
 60 #define TOP_ERROR 1
 61 static char _this_module[] = "dust";
 62 #undef  THIS_MODULE
 63 #define THIS_MODULE _this_module
 64 static char _this_file[] = __FILE__;
 65 #undef  THIS_FILE
 66 #define THIS_FILE _this_file
 67 
 68 typedef struct gather_Bioseq
 69 {
 70   Int4      gi;
 71   BioseqPtr bsp;
 72 } Gather_BS, PNTR Gather_BSPtr;
 73 
 74 Args myargs[] =
 75 {
 76   { "FastA file", NULL, NULL, NULL, TRUE,
 77     'f', ARG_STRING, 0.0, 0, NULL},
 78   { "nucleotide GI", "0", "0", "9000000", TRUE,
 79     'g', ARG_INT, 0.0, 0, NULL},
 80   { "dust window", "64", "16", "256", TRUE,
 81     'w', ARG_INT, 0.0, 0, NULL},
 82   { "dust cutoff", "20", "10", "100", TRUE,
 83     'c', ARG_INT, 0.0, 0, NULL},
 84   { "brute force", "FALSE", "FALSE", "TRUE", TRUE,
 85     'b', ARG_BOOLEAN, 0.0, 0, NULL},
 86   { "output line length", "50", "40", "160", TRUE,
 87     'l', ARG_INT, 0.0, 0, NULL},
 88   { "n-out for Blast", "FALSE", "FALSE", "TRUE", TRUE,
 89     'n', ARG_BOOLEAN, 0.0, 0, NULL},
 90   { "numeric output", "FALSE", "FALSE", "TRUE", TRUE,
 91     'N', ARG_BOOLEAN, 0.0, 0, NULL}
 92 };
 93 
 94 static Boolean GetNucleotideBioseq (GatherContextPtr gcp)
 95 {
 96   Gather_BSPtr   gbsp;
 97   BioseqPtr      bsp;
 98   Int4           gi, entrezgi;
 99 
100   if (gcp == NULL)
101     return FALSE;
102   if ((gbsp = (Gather_BSPtr) gcp->userdata) == NULL)
103     return FALSE;
104 
105   if (gbsp->bsp != NULL)
106     return TRUE;
107   if (gcp->thistype != OBJ_BIOSEQ)
108     return TRUE;
109   if ((bsp = (BioseqPtr) (gcp->thisitem)) == NULL)
110     return TRUE;
111 
112   gi = gbsp->gi;
113   if (gi > 0)
114   {
115     entrezgi = GetGIForSeqId (bsp->id);
116     if (gi == entrezgi)
117       gbsp->bsp = bsp;
118     return TRUE;
119   }
120   else
121   {
122     gbsp->bsp = bsp;
123     return TRUE;
124   }
125 }
126 
127 Int2 Main (void)
128 {
129   Int2        argcount;
130   Boolean     flagHaveNet;
131 
132   Int4        gi;
133   SeqEntryPtr sep;
134 
135   FILE        *fiop;
136   Char        fastafile[256];
137   CharPtr     title;
138 
139   static GatherScope  gs;
140   GatherScopePtr      gsp;
141   static Gather_BS    gbs;
142   Gather_BSPtr        gbsp;
143 
144   Int4           i, start, stop, linelen;
145   SeqLocPtr      slp, slpn;
146   Uint1Ptr       sequence;
147   SeqPortPtr     spp;
148 
149   DustDataPtr    ddp;
150   DustRegionPtr  drp;
151 
152   argcount = sizeof (myargs) / sizeof (Args);
153   if (!GetArgs ("dust", argcount, myargs))
154     return 1;
155 
156   gsp = &gs;
157   gbsp = &gbs;
158 
159   MemSet ((Pointer) gsp, 0, sizeof (GatherScope));
160   MemSet ((Pointer) gsp->ignore, (int) (TRUE),
161           (size_t) (OBJ_MAX * sizeof (Boolean)));
162   gsp->ignore[OBJ_BIOSEQ] = FALSE;
163 
164   gbsp->bsp = NULL;
165 
166   if (myargs[1].intvalue == 0 && myargs[0].strvalue == NULL)
167   {
168     ErrPostEx (SEV_ERROR, TOP_ERROR, 100,
169                "No gi or FastA file given :: for help :   dust -");
170     ErrShow ();
171     exit (1);
172   }
173 
174   gi = myargs[1].intvalue;
175   if (myargs[0].strvalue != NULL)
176     StrCpy (fastafile, myargs[0].strvalue);
177   else
178     fastafile[0] = '\0';
179 
180   linelen = myargs[5].intvalue;
181 
182   if (gi > 0)
183   {
184     if (!EntrezInit ("dust", FALSE, &flagHaveNet))
185     {
186       ErrPostEx (SEV_ERROR, TOP_ERROR, 102,
187                  "Entrez init failed");
188       ErrShow ();
189       exit (1);
190     }
191   }
192 
193   if (gi > 0)
194   {
195     sep = EntrezSeqEntryGet (gi, SEQENTRY_READ_BIOSEQ);
196   }
197   else
198   {
199     if ((fiop = FileOpen (fastafile, "r")) == NULL)
200     {
201       ErrPostEx (SEV_ERROR, TOP_ERROR, 103,
202                  "Failed to open FastA file");
203       ErrShow ();
204       exit (1);
205     }
206     sep = FastaToSeqEntry (fiop, TRUE);
207   }
208 
209   if (sep == NULL)
210   {
211     ErrPostEx (SEV_ERROR, TOP_ERROR, 104,
212                "No seqentry found");
213     ErrShow ();
214     exit (1);
215   }
216 
217   while (sep != NULL)
218   {
219     gbsp->gi = gi;
220     GatherSeqEntry (sep, (Pointer) gbsp, GetNucleotideBioseq,
221                     (Pointer) gsp);
222 
223     if (gbsp->bsp != NULL)
224     {
225       if (ISA_na (gbsp->bsp->mol))
226       {
227         ddp = DustDataNew ();
228         ddp->windowsize = myargs[2].intvalue;
229         ddp->level = myargs[3].intvalue;
230         ddp->percent = myargs[3].floatvalue;
231         ddp->flagBrute = (Boolean) myargs[4].intvalue;
232         drp = DustBioseq (gbsp->bsp, 0, gbsp->bsp->length-1, ddp);
233         slpn = slp = DustFilter (drp, ddp->percent, gbsp->bsp->id);
234         DustDataFree (ddp);
235         DustRegionFree (drp);
236 
237         title = FastaTitle (gbsp->bsp, ">", NULL);
238         printf ("%s\n", title);
239         MemFree (title);
240         if (myargs[7].intvalue == TRUE)
241         {
242           slp = slpn;
243           while (slp != NULL)
244           {
245             start = SeqLocStart (slp) + 1;
246             stop = SeqLocStop (slp) + 1;
247             printf ("%9ld %9ld\n", (long) start, (long) stop);
248             slp = slp->next;
249           }
250         }
251         else
252         {
253           sequence = (Uint1Ptr) MemNew ((size_t) (sizeof (Uint1) *
254                                                   gbsp->bsp->length+1));
255           spp = SeqPortNew (gbsp->bsp, 0, gbsp->bsp->length-1, 0,
256                             Seq_code_iupacna);
257           SeqPortSeek (spp, 0, SEEK_SET);
258 
259           i = 0;
260           while ((sequence[i] = SeqPortGetResidue (spp)) != SEQPORT_EOF)
261           {
262             if (('a' <= (Char) sequence[i] && (Char) sequence[i] <= 'z') ||
263                 ('A' <= (Char) sequence[i] && (Char) sequence[i] <= 'Z'))
264               i++;
265           }
266           sequence[i] = 0;
267 
268           i = 0;
269           while (sequence[i] != 0)
270           {
271             sequence[i] = (Uint1) TO_UPPER ((Char) sequence[i]);
272             i++;
273           }
274 
275           slp = slpn;
276           while (slp != NULL)
277           {
278             start = SeqLocStart (slp);
279             stop = SeqLocStop (slp);
280             for (i = start; i <= stop; i++)
281             {
282               if (myargs[6].intvalue == TRUE)
283                 sequence[i] = (Uint1) 'n';
284               else
285                 sequence[i] = (Uint1) TO_LOWER ((Char) sequence[i]);
286             }
287             slp = slp->next;
288           }
289 
290           i = 0;
291           while (sequence[i] != 0)
292           {
293             printf ("%c", (Char) sequence[i]);
294             i++;
295             if (i % linelen == 0)
296             {
297               if (myargs[6].intvalue == TRUE)
298                 printf ("\n");
299               else
300                 printf (" %8ld\n", (long) i);
301             }
302           }
303           if (i % linelen != 0)
304             printf ("\n");
305 
306           SeqPortFree (spp);
307           MemFree (sequence);
308         }
309         slp = slpn;
310         while (slp != NULL)
311         {
312           slpn = slp->next;
313           SeqLocFree (slp);
314           slp = slpn;
315         }
316       }
317     }
318     sep = SeqEntryFree (sep);
319     if (gi <= 0)
320     {
321       sep = FastaToSeqEntry (fiop, TRUE);
322       gbsp->bsp = NULL;
323     }
324   }
325 
326   if (gi > 0)
327     EntrezFini ();
328   else
329     FileClose (fiop);
330   return 0;
331 }
332 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.