NCBI C Toolkit Cross Reference

C/demo/alint.c


  1 /*   alint.c
  2 * ===========================================================================
  3 *
  4 *                            PUBLIC DOMAIN NOTICE
  5 *            National Center for Biotechnology Information (NCBI)
  6 *
  7 *  This software/database is a "United States Government Work" under the
  8 *  terms of the United States Copyright Act.  It was written as part of
  9 *  the author's official duties as a United States Government employee and
 10 *  thus cannot be copyrighted.  This software/database is freely available
 11 *  to the public for use. The National Library of Medicine and the U.S.
 12 *  Government do not place any restriction on its use or reproduction.
 13 *  We would, however, appreciate having the NCBI and the author cited in
 14 *  any work or product based on this material
 15 *
 16 *  Although all reasonable efforts have been taken to ensure the accuracy
 17 *  and reliability of the software and data, the NLM and the U.S.
 18 *  Government do not and cannot warrant the performance or results that
 19 *  may be obtained by using this software or data. The NLM and the U.S.
 20 *  Government disclaim all warranties, express or implied, including
 21 *  warranties of performance, merchantability or fitness for any particular
 22 *  purpose.
 23 *
 24 * ===========================================================================
 25 *
 26 * File Name:  alint.c
 27 *
 28 * Author:  Jonathan Kans
 29 *
 30 * Version Creation Date:   11/10/08
 31 *
 32 * $Revision: 1.1 $
 33 *
 34 * File Description:
 35 *
 36 *  Lint for Alignments in FASTA format - upper cases points of exact match
 37 *
 38 * Modifications:  
 39 * --------------------------------------------------------------------------
 40 * Date     Name        Description of modification
 41 * -------  ----------  -----------------------------------------------------
 42 *
 43 * ==========================================================================
 44 */
 45 
 46 #include <ncbi.h>
 47 #include <sqnutils.h>
 48 
 49 static CharPtr GetSequence (
 50   CharPtr str,
 51   Boolean skiptoken
 52 )
 53 
 54 {
 55   Char  ch;
 56 
 57   if (str == NULL) return NULL;
 58 
 59   if (! skiptoken) return str;
 60 
 61   ch = *str;
 62   while (ch != '\0' && ch != ' ') {
 63     str++;
 64     ch = *str;
 65   }
 66   if (ch == ' ') {
 67     str++;
 68   }
 69 
 70   return str;
 71 }
 72 
 73 static void ProcessAlignedFASTA (
 74   FILE *ifp,
 75   FILE *ofp,
 76   Boolean skiptoken
 77 )
 78 
 79 {
 80   CharPtr PNTR  array;
 81   Char          ch, ch0;
 82   FileCache     fc;
 83   ValNodePtr    head = NULL, last = NULL, vnp;
 84   Int2          i, j, num = 0, len, minlen = INT2_MAX, matches = 0, mismatches = 0;
 85   Char          line [4096];
 86   Boolean       match;
 87   CharPtr       ptr, str;
 88 
 89   FileCacheSetup (&fc, ifp);
 90 
 91   str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
 92   if (str == NULL) return;
 93 
 94   while (str != NULL) {
 95     TrimSpacesAroundString (str);
 96     if (StringDoesHaveText (str)) {
 97       vnp = ValNodeCopyStr (&last, 0, str);
 98       if (head == NULL) {
 99         head = vnp;
100       }
101       last = vnp;
102       num++;
103       str = GetSequence (str, skiptoken);
104       len = (Int2) StringLen (str);
105       if (minlen > len) {
106         minlen = len;
107       }
108     }
109     str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
110   }
111 
112   if (num < 1 || minlen < 1) return;
113 
114   array = (CharPtr PNTR) MemNew (sizeof (CharPtr) * (num + 1));
115   if (array == NULL) return;
116 
117   for (vnp = head, i = 0; vnp != NULL; vnp = vnp->next, i++) {
118     str = (CharPtr) vnp->data.ptrvalue;
119     array [i] = str;
120   }
121 
122   for (j = 0; j < minlen; j++) {
123     ptr = GetSequence (array [0], skiptoken);
124     ch0 = ptr [j];
125     match = TRUE;
126 
127     for (i = 1; i < num; i++) {
128       ptr = GetSequence (array [i], skiptoken);
129       ch = ptr [j];
130       if (ch != ch0) {
131         match = FALSE;
132       }
133     }
134 
135     if (match) {
136       matches++;
137     } else {
138       mismatches++;
139     }
140 
141     for (i = 0; i < num; i++) {
142       ptr = GetSequence (array [i], skiptoken);
143       ch = ptr [j];
144       if (match) {
145         ptr [j] = TO_UPPER (ch);
146       } else {
147         ptr [j] = TO_LOWER (ch);
148       }
149     }
150   }
151 
152   for (vnp = head, i = 0; vnp != NULL; vnp = vnp->next, i++) {
153     str = (CharPtr) vnp->data.ptrvalue;
154     fprintf (ofp, "%s\n", str);
155   }
156 
157   fprintf (ofp, "\n%d matches, %d mismatches, length %d, %d percent matching\n",
158            (int) matches, (int) mismatches, (int) minlen,
159            (int) (matches * 100 / minlen));
160 
161   MemFree (array);
162   ValNodeFreeData (head);
163 }
164 
165 #define i_argInputFile    0
166 #define o_argOutputFile   1
167 #define s_argSkipToken    2
168 
169 Args myargs [] = {
170   {"Input File", "stdin", NULL, NULL,
171     FALSE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
172   {"Output File", "stdout", NULL, NULL,
173     FALSE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
174  {"Skip First Token", "F", NULL, NULL,
175     TRUE, 's', ARG_BOOLEAN, 0.0, 0, NULL},
176 };
177 
178 Int2 Main (void)
179 
180 {
181   FILE     *ifp, *ofp;
182   CharPtr  infile, outfile;
183   Boolean  skiptoken;
184 
185   /* standard setup */
186 
187   ErrSetFatalLevel (SEV_MAX);
188   ErrClearOptFlags (EO_SHOW_USERSTR);
189   ErrPathReset ();
190 
191   if (! GetArgs ("alint", sizeof (myargs) / sizeof (Args), myargs)) {
192     return 0;
193   }
194 
195   infile = (CharPtr) myargs [i_argInputFile].strvalue;
196   outfile = (CharPtr) myargs [o_argOutputFile].strvalue;
197   skiptoken = (Boolean) myargs [s_argSkipToken].intvalue;
198 
199   ifp = FileOpen (infile, "r");
200   if (ifp == NULL) {
201     Message (MSG_FATAL, "Unable to open input file");
202     return 1;
203   }
204 
205   ofp = FileOpen (outfile, "w");
206   if (ofp == NULL) {
207     Message (MSG_FATAL, "Unable to open output file");
208     return 1;
209   }
210 
211   ProcessAlignedFASTA (ifp, ofp, skiptoken);
212 
213   FileClose (ofp);
214   FileClose (ifp);
215 
216   return 0;
217 }
218 
219 

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.