|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross Reference: C/demo/alint.c |
source navigation diff markup identifier search freetext search file search |
1 /* alint.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: alint.c
27 *
28 * Author: Jonathan Kans
29 *
30 * Version Creation Date: 11/10/08
31 *
32 * $Revision: 1.1 $
33 *
34 * File Description:
35 *
36 * Lint for Alignments in FASTA format - uppercases columns where all sequences match exactly, lowercases the rest
37 *
38 * Modifications:
39 * --------------------------------------------------------------------------
40 * Date Name Description of modification
41 * ------- ---------- -----------------------------------------------------
42 *
43 * ==========================================================================
44 */
45
46 #include <ncbi.h>
47 #include <sqnutils.h>
48
49 static CharPtr GetSequence (
50 CharPtr str,
51 Boolean skiptoken
52 )
53
54 {
55 Char ch;
56
57 if (str == NULL) return NULL;
58
59 if (! skiptoken) return str;
60
61 ch = *str;
62 while (ch != '\0' && ch != ' ') {
63 str++;
64 ch = *str;
65 }
66 if (ch == ' ') {
67 str++;
68 }
69
70 return str;
71 }
72
73 static void ProcessAlignedFASTA (
74 FILE *ifp,
75 FILE *ofp,
76 Boolean skiptoken
77 )
78
79 {
80 CharPtr PNTR array;
81 Char ch, ch0;
82 FileCache fc;
83 ValNodePtr head = NULL, last = NULL, vnp;
84 Int2 i, j, num = 0, len, minlen = INT2_MAX, matches = 0, mismatches = 0;
85 Char line [4096];
86 Boolean match;
87 CharPtr ptr, str;
88
89 FileCacheSetup (&fc, ifp);
90
91 str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
92 if (str == NULL) return;
93
94 while (str != NULL) {
95 TrimSpacesAroundString (str);
96 if (StringDoesHaveText (str)) {
97 vnp = ValNodeCopyStr (&last, 0, str);
98 if (head == NULL) {
99 head = vnp;
100 }
101 last = vnp;
102 num++;
103 str = GetSequence (str, skiptoken);
104 len = (Int2) StringLen (str);
105 if (minlen > len) {
106 minlen = len;
107 }
108 }
109 str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
110 }
111
112 if (num < 1 || minlen < 1) return;
113
114 array = (CharPtr PNTR) MemNew (sizeof (CharPtr) * (num + 1));
115 if (array == NULL) return;
116
117 for (vnp = head, i = 0; vnp != NULL; vnp = vnp->next, i++) {
118 str = (CharPtr) vnp->data.ptrvalue;
119 array [i] = str;
120 }
121
122 for (j = 0; j < minlen; j++) {
123 ptr = GetSequence (array [0], skiptoken);
124 ch0 = ptr [j];
125 match = TRUE;
126
127 for (i = 1; i < num; i++) {
128 ptr = GetSequence (array [i], skiptoken);
129 ch = ptr [j];
130 if (ch != ch0) {
131 match = FALSE;
132 }
133 }
134
135 if (match) {
136 matches++;
137 } else {
138 mismatches++;
139 }
140
141 for (i = 0; i < num; i++) {
142 ptr = GetSequence (array [i], skiptoken);
143 ch = ptr [j];
144 if (match) {
145 ptr [j] = TO_UPPER (ch);
146 } else {
147 ptr [j] = TO_LOWER (ch);
148 }
149 }
150 }
151
152 for (vnp = head, i = 0; vnp != NULL; vnp = vnp->next, i++) {
153 str = (CharPtr) vnp->data.ptrvalue;
154 fprintf (ofp, "%s\n", str);
155 }
156
157 fprintf (ofp, "\n%d matches, %d mismatches, length %d, %d percent matching\n",
158 (int) matches, (int) mismatches, (int) minlen,
159 (int) (matches * 100 / minlen));
160
161 MemFree (array);
162 ValNodeFreeData (head);
163 }
164
165 #define i_argInputFile 0
166 #define o_argOutputFile 1
167 #define s_argSkipToken 2
168
169 Args myargs [] = {
170 {"Input File", "stdin", NULL, NULL,
171 FALSE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
172 {"Output File", "stdout", NULL, NULL,
173 FALSE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
174 {"Skip First Token", "F", NULL, NULL,
175 TRUE, 's', ARG_BOOLEAN, 0.0, 0, NULL},
176 };
177
178 Int2 Main (void)
179
180 {
181 FILE *ifp, *ofp;
182 CharPtr infile, outfile;
183 Boolean skiptoken;
184
185 /* standard setup */
186
187 ErrSetFatalLevel (SEV_MAX);
188 ErrClearOptFlags (EO_SHOW_USERSTR);
189 ErrPathReset ();
190
191 if (! GetArgs ("alint", sizeof (myargs) / sizeof (Args), myargs)) {
192 return 0;
193 }
194
195 infile = (CharPtr) myargs [i_argInputFile].strvalue;
196 outfile = (CharPtr) myargs [o_argOutputFile].strvalue;
197 skiptoken = (Boolean) myargs [s_argSkipToken].intvalue;
198
199 ifp = FileOpen (infile, "r");
200 if (ifp == NULL) {
201 Message (MSG_FATAL, "Unable to open input file");
202 return 1;
203 }
204
205 ofp = FileOpen (outfile, "w");
206 if (ofp == NULL) {
207 Message (MSG_FATAL, "Unable to open output file");
208 return 1;
209 }
210
211 ProcessAlignedFASTA (ifp, ofp, skiptoken);
212
213 FileClose (ofp);
214 FileClose (ifp);
215
216 return 0;
217 }
218
219 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |