|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross Reference: C/demo/blastalign.c |
source navigation diff markup identifier search freetext search file search |
1 /* This is a sample program to call BandAlign */
2
3 #include <ncbi.h>
4 #include <tofasta.h>
5 #include <lsqfetch.h>
6 #include <blast.h>
7 #include <objalign.h>
8 #include <salutil.h>
9 #include <salsap.h>
10 #include <salstruc.h>
11 #include <bandalgn.h>
12 #include <txalign.h>
13 #include <uputil.h>
14 #include <salign.h>
15
/* Line length passed by this program to the alignment printers
 * (ShowTextAlignFromAnnot in write_output, showfastagap_fromalign in Main). */
#define LINE 60

/*#define TXFORMAT_OPTIONS (TXALIGN_END_NUM+TXALIGN_HTML+TXALIGN_COMPRESS+TXALIGN_MISMATCH) */
/* Text-alignment option mask. It is not referenced by the code visible in
 * this file; the display option actually used comes from the -D argument. */
#define TXFORMAT_OPTIONS (TXALIGN_COMPRESS)
/* Alignment method codes. Main passes the selected method to
 * SeqLocListToSeqAlign and refers to ALN_BLAST and ALN_SPLICING by name;
 * the other codes are not referenced in the code visible here. */
#define ALN_FASTA 1
#define ALN_SIM2ALN 2
#define ALN_SIM3ALN 3
#define ALN_CSIM 4
#define ALN_SIM4 5
#define ALN_BANDALIGN 6
#define ALN_BLAST 7
#define ALN_BLASTBAND 8
#define ALN_SPLICING 9
30
/* Indices into myargs[] below. Each value must match the position of the
 * corresponding entry in the table. */
#define MYARGI 0      /* -i input file */
#define MYARGOD 1     /* -o text alignment output file */
#define MYARGOS 2     /* -O SeqAlign output file */
#define MYARGOF 3     /* -f output format */
#define MYARGTRS 4    /* -T align translation */
#define MYARGPROT 5   /* -P input contains proteins */
#define MYARGMET 6    /* -A alignment method */
#define MYARGBA 7     /* -t banded alignment method */
#define MYARGMA 8     /* -r match reward */
#define MYARGMS 9     /* -p mismatch penalty */
#define MYARGGON 10   /* -G gap open penalty (nucleotide) */
#define MYARGGEN 11   /* -E gap extension penalty (nucleotide) */
#define MYARGGO 12    /* -g gap open penalty (aa); not read by Main in this file */
#define MYARGGE 13    /* -e gap extension penalty (aa); not read by Main in this file */

#define MYARGMAT 14   /* -M matrix name */
#define MYARGWS 15    /* -w BLAST word size */
#define MYARGGXD 16   /* -X BLAST gap x dropoff */
#define MYARGGXDF 17  /* -Z BLAST final gap x dropoff */
#define MYARGFIL 18   /* -F BLAST filter */
#define MYARGGAPPED 19 /* -N use new gapped BLAST */
#define MYARGDOTS 20  /* -D display options */
#define MYARGMDIN 21  /* -m multi-dimensional alignment display */

/* Number of entries in myargs[]; passed to GetArgs by Main. */
#define NUMARGS 22
/*
 * Command-line argument table handed to GetArgs("blastalign", ...).
 * Main reads the parsed values back through .strvalue / .intvalue using the
 * MYARG* indices above.
 * Each entry starts with the help text; the following fields are presumably
 * default value, lower bound, upper bound, optional flag, option letter and
 * value type -- confirm against the Args declaration in the toolkit headers.
 *
 * NOTE(review): the -A help text advertises 10=BlastBandAlign (and uses 10 as
 * default and maximum) while ALN_BLASTBAND is defined as 8 in this file --
 * confirm which value SeqLocListToSeqAlign expects.
 */
Args myargs[NUMARGS] = {
{"Input file", NULL, NULL, NULL, TRUE, 'i', ARG_STRING, 0.0,0,NULL},
{"Output file for Text Alignment(NULL==stdout)", NULL, NULL, NULL, TRUE, 'o', ARG_STRING, 0.0,0,NULL},
{"Output file for SeqAlign", "blastalign.sat", NULL, NULL, FALSE, 'O', ARG_STRING, 0.0,0,NULL},
{"The output format 1=TEXT 2=SeqAlign 3=both 4=FASTA+gap", "3", "1", "4", TRUE, 'f', ARG_INT, 0.0, 0, NULL},
{"Align translation", "F", NULL, NULL, TRUE, 'T', ARG_BOOLEAN, 0.0, 0, NULL},
{"Input file contains Proteins", "F", NULL, NULL, TRUE, 'P', ARG_BOOLEAN, 0.0, 0, NULL},
{"Alignment method 1=No 4=CSIM 5=SIM4 6=BandAlign 7=BLAST 10=BlastBandAlign 9=mRNA2genomic", "10", "1", "10", TRUE, 'A', ARG_INT, 0.0, 0, NULL},
{"Banded alignment method", "2", "0", "5", TRUE, 't', ARG_INT, 0.0, 0, NULL},

/* Scoring parameters */
{"Match reward (nucleotide alignment)", "2", NULL, NULL, TRUE, 'r', ARG_INT, 0.0, 0, NULL},
{"Mismatch penalty (nucleotide alignment)", "-3", NULL, NULL, TRUE, 'p', ARG_INT, 0.0, 0, NULL},
{"Gap open penalty(nuc)", "10", NULL, NULL, TRUE, 'G', ARG_INT, 0.0, 0, NULL},
{"Gap extension penalty(nuc)", "2", NULL, NULL, TRUE, 'E', ARG_INT, 0.0, 0, NULL},
{"Gap open penalty(aa)", "10", NULL, NULL, TRUE, 'g', ARG_INT, 0.0, 0, NULL},
{"Gap extension penalty(aa)", "2", NULL, NULL, TRUE, 'e', ARG_INT, 0.0, 0, NULL},
{"Matrix", "BLOSUM62", NULL, NULL, TRUE, 'M', ARG_STRING, 0.0,0,NULL},

/* BLAST and display parameters */
{"Blast: Word size", "11", NULL, NULL, TRUE, 'w', ARG_INT, 0.0, 0, NULL},
{"Blast: Gapx dropoff", "50", NULL, NULL, TRUE, 'X', ARG_INT, 0.0, 0, NULL},
{"Blast: Gapx dropoff final", "50", NULL, NULL, TRUE, 'Z', ARG_INT, 0.0, 0, NULL},
{"Blast: Filter", "F", NULL, NULL, TRUE, 'F', ARG_BOOLEAN, 0.0, 0, NULL},
{"Use New Gapped Blast when using blast", "T", NULL, NULL, TRUE, 'N', ARG_BOOLEAN, 0.0, 0, NULL},
{"Options for display", "4", NULL, NULL, TRUE, 'D', ARG_INT, 0.0, 0, NULL},
{"Display multi-dimensional alignment", "T", NULL, NULL, TRUE, 'm', ARG_BOOLEAN, 0.0, 0, NULL},

};
83
84 static void seqalign_write (SeqAlignPtr salp, CharPtr name)
85 {
86 SeqAnnotPtr annot;
87 AsnIoPtr aip;
88
89 annot = SeqAnnotNew();
90 if (annot==NULL)
91 return;
92 annot->type = 2;
93 annot->data = salp;
94
95 aip = AsnIoOpen(name, "w");
96 if(aip !=NULL)
97 {
98 SeqAnnotAsnWrite(annot, aip, NULL);
99 AsnIoClose(aip);
100 }
101 }
102
103 static SeqAlignPtr write_output(SeqAlignPtr align, Uint1 output_type, CharPtr sat_name, CharPtr ali_name, Uint4 option)
104 {
105 SeqAnnotPtr annot;
106 AsnIoPtr aip;
107 FILE *fp;
108 Uint1 featureOrder[FEATDEF_ANY];
109 Uint1 groupOrder[FEATDEF_ANY];
110
111 if(align == NULL)
112 return NULL;
113 annot = SeqAnnotNew();
114 if (annot==NULL)
115 return NULL;
116 annot->type = 2;
117 annot->data = align;
118
119 if((output_type&1) == 1 ) {
120 if(ali_name==NULL) {
121 fp=stdout;
122 } else {
123 fp = FileOpen(ali_name, "w");
124 }
125 if(fp !=NULL)
126 {
127 fprintf(fp, "\n\n\nALIGNMENT\n\n");
128 MemSet((Pointer)(featureOrder), 0, (size_t)(FEATDEF_ANY* sizeof(Uint1)));
129 MemSet((Pointer)(groupOrder), 0, (size_t)(FEATDEF_ANY* sizeof(Uint1)));
130 /**
131 featureOrder[FEATDEF_CDS] = 1;
132 groupOrder[FEATDEF_CDS] = 1;
133 **/
134 ShowTextAlignFromAnnot(annot, LINE, fp, (Uint1Ptr)(&featureOrder), (Uint1Ptr)(&groupOrder), option, NULL, NULL, NULL);
135 if (fp!=stdout) FileClose(fp);
136 }
137 else
138 Message(MSG_ERROR, "Fail to write permission for %s", ali_name);
139 }
140 if((output_type &2)== 2 )
141 {
142 aip = AsnIoOpen(sat_name, "w");
143 if(aip !=NULL)
144 {
145 SeqAnnotAsnWrite(annot, aip, NULL);
146 AsnIoClose(aip);
147 }
148 else
149 Message(MSG_ERROR, "Fail to write permission for %s", ali_name);
150 }
151 annot->data = NULL;
152 return align;
153 }
154
155 static void FindNuc(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
156 {
157 BioseqPtr PNTR bp;
158 BioseqPtr local_bsp;
159
160 bp = (BioseqPtr PNTR) data;
161 if (IS_Bioseq(sep))
162 {
163 local_bsp = (BioseqPtr) sep->data.ptrvalue;
164 if (ISA_na(local_bsp->mol))
165 *bp = local_bsp;
166 }
167 }
168
169 Int2 Main(void)
170 {
171 FILE *ifp,
172 *efp;
173 Char file_name[20];
174 CharPtr out_name=NULL,ali_name=NULL;
175 CharPtr path;
176 SeqAlignPtr align = NULL;
177 SeqEntryPtr sep = NULL,
178 septmp = NULL,
179 presep = NULL;
180 BioseqSetPtr bssp;
181 ValNodePtr sqlocs=NULL;
182 MashPtr msh;
183 BLAST_ScoreBlkPtr sbp;
184 Int4 k;
185 Int2 nseq,
186 i,
187 j = 0;
188 Int2 method;
189 Uint1 output_type,
190 mol_type;
191 Uint4 option = TXALIGN_MISMATCH;
192 Boolean rpt_err;
193 Boolean use_entrez = TRUE;
194 Boolean is_prot;
195
196
197 if(!GetArgs("blastalign", NUMARGS, myargs)) {
198 return 1;
199 }
200 if (! SeqEntryLoad()) return 1;
201
202 BioseqFetchInit(TRUE);
203 if(use_entrez)
204 EntrezBioseqFetchEnable ("Blastalign", TRUE);
205
206 /***** OPEN **********/
207 is_prot = (Boolean) myargs[MYARGPROT].intvalue;
208
209 /***READING FASTA file **/
210 if(myargs[MYARGI].strvalue ==NULL) {
211 ifp=stdin;
212 } else {
213 StringMove(file_name, myargs[MYARGI].strvalue);
214 if((ifp = FileOpen(file_name, "r")) == NULL)
215 {
216 fprintf(stderr,"Fail to open file %s\n", file_name);
217 return 1;
218 }
219 }
220 while ((septmp = FastaToSeqEntry (ifp, (Boolean)(is_prot==FALSE))) != NULL)
221 {
222 if (j == 0) sep = septmp;
223 else presep->next = septmp;
224 presep = septmp;
225 j++;
226 }
227 FileClose (ifp);
228 if (sep == NULL) {
229 fprintf(stderr,"No sequences read.\n");
230 return 1;
231 }
232 septmp = NULL;
233
234 if ( (bssp=BioseqSetNew()) != NULL ) {
235 bssp->_class = 14;
236 bssp->seq_set = sep;
237 septmp = SeqEntryNew ();
238 if ( septmp != NULL ) {
239 septmp->choice = 2;
240 septmp->data.ptrvalue = (Pointer) bssp;
241 sep = septmp;
242 }
243 }
244 if (septmp == NULL) {
245 fprintf(stderr,"No sequences read.\n");
246 return 1;
247 }
248 /* FASTA file ***/
249 /***** OUPUT **********/
250
251 output_type = myargs[MYARGOF].intvalue;
252 if ((output_type&1)==1) {
253 if(myargs[MYARGOD].strvalue!=NULL) {
254 ali_name=Malloc(StringLen(myargs[MYARGOD].strvalue)+1);
255 sprintf(ali_name, "%s", myargs[MYARGOD].strvalue);
256 } else {
257 ali_name=NULL;
258 }
259 }
260
261 if ((output_type&2)==2) {
262 if(myargs[MYARGOS].strvalue!=NULL) {
263 out_name=Malloc(StringLen(myargs[MYARGOS].strvalue)+1);
264 sprintf(out_name, "%s", myargs[MYARGOS].strvalue);
265 } else {
266 out_name=(CharPtr) Malloc(50);
267 sprintf(out_name, "blastalign.sat");
268 }
269 }
270 /***** PARAMETERS **********/
271
272 method = (Int2) myargs[MYARGMET].intvalue;
273 if (method < 1 && method > ALN_BLAST) method = ALN_BLAST;
274
275 msh = MashNew (is_prot);
276 msh->band_method = (Int2) myargs[MYARGBA].intvalue;
277 msh->reward = (Int2) myargs[MYARGMA].intvalue;
278 msh->penalty = (Int2) myargs[MYARGMS].intvalue;
279
280 if(msh->penalty > 0) {
281 fprintf(stderr,"The mismatch weight should be negative.\n");
282 return 1;
283 }
284 if(is_prot ||(Boolean) myargs[MYARGTRS].intvalue ) {
285 msh->gap_open = (Int4) 11;
286 msh->gap_extend = (Int4) 1;
287 } else {
288 msh->gap_open = (Int4) myargs[MYARGGON].intvalue;
289 msh->gap_extend = (Int4) myargs[MYARGGEN].intvalue;
290 }
291
292 if(msh->gap_open < 0) {
293 fprintf(stderr,"The gap-open penalty should be positive.\n");
294 msh->gap_open = 0;
295 }
296 if(msh->gap_extend < 0) {
297 fprintf(stderr,"The gap-extend penalty should be positive.\n");
298 msh->gap_extend = 0;
299 }
300 if (!is_prot) {
301 msh->wordsize = (Int4) myargs[MYARGWS].intvalue;
302 msh->gap_x_dropoff = (Int4) myargs[MYARGGXD].intvalue;
303 msh->gap_x_dropoff_final = (Int4) myargs[MYARGGXDF].intvalue;
304 }
305 msh->is_prot = is_prot;
306 msh->multidim = (Boolean) myargs[MYARGMDIN].intvalue;
307 msh->splicing = TRUE; /*** FALSE; **/
308 if (method == ALN_SPLICING)
309 msh->splicing = TRUE;
310 msh->map_align = FALSE; /****************TRUE;**************/
311
312 if(is_prot) {
313 msh->translate_prot = (Boolean)FALSE;
314 } else {
315 msh->translate_prot = (Boolean)myargs[MYARGTRS].intvalue;
316 }
317 if((Boolean)myargs[MYARGFIL].intvalue) {
318 if(msh->translate_prot || msh->is_prot) {
319 msh->filter = FILTER_SEG;
320 } else {
321 msh->filter = FILTER_DUST;
322 }
323 } else {
324 msh->filter= FILTER_NONE;
325 }
326 if(myargs[MYARGMAT].strvalue!=NULL) {
327 msh->matrixname=Malloc(StringLen(myargs[MYARGMAT].strvalue)+1);
328 StringCpy(msh->matrixname,myargs[MYARGMAT].strvalue);
329 } else {
330 msh->matrixname=Malloc(StringLen(myargs[MYARGMAT].strvalue)+1);
331 StringCpy(msh->matrixname,"BLOSUM62");
332 }
333
334 msh->use_gapped_blast = (Boolean) myargs[MYARGGAPPED].intvalue;
335
336 /***** RUN **********/
337 if (is_prot)
338 mol_type = Seq_mol_aa;
339 else
340 mol_type = Seq_mol_na;
341 /**/
342 sqlocs = SeqEntryToSeqLoc (sep, &nseq, mol_type);
343 /***/
344 /**
345 sqlocs = read_gifile ("uid");
346 sqlocs = gilst2seqloclst (sqlocs);
347 **/
348 align = SeqLocListToSeqAlign (sqlocs, (Int2)method, (Pointer)msh);
349
350 if (align!=NULL) {
351 if (output_type <4) {
352 option = myargs[MYARGDOTS].intvalue;
353 write_output (align, output_type, out_name, ali_name, option);
354 write_output (msh->transalp, output_type, out_name, ali_name, option);
355 }
356 else if (output_type == 4) {
357 showfastagap_fromalign (align, (Int4)LINE, stdout);
358 showfastagap_fromalign (msh->transalp, (Int4)LINE, stdout);
359 }
360 else fprintf (stderr, "Wrong output format\n");
361 }
362 else {
363 fprintf (stderr, "No alignment\n");
364 }
365 align=SeqAlignFree(align);
366 BioseqFetchDisable();
367 if(use_entrez) EntrezBioseqFetchDisable();
368 if (myargs[MYARGMAT].strvalue!=NULL) Free(msh->matrixname);
369 if(ali_name!=NULL) Free(ali_name);
370 if(out_name!=NULL) Free(out_name);
371 return 0;
372 }
373 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |