NCBI C Toolkit Cross Reference

C/demo/blastalign.c


  1 /* This is a sample program to call BandAlign */
  2 
  3 #include <ncbi.h>
  4 #include <tofasta.h>
  5 #include <lsqfetch.h>
  6 #include <blast.h>
  7 #include <objalign.h>
  8 #include <salutil.h>
  9 #include <salsap.h>
 10 #include <salstruc.h>
 11 #include <bandalgn.h>
 12 #include <txalign.h>
 13 #include <uputil.h>
 14 #include <salign.h>
 15 
/* Line width passed to the alignment printers
 * (ShowTextAlignFromAnnot and showfastagap_fromalign). */
#define LINE 60 

/*#define TXFORMAT_OPTIONS (TXALIGN_END_NUM+TXALIGN_HTML+TXALIGN_COMPRESS+TXALIGN_MISMATCH) */
/* Text-alignment display flags. Not referenced by the code in this file;
 * the display option actually used comes from the -D argument. */
#define TXFORMAT_OPTIONS (TXALIGN_COMPRESS) 
/* Alignment method codes. In this file only ALN_BLAST and ALN_SPLICING are
 * referenced; the -A value itself is handed to SeqLocListToSeqAlign.
 * NOTE(review): the -A help text advertises "10=BlastBandAlign" while
 * ALN_BLASTBAND is 8 here - confirm which value the aligner expects. */
#define ALN_FASTA     1
#define ALN_SIM2ALN   2
#define ALN_SIM3ALN   3
#define ALN_CSIM      4
#define ALN_SIM4      5
#define ALN_BANDALIGN 6
#define ALN_BLAST     7
#define ALN_BLASTBAND 8
#define ALN_SPLICING  9
 30 
/*
 * Indices into myargs[]. Each MYARG* value is the position of the matching
 * row in the table below, so the two lists must be kept in the same order
 * and NUMARGS must equal the number of rows.
 */
#define MYARGI    0
#define MYARGOD   1 
#define MYARGOS   2 
#define MYARGOF   3 
#define MYARGTRS  4
#define MYARGPROT 5
#define MYARGMET  6
#define MYARGBA   7
#define MYARGMA   8
#define MYARGMS   9
#define MYARGGON   10
#define MYARGGEN   11
/* MYARGGO / MYARGGE (-g / -e) are declared but never read by Main, which
 * uses fixed gap costs for protein and translated alignments. */
#define MYARGGO  12
#define MYARGGE  13

#define MYARGMAT  14
#define MYARGWS   15
#define MYARGGXD  16
#define MYARGGXDF 17
#define MYARGFIL  18
#define MYARGGAPPED 19
#define MYARGDOTS 20
#define MYARGMDIN 21

#define NUMARGS 22
/*
 * Command-line option table handed to GetArgs() by Main.
 * Rows are addressed through the MYARG* indices above; Main reads the parsed
 * values from the strvalue / intvalue members of each row.
 * NOTE(review): the -A help text lists "10=BlastBandAlign" and allows values
 * up to 10, while the ALN_* constants stop at 9 - confirm against the aligner.
 */
Args myargs[NUMARGS] = {
        {"Input file", NULL, NULL, NULL, TRUE, 'i', ARG_STRING, 0.0,0,NULL},
        {"Output file for Text Alignment(NULL==stdout)", NULL, NULL, NULL, TRUE, 'o', ARG_STRING, 0.0,0,NULL},
        {"Output file for SeqAlign", "blastalign.sat", NULL, NULL, FALSE, 'O', ARG_STRING, 0.0,0,NULL},
        {"The output format 1=TEXT 2=SeqAlign 3=both 4=FASTA+gap", "3", "1", "4", TRUE, 'f', ARG_INT, 0.0, 0, NULL},
        {"Align translation", "F", NULL, NULL, TRUE, 'T', ARG_BOOLEAN, 0.0, 0, NULL},
        {"Input file contains Proteins", "F", NULL, NULL, TRUE, 'P', ARG_BOOLEAN, 0.0, 0, NULL},
        {"Alignment method 1=No 4=CSIM 5=SIM4 6=BandAlign 7=BLAST 10=BlastBandAlign 9=mRNA2genomic", "10", "1", "10", TRUE, 'A', ARG_INT, 0.0, 0, NULL},
        {"Banded alignment method", "2", "0", "5", TRUE, 't', ARG_INT, 0.0, 0, NULL},

        {"Match reward (nucleotide alignment)", "2", NULL, NULL, TRUE, 'r', ARG_INT, 0.0, 0, NULL},
        {"Mismatch penalty (nucleotide alignment)", "-3", NULL, NULL, TRUE, 'p', ARG_INT, 0.0, 0, NULL},
        {"Gap open penalty(nuc)", "10", NULL, NULL, TRUE, 'G', ARG_INT, 0.0, 0, NULL},
        {"Gap extension penalty(nuc)", "2", NULL, NULL, TRUE, 'E', ARG_INT, 0.0, 0, NULL},
        {"Gap open penalty(aa)", "10", NULL, NULL, TRUE, 'g', ARG_INT, 0.0, 0, NULL},
        {"Gap extension penalty(aa)", "2", NULL, NULL, TRUE, 'e', ARG_INT, 0.0, 0, NULL},
        {"Matrix", "BLOSUM62", NULL, NULL, TRUE, 'M', ARG_STRING, 0.0,0,NULL},

        {"Blast: Word size", "11", NULL, NULL, TRUE, 'w', ARG_INT, 0.0, 0, NULL},
        {"Blast: Gapx dropoff", "50", NULL, NULL, TRUE, 'X', ARG_INT, 0.0, 0, NULL},
        {"Blast: Gapx dropoff final", "50", NULL, NULL, TRUE, 'Z', ARG_INT, 0.0, 0, NULL},
        {"Blast: Filter", "F", NULL, NULL, TRUE, 'F', ARG_BOOLEAN, 0.0, 0, NULL},
        {"Use New Gapped Blast when using blast", "T", NULL, NULL, TRUE, 'N', ARG_BOOLEAN, 0.0, 0, NULL},
        {"Options for display", "4", NULL, NULL, TRUE, 'D', ARG_INT, 0.0, 0, NULL},
        {"Display multi-dimensional alignment", "T", NULL, NULL, TRUE, 'm', ARG_BOOLEAN, 0.0, 0, NULL},

};
 83 
 84 static void seqalign_write (SeqAlignPtr salp, CharPtr name)
 85 {
 86         SeqAnnotPtr annot;
 87         AsnIoPtr aip;
 88 
 89         annot = SeqAnnotNew();
 90         if (annot==NULL)
 91            return;
 92         annot->type = 2;
 93         annot->data = salp;
 94 
 95         aip = AsnIoOpen(name, "w");
 96         if(aip !=NULL)
 97         {
 98                         SeqAnnotAsnWrite(annot, aip, NULL);
 99                         AsnIoClose(aip);
100         }
101 }
102 
103 static SeqAlignPtr write_output(SeqAlignPtr align, Uint1 output_type, CharPtr sat_name, CharPtr ali_name, Uint4 option)
104 {
105         SeqAnnotPtr annot;
106         AsnIoPtr aip;
107         FILE *fp;
108         Uint1 featureOrder[FEATDEF_ANY];
109         Uint1 groupOrder[FEATDEF_ANY];
110 
111         if(align == NULL)
112                 return NULL;
113         annot = SeqAnnotNew();
114         if (annot==NULL)
115            return NULL;
116         annot->type = 2;
117         annot->data = align;
118 
119         if((output_type&1) == 1 ) {
120           if(ali_name==NULL) {
121             fp=stdout;
122           } else {
123             fp = FileOpen(ali_name, "w");
124           }
125                 if(fp !=NULL)
126                 {
127                         fprintf(fp, "\n\n\nALIGNMENT\n\n");
128                         MemSet((Pointer)(featureOrder), 0, (size_t)(FEATDEF_ANY* sizeof(Uint1)));
129                         MemSet((Pointer)(groupOrder), 0, (size_t)(FEATDEF_ANY* sizeof(Uint1)));
130 /**
131                         featureOrder[FEATDEF_CDS] = 1;
132                         groupOrder[FEATDEF_CDS] = 1;
133 **/
134                         ShowTextAlignFromAnnot(annot, LINE, fp, (Uint1Ptr)(&featureOrder), (Uint1Ptr)(&groupOrder), option, NULL, NULL, NULL);
135                         if (fp!=stdout) FileClose(fp);
136                 }
137                 else
138                         Message(MSG_ERROR, "Fail to write permission for %s", ali_name);
139         }
140         if((output_type &2)== 2 )
141         {
142                 aip = AsnIoOpen(sat_name, "w");
143                 if(aip !=NULL)
144                 {
145                         SeqAnnotAsnWrite(annot, aip, NULL);
146                         AsnIoClose(aip);
147                 }
148                 else
149                         Message(MSG_ERROR, "Fail to write permission for %s", ali_name);
150         }
151         annot->data = NULL;
152         return align;
153 }
154 
155 static void FindNuc(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
156 {
157     BioseqPtr PNTR bp;
158     BioseqPtr local_bsp;
159  
160     bp = (BioseqPtr PNTR) data;
161     if (IS_Bioseq(sep))
162     {
163         local_bsp = (BioseqPtr) sep->data.ptrvalue;
164         if (ISA_na(local_bsp->mol))
165           *bp = local_bsp;
166     }
167 }
168 
169 Int2 Main(void)
170 {
171   FILE        *ifp, 
172               *efp;
173   Char        file_name[20];                   
174   CharPtr     out_name=NULL,ali_name=NULL;
175   CharPtr     path;
176   SeqAlignPtr align = NULL;
177   SeqEntryPtr sep = NULL,
178               septmp = NULL, 
179               presep = NULL;
180   BioseqSetPtr bssp;
181   ValNodePtr  sqlocs=NULL;
182   MashPtr     msh;
183   BLAST_ScoreBlkPtr sbp;
184   Int4        k;
185   Int2        nseq,
186               i, 
187               j = 0;
188   Int2        method;
189   Uint1       output_type,
190               mol_type;
191   Uint4       option = TXALIGN_MISMATCH; 
192   Boolean     rpt_err;
193   Boolean     use_entrez = TRUE;
194   Boolean     is_prot;
195 
196 
197   if(!GetArgs("blastalign", NUMARGS, myargs)) {
198      return 1;
199   }
200   if (! SeqEntryLoad())  return 1;
201 
202   BioseqFetchInit(TRUE);
203   if(use_entrez)
204      EntrezBioseqFetchEnable ("Blastalign", TRUE);
205 
206   /***** OPEN **********/
207   is_prot = (Boolean) myargs[MYARGPROT].intvalue;
208 
209 /***READING FASTA file **/
210   if(myargs[MYARGI].strvalue ==NULL) {
211     ifp=stdin;
212   } else {
213     StringMove(file_name, myargs[MYARGI].strvalue);
214     if((ifp = FileOpen(file_name, "r")) == NULL)
215       {
216         fprintf(stderr,"Fail to open file %s\n", file_name);
217         return 1;
218       }
219   }
220   while ((septmp = FastaToSeqEntry (ifp, (Boolean)(is_prot==FALSE))) != NULL)
221   {
222      if (j == 0) sep = septmp;
223      else  presep->next = septmp;
224      presep = septmp;
225      j++;
226   }
227   FileClose (ifp);
228   if (sep == NULL) {
229      fprintf(stderr,"No sequences read.\n");
230      return 1;
231   }
232   septmp = NULL;
233 
234   if ( (bssp=BioseqSetNew()) != NULL ) {
235      bssp->_class = 14;
236      bssp->seq_set = sep;
237      septmp = SeqEntryNew ();
238      if ( septmp  != NULL ) {
239         septmp->choice = 2;
240         septmp->data.ptrvalue = (Pointer) bssp;
241         sep = septmp;
242      }
243   }
244   if (septmp == NULL) {
245      fprintf(stderr,"No sequences read.\n");
246      return 1;
247   }
248 /* FASTA file ***/
249   /***** OUPUT **********/
250     
251   output_type = myargs[MYARGOF].intvalue;
252   if ((output_type&1)==1) {
253     if(myargs[MYARGOD].strvalue!=NULL) {
254       ali_name=Malloc(StringLen(myargs[MYARGOD].strvalue)+1);
255       sprintf(ali_name, "%s", myargs[MYARGOD].strvalue);
256     } else {
257       ali_name=NULL;
258     }
259   }
260 
261   if ((output_type&2)==2) {
262     if(myargs[MYARGOS].strvalue!=NULL) {
263       out_name=Malloc(StringLen(myargs[MYARGOS].strvalue)+1);
264       sprintf(out_name, "%s", myargs[MYARGOS].strvalue);
265     } else {
266       out_name=(CharPtr) Malloc(50);
267       sprintf(out_name, "blastalign.sat");
268     }
269   }
270   /***** PARAMETERS **********/
271 
272   method = (Int2) myargs[MYARGMET].intvalue;
273   if (method < 1 && method > ALN_BLAST) method = ALN_BLAST;
274 
275   msh = MashNew (is_prot);
276   msh->band_method = (Int2) myargs[MYARGBA].intvalue;
277   msh->reward = (Int2) myargs[MYARGMA].intvalue;
278   msh->penalty = (Int2) myargs[MYARGMS].intvalue;
279 
280   if(msh->penalty > 0) {
281      fprintf(stderr,"The mismatch weight should be negative.\n");
282      return 1;
283   }
284   if(is_prot ||(Boolean) myargs[MYARGTRS].intvalue ) {
285     msh->gap_open = (Int4) 11; 
286     msh->gap_extend = (Int4) 1;
287   } else {
288     msh->gap_open = (Int4) myargs[MYARGGON].intvalue;
289     msh->gap_extend = (Int4) myargs[MYARGGEN].intvalue;
290   }
291 
292   if(msh->gap_open < 0) {
293      fprintf(stderr,"The gap-open penalty should be positive.\n");
294      msh->gap_open = 0;
295   }
296   if(msh->gap_extend < 0) {
297      fprintf(stderr,"The gap-extend penalty should be positive.\n");
298      msh->gap_extend = 0;
299   }
300   if (!is_prot) {
301      msh->wordsize = (Int4) myargs[MYARGWS].intvalue;
302      msh->gap_x_dropoff = (Int4) myargs[MYARGGXD].intvalue;
303      msh->gap_x_dropoff_final = (Int4) myargs[MYARGGXDF].intvalue;
304   }
305   msh->is_prot = is_prot;
306   msh->multidim = (Boolean) myargs[MYARGMDIN].intvalue;
307   msh->splicing = TRUE;    /*** FALSE; **/
308   if (method == ALN_SPLICING) 
309   msh->splicing = TRUE;
310   msh->map_align = FALSE;   /****************TRUE;**************/
311 
312   if(is_prot) {
313     msh->translate_prot = (Boolean)FALSE;
314   } else {
315     msh->translate_prot = (Boolean)myargs[MYARGTRS].intvalue;
316   }
317   if((Boolean)myargs[MYARGFIL].intvalue) {
318     if(msh->translate_prot || msh->is_prot) {
319       msh->filter = FILTER_SEG;
320     } else {
321       msh->filter = FILTER_DUST;
322     }
323   } else {
324     msh->filter= FILTER_NONE;
325   }
326   if(myargs[MYARGMAT].strvalue!=NULL) {
327     msh->matrixname=Malloc(StringLen(myargs[MYARGMAT].strvalue)+1);
328     StringCpy(msh->matrixname,myargs[MYARGMAT].strvalue);
329   } else {
330     msh->matrixname=Malloc(StringLen(myargs[MYARGMAT].strvalue)+1);
331     StringCpy(msh->matrixname,"BLOSUM62");
332   }
333 
334   msh->use_gapped_blast = (Boolean) myargs[MYARGGAPPED].intvalue;
335 
336   /***** RUN **********/
337   if (is_prot)
338      mol_type = Seq_mol_aa;
339   else
340      mol_type = Seq_mol_na; 
341 /**/
342   sqlocs = SeqEntryToSeqLoc (sep,  &nseq, mol_type);
343 /***/
344 /**
345   sqlocs = read_gifile ("uid");
346   sqlocs = gilst2seqloclst (sqlocs);
347 **/
348   align = SeqLocListToSeqAlign (sqlocs, (Int2)method, (Pointer)msh);
349 
350   if (align!=NULL) {
351     if (output_type <4) {
352        option = myargs[MYARGDOTS].intvalue;
353        write_output (align, output_type, out_name, ali_name, option);
354        write_output (msh->transalp, output_type, out_name, ali_name, option);
355     }
356     else if (output_type == 4) {
357        showfastagap_fromalign (align, (Int4)LINE, stdout);
358        showfastagap_fromalign (msh->transalp, (Int4)LINE, stdout);
359     }
360     else fprintf (stderr, "Wrong output format\n");
361   }
362   else {
363      fprintf (stderr, "No alignment\n");
364   }
365   align=SeqAlignFree(align);
366   BioseqFetchDisable();
367   if(use_entrez) EntrezBioseqFetchDisable();
368   if (myargs[MYARGMAT].strvalue!=NULL) Free(msh->matrixname);
369   if(ali_name!=NULL) Free(ali_name);
370   if(out_name!=NULL) Free(out_name);
371   return 0;
372 }
373 

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.