NCBI C++ ToolKit
seqfetch.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: seqfetch.cpp 55647 2012-09-05 14:54:38Z ludwigf $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aaron Ucko, Mati Shomrat, NCBI
27 *
28 * File Description:
29 * fasta-file generator application
30 *
31 * ===========================================================================
32 */
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbiapp.hpp>
35 #include <corelib/ncbienv.hpp>
36 #include <corelib/ncbiargs.hpp>
37 #include <corelib/ncbireg.hpp>
38 
40 #include <serial/serial.hpp>
41 #include <serial/objistr.hpp>
42 
43 #include <objects/general/Date.hpp>
47 
55 
60 
62 #include <objmgr/graph_ci.hpp>
63 #include <objmgr/scope.hpp>
64 #include <objmgr/util/sequence.hpp>
65 
67 
70 
73 
74 // ----------------------------------------------------------------------------
75 enum EOutFormat {
76 // ----------------------------------------------------------------------------
81  OF_FASTA = 5,
85 };
86 
87 // ----------------------------------------------------------------------------
89 // ----------------------------------------------------------------------------
95 };
96 
97 // ----------------------------------------------------------------------------
98 enum EMaxPlex {
99 // ----------------------------------------------------------------------------
105 };
106 
107 // ----------------------------------------------------------------------------
struct CTextColumn
// ----------------------------------------------------------------------------
//  One column of a plain-text table. Entries are collected with AddStr() and
//  read back with GetStr(), which right-pads each entry with blanks so that
//  every row of the column has the same width (the length of the longest
//  entry added so far).
// ----------------------------------------------------------------------------
{
    size_t m_width;             // length of the longest entry added so far
    vector<string> m_entries;   // entries in insertion (row) order

    CTextColumn(): m_width(0) {}

    // Append an entry and widen the column if the entry is longer than any
    // seen before. Returns *this so that calls can be chained.
    CTextColumn& AddStr(const string& str) {
        m_entries.push_back(str);
        // The width must track the MAXIMUM entry length; comparing with '<'
        // would leave it stuck at 0 and make the padding in GetStr() wrap.
        if (str.size() > m_width) {
            m_width = str.size();
        }
        return *this;
    }

    // Return entry `index` padded with trailing blanks to the column width.
    // `index` must be less than Height(); it is not range checked.
    string GetStr(size_t index) const {
        const string& str = m_entries[index];
        // Clamp the pad so an entry longer than m_width (possible only if the
        // public members were modified directly) cannot underflow size_t.
        const size_t pad = (m_width > str.size()) ? m_width - str.size() : 0;
        return str + string(pad, ' ');
    }

    // Current column width in characters.
    size_t Width() const { return m_width; }
    // Number of entries (rows) in the column.
    size_t Height() const { return m_entries.size(); }
};
130 
131 // ----------------------------------------------------------------------------
133 // ----------------------------------------------------------------------------
134 {
135 public:
136  void Init();
137  int Run();
138 
139 private:
140  bool xWriteSequence(
141  const string&);
143  const CID1server_back&);
144 
145  bool xLookupFeatPlex(
146  const string&,
148  bool xLookupState(
149  const string&,
151  bool xLookupIds(
152  const string&,
154  bool xLookupHistory(
155  const string&,
157  bool xLookupRevisions(
158  const string&,
160 
161  int xProcessFastaId();
162  int xProcessGi();
163  int xProcessGiFile();
164  int xProcessEntrezQuery();
165  int xProcessFlatId();
166 
167  bool xPoliceArguments();
168 
169  bool xNextId(
170  string&);
171  bool xFlatIdToGi(
172  const string&,
173  int&);
174 
177  string xEntrezQuery();
178  string xDatabase();
181 
184  list<string> m_Ids;
185  list<string>::const_iterator m_Idit;
186 };
187 
188 // ----------------------------------------------------------------------------
190 // ----------------------------------------------------------------------------
191 {
192  auto_ptr<CArgDescriptions> pArgDesc(new CArgDescriptions);
193  pArgDesc->SetUsageContext(
194  GetArguments().GetProgramBasename(),
195  "Fetch sequence by ID",
196  false);
197 
198  {{ //output
199  pArgDesc->AddDefaultKey(
200  "o",
201  "OutputFile",
202  "Output file name",
204  "-",
206  }}
207 
208  {{ //type
209  pArgDesc->AddDefaultKey(
210  "t",
211  "Format",
212  "Output Format",
214  "1",
216  pArgDesc->SetConstraint(
217  "t",
218  new CArgAllow_Integers(1, 8));
219  }}
220 
221  {{ //database
222  pArgDesc->AddOptionalKey(
223  "d",
224  "Database",
225  "Database to use",
228  pArgDesc->SetConstraint(
229  "d",
230  &(*new CArgAllow_Strings, "n", "p"));
231  }}
232 
233  {{ //entity
234  pArgDesc->AddOptionalKey(
235  "e",
236  "Entity",
237  "Entity (retrieval) number to dump",
240  pArgDesc->SetConstraint(
241  "e",
242  new CArgAllow_Integers(0, kMax_Int));
243  }}
244 
245  {{ //lookup type
246  pArgDesc->AddDefaultKey(
247  "i",
248  "Lookup",
249  "Lookup type",
251  0,
253  pArgDesc->SetConstraint(
254  "i",
255  new CArgAllow_Integers(0, 4));
256  }}
257 
258  {{ //GI
259  pArgDesc->AddOptionalKey(
260  "g",
261  "GI",
262  "GI to fetch",
265  pArgDesc->SetConstraint(
266  "g",
267  new CArgAllow_Integers(0, kMax_Int));
268  }}
269 
270  {{ //ids from feedfile
271  pArgDesc->AddOptionalKey(
272  "G",
273  "FeedFile",
274  "File containing IDs to fetch",
277  }}
278 
279  {{ //compexity
280  pArgDesc->AddDefaultKey(
281  "c",
282  "MaxComplexity",
283  "Maximum complexity",
285  0,
288  pArgDesc->SetConstraint(
289  "c",
290  new CArgAllow_Integers(0, 4));
291  }}
292 
293  {{ //flattened
294  pArgDesc->AddOptionalKey(
295  "f",
296  "Flattened",
297  "Flattened SeqID, format may be "
298  "'type([name][,[accession][,[release][,version]]])' "
299  "[e.g., '5(HUMHBB)'], or "
300  "type=accession[.version], "
301  "or type:number",
304  }}
305 
306  {{ //fasta ID
307  pArgDesc->AddOptionalKey(
308  "s",
309  "Seqid",
310  "FASTA style ID to fetch",
313  }}
314 
315  {{ //log file
316  pArgDesc->AddOptionalKey(
317  "l",
318  "Log",
319  "Log file",
321  0);
322  }}
323 
324  {{ //by entrez query
325  pArgDesc->AddOptionalKey(
326  "q",
327  "QueryString",
328  "Generate GIs by Entrez query from command line",
330  }}
331 
332  {{ //by entrez query
333  pArgDesc->AddOptionalKey(
334  "Q",
335  "QueryFile",
336  "Generate GIs by Entrez query from file",
338  }}
339 
340  {{ //gi only list
341  pArgDesc->AddFlag(
342  "n",
343  "GiList");
344  }}
345 
346  {{ //extra features
347  pArgDesc->AddOptionalKey(
348  "F",
349  "ExtraFeats",
350  "Add features, delimited by ',': "
351  "Allowed Values are "
352  "SNP, SNP_graph, CDD, MGC, HPRD, STS, tRNA, microRNA",
354  }}
355  SetupArgDescriptions(pArgDesc.release());
356 }
357 
358 // ----------------------------------------------------------------------------
360 // ----------------------------------------------------------------------------
361 {
362  const CArgs& args = GetArgs();
363  if (args["l"]) {
364  SetDiagStream(&args["l"].AsOutputFile());
365  }
366 
372 
373  if (!xPoliceArguments()) {
374  return 1;
375  }
376 
377  if (args["s"]) {
378  return xProcessFastaId();
379  }
380  if (args["g"]) {
381  return xProcessGi();
382  }
383  if (args["G"]) {
384  return xProcessGiFile();
385  }
386  if (args["f"]) {
387  return xProcessFlatId();
388  }
389  if (args["q"]) {
390  return xProcessEntrezQuery();
391  }
392  if (args["Q"]) {
393  return xProcessEntrezQuery();
394  }
395 
396  return 0;
397 }
398 
399 // ----------------------------------------------------------------------------
401 // ----------------------------------------------------------------------------
402 {
403  string idstr = GetArgs()["s"].AsString();
404  if (!xWriteSequence(idstr)) {
405  return 1;
406  }
407  return 0;
408 }
409 
410 // ----------------------------------------------------------------------------
412 // ----------------------------------------------------------------------------
413 {
414  string idstr = GetArgs()["g"].AsString();
415  if (!xWriteSequence(idstr)) {
416  return 1;
417  }
418  return 0;
419 }
420 
421 // ----------------------------------------------------------------------------
423 // ----------------------------------------------------------------------------
424 {
425  CNcbiIstream& feedfile = GetArgs()["G"].AsInputFile();
426  while (feedfile && !feedfile.eof()) {
427  string idstr;
428  feedfile >> idstr;
429  if (idstr.empty()) {
430  continue;
431  }
432  size_t sep = idstr.find_first_of(":=(");
433  if (sep == string::npos) {
434  if (!xWriteSequence(idstr)) {
435  return 1;
436  }
437  continue;
438  }
439  else {
440  int gi(0);
441  if (!xFlatIdToGi(idstr, gi)) {
442  return 1;
443  }
444  if (!xWriteSequence(NStr::IntToString(gi))) {
445  return 1;
446  }
447  continue;
448  }
449  }
450  return 0;
451 }
452 
453 // ----------------------------------------------------------------------------
455 // ----------------------------------------------------------------------------
456 {
457  CEntrez2Client e2Client;
458  e2Client.SetDefaultRequest().SetTool("seqfetch");
459 
462  pE2Element->SetStr(xEntrezQuery());
463 
464  CEntrez2_eval_boolean e2Eval;
465  e2Eval.SetReturn_UIDs(true);
466  CEntrez2_boolean_exp& e2Query = e2Eval.SetQuery();
467  e2Query.SetExp().push_back(pE2Element);
468  e2Query.SetDb() = CEntrez2_db_id(xDatabase());
469  CRef<CEntrez2_boolean_reply> pE2Reply = e2Client.AskEval_boolean(e2Eval);
470 
471  if (!pE2Reply->GetCount()) {
472  cerr << "Query error: Entrez query return no results" << endl;
473  return 1;
474  }
475  for (CEntrez2_id_list::TConstUidIterator cit = pE2Reply->GetUids()
477  !cit.AtEnd();
478  ++cit) {
479  if (!xWriteSequence(NStr::IntToString(*cit))) {
480  return 1;
481  }
482  }
483  return 0;
484 }
485 
486 // ----------------------------------------------------------------------------
488 // ----------------------------------------------------------------------------
489 {
490  CID1Client id1Client;
491  string flatid(GetArgs()["f"].AsString());
492  int gi(0);
493  if (!xFlatIdToGi(flatid, gi)) {
494  return 1;
495  }
497  return 0;
498 }
499 
500 // ----------------------------------------------------------------------------
502  const string& idstr,
503  CRef<CID1server_back> pLookup)
504 // ----------------------------------------------------------------------------
505 {
506  CID1Client id1Client;
507  EEntry_complexities maxplex = xMaxPlex();
508  if (maxplex == eEntry_complexities_entry) {
509  return false;
510  }
512  pMaxPlex->SetMaxplex(maxplex);
513  pMaxPlex->SetGi(id1Client.AskGetgi(CSeq_id(idstr)));
514  id1Client.AskGetsefromgi(*pMaxPlex, pLookup);
515  return true;
516 }
517 
518 // ----------------------------------------------------------------------------
520  const string& idstr,
521  CRef<CID1server_back> pLookup)
522 // ----------------------------------------------------------------------------
523 {
524  CID1Client id1Client;
525  int gi = id1Client.AskGetgi(CSeq_id(idstr));
526  int state = id1Client.AskGetgistate(gi, pLookup);
527  if (xOutFormat() == OF_FASTA) {
528  xOutStream() << "gi=" << gi << ", states: ";
529  switch(state & 0xFF) {
530  default:
531  xOutStream() << "UNKNOWN";
532  break;
533  case 0x0:
534  xOutStream() << "NONEXISTENT";
535  break;
536  case 0x10:
537  xOutStream() << "DELETED";
538  break;
539  case 0x20:
540  xOutStream() << "REPLACED";
541  break;
542  case 0x40:
543  xOutStream() << "LIVE";
544  break;
545  }
546  if (state & 0x100) {
547  xOutStream() << "|SUPPRESSED";
548  }
549  if (state & 0x200) {
550  xOutStream() << "|WITHDRAWN";
551  }
552  if (state & 0x400) {
553  xOutStream() << "|CONFIDENTIAL";
554  }
555  xOutStream() << endl;
556  }
557  return true;
558 }
559 
560 // ----------------------------------------------------------------------------
562  const string& idstr,
563  CRef<CID1server_back> pLookup)
564 // ----------------------------------------------------------------------------
565 {
566  CID1Client id1Client;
567  int gi = id1Client.AskGetgi(CSeq_id(idstr));
568  id1Client.AskGetseqidsfromgi(gi, pLookup);
569  return true;
570 }
571 
572 // ----------------------------------------------------------------------------
574  const string& idstr,
575  CRef<CID1server_back> pLookup)
576 // ----------------------------------------------------------------------------
577 {
578  CID1Client id1Client;
579  int gi = id1Client.AskGetgi(CSeq_id(idstr));
580  id1Client.AskGetgihist(gi, pLookup);
581  return true;
582 }
583 
584 // ----------------------------------------------------------------------------
586  const string& idstr,
587  CRef<CID1server_back> pLookup)
588 // ----------------------------------------------------------------------------
589 {
590  CID1Client id1Client;
591  int gi = id1Client.AskGetgi(CSeq_id(idstr));
592  id1Client.AskGetgirev(gi, pLookup);
593  return true;
594 }
595 
596 // ----------------------------------------------------------------------------
598 // ----------------------------------------------------------------------------
599 {
600  const CArgs& args = GetArgs();
601 
602  int idspecs = 0;
603  if (args["s"]) ++idspecs;
604  if (args["g"]) ++idspecs;
605  if (args["G"]) ++idspecs;
606  if (args["f"]) ++idspecs;
607  if (args["q"]) ++idspecs;
608  if (args["Q"]) ++idspecs;
609  if (1 != idspecs) {
610  cerr << "Command line error: Need exactly one out of [s|g|G|f|q|Q]" << endl;
611  return false;
612  }
613  if (args["q"] && !args["d"]) {
614  cerr << "Command line error: Option \"q\" needs option \"d\" specified" << endl;
615  return false;
616  }
617  if (args["Q"] && !args["d"]) {
618  cerr << "Command line error: Option \"Q\" needs option \"d\" specified" << endl;
619  return false;
620  }
621  return true;
622 }
623 
624 // ----------------------------------------------------------------------------
626  const string& idstr)
627 // ----------------------------------------------------------------------------
628 {
629  const CArgs& args = GetArgs();
631 
632  //factor by lookup type
633  if (LT_SEQ_ENTRY == xLookupType()) {
635  xLookupFeatPlex(idstr, pLookup);
636  pReply = pLookup;
637  }
638 
639  if (LT_SEQ_STATE == xLookupType()) {
641  xLookupState(idstr, pLookup);
642  if (xOutFormat() != OF_FASTA) {
643  pReply = pLookup;
644  }
645  }
646 
647  if (LT_SEQ_IDS == xLookupType()) {
649  xLookupIds(idstr, pLookup);
650  if (xOutFormat() != OF_FASTA) {
651  pReply = pLookup;
652  }
653  }
654 
655  if (LT_SEQ_HISTORY == xLookupType()) {
657  xLookupHistory(idstr, pLookup);
658  if (xOutFormat() == OF_FASTA) {
659  WriteHistoryTable(*pLookup);
660  }
661  else {
662  pReply = pLookup;
663  }
664  }
665 
666  if (LT_SEQ_REVISIONS == xLookupType()) {
668  xLookupRevisions(idstr, pLookup);
669  if (xOutFormat() == OF_FASTA) {
670  WriteHistoryTable(*pLookup);
671  }
672  else {
673  pReply = pLookup;
674  }
675  }
676 
677  //based on what's in the reply object, setup a bioseq handle
678  CBioseq_Handle bsh;
679  if (pReply) {
680  const CSeq_entry* pSe = dynamic_cast<const CSeq_entry*>(
681  pReply.GetPointer());
683  if(pSe && (format == OF_FASTA || format == OF_GENBANK ||
684  format == OF_GENPEPT || format == OF_QUALITY_SCORES)) {
685  //need handle for formatting
688  CSeq_id id(idstr);
689  bsh = m_pScope->GetBioseqHandleFromTSE(id, seh);
690  if (!bsh) {
691  cerr << "Query error: Bioseq not found: " << id.AsFastaString()
692  << endl;
693  return false;
694  }
695  }
696  }
697  else {//use object manager
699  CSeq_id id(idstr);
701  if (!bsh) {
702  cerr << "Query error: Bioseq not found: " << id.AsFastaString()
703  << endl;
704  }
705  pReply = bsh.GetTopLevelEntry().GetCompleteSeq_entry();
706  }
707 
708 
709  switch(xOutFormat()) {
710  default: {
711  break;
712  }
713 
715  /*TODO*/
716  break;
717  }
718 
719  case OF_ENTREZ_DOCSUMS: {
720  CEntrez2Client e2Client;
721  e2Client.SetDefaultRequest().SetTool("seqfetch");
722 
723  CEntrez2_id_list idlist;
724  idlist.SetDb() = CEntrez2_db_id(xDatabase());
725  idlist.SetNum(1);
726  idlist.SetUids().resize(idlist.sm_UidSize);
727 
729  *it = NStr::StringToInt(idstr);
730  CRef<CEntrez2_docsum_list> pDocsums = e2Client.AskGet_docsum(idlist);
731  if (!pDocsums->GetCount()) {
732  cerr << "Query error: Entrez query return no results" << endl;
733  return false;
734  }
735 
736  string caption, title;
738  it; ++it) {
739  if (it->GetField_name() == "Caption") {
740  caption = it->GetField_value();
741  }
742  else if (it->GetField_name() == "Title") {
743  title = it->GetField_value();
744  }
745  }
746  xOutStream() << '>';
747  if ( !caption.empty() ) {
748  xOutStream() << caption;
749  }
750  xOutStream() << ' ';
751  if ( !title.empty() ) {
752  xOutStream() << title;
753  }
754  break;
755  }
756 
757  case OF_QUALITY_SCORES: {
758  string best_id = FindBestChoice(
760  ->GetSeqIdString(true);
761  for (CGraph_CI it(bsh); it; ++it) {
762  string title = it->GetTitle();
763  if (title.find("uality") == string::npos) {
764  continue;
765  }
766 
767  const CByte_graph& bg = it->GetGraph().GetByte();
768  xOutStream() << ">" << best_id << " " << title
769  << " (Length: " << it->GetNumval()
770  << ", Min: " << bg.GetMin()
771  << ", Max: " << bg.GetMax() << ")" << endl;
772 
773  for (unsigned int u=0; u < bg.GetValues().size(); ++u) {
774  int value = static_cast<int>(bg.GetValues()[u]);
775  xOutStream() << setw(3) << value;
776  if (19 == u % 20) {
777  xOutStream() << endl;
778  }
779  }
780  }
781  break;
782  }
783 
784  case OF_FASTA: {
786  if (LT_SEQ_ENTRY == lt) {
790  out.Write(bsh);
791  xOutStream() << endl;
792  break;
793  }
794  if (LT_SEQ_IDS == lt) {
795  list<CRef<CSeq_id > > ids = bsh.GetBioseqCore()->GetId();
796  for (list<CRef<CSeq_id > >::const_iterator cit = ids.begin();
797  cit != ids.end(); cit++) {
798  if (cit != ids.begin()) xOutStream() << '|';
799  (*cit)->WriteAsFasta(xOutStream());
800  }
801  break;
802  }
803  }
804 
805  case OF_GENPEPT: {
806  CSeq_id id(idstr);
808 
814  fc.SetViewProt();
815 
816  CFlatFileGenerator fg(fc);
818  fg.Generate(seh, xOutStream());
819  break;
820  }
821 
822  case OF_GENBANK: {
823  CSeq_id id(idstr);
825 
831 
832  CFlatFileGenerator fg(fc);
834  fg.Generate(seh, xOutStream());
835  break;
836  }
837 
838  case OF_ASN_BINARY: {
839  CConstRef<CSeq_entry> pSeqEntry =
841  xOutStream() << MSerial_AsnBinary << *pSeqEntry;
842  break;
843  }
844 
845  case OF_ASN_TEXT: {
846  CConstRef<CSeq_entry> pSeqEntry =
848  xOutStream() << MSerial_AsnText << *pSeqEntry << endl;
849  break;
850  }
851  }
852  return true;
853 }
854 
855 // ----------------------------------------------------------------------------
857  const CID1server_back& id1Reply)
858 // ----------------------------------------------------------------------------
859 {
860  CTextColumn gis, dates, dbs, retrievals;
861  gis.AddStr("GI").AddStr("--");
862  dates.AddStr("Loaded").AddStr("------");
863  dbs.AddStr("DB").AddStr("--");
864  retrievals.AddStr("Retrieval No.").AddStr("-------------");
865  for (CTypeConstIterator<CSeq_hist_rec> it = ConstBegin(id1Reply);
866  it; ++it) {
867  int gi = 0;
868  string db, retrieval;
869 
870  if ( it->GetDate().IsStr() ) {
871  dates.AddStr(it->GetDate().GetStr());
872  }
873  else {
874  CNcbiOstrstream oss;
875  const CDate_std& date = it->GetDate().GetStd();
876  oss << setfill('0') << setw(2) << date.GetMonth() << '/'
877  << setw(2) << date.GetDay() << '/' << date.GetYear();
878  dates.AddStr(CNcbiOstrstreamToString(oss));
879  }
880 
881  for (CSeq_hist_rec::TIds::const_iterator cit=it->GetIds().begin();
882  cit != it->GetIds().end(); ++cit) {
883  if ((*cit)->IsGi()) {
884  gi = (*cit)->GetGi();
885  continue;
886  }
887  if ((*cit)->IsGeneral()) {
888  db = (*cit)->GetGeneral().GetDb();
889  const CObject_id& tag = (*cit)->GetGeneral().GetTag();
890  if (tag.IsStr()) {
891  retrieval = tag.GetStr();
892  }
893  else {
894  retrieval = NStr::IntToString(tag.GetId());
895  }
896  }
897  }
898  gis.AddStr(NStr::IntToString(gi));
899  dbs.AddStr(db);
900  retrievals.AddStr(retrieval);
901  }
902 
903  for (unsigned int n = 0; n < gis.Height(); n++) {
904  xOutStream() << gis.GetStr(n) << " " << dates.GetStr(n) << " "
905  << dbs.GetStr(n) << " " << retrievals.GetStr(n) << endl;
906  }
907  return true;
908 }
909 
910 // ----------------------------------------------------------------------------
912  string& idstr)
913 // ----------------------------------------------------------------------------
914 {
915  if ( m_Idit != m_Ids.end()) {
916  idstr = *m_Idit;
917  m_Idit++;
918  return true;
919  }
920  return false;
921 }
922 
923 // ----------------------------------------------------------------------------
925 // ----------------------------------------------------------------------------
926 {
927  static CNcbiOstream& ostr = GetArgs()["o"].AsOutputFile();
928  return ostr;
929 }
930 
931 // ----------------------------------------------------------------------------
933 // ----------------------------------------------------------------------------
934 {
935  return EOutFormat(GetArgs()["t"].AsInteger());
936 }
937 
938 // ----------------------------------------------------------------------------
940 // ----------------------------------------------------------------------------
941 {
942  return ELookupType(GetArgs()["i"].AsInteger());
943 }
944 
945 // ----------------------------------------------------------------------------
947 // ----------------------------------------------------------------------------
948 {
949  unsigned int mpReturn(eEntry_complexities_entry);
950  const CArgs& args = GetArgs();
951  list<string> extraFeats;
952  if (args["F"]) {
953  NStr::Split(args["F"].AsString(), ",", extraFeats);
954  }
955  EMaxPlex maxPlex = EMaxPlex(args["c"].AsInteger());
956 
957  if (extraFeats.empty() && MP_SEQENTRY == maxPlex) {
958  return EEntry_complexities(mpReturn);
959  }
960 
961  switch(maxPlex) {
962  default:
963  break;
964  case MP_BIOSEQ:
965  mpReturn = eEntry_complexities_bioseq;
966  break;
967  case MP_BIOSEQSET:
969  break;
970  case MP_NUCPROT:
971  mpReturn = eEntry_complexities_nuc_prot;
972  break;
973  case MP_PUBSET:
974  mpReturn = eEntry_complexities_pub_set;
975  break;
976  }
977 
978  unsigned int featMask(0);
979  for (list<string>::const_iterator cit = extraFeats.begin();
980  cit != extraFeats.end(); ++cit) {
981  string feat = *cit;
982  NStr::ToLower(feat);
983  if (feat == "snp") {
984  featMask |= (1<<4);
985  }
986  else if (feat == "snp_graph") {
987  featMask |= (1<<6);
988  }
989  else if (feat == "cdd") {
990  featMask |= (1<<7);
991  }
992  else if (feat == "mgc") {
993  featMask |= (1<<8);
994  }
995  else if (feat == "hprd") {
996  featMask |= (1<<9);
997  }
998  else if (feat == "sts") {
999  featMask |= (1<<10);
1000  }
1001  else if (feat == "trna") {
1002  featMask |= (1<<11);
1003  }
1004  else if (feat == "exon") {
1005  featMask |= (1<<13);
1006  }
1007  }
1008  return EEntry_complexities(mpReturn | featMask);
1009 }
1010 
1011 // ----------------------------------------------------------------------------
1013  const string& flatid,
1014  int& gi)
1015 // ----------------------------------------------------------------------------
1016 {
1017  CID1Client id1Client;
1018 
1019  CSeq_id::E_Choice idtype = static_cast<CSeq_id::E_Choice>(
1020  atoi(flatid.c_str()));
1021  size_t sep = flatid.find_first_of(":=(");
1022  string data = flatid.substr(sep+1);
1023 
1024  switch(flatid[sep]) {
1025  default:
1026  return false;
1027  case ':':
1028  case '=': {
1029  CSeq_id id(idtype, data, "");
1030  gi = id1Client.AskGetgi(id);
1031  break;
1032  }
1033  case '(': {
1034  data.erase(data.end() - 1);
1035  vector<string> parts;
1036  NStr::Tokenize(data, ",", parts);
1037  parts.resize(4, "");
1038  CSeq_id id(idtype, parts[1], parts[0], NStr::StringToInt(parts[3]),
1039  parts[2]);
1040  gi = id1Client.AskGetgi(id);
1041  break;
1042  }
1043  }
1044  return true;
1045 }
1046 
1047 // ----------------------------------------------------------------------------
1049 // ----------------------------------------------------------------------------
1050 {
1051  const CArgs& args = GetArgs();
1052 
1053  if (args["q"]) {
1054  return args["q"].AsString();
1055  }
1056  if (args["Q"]) {
1057  CNcbiIstream& istr(args["Q"].AsInputFile());
1058  CNcbiOstrstream ostr;
1059  ostr << istr.rdbuf();
1060  string query(ostr.str(), ostr.pcount());
1061  for (size_t i=0; i < query.size(); ++i) {
1062  if (iscntrl(query[i])) {
1063  query[i] = ' ';
1064  }
1065  }
1066  return query;
1067  }
1068  return "";
1069 }
1070 
1071 // ---------------------------------------------------------------------------
1073 // ---------------------------------------------------------------------------
1074 {
1075  string arg = GetArgs()["d"].AsString();
1076  if (arg == "n") {
1077  return "Nucleotide";
1078  }
1079  if (arg == "p") {
1080  return "Protein";
1081  }
1082  return arg;
1083 }
1084 
1087 
1088 // ===========================================================================
1089 int main(int argc, const char** argv)
1090 // ===========================================================================
1091 {
1092  return CSeqFetchApp().AppMain(argc, argv, 0, eDS_ToStderr, 0);
1093 }
EEntry_complexities
CBioseq_Handle –.
TConstUidIterator GetConstUidIterator() const
void Generate(CSeq_submit &submit, CScope &scope, CNcbiOstream &os)
CFlatFileConfig & SetShowContigSources(bool val=true)
CNcbiOstream & xOutStream()
Definition: seqfetch.cpp:924
int xProcessEntrezQuery()
Definition: seqfetch.cpp:454
CEntrez2_eval_boolean –.
CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id &id, const CTSE_Handle &tse)
Get bioseq handle for sequence within one TSE.
Definition: scope.cpp:241
CEntrez2_boolean_element –.
EOutFormat
Definition: seqfetch.cpp:75
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:682
virtual list< CRef< CID1Seq_hist > > AskGetgirev(const NCBI_NS_NCBI::TGi &req, TReply *reply=0)
bool xLookupFeatPlex(const string &, CRef< CID1server_back >)
Definition: seqfetch.cpp:501
TCount GetCount(void) const
Get the Count member data.
std::ofstream out("events_result.xml")
main entry point for tests
TStr & SetStr(void)
Select the variant.
Defines command line argument related classes.
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:740
E_Choice
Choice variants.
Definition: Seq_id_.hpp:93
bool xLookupHistory(const string &, CRef< CID1server_back >)
Definition: seqfetch.cpp:573
int iscntrl(Uchar c)
Definition: ncbictype.hpp:63
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:941
USING_SCOPE(objects)
void SetReturn_UIDs(TReturn_UIDs value)
Assign a value to Return_UIDs data member.
FASTA-format output; see also ReadFasta in
Definition: sequence.hpp:743
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:568
CArgAllow_Integers –.
Definition: ncbiargs.hpp:1669
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:463
void SetDb(const TDb &value)
Assign a value to Db data member.
#define MSerial_AsnBinary
Definition: serialbase.hpp:683
virtual int AskGetgistate(const NCBI_NS_NCBI::TGi &req, TReply *reply=0)
const TValues & GetValues(void) const
Get the Values member data.
void SetFormat(const TFormat &format)
any seg-set it may be part of
honor specified gap mode; on by default
Definition: sequence.hpp:747
EMaxPlex
Definition: seqfetch.cpp:98
string
Definition: cgiapp.hpp:512
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
TCount GetCount(void) const
Get the Count member data.
const TUids & GetUids(void) const
Get the Uids member data.
CFlatFileConfig & SetHideSNPFeatures(bool val=true)
Defines unified interface to application:
CRef< CScope > m_pScope
Definition: seqfetch.cpp:183
string GetStr(size_t index) const
Definition: seqfetch.cpp:122
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:298
USING_NCBI_SCOPE
Definition: seqfetch.cpp:1086
For arguments with fOptionalSeparator flag, allow other arguments which names begin with the same cha...
Definition: ncbiargs.hpp:629
CEntrez2_boolean_exp –.
void Init()
Initialize the application.
Definition: seqfetch.cpp:189
bool WriteHistoryTable(const CID1server_back &)
Definition: seqfetch.cpp:856
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:626
any nuc-prot it may be part of
bool xPoliceArguments()
Definition: seqfetch.cpp:597
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Definition: ncbiutil.hpp:262
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
#define fc
TMax GetMax(void) const
Get the Max member data.
ESerialDataFormat format
virtual CRef< CEntrez2_docsum_list > AskGet_docsum(const CEntrez2_id_list &req, TReply *reply=0)
CByte_graph –.
Definition: Byte_graph.hpp:65
size_t Width() const
Definition: seqfetch.cpp:127
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
int i
User-defined methods of the data storage class.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders Add object to the scope with p...
Definition: scope.cpp:481
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:243
CFlatFileConfig & SetShowContigFeatures(bool val=true)
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5079
TMin GetMin(void) const
Get the Min member data.
bool xWriteSequence(const string &)
Definition: seqfetch.cpp:625
Open as binary file; for eInputFile, eOutputFile, eIOFile.
Definition: ncbiargs.hpp:593
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
int xProcessGiFile()
Definition: seqfetch.cpp:422
virtual list< CRef< CID1Seq_hist > > AskGetgihist(const NCBI_NS_NCBI::TGi &req, TReply *reply=0)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:101
ELookupType xLookupType()
Definition: seqfetch.cpp:939
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:566
CID1server_maxcomplex –.
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
void SetDiagStream(CNcbiOstream *os, bool quick_flush=true, FDiagCleanup cleanup=0, void *cleanup_data=0, const string &stream_name="")
Set diagnostic stream.
Definition: ncbidiag.cpp:7551
bool xLookupIds(const string &, CRef< CID1server_back >)
Definition: seqfetch.cpp:561
only the bioseq identified
const char * tag
User-defined methods of the data storage class.
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
TUids & SetUids(void)
Assign a value to Uids data member.
void SetGi(TGi value)
Assign a value to Gi data member.
TMonth GetMonth(void) const
Get the Month member data.
Definition: Date_std_.hpp:472
vector< string > m_entries
Definition: seqfetch.cpp:112
virtual TRequest & SetDefaultRequest(void)
virtual list< CRef< CSeq_id > > AskGetseqidsfromgi(const NCBI_NS_NCBI::TGi &req, TReply *reply=0)
list< string > m_Ids
Definition: seqfetch.cpp:184
static string query
User-defined methods of the data storage class.
string GetSeqIdString(const CSeq_id &id)
Definition: compartp.cpp:100
EEntry_complexities xMaxPlex()
Definition: seqfetch.cpp:946
int Run()
Run the application.
Definition: seqfetch.cpp:359
User-defined methods of the data storage class.
virtual void Write(const CSeq_entry_Handle &handle, const CSeq_loc *location=0)
Unspecified locations designate complete sequences; non-empty custom titles override the usual title ...
Definition: sequence.cpp:2607
size_t m_width
Definition: seqfetch.cpp:111
list< string >::const_iterator m_Idit
Definition: seqfetch.cpp:185
string xDatabase()
Definition: seqfetch.cpp:1072
EOutFormat xOutFormat()
Definition: seqfetch.cpp:932
TYear GetYear(void) const
Get the Year member data.
Definition: Date_std_.hpp:425
size_t Height() const
Definition: seqfetch.cpp:128
User-defined methods of the data storage class.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:142
bool xLookupRevisions(const string &, CRef< CID1server_back >)
Definition: seqfetch.cpp:585
To standard error stream.
Definition: ncbidiag.hpp:1755
User-defined methods of the data storage class.
int xProcessFlatId()
Definition: seqfetch.cpp:487
void SetTool(const TTool &value)
Assign a value to Tool data member.
void SetMode(const TMode &mode)
CSeq_entry_Handle –.
static const size_t sm_UidSize
void SetNum(TNum value)
Assign a value to Num data member.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:198
The Object manager core.
assemble FAR delta sequences; on by default
Definition: sequence.hpp:746
void SetMaxplex(TMaxplex value)
Assign a value to Maxplex data member.
virtual NCBI_NS_NCBI::TGi AskGetgi(const CSeq_id &req, TReply *reply=0)
Definition: id1_client_.cpp:93
CArgDescriptions –.
Definition: ncbiargs.hpp:514
CEntrez2_db_id –.
char value[7]
Definition: config.c:428
void GetDate(string *label, const string &format) const
Append a custom string representation of the date to the label.
Definition: Date_std.cpp:159
void CONNECT_Init(const IRWRegistry *reg=0, CRWLock *lock=0, TConnectInitFlags flag=eConnectInit_OwnNothing, FSSLSetup ssl=0)
Init [X]CONNECT library with the specified "reg" and "lock" (ownership for either or both can be deta...
bool xFlatIdToGi(const string &, int &)
Definition: seqfetch.cpp:1012
the "natural" entry for this (nuc-prot)
bool xNextId(string &)
Definition: seqfetch.cpp:911
An arbitrary string.
Definition: ncbiargs.hpp:563
Process information in the NCBI Registry, including working with configuration files.
Open file right away; for eInputFile, eOutputFile, eIOFile.
Definition: ncbiargs.hpp:591
void SetQuery(TQuery &value)
Assign a value to Query data member.
ELookupType
Definition: seqfetch.cpp:88
TDay GetDay(void) const
Get the Day member data.
Definition: Date_std_.hpp:519
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
Definition: ncbiapp.hpp:681
SAnnotSelector & ExcludeNamedAnnots(const CAnnotName &name)
Add named annot to set of annots names to exclude.
CScope –.
Definition: scope.hpp:90
TExp & SetExp(void)
Assign a value to Exp data member.
CRef –.
Definition: ncbiobj.hpp:616
CArgs –.
Definition: ncbiargs.hpp:356
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
Definition: ncbiapp.hpp:696
Name of file (must be writable)
Definition: ncbiargs.hpp:569
Definition: Seq_entry.hpp:55
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
static const char * str(char *buf, int n)
Definition: stats.c:84
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
string xEntrezQuery()
Definition: seqfetch.cpp:1048
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CTextColumn & AddStr(const string &str)
Definition: seqfetch.cpp:116
virtual CRef< CEntrez2_boolean_reply > AskEval_boolean(const CEntrez2_eval_boolean &req, TReply *reply=0)
int xProcessFastaId()
Definition: seqfetch.cpp:400
CNcbiApplication –.
Definition: ncbiapp.hpp:120
bool lt(T x_, T y_, T round_)
Definition: njn_approx.hpp:83
SAnnotSelector & SetAnnotSelector(void)
yy_size_t n
CGraph_CI –.
Definition: graph_ci.hpp:210
IO_PREFIX::ostrstream CNcbiOstrstream
Portable alias for ostrstream.
Definition: ncbistre.hpp:155
User-defined methods of the data storage class.
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:1621
void SetDb(const TDb &value)
Assign a value to Db data member.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3362
User-defined methods of the data storage class.
int main(int argc, const char **argv)
Definition: seqfetch.cpp:1089
User-defined methods of the data storage class.
TUidIterator GetUidIterator()
User-defined methods of the data storage class.
bool xLookupState(const string &, CRef< CID1server_back >)
Definition: seqfetch.cpp:519
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:139
#define kMax_Int
Definition: ncbi_limits.h:184
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:98
CArgAllow_Strings –.
Definition: ncbiargs.hpp:1566
Allow to ignore separator between the argument's name and value.
Definition: ncbiargs.hpp:626
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
int xProcessGi()
Definition: seqfetch.cpp:411
virtual CRef< CSeq_entry > AskGetsefromgi(const CID1server_maxcomplex &req, TReply *reply=0)
Definition: id1_client.cpp:69
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:768
void SetFlag(EFlags flag)
Definition: sequence.hpp:826
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
Definition: iterator.hpp:966
CRef< CObjectManager > m_pObjMgr
Definition: seqfetch.cpp:182
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:531
void ResetDataAndHistory(void)
Clear all information in the scope except added data loaders.
Definition: scope.cpp:319
static int Score(const CRef< CSeq_id > &id)
Wrappers for use with FindBestChoice from <corelib/ncbiutil.hpp>.
Definition: Seq_id.hpp:568
Modified on Fri Dec 15 16:42:21 2017 by modify_doxy.py rev. 546573