|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross Reference: C/doc/access.txt |
source navigation diff markup identifier search freetext search file search |
1 NCBI NETWORK SERVICES FOR ENTREZ QUERIES AND SEQUENCE FETCHING
2
3 NCBI has new network services for Entrez (term lists, Boolean queries,
4 document summaries, neighbors and links) and PubMed and Sequence record
5 fetching. These are all CGI-based, were written by the group that maintains
6 our web services, and have a lot of advantages over our older network
7 services.
8
9 The code is all in the ncbi/access folder, and is compiled into the ncbiobj
10 library, along with the object loaders and api functions. This library is
11 always linked in with our applications, and does not depend upon netcli or
12 netentr libraries. The network connection functions are in the ncbi and
13 ncbiconn libraries, also always linked in. The header files are ent2api.h
14 for Entrez2 services and pmfapi.h for PubMed and sequence fetching.
15
16 These functions are now accessed as named services, so the underlying URL is
17 not present in the client code. This allows us to redirect requests between
18 multiple servers for load balancing, and to change minor URL parameters as
19 needed without breaking the client. The only parameters the client sends are
20 essential ones (e.g., gi number). These services will thus be stable and
21 supported over time, regardless of what we do behind the scenes.
22
23 Another advantage is that the services can be called either synchronously or
24 asynchronously. Synchronous queries wait for the reply before returning
25 control back to your program, and are easier to code. For example:
26
27 PubmedEntryPtr pep = PubMedSynchronousQuery (pmid);
28
29 SeqEntryPtr sep = PubSeqSynchronousQuery (gi, 0, 0);
30
31 These do not automatically try to reconnect if the network request fails,
32 so the client can decide whether to put these calls in a small retry loop.
33
34 Asynchronous queries are slightly more complicated to code, but allow better
35 responsiveness, without the need for multi-threading in interactive programs.
36 Asynchronous calls send the request to the server, add a block of data with
37 the connection and your callback function to a queue, and immediately return
38 control to your program. You are responsible for calling a queue checking
39 function every so often, typically on a timer. When the results are ready,
40 the queue checker calls the callback you specified. Your callback reads the
41 data, does whatever you want with it, and returns to the queue checker, which
42 cleans up the connection before returning to the timer event loop.
43
44 Some NCBI toolkit library functions require the ability to fetch sequences
45 from a registered service. For this, you can call
46
47 PubSeqFetchEnable ();
48
49 to enable access and
50
51 PubSeqFetchDisable ();
52
53 to disable access. The asn2gb -r flag (for remote access) works this way.
54
55 The demo/entrez2 application uses desktop/e2trmlst.c and desktop/e2docsum.c
56 to access the new servers.
57
58 Some examples of how to use the new functions are provided below.
59
60
61 /* standard startup */
62
63 #include <sqnutils.h>
64 #include <objall.h>
65 #include <objsub.h>
66 #include <objfdef.h>
67
68 static void StandardStartup (void)
69 /* Startup sequence for the examples: error-handling calls, then the loader calls. */
70 {
71 ErrSetFatalLevel (SEV_MAX);
72 ErrClearOptFlags (EO_SHOW_USERSTR);
73 UseLocalAsnloadDataAndErrMsg ();
74 ErrPathReset ();
75 /* NOTE(review): loader order kept as written; any dependency between them is not visible here */
76 AllObjLoad ();
77 SubmitAsnLoad ();
78 FeatDefSetLoad ();
79 SeqCodeSetLoad ();
80 GeneticCodeTableLoad ();
81 }
82
83 /* enabling automatic fetch of components from SeqFetch service */
84
85 #include <pmfapi.h>
86
87 static void OnStartup (void)
88 /* Calls PubSeqFetchEnable so toolkit functions can fetch sequences from the service. */
89 {
90 PubSeqFetchEnable ();
91 }
92
93 static void OnShutdown (void)
94 /* Calls PubSeqFetchDisable to turn sequence-fetch access back off. */
95 {
96 PubSeqFetchDisable ();
97 }
98
99 /* example of fetching PubMed record */
100
101 #include <pmfapi.h>
102 #include <tomedlin.h>
103
104 static void SavePubMedRecord (Int4 pmid, FILE *fp)
105 {
106   /* Fetch the PubMed entry for pmid and write its medent member to fp. */
107   PubmedEntryPtr entry = PubMedSynchronousQuery (pmid);
108
109   if (entry != NULL) {
110     MedlineEntryToDataFile ((MedlineEntryPtr) entry->medent, fp);
111     PubmedEntryFree (entry);
112   }
113   /* a NULL query result is ignored: nothing is written and nothing is freed */
114 }
115
116 /* example of fetching sequence record */
117
118 #include <pmfapi.h>
119 #include <asn2gnbk.h>
120
121 static void SaveSeqRecord (Int4 gi, FILE *fp)
122 {
123   /* Fetch the sequence entry for gi and write it to fp through SeqEntryToGnbk. */
124   SeqEntryPtr entry = PubSeqSynchronousQuery (gi, 0, 0);
125
126   if (entry != NULL) {
127     SeqEntryToGnbk (entry, NULL, GENBANK_FMT, RELEASE_MODE, NORMAL_STYLE,
128                     0, 0, 0, NULL, fp);
129     SeqEntryFree (entry);
130   }
131   /* a NULL query result is ignored: nothing is written and nothing is freed */
132 }
133
134 /* example of Entrez2 Boolean query */
135
136 #include <ent2api.h>
137
138 static void DoBooleanRequest (FILE *fp)
139 /* Query the Nucleotide database (ORGN term AND PROP term) and print the count and UIDs to fp. */
140 {
141   Entrez2BooleanReplyPtr e2br;
142   Entrez2IdListPtr e2id;
143   Entrez2RequestPtr e2rq;
144   Entrez2ReplyPtr e2ry;
145   Int4 i;
146   Int4Ptr uids;
147
148   e2rq = EntrezCreateBooleanRequest (TRUE, FALSE, "Nucleotide", NULL,
149                                      0, 0, NULL, 0, 0);
150   if (e2rq == NULL) return;
151   /* elements are appended in order: term, operator, term */
152   EntrezAddToBooleanRequest (e2rq, NULL, 0, "ORGN", "Saccharomyces cerevisiae",
153                              NULL, 0, 0, NULL, NULL, TRUE, TRUE);
154   EntrezAddToBooleanRequest (e2rq, NULL, ENTREZ_OP_AND, NULL, NULL,
155                              NULL, 0, 0, NULL, NULL, TRUE, TRUE);
156   EntrezAddToBooleanRequest (e2rq, NULL, 0, "PROP", "biomol mrna",
157                              NULL, 0, 0, NULL, NULL, TRUE, TRUE);
158   /* the request is freed right after the query, whether or not a reply came back */
159   e2ry = EntrezSynchronousQuery (e2rq);
160   e2rq = Entrez2RequestFree (e2rq);
161   if (e2ry == NULL) return;
162   /* NOTE(review): e2ry is never freed here; presumably the extract call consumes it - confirm */
163   e2br = EntrezExtractBooleanReply (e2ry);
164   if (e2br == NULL) return;
165
166   if (e2br->count > 0) {
167     e2id = e2br->uids;
168
169     if (e2id != NULL && e2id->num > 0 && e2id->uids != NULL) {
170       fprintf (fp, "count=%ld\n", (long) e2id->num);
171       /* the query targets Nucleotide, so each UID is labelled gi rather than pmid */
172       uids = (Int4Ptr) BSMerge (e2id->uids, NULL);
173       if (uids != NULL) {
174         for (i = 0; i < e2id->num; i++) {
175           fprintf (fp, "gi=%ld\n", (long) uids [i]);
176         }
177         MemFree (uids);
178       }
179     }
180   }
181   /* the Boolean reply is released on every path that reaches this point */
182   Entrez2BooleanReplyFree (e2br);
183 }
184
185 /* example of fetching Entrez2 document summary */
186
187 #include <ent2api.h>
188
189 static void DoDocsumRequest (Int4 pmid, FILE *fp)
190 {
191   /* Request the PubMed document summary for pmid and print each non-blank field to fp. */
192   Entrez2DocsumDataPtr field;
193   Entrez2DocsumListPtr summaries;
194   Entrez2DocsumPtr docsum;
195   Entrez2RequestPtr request;
196   Entrez2ReplyPtr reply;
197
198   request = EntrezCreateDocSumRequest ("PubMed", pmid, 0, NULL, NULL);
199   if (request == NULL) return;
200
201   reply = EntrezSynchronousQuery (request);
202   request = Entrez2RequestFree (request);
203   if (reply == NULL) return;
204
205   summaries = EntrezExtractDocsumReply (reply);
206   if (summaries == NULL) return;
207
208   for (docsum = summaries->list; docsum != NULL; docsum = docsum->next) {
209     for (field = docsum->docsum_data; field != NULL; field = field->next) {
210       if (! StringHasNoText (field->field_name) &&
211           ! StringHasNoText (field->field_value)) {
212         fprintf (fp, "%s - %s\n", field->field_name, field->field_value);
213       }
214     }
215   }
216   Entrez2DocsumListFree (summaries);
217 }
218
|
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |