NCBI C Toolkit Cross Reference

C/access/tax3api.c


  1 /*   tax3api.c
  2 * ===========================================================================
  3 *
  4 *                            PUBLIC DOMAIN NOTICE
  5 *            National Center for Biotechnology Information (NCBI)
  6 *
  7 *  This software/database is a "United States Government Work" under the
  8 *  terms of the United States Copyright Act.  It was written as part of
  9 *  the author's official duties as a United States Government employee and
 10 *  thus cannot be copyrighted.  This software/database is freely available
 11 *  to the public for use. The National Library of Medicine and the U.S.
 12 *  Government do not place any restriction on its use or reproduction.
 13 *  We would, however, appreciate having the NCBI and the author cited in
 14 *  any work or product based on this material
 15 *
 16 *  Although all reasonable efforts have been taken to ensure the accuracy
 17 *  and reliability of the software and data, the NLM and the U.S.
 18 *  Government do not and cannot warrant the performance or results that
 19 *  may be obtained by using this software or data. The NLM and the U.S.
 20 *  Government disclaim all warranties, express or implied, including
 21 *  warranties of performance, merchantability or fitness for any particular
 22 *  purpose.
 23 *
 24 * ===========================================================================
 25 *
 26 * File Name:  tax3api.c
 27 *
 28 * Author:  Jonathan Kans
 29 *
 30 * Version Creation Date:   7/8/04
 31 *
 32 * $Revision: 1.47 $
 33 *
 34 * File Description: 
 35 *
 36 * Modifications:  
 37 * --------------------------------------------------------------------------
 38 * Date     Name        Description of modification
 39 * -------  ----------  -----------------------------------------------------
 40 *
 41 *
 42 * ==========================================================================
 43 */
 44 
 45 #include <ncbi.h>
 46 #include <objseq.h>
 47 #include <objsset.h>
 48 #include <tax3api.h>
 49 #include <sqnutils.h>
 50 #include <subutil.h>
 51 #include <findrepl.h>
 52 #define NLM_GENERATED_CODE_PROTO
 53 #include <objmacro.h>
 54 #include <macroapi.h>
 55 
 56 /* low-level connection functions */
 57 
 58 static Boolean text_tax_asn = FALSE;
 59 static Boolean text_tax_set = FALSE;
 60 
 61 #if 1
 62 static const CharPtr tax3servicename = "TaxService3";
 63 #else
 64 static const CharPtr tax3servicename = "TaxService3Test";
 65 #endif
 66 
 67 NLM_EXTERN CONN Tax3OpenConnection (
 68   void
 69 )
 70 
 71 {
 72 #ifdef OS_UNIX
 73   CharPtr  str;
 74 
 75   if (! text_tax_set) {
 76     str = (CharPtr) getenv ("TEXT_TAX_ASN");
 77     if (StringDoesHaveText (str)) {
 78       if (StringICmp (str, "TRUE") == 0) {
 79         text_tax_asn = TRUE;
 80       }
 81     }
 82     text_tax_set = TRUE;
 83   }
 84 #endif
 85 
 86   return QUERY_OpenServiceQuery (text_tax_asn ? "TaxService3Text" : tax3servicename, NULL, 30);
 87 }
 88 
 89 #ifdef OS_MAC
 90 #include <Events.h>
 91 #endif
 92 
 93 NLM_EXTERN Taxon3ReplyPtr Tax3WaitForReply (
 94   CONN conn
 95 )
 96 
 97 {
 98   AsnIoConnPtr    aicp;
 99   time_t          currtime, starttime;
100   time_t          max = 0;
101   EIO_Status      status;
102   STimeout        timeout;
103   Taxon3ReplyPtr  t3ry = NULL;
104 #ifdef OS_MAC
105   EventRecord     currEvent;
106 #endif
107 
108   if (conn == NULL) return NULL;
109 
110 #ifdef OS_MAC
111   timeout.sec = 0;
112   timeout.usec = 0;
113 #else
114   timeout.sec = 300;
115   timeout.usec = 0;
116 #endif
117 
118   starttime = GetSecs ();
119   while ((status = CONN_Wait (conn, eIO_Read, &timeout)) == eIO_Timeout && max < 300) {
120     currtime = GetSecs ();
121     max = currtime - starttime;
122 #ifdef OS_MAC
123     WaitNextEvent (0, &currEvent, 0, NULL);
124 #endif
125   }
126   if (status == eIO_Success) {
127     aicp = QUERY_AsnIoConnOpen (text_tax_asn ? "r" : "rb", conn);
128     t3ry = Taxon3ReplyAsnRead (aicp->aip, NULL);
129     QUERY_AsnIoConnClose (aicp);
130   }
131   CONN_Close (conn);
132 
133   return t3ry;
134 }
135 
136 /* high-level connection functions */
137 
138 NLM_EXTERN Taxon3ReplyPtr Tax3SynchronousQuery (
139   Taxon3RequestPtr t3rq
140 )
141 
142 {
143   AsnIoConnPtr    aicp;
144   CONN            conn;
145   Taxon3ReplyPtr  t3ry;
146 
147   if (t3rq == NULL) return NULL;
148 
149   conn = Tax3OpenConnection ();
150 
151   if (conn == NULL) return NULL;
152 
153   aicp = QUERY_AsnIoConnOpen (text_tax_asn ? "w" : "wb", conn);
154 
155   Taxon3RequestAsnWrite (t3rq, aicp->aip, NULL);
156 
157   AsnIoFlush (aicp->aip);
158   QUERY_AsnIoConnClose (aicp);
159 
160   QUERY_SendQuery (conn);
161 
162   t3ry = Tax3WaitForReply (conn);
163 
164   return t3ry;
165 }
166 
167 NLM_EXTERN Boolean Tax3AsynchronousQuery (
168   Taxon3RequestPtr t3rq,
169   QUEUE* queue,
170   QueryResultProc resultproc,
171   VoidPtr userdata
172 )
173 
174 {
175   AsnIoConnPtr  aicp;
176   CONN          conn;
177 
178   if (t3rq == NULL) return FALSE;
179 
180   conn = Tax3OpenConnection ();
181 
182   if (conn == NULL) return FALSE;
183 
184   aicp = QUERY_AsnIoConnOpen (text_tax_asn ? "w" : "wb", conn);
185 
186   Taxon3RequestAsnWrite (t3rq, aicp->aip, NULL);
187 
188   AsnIoFlush (aicp->aip);
189   QUERY_AsnIoConnClose (aicp);
190 
191   QUERY_SendQuery (conn);
192 
193   QUERY_AddToQueue (queue, conn, resultproc, userdata, TRUE);
194 
195   return TRUE;
196 }
197 
198 NLM_EXTERN Int4 Tax3CheckQueue (
199   QUEUE* queue
200 )
201 
202 {
203   return QUERY_CheckQueue (queue);
204 }
205 
206 NLM_EXTERN Taxon3ReplyPtr Tax3ReadReply (
207   CONN conn,
208   EIO_Status status
209 )
210 
211 {
212   AsnIoConnPtr    aicp;
213   Taxon3ReplyPtr  t3ry = NULL;
214 
215   if (conn != NULL && status == eIO_Success) {
216     aicp = QUERY_AsnIoConnOpen (text_tax_asn ? "r" : "rb", conn);
217     t3ry = Taxon3ReplyAsnRead (aicp->aip, NULL);
218     QUERY_AsnIoConnClose (aicp);
219   }
220   return t3ry;
221 }
222 
223 NLM_EXTERN Taxon3RequestPtr CreateTaxon3Request (
224   Int4 taxid,
225   CharPtr name,
226   OrgRefPtr orp
227 )
228 
229 {
230   Taxon3RequestPtr  t2rp;
231 
232   t2rp = Taxon3RequestNew ();
233   if (t2rp == NULL) return NULL;
234 
235   if (StringDoesHaveText (name)) {
236     ValNodeCopyStr (&(t2rp->request), 2, name);
237   } else if (taxid > 0) {
238     ValNodeAddInt (&(t2rp->request), 1, taxid);
239   } else if (orp != NULL) {
240     orp = AsnIoMemCopy ((Pointer) orp,
241                         (AsnReadFunc) OrgRefAsnRead,
242                         (AsnWriteFunc) OrgRefAsnWrite);
243     ValNodeAddPointer (&(t2rp->request), 3, (Pointer) orp);
244   }
245 
246   return t2rp;
247 }
248 
249 NLM_EXTERN Taxon3RequestPtr CreateMultiTaxon3Request (ValNodePtr org_list)
250 {
251   ValNodePtr vnp;
252   Taxon3RequestPtr t3rp;
253   OrgRefPtr orp;
254   
255   t3rp = Taxon3RequestNew ();
256   if (t3rp == NULL) return NULL;
257 
258   for (vnp = org_list; vnp != NULL; vnp = vnp->next)
259   {
260     switch (vnp->choice)
261     {
262       case 1:
263         ValNodeAddInt (&(t3rp->request), 1, vnp->data.intvalue);
264         break;
265       case 2:
266         ValNodeCopyStr (&(t3rp->request), 2, vnp->data.ptrvalue);
267         break;
268       case 3:
269         orp = AsnIoMemCopy (vnp->data.ptrvalue,
270                         (AsnReadFunc) OrgRefAsnRead,
271                         (AsnWriteFunc) OrgRefAsnWrite);
272         ValNodeAddPointer (&(t3rp->request), 3, (Pointer) orp);
273         break;
274     }
275   }
276   return t3rp;
277 }
278 
279 
280 static Boolean HasMisspellingFlag (T3DataPtr t)
281 {
282   T3StatusFlagsPtr status;
283 
284   if (t == NULL) return FALSE;
285   status = t->status;
286   while (status != NULL) {
287     if (StringCmp (status->property, "misspelled_name") == 0) {
288       return TRUE;
289     }
290     status = status->next;
291   }
292   return FALSE;
293 }
294 
295 
296 static int LIBCALLBACK SortVnpByOrgRef (VoidPtr ptr1, VoidPtr ptr2)
297 
298 {
299   ValNodePtr  vnp1;
300   ValNodePtr  vnp2;
301 
302   if (ptr1 != NULL && ptr2 != NULL) {
303     vnp1 = *((ValNodePtr PNTR) ptr1);
304     vnp2 = *((ValNodePtr PNTR) ptr2);
305     if (vnp1 != NULL && vnp2 != NULL) {
306       return OrgRefCompare (vnp1->data.ptrvalue, vnp2->data.ptrvalue);
307     }
308   }
309   return 0;
310 }
311 
312 
313 NLM_EXTERN ValNodePtr Taxon3GetOrgRefList (ValNodePtr org_list)
314 {
315   Taxon3RequestPtr t3rq;
316   Taxon3ReplyPtr   t3ry;
317   T3DataPtr        tdp;
318   OrgRefPtr        t3orp = NULL;
319   T3ReplyPtr       trp;
320   T3ErrorPtr       tep;
321   ValNodePtr       uniq_list, response_list = NULL, next_org_list, last_org;
322   Int4             request_num, max_requests = 2000;
323   ValNodePtr PNTR  ptr_array;
324   ValNodePtr       vnp, vnp_rq, vnp_rp;
325   Int4             i, num_orgs;
326 
327   if (org_list == NULL) {
328     return NULL;
329   }
330 
331   /* make a copy of the original list - we will prepare the response list by substituting the OrgRef */
332   org_list = ValNodeCopyPtr (org_list);
333 
334   /* make array to show original order of ValNodes, so that we can restore after sorting */
335   num_orgs = ValNodeLen (org_list);
336   ptr_array = (ValNodePtr PNTR) MemNew (sizeof (ValNodePtr) * num_orgs);
337   for (vnp = org_list, i = 0; vnp != NULL; vnp = vnp->next, i++) {
338     ptr_array[i] = vnp;
339   }
340 
341   org_list = ValNodeSort (org_list, SortVnpByOrgRef);
342 
343   /* now make a list of just the unique requests */
344   uniq_list = ValNodeCopyPtr (org_list);
345   ValNodeUnique (&uniq_list, SortVnpByOrgRef, ValNodeFree);
346   
347   /* now break large lists into manageable chunks */
348   vnp = uniq_list;
349   while (vnp != NULL) {
350     next_org_list = vnp->next;
351     last_org = vnp; 
352     request_num = 1;
353     while (next_org_list != NULL && request_num < max_requests) {
354       last_org = next_org_list;
355       next_org_list = next_org_list->next;
356       request_num++;
357     }
358     if (last_org != NULL) {
359       last_org->next = NULL;
360     }
361       
362     /* now create the request */
363   
364     t3rq = CreateMultiTaxon3Request (vnp);
365     if (t3rq == NULL) return NULL;
366     t3ry = Tax3SynchronousQuery (t3rq);
367     Taxon3RequestFree (t3rq);
368     if (t3ry != NULL) {
369       for (trp = t3ry->reply; trp != NULL; trp = trp->next) {
370         switch (trp->choice) {
371           case T3Reply_error :
372             tep = (T3ErrorPtr) trp->data.ptrvalue;
373             if (tep != NULL) {
374               ErrPostEx (SEV_ERROR, 0, 0, tep->message);
375             }
376             if (tep != NULL && StringStr (tep->message, "ambiguous") != NULL) {
377               ValNodeAddPointer (&response_list, eReturnedOrgFlag_ambiguous, NULL);
378             } else {
379               ValNodeAddPointer (&response_list, eReturnedOrgFlag_error, NULL);
380             }
381             break;
382           case T3Reply_data :
383             tdp = (T3DataPtr) trp->data.ptrvalue;
384             if (tdp != NULL) {
385               t3orp = (OrgRefPtr)(tdp->org);
386               if (HasMisspellingFlag (tdp)) {
387                 ValNodeAddPointer (&response_list, eReturnedOrgFlag_misspelled, (Pointer) t3orp);
388               } else {
389                 ValNodeAddPointer (&response_list, eReturnedOrgFlag_normal, (Pointer) t3orp);
390               }
391               tdp->org = NULL;
392             }
393             break;
394           default :
395             break;
396         }
397       }
398       Taxon3ReplyFree (t3ry);
399     }
400     
401     if (last_org != NULL) {
402         last_org->next = next_org_list;
403     }
404     vnp = next_org_list;
405   }  
406   
407   /* now put responses in list */
408   vnp = uniq_list;
409   vnp_rq = org_list;
410   vnp_rp = response_list;
411 
412   while (vnp != NULL && vnp_rq != NULL && vnp_rp != NULL) {
413     while (vnp_rq != NULL && OrgRefCompare (vnp->data.ptrvalue, vnp_rq->data.ptrvalue) == 0) {
414       vnp_rq->data.ptrvalue = AsnIoMemCopy (vnp_rp->data.ptrvalue, (AsnReadFunc) OrgRefAsnRead, (AsnWriteFunc) OrgRefAsnWrite);
415       vnp_rq->choice = vnp_rp->choice;
416       vnp_rq = vnp_rq->next;
417     }
418     vnp_rp->data.ptrvalue = OrgRefFree (vnp_rp->data.ptrvalue);
419     vnp_rp = vnp_rp->next;
420     vnp = vnp->next;
421   }
422   /* if there were more requests than responses, set responses to NULL */
423   while (vnp_rq != NULL) {
424     vnp_rq->data.ptrvalue = NULL;
425     vnp_rq = vnp_rq->next;
426   }
427   /* if there were more responses than requests, free extra responses */
428   while (vnp_rp != NULL) {
429     vnp_rp->data.ptrvalue = OrgRefFree (vnp_rp->data.ptrvalue);
430     vnp_rp = vnp_rp->next;
431   }
432   response_list = ValNodeFree (response_list);
433   uniq_list = ValNodeFree (uniq_list);
434 
435   /* now restore original order */
436   for (i = 0; i < num_orgs - 1; i++) {
437     ptr_array[i]->next = ptr_array[i + 1];
438   }
439   ptr_array[num_orgs - 1]->next = NULL;
440   org_list = ptr_array[0];
441   ptr_array = MemFree (ptr_array);
442   
443   return org_list;
444 }
445 
446 
447 NLM_EXTERN TaxFixItemPtr TaxFixItemNew (void)
448 {
449   TaxFixItemPtr t;
450 
451   t = (TaxFixItemPtr) MemNew (sizeof (TaxFixItemData));
452   MemSet (t, 0, sizeof (TaxFixItemData));
453   return t;
454 }
455 
456 
457 NLM_EXTERN TaxFixItemPtr TaxFixItemCopy (TaxFixItemPtr orig)
458 {
459   TaxFixItemPtr t = NULL;
460 
461   if (orig != NULL) {
462     t = (TaxFixItemPtr) MemNew (sizeof (TaxFixItemData));
463     t->data_choice = orig->data_choice;
464     t->data = orig->data;
465     t->response_org = AsnIoMemCopy (orig->response_org, (AsnReadFunc) OrgRefAsnRead, (AsnWriteFunc) OrgRefAsnWrite);
466     if (orig->taxname != NULL) {
467       t->taxname = StringSave (orig->taxname);
468     }
469     if (orig->suggested_fix != NULL) {
470       t->suggested_fix = StringSave (orig->suggested_fix);
471     }
472     if (orig->rank != NULL) {
473       t->rank = StringSave (orig->rank);
474     }
475   }
476   return t;
477 }
478 
479 
480 NLM_EXTERN TaxFixItemPtr TaxFixItemFree (TaxFixItemPtr t)
481 {
482   if (t != NULL) {
483     t->response_org = OrgRefFree (t->response_org);
484     t->taxname = MemFree (t->taxname);
485     t->suggested_fix = MemFree (t->suggested_fix);
486     t->rank = MemFree (t->rank);
487     t = MemFree (t);
488   }
489   return t;
490 }
491 
492 
493 NLM_EXTERN ValNodePtr LIBCALLBACK TaxFixItemListFree (ValNodePtr vnp)
494 {
495   ValNodePtr vnp_next;
496 
497   while (vnp != NULL) {
498     vnp_next = vnp->next;
499     vnp->next = NULL;
500     vnp->data.ptrvalue = TaxFixItemFree (vnp->data.ptrvalue);
501     vnp = ValNodeFree (vnp);
502     vnp = vnp_next;
503   }
504   return vnp;
505 }
506 
507 
508 static Boolean LIBCALLBACK TaxFixItemOrigIsOk (ValNodePtr vnp)
509 {
510   TaxFixItemPtr t;
511 
512   if (vnp == NULL || (t = (TaxFixItemPtr) vnp->data.ptrvalue) == NULL) {
513     return TRUE;
514   } else if (StringCmp (t->taxname, t->suggested_fix) == 0 && StringICmp (t->rank, "species") == 0) {
515     return TRUE;
516   } else {
517     return FALSE;
518   }
519 }
520 
521 
522 static CharPtr StringSum (CharPtr str1, CharPtr str2) 
523 {
524   CharPtr sum = NULL;
525 
526   if (str1 == NULL && str2 == NULL) {
527     sum = NULL;
528   } else if (str1 == NULL) {
529     sum = StringSave (str2);
530   } else if (str2 == NULL) {
531     sum = StringSave (str1);
532   } else {
533     sum = (CharPtr) MemNew (sizeof (Char) * (StringLen (str1) + StringLen (str2) + 1));
534     sprintf (sum, "%s%s", str1, str2);
535   }
536   return sum;
537 }
538 
539 
540 static CharPtr SuggestedTaxNameFixFromOrgAndRank (CharPtr taxname, OrgRefPtr response_org, CharPtr rank)
541 {
542   CharPtr fix = NULL, tmp;
543 
544   if (response_org == NULL) {
545     return NULL;
546   }
547 
548   if (StringICmp (rank, "species") == 0) {
549     fix = StringSave (response_org->taxname);
550   } else if (response_org->orgname != NULL) {
551     if (((StringNICmp (taxname, "uncultured ", 11) == 0
552            && StringICmp (taxname + 11, response_org->taxname) == 0)
553          || StringICmp (taxname, response_org->taxname) == 0)
554         && (StringISearch (response_org->orgname->lineage, "archaea") != NULL
555             || StringISearch (response_org->orgname->lineage, "bacteria") != NULL)) {
556       if (StringICmp (rank, "genus") == 0) {
557         fix = StringSum (response_org->taxname, " sp.");
558       } else if (StringNICmp (response_org->orgname->lineage, "bacteria", 8) == 0) {
559         fix = StringSum (response_org->taxname, " bacterium");
560       } else if (StringNICmp (response_org->orgname->lineage, "Archaea", 7) == 0) {
561         fix = StringSum (response_org->taxname, " archaeon");
562       }
563       if (fix != NULL 
564           && StringNICmp (fix, "uncultured ", 11) != 0) {
565         tmp = fix;
566         fix = StringSum ("uncultured ", tmp);
567         tmp = MemFree (tmp);
568       }
569     }
570   }
571   return fix;
572 }
573 
574 
575 static ValNodePtr MakeTaxFixRequestList (ValNodePtr biop_list)
576 {
577   ValNodePtr rq_list = NULL, prev = NULL;
578   BioSourcePtr biop;
579   OrgRefPtr  org;
580   CharPtr    new_name;
581   Int4       len;
582 
583   while (biop_list != NULL) {
584     biop = GetBioSourceFromObject (biop_list->choice, biop_list->data.ptrvalue);
585     org = AsnIoMemCopy (biop->org, (AsnReadFunc) OrgRefAsnRead, (AsnWriteFunc) OrgRefAsnWrite);
586     if ((len = StringLen (org->taxname)) > 3 && StringCmp (org->taxname + len - 3, " sp") == 0) {
587       new_name = StringSum (org->taxname, ".");
588       org->taxname = MemFree (org->taxname);
589       org->taxname = new_name;
590     }
591 
592     ValNodeAddPointer (&prev, 3, org);
593     if (rq_list == NULL) {
594       rq_list = prev;
595     }
596     biop_list = biop_list->next;
597   }
598   return rq_list;
599 }
600 
601 
602 static void CheckSuggestedFixes (ValNodePtr tax_fix_list) 
603 {
604   ValNodePtr rq_list = NULL, rp_list = NULL, prev, vnp_rq, vnp_rp;
605   ValNodePtr vnp, next_org_list, last_org;
606   Int4             request_num, max_requests = 2000;
607   TaxFixItemPtr t;
608   Taxon3RequestPtr t3rq;
609   Taxon3ReplyPtr   t3ry;
610   T3DataPtr        tdp;
611   T3ReplyPtr       trp;
612   T3ErrorPtr       tep;
613   T3StatusFlagsPtr tfp;
614   OrgRefPtr        org;
615   Boolean          is_species;
616 
617   prev = NULL;
618   for (vnp = tax_fix_list; vnp != NULL; vnp = vnp->next) {
619     t = (TaxFixItemPtr) vnp->data.ptrvalue;
620     if (t != NULL && t->suggested_fix != NULL) {
621       ValNodeAddPointer (&prev, 2, StringSave (t->suggested_fix));
622     }
623     if (rq_list == NULL) {
624       rq_list = prev;
625     }
626   }
627 
628   /* now break large lists into manageable chunks */
629   vnp = rq_list;
630   while (vnp != NULL) {
631     next_org_list = vnp->next;
632     last_org = vnp; 
633     request_num = 1;
634     while (next_org_list != NULL && request_num < max_requests) {
635       last_org = next_org_list;
636       next_org_list = next_org_list->next;
637       request_num++;
638     }
639     if (last_org != NULL) {
640       last_org->next = NULL;
641     }
642       
643     /* now create the request */
644   
645     t3rq = CreateMultiTaxon3Request (vnp);
646     if (t3rq == NULL) return;
647     t3ry = Tax3SynchronousQuery (t3rq);
648     Taxon3RequestFree (t3rq);
649     if (t3ry != NULL) {
650       for (trp = t3ry->reply; trp != NULL; trp = trp->next) {
651         switch (trp->choice) {
652           case T3Reply_error :
653             tep = (T3ErrorPtr) trp->data.ptrvalue;
654             ValNodeAddPointer (&rp_list, 0, NULL);
655             break;
656           case T3Reply_data :
657             tdp = (T3DataPtr) trp->data.ptrvalue;
658             is_species = FALSE;
659             if (tdp != NULL) {
660               for (tfp = tdp->status; tfp != NULL; tfp = tfp->next) {
661                 if (StringICmp (tfp->property, "rank") == 0
662                     && tfp->Value_value != NULL
663                     && tfp->Value_value->choice == Value_value_str
664                     && StringICmp (tfp->Value_value->data.ptrvalue, "species") == 0) {
665                   is_species = TRUE;
666                 }
667               }
668             }
669             if (is_species) {
670               org = (OrgRefPtr) tdp->org;
671               ValNodeAddPointer (&rp_list, 0, StringSave (org->taxname));
672             } else {
673               ValNodeAddPointer (&rp_list, 0, NULL);
674             }
675             break;
676           default :
677             break;
678         }
679       }
680       Taxon3ReplyFree (t3ry);
681     }
682     
683     if (last_org != NULL) {
684         last_org->next = next_org_list;
685     }
686     vnp = next_org_list;
687   }  
688   rq_list = ValNodeFreeData (rq_list);
689 
690   /* adjust suggested fixes */
691   vnp_rq = tax_fix_list;
692   vnp_rp = rp_list;
693 
694   while (vnp_rq != NULL && vnp_rp != NULL) {
695     while (vnp_rq != NULL && ((t = (TaxFixItemPtr) vnp_rq->data.ptrvalue) == NULL || t->suggested_fix == NULL)) {
696       vnp_rq = vnp_rq->next;
697     }
698     if (t != NULL) {
699       t->suggested_fix = MemFree (t->suggested_fix);
700       if (vnp_rq != NULL) {
701         t->suggested_fix = vnp_rp->data.ptrvalue;
702         vnp_rp->data.ptrvalue = NULL;
703         vnp_rq = vnp_rq->next;
704         vnp_rp = vnp_rp->next;
705       }
706     }
707   }
708   rp_list = ValNodeFreeData (rp_list);
709 }
710 
711 
712 NLM_EXTERN ValNodePtr Taxon3GetTaxFixList (ValNodePtr biop_list)
713 {
714   Taxon3RequestPtr t3rq;
715   Taxon3ReplyPtr   t3ry;
716   T3DataPtr        tdp;
717   T3ReplyPtr       trp;
718   T3ErrorPtr       tep;
719   ValNodePtr       uniq_list, response_list = NULL, next_org_list, last_org, request_list;
720   Int4             request_num, max_requests = 2000;
721   ValNodePtr PNTR  ptr_array;
722   ValNodePtr       vnp, vnp_rq, vnp_rp, vnp_b;
723   T3StatusFlagsPtr tfp;
724   Int4             i, num_orgs;
725   TaxFixItemPtr    t;
726   BioSourcePtr     biop;
727 
728   if (biop_list == NULL) {
729     return NULL;
730   }
731 
732   /* make a copy of the original list, removing uncultured */
733   request_list = MakeTaxFixRequestList (biop_list);
734 
735   /* make array to show original order of ValNodes, so that we can restore after sorting */
736   num_orgs = ValNodeLen (request_list);
737   ptr_array = (ValNodePtr PNTR) MemNew (sizeof (ValNodePtr) * num_orgs);
738   for (vnp = request_list, i = 0; vnp != NULL; vnp = vnp->next, i++) {
739     ptr_array[i] = vnp;
740   }
741 
742   request_list = ValNodeSort (request_list, SortVnpByOrgRef);
743 
744   /* now make a list of just the unique requests */
745   uniq_list = ValNodeCopyPtr (request_list);
746   ValNodeUnique (&uniq_list, SortVnpByOrgRef, ValNodeFree);
747   
748   /* now break large lists into manageable chunks */
749   vnp = uniq_list;
750   while (vnp != NULL) {
751     next_org_list = vnp->next;
752     last_org = vnp; 
753     request_num = 1;
754     while (next_org_list != NULL && request_num < max_requests) {
755       last_org = next_org_list;
756       next_org_list = next_org_list->next;
757       request_num++;
758     }
759     if (last_org != NULL) {
760       last_org->next = NULL;
761     }
762       
763     /* now create the request */
764   
765     t3rq = CreateMultiTaxon3Request (vnp);
766     if (t3rq == NULL) return NULL;
767     t3ry = Tax3SynchronousQuery (t3rq);
768     Taxon3RequestFree (t3rq);
769     if (t3ry != NULL) {
770       for (trp = t3ry->reply; trp != NULL; trp = trp->next) {
771         switch (trp->choice) {
772           case T3Reply_error :
773             tep = (T3ErrorPtr) trp->data.ptrvalue;
774             t = TaxFixItemNew ();
775             ValNodeAddPointer (&response_list, 0, t);
776             break;
777           case T3Reply_data :
778             tdp = (T3DataPtr) trp->data.ptrvalue;
779             if (tdp != NULL) {
780               t = TaxFixItemNew ();
781               t->response_org = (OrgRefPtr)(tdp->org);
782               tdp->org = NULL;
783               for (tfp = tdp->status; tfp != NULL; tfp = tfp->next) {
784                 if (StringICmp (tfp->property, "rank") == 0
785                     && tfp->Value_value != NULL
786                     && tfp->Value_value->choice == Value_value_str) {
787                   t->rank = StringSave (tfp->Value_value->data.ptrvalue);
788                 }
789               }
790               t->taxname = StringSave (t->response_org->taxname);
791               t->suggested_fix = SuggestedTaxNameFixFromOrgAndRank (t->taxname, t->response_org, t->rank);
792               ValNodeAddPointer (&response_list, 0, t);
793             }
794             break;
795           default :
796             break;
797         }
798       }
799       Taxon3ReplyFree (t3ry);
800     }
801     
802     if (last_org != NULL) {
803         last_org->next = next_org_list;
804     }
805     vnp = next_org_list;
806   }  
807 
808   CheckSuggestedFixes (response_list);
809   
810   /* now put responses in list */
811   vnp = uniq_list;
812   vnp_rq = request_list;
813   vnp_rp = response_list;
814 
815   while (vnp != NULL && vnp_rq != NULL && vnp_rp != NULL) {
816     while (vnp_rq != NULL && OrgRefCompare (vnp->data.ptrvalue, vnp_rq->data.ptrvalue) == 0) {
817       t = TaxFixItemCopy (vnp_rp->data.ptrvalue);
818       vnp_rq->data.ptrvalue = t;
819       vnp_rq = vnp_rq->next;
820     }
821     vnp_rp = vnp_rp->next;
822     vnp = vnp->next;
823   }
824   /* if there were more requests than responses, set responses to NULL */
825   while (vnp_rq != NULL) {
826     vnp_rq->data.ptrvalue = NULL;
827     vnp_rq = vnp_rq->next;
828   }
829 
830   /* free response list */
831   response_list = TaxFixItemListFree (response_list);
832 
833   uniq_list = ValNodeFree (uniq_list);
834 
835   /* now restore original order */
836   for (i = 0; i < num_orgs - 1; i++) {
837     ptr_array[i]->next = ptr_array[i + 1];
838   }
839   ptr_array[num_orgs - 1]->next = NULL;
840   request_list = ptr_array[0];
841   ptr_array = MemFree (ptr_array);
842 
843   /* now reassociate with original objects */
844   for (vnp_b = biop_list, vnp_rp = request_list; vnp_b != NULL && vnp_rp != NULL; vnp_b = vnp_b->next, vnp_rp = vnp_rp->next) {
845     t = vnp_rp->data.ptrvalue;
846     t->data_choice = vnp_b->choice;
847     t->data = vnp_b->data.ptrvalue;
848     t->taxname = MemFree (t->taxname);
849     biop = GetBioSourceFromObject (t->data_choice, t->data);
850     if (biop != NULL && biop->org != NULL) {
851       t->taxname = StringSave (biop->org->taxname);
852     }
853   }
854 
855   /* now remove items for which the original and suggested taxnames are the same */
856   
857   ValNodePurge (&request_list, TaxFixItemOrigIsOk, TaxFixItemListFree);
858   return request_list;
859 }
860 
861 
862 NLM_EXTERN OrgRefPtr Taxon3GetOrg (OrgRefPtr orp)
863 
864 {
865   Taxon3RequestPtr t3rq;
866   Taxon3ReplyPtr   t3ry;
867   T3DataPtr        tdp;
868   OrgRefPtr        t3orp = NULL;
869   T3ReplyPtr        trp;
870   T3ErrorPtr        tep;
871 
872   if (orp == NULL) return NULL;
873   
874   t3rq = CreateTaxon3Request (0, NULL, orp);
875   if (t3rq == NULL) return NULL;
876   t3ry = Tax3SynchronousQuery (t3rq);
877   Taxon3RequestFree (t3rq);
878   if (t3ry != NULL) {
879     for (trp = t3ry->reply; trp != NULL; trp = trp->next) {
880       switch (trp->choice) {
881         case T3Reply_error :
882           tep = (T3ErrorPtr) trp->data.ptrvalue;
883           if (tep != NULL) {
884             ErrPostEx (SEV_ERROR, 0, 0, tep->message);
885           }
886           break;
887         case T3Reply_data :
888           tdp = (T3DataPtr) trp->data.ptrvalue;
889           if (tdp != NULL) {
890             t3orp = (OrgRefPtr)(tdp->org);
891             tdp->org = NULL;
892           }
893           break;
894         default :
895           break;
896       }
897     }
898     Taxon3ReplyFree (t3ry);
899   }
900   
901   return t3orp;
902 }
903 
904 static Boolean DoOrgIdsMatch(BioSourcePtr b1, BioSourcePtr b2)
905 {
906   DbtagPtr d1 = NULL, d2 = NULL;
907   ValNodePtr vnp;
908 
909   if (b1 == NULL || b2 == NULL) 
910   {
911     return FALSE;
912   }
913   if (b1->org ==  NULL || b2->org == NULL) 
914   {
915     return FALSE;
916   }
917   for (vnp = b1->org->db; vnp; vnp = vnp->next) 
918   {
919     d1 = (DbtagPtr) vnp->data.ptrvalue;
920     if (StringCmp(d1->db, "taxon") == 0) 
921     {
922       break;
923     }
924   }
925   for (vnp = b2->org->db; vnp; vnp = vnp->next) 
926   {
927     d2 = (DbtagPtr) vnp->data.ptrvalue;
928     if (StringCmp(d2->db, "taxon") == 0) 
929     {
930       break;
931     }
932   }
933   if (d1 && d2) 
934   {
935     if (d1->tag->id == d2->tag->id) 
936     {
937       return TRUE;
938     }
939   }
940   else if (StringICmp(b1->org->taxname, b2->org->taxname) == 0) 
941   {
942     return TRUE;
943   }
944   return FALSE;
945 }
946 
947 static BioSourcePtr Tax3BioSourceMerge(BioSourcePtr host, BioSourcePtr guest)
948 {
949   SubSourcePtr ssp, sp, last_ssp;
950   OrgModPtr omp, homp, last_omp;
951   OrgNamePtr    onp;
952     
953   if (host == NULL && guest == NULL) 
954   {
955     return NULL;
956   }
957   if (host == NULL && guest != NULL) 
958   {
959     host = AsnIoMemCopy(guest, (AsnReadFunc) BioSourceAsnRead, 
960                                    (AsnWriteFunc) BioSourceAsnWrite);
961     return host;
962   }
963   if (host != NULL && guest == NULL) 
964   {
965     return host;
966   }
967   if (host->genome == 0 && guest->genome != 0) 
968   {
969     host->genome = guest->genome;
970   }
971   if (host->origin == 0 && guest->origin != 0) 
972   {
973     host->origin = guest->origin;
974   }
975   last_ssp = host->subtype;
976   while (last_ssp != NULL && last_ssp->next != NULL)
977   {
978       last_ssp = last_ssp->next;
979   }
980   for (ssp = guest->subtype; ssp; ssp = ssp->next) 
981   {
982     sp = AsnIoMemCopy(ssp, (AsnReadFunc) SubSourceAsnRead, 
983                                    (AsnWriteFunc) SubSourceAsnWrite);
984     if (last_ssp == NULL)
985     {
986       host->subtype = sp;
987     }
988     else
989     {
990       last_ssp->next = sp;
991       last_ssp = sp;
992     }
993   }
994   if (guest->org->orgname) 
995   {
996        if ((onp = host->org->orgname)    == NULL) 
997        {
998          onp = OrgNameNew();
999          host->org->orgname = onp;
1000     }    
1001     last_omp = onp->mod;        
1002     while (last_omp != NULL && last_omp->next != NULL)
1003     {
1004       last_omp = last_omp->next;
1005     }
1006     for (omp = guest->org->orgname->mod; omp; omp = omp->next) 
1007     {
1008       homp = AsnIoMemCopy(omp, (AsnReadFunc) OrgModAsnRead, 
1009                                    (AsnWriteFunc) OrgModAsnWrite);
1010       if (last_omp == NULL)
1011       {
1012           onp->mod = homp;
1013       }
1014       else
1015       {
1016           last_omp->next = homp;
1017           last_omp = homp;
1018       }
1019     }
1020   }
1021   return host;
1022 }
1023 
1024 
1025 /**************************************************************************
1026 *    Compare BioSources in one bioseq->descr using Taxonomy to find
1027 *    their join parent
1028 *    merge if organisms are the same or create a feature if different
1029 *
1030 **************************************************************************/
1031 NLM_EXTERN void Tax3MergeSourceDescr (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
1032 {
1033     BioseqPtr    bsp = NULL;
1034     ValNodePtr   vnp, newlist;
1035     SeqFeatPtr   sfp;
1036     BioSourcePtr first_biop = NULL;
1037     BioSourcePtr other_biop;
1038     BioSourcePtr tmp_biop;
1039     ObjValNodePtr ovp;
1040 
1041     if (!IS_Bioseq(sep)) {
1042         return;
1043     }
1044     newlist = (ValNodePtr) data;
1045     bsp = (BioseqPtr) sep->data.ptrvalue;
1046     if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const) 
1047             && (bsp->repr != Seq_repr_delta))
1048         return;
1049 
1050     if (! ISA_na(bsp->mol))
1051         return;
1052     
1053     /* add the descriptors in newlist to the end of the list in bsp->descr*/
1054     if (bsp->descr == NULL)
1055     {
1056       bsp->descr = newlist;
1057     }
1058     else
1059     {
1060       for (vnp = bsp->descr; vnp->next != NULL; vnp = vnp->next)
1061       {    
1062       }
1063       vnp->next = newlist;
1064     }
1065     
1066     /* now find the first source descriptor in bsp->descr that has an org*/
1067     /* note - we can't use SeqMgrGetNextDescriptor here because we have just
1068      * added to the descriptors, so they are not indexed. */
1069     for (vnp = bsp->descr; vnp != NULL; vnp = vnp->next)
1070     {
1071       if (vnp->choice != Seq_descr_source) continue;
1072       if (vnp->data.ptrvalue == NULL)
1073       {
1074           ErrPostStr(SEV_WARNING, 0, 0, "Source descriptor missing data");
1075           if (vnp->extended)
1076           {
1077             ovp = (ObjValNodePtr) vnp;
1078             ovp->idx.deleteme = TRUE;
1079           }
1080       }
1081       if (first_biop == NULL)
1082       {
1083           first_biop = vnp->data.ptrvalue;
1084       }
1085       else
1086       {
1087         other_biop = vnp->data.ptrvalue;
1088         /* detach biosource pointer from descr, so that it will not be freed
1089          * when the descriptor is deleted.
1090          */
1091         vnp->data.ptrvalue = NULL;
1092         if (vnp->extended)
1093         {
1094           ovp = (ObjValNodePtr) vnp;
1095             ovp->idx.deleteme = TRUE;
1096         }
1097         if (DoOrgIdsMatch(first_biop, other_biop)) 
1098         {
1099           /* merge the two sources */
1100           tmp_biop = Tax3BioSourceMerge(first_biop, other_biop);
1101           if (tmp_biop == NULL)
1102           {
1103               ErrPostStr (SEV_WARNING, 0, 0, "Failed to merge biosources");
1104           }
1105           else
1106           {
1107               first_biop = tmp_biop;
1108           }
1109           other_biop = BioSourceFree (other_biop);
1110         } else {
1111           /* create a source feature */
1112           sfp = CreateNewFeatureOnBioseq (bsp, SEQFEAT_BIOSRC, NULL);
1113           if (sfp != NULL)
1114           {
1115             sfp->data.value.ptrvalue = other_biop;
1116           }
1117         }
1118       }
1119     }
1120     return;
1121 }
1122 
1123 static Int4 GetTaxIdFromOrgRef (OrgRefPtr orp)
1124 {
1125   Int4       tax_id = -1;
1126   ValNodePtr vnp;
1127   DbtagPtr   d;
1128 
1129   if (orp != NULL)
1130   {
1131     for (vnp = orp->db; vnp != NULL; vnp = vnp->next) 
1132     {
1133       d = (DbtagPtr) vnp->data.ptrvalue;
1134       if (StringCmp(d->db, "taxon") == 0) 
1135       {
1136         tax_id = d->tag->id;
1137         break;
1138       }
1139     }
1140   }
1141   return tax_id;
1142 }
1143 
1144 NLM_EXTERN Int4 Taxon3GetTaxIdByOrgRef (OrgRefPtr orp)
1145 {
1146   OrgRefPtr  orp_repl;
1147   Int4       tax_id = -1;
1148   
1149   if (orp == NULL) return -1;
1150   
1151   orp_repl = Taxon3GetOrg (orp);
1152   tax_id = GetTaxIdFromOrgRef (orp_repl);
1153   OrgRefFree (orp_repl);
1154   
1155   return tax_id;
1156 }
1157 
1158 NLM_EXTERN OrgRefPtr Taxon3GetOrgRefByName (CharPtr orgname)
1159 {
1160   OrgRefPtr request, org;
1161   
1162   request = OrgRefNew ();
1163   if (request == NULL) return NULL;
1164   request->taxname = orgname;
1165   org = Taxon3GetOrg (request);
1166   request->taxname = NULL;
1167   OrgRefFree (request);
1168   return org;
1169 }
1170 
1171 NLM_EXTERN Int4 Taxon3GetTaxIdByName (CharPtr orgname)
1172 {
1173   OrgRefPtr orp;
1174   Int4      tax_id;
1175   
1176   orp = Taxon3GetOrgRefByName (orgname);
1177   tax_id = GetTaxIdFromOrgRef (orp);
1178 
1179   OrgRefFree(orp);
1180   return tax_id;
1181 }
1182 
1183 static void AddBioSourceToList (BioSourcePtr biop, Pointer userdata)
1184 {
1185   ValNodePtr PNTR list;
1186   
1187   if (biop == NULL || userdata == NULL) return;
1188   list = (ValNodePtr PNTR) userdata;
1189   ValNodeAddPointer (list, 4, (Pointer) biop);
1190 }
1191 
1192 NLM_EXTERN void Taxon3ReplaceOrgInSeqEntry (SeqEntryPtr sep, Boolean keep_syn)
1193 {
1194   ValNodePtr   biop_list = NULL;
1195   ValNodePtr   request_list = NULL;
1196   ValNodePtr   response_list = NULL;
1197   ValNodePtr   biop_vnp, response_vnp;
1198   BioSourcePtr biop;
1199   OrgRefPtr    swap_org, response_org;
1200   
1201   VisitBioSourcesInSep (sep, &biop_list, AddBioSourceToList);
1202 
1203   for (biop_vnp = biop_list; biop_vnp != NULL; biop_vnp = biop_vnp->next)
1204   {
1205     biop = (BioSourcePtr) biop_vnp->data.ptrvalue;
1206     ValNodeAddPointer (&request_list, 3, biop->org);
1207   }
1208   response_list = Taxon3GetOrgRefList (request_list);
1209  
1210   if (ValNodeLen (response_list) != ValNodeLen (request_list))
1211   {
1212     Message (MSG_POST, "Unable to retrieve information from tax server");
1213     return;
1214   }
1215 
1216   for (biop_vnp = biop_list, response_vnp = response_list;
1217        biop_vnp != NULL && response_vnp != NULL;
1218        biop_vnp = biop_vnp->next, response_vnp = response_vnp->next)
1219   {
1220     biop = (BioSourcePtr) biop_vnp->data.ptrvalue;
1221     swap_org = biop->org;
1222     response_org = response_vnp->data.ptrvalue;
1223     if (response_org != NULL)
1224     {
1225       biop->org = response_org;
1226       response_vnp->data.ptrvalue = NULL;
1227       OrgRefFree (swap_org);
1228       if (! keep_syn)
1229       {
1230         biop->org->syn = ValNodeFreeData(biop->org->syn);
1231       }
1232     }
1233   }
1234   ValNodeFree (request_list);
1235   ValNodeFree (response_list);
1236   ValNodeFree (biop_list);   
1237 }
1238 
1239 
1240 static void GetBioSourceFeaturesForCheck (SeqFeatPtr sfp, Pointer userdata)
1241 {
1242   ValNodePtr PNTR list = (ValNodePtr PNTR) userdata;
1243   if (sfp == NULL || sfp->data.choice != SEQFEAT_BIOSRC || list == NULL
1244       || sfp->data.value.ptrvalue == NULL) {
1245     return;
1246   }
1247   ValNodeAddPointer (list, OBJ_SEQFEAT, sfp);
1248 }
1249 
1250 
1251 static void GetBioSourceDescriptorsForCheck (SeqDescrPtr sdp, Pointer userdata)
1252 {
1253   ValNodePtr PNTR list = (ValNodePtr PNTR) userdata;
1254   if (sdp == NULL || sdp->choice != Seq_descr_source || list == NULL
1255       || sdp->data.ptrvalue == NULL) {
1256     return;
1257   }
1258   ValNodeAddPointer (list, OBJ_SEQDESC, sdp);
1259 }
1260 
1261 
1262 static DbtagPtr GetTaxonXref (OrgRefPtr org)
1263 {
1264   ValNodePtr vnp;
1265   DbtagPtr   dbt = NULL;
1266   
1267   if (org == NULL) return NULL;
1268   vnp = org->db;
1269   while (vnp != NULL && dbt == NULL) {
1270     dbt = (DbtagPtr) vnp->data.ptrvalue;
1271     if (dbt != NULL && StringICmp ((CharPtr) dbt->db, "taxon") != 0) {
1272       dbt = NULL;
1273     }
1274     vnp = vnp->next;
1275   }
1276   return dbt;
1277 }
1278   
1279 static Boolean DoTaxonIdsMatch (OrgRefPtr org1, OrgRefPtr org2)
1280 {
1281   DbtagPtr   dbt1 = NULL, dbt2 = NULL;
1282   
1283   if (org1 == NULL || org2 == NULL) return FALSE;
1284   
1285   dbt1 = GetTaxonXref (org1);
1286   if (dbt1 == NULL) return FALSE;
1287   dbt2 = GetTaxonXref (org2);
1288   if (dbt2 == NULL) return FALSE;
1289   
1290   return DbtagMatch(dbt1, dbt2);
1291 }
1292 
1293 
1294 NLM_EXTERN void Taxon3CheckOrgInSeqEntry (SeqEntryPtr sep, ValNodePtr PNTR not_found, ValNodePtr PNTR bad_match)
1295 {
1296   ValNodePtr   request_list = NULL;
1297   ValNodePtr   response_list = NULL;
1298   ValNodePtr   biop_vnp, response_vnp;
1299   BioSourcePtr biop;
1300   OrgRefPtr    orig_org, response_org;
1301   ValNodePtr   item_list = NULL;
1302   SeqFeatPtr   sfp;
1303   SeqDescrPtr  sdp;
1304   
1305   VisitFeaturesInSep (sep, &item_list, GetBioSourceFeaturesForCheck);
1306   VisitDescriptorsInSep (sep, &item_list, GetBioSourceDescriptorsForCheck);
1307   
1308   for (biop_vnp = item_list; biop_vnp != NULL; biop_vnp = biop_vnp->next) {
1309     biop = NULL;
1310     if (biop_vnp->choice == OBJ_SEQFEAT) {
1311       sfp = (SeqFeatPtr) biop_vnp->data.ptrvalue;  
1312       if (sfp != NULL) {  
1313         biop = (BioSourcePtr) sfp->data.value.ptrvalue;      
1314       }
1315     } else if (biop_vnp->choice == OBJ_SEQDESC) {
1316       sdp = (SeqDescrPtr) biop_vnp->data.ptrvalue;
1317       if (sdp != NULL) {
1318         biop = (BioSourcePtr) sdp->data.ptrvalue;
1319       }
1320     }
1321     if (biop != NULL) {
1322       ValNodeAddPointer (&request_list, 3, biop->org);
1323     }
1324   }
1325 
1326   response_list = Taxon3GetOrgRefList (request_list);
1327  
1328   if (ValNodeLen (response_list) != ValNodeLen (request_list))
1329   {
1330     Message (MSG_POST, "Unable to retrieve information from tax server");
1331     ValNodeFree (request_list);
1332     ValNodeFree (item_list);
1333     return;
1334   }
1335 
1336   for (biop_vnp = item_list, response_vnp = response_list;
1337        biop_vnp != NULL && response_vnp != NULL;
1338        biop_vnp = biop_vnp->next, response_vnp = response_vnp->next)
1339   {
1340     response_org = response_vnp->data.ptrvalue;  
1341     biop = NULL;
1342     orig_org = NULL;
1343     if (biop_vnp->choice == OBJ_SEQFEAT) {
1344       sfp = (SeqFeatPtr) biop_vnp->data.ptrvalue;    
1345       if (sfp != NULL) {  
1346         biop = (BioSourcePtr) sfp->data.value.ptrvalue;
1347       }
1348     } else if (biop_vnp->choice == OBJ_SEQDESC) {
1349       sdp = (SeqDescrPtr) biop_vnp->data.ptrvalue;
1350       if (sdp != NULL) {
1351         biop = (BioSourcePtr) sdp->data.ptrvalue;
1352       }
1353     }
1354     if (biop == NULL) {
1355       Message (MSG_POST, "Error collecting data");
1356       ValNodeFree (request_list);
1357       ValNodeFree (item_list);
1358       return;
1359     } else {
1360       orig_org = biop->org;
1361       if (orig_org != NULL) {
1362         if (response_org == NULL) {
1363           ValNodeAddPointer (not_found, biop_vnp->choice, biop_vnp->data.ptrvalue);          
1364         } else if (StringCmp (orig_org->taxname, response_org->taxname) != 0) {
1365           ValNodeAddPointer (bad_match, biop_vnp->choice, biop_vnp->data.ptrvalue);
1366         } else if (!DoTaxonIdsMatch(orig_org, response_org)) {
1367           ValNodeAddPointer (bad_match, biop_vnp->choice, biop_vnp->data.ptrvalue);
1368         }        
1369       }
1370     }
1371     OrgRefFree (response_org);
1372   }
1373   ValNodeFree (request_list);
1374   ValNodeFree (response_list);
1375   ValNodeFree (item_list);   
1376 }
1377 
1378 
1379 NLM_EXTERN void CheckTaxNamesAgainstTaxDatabase (ValNodePtr PNTR discrepancy_list, ValNodePtr sep_list)
1380 {
1381   ValNodePtr  vnp;
1382   SeqEntryPtr sep;
1383   SeqEntryPtr orig_scope;
1384   ValNodePtr  not_found = NULL, bad_match = NULL;
1385   CharPtr     bad_match_fmt = "%d tax names do not match taxonomy lookup.";
1386   CharPtr     no_match_fmt = "%d organisms are not found in taxonomy lookup.";
1387   ClickableItemPtr dip;
1388   
1389   if (discrepancy_list == NULL) return;
1390 
1391   
1392   orig_scope = SeqEntryGetScope ();
1393   for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
1394     sep = vnp->data.ptrvalue;
1395     SeqEntrySetScope (sep);
1396     Taxon3CheckOrgInSeqEntry (sep, &not_found, &bad_match);
1397   }
1398   SeqEntrySetScope (orig_scope);
1399   if (not_found != NULL) {
1400     dip = NewClickableItem (DISC_NO_TAXLOOKUP, no_match_fmt, not_found);
1401     dip->subcategories = NULL;
1402     ValNodeAddPointer (discrepancy_list, 0, dip);
1403   }
1404   if (bad_match != NULL) {
1405     dip = NewClickableItem (DISC_BAD_TAXLOOKUP, bad_match_fmt, bad_match);
1406     dip->subcategories = NULL;
1407     ValNodeAddPointer (discrepancy_list, 0, dip);
1408   }
1409 }
1410 
1411 
1412 static ValNodePtr FreeOrgRefValNodeList (ValNodePtr vnp)
1413 {
1414   ValNodePtr vnp_next;
1415   OrgRefPtr  org;
1416 
1417   while (vnp != NULL)
1418   { 
1419     vnp_next = vnp->next;
1420     vnp->next = NULL;
1421     org = (OrgRefPtr) vnp->data.ptrvalue;
1422     vnp->data.ptrvalue = OrgRefFree (org);
1423     vnp = ValNodeFree (vnp);
1424     vnp = vnp_next;
1425   }
1426   return vnp;
1427 }
1428 
1429 
1430 static Boolean EndsWithSp (CharPtr str)
1431 {
1432   Int4 len;
1433 
1434   if (StringHasNoText (str)) return FALSE;
1435   len = StringLen (str);
1436   if (len < 4) return FALSE;
1437   if (StringCmp (str + len - 4, " sp.") == 0) return TRUE;
1438   return FALSE;
1439 }
1440 
1441 
1442 static CharPtr RemoveSp (CharPtr orig)
1443 {
1444   CharPtr cpy = NULL;
1445   Int4    len;
1446 
1447   len = StringLen (orig);
1448   if (len >= 4 && StringCmp (orig + len - 4, " sp.") == 0) {
1449     cpy = (CharPtr) MemNew (sizeof (Char) * len - 3);
1450     StringNCpy (cpy, orig, len - 4);
1451     cpy[len - 4] = 0;
1452   }
1453   return cpy;
1454 }
1455 
1456   
1457 static void AddRequestOrgForString (CharPtr str, CharPtr host, ValNodePtr PNTR request_list, ValNodePtr PNTR req_host_list)
1458 {
1459   OrgRefPtr    request_org;
1460   CharPtr      cp, cpy;
1461 
1462   if (StringHasNoText (str) || host == NULL || request_list == NULL || req_host_list == NULL)
1463   {
1464     return;
1465   }
1466 
1467   /* if ends with " sp.", remove " sp." */
1468   cpy = RemoveSp (host);
1469   if (cpy != NULL) {
1470     request_org = OrgRefNew();
1471     request_org->taxname = StringSave (cpy);
1472     ValNodeAddPointer (request_list, 3, request_org);
1473     ValNodeAddPointer (req_host_list, 0, StringSave (host));
1474   } else {
1475     request_org = OrgRefNew();
1476     request_org->taxname = StringSave (str);
1477     ValNodeAddPointer (request_list, 3, request_org);
1478     ValNodeAddPointer (req_host_list, 0, StringSave (host));
1479 
1480      
1481     /* if more than one word, try chopping off last to see if abbreviated name looks up */
1482     cp = StringRChr (str, ' ');
1483     if (cp != NULL)
1484     {
1485       cpy = StringSave (str);    
1486       cp = StringRChr (cpy, ' ');
1487       if (cp != NULL)
1488       {
1489         *cp = 0;
1490         AddRequestOrgForString (cpy, host, request_list, req_host_list);
1491       }
1492       cpy = MemFree (cpy);
1493     }
1494   }
1495 }
1496 
1497 typedef struct specifichostcheck {
1498   CharPtr      spec_host;
1499   ValNodePtr   request_list;  /* ValNodeList of orgs */
1500   ValNodePtr   response_list; /* ValNodeList of orgs */
1501   ValNodePtr   biop_list;     /* ValNodeList of sources with this spec_host value */
1502 } SpecificHostCheckData, PNTR SpecificHostCheckPtr;
1503 
1504 
1505 static ValNodePtr SpecificHostCheckListFree (ValNodePtr vnp)
1506 {
1507   ValNodePtr vnp_next;
1508   SpecificHostCheckPtr p;
1509 
1510   while (vnp != NULL)
1511   {
1512     vnp_next = vnp->next;
1513     vnp->next = NULL;
1514     p = (SpecificHostCheckPtr) vnp->data.ptrvalue;
1515     if (p != NULL)
1516     {
1517       p->request_list = FreeOrgRefValNodeList (p->request_list);
1518       p->response_list = FreeOrgRefValNodeList (p->response_list);
1519       p->spec_host = MemFree (p->spec_host);
1520       p->biop_list = ValNodeFree (p->biop_list);
1521     }
1522     vnp = ValNodeFreeData (vnp);
1523     vnp = vnp_next;
1524   }
1525   return vnp;
1526 }
1527 
1528 
1529 static ValNodePtr SortSpecificHostOrgs (ValNodePtr host_list, ValNodePtr request_list, ValNodePtr response_list)
1530 {
1531   ValNodePtr           check_list = NULL;
1532   SpecificHostCheckPtr p = NULL;
1533   CharPtr              host, prev_host = NULL;
1534 
1535   while (host_list != NULL
1536          && request_list != NULL
1537          && response_list != NULL)
1538   {
1539     host = (CharPtr) host_list->data.ptrvalue;
1540     if (StringCmp (host, prev_host) != 0)
1541     {
1542       p = (SpecificHostCheckPtr) MemNew (sizeof (SpecificHostCheckData));
1543       p->spec_host = StringSave (host);
1544       ValNodeAddPointer (&check_list, 0, p);
1545       prev_host = host;
1546     }
1547     ValNodeAddPointer (&(p->request_list), request_list->choice, request_list->data.ptrvalue);
1548     ValNodeAddPointer (&(p->response_list), response_list->choice, response_list->data.ptrvalue);
1549     request_list->data.ptrvalue = NULL;
1550     response_list->data.ptrvalue = NULL;
1551     host_list = host_list->next;
1552     request_list = request_list->next;
1553     response_list = response_list->next;
1554   }
1555   return check_list;        
1556 }
1557 
1558 
1559 static Boolean StringAlreadyInValNodeList (CharPtr str, ValNodePtr list) 
1560 {
1561   if (StringHasNoText (str))
1562   {
1563     return TRUE;
1564   }
1565   
1566   while (list != NULL)
1567   {
1568     if (StringCmp (str, list->data.ptrvalue) == 0)
1569     {
1570       return TRUE;
1571     }
1572     list = list->next;
1573   }
1574   return FALSE;
1575 }
1576 
1577 
1578 static BioSourcePtr GetBioSourceFromValNode (ValNodePtr vnp)
1579 {
1580   SeqFeatPtr sfp;
1581   SeqDescrPtr sdp;
1582   BioSourcePtr biop = NULL;
1583 
1584   if (vnp == NULL || vnp->data.ptrvalue == NULL) return NULL;
1585 
1586   if (vnp->choice == OBJ_SEQFEAT)
1587   {
1588     sfp = (SeqFeatPtr) vnp->data.ptrvalue;
1589     biop = (BioSourcePtr) sfp->data.value.ptrvalue;
1590   } 
1591   else if (vnp->choice == OBJ_SEQDESC)
1592   {
1593     sdp = (SeqDescrPtr) vnp->data.ptrvalue;
1594     biop = (BioSourcePtr) sdp->data.ptrvalue;
1595   }
1596   return biop;
1597 }
1598 
1599 
1600 static CharPtr extract_list[] = {
1601   "cf.",
1602   "cf ",
1603   "aff ",
1604   "aff.",
1605   "near",
1606   "nr.",
1607   "nr ",
1608   NULL};
1609 
1610 static void AdjustSpecificHostForTaxServer (CharPtr spec_host)
1611 {
1612   CharPtr cp, src, dst;
1613   Int4 i;
1614 
1615   /* ignore separator words */
1616   for (i = 0; extract_list[i] != NULL; i++) {
1617     if ((cp = StringSearch (spec_host, extract_list[i])) != NULL && cp > spec_host && isspace (*(cp - 1))) {
1618       src = cp + StringLen (extract_list[i]);
1619       dst = cp;
1620       while (isspace (*src)) {
1621         src++;
1622       }
1623       while (*src != 0) {
1624         *dst = *src;
1625         dst++;
1626         src++;
1627       }
1628       *dst = 0;
1629     }
1630   }
1631 }
1632 
1633 
1634 static void AddBioSourcesToSpecificHostChecklist (ValNodePtr biop_list, ValNodePtr check_list)
1635 {
1636   ValNodePtr biop_vnp, last_vnp = NULL, stop_search;
1637   BioSourcePtr biop;
1638   OrgModPtr    mod;
1639   SpecificHostCheckPtr p;
1640   CharPtr tmp;
1641 
1642   if (biop_list == NULL || check_list == NULL) return;
1643 
1644   for (biop_vnp = biop_list; biop_vnp != NULL; biop_vnp = biop_vnp->next)
1645   {
1646 
1647     biop = GetBioSourceFromValNode (biop_vnp);
1648     if (biop == NULL) continue;
1649 
1650     if (biop == NULL || biop->org == NULL || biop->org->orgname == NULL) continue;
1651     mod = biop->org->orgname->mod;
1652     while (mod != NULL)
1653     {
1654       if (mod->subtype == ORGMOD_nat_host
1655           && !StringHasNoText (mod->subname))
1656       {
1657         if (last_vnp == NULL)
1658         {
1659           last_vnp = check_list;
1660           stop_search = NULL;
1661         }
1662         else
1663         {
1664           stop_search = last_vnp;
1665         }
1666         tmp = StringSave (mod->subname);
1667         AdjustSpecificHostForTaxServer (tmp);
1668         p = NULL;
1669         while (last_vnp != NULL 
1670                && (p = (SpecificHostCheckPtr) last_vnp->data.ptrvalue) != NULL
1671                && StringCmp (p->spec_host, tmp) != 0)
1672         {
1673           p = NULL;
1674           last_vnp = last_vnp->next;
1675         }
1676         if (p == NULL && stop_search != NULL)
1677         {
1678           last_vnp = check_list;
1679           while (last_vnp != stop_search 
1680                  && (p = (SpecificHostCheckPtr) last_vnp->data.ptrvalue) != NULL
1681                  && StringCmp (p->spec_host, tmp) != 0)
1682           {
1683             p = NULL;
1684             last_vnp = last_vnp->next;
1685           }
1686         }
1687         tmp = MemFree (tmp);
1688         if (p != NULL)
1689         {
1690           ValNodeAddPointer (&(p->biop_list), biop_vnp->choice, biop_vnp->data.ptrvalue);
1691         }
1692       }
1693       mod = mod->next;
1694     }
1695   }
1696 }
1697 
1698 
1699 static Boolean ShouldCheckSpecificHostValueForValidator (CharPtr spec_host)
1700 {
1701   if (StringHasNoText (spec_host) || !isupper (*spec_host)) {
1702     return FALSE;
1703   } else {
1704     return TRUE;
1705   }
1706 }
1707 
1708 static CharPtr GetSpecificHostValueToCheckForValidator (CharPtr spec_host)
1709 {
1710   CharPtr cp, check_val = NULL;
1711   Int4    len = 0;
1712 
1713   if (ShouldCheckSpecificHostValueForValidator(spec_host)) {
1714     cp = spec_host;
1715     /* skip first word */
1716     while (*cp != 0 && !isspace (*cp)) {
1717       cp++;
1718       len++;
1719     }
1720     while (isspace (*cp)) {
1721       cp++;
1722       len++;
1723     }
1724     if (*cp != '(' && StringNCmp (cp, "sp.", 3) != 0 && *cp != 0) {
1725       /* collect second word */
1726       while (*cp != 0 && !isspace (*cp)) {
1727         cp++;
1728         len++;
1729       }
1730     }
1731     check_val = (CharPtr) MemNew (sizeof (Char) * (len + 1));
1732     StringNCpy (check_val, spec_host, len);
1733     check_val[len] = 0;
1734     TrimSpacesAroundString (check_val);
1735   }
1736   return check_val;
1737 }
1738 
1739 static Boolean ShouldCheckSpecificHostInBioSource (BioSourcePtr biop)
1740 {
1741   OrgModPtr mod;
1742   Boolean   rval = FALSE;
1743 
1744   if (biop == NULL || biop->org == NULL || biop->org->orgname == NULL) {
1745     return FALSE;
1746   }
1747   for (mod = biop->org->orgname->mod; mod != NULL && !rval; mod = mod->next) {
1748     if (mod->subtype == ORGMOD_nat_host) {
1749       rval = ShouldCheckSpecificHostValueForValidator (mod->subname);
1750     }
1751   }
1752   return rval;
1753 }
1754 
1755 
1756 
1757 static void AddValidatorSpecificHostBioSourceFeatToList (SeqFeatPtr sfp, Pointer userdata)
1758 {
1759   if (sfp == NULL || sfp->data.choice != SEQFEAT_BIOSRC || userdata == NULL) return;
1760 
1761   if (ShouldCheckSpecificHostInBioSource (sfp->data.value.ptrvalue))
1762   {
1763     ValNodeAddPointer ((ValNodePtr PNTR) userdata, OBJ_SEQFEAT, sfp);
1764   }
1765 }
1766 
1767 
1768 static void AddValidatorSpecificHostBioSourceDescToList (SeqDescrPtr sdp, Pointer userdata)
1769 {
1770   if (sdp == NULL || sdp->choice != Seq_descr_source || userdata == NULL) return;
1771 
1772   if (ShouldCheckSpecificHostInBioSource (sdp->data.ptrvalue))
1773   {
1774     ValNodeAddPointer ((ValNodePtr PNTR) userdata, OBJ_SEQDESC, sdp);
1775   }
1776 }
1777 
1778 
1779 static ValNodePtr GetValidatorSpecificHostBioSourceList (SeqEntryPtr sep)
1780 {
1781   ValNodePtr list = NULL;
1782 
1783   VisitFeaturesInSep (sep, &list, AddValidatorSpecificHostBioSourceFeatToList);
1784   VisitDescriptorsInSep (sep, &list, AddValidatorSpecificHostBioSourceDescToList);
1785   return list;
1786 }
1787 
1788 
1789 static void 
1790 FormatValidatorSpecificHostRequests 
1791 (ValNodePtr spec_host_list,
1792  ValNodePtr PNTR request_list,
1793  ValNodePtr PNTR req_host_list)
1794 {
1795   ValNodePtr vnp;
1796   CharPtr    orig;
1797   OrgRefPtr  request_org;
1798   
1799   /* now format requests for unique specific_host values */
1800   for (vnp = spec_host_list; vnp != NULL; vnp = vnp->next)
1801   {
1802     orig = (CharPtr) vnp->data.ptrvalue;
1803     request_org = OrgRefNew();
1804     request_org->taxname = GetSpecificHostValueToCheckForValidator (orig);
1805     ValNodeAddPointer (request_list, 3, request_org);
1806     ValNodeAddPointer (req_host_list, 0, StringSave (orig));    
1807   }
1808 }
1809 
1810 static Boolean MatchesSynonym (CharPtr txt, OrgRefPtr response_org)
1811 {
1812   ValNodePtr syn;
1813   Boolean    rval = FALSE;
1814   if (StringHasNoText (txt) || response_org == NULL) return FALSE;
1815 
1816   for (syn = response_org->syn; syn != NULL && !rval; syn = syn->next)
1817   {
1818     if (StringCmp (txt, syn->data.ptrvalue) == 0)
1819     {
1820       rval = TRUE;
1821     }
1822   }
1823   return rval;
1824 }
1825 
1826 
1827 static Boolean MatchesGenBankSynonym (CharPtr txt, OrgRefPtr response_org)
1828 {
1829   OrgModPtr mod;
1830   Boolean   rval = FALSE;
1831 
1832   if (StringHasNoText (txt) || response_org == NULL || response_org->orgname == NULL) return FALSE;
1833   mod = response_org->orgname->mod;
1834   while (mod != NULL) 
1835   {
1836     if ((mod->subtype == ORGMOD_gb_synonym || mod->subtype == ORGMOD_old_name) && StringCmp (txt, mod->subname) == 0)
1837     {
1838       rval = TRUE;
1839     }
1840     mod = mod->next;
1841   }
1842   return rval;
1843 }
1844 
1845 
1846 static ValNodePtr GetListOfUniqueSpecificHostValues (ValNodePtr biop_list)
1847 {
1848   ValNodePtr   biop_vnp;
1849   BioSourcePtr biop;
1850   OrgModPtr    mod;
1851   ValNodePtr   spec_host_list = NULL;
1852   CharPtr      tmp;
1853   
1854   /* get a list of unique specific_host values */
1855   for (biop_vnp = biop_list; biop_vnp != NULL; biop_vnp = biop_vnp->next)
1856   {
1857     if (biop_vnp->data.ptrvalue == NULL) continue;
1858     biop = GetBioSourceFromValNode (biop_vnp);
1859     if (biop == NULL || biop->org == NULL || biop->org->orgname == NULL) continue;
1860     mod = biop->org->orgname->mod;
1861     while (mod != NULL)
1862     {
1863       if (mod->subtype == ORGMOD_nat_host
1864           && !StringHasNoText (mod->subname))
1865       {
1866         tmp = StringSave (mod->subname);
1867         AdjustSpecificHostForTaxServer (tmp);
1868         ValNodeAddPointer (&spec_host_list, 0, tmp);
1869       }
1870       mod = mod->next;
1871     }
1872   }
1873   spec_host_list = ValNodeSort (spec_host_list, SortVnpByString);
1874   ValNodeUnique (&spec_host_list, SortVnpByString, ValNodeFreeData);
1875   return spec_host_list;
1876 }
1877 
1878 
1879 static Boolean StringIsExactMatchForOrgRef (CharPtr str, OrgRefPtr org)
1880 {
1881   if (StringHasNoText (str) || org == NULL) {
1882     return FALSE;
1883   } else if (StringCmp (org->taxname, str) == 0
1884              || StringCmp (org->common, str) == 0 
1885              || MatchesSynonym (str, org) 
1886              || MatchesGenBankSynonym (str, org)) {
1887     return TRUE;
1888   } else {
1889     return FALSE;
1890   }
1891 }
1892 
1893 static CharPtr FindMatchInOrgRef (CharPtr str, OrgRefPtr org)
1894 {
1895   ValNodePtr syn;
1896   OrgModPtr  mod;
1897   CharPtr    rval = NULL;
1898 
1899   if (StringHasNoText (str) || org == NULL) {
1900     rval = NULL;
1901   } else if (StringICmp (org->taxname, str) == 0) {
1902     rval = org->taxname;
1903   } else if (StringICmp (org->common, str) == 0) {
1904     rval = org->common;
1905   } else {
1906     for (syn = org->syn; syn != NULL && rval == NULL; syn = syn->next) {
1907       if (StringICmp (str, syn->data.ptrvalue) == 0) {
1908         rval = syn->data.ptrvalue;
1909       }
1910     }
1911     if (org->orgname != NULL) {
1912       for (mod = org->orgname->mod; mod != NULL && rval == NULL; mod = mod->next) {
1913         if ((mod->subtype == ORGMOD_gb_synonym || mod->subtype == ORGMOD_old_name)
1914             && StringICmp (str, mod->subname) == 0) {
1915           rval = mod->subname;
1916         }
1917       }
1918     }
1919   }
1920   return rval;
1921 }
1922 
1923 
1924 /* Want to check that specific host names are valid */
1925 NLM_EXTERN void 
1926 Taxon3ValidateSpecificHostsInSeqEntry 
1927 (SeqEntryPtr sep,
1928  ValNodePtr PNTR misspelled_list,
1929  ValNodePtr PNTR bad_caps_list,
1930  ValNodePtr PNTR ambiguous_list,
1931  ValNodePtr PNTR unrecognized_list)
1932 {
1933   ValNodePtr   biop_list = NULL;
1934   ValNodePtr   req_host_list = NULL, spec_host_list = NULL;
1935   ValNodePtr   request_list = NULL;
1936   ValNodePtr   response_list = NULL;
1937   ValNodePtr   response_vnp, request_vnp;
1938   ValNodePtr   check_list, check_vnp;
1939   OrgRefPtr    request_org, response_org;
1940   SpecificHostCheckPtr p;
1941   Boolean              has_match;
1942   ErrSev               level;
1943   Boolean              misspelled_flag;
1944   Boolean              bad_caps_flag;
1945   Boolean              ambiguous_flag;
1946   CharPtr              match;
1947     
1948   biop_list = GetValidatorSpecificHostBioSourceList (sep);
1949 
1950   /* get a list of unique specific_host values */
1951   spec_host_list = GetListOfUniqueSpecificHostValues (biop_list);
1952 
1953   /* now format requests for unique specific_host values */
1954   FormatValidatorSpecificHostRequests (spec_host_list, &request_list, &req_host_list);
1955 
1956   spec_host_list = ValNodeFreeData (spec_host_list);
1957 
1958   level = ErrSetMessageLevel (SEV_MAX);
1959   response_list = Taxon3GetOrgRefList (request_list);
1960   ErrSetMessageLevel (level);
1961  
1962   if (ValNodeLen (response_list) != ValNodeLen (request_list))
1963   {
1964     Message (MSG_POST, "Unable to retrieve information from tax server");
1965   }
1966   else
1967   {
1968     /* resort requests so that we can check all responses for the same BioSource together */
1969     check_list = SortSpecificHostOrgs (req_host_list, request_list, response_list);
1970     AddBioSourcesToSpecificHostChecklist (biop_list, check_list);  
1971 
1972     /* now look at responses */
1973     check_vnp = check_list;
1974     while (check_vnp != NULL)
1975     {
1976       p = (SpecificHostCheckPtr) check_vnp->data.ptrvalue;
1977       if (p != NULL)
1978       {
1979         has_match = FALSE;
1980         misspelled_flag = FALSE;
1981         bad_caps_flag = FALSE;
1982         ambiguous_flag = FALSE;
1983 
1984         request_vnp = p->request_list;
1985         response_vnp = p->response_list;
1986         while (!has_match && request_vnp != NULL && response_vnp != NULL)
1987         {
1988           request_org = (OrgRefPtr) request_vnp->data.ptrvalue;
1989           response_org = (OrgRefPtr) response_vnp->data.ptrvalue;
1990           if (response_vnp->choice == eReturnedOrgFlag_misspelled)
1991           {
1992             misspelled_flag = TRUE;
1993           }
1994           else if (response_vnp->choice == eReturnedOrgFlag_ambiguous)
1995           {
1996             ambiguous_flag = TRUE;
1997           }
1998           else
1999           {
2000             match = FindMatchInOrgRef (request_org->taxname, response_org);
2001             if (StringCmp (match, request_org->taxname) == 0)
2002             {
2003               has_match = TRUE;
2004             }
2005             else if (StringICmp (match, request_org->taxname) == 0)
2006             {
2007               bad_caps_flag = TRUE;
2008             }
2009           }  
2010           request_vnp = request_vnp->next;
2011           response_vnp = response_vnp->next;
2012         }     
2013         if (!has_match)
2014         {
2015           /* add to the list of bad */
2016           if (misspelled_flag) {
2017             if (misspelled_list != NULL) {
2018               ValNodeLink (misspelled_list, p->biop_list);
2019               p->biop_list = NULL;
2020             }
2021           } else if (bad_caps_flag) {
2022             if (bad_caps_list != NULL) {
2023               ValNodeLink (bad_caps_list, p->biop_list);
2024               p->biop_list = NULL;
2025             }
2026           } else if (ambiguous_flag) {
2027             if (ambiguous_list != NULL) {
2028               ValNodeLink (ambiguous_list, p->biop_list);
2029               p->biop_list = NULL;
2030             }
2031           } else {
2032             if (unrecognized_list != NULL) {
2033               ValNodeLink (unrecognized_list, p->biop_list);
2034               p->biop_list = NULL;
2035             }
2036           }
2037         }
2038       }
2039       check_vnp = check_vnp->next;
2040     }
2041     check_list = SpecificHostCheckListFree (check_list);
2042   }
2043 
2044   biop_list = ValNodeFree (biop_list);
2045   request_list = FreeOrgRefValNodeList (request_list);
2046   response_list = FreeOrgRefValNodeList (response_list);
2047   req_host_list = ValNodeFreeData (req_host_list);
2048 }
2049 
2050 
2051 typedef struct spechostgather {
2052   ValNodePtr list;
2053   Boolean    caps; /* if true, check only when first letter of first word is capitalized */
2054   Boolean    paren; /* if true, check portion inside parentheses as separate string */
2055 } SpecHostGatherData, PNTR SpecHostGatherPtr;
2056 
2057 
2058 static Boolean ShouldCheckSpecificHostString (CharPtr str, SpecHostGatherPtr p)
2059 {
2060   CharPtr cp_start;
2061   Boolean rval = FALSE;
2062 
2063   if (StringHasNoText (str) || p == NULL) {
2064     return FALSE;
2065   }
2066 
2067   if (!p->caps) {
2068     rval = TRUE;
2069   } else if (isupper (*str)) {
2070     rval = TRUE;
2071   } else if (p->paren) {
2072     cp_start = StringChr (str, '(');
2073     if (cp_start != NULL && ShouldCheckSpecificHostString (cp_start + 1, p)) {
2074       rval = TRUE;
2075     } 
2076   }
2077   return rval;  
2078 }
2079   
2080     
2081 static Boolean HasSpecificHostToBeChecked (BioSourcePtr biop, SpecHostGatherPtr p)
2082 {
2083   OrgModPtr mod;
2084   Boolean   rval = FALSE;
2085 
2086   if (biop == NULL || biop->org == NULL || biop->org->orgname == NULL || p == NULL) return FALSE;
2087   
2088   for (mod = biop->org->orgname->mod; mod != NULL && !rval; mod = mod->next) {
2089     if (mod->subtype == ORGMOD_nat_host && ShouldCheckSpecificHostString (mod->subname, p)) {
2090       rval = TRUE;
2091     }
2092   }
2093   return rval;
2094 }
2095 
2096 
2097 static void AddSpecificHostBioSourceFeatToList (SeqFeatPtr sfp, Pointer userdata)
2098 {
2099   SpecHostGatherPtr p;
2100 
2101   if (sfp == NULL || sfp->data.choice != SEQFEAT_BIOSRC || userdata == NULL) return;
2102 
2103   p = (SpecHostGatherPtr) userdata;
2104   if (HasSpecificHostToBeChecked (sfp->data.value.ptrvalue, p))
2105   {
2106     ValNodeAddPointer (&(p->list), OBJ_SEQFEAT, sfp);
2107   }
2108 }
2109 
2110 
2111 static void AddSpecificHostBioSourceDescToList (SeqDescrPtr sdp, Pointer userdata)
2112 {
2113   SpecHostGatherPtr p;
2114 
2115   if (sdp == NULL || sdp->choice != Seq_descr_source || userdata == NULL) return;
2116 
2117   p = (SpecHostGatherPtr) userdata;
2118   if (HasSpecificHostToBeChecked (sdp->data.ptrvalue, p))
2119   {
2120     ValNodeAddPointer (&(p->list), OBJ_SEQDESC, sdp);
2121   }
2122 }
2123 
2124 
2125 static ValNodePtr GetSpecificHostBioSourceList (SeqEntryPtr sep, Boolean caps, Boolean paren)
2126 {
2127   SpecHostGatherData   d;
2128   
2129   d.caps = caps;
2130   d.paren = paren;
2131   d.list = NULL;
2132   VisitFeaturesInSep (sep, &d, AddSpecificHostBioSourceFeatToList);
2133   VisitDescriptorsInSep (sep, &d, AddSpecificHostBioSourceDescToList);
2134   return d.list;
2135 }
2136 
2137 
2138 static void 
2139 FormatSpecificHostRequests 
2140 (ValNodePtr spec_host_list,
2141  ValNodePtr PNTR request_list,
2142  ValNodePtr PNTR req_host_list,
2143  Boolean caps,
2144  Boolean paren)
2145 {
2146   ValNodePtr vnp;
2147   CharPtr    orig, cp, str, cp2 = NULL;
2148   
2149   /* now format requests for unique specific_host values */
2150   for (vnp = spec_host_list; vnp != NULL; vnp = vnp->next)
2151   {
2152     orig = (CharPtr) vnp->data.ptrvalue;
2153     /* if we have a value in parentheses, submit it separately */
2154     cp = StringChr (orig, '(');
2155     if (cp != NULL)
2156     {
2157       cp2 = StringChr (cp, ')');
2158     }
2159     if (cp != NULL && cp2 != NULL 
2160         && ((cp > orig && orig[StringLen (orig) - 1] == ')') /* ends with paren */
2161             || (cp == orig))) /* starts with paren */
2162     {
2163       if (cp > orig && orig[StringLen (orig) - 1] == ')')
2164       {
2165         str = StringSave (orig);
2166         /* remove trailing parenthesis */
2167         str [StringLen(str) - 1] = 0;
2168 
2169         cp = str + (cp - orig);
2170 
2171         /* remove opening parenthesis */
2172         *cp = 0;
2173         cp++;
2174       }
2175       else
2176       {
2177         str = StringSave (orig);
2178         /* remove leading parenthesis */
2179         str[0] = ' ';
2180         cp = str + (cp2 - orig);
2181         /* remove trailing parenthesis */
2182         *cp = 0; 
2183         cp++;
2184       }
2185       TrimSpacesAroundString (cp);
2186       TrimSpacesAroundString (str);
2187       if (paren && (!caps || isupper (*cp))) {
2188         AddRequestOrgForString (cp, orig, request_list, req_host_list);
2189       }
2190       if (!caps || isupper (*str)) {
2191         AddRequestOrgForString (str, orig, request_list, req_host_list);
2192       }
2193     }
2194     else
2195     {
2196       if (!caps || isupper (*orig)) {
2197         AddRequestOrgForString (orig, orig, request_list, req_host_list);
2198       }
2199     }
2200   }
2201 }
2202 
2203 
2204 typedef struct replacementpair {
2205   CharPtr find;
2206   CharPtr repl;
2207 } ReplacementPairData, PNTR ReplacementPairPtr;
2208 
2209 static ReplacementPairPtr ReplacementPairNew (CharPtr find, CharPtr repl)
2210 {
2211   ReplacementPairPtr r;
2212 
2213   r = (ReplacementPairPtr) MemNew (sizeof (ReplacementPairData));
2214   r->find = StringSave (find);
2215   r->repl = StringSave (repl);
2216   return r;
2217 }
2218 
2219 static ReplacementPairPtr ReplacementPairFree (ReplacementPairPtr r)
2220 {
2221   if (r != NULL) {
2222     r->find = MemFree (r->find);
2223     r->repl = MemFree (r->repl);
2224     r = MemFree (r);
2225   }
2226   return r;
2227 }
2228 
2229 static ValNodePtr ReplacementPairListFree (ValNodePtr list)
2230 {
2231   ValNodePtr list_next;
2232 
2233   while (list != NULL) {
2234     list_next = list->next;
2235     list->next = NULL;
2236     list->data.ptrvalue = ReplacementPairFree (list->data.ptrvalue);
2237     list = ValNodeFree (list);
2238     list = list_next;
2239   }
2240   return list;
2241 }
2242 
2243 
2244 static SpecificHostFixPtr 
2245 SpecificHostFixNew 
2246 (ValNodePtr feat_or_desc,
2247  CharPtr    bad_host,
2248  CharPtr    old_taxname,
2249  CharPtr    new_taxname,
2250  Uint1      fix_type)
2251 {
2252   SpecificHostFixPtr s;
2253 
2254   s = (SpecificHostFixPtr) MemNew (sizeof (SpecificHostFixData));
2255   if (feat_or_desc != NULL) 
2256   {
2257     s->feat_or_desc = ValNodeNew(NULL);
2258     s->feat_or_desc->choice = feat_or_desc->choice;
2259     s->feat_or_desc->data.ptrvalue = feat_or_desc->data.ptrvalue;
2260   }
2261   s->bad_specific_host = StringSave (bad_host);
2262   s->old_taxname = StringSave (old_taxname);
2263   s->new_taxname = StringSave (new_taxname);
2264   s->fix_type = fix_type;
2265   return s;
2266 }
2267 
2268 
2269 static SpecificHostFixPtr SpecificHostFixFree (SpecificHostFixPtr s)
2270 {
2271   if (s != NULL)
2272   {
2273     s->feat_or_desc = ValNodeFree (s->feat_or_desc);
2274     s->bad_specific_host = MemFree (s->bad_specific_host);
2275     s->old_taxname = MemFree (s->old_taxname);
2276     s->new_taxname = MemFree (s->new_taxname);
2277     s = MemFree (s);
2278   }
2279   return s;
2280 }
2281 
2282 
2283 extern ValNodePtr SpecificHostFixListFree (ValNodePtr vnp)
2284 {
2285   ValNodePtr vnp_next;
2286 
2287   while (vnp != NULL)
2288   {
2289     vnp_next = vnp->next;
2290     vnp->next = NULL;
2291     vnp->data.ptrvalue = SpecificHostFixFree (vnp->data.ptrvalue);
2292     vnp = ValNodeFree (vnp);
2293     vnp = vnp_next;
2294   }
2295   return vnp;
2296 }
2297 
2298 
2299 static ValNodePtr GetFixesForOneSpecificHostValue (SpecificHostCheckPtr p)
2300 {
2301   CharPtr      prev_success = NULL, new_val, prev_fail = NULL;
2302   Boolean      fix_needed = FALSE;
2303   ValNodePtr   suggested_fixes = NULL;
2304   OrgRefPtr    request_org, response_org;
2305   ValNodePtr   biop_vnp, response_vnp, request_vnp, vnp;
2306   SpecificHostFixPtr s;
2307   ValNodePtr         fix_list = NULL;
2308   ReplacementPairPtr r;
2309   Uint1              fix_type;
2310   Boolean            add_nontrunc_fix;
2311   Boolean            ambiguous = FALSE;
2312 
2313   if (p == NULL) return NULL;
2314 
2315   request_vnp = p->request_list;
2316   response_vnp = p->response_list;
2317   
2318   while (request_vnp != NULL && response_vnp != NULL)
2319   {
2320     request_org = (OrgRefPtr) request_vnp->data.ptrvalue;
2321     response_org = (OrgRefPtr) response_vnp->data.ptrvalue;
2322     if (prev_success != NULL 
2323         && StringNCmp (request_org->taxname, prev_success, StringLen (request_org->taxname)) == 0) {
2324       /* we don't need to check this one */
2325     } else if (response_org == NULL) {
2326       fix_needed = TRUE;
2327       if (response_vnp->choice == eReturnedOrgFlag_ambiguous) {
2328         ambiguous = TRUE;
2329       }
2330       if (prev_fail == NULL) {
2331         prev_fail = request_org->taxname;
2332       } else if (StringNCmp (prev_fail, request_org->taxname, StringLen (request_org->taxname)) != 0) {
2333         if (response_vnp->choice == eReturnedOrgFlag_ambiguous) {
2334           ValNodeAddPointer (&suggested_fixes, eSpecificHostFix_ambiguous, ReplacementPairNew (request_org->taxname, NULL));
2335         } else {
2336           ValNodeAddPointer (&suggested_fixes, eSpecificHostFix_unrecognized, ReplacementPairNew (request_org->taxname, NULL));
2337         }
2338         prev_fail = request_org->taxname;      
2339       }
2340     } else {
2341       prev_success = request_org->taxname;
2342       add_nontrunc_fix = FALSE;
2343       if (response_vnp->choice == eReturnedOrgFlag_misspelled) {
2344         fix_needed = TRUE;
2345         fix_type = eSpecificHostFix_spelling;
2346         new_val = response_org->taxname;
2347         add_nontrunc_fix = TRUE;
2348       } else {
2349         new_val = FindMatchInOrgRef (request_org->taxname, response_org);
2350         if (new_val == NULL) {
2351           fix_needed = TRUE;
2352           fix_type = eSpecificHostFix_replacement;
2353           new_val = response_org->taxname;
2354           add_nontrunc_fix = TRUE;
2355         } else if (StringCmp (new_val, request_org->taxname) != 0) {
2356           fix_needed = TRUE;
2357           fix_type = eSpecificHostFix_capitalization;
2358           add_nontrunc_fix = TRUE;
2359         }
2360       }
2361 
2362       /* add fix to truncate and correct spelling and capitalization first */
2363       /* this way the truncation won't fail when it looks for the old version that's already been corrected */
2364       if (prev_fail != NULL) {
2365         if (StringNCmp (prev_fail, request_org->taxname, StringLen (request_org->taxname)) == 0) {
2366           if (new_val != NULL) {
2367             ValNodeAddPointer (&suggested_fixes, eSpecificHostFix_truncation, ReplacementPairNew (prev_fail, new_val));            
2368             fix_needed = TRUE;
2369           }
2370         } else {
2371           ValNodeAddPointer (&suggested_fixes, eSpecificHostFix_unrecognized, ReplacementPairNew (prev_fail, NULL));
2372         }
2373       }
2374       /* add fix for just spelling and capitalization after */
2375       if (add_nontrunc_fix) {
2376         ValNodeAddPointer (&suggested_fixes, fix_type, ReplacementPairNew (request_org->taxname, new_val));
2377       }
2378 
2379       prev_fail = NULL;
2380     }
2381     request_vnp = request_vnp->next;
2382     response_vnp = response_vnp->next;
2383   }
2384 
2385   if (fix_needed) {
2386     for (biop_vnp = p->biop_list; biop_vnp != NULL; biop_vnp = biop_vnp->next) {
2387       if (suggested_fixes == NULL) {
2388         s = SpecificHostFixNew (biop_vnp, p->spec_host, p->spec_host, NULL, ambiguous ? eSpecificHostFix_ambiguous : eSpecificHostFix_unrecognized);
2389         ValNodeAddPointer (&fix_list, 0, s);
2390       } else {
2391         for (vnp = suggested_fixes; vnp != NULL; vnp = vnp->next) {
2392           r = (ReplacementPairPtr) vnp->data.ptrvalue;
2393           s = SpecificHostFixNew (biop_vnp, p->spec_host, r->find, r->repl, vnp->choice);
2394           ValNodeAddPointer (&fix_list, 0, s);
2395         }
2396       }
2397     }
2398   }
2399   suggested_fixes = ReplacementPairListFree (suggested_fixes);
2400   return fix_list;
2401 }
2402 
2403 
2404 NLM_EXTERN ValNodePtr Taxon3GetSpecificHostFixesInSeqEntry (SeqEntryPtr sep, Boolean caps, Boolean paren)
2405 {
2406   ValNodePtr   biop_list = NULL;
2407   ValNodePtr   req_host_list = NULL, spec_host_list = NULL;
2408   ValNodePtr   request_list = NULL;
2409   ValNodePtr   response_list = NULL;
2410   ValNodePtr   check_list, check_vnp;
2411   SpecificHostCheckPtr p;
2412   ErrSev               level;
2413   ValNodePtr           fix_list = NULL;
2414   
2415   biop_list = GetSpecificHostBioSourceList (sep, caps, paren);
2416 
2417   /* get a list of unique specific_host values */
2418   spec_host_list = GetListOfUniqueSpecificHostValues (biop_list);
2419 
2420   /* now format requests for unique specific_host values */
2421   FormatSpecificHostRequests (spec_host_list, &request_list, &req_host_list, caps, paren);
2422 
2423   spec_host_list = ValNodeFreeData (spec_host_list);
2424 
2425   level = ErrSetMessageLevel (SEV_MAX);
2426   response_list = Taxon3GetOrgRefList (request_list);
2427   ErrSetMessageLevel (level);
2428  
2429   if (ValNodeLen (response_list) != ValNodeLen (request_list))
2430   {
2431     Message (MSG_POST, "Unable to retrieve information from tax server");
2432   }
2433   else
2434   {
2435     /* resort requests so that we can check all responses for the same BioSource together */
2436     check_list = SortSpecificHostOrgs (req_host_list, request_list, response_list);
2437     AddBioSourcesToSpecificHostChecklist (biop_list, check_list);  
2438 
2439     /* now look at responses */
2440     check_vnp = check_list;
2441     while (check_vnp != NULL)
2442     {
2443       p = (SpecificHostCheckPtr) check_vnp->data.ptrvalue;
2444       ValNodeLink (&fix_list, GetFixesForOneSpecificHostValue (p));
2445       check_vnp = check_vnp->next;
2446     }
2447     check_list = SpecificHostCheckListFree (check_list);
2448   }
2449 
2450   biop_list = ValNodeFree (biop_list);
2451   request_list = FreeOrgRefValNodeList (request_list);
2452   response_list = FreeOrgRefValNodeList (response_list);
2453   req_host_list = ValNodeFreeData (req_host_list);
2454 
2455   return fix_list;
2456 }
2457 
2458 
2459 extern Boolean ApplyOneSpecificHostFix (SpecificHostFixPtr s)
2460 {
2461   BioSourcePtr biop = NULL;
2462   Boolean      rval = FALSE;
2463   CharPtr      new_spec_host;
2464   ValNode      vn;
2465 
2466   if (s == NULL || s->feat_or_desc == NULL 
2467       || StringHasNoText (s->bad_specific_host)
2468       || StringHasNoText (s->old_taxname)
2469       || StringHasNoText (s->new_taxname)) {
2470     return rval;
2471   }
2472   biop = GetBioSourceFromValNode (s->feat_or_desc);
2473   if (biop == NULL) return rval;
2474 
2475   vn.choice = SourceQualChoice_textqual;
2476   vn.data.intvalue = Source_qual_nat_host;
2477   vn.next = NULL;
2478 
2479   new_spec_host = GetSourceQualFromBioSource (biop, &vn, NULL);  
2480   FindReplaceString (&new_spec_host, s->old_taxname, s->new_taxname, TRUE, TRUE);
2481   if (StringCmp (new_spec_host, s->bad_specific_host) != 0)
2482   {
2483     rval = SetSourceQualInBioSource (biop, &vn, NULL, new_spec_host, ExistingTextOption_replace_old);
2484   }
2485   new_spec_host = MemFree (new_spec_host);
2486   return rval;
2487 }
2488 
2489 static void AddBioSourceFeatToList (SeqFeatPtr sfp, Pointer userdata)
2490 {
2491   if (sfp == NULL || sfp->data.choice != SEQFEAT_BIOSRC || userdata == NULL) return;
2492 
2493   ValNodeAddPointer ((ValNodePtr PNTR) userdata, OBJ_SEQFEAT, sfp);
2494 }
2495 
2496 
2497 static void AddBioSourceDescToList (SeqDescrPtr sdp, Pointer userdata)
2498 {
2499 
2500   if (sdp == NULL || sdp->choice != Seq_descr_source || userdata == NULL) return;
2501 
2502   ValNodeAddPointer ((ValNodePtr PNTR) userdata, OBJ_SEQDESC, sdp);
2503 }
2504 
2505 
2506 static ValNodePtr GetBioSourceList (SeqEntryPtr sep)
2507 {
2508   ValNodePtr list = NULL;
2509   
2510   VisitFeaturesInSep (sep, &list, AddBioSourceFeatToList);
2511   VisitDescriptorsInSep (sep, &list, AddBioSourceDescToList);
2512   return list;
2513 }
2514 
2515 
2516 static ValNodePtr GetListOfOrganismNames (ValNodePtr biop_list)
2517 {
2518   ValNodePtr   biop_vnp;
2519   BioSourcePtr biop;
2520   ValNodePtr   list = NULL;
2521   
2522   /* get a list of unique specific_host values */
2523   for (biop_vnp = biop_list; biop_vnp != NULL; biop_vnp = biop_vnp->next)
2524   {
2525     if (biop_vnp->data.ptrvalue == NULL) continue;
2526     biop = GetBioSourceFromValNode (biop_vnp);
2527     if (biop == NULL || biop->org == NULL || StringHasNoText (biop->org->taxname)) continue;
2528     if (!StringAlreadyInValNodeList (biop->org->taxname, list))
2529     {
2530       ValNodeAddPointer (&list, 0, biop->org->taxname);
2531     }
2532   }
2533   return list;
2534 }
2535 
2536 
2537 static void AddBioSourcesToChecklist (ValNodePtr biop_list, ValNodePtr check_list)
2538 {
2539   ValNodePtr biop_vnp, last_vnp = NULL, stop_search;
2540   BioSourcePtr biop;
2541   SpecificHostCheckPtr p;
2542 
2543   if (biop_list == NULL || check_list == NULL) return;
2544 
2545   for (biop_vnp = biop_list; biop_vnp != NULL; biop_vnp = biop_vnp->next)
2546   {
2547 
2548     biop = GetBioSourceFromValNode (biop_vnp);
2549     if (biop == NULL) continue;
2550 
2551     if (biop == NULL || biop->org == NULL || biop->org->orgname == NULL) continue;
2552     if (last_vnp == NULL)
2553     {
2554       last_vnp = check_list;
2555       stop_search = NULL;
2556     }
2557     else
2558     {
2559       stop_search = last_vnp;
2560     }
2561     p = NULL;
2562     while (last_vnp != NULL 
2563            && (p = (SpecificHostCheckPtr) last_vnp->data.ptrvalue) != NULL
2564            && StringCmp (p->spec_host, biop->org->taxname) != 0)
2565     {
2566       p = NULL;
2567       last_vnp = last_vnp->next;
2568     }
2569     if (p == NULL && stop_search != NULL)
2570     {
2571       last_vnp = check_list;
2572       while (last_vnp != stop_search 
2573               && (p = (SpecificHostCheckPtr) last_vnp->data.ptrvalue) != NULL
2574               && StringCmp (p->spec_host, biop->org->taxname) != 0)
2575       {
2576         p = NULL;
2577         last_vnp = last_vnp->next;
2578       }
2579     }
2580 
2581     if (p != NULL)
2582     {
2583       ValNodeAddPointer (&(p->biop_list), biop_vnp->choice, biop_vnp->data.ptrvalue);
2584     }
2585   }
2586 }
2587 
2588 
2589 static ValNodePtr GetBioSourcesWithTaxName (CharPtr taxname, ValNodePtr biop_list)
2590 {
2591   SeqFeatPtr sfp;
2592   SeqDescrPtr sdp;
2593   BioSourcePtr biop;
2594   ValNodePtr match_list = NULL, vnp;
2595 
2596   if (StringHasNoText (taxname) || biop_list == NULL) return NULL;
2597 
2598   for (vnp = biop_list; vnp != NULL; vnp = vnp->next) {
2599     biop = NULL;
2600     if (vnp->choice == OBJ_SEQFEAT) {
2601       sfp = (SeqFeatPtr) vnp->data.ptrvalue;
2602       if (sfp != NULL && sfp->data.choice == SEQFEAT_BIOSRC) {
2603         biop = (BioSourcePtr) sfp->data.value.ptrvalue;
2604       }
2605     } else if (vnp->choice == OBJ_SEQDESC) {
2606       sdp = (SeqDescrPtr) vnp->data.ptrvalue;
2607       if (sdp != NULL && sdp->choice == Seq_descr_source) {
2608         biop = (BioSourcePtr) sdp->data.ptrvalue;
2609       }
2610     }
2611     if (biop != NULL && biop->org != NULL && StringCmp (taxname, biop->org->taxname) == 0) {
2612       ValNodeAddPointer (&match_list, vnp->choice, vnp->data.ptrvalue);
2613     }
2614   }
2615   return match_list;
2616 }
2617 
2618 
2619 NLM_EXTERN ValNodePtr GetOrganismTaxLookupFailuresInSeqEntry (SeqEntryPtr sep)
2620 {
2621   ValNodePtr   biop_list = NULL;
2622   ValNodePtr   unique_list = NULL;
2623   ValNodePtr   request_list = NULL;
2624   ValNodePtr   response_list = NULL;
2625   ValNodePtr   req_vnp, resp_vnp;
2626   ErrSev               level;
2627   ValNodePtr           failed_list = NULL, vnp;
2628   OrgRefPtr            request_org;
2629   
2630   biop_list = GetBioSourceList (sep);
2631 
2632   /* get a list of unique specific_host values */
2633   unique_list = GetListOfOrganismNames (biop_list);
2634 
2635   /* now format requests for unique taxname values */
2636   for (vnp = unique_list; vnp != NULL; vnp = vnp->next) 
2637   {
2638     request_org = OrgRefNew();
2639     request_org->taxname = StringSave (vnp->data.ptrvalue);
2640     ValNodeAddPointer (&request_list, 3, request_org);
2641   }
2642 
2643   unique_list = ValNodeFree (unique_list);
2644 
2645   level = ErrSetMessageLevel (SEV_MAX);
2646   response_list = Taxon3GetOrgRefList (request_list);
2647   ErrSetMessageLevel (level);
2648  
2649   if (ValNodeLen (response_list) != ValNodeLen (request_list))
2650   {
2651     Message (MSG_POST, "Unable to retrieve information from tax server");
2652   }
2653   else
2654   {
2655     for (req_vnp = request_list, resp_vnp = response_list;
2656          req_vnp != NULL && resp_vnp != NULL;
2657          req_vnp = req_vnp->next, resp_vnp = resp_vnp->next)
2658     {
2659       if (resp_vnp->data.ptrvalue == NULL)
2660       {        
2661         request_org = (OrgRefPtr) req_vnp->data.ptrvalue;
2662         vnp = GetBioSourcesWithTaxName (request_org->taxname, biop_list);
2663         if (vnp != NULL) {
2664           ValNodeAddPointer (&failed_list, 0, StringSave (request_org->taxname));
2665           ValNodeLink (&failed_list, vnp);
2666         }
2667       }
2668     }
2669   }
2670 
2671   biop_list = ValNodeFree (biop_list);
2672   request_list = FreeOrgRefValNodeList (request_list);
2673   response_list = FreeOrgRefValNodeList (response_list);
2674 
2675   return failed_list;  
2676 }
2677 
2678 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.