|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/api/edutil.c |
source navigation diff markup identifier search freetext search file search |
1 /* edutil.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name: edutil.c
27 *
28 * Author: James Ostell
29 *
30 * Version Creation Date: 2/4/94
31 *
32 * $Revision: 6.66 $
33 *
34 * File Description: Sequence editing utilities
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * Date Name Description of modification
39 * ------- ---------- -----------------------------------------------------
40 *
41 * $Log: edutil.c,v $
42 * Revision 6.66 2009/10/02 19:46:00 kans
43 * address clang static analyzer warnings
44 *
45 * Revision 6.65 2009/03/04 16:34:15 bollin
46 * Added function for removing contigs from scaffolds.
47 *
48 * Revision 6.64 2008/08/26 20:21:01 bollin
49 * Fixed bug in function for converting raw sequences to delta, where the
50 * gap is specified by location and is a replacement rather than an insertion.
51 *
52 * Revision 6.63 2007/07/02 19:17:26 bollin
53 * Corrected functions for inserting and deleting from locations to handle
54 * locations on segmented sets, corrected functions for inserting and deleting
55 * from sequences to adjust the length of the master sequence when adjusting the
56 * length of a segment.
57 *
58 * Revision 6.62 2007/05/08 17:18:32 bollin
59 * Added functions for identifying AGP gap DeltaSeqs
60 *
61 * Revision 6.61 2007/05/07 17:43:03 bollin
62 * Made functions IsDeltaSeqGap and IsDeltaSeqUnknownGap extern.
63 *
64 * Revision 6.60 2007/05/07 17:35:02 kans
65 * can handle Seq-lit.seq-data.gap
66 *
67 * Revision 6.59 2007/05/07 13:28:35 kans
68 * added casts for Seq-data.gap (SeqDataPtr, SeqGapPtr, ByteStorePtr)
69 *
70 * Revision 6.58 2007/01/19 14:55:07 bollin
71 * Do not set partial when deleting location from feature.
72 *
73 * Revision 6.57 2006/07/13 17:06:38 bollin
74 * use Uint4 instead of Uint2 for itemID values
75 * removed unused variables
76 * resolved compiler warnings
77 *
78 * Revision 6.56 2006/04/04 18:00:47 kans
79 * SeqLocAddEx properly returns value to &last argument, makes SeqLocMix from DeltaSeqsToSeqLocs
80 *
81 * Revision 6.55 2006/03/30 19:50:15 kans
82 * DeltaSeqsToSeqLocs calls SeqLocAddEx for efficient list usage
83 *
84 * Revision 6.54 2006/02/07 13:41:29 bollin
85 * added function AdjustFeatureForGapChange, which changes a feature to accommodate
86 * a change in the length of a gap
87 *
88 * Revision 6.53 2005/12/12 14:12:54 bollin
89 * BioseqCopyEx was not correctly handling copying the data contents of a
90 * delta sequence
91 *
92 * Revision 6.52 2005/09/22 19:21:34 bollin
93 * In the sequence editor, if the user inserts Ns into a gap of known length,
94 * the gap length will be increased instead of creating two gaps on either side
95 * with N sequence characters in the middle.
96 *
97 * Revision 6.51 2005/09/13 15:21:57 bollin
98 * fixed bug when inserting characters inside a gap that was incorrectly setting
99 * the lengths of the split gap
100 *
101 * Revision 6.50 2005/09/13 14:14:31 bollin
102 * fixed bug that was preventing the removal of gaps of length 1
103 *
104 * Revision 6.49 2005/07/15 19:01:37 kans
105 * minor fixes for Xcode warnings
106 *
107 * Revision 6.48 2005/05/02 14:20:02 bollin
108 * when inserting gaps, adjust coding region locations to not include gaps.
109 * when removing gaps, if a feature location has intervals that stop and start
110 * again at the point where the gap was removed, connect the intervals.
111 *
112 * Revision 6.47 2005/04/28 20:10:31 bollin
113 * added new function AdjustFeaturesForInsertion which is called by BioseqInsert
114 * and also by a new function in sequin3.c for converting a raw bioseq to a delta
115 * and inserting gaps
116 *
117 * Revision 6.46 2005/04/06 19:33:15 bollin
118 * made it possible to insert and remove gaps from delta sequences
119 *
120 * Revision 6.45 2005/03/18 20:51:10 bollin
121 * only change frame when CDS location has been changed, change anticodon locations
122 * and code breaks when locations have just been shifted
123 *
124 * Revision 6.44 2005/03/08 21:14:44 bollin
125 * strand argument in SeqLocCopyRegion is Seq_strand_minus when features
126 * should be reverse-complemented, does not actually indicate the strand to
127 * which a feature should be copied
128 *
129 * Revision 6.43 2005/02/28 16:53:40 bollin
130 * corrected Unix compiler warnings
131 *
132 * Revision 6.42 2005/02/28 16:08:35 bollin
133 * added utilities for editing delta sequences
134 *
135 * Revision 6.41 2005/01/24 17:00:58 bollin
136 * only change frames, fix code break locations, and fix anticodon locations
137 * when feature location is changed in SeqFeatDelete
138 *
139 * Revision 6.40 2004/11/17 21:19:18 lavr
140 * AffectedFeatFree() to return NULL on afp == NULL
141 *
142 * Revision 6.39 2004/10/08 16:04:16 bollin
143 * added ability to check when an action will remove a feature
144 *
145 * Revision 6.38 2004/10/08 15:19:07 bollin
146 * do not set partial flag when deleting from bioseq location in feature
147 *
148 * Revision 6.37 2004/09/29 18:49:57 bollin
149 * fixed bugs in sequence editing, can now undo a nucleotide deletion that
150 * removes an entire feature location (feature will be restored)
151 *
152 * Revision 6.36 2004/09/23 14:59:51 bollin
153 * moved functions that depend on functions that depend on BLAST functions
154 * into seqpanel.c, made functions called by those functions extern
155 *
156 * Revision 6.35 2004/09/22 20:12:27 bollin
157 * fixed error in deleting sequence location for point features
158 *
159 * Revision 6.34 2004/09/22 18:20:32 bollin
160 * added functions for playing and unplaying a sequence editor action to translate
161 * a CDS
162 *
163 * Revision 6.33 2004/09/07 14:52:29 bollin
164 * when deleting location from a feature, adjust frame if deleting from 5' end of 5' partial feature.
165 *
166 * Revision 6.32 2004/08/24 13:16:57 bollin
167 * do not free list of product features taken from ObjectMgrDataPtr
168 *
169 * Revision 6.31 2004/08/06 19:56:20 bollin
170 * allow deletion from the end of a sequence
171 *
172 * Revision 6.30 2004/08/05 18:15:02 bollin
173 * when maintaining partials during feature drag, use partial in orig_loc
174 * instead of current feature location
175 *
176 * Revision 6.29 2004/08/05 18:07:03 bollin
177 * maintain partials for features when dragging or sliding intervals
178 *
179 * Revision 6.28 2004/07/30 18:46:55 bollin
180 * added function for reordering intervals after they have been dragged by
181 * the sequence editor
182 *
183 * Revision 6.27 2004/07/30 13:34:50 bollin
184 * in SeqLocCopyRegion, when copying from the minus strand to a non-minus-strand,
185 * be sure to set the strand.
186 *
187 * Revision 6.26 2004/07/28 20:06:19 bollin
188 * added journaling for undo/redo of dragged sequence location changes
189 *
190 * Revision 6.25 2004/07/28 15:22:15 bollin
191 * moved functions for moving feature locations around to edutil.c from
192 * seqpanel.c
193 *
194 * Revision 6.24 2004/07/27 19:46:42 bollin
195 * fixed errors in feature location adjustment when deleting nucleotides
196 * with new sequence editor
197 *
198 * Revision 6.23 2004/07/22 16:08:20 bazhin
199 * Changes to parse gaps of unknown lengths (like "gap(unk100)")
200 * within location strings.
201 *
202 * Revision 6.22 2004/07/12 12:29:45 bollin
203 * moved new sequence editor editing functions here
204 *
205 * Revision 6.21 2003/11/03 19:37:42 bollin
206 * SegLocToPartsEx now handles SEQLOC_PNT as well as SEQLOC_INT
207 *
208 * Revision 6.20 2003/06/03 20:25:34 kans
209 * SeqLocReplaceID works on bonds if both ends bonded to the same Seq-id
210 *
211 * Revision 6.19 2003/02/10 22:57:45 kans
212 * added BioseqCopyEx, which takes a BioseqPtr instead of a SeqIdPtr for the source
213 *
214 * Revision 6.18 2002/07/26 20:15:55 kans
215 * BioseqInsert can do feature indexed collection of features to adjust
216 *
217 * Revision 6.17 2002/07/17 15:39:40 kans
218 * BioseqInsert calls Nlm_BSAdd, need to figure out when not to call
219 *
220 * Revision 6.16 2002/07/11 17:45:53 kans
221 * BioseqInsert does not call Nlm_BSAdd due to a bug in that code
222 *
223 * Revision 6.15 2002/07/02 13:23:42 kans
224 * added SeqLocDeleteEx
225 *
226 * Revision 6.14 2001/06/01 18:07:20 kans
227 * changes to SeqLocAdd to allow one plus and one unknown strand to be accepted
228 *
229 * Revision 6.13 2001/02/23 21:30:09 shkeda
230 * Fixed SeqLocAdd: Int-fuzz pointers should be set to NULL after IntFuzzFree
231 *
232 * Revision 6.12 2001/02/23 01:26:07 ostell
233 * Added support to BioseqDelete() for delta seqs
234 *
235 * Revision 6.11 2000/10/31 17:11:06 kans
236 * SeqLocReplaceID was handling SEQLOC_PACKED_PNT incorrectly
237 *
238 * Revision 6.10 1999/12/20 20:47:12 kans
239 * oldscope test was wrong everywhere
240 *
241 * Revision 6.9 1999/12/15 20:52:16 kans
242 * added IndexedSeqFeatsCopy if SeqMgrFeaturesAreIndexed
243 *
244 * Revision 6.8 1999/12/07 20:32:13 kans
245 * for most editing functions, if BioseqFind failed, temporarily clear scope/try again/reset scope
246 *
247 * Revision 6.7 1999/11/19 19:54:19 kans
248 * SeqLocAdd checks for NULL slp before dereferencing
249 *
250 * Revision 6.6 1998/09/03 20:43:52 kans
251 * added delta bioseq support to BioseqCopy
252 *
253 * Revision 6.5 1998/06/22 20:00:46 kans
254 * DelFeat was a bit too aggressive when there were multiple feature tables
255 *
256 * Revision 6.4 1998/06/17 21:50:11 kans
257 * fixed unix compiler warnings, including 64-bit SGI
258 *
259 * Revision 6.3 1997/11/10 19:40:48 bazhin
260 * Fixed incorrect comment for ISAGappedSeqLoc() function.
261 *
262 * Revision 6.2 1997/10/24 19:16:17 bazhin
263 * Added three easy functions GapToSeqLoc(...), ISAGappedSeqLoc(...)
264 * and GappedSeqLocsToDeltaSeqs(...) for processing "gap(...)" tokens
265 * in CONTIG line.
266 *
267 * Revision 6.1 1997/10/10 20:18:02 ostell
268 * removed tab character from SeqLitTag for DeltaSeqsToSeqLoc
269 *
270 * Revision 6.0 1997/08/25 18:05:24 madden
271 * Revision changed to 6.0
272 *
273 * Revision 5.10 1997/07/25 20:34:51 kans
274 * added SegLocToPartsEx
275 *
276 * Revision 5.9 1997/06/19 18:37:30 vakatov
277 * [WIN32,MSVC++] Adopted for the "NCBIOBJ.LIB" DLL'ization
278 *
279 * Revision 5.8 1996/12/20 17:59:34 kans
280 * SeqLocCopyRegion already reversed order for Seq_strand_minus, so no need
281 * to reverse it again (JO + JK)
282 *
283 * Revision 5.7 1996/10/21 18:56:19 ostell
284 * made SegLocToParts accept a complicated Seq-loc argument
285 *
286 * Revision 5.6 1996/10/09 17:27:34 chappey
287 * *** empty log message ***
288 *
289 * Revision 5.5 1996/10/09 16:34:59 chappey
290 * added SeqLocReplaceID() that replaces the Seq-Id of a Seq-Loc
291 *
292 * Revision 5.4 1996/07/15 14:43:51 epstein
293 * change SeqLocAdd() so that it merges identical SEQLOC_PNTs
294 *
295 * Revision 5.3 1996/06/12 18:29:41 epstein
296 * move SeqLocIntNew() and SeqLocPntNew() from edutil to sequtil
297 *
298 * Revision 5.1 1996/06/10 15:07:17 epstein
299 * replace make_seq_loc() with SeqLocIntNew() and make_pnt_loc with SeqLocPntNew()
300 *
301 * Revision 5.0 1996/05/28 13:23:23 ostell
302 * Set to revision 5.0
303 *
304 * Revision 4.10 1996/03/19 19:45:24 kans
305 * fix of SegLocToParts (JO)
306 *
307 * Revision 4.9 1996/03/12 22:14:22 ostell
308 * added SeqLocToParts()
309 *
310 * Revision 4.7 1996/02/19 19:58:05 ostell
311 * added support for Code-break and tRNA.anticodon
312 *
313 * Revision 4.6 1996/01/30 16:24:04 ostell
314 * changed name of SeqLocPack() to SeqLocPackage()
315 *
316 * Revision 4.5 1996/01/29 22:03:52 ostell
317 * revised SeqLocAdd
318 * added SeqLocPack
319 *
320 * Revision 4.4 1996/01/10 22:25:25 ostell
321 * added SeqLocIntNew()
322 *
323 * Revision 4.3 1995/12/29 21:31:44 ostell
324 * added mapping functions between delta seq and seq loc, for editing utilities
325 *
326 * Revision 4.2 1995/12/21 02:35:50 ostell
327 * changed call for BSAdd
328 *
329 * Revision 4.1 1995/11/15 20:40:20 ostell
330 * fixed SeqLocCopyPart so it correctly handles SEQLOC_NULL in segmented
331 * records
332 *
333 * Revision 4.0 1995/07/26 13:49:01 ostell
334 * force revision to 4.0
335 *
336 * Revision 1.22 1995/05/15 21:46:05 ostell
337 * added Log line
338 *
339 *
340 *
341 * ==========================================================================
342 */
343
344 #include <edutil.h>
345 #include <explore.h>
346 #include <sqnutils.h>
347 #include <objfdef.h>
348 #include <gather.h>
349
350 /*****************************************************************************
351 *
352 * SeqLocPackage(head)
353 * head is a chain of 1 or more SeqLocs connected by slp->next
354 * Assumes was built by SeqLocAdd to remove redundancy
355 * Frees the last element if it is a NULL.
356 * If more than one element left, then packages the chain into a SEQLOC_MIX,
357 * or SEQLOC_PACKED_INT as appropriate
358 * returns pointer to the head of the resulting single SeqLoc
359 *
360 *****************************************************************************/
361 NLM_EXTERN SeqLocPtr LIBCALL SeqLocPackage (SeqLocPtr head)
362 {
363 SeqLocPtr newhead = NULL, tmp, prev;
364 Boolean packed_int = TRUE;
365 Int4 ctr = 0;
366
367 if (head == NULL) return head;
368
369 prev = NULL; /* remove trailing NULL */
370 for (tmp = head; tmp->next != NULL; tmp = tmp->next)
371 prev = tmp;
372
373 if (tmp->choice == SEQLOC_NULL)
374 {
375 SeqLocFree(tmp);
376 if (prev != NULL)
377 prev->next = NULL;
378 else
379 return NULL; /* nothing left */
380 }
381
382 for (tmp = head; tmp != NULL; tmp = tmp->next)
383 {
384 ctr++;
385 if (tmp->choice != SEQLOC_INT)
386 packed_int = FALSE;
387 }
388
389 if (ctr == 1)
390 return head;
391
392 newhead = ValNodeNew(NULL);
393 if (packed_int)
394 newhead->choice = SEQLOC_PACKED_INT;
395 else
396 newhead->choice = SEQLOC_MIX;
397 newhead->data.ptrvalue = head;
398
399 return newhead;
400 }
401
402 /*****************************************************************************
403 *
404 * SeqLocAdd(headptr, slp, merge, do_copy)
405 * creates a linked list of SeqLocs.
406 * returns a pointer to the last SeqLoc in the chain
407 * if (merge)
408 * deletes double NULLs or Nulls at start (application must delete at stop)
409 * merges adjacent intervals on the same strand
410 * if (do_copy)
411 * Makes copies of incoming SeqLocs
412 * if incoming is merged, deletes the incoming SeqLoc
413 *
414 *****************************************************************************/
415 static SeqLocPtr LIBCALL SeqLocAddEx (SeqLocPtr PNTR head, SeqLocPtr PNTR lastp, SeqLocPtr slp, Boolean merge, Boolean do_copy)
416 {
417 SeqLocPtr tmp, last = NULL, retval = NULL;
418 Boolean merged = FALSE; /* intervals were merged */
419
420 if (slp == NULL) return NULL;
421
422 if (lastp != NULL) {
423 last = *lastp;
424 } else if (head != NULL && *head != NULL)
425 {
426 for (tmp = *head; tmp != NULL; tmp = tmp->next)
427 {
428 last = tmp;
429 }
430 }
431
432 if ((slp->choice == SEQLOC_NULL) && (merge)) /* no null at start, or two in a row */
433 {
434 if (last == NULL) /* first one */
435 {
436 merged = TRUE;
437 goto ret;
438 }
439 if (last->choice == SEQLOC_NULL) /* double NULL */
440 {
441 merged = TRUE;
442 goto ret;
443 }
444 }
445
446 if ((last != NULL) && (merge)) /* check for merging intervals */
447 {
448 if ((last->choice == SEQLOC_INT) && (slp->choice == SEQLOC_INT))
449 {
450 SeqIntPtr sip1, sip2;
451 Boolean samestrand;
452 Uint1 strand = Seq_strand_unknown;
453
454 sip1 = (SeqIntPtr)(last->data.ptrvalue);
455 sip2 = (SeqIntPtr)(slp->data.ptrvalue);
456 samestrand = FALSE;
457 if ((sip1->strand == sip2->strand) ||
458 (sip1->strand == Seq_strand_unknown && sip2->strand != Seq_strand_minus) ||
459 (sip1->strand == Seq_strand_unknown && sip2->strand != Seq_strand_minus)) {
460 samestrand = TRUE;
461 if (sip1->strand == Seq_strand_minus || sip1->strand == Seq_strand_minus) {
462 strand = Seq_strand_minus;
463 } else if (sip1->strand == Seq_strand_plus || sip1->strand == Seq_strand_plus) {
464 strand = Seq_strand_plus;
465 } else {
466 strand = Seq_strand_unknown;
467 }
468 }
469 if (samestrand && (SeqIdForSameBioseq(sip1->id, sip2->id)))
470 {
471 if (strand == Seq_strand_minus)
472 {
473 if (sip1->from == (sip2->to + 1)) /* they are adjacent */
474 {
475 sip1->from = sip2->from;
476 sip1->if_from = IntFuzzFree(sip1->if_from);
477 if (sip2->if_from != NULL) /* copy the fuzz */
478 {
479 if (do_copy)
480 sip1->if_from = (IntFuzzPtr)AsnIoMemCopy((Pointer)(sip2->if_from),
481 (AsnReadFunc)IntFuzzAsnRead, (AsnWriteFunc)IntFuzzAsnWrite);
482 else
483 {
484 sip1->if_from = sip2->if_from;
485 sip2->if_from = NULL;
486 }
487 sip1->strand = strand;
488 }
489 merged = TRUE;
490 }
491 }
492 else
493 {
494 if (sip1->to == (sip2->from - 1)) /* they are adjacent */
495 {
496 sip1->to = sip2->to;
497 sip1->if_to = IntFuzzFree(sip1->if_to);
498 if (sip2->if_to != NULL) /* copy the fuzz */
499 {
500 if (do_copy)
501 sip1->if_to = (IntFuzzPtr)AsnIoMemCopy((Pointer)(sip2->if_to),
502 (AsnReadFunc)IntFuzzAsnRead, (AsnWriteFunc)IntFuzzAsnWrite);
503 else
504 {
505 sip1->if_to = sip2->if_to;
506 sip2->if_to = NULL;
507 }
508 sip1->strand = strand;
509 }
510 merged = TRUE;
511 }
512 }
513 }
514 } else if ((last->choice == SEQLOC_PNT) && (slp->choice == SEQLOC_PNT))
515 {
516 SeqPntPtr sip1, sip2;
517
518 sip1 = (SeqPntPtr)(last->data.ptrvalue);
519 sip2 = (SeqPntPtr)(slp->data.ptrvalue);
520 if ((sip1->strand == sip2->strand) && sip1->point == sip2->point && (SeqIdForSameBioseq(sip1->id, sip2->id)))
521 {
522 sip1->fuzz = IntFuzzFree(sip1->fuzz);
523 if (sip2->fuzz != NULL) /* copy the fuzz */
524 {
525 if (do_copy)
526 sip1->fuzz = (IntFuzzPtr)AsnIoMemCopy((Pointer)(sip2->fuzz),
527 (AsnReadFunc)IntFuzzAsnRead, (AsnWriteFunc)IntFuzzAsnWrite);
528 else
529 {
530 sip1->fuzz = sip2->fuzz;
531 sip2->fuzz = NULL;
532 }
533 }
534 merged = TRUE;
535 }
536 }
537 }
538
539 ret:
540 if (! merged) /* then have to add a new one */
541 {
542 if (do_copy)
543 tmp = (SeqLocPtr)AsnIoMemCopy((Pointer)slp, (AsnReadFunc)SeqLocAsnRead, (AsnWriteFunc)SeqLocAsnWrite);
544 else
545 tmp = slp;
546
547 if (tmp != NULL) {
548 tmp->next = NULL;
549 }
550
551 if (last != NULL) {
552 last->next = tmp;
553 } else if (head != NULL) {
554 *head = tmp;
555 }
556 last = tmp;
557 retval = tmp;
558 }
559 else
560 {
561 retval = last;
562 if (! do_copy) /* got to free it here */
563 SeqLocFree(slp);
564 }
565 if (lastp != NULL) {
566 *lastp = last;
567 }
568
569 return retval;
570 }
571
572 NLM_EXTERN SeqLocPtr LIBCALL SeqLocAdd (SeqLocPtr PNTR head, SeqLocPtr slp, Boolean merge, Boolean do_copy)
573 {
574 SeqLocPtr tmp, last;
575
576 if (slp == NULL) return NULL;
577
578 last = NULL;
579 if (* head != NULL)
580 {
581 for (tmp = *head; tmp != NULL; tmp = tmp->next)
582 {
583 last = tmp;
584 }
585 }
586 return SeqLocAddEx (head, &last, slp, merge, do_copy);
587 }
588
589 /*****************************************************************************
590 *
591 * SegLocToParts(BioseqPtr seg, SeqLocPtr slp)
592 * seg must be a segmented Bioseq
593 * slp must be a SeqLoc on it
594 * function maps slp to the components of seg
595 * returns a new SeqLocPtr
596 * does not delete slp
597 *
598 *****************************************************************************/
599 NLM_EXTERN SeqLocPtr LIBCALL SegLocToPartsEx (BioseqPtr seg, SeqLocPtr slp, Boolean nullsBetween)
600 {
601 SeqLocPtr newloc = NULL, tmp, tmp2, tmp3, next, curr;
602 ValNode thead;
603 SeqIdPtr sip, tsip;
604 Int4 left_end, right_end, tlen, tstart;
605 SeqIntPtr sintp;
606 Boolean split, notFirst = FALSE;
607
608 if ((seg == NULL) || (slp == NULL)) return newloc;
609 if (seg->repr != Seq_repr_seg) return newloc;
610
611 sip = SeqLocId(slp);
612 if (sip == NULL) return newloc;
613 if (! SeqIdIn(sip, seg->id)) return newloc;
614
615 MemSet(&thead, 0, sizeof(ValNode));
616 thead.choice = SEQLOC_MIX;
617 thead.data.ptrvalue = seg->seq_ext;
618
619 curr = NULL;
620 while ((curr = SeqLocFindNext(slp, curr)) != NULL)
621 {
622 left_end = 0;
623 tmp = NULL;
624 while ((tmp = SeqLocFindNext(&thead, tmp)) != NULL)
625 {
626 tlen = SeqLocLen(tmp);
627 if (tlen > 0)
628 {
629 right_end = left_end + tlen - 1;
630 tsip = SeqLocId(tmp);
631 tstart = SeqLocStart(tmp);
632 tmp2 = SeqLocCopyRegion(tsip, curr, seg, left_end, right_end, SeqLocStrand(tmp),
633 &split);
634 while (tmp2 != NULL)
635 {
636 next = tmp2->next;
637 tmp2->next = NULL;
638 if (tmp2->choice == SEQLOC_INT)
639 {
640 if (nullsBetween && notFirst) {
641 tmp3 = ValNodeNew (NULL);
642 if (tmp3 != NULL) {
643 tmp3->choice = SEQLOC_NULL;
644 SeqLocAdd (&newloc, tmp3, TRUE, FALSE);
645 }
646 }
647 notFirst = TRUE;
648 sintp = (SeqIntPtr)(tmp2->data.ptrvalue);
649 sintp->from += tstart;
650 sintp->to += tstart;
651 SeqLocAdd(&newloc, tmp2, TRUE, FALSE);
652 }
653 else if (tmp2->choice == SEQLOC_PNT)
654 {
655 if (nullsBetween && notFirst) {
656 tmp3 = ValNodeNew (NULL);
657 if (tmp3 != NULL) {
658 tmp3->choice = SEQLOC_NULL;
659 SeqLocAdd (&newloc, tmp3, TRUE, FALSE);
660 }
661 }
662 notFirst = TRUE;
663 SeqLocAdd (&newloc, tmp2, TRUE, FALSE);
664 }
665 tmp2 = next;
666 }
667 left_end = right_end + 1;
668 }
669 }
670 }
671
672 if (newloc != NULL)
673 newloc = SeqLocPackage(newloc);
674 return newloc;
675 }
676
677 NLM_EXTERN SeqLocPtr LIBCALL SegLocToParts (BioseqPtr seg, SeqLocPtr slp)
678
679 {
680 return SegLocToPartsEx (seg, slp, FALSE);
681 }
682
683 static CharPtr seqlitdbtag = "SeqLit";
684 static CharPtr unkseqlitdbtag = "UnkSeqLit";
685 /*****************************************************************************
686 *
687 * ISADeltaSeqsToSeqLoc(slp)
688 * returns Index (> 0) if this (one) SeqLoc was converted from a Delta Seq by
689 * DeltaSeqsToSeqLocs() by looking for the special Dbtag name
690 *
691 *****************************************************************************/
692 NLM_EXTERN Int4 LIBCALL ISADeltaSeqsToSeqLoc (SeqLocPtr slp)
693 {
694 SeqIdPtr sip;
695 Int4 retval = 0;
696
697 if (slp == NULL) return retval;
698 sip = SeqLocId(slp);
699 if (sip == NULL) return retval;
700
701 if (sip->choice != SEQID_GENERAL) return retval;
702
703 if (! StringCmp(seqlitdbtag, ((DbtagPtr)(sip->data.ptrvalue))->db) ||
704 ! StringCmp(unkseqlitdbtag, ((DbtagPtr)(sip->data.ptrvalue))->db))
705 retval = (((DbtagPtr)(sip->data.ptrvalue))->tag->id);
706
707 return retval;
708 }
709
710 /*****************************************************************************
711 *
712 * DeltaSeqsToSeqLocs(dsp)
713 * converts a chain of delta seqs to seqlocs
714 * each SeqLit is converted to SeqLoc of type Int with a SeqId of type
715 * Dbtag where db="Seq\tLit" and objectId.id which is the index of the
716 * element in the delta seq chain where 1 is the first one.
717 * Returned SeqLoc is of type "mix" and must be freed by caller.
718 *
719 *****************************************************************************/
720 NLM_EXTERN SeqLocPtr LIBCALL DeltaSeqsToSeqLocs (DeltaSeqPtr dsp)
721 {
722 SeqLocPtr head = NULL, thead = NULL, last = NULL;
723 DeltaSeqPtr curr;
724 SeqInt si;
725 Dbtag db;
726 ObjectId oi;
727 ValNode vn, vn2;
728
729 MemSet(&vn, 0, sizeof(ValNode));
730 MemSet(&vn2, 0, sizeof(ValNode));
731 MemSet(&si, 0, sizeof(SeqInt));
732 MemSet(&db, 0, sizeof(Dbtag));
733 MemSet(&oi, 0, sizeof(ObjectId));
734 vn.choice = SEQLOC_INT;
735 vn.data.ptrvalue = &si;
736 si.id = &vn2;
737 vn2.choice = SEQID_GENERAL;
738 vn2.data.ptrvalue = &db;
739 db.db = seqlitdbtag;
740 db.tag = &oi;
741 oi.id = 1;
742
743
744
745 for (curr = dsp; curr != NULL; curr = curr->next)
746 {
747 if (curr->choice == 1) /* a SeqLoc */
748 SeqLocAddEx (&thead, &last, (SeqLocPtr)(curr->data.ptrvalue), TRUE, TRUE);
749 else
750 {
751 si.to = ((SeqLitPtr) (curr->data.ptrvalue))->length - 1;
752 SeqLocAddEx (&thead, &last, &vn, TRUE, TRUE);
753 }
754 oi.id++;
755 }
756
757 head = SeqLocPackage(thead);
758 return head;
759 }
760
761 /*****************************************************************************
762 * GOHERE
763 * SeqLocsToDeltaSeqs(dsp, slp)
764 * converts a chain of seqlocs generated by DeltaSeqToSeqLocs() back into
765 * delta seqs. dsp is the original chain of DeltaSeqs, which is required
766 * to convert the delta seqs back.
767 *
768 *****************************************************************************/
769 NLM_EXTERN DeltaSeqPtr LIBCALL SeqLocsToDeltaSeqs (DeltaSeqPtr dsp, SeqLocPtr slp)
770 {
771 DeltaSeqPtr dhead=NULL, dcurr=NULL, dtmp;
772 SeqLocPtr scurr;
773 Int4 ctr, index, strt, stp;
774 SeqIdPtr sip;
775 Uint1 strand, newcode;
776 SeqLitPtr slitp, slitp_new;
777 SeqPortPtr spps;
778 ByteStorePtr bsp;
779 Int2 residue;
780 ValNode vn;
781
782 if ((dsp == NULL) || (slp == NULL))
783 return dhead;
784
785 vn.choice = SEQLOC_MIX;
786 vn.next = NULL;
787 vn.data.ptrvalue = slp;
788 scurr = NULL;
789 while ((scurr = SeqLocFindNext(&vn, scurr)) != NULL)
790 {
791 dcurr = ValNodeNew(dhead);
792 if (dhead == NULL)
793 dhead = dcurr;
794
795 index = ISADeltaSeqsToSeqLoc(scurr);
796
797 if (index == 0) /* just a SeqLoc */
798 {
799 dcurr->choice = 1;
800 dcurr->data.ptrvalue = NULL;
801 dcurr->data.ptrvalue = AsnIoMemCopy((Pointer)scurr, (AsnReadFunc)SeqLocAsnRead, (AsnWriteFunc)SeqLocAsnWrite);
802
803 }
804 else /* convert to a delta seq */
805 {
806 dcurr->choice = 2;
807 sip = SeqLocId(scurr);
808 dtmp = dsp;
809 for (ctr = 1; ctr < index; ctr++)
810 dtmp = dtmp->next;
811
812 if (dtmp->choice != 2) /* wups */
813 {
814 ErrPostEx(SEV_ERROR,0,0,"Wrong type in SeqLocsToDeltaSeqs");
815 dhead = DeltaSeqFree(dhead);
816 return dhead;
817 }
818 slitp = (SeqLitPtr)(dtmp->data.ptrvalue);
819
820 strt = SeqLocStart(scurr);
821 stp = SeqLocStop(scurr);
822 strand = SeqLocStrand(scurr);
823
824 if ((strt == 0) && (stp == (slitp->length - 1)) && (strand != Seq_strand_minus)) /* no change */
825 {
826 dcurr->data.ptrvalue = AsnIoMemCopy((Pointer)slitp, (AsnReadFunc)SeqLitAsnRead, (AsnWriteFunc)SeqLitAsnWrite);
827 }
828 else /* got to copy part of it */
829 {
830 switch (slitp->seq_data_type)
831 {
832 case Seq_code_iupacna:
833 case Seq_code_iupacaa:
834 case Seq_code_ncbi8na:
835 case Seq_code_ncbi8aa:
836 case Seq_code_ncbieaa:
837 case Seq_code_ncbistdaa:
838 case Seq_code_iupacaa3:
839 newcode = slitp->seq_data_type; /* one byte codes.. fine */
840 break;
841 case Seq_code_ncbipna:
842 ErrPostEx(SEV_ERROR,0,0,"Converting from P residue codes");
843 newcode = Seq_code_ncbieaa;
844 break;
845 case Seq_code_ncbipaa:
846 ErrPostEx(SEV_ERROR,0,0,"Converting from P residue codes");
847 case Seq_code_ncbi2na:
848 case Seq_code_ncbi4na:
849 newcode = Seq_code_iupacna;
850 break;
851 case Seq_code_gap:
852 ErrPostEx(SEV_WARNING,0,0,"Seq_code_gap residue code in SeqLocsToDeltaSeqs");
853 return DeltaSeqFree(dhead);
854 break;
855 default:
856 ErrPostEx(SEV_FATAL,0,0,"Unrecognized residue code [%d] in SeqLocsToDeltaSeqs",
857 (int)(slitp->seq_data_type));
858 return DeltaSeqFree(dhead);
859 }
860 spps = MemNew(sizeof(SeqPort));
861 SeqPortSetUpFields (spps, strt, stp, strand, newcode);
862 SeqPortSetUpAlphabet(spps, slitp->seq_data_type, newcode);
863 spps->bp = (ByteStorePtr) slitp->seq_data;
864 slitp_new = SeqLitNew();
865 dcurr->data.ptrvalue = slitp_new;
866 slitp_new->seq_data_type = newcode;
867 slitp_new->length = (stp - strt + 1);
868 bsp = BSNew(slitp_new->length);
869 slitp_new->seq_data = (SeqDataPtr) bsp;
870 SeqPortSeek(spps, 0, SEEK_SET);
871 BSSeek(bsp, 0, SEEK_SET);
872 while (stp >= strt)
873 {
874 residue = SeqPortGetResidue(spps);
875 BSPutByte(bsp, residue);
876 strt++;
877 }
878 SeqPortFree(spps);
879 }
880
881 }
882
883 }
884 return dhead;
885 }
886 /*****************************************************************************
887 *
888 * BioseqDelete (target, from, to, do_feat, do_split)
889 * Deletes the region of sequence between from-to, inclusive, on the
890 * Bioseq whose SeqId is target.
891 * If do_feat, the feature table is updated to reflect the deletion
892 * using SeqEntryDelFeat()
893 * If do_split, the features across the deleted region are split into
894 * two intervals on either side. If not, the feature is just shortened.
895 *****************************************************************************/
896 NLM_EXTERN Boolean LIBCALL BioseqDelete (SeqIdPtr target, Int4 from, Int4 to, Boolean do_feat, Boolean do_split)
897 {
898 Boolean retval = FALSE;
899 BioseqPtr bsp;
900 SeqLocPtr tmp, head;
901 Int4 len, deleted;
902 Int4 totlen, templen, tfrom, tto, diff1, diff2;
903 SeqLocPtr slp, tloc, newhead, prev;
904 ValNode vn;
905 SeqInt si;
906 SeqLocPtr PNTR newheadptr;
907 SeqFeatPtr sfpcurr, sfpnext, sfpprev;
908 Int2 dropped;
909 SeqEntryPtr oldscope;
910 DeltaSeqPtr tdsp = NULL;
911
912 bsp = BioseqFind(target);
913 if (bsp == NULL) {
914 oldscope = SeqEntrySetScope (NULL);
915 if (oldscope != NULL) {
916 bsp = BioseqFind(target);
917 SeqEntrySetScope (oldscope);
918 }
919 }
920 if (bsp == NULL) return retval;
921
922 if ((from < 0) || (from >= bsp->length) || (to < 0) ||
923 (to >= bsp->length) || (from > to)) return retval;
924
925 if (do_feat)
926 SeqEntryDelFeat(NULL, target, from, to, do_split);
927
928 len = to - from + 1;
929 /* if actual sequence present */
930
931 if (((bsp->repr == Seq_repr_raw) || (bsp->repr == Seq_repr_const)) && bsp->seq_data_type != Seq_code_gap)
932 {
933 if (ISA_na(bsp->mol))
934 {
935 if (bsp->seq_data_type != Seq_code_iupacna) /* need 1 byte/base */
936 BioseqRawConvert(bsp, Seq_code_iupacna);
937 }
938 else
939 {
940 if (bsp->seq_data_type != Seq_code_ncbieaa)
941 BioseqRawConvert(bsp, Seq_code_ncbieaa);
942 }
943
944 BSSeek((ByteStorePtr) bsp->seq_data, from, SEEK_SET);
945 deleted = BSDelete((ByteStorePtr) bsp->seq_data, len);
946 if (deleted != len) /* error */
947 ErrPost(CTX_NCBIOBJ, 1, "Delete of %ld residues failed", len);
948 else
949 retval = TRUE;
950 }
951
952 /* update segmented sequence */
953 if ((bsp->repr == Seq_repr_seg) || (bsp->repr == Seq_repr_delta))
954 {
955 head = ValNodeNew(NULL); /* allocate to facilitate SeqLocFree */
956 head->choice = SEQLOC_MIX; /* make a SeqLoc out of the extension */
957 if (bsp->repr == Seq_repr_seg)
958 head->data.ptrvalue = bsp->seq_ext;
959 else
960 {
961 tdsp = (DeltaSeqPtr)(bsp->seq_ext);
962 head->data.ptrvalue = DeltaSeqsToSeqLocs(tdsp);
963 }
964
965 newhead = NULL;
966 newheadptr = &newhead;
967
968 tloc = &vn;
969 MemSet((Pointer)tloc, 0, sizeof(ValNode));
970 MemSet((Pointer)&si, 0, sizeof(SeqInt));
971 tloc->choice = SEQLOC_INT;
972 tloc->data.ptrvalue = (Pointer)(&si);
973
974 slp = NULL;
975 totlen = 0;
976 while ((slp = SeqLocFindNext(head, slp)) != NULL)
977 {
978 templen = SeqLocLen(slp);
979 tfrom = SeqLocStart(slp);
980 tto = SeqLocStop(slp);
981
982 if (((totlen + templen - 1) < from) || /* before cut */
983 (totlen > to)) /* after cut */
984 tmp = SeqLocAdd(newheadptr, slp, TRUE, TRUE); /* add whole SeqLoc */
985 else
986 {
987 retval = 1; /* will modify or drop interval */
988 diff1 = from - totlen; /* partial beginning? */
989 diff2 = (templen + totlen - 1) - to; /* partial end? */
990 si.id = SeqLocId(slp);
991 si.strand = SeqLocStrand(slp);
992
993 if (diff1 > 0) /* partial start */
994 {
995 if (si.strand != Seq_strand_minus)
996 {
997 si.from = tfrom;
998 si.to = tfrom + diff1 - 1;
999 }
1000 else
1001 {
1002 si.from = tto - diff1 + 1;
1003 si.to = tto;
1004 }
1005 tmp = SeqLocAdd(newheadptr, tloc, TRUE, TRUE);
1006 }
1007
1008 if (diff2 > 0) /* partial end */
1009 {
1010 if (si.strand != Seq_strand_minus)
1011 {
1012 si.from = tto - diff2 + 1;
1013 si.to = tto;
1014 }
1015 else
1016 {
1017 si.from = tfrom;
1018 si.to = tfrom + diff2 - 1;
1019 }
1020 tmp = SeqLocAdd(newheadptr, tloc, TRUE, TRUE);
1021 }
1022
1023 }
1024 totlen += templen;
1025 }
1026
1027 prev = NULL;
1028 for (tmp = newhead; tmp != NULL; tmp = tmp->next)
1029 {
1030 if (tmp->next == NULL) /* last one */
1031 {
1032 if (tmp->choice == SEQLOC_NULL)
1033 {
1034 if (prev != NULL)
1035 prev->next = NULL;
1036 else /* only a NULL left */
1037 {
1038 newhead = NULL;
1039 }
1040 MemFree(tmp);
1041 break;
1042 }
1043 }
1044 prev = tmp;
1045 }
1046
1047 if (bsp->repr == Seq_repr_seg)
1048 bsp->seq_ext = newhead;
1049 else
1050 {
1051 bsp->seq_ext = SeqLocsToDeltaSeqs(tdsp, newhead);
1052 DeltaSeqSetFree(tdsp);
1053 SeqLocSetFree(newhead);
1054 }
1055 SeqLocFree(head);
1056 retval = TRUE;
1057 }
1058
1059 if (bsp->repr == Seq_repr_map) /* map bioseq */
1060 {
1061 sfpprev = NULL;
1062 sfpnext = NULL;
1063 sfpcurr = (SeqFeatPtr)(bsp->seq_ext);
1064 bsp->seq_ext = NULL;
1065 for (; sfpcurr != NULL; sfpcurr = sfpnext)
1066 {
1067 sfpnext = sfpcurr->next;
1068 dropped = SeqFeatDelete(sfpcurr, target, from, to, TRUE);
1069 if (dropped == 2) /* completely gone */
1070 {
1071 SeqFeatFree(sfpcurr);
1072 }
1073 else
1074 {
1075 if (sfpprev == NULL)
1076 bsp->seq_ext = (Pointer)sfpcurr;
1077 else
1078 sfpprev->next = sfpcurr;
1079 sfpcurr->next = NULL;
1080 sfpprev = sfpcurr;
1081 }
1082 }
1083 retval = TRUE;
1084 }
1085
1086 if (bsp->repr == Seq_repr_virtual)
1087 retval = TRUE; /* nothing to do */
1088
1089 if (retval)
1090 bsp->length -= len;
1091 return retval;
1092 }
1093
1094
1095 /*****************************************************************************
1096 *
1097 * BioseqOverwrite (target, pos, residue, seqcode)
1098 * Overwrites the residue at pos with residue in the
1099 * Bioseq whose SeqId is target.
1100 * residue is iupacna for DNA or ncbieaa for protein
1101 * target MUST be a raw Bioseq right now
1102 *
1103 *****************************************************************************/
1104 NLM_EXTERN Boolean LIBCALL BioseqOverwrite (SeqIdPtr target, Int4 pos, Uint1 residue)
1105 {
1106 BioseqPtr bsp;
1107 Boolean retval = FALSE;
1108 SeqEntryPtr oldscope;
1109
1110
1111 bsp = BioseqFind(target);
1112 if (bsp == NULL) {
1113 oldscope = SeqEntrySetScope (NULL);
1114 if (oldscope != NULL) {
1115 bsp = BioseqFind(target);
1116 SeqEntrySetScope (oldscope);
1117 }
1118 }
1119 if (bsp == NULL) return retval;
1120
1121 if ((pos < 0) || (pos >= bsp->length)) return retval;
1122 if (bsp->repr != Seq_repr_raw) return retval;
1123
1124 if (bsp->seq_data_type == Seq_code_gap) return FALSE;
1125
1126 if (ISA_na(bsp->mol))
1127 {
1128 if (bsp->seq_data_type != Seq_code_iupacna) /* need 1 byte/base */
1129 BioseqRawConvert(bsp, Seq_code_iupacna);
1130 }
1131 else
1132 {
1133 if (bsp->seq_data_type != Seq_code_ncbieaa)
1134 BioseqRawConvert(bsp, Seq_code_ncbieaa);
1135 }
1136
1137 BSSeek((ByteStorePtr) bsp->seq_data, pos, SEEK_SET);
1138 BSPutByte((ByteStorePtr) bsp->seq_data, (Int2)(TO_UPPER(residue)));
1139 retval = TRUE;
1140
1141 return retval;
1142 }
1143
1144
1145 /*****************************************************************************
1146 *
1147 * SeqInsertByLoc (target, offset, fragment)
1148 *
1149 *****************************************************************************/
1150 NLM_EXTERN Boolean LIBCALL SeqInsertByLoc (SeqIdPtr target, Int4 offset, SeqLocPtr fragment)
1151 {
1152 return TRUE;
1153 }
1154
1155
1156 /*****************************************************************************
1157 *
1158 * SeqDeleteByLoc (slp, do_feat, do_split)
1159 *
1160 *****************************************************************************/
1161 NLM_EXTERN Boolean LIBCALL SeqDeleteByLoc (SeqLocPtr slp, Boolean do_feat, Boolean do_split)
1162 {
1163 SeqLocPtr tmp;
1164 Boolean retval = FALSE;
1165 Int2 numloc, i = 0, ctr, pick, totloc;
1166 SeqLocPtr PNTR locs, PNTR tlocs, PNTR theorder;
1167 BioseqPtr bsp;
1168 Int4 tstart, tstop;
1169
1170 if (slp == NULL) return retval;
1171
1172 numloc = 0;
1173 totloc = 0;
1174 locs = NULL;
1175 tmp = NULL;
1176
1177 while ((tmp = SeqLocFindNext(slp, tmp)) != NULL)
1178 {
1179 switch (tmp->choice)
1180 {
1181 case SEQLOC_INT:
1182 case SEQLOC_PNT:
1183 if (BioseqFind(SeqLocId(tmp)) != NULL)
1184 {
1185 if (numloc == totloc)
1186 {
1187 tlocs = locs;
1188 locs = (SeqLocPtr PNTR)(MemNew((totloc+20) * sizeof(SeqLocPtr)));
1189 MemCopy(locs, tlocs, (size_t)(totloc * sizeof(SeqLocPtr)));
1190 MemFree(tlocs);
1191 totloc += 20;
1192 }
1193 locs[numloc] = tmp;
1194 numloc++;
1195 }
1196 break;
1197 default:
1198 Message(MSG_ERROR, "Unsupported Seqloc [%d] in SeqDeleteByLoc",
1199 (int)(tmp->choice));
1200 break;
1201
1202 }
1203 }
1204
1205 if (! numloc) return retval;
1206
1207
1208 /***********************************************************
1209 *
1210 * first gather all the seqlocs, grouped by Bioseq, and
1211 * ordered from end to beginning. They must be ordered
1212 * before the underlying Bioseq is changed.
1213 *
1214 ***********************************************************/
1215
1216 retval = TRUE;
1217
1218 bsp = NULL;
1219 theorder = (SeqLocPtr PNTR)MemNew((sizeof(SeqLocPtr) * numloc));
1220 for (ctr = 0; ctr < numloc; ctr++)
1221 {
1222 pick = -1; /* flag none found */
1223 if (bsp != NULL)
1224 {
1225 for (i = 0; i < numloc; i++)
1226 {
1227 if (locs[i] != NULL)
1228 {
1229 if (SeqIdIn(SeqLocId(locs[i]), bsp->id))
1230 {
1231 pick = i;
1232 i++;
1233 break;
1234 }
1235 }
1236 }
1237 if (pick < 0)
1238 bsp = NULL; /* no more locs on this bioseq */
1239 }
1240
1241 if (bsp == NULL) /* have to find a new bioseq */
1242 {
1243 for (i = 0; i < numloc; i++)
1244 {
1245 if (locs[i] != NULL)
1246 {
1247 bsp = BioseqFind(SeqLocId(locs[i]));
1248 pick = i;
1249 i++;
1250 break;
1251 }
1252 }
1253 }
1254
1255 while (i < numloc)
1256 {
1257 if (SeqLocOrder(locs[pick], locs[i], bsp) == (-1)) /* it's after */
1258 pick = i;
1259 i++;
1260 }
1261
1262 theorder[ctr] = locs[pick];
1263 locs[pick] = NULL;
1264 }
1265
1266 MemFree(locs); /* finished with original list */
1267
1268 /*************************************************************
1269 *
1270 * Now do the actual deletions
1271 *
1272 *************************************************************/
1273
1274
1275 for (ctr = 0; ctr < numloc; ctr++)
1276 {
1277 tstart = SeqLocStart(theorder[ctr]);
1278 tstop = SeqLocStop(theorder[ctr]);
1279 BioseqDelete(SeqLocId(theorder[ctr]), tstart, tstop, do_feat, do_split);
1280 }
1281
1282 MemFree(theorder);
1283
1284 return retval;
1285 }
1286
1287
1288 /*****************************************************************************
1289 *
1290 * SeqFeatDelete()
1291 * 0 = no changes made to location or product
1292 * 1 = changes made but feature still has some location
1293 * 2 = all of sfp->location in deleted interval
1294 *
1295 * if (merge)
1296 * 1) correct numbers > to by subtraction
1297 * 2) do not split intervals spanning the deletion
1298 * else
1299 * 1) do not change numbers > to
1300 * 2) split intervals which span the deletions
1301 *
1302 *****************************************************************************/
1303 NLM_EXTERN Int2 LIBCALL SeqFeatDelete (SeqFeatPtr sfp, SeqIdPtr target, Int4 from, Int4 to, Boolean merge)
1304 {
1305 ValNode vn;
1306 SeqLocPtr tloc;
1307 SeqInt si;
1308 Boolean changed = FALSE, tmpbool = FALSE;
1309 CdRegionPtr crp;
1310 CodeBreakPtr cbp, prevcbp, nextcbp;
1311 RnaRefPtr rrp;
1312 tRNAPtr trp;
1313 Boolean partial5, partial3;
1314 Uint1 strand;
1315 BioseqPtr bsp;
1316 Int4 new_frame;
1317
1318 tloc = &vn;
1319 MemSet((Pointer)tloc, 0, sizeof(ValNode));
1320 MemSet((Pointer)&si, 0, sizeof(SeqInt));
1321 tloc->choice = SEQLOC_INT;
1322 tloc->data.ptrvalue = (Pointer)(&si);
1323 si.id = target;
1324 si.from = from;
1325 si.to = to;
1326
1327 CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
1328 strand = SeqLocStrand (sfp->location);
1329 bsp = BioseqFindFromSeqLoc (sfp->location);
1330 sfp->location = SeqLocDelete(sfp->location, target, from, to, merge, &changed);
1331
1332 sfp->product = SeqLocDelete(sfp->product, target, from, to, merge, &changed);
1333
1334 if (sfp->location == NULL)
1335 return 2;
1336
1337 switch (sfp->data.choice)
1338 {
1339 case SEQFEAT_CDREGION: /* cdregion */
1340 crp = (CdRegionPtr)(sfp->data.value.ptrvalue);
1341 if (changed)
1342 {
1343 /* adjust frame */
1344 if ((strand == Seq_strand_minus && bsp != NULL && to == bsp->length - 1 && partial5)
1345 || (strand != Seq_strand_minus && from == 0 && partial5))
1346 {
1347 if (crp->frame == 0)
1348 {
1349 crp->frame = 1;
1350 }
1351 new_frame = crp->frame - ((to - from + 1) % 3);
1352 if (new_frame < 1)
1353 {
1354 new_frame += 3;
1355 }
1356 crp->frame = new_frame;
1357 }
1358 }
1359 /* fix code_break locations */
1360 prevcbp = NULL;
1361 for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp)
1362 {
1363 nextcbp = cbp->next;
1364 cbp->loc = SeqLocDelete(cbp->loc, target, from, to, merge, &tmpbool);
1365 if (cbp->loc == NULL)
1366 {
1367 if (prevcbp != NULL)
1368 prevcbp->next = nextcbp;
1369 else
1370 crp->code_break = nextcbp;
1371 cbp->next = NULL;
1372 CodeBreakFree(cbp);
1373 }
1374 else
1375 prevcbp = cbp;
1376 }
1377 break;
1378 case SEQFEAT_RNA:
1379 rrp = (RnaRefPtr)(sfp->data.value.ptrvalue);
1380 if (rrp->ext.choice == 2) /* tRNA */
1381 {
1382 trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
1383 if (trp->anticodon != NULL)
1384 {
1385 trp->anticodon = SeqLocDelete(trp->anticodon, target, from, to, merge, &tmpbool);
1386 }
1387 }
1388 break;
1389 default:
1390 break;
1391 }
1392
1393 if (changed)
1394 {
1395 return 1;
1396 }
1397 else
1398 return 0;
1399 }
1400
1401 /*****************************************************************************
1402 *
1403 * SeqLocDelete()
1404 * returns altered head or NULL if nothing left.
1405 * sets changed=TRUE if all or part of loc is deleted
1406 * does NOT set changed if location coordinates are only moved
1407 * if (merge) then corrects coordinates upstream of to
1408 * else
1409 * splits intervals covering from-to, does not correct upstream of to
1410 *
1411 *****************************************************************************/
/*
 * SeqLocDeleteEx
 *   Removes the region target:from-to (inclusive) from the location head and
 *   returns the adjusted location, or NULL when nothing of it remains.
 *
 *   head      location to edit in place; head->next is cleared on entry, so
 *             the caller must save and restore any chain it belongs to.
 *   target    id of the Bioseq the deletion applies to; components on other
 *             Bioseqs are left untouched.
 *   merge     TRUE:  coordinates beyond 'to' are shifted down by the deleted
 *                    length and spanning intervals simply shrink.
 *             FALSE: coordinates beyond 'to' are kept and intervals that
 *                    span the deletion are split in two.
 *   changed   set to TRUE where residues covered by the location are
 *             removed; it is written without a NULL check, so it must point
 *             to a valid Boolean.  It is never reset to FALSE here.
 *   partial5, partial3
 *             optional (may be NULL); set to TRUE when the 'from' end
 *             (partial5) or the 'to' end (partial3) of an interval is cut.
 *             NOTE(review): the interval case does not look at the strand
 *             when choosing which of the two flags to set - confirm that
 *             this is what callers expect for minus-strand intervals.
 *
 *   If head or target is NULL, head is returned unchanged.
 */
1412 NLM_EXTERN SeqLocPtr LIBCALL SeqLocDeleteEx (SeqLocPtr head, SeqIdPtr target, Int4 from, Int4 to, Boolean merge, BoolPtr changed, BoolPtr partial5, BoolPtr partial3)
1413 {
1414 SeqIntPtr sip, sip2;
1415 SeqPntPtr spp;
1416 PackSeqPntPtr pspp, pspp2;
1417 SeqBondPtr sbp;
1418 SeqIdPtr sidp;
1419 SeqLocPtr slp, tmp, prev, next, thead;
1420 Int4 diff, numpnt, i, tpos;
1421 BioseqPtr bsp;
1422 Boolean part5, part3, first;
1423
1424 if ((head == NULL) || (target == NULL))
1425 return head;
1426
1427 head->next = NULL; /* caller maintains chains */
/* diff = number of residues removed; used to shift coordinates when merging */
1428 diff = to - from + 1;
1429
1430 switch (head->choice)
1431 {
/* Bond: each end point is handled on its own.  Point a is dereferenced
   without a NULL check; point b is optional. */
1432 case SEQLOC_BOND: /* bond -- 2 seqs */
1433 sbp = (SeqBondPtr)(head->data.ptrvalue);
1434 spp = sbp->a;
1435 if (SeqIdForSameBioseq(spp->id, target))
1436 {
1437 if (spp->point >= from)
1438 {
1439 if (spp->point <= to) /* delete it */
1440 {
1441 *changed = TRUE;
1442 sbp->a = SeqPntFree(spp);
1443 }
1444 else if (merge)
1445 spp->point -= diff;
1446 }
1447 }
1448 spp = sbp->b;
1449 if (spp != NULL)
1450 {
1451 if (SeqIdForSameBioseq(spp->id, target))
1452 {
1453 if (spp->point >= from)
1454 {
1455 if (spp->point <= to) /* delete it */
1456 {
1457 *changed = TRUE;
1458 sbp->b = SeqPntFree(spp);
1459 }
1460 else if (merge)
1461 spp->point -= diff;
1462 }
1463 }
1464 }
/* if a was deleted, promote b into its place; if both are gone the bond goes */
1465 if (sbp->a == NULL)
1466 {
1467 if (sbp->b != NULL) /* only a required */
1468 {
1469 sbp->a = sbp->b;
1470 sbp->b = NULL;
1471 }
1472 else
1473 {
1474 head = SeqLocFree(head);
1475 }
1476 }
1477 break;
1478 case SEQLOC_FEAT: /* feat -- can't track yet */
1479 case SEQLOC_NULL: /* NULL */
1480 case SEQLOC_EMPTY: /* empty */
1481 break;
/* Whole: needs the Bioseq length, so nothing is done if BioseqFind fails.
   When merging, a partial deletion leaves the "whole" location as it is. */
1482 case SEQLOC_WHOLE: /* whole */
1483 sidp = (SeqIdPtr)(head->data.ptrvalue);
1484 if (SeqIdForSameBioseq(sidp, target))
1485 {
1486 bsp = BioseqFind(target);
1487 if (bsp != NULL) /* split it */
1488 {
1489 if ((from == 0) && (to >= (bsp->length - 1)))
1490 { /* complete delete */
1491 head = SeqLocFree(head);
1492 *changed = TRUE;
1493 break;
1494 }
1495
/* not merging: replace "whole" by a packed-int holding the piece before
   the deletion (if any) and the piece after it (if any) */
1496 if (! merge) /* split it up */
1497 {
1498 SeqIdFree(sidp);
1499 head->choice = SEQLOC_PACKED_INT;
1500 head->data.ptrvalue = NULL;
1501 slp = NULL;
1502 if (from != 0)
1503 {
1504 sip = SeqIntNew();
1505 sip->from = 0;
1506 sip->to = from - 1;
1507 sip->id = SeqIdDup(target);
1508 slp = ValNodeNew(NULL);
1509 slp->choice = SEQLOC_INT;
1510 slp->data.ptrvalue = sip;
1511 head->data.ptrvalue = slp;
1512 *changed = TRUE;
1513 }
1514 if (to < (bsp->length - 1))
1515 {
1516 sip = SeqIntNew();
1517 sip->from = to + 1;
1518 sip->to = bsp->length - 1;
1519 sip->id = SeqIdDup(target);
1520 tmp = ValNodeNew(NULL);
1521 tmp->choice = SEQLOC_INT;
1522 tmp->data.ptrvalue = sip;
1523 if (slp != NULL)
1524 slp->next = tmp;
1525 else
1526 head->data.ptrvalue = tmp;
1527 *changed = TRUE;
1528 }
1529
1530 }
1531 }
1532 }
1533 break;
/* Container types: recurse into every member and rebuild the member chain
   (thead .. prev) from whatever survives. */
1534 case SEQLOC_MIX: /* mix -- more than one seq */
1535 case SEQLOC_EQUIV: /* equiv -- ditto */
1536 case SEQLOC_PACKED_INT: /* packed int */
1537 prev = NULL;
1538 thead = NULL;
1539 part5 = FALSE;
1540 part3 = FALSE;
1541 first = TRUE;
1542 for (slp = (SeqLocPtr)(head->data.ptrvalue); slp != NULL; slp = next)
1543 {
/* the recursive call clears slp->next, so remember the successor first */
1544 next = slp->next;
1545 tmp = SeqLocDeleteEx (slp, target, from, to, merge, changed, &part5, &part3);
/* the 5' flag reported to the caller comes from the first member only */
1546 if (first) {
1547 if (partial5 != NULL) {
1548 *partial5 = part5;
1549 }
1550 }
1551 first = FALSE;
1552 if (tmp != NULL)
1553 {
1554 if (prev != NULL)
1555 {
/* when merging, two neighbouring intervals on the same Bioseq and the same
   strand sense that have become adjacent are fused into prev */
1556 if ((merge) && (prev->choice == SEQLOC_INT) && (tmp->choice == SEQLOC_INT))
1557 {
1558 sip = (SeqIntPtr)(prev->data.ptrvalue);
1559 sip2 = (SeqIntPtr)(tmp->data.ptrvalue);
1560
1561 if (SeqIdForSameBioseq(sip->id, sip2->id))
1562 {
1563 /* merge intervals? */
1564 if ((sip->strand == Seq_strand_minus) &&
1565 (sip2->strand == Seq_strand_minus))
1566 {
1567 if (sip->from == (sip2->to + 1))
1568 {
1569 sip->from = sip2->from;
1570 sip->if_from = sip2->if_from;
1571 sip2->if_from = NULL;
1572 tmp = SeqLocFree(tmp);
1573 }
1574 }
1575 else if((sip->strand != Seq_strand_minus) &&
1576 (sip2->strand != Seq_strand_minus))
1577 {
1578 if (sip->to == (sip2->from - 1))
1579 {
1580 sip->to = sip2->to;
1581 sip->if_to = sip2->if_to;
1582 sip2->if_to = NULL;
1583 tmp = SeqLocFree(tmp);
1584 }
1585 }
1586 }
1587 }
/* two SEQLOC_NULL members in a row collapse into one */
1588 else if ((prev->choice == SEQLOC_NULL) && (tmp->choice == SEQLOC_NULL))
1589 {
1590 tmp = SeqLocFree(tmp);
1591 *changed = TRUE;
1592 }
1593 }
/* a SEQLOC_NULL may not be the first surviving member */
1594 else if (tmp->choice == SEQLOC_NULL)
1595 {
1596 tmp = SeqLocFree(tmp);
1597 *changed = TRUE;
1598 }
1599
1600 if (tmp != NULL) /* still have one? */
1601 {
1602 if (prev != NULL)
1603 prev->next = tmp;
1604 else
1605 thead = tmp;
1606 prev = tmp;
1607 }
1608 }
1609 else
1610 *changed = TRUE;
1611 }
/* NOTE(review): part3 is not reset between members, so this reports TRUE if
   any member (not only the last one) set it - confirm this is intended */
1612 if (partial3 != NULL) {
1613 *partial3 = part3;
1614 }
1615 if (prev != NULL)
1616 {
/* a SEQLOC_NULL may not be the last surviving member either: walk to the
   node before the tail and cut the tail off */
1617 if (prev->choice == SEQLOC_NULL) /* ends with NULL */
1618 {
1619 prev = NULL;
1620 for (slp = thead; slp->next != NULL; slp = slp->next)
1621 prev = slp;
1622 if (prev != NULL)
1623 {
1624 prev->next = NULL;
1625 SeqLocFree(slp);
1626 }
1627 else
1628 {
1629 thead = SeqLocFree(thead);
1630 }
1631 *changed = TRUE;
1632 }
1633 }
1634 head->data.ptrvalue = thead;
1635 if (thead == NULL)
1636 head = SeqLocFree(head);
1637 break;
1638 case SEQLOC_INT: /* int */
1639 sip = (SeqIntPtr)(head->data.ptrvalue);
1640 if (SeqIdForSameBioseq(sip->id, target))
1641 {
1642 if (sip->to < from) /* completely before cut */
1643 break;
1644
1645 /* completely contained in cut */
1646 if ((sip->from >= from) && (sip->to <= to))
1647 {
1648 head = SeqLocFree(head);
1649 *changed = TRUE;
1650 break;
1651 }
1652
1653 if (sip->from > to) /* completely past cut */
1654 {
1655 if (merge)
1656 {
1657 sip->from -= diff;
1658 sip->to -= diff;
1659 }
1660 break;
1661 }
1662 /* overlap here */
1663
/* 'to' end: beyond the cut it is only shifted (merge); inside the cut it is
   pulled back to the last residue before the cut */
1664 if (sip->to > to)
1665 {
1666 if (merge)
1667 sip->to -= diff;
1668 }
1669 else /* to inside cut, so partial delete */
1670 {
1671 sip->to = from - 1;
1672 *changed = TRUE;
1673 if (partial3 != NULL) {
1674 *partial3 = TRUE;
1675 }
1676 }
1677
/* 'from' end inside the cut: move it to the first residue after the cut
   (which is position 'from' once coordinates have been shifted by merge) */
1678 if (sip->from >= from) /* from inside cut, partial del */
1679 {
1680 *changed = TRUE;
1681 sip->from = to + 1;
1682 if (merge)
1683 sip->from -= diff;
1684 if (partial5 != NULL) {
1685 *partial5 = TRUE;
1686 }
1687 }
1688
1689 if (merge)
1690 break;
1691
1692 /* interval spans cut.. only in non-merge */
1693 /* have to split */
1694
/* head becomes a packed-int with two intervals: the original SeqInt is
   reused for the part before the cut (tmp), a new one covers the part after
   it (slp); for a minus-strand interval the part after the cut is listed
   first */
1695 if ((sip->from < from) && (sip->to > to))
1696 {
1697 *changed = TRUE;
1698 head->choice = SEQLOC_PACKED_INT;
1699 head->data.ptrvalue = NULL;
1700 tmp = ValNodeNew(NULL);
1701 tmp->choice = SEQLOC_INT;
1702 tmp->data.ptrvalue = sip;
1703
1704 sip2 = SeqIntNew();
1705 sip2->from = to + 1;
1706 sip2->to = sip->to;
1707 sip2->strand = sip->strand;
1708 sip2->if_to = sip->if_to;
1709 sip2->id = SeqIdDup(target);
1710 slp = ValNodeNew(NULL);
1711 slp->choice = SEQLOC_INT;
1712 slp->data.ptrvalue = sip2;
1713
1714 sip->if_to = NULL;
1715 sip->to = from - 1;
1716
1717 if (sip->strand == Seq_strand_minus)
1718 {
1719 head->data.ptrvalue = slp;
1720 slp->next = tmp;
1721 }
1722 else
1723 {
1724 head->data.ptrvalue = tmp;
1725 tmp->next = slp;
1726 }
1727
1728 }
1729
1730 }
1731 break;
1732 case SEQLOC_PNT: /* pnt */
1733 spp = (SeqPntPtr)(head->data.ptrvalue);
1734 if (SeqIdForSameBioseq(spp->id, target))
1735 {
1736 if ((spp->point >= from) && (spp->point <= to))
1737 {
1738 head = SeqLocFree(head);
1739 *changed = TRUE;
1740 }
1741 else if (spp->point > to)
1742 {
1743 if (merge)
1744 spp->point -= diff;
1745 }
1746 }
1747 break;
/* Packed points: build a fresh PackSeqPnt holding the surviving points, then
   move id and fuzz over from the old one before the old one is freed */
1748 case SEQLOC_PACKED_PNT: /* packed pnt */
1749 pspp = (PackSeqPntPtr)(head->data.ptrvalue);
1750 if (SeqIdForSameBioseq(pspp->id, target))
1751 {
1752 numpnt = PackSeqPntNum(pspp);
1753 pspp2 = PackSeqPntNew();
1754 head->data.ptrvalue = pspp2;
1755 for (i = 0; i < numpnt; i++)
1756 {
1757 tpos = PackSeqPntGet(pspp, i);
1758 if (tpos < from)
1759 PackSeqPntPut(pspp2, tpos);
1760 else
1761 {
1762 if (tpos > to)
1763 {
1764 if (merge)
1765 tpos -= diff;
1766 PackSeqPntPut(pspp2, tpos);
1767 }
1768 else
1769 *changed = TRUE;
1770 }
1771 }
1772 pspp2->id = pspp->id;
1773 pspp->id = NULL;
1774 pspp2->fuzz = pspp->fuzz;
1775 pspp->fuzz = NULL;
1776 pspp2->strand = pspp->strand;
1777 PackSeqPntFree(pspp);
1778 numpnt = PackSeqPntNum(pspp2);
1779 if (! numpnt)
1780 head = SeqLocFree(head);
1781
1782 }
1783 break;
1784 default:
1785 break;
1786 }
1787
1788 return head;
1789 }
1790
1791 NLM_EXTERN SeqLocPtr LIBCALL SeqLocDelete (SeqLocPtr head, SeqIdPtr target, Int4 from, Int4 to, Boolean merge, BoolPtr changed)
1792
1793 {
1794 return SeqLocDeleteEx (head, target, from, to, merge, changed, NULL, NULL);
1795 }
1796
1797 typedef struct delstruct {
1798 SeqIdPtr sip;
1799 Int4 from, to;
1800 Boolean merge;
1801 } DelStruct, PNTR DelStructPtr;
1802
1803 NLM_EXTERN void DelFeat (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent);
1804
1805 NLM_EXTERN void DelFeat (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
1806 {
1807 DelStructPtr dsp;
1808 BioseqPtr bsp;
1809 BioseqSetPtr bssp;
1810 SeqAnnotPtr sap, nextsap;
1811 SeqFeatPtr sfp, nextsfp;
1812 Pointer PNTR prevsap, PNTR prevsfp;
1813
1814 dsp = (DelStructPtr)data;
1815 if (IS_Bioseq(sep))
1816 {
1817 bsp = (BioseqPtr)(sep->data.ptrvalue);
1818 sap = bsp->annot;
1819 prevsap = (Pointer PNTR) &(bsp->annot);
1820 }
1821 else
1822 {
1823 bssp = (BioseqSetPtr)(sep->data.ptrvalue);
1824 sap = bssp->annot;
1825 prevsap = (Pointer PNTR) &(bssp->annot);
1826 }
1827
1828 while (sap != NULL)
1829 {
1830 nextsap = sap->next;
1831 if (sap->type == 1) /* feature table */
1832 {
1833 sfp = (SeqFeatPtr) sap->data;
1834 prevsfp = (Pointer PNTR) &(sap->data);
1835 while (sfp != NULL)
1836 {
1837 nextsfp = sfp->next;
1838 if (SeqFeatDelete(sfp, dsp->sip, dsp->from, dsp->to, dsp->merge) == 2)
1839 {
1840 /* location completely gone */
1841 *(prevsfp) = sfp->next;
1842 sfp->next = NULL;
1843 SeqFeatFree(sfp);
1844 } else {
1845 prevsfp = (Pointer PNTR) &(sfp->next);
1846 }
1847 sfp = nextsfp;
1848 }
1849 }
1850
1851 if (sap->data == NULL) /* all features deleted */
1852 {
1853 *(prevsap) = sap->next;
1854 sap->next = NULL;
1855 SeqAnnotFree (sap);
1856 } else {
1857 prevsap = (Pointer PNTR) &(sap->next);
1858 }
1859
1860 sap = nextsap;
1861 }
1862
1863 return;
1864 }
1865
1866 /*****************************************************************************
1867 *
1868 * SeqEntryDelFeat(sep, id, from, to, do_split)
1869 * Deletes or truncates features on Bioseq (id) in the range
1870 * from-to, inclusive
1871 *
1872 * Moves features > to left to account for decrease in length
1873 * if do_split, breaks intervals across the deletion
1874 * else just reduces their size
1875 *
1876 * If sep == NULL, then calls SeqEntryFind(id) to set scope to look
1877 * for features.
1878 *
1879 *****************************************************************************/
1880 NLM_EXTERN Boolean LIBCALL SeqEntryDelFeat (SeqEntryPtr sep, SeqIdPtr sip, Int4 from, Int4 to, Boolean do_split)
1881 {
1882
1883 DelStruct ds;
1884
1885 if (sip == NULL)
1886 return FALSE;
1887
1888 if (sep == NULL)
1889 sep = SeqEntryFind(sip);
1890
1891 if (sep == NULL) return FALSE;
1892
1893 ds.sip = sip;
1894 ds.from = from;
1895 ds.to = to;
1896 if (do_split)
1897 ds.merge = FALSE;
1898 else
1899 ds.merge = TRUE;
1900
1901 SeqEntryExplore(sep, (Pointer)(&ds), DelFeat);
1902
1903 return TRUE;
1904 }
1905
1906 /*****************************************************************************
1907 *
1908 * DescrToFeatures(sep)
1909 * Moves all Seqdescr to features in sep where possible
1910 *
1911 *****************************************************************************/
1912
1913 static DeltaSeqPtr CopyDeltaSeqPtrChain (DeltaSeqPtr dsp)
1914 {
1915 DeltaSeqPtr new_chain = NULL;
1916 SeqLocPtr slp_orig, slp_new;
1917 SeqLitPtr slip_orig, slip_new;
1918
1919 while (dsp != NULL) {
1920 if (dsp->choice == 1) {
1921 slp_orig = (SeqLocPtr) dsp->data.ptrvalue;
1922 slp_new = AsnIoMemCopy (slp_orig, (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
1923 ValNodeAddPointer (&new_chain, 1, slp_new);
1924 }
1925 else if (dsp->choice ==2)
1926 {
1927 slip_orig = (SeqLitPtr) dsp->data.ptrvalue;
1928 slip_new = AsnIoMemCopy(slip_orig, (AsnReadFunc) SeqLitAsnRead, (AsnWriteFunc) SeqLitAsnWrite);
1929 ValNodeAddPointer (&new_chain, 2, slip_new);
1930 }
1931 dsp = dsp->next;
1932 }
1933
1934 return new_chain;
1935 }
1936
1937 /*****************************************************************************
1938 *
1939 * BioseqCopy(newid, sourceid, from, to, strand, do_feat)
1940 * Creates a new Bioseq from sourceid in the range from-to inclusive.
1941 * If strand==Seq_strand_minus, reverse complements the sequence in
1942 * the copy and (if do_feat) corrects the feature table appropriately.
1943 * Names new Bioseq as newid, if not NULL
1944 * else Creates seqid.local = "Clipboard" if newid is NULL
1945 * If do_feat == TRUE copies appropriate region of feature table from
1946 * sourceid to new copy using SeqFeatsCopy().
1947 *
1948 *****************************************************************************/
1949 NLM_EXTERN BioseqPtr LIBCALL BioseqCopyEx (SeqIdPtr newid, BioseqPtr oldbsp, Int4 from, Int4 to,
1950 Uint1 strand, Boolean do_feat)
1951 {
1952 BioseqPtr newbsp=NULL, tmpbsp;
1953 SeqPortPtr spp=NULL;
1954 ByteStorePtr bsp;
1955 Uint1 seqtype;
1956 ValNodePtr tmp;
1957 ObjectIdPtr oid;
1958 Int4 len, i;
1959 Int2 residue;
1960 ValNode fake;
1961 SeqLocPtr the_segs, head, curr;
1962 Boolean handled = FALSE, split;
1963 SeqFeatPtr sfp, newsfp, lastsfp;
1964 DeltaSeqPtr dsp;
1965 SeqEntryPtr oldscope;
1966
1967
1968 if ((oldbsp == NULL) || (from < 0)) return FALSE;
1969
1970 len = to - from + 1;
1971 if (len <= 0) return NULL;
1972
1973 newbsp = BioseqNew();
1974 if (newid != NULL)
1975 newbsp->id = SeqIdDup(newid);
1976 else
1977 {
1978 tmp = ValNodeNew(NULL);
1979 tmp->choice = SEQID_LOCAL;
1980 oid = ObjectIdNew();
1981 tmp->data.ptrvalue = (Pointer)oid;
1982 oid->str = StringSave("Clipboard");
1983 tmpbsp = BioseqFind(tmp); /* old clipboard present? */
1984 if (tmpbsp == NULL) {
1985 oldscope = SeqEntrySetScope (NULL);
1986 if (oldscope != NULL) {
1987 tmpbsp = BioseqFind(tmp);
1988 SeqEntrySetScope (oldscope);
1989 }
1990 }
1991 if (tmpbsp != NULL)
1992 BioseqFree(tmpbsp);
1993 newbsp->id = tmp;
1994 }
1995
1996 newbsp->repr = oldbsp->repr;
1997 newbsp->mol = oldbsp->mol;
1998 newbsp->length = len;
1999 newbsp->seq_ext_type = oldbsp->seq_ext_type;
2000
2001 if (newbsp->repr == Seq_repr_virtual)
2002 handled = TRUE; /* no more to do */
2003
2004 if (((newbsp->repr == Seq_repr_raw) ||
2005 (newbsp->repr == Seq_repr_const)) && newbsp->seq_data_type != Seq_code_gap)
2006 {
2007 if (ISA_aa(newbsp->mol))
2008 {
2009 seqtype = Seq_code_ncbieaa;
2010 }
2011 else
2012 {
2013 seqtype = Seq_code_iupacna;
2014 }
2015 newbsp->seq_data_type = seqtype;
2016 bsp = BSNew(len);
2017 if (bsp == NULL) goto erret;
2018
2019 newbsp->seq_data = (SeqDataPtr) bsp;
2020 spp = SeqPortNew(oldbsp, from, to, strand, seqtype);
2021 if (spp == NULL) goto erret;
2022
2023 for (i = 0; i < len; i++)
2024 {
2025 residue = SeqPortGetResidue(spp);
2026 if (! IS_residue(residue)) goto erret;
2027 BSPutByte(bsp, residue);
2028 }
2029
2030 SeqPortFree(spp);
2031 handled = TRUE;
2032 }
2033
2034 if ((newbsp->repr == Seq_repr_seg) ||
2035 (newbsp->repr == Seq_repr_ref) ||
2036 (newbsp->repr == Seq_repr_delta))
2037 {
2038 if (newbsp->repr == Seq_repr_seg) /* segmented */
2039 {
2040 fake.choice = SEQLOC_MIX; /* make SEQUENCE OF Seq-loc, into one */
2041 fake.data.ptrvalue = oldbsp->seq_ext;
2042 fake.next = NULL;
2043 the_segs = (SeqLocPtr)&fake;
2044 head = SeqLocCopyPart (the_segs, from, to, strand, FALSE, NULL, NULL);
2045 }
2046 else if (newbsp->repr == Seq_repr_ref) /* reference: is a Seq-loc */
2047 {
2048 head = SeqLocCopyPart ((SeqLocPtr)(oldbsp->seq_ext), from, to,
2049 strand, TRUE, NULL, NULL);
2050 }
2051 else if (newbsp->repr == Seq_repr_delta)
2052 {
2053 dsp = (DeltaSeqPtr)(oldbsp->seq_ext); /* real data is here */
2054
2055 head = CopyDeltaSeqPtrChain (dsp);
2056 }
2057
2058 newbsp->seq_ext = (Pointer)head;
2059 handled = TRUE;
2060 }
2061
2062 if (newbsp->repr == Seq_repr_map)
2063 {
2064 lastsfp = NULL;
2065 for (sfp = (SeqFeatPtr)(oldbsp->seq_ext); sfp != NULL; sfp = sfp->next)
2066 {
2067 split = FALSE;
2068 curr = SeqLocCopyRegion(newbsp->id, sfp->location, oldbsp, from, to, strand, &split);
2069 if (curr != NULL) /* got one */
2070 {
2071 newsfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
2072 SeqLocFree(newsfp->location);
2073 newsfp->location = curr;
2074 if (split)
2075 newsfp->partial = TRUE;
2076 if (lastsfp == NULL) /* first one */
2077 newbsp->seq_ext = (Pointer)newsfp;
2078 else
2079 lastsfp->next = newsfp;
2080 lastsfp = newsfp;
2081 }
2082 }
2083 handled = TRUE;
2084 }
2085
2086
2087 if (! handled) goto erret;
2088
2089 /* get descriptors */
2090 /* get features */
2091
2092 if (do_feat)
2093 SeqFeatsCopy (newbsp, oldbsp, from, to, strand);
2094
2095 return newbsp;
2096
2097 erret:
2098 BioseqFree(newbsp);
2099 SeqPortFree(spp);
2100 return NULL;
2101 }
2102
2103 NLM_EXTERN BioseqPtr LIBCALL BioseqCopy (SeqIdPtr newid, SeqIdPtr sourceid, Int4 from, Int4 to,
2104 Uint1 strand, Boolean do_feat)
2105 {
2106 BioseqPtr oldbsp;
2107 SeqEntryPtr oldscope;
2108
2109 if ((sourceid == NULL) || (from < 0)) return FALSE;
2110
2111 oldbsp = BioseqFind(sourceid);
2112 if (oldbsp == NULL) {
2113 oldscope = SeqEntrySetScope (NULL);
2114 if (oldscope != NULL) {
2115 oldbsp = BioseqFind(sourceid);
2116 SeqEntrySetScope (oldscope);
2117 }
2118 }
2119 if (oldbsp == NULL) return NULL;
2120
2121 return BioseqCopyEx (newid, oldbsp, from, to, strand, do_feat);
2122 }
2123
2124 /*****************************************************************************
2125 *
2126 * SeqLocCopyPart (the_segs, from, to, strand, group, first_segp, last_segp)
2127 * cuts out from the_segs the part from offset from to offset to
2128 * reverse complements resulting seqloc if strand == Seq_strand_minus
2129 * if (group) puts resulting intervals into a new Seq-loc (of type
2130 * PACKED_INT if no SEQLOC_NULL, else SEQLOC_MIX).
2131 * Currently this always makes intervals or nulls. Is really for segmented and
2132 * reference sequence extensions
2133 * If first_segp and last_segp are not NULL, then they are filled in with the
2134 * ordinal number of the source segments that remain in the copy, based
2135 * on SeqLocFindNext, where 1 is the first one. Thus if the third and
2136 * fourth segments were copied, first is 3 and last is 4. If the
2137 * location was reverse complemented, first is 4 and last is 3.
2138 *
2139 *****************************************************************************/
/*
 * SeqLocCopyPart
 *   Walks the components of the_segs with SeqLocFindNext(), treating them
 *   as laid end to end, and builds a new chain of SEQLOC_INT nodes (with
 *   SEQLOC_NULL separators where the source had them between copied
 *   components) covering offsets from..to of that concatenation.
 *
 *   Returns NULL if the_segs is NULL or if from or to is negative.
 *   first_segp / last_segp are optional outputs (may be NULL).
 */
2140 NLM_EXTERN SeqLocPtr LIBCALL SeqLocCopyPart (SeqLocPtr the_segs, Int4 from, Int4 to, Uint1 strand,
2141 Boolean group, Int2Ptr first_segp, Int2Ptr last_segp)
2142 {
2143 SeqLocPtr currseg, newhead, head, prev, curr, last;
2144 Int2 numloc, first_seg = 0, last_seg = 0, seg_ctr = 0;
2145 Int4 oldpos, tlen, tfrom, tto, tstart, tstop, xfrom, xto;
2146 Uint1 tstrand;
2147 SeqIdPtr tid;
2148 SeqIntPtr sip;
2149 Boolean done, started, wasa_null, hada_null;
2150 BioseqPtr bsp;
2151
2152 if (the_segs == NULL) return NULL;
2153 if ((from < 0) || (to < 0)) return NULL;
2154
2155 currseg = NULL;
2156 oldpos = 0; /* position in old sequence */
/* 'done' is assigned below but never read; the loop ends on oldpos > to */
2157 done = FALSE;
2158 started = FALSE;
2159 head = NULL;
2160 prev = NULL;
2161 numloc = 0;
2162 wasa_null = FALSE;
2163 hada_null = FALSE;
2164 while ((oldpos <= to) && ((currseg = SeqLocFindNext(the_segs, currseg)) != NULL))
2165 {
2166 seg_ctr++;
2167 tlen = SeqLocLen(currseg);
2168 tid = SeqLocId(currseg);
/* a negative length is retried with the Bioseq locked */
2169 if (tlen < 0) {
2170 bsp = BioseqLockById (tid); /* only necessary for locations of type WHOLE */
2171 tlen = SeqLocLen (currseg);
2172 BioseqUnlock (bsp);
2173 }
2174 tstrand = SeqLocStrand(currseg);
2175 tfrom = SeqLocStart(currseg);
2176 tto = SeqLocStop(currseg);
2177
/* tstart = number of residues to skip at the start of this component, or -1
   if the component contributes nothing */
2178 if (! started)
2179 {
2180 wasa_null = FALSE;
2181 if (((oldpos + tlen - 1) >= from) &&
2182 (currseg->choice != SEQLOC_NULL))
2183 {
2184 tstart = from - oldpos;
2185 started = TRUE;
2186 first_seg = seg_ctr;
2187 }
2188 else
2189 tstart = -1;
2190 }
2191 else
2192 {
/* a SEQLOC_NULL inside the copied range is only remembered here; it is
   emitted just before the next interval, so one at the very end is dropped */
2193 if (currseg->choice == SEQLOC_NULL)
2194 {
2195 wasa_null = TRUE;
2196 tstart = -1; /* skip it till later */
2197 }
2198 else
2199 tstart = 0;
2200 }
2201
2202 if (tstart >= 0) /* have a start */
2203 {
/* tstop = number of residues to drop at the end of this component */
2204 if ((oldpos + tlen - 1) >= to)
2205 {
2206 done = TRUE; /* hit the end */
2207 tstop = ((oldpos + tlen - 1) - to);
2208 }
2209 else
2210 tstop = 0;
2211
/* on the minus strand the component's start is at tto, so the trims are
   applied to the opposite coordinates */
2212 if (tstrand == Seq_strand_minus)
2213 {
2214 xfrom = tfrom + tstop;
2215 xto = tto - tstart;
2216 }
2217 else
2218 {
2219 xfrom = tfrom + tstart;
2220 xto = tto - tstop;
2221 }
2222
2223 sip = SeqIntNew();
2224 sip->id = SeqIdDup(tid);
2225 sip->strand = tstrand;
2226 sip->from = xfrom;
2227 sip->to = xto;
2228 if (wasa_null) /* previous SEQLOC_NULL */
2229 {
2230 curr = ValNodeAddInt(&head, SEQLOC_NULL, 0);
2231 numloc++;
2232 wasa_null = FALSE;
2233 hada_null = TRUE;
2234 }
2235 curr = ValNodeAddPointer(&head, SEQLOC_INT, (Pointer)sip);
2236 numloc++;
2237 last_seg = seg_ctr;
2238 }
2239
2240 oldpos += tlen;
2241 }
2242
/* minus strand requested: repeatedly detach the last node of head and append
   it to newhead, which reverses the order; each interval's strand is replaced
   by StrandCmp() of its strand */
2243 if (strand == Seq_strand_minus) /* reverse order and complement */
2244 {
2245 newhead = NULL;
2246 last = NULL;
2247 while (head != NULL)
2248 {
2249 prev = NULL;
2250 for (curr = head; curr->next != NULL; curr = curr->next)
2251 prev = curr;
2252 if (prev != NULL)
2253 prev->next = NULL;
2254 else
2255 head = NULL;
2256
2257 if (newhead == NULL)
2258 newhead = curr;
2259 else
2260 last->next = curr;
2261 last = curr;
2262 if (curr->choice == SEQLOC_INT)
2263 {
2264 sip = (SeqIntPtr)(curr->data.ptrvalue);
2265 sip->strand = StrandCmp(sip->strand);
2266 }
2267 }
2268
/* swap first_seg and last_seg (seg_ctr is reused as the temporary) */
2269 head = newhead;
2270 seg_ctr = last_seg;
2271 last_seg = first_seg;
2272 first_seg = seg_ctr;
2273 }
2274
/* group: wrap the chain in one node, SEQLOC_MIX if a SEQLOC_NULL was
   emitted, otherwise SEQLOC_PACKED_INT */
2275 if ((numloc) && (group))
2276 {
2277 curr = ValNodeNew(NULL);
2278 if (hada_null)
2279 curr->choice = SEQLOC_MIX;
2280 else
2281 curr->choice = SEQLOC_PACKED_INT;
2282 curr->data.ptrvalue = (Pointer)head;
2283 head = curr;
2284 }
2285
2286 if (first_segp != NULL)
2287 *first_segp = first_seg;
2288 if (last_segp != NULL)
2289 *last_segp = last_seg;
2290
2291 return head;
2292 }
2293
2294 /*****************************************************************************
2295 *
2296 * SeqFeatCopy(new, old, from, to, strand)
2297 *
2298 *****************************************************************************/
2299 static Int2 LIBCALL IndexedSeqFeatsCopy (BioseqPtr newbsp, BioseqPtr oldbsp, Int4 from, Int4 to, Uint1 strand)
2300
2301 {
2302 Int2 ctr=0;
2303 SeqFeatPtr sfp, last=NULL, newsfp;
2304 SeqInt si;
2305 ValNode vn;
2306 ValNodePtr region;
2307 SeqLocPtr newloc;
2308 Boolean split = FALSE;
2309 SeqAnnotPtr sap = NULL, saptmp;
2310 CdRegionPtr crp;
2311 CodeBreakPtr cbp, prevcbp, nextcbp;
2312 RnaRefPtr rrp;
2313 tRNAPtr trp;
2314 SeqMgrFeatContext fcontext;
2315
2316 region = &vn;
2317 vn.choice = SEQLOC_INT;
2318 vn.data.ptrvalue = (Pointer)(&si);
2319 si.from = from;
2320 si.to = to;
2321 si.id = oldbsp->id;
2322 si.if_from = NULL;
2323 si.if_to = NULL;
2324
2325 sfp = NULL;
2326 while ((sfp = SeqMgrGetNextFeature (oldbsp, sfp, 0, 0, &fcontext)) != NULL)
2327 {
2328 /* can exit once past rightmost limit */
2329 if (fcontext.left > to) return ctr;
2330
2331 if (fcontext.right >= from && fcontext.left <= to) {
2332
2333 split = FALSE;
2334 newloc = SeqLocCopyRegion(newbsp->id, sfp->location, oldbsp, from, to, strand, &split);
2335 if (newloc != NULL) /* got one */
2336 {
2337 newsfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
2338 SeqLocFree(newsfp->location);
2339 newsfp->location = newloc;
2340 if (split)
2341 newsfp->partial = TRUE;
2342 if (last == NULL) /* first one */
2343 {
2344 sap = SeqAnnotNew();
2345 if (newbsp->annot == NULL)
2346 newbsp->annot = sap;
2347 else
2348 {
2349 for (saptmp = newbsp->annot; saptmp->next != NULL; saptmp = saptmp->next)
2350 continue;
2351 saptmp->next = sap;
2352 }
2353 sap->type = 1; /* feature table */
2354 sap->data = (Pointer)newsfp;
2355 }
2356 else
2357 last->next = newsfp;
2358 last = newsfp;
2359
2360 switch (newsfp->data.choice)
2361 {
2362 case SEQFEAT_CDREGION: /* cdregion */
2363 crp = (CdRegionPtr)(newsfp->data.value.ptrvalue);
2364 prevcbp = NULL;
2365 for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp)
2366 {
2367 nextcbp = cbp->next;
2368 cbp->loc = SeqLocCopyRegion(newbsp->id, cbp->loc, oldbsp, from, to, strand, &split);
2369 if (cbp->loc == NULL)
2370 {
2371 if (prevcbp != NULL)
2372 prevcbp->next = nextcbp;
2373 else
2374 crp->code_break = nextcbp;
2375 cbp->next = NULL;
2376 CodeBreakFree(cbp);
2377 }
2378 else
2379 prevcbp = cbp;
2380 }
2381 break;
2382 case SEQFEAT_RNA:
2383 rrp = (RnaRefPtr)(newsfp->data.value.ptrvalue);
2384 if (rrp->ext.choice == 2) /* tRNA */
2385 {
2386 trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
2387 if (trp->anticodon != NULL)
2388 {
2389 trp->anticodon = SeqLocCopyRegion(newbsp->id, trp->anticodon, oldbsp, from, to, strand, &split);
2390 }
2391 }
2392 break;
2393 default:
2394 break;
2395 }
2396 }
2397 }
2398
2399 }
2400 return ctr;
2401 }
2402
2403 NLM_EXTERN Int2 LIBCALL SeqFeatsCopy (BioseqPtr newbsp, BioseqPtr oldbsp, Int4 from, Int4 to, Uint1 strand)
2404 {
2405 Int2 ctr=0;
2406 BioseqContextPtr bcp = NULL;
2407 SeqFeatPtr sfp, last=NULL, newsfp;
2408 SeqInt si;
2409 ValNode vn;
2410 ValNodePtr region;
2411 SeqLocPtr newloc;
2412 Boolean split = FALSE;
2413 SeqAnnotPtr sap = NULL, saptmp;
2414 CdRegionPtr crp;
2415 CodeBreakPtr cbp, prevcbp, nextcbp;
2416 RnaRefPtr rrp;
2417 tRNAPtr trp;
2418 Uint2 entityID;
2419
2420 if (oldbsp == NULL) return ctr;
2421
2422 entityID = ObjMgrGetEntityIDForPointer (oldbsp);
2423 if (entityID > 0 && SeqMgrFeaturesAreIndexed (entityID)) {
2424 /* indexed version should be much faster */
2425 return IndexedSeqFeatsCopy (newbsp, oldbsp, from, to, strand);
2426 }
2427
2428 bcp = BioseqContextNew(oldbsp);
2429 if (bcp == NULL) return ctr;
2430
2431 region = &vn;
2432 vn.choice = SEQLOC_INT;
2433 vn.data.ptrvalue = (Pointer)(&si);
2434 si.from = from;
2435 si.to = to;
2436 si.id = oldbsp->id;
2437 si.if_from = NULL;
2438 si.if_to = NULL;
2439
2440 sfp = NULL;
2441 while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 0)) != NULL)
2442 {
2443 split = FALSE;
2444 newloc = SeqLocCopyRegion(newbsp->id, sfp->location, oldbsp, from, to, strand, &split);
2445 if (newloc != NULL) /* got one */
2446 {
2447 newsfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
2448 SeqLocFree(newsfp->location);
2449 newsfp->location = newloc;
2450 if (split)
2451 newsfp->partial = TRUE;
2452 if (last == NULL) /* first one */
2453 {
2454 sap = SeqAnnotNew();
2455 if (newbsp->annot == NULL)
2456 newbsp->annot = sap;
2457 else
2458 {
2459 for (saptmp = newbsp->annot; saptmp->next != NULL; saptmp = saptmp->next)
2460 continue;
2461 saptmp->next = sap;
2462 }
2463 sap->type = 1; /* feature table */
2464 sap->data = (Pointer)newsfp;
2465 }
2466 else
2467 last->next = newsfp;
2468 last = newsfp;
2469
2470 switch (newsfp->data.choice)
2471 {
2472 case SEQFEAT_CDREGION: /* cdregion */
2473 crp = (CdRegionPtr)(newsfp->data.value.ptrvalue);
2474 prevcbp = NULL;
2475 for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp)
2476 {
2477 nextcbp = cbp->next;
2478 cbp->loc = SeqLocCopyRegion(newbsp->id, cbp->loc, oldbsp, from, to, strand, &split);
2479 if (cbp->loc == NULL)
2480 {
2481 if (prevcbp != NULL)
2482 prevcbp->next = nextcbp;
2483 else
2484 crp->code_break = nextcbp;
2485 cbp->next = NULL;
2486 CodeBreakFree(cbp);
2487 }
2488 else
2489 prevcbp = cbp;
2490 }
2491 break;
2492 case SEQFEAT_RNA:
2493 rrp = (RnaRefPtr)(newsfp->data.value.ptrvalue);
2494 if (rrp->ext.choice == 2) /* tRNA */
2495 {
2496 trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
2497 if (trp->anticodon != NULL)
2498 {
2499 trp->anticodon = SeqLocCopyRegion(newbsp->id, trp->anticodon, oldbsp, from, to, strand, &split);
2500 }
2501 }
2502 break;
2503 default:
2504 break;
2505 }
2506 }
2507
2508 }
2509 BioseqContextFree (bcp);
2510 return ctr;
2511 }
2512
2513
2514 NLM_EXTERN SeqLocPtr LIBCALL SeqLocCopyRegion(SeqIdPtr newid, SeqLocPtr head, BioseqPtr oldbsp,
2515 Int4 from, Int4 to, Uint1 strand, BoolPtr split)
2516 {
2517 SeqLocPtr newhead = NULL, tmp, slp, prev, next, thead;
2518 SeqIntPtr sip, sip2;
2519 SeqPntPtr spp, spp2;
2520 PackSeqPntPtr pspp, pspp2;
2521 SeqBondPtr sbp, sbp2;
2522 SeqIdPtr sidp, oldids;
2523 Int4 numpnt, i, tpos, len, intcnt, othercnt;
2524 Boolean dropped_one;
2525 IntFuzzPtr ifp;
2526 ValNode vn;
2527
2528 if ((head == NULL) || (oldbsp == NULL)) return NULL;
2529
2530 oldids = oldbsp->id;
2531 len = to - from + 1;
2532 switch (head->choice)
2533 {
2534 case SEQLOC_BOND: /* bond -- 2 seqs */
2535 sbp2 = NULL;
2536 sbp = (SeqBondPtr)(head->data.ptrvalue);
2537 vn.choice = SEQLOC_PNT;
2538 vn.data.ptrvalue = sbp->a;
2539 vn.next = NULL;
2540 tmp = SeqLocCopyRegion(newid, (SeqLocPtr)(&vn), oldbsp, from, to, strand, split);
2541 if (tmp != NULL)
2542 {
2543 sbp2 = SeqBondNew();
2544 sbp2->a = (SeqPntPtr)(tmp->data.ptrvalue);
2545 MemFree(tmp);
2546 }
2547 if (sbp->b != NULL)
2548 {
2549 vn.data.ptrvalue = sbp->b;
2550 tmp = SeqLocCopyRegion(newid, (SeqLocPtr)(&vn), oldbsp, from, to, strand, split);
2551 if (tmp != NULL)
2552 {
2553 if (sbp2 == NULL)
2554 {
2555 sbp2 = SeqBondNew();
2556 sbp2->a = (SeqPntPtr)(tmp->data.ptrvalue);
2557 }
2558 else
2559 sbp2->b = (SeqPntPtr)(tmp->data.ptrvalue);
2560 MemFree(tmp);
2561 }
2562 }
2563 if (sbp2 != NULL)
2564 {
2565 newhead = ValNodeNew(NULL);
2566 newhead->choice = SEQLOC_BOND;
2567 newhead->data.ptrvalue = sbp2;
2568 if ((sbp->b != NULL) && (sbp2->b == NULL))
2569 *split = TRUE;
2570 }
2571 break;
2572 case SEQLOC_FEAT: /* feat -- can't track yet */
2573 case SEQLOC_NULL: /* NULL */
2574 case SEQLOC_EMPTY: /* empty */
2575 break;
2576 case SEQLOC_WHOLE: /* whole */
2577 sidp = (SeqIdPtr)(head->data.ptrvalue);
2578 if (SeqIdIn(sidp, oldids))
2579 {
2580 if ((from != 0) || (to != (oldbsp->length - 1)))
2581 {
2582 *split = TRUE;
2583 }
2584 newhead = ValNodeNew(NULL);
2585 sip2 = SeqIntNew();
2586 sip2->id = SeqIdDup(newid);
2587 sip2->from = 0;
2588 sip2->to = to - from;
2589 newhead->choice = SEQLOC_INT;
2590 newhead->data.ptrvalue = (Pointer)sip2;
2591 if (strand == Seq_strand_minus)
2592 {
2593 sip2->strand = Seq_strand_minus;
2594 }
2595 else if (sip2->strand == Seq_strand_minus)
2596 {
2597 sip2->strand = strand;
2598 }
2599 }
2600 break;
2601 case SEQLOC_EQUIV: /* does it stay equiv? */
2602 case SEQLOC_MIX: /* mix -- more than one seq */
2603 case SEQLOC_PACKED_INT: /* packed int */
2604 prev = NULL;
2605 thead = NULL;
2606 dropped_one = FALSE;
2607 for (slp = (SeqLocPtr)(head->data.ptrvalue); slp != NULL; slp = next)
2608 {
2609 next = slp->next;
2610 tmp = SeqLocCopyRegion(newid, slp, oldbsp, from, to, strand, split);
2611 if (tmp != NULL)
2612 {
2613 if (prev != NULL)
2614 {
2615 if ((prev->choice == SEQLOC_INT) && (tmp->choice == SEQLOC_INT))
2616 {
2617 sip = (SeqIntPtr)(prev->data.ptrvalue);
2618 sip2 = (SeqIntPtr)(tmp->data.ptrvalue);
2619
2620 if ((sip->strand == Seq_strand_minus) &&
2621 (sip2->strand == Seq_strand_minus))
2622 {
2623 if (sip->from == (sip2->to + 1))
2624 {
2625 sip->from = sip2->from;
2626 sip->if_from = sip2->if_from;
2627 sip2->if_from = NULL;
2628 tmp = SeqLocFree(tmp);
2629 }
2630 }
2631 else if((sip->strand != Seq_strand_minus) &&
2632 (sip2->strand != Seq_strand_minus))
2633 {
2634 if (sip->to == (sip2->from - 1))
2635 {
2636 sip->to = sip2->to;
2637 sip->if_to = sip2->if_to;
2638 sip2->if_to = NULL;
2639 tmp = SeqLocFree(tmp);
2640 }
2641 }
2642 }
2643 else if ((prev->choice == SEQLOC_NULL) && (tmp->choice == SEQLOC_NULL))
2644 {
2645 tmp = SeqLocFree(tmp);
2646 dropped_one = TRUE;
2647 }
2648 }
2649 else if (tmp->choice == SEQLOC_NULL)
2650 {
2651 tmp = SeqLocFree(tmp);
2652 dropped_one = TRUE;
2653 }
2654
2655 if (tmp != NULL) /* still have one? */
2656 {
2657 if (prev != NULL)
2658 prev->next = tmp;
2659 else
2660 thead = tmp;
2661 prev = tmp;
2662 }
2663 else
2664 dropped_one = TRUE;
2665 }
2666 else
2667 dropped_one = TRUE;
2668 }
2669 if (prev != NULL)
2670 {
2671 if (prev->choice == SEQLOC_NULL) /* ends with NULL */
2672 {
2673 prev = NULL;
2674 for (slp = thead; slp->next != NULL; slp = slp->next)
2675 prev = slp;
2676 if (prev != NULL)
2677 {
2678 prev->next = NULL;
2679 SeqLocFree(slp);
2680 }
2681 else
2682 {
2683 thead = SeqLocFree(thead);
2684 }
2685 dropped_one = TRUE;
2686 }
2687 }
2688 if (thead != NULL)
2689 {
2690 if (dropped_one)
2691 *split = TRUE;
2692 intcnt = 0;
2693 othercnt = 0;
2694 for (slp = thead; slp != NULL; slp = slp->next)
2695 {
2696 if (slp->choice == SEQLOC_INT)
2697 intcnt++;
2698 else
2699 othercnt++;
2700 }
2701 if ((intcnt + othercnt) > 1)
2702 {
2703 newhead = ValNodeNew(NULL);
2704 if (head->choice == SEQLOC_EQUIV)
2705 newhead->choice = SEQLOC_EQUIV;
2706 else
2707 {
2708 if (othercnt == 0)
2709 newhead->choice = SEQLOC_PACKED_INT;
2710 else
2711 newhead->choice = SEQLOC_MIX;
2712 }
2713
2714 newhead->data.ptrvalue = (Pointer)thead;
2715 }
2716 else /* only one SeqLoc left */
2717 newhead = thead;
2718
2719 }
2720 break;
2721 case SEQLOC_INT: /* int */
2722 sip = (SeqIntPtr)(head->data.ptrvalue);
2723 if (SeqIdIn(sip->id, oldids))
2724 {
2725 if (sip->to < from) /* completely before cut */
2726 break;
2727 if (sip->from > to) /* completely after cut */
2728 break;
2729
2730 sip2 = SeqIntNew();
2731 sip2->id = SeqIdDup(newid);
2732 sip2->strand = sip->strand;
2733
2734 if (sip->to > to)
2735 {
2736 sip2->to = to;
2737 *split = TRUE;
2738 ifp = IntFuzzNew();
2739 ifp->choice = 4; /* lim */
2740 ifp->a = 1; /* greater than */
2741 sip2->if_to = ifp;
2742 }
2743 else
2744 {
2745 sip2->to = sip->to;
2746 if (sip->if_to != NULL)
2747 {
2748 ifp = IntFuzzNew();
2749 MemCopy((Pointer)ifp, (Pointer)(sip->if_to), sizeof(IntFuzz));
2750 sip2->if_to = ifp;
2751 }
2752 }
2753
2754 if (sip->from < from)
2755 {
2756 sip2->from = from;
2757 *split = TRUE;
2758 ifp = IntFuzzNew();
2759 ifp->choice = 4; /* lim */
2760 ifp->a = 2; /* less than */
2761 sip2->if_from = ifp;
2762 }
2763 else
2764 {
2765 sip2->from = sip->from;
2766 if (sip->if_from != NULL)
2767 {
2768 ifp = IntFuzzNew();
2769 MemCopy((Pointer)ifp, (Pointer)(sip->if_from), sizeof(IntFuzz));
2770 sip2->if_from = ifp;
2771 }
2772 }
2773 /* set to region coordinates */
2774 sip2->from -= from;
2775 sip2->to -= from;
2776 IntFuzzClip(sip2->if_from, from, to, strand, split);
2777 IntFuzzClip(sip2->if_to, from, to, strand, split);
2778
2779 if (strand == Seq_strand_minus) /* rev comp */
2780 {
2781 sip2->strand = StrandCmp(sip2->strand);
2782 tpos = len - sip2->from - 1;
2783 sip2->from = len - sip2->to - 1;
2784 sip2->to = tpos;
2785 /* IntFuzz already complemented by IntFuzzClip */
2786 /* just switch order */
2787 ifp = sip2->if_from;
2788 sip2->if_from = sip2->if_to;
2789 sip2->if_to = ifp;
2790 }
2791
2792 newhead = ValNodeNew(NULL);
2793 newhead->choice = SEQLOC_INT;
2794 newhead->data.ptrvalue = (Pointer)sip2;
2795 }
2796 break;
2797 case SEQLOC_PNT: /* pnt */
2798 spp = (SeqPntPtr)(head->data.ptrvalue);
2799 if (SeqIdIn(spp->id, oldids))
2800 {
2801 if ((spp->point >= from) && (spp->point <= to))
2802 {
2803 spp2 = SeqPntNew();
2804 spp2->id = SeqIdDup(newid);
2805 spp2->point = spp->point - from;
2806 spp2->strand = spp->strand;
2807 if (spp->fuzz != NULL)
2808 {
2809 ifp = IntFuzzNew();
2810 spp2->fuzz = ifp;
2811 MemCopy((Pointer)ifp, (Pointer)spp->fuzz, sizeof(IntFuzz));
2812 IntFuzzClip(ifp, from, to, strand, split);
2813 }
2814 if (strand == Seq_strand_minus)
2815 {
2816 spp2->point = len - spp2->point - 1;
2817 spp2->strand = StrandCmp(spp->strand);
2818 }
2819 else if (spp2->strand == Seq_strand_minus)
2820 {
2821 spp2->strand = strand;
2822 }
2823 newhead = ValNodeNew(NULL);
2824 newhead->choice = SEQLOC_PNT;
2825 newhead->data.ptrvalue = (Pointer)spp2;
2826 }
2827 }
2828 break;
2829 case SEQLOC_PACKED_PNT: /* packed pnt */
2830 pspp = (PackSeqPntPtr)(head->data.ptrvalue);
2831 if (SeqIdIn(pspp->id, oldids))
2832 {
2833 numpnt = PackSeqPntNum(pspp);
2834 pspp2 = PackSeqPntNew();
2835 pspp2->strand = pspp->strand;
2836 intcnt = 0; /* use for included points */
2837 othercnt = 0; /* use for exclued points */
2838 for (i = 0; i < numpnt; i++)
2839 {
2840 tpos = PackSeqPntGet(pspp, i);
2841 if ((tpos < from) || (tpos > to))
2842 {
2843 othercnt++;
2844 }
2845 else
2846 {
2847 intcnt++;
2848 PackSeqPntPut(pspp2, tpos - from);
2849 }
2850 }
2851 if (! intcnt) /* no points in region */
2852 {
2853 PackSeqPntFree(pspp2);
2854 break;
2855 }
2856 if (othercnt)
2857 *split = TRUE;
2858 if (pspp->fuzz != NULL)
2859 {
2860 ifp = IntFuzzNew();
2861 MemCopy((Pointer)ifp, (Pointer)(pspp->fuzz), sizeof(IntFuzz));
2862 }
2863 else
2864 ifp = NULL;
2865
2866 if (strand == Seq_strand_minus) /* rev comp */
2867 {
2868 IntFuzzClip(ifp, from, to, strand, split);
2869 pspp = pspp2;
2870 pspp2 = PackSeqPntNew();
2871 pspp2->strand = StrandCmp(pspp->strand);
2872 numpnt = PackSeqPntNum(pspp);
2873 numpnt--;
2874 for (i = numpnt; i >= 0; i--) /* reverse order */
2875 {
2876 tpos = PackSeqPntGet(pspp, i);
2877 PackSeqPntPut(pspp2, (len - tpos - 1));
2878 }
2879 PackSeqPntFree(pspp);
2880 }
2881 else if (pspp2->strand == Seq_strand_minus)
2882 {
2883 pspp2->strand = strand;
2884 }
2885 pspp2->id = SeqIdDup(newid);
2886 pspp2->fuzz = ifp;
2887
2888 newhead = ValNodeNew(NULL);
2889 newhead->choice = SEQLOC_PACKED_PNT;
2890 newhead->data.ptrvalue = (Pointer)pspp2;
2891
2892 }
2893 break;
2894 default:
2895 break;
2896
2897 }
2898 return newhead;
2899 }
2900
2901 /*****************************************************************************
2902 *
2903 * IntFuzzClip()
2904 * returns TRUE if clipped range values
2905 * in all cases, adjusts and/or complements IntFuzz
2906 * Designed for IntFuzz on SeqLocs
2907 *
2908 *****************************************************************************/
2909 NLM_EXTERN void LIBCALL IntFuzzClip(IntFuzzPtr ifp, Int4 from, Int4 to, Uint1 strand, BoolPtr split)
2910 {
2911 Int4 len, tmp;
2912
2913 if (ifp == NULL) return;
2914 len = to - from + 1;
2915 switch (ifp->choice)
2916 {
2917 case 1: /* plus/minus - no changes */
2918 case 3: /* percent - no changes */
2919 break;
2920 case 2: /* range */
2921 if (ifp->a > to) /* max */
2922 {
2923 *split = TRUE;
2924 ifp->a = to;
2925 }
2926 if (ifp->a < from)
2927 {
2928 *split = TRUE;
2929 ifp->a = from;
2930 }
2931 if (ifp->b > to) /* min */
2932 {
2933 *split = TRUE;
2934 ifp->b = to;
2935 }
2936 if (ifp->b < from)
2937 {
2938 *split = TRUE;
2939 ifp->b = from;
2940 }
2941 ifp->a -= from; /* adjust to window */
2942 ifp->b -= to;
2943 if (strand == Seq_strand_minus)
2944 {
2945 tmp = len - ifp->a; /* reverse/complement */
2946 ifp->a = len - ifp->b;
2947 ifp->b = tmp;
2948 }
2949 break;
2950 case 4: /* lim */
2951 if (strand == Seq_strand_minus) /* reverse/complement */
2952 {
2953 switch (ifp->a)
2954 {
2955 case 1: /* greater than */
2956 ifp->a = 2;
2957 break;
2958 case 2: /* less than */
2959 ifp->a = 1;
2960 break;
2961 case 3: /* to right of residue */
2962 ifp->a = 4;
2963 break;
2964 case 4: /* to left of residue */
2965 ifp->a = 3;
2966 break;
2967 default:
2968 break;
2969 }
2970 }
2971 break;
2972 }
2973 return;
2974 }
2975
2976 extern void
2977 AdjustFeaturesForInsertion
2978 (BioseqPtr tobsp,
2979 SeqIdPtr to_id,
2980 Int4 pos,
2981 Int4 len,
2982 Boolean do_split)
2983 {
2984 Uint2 entityID;
2985 SeqFeatPtr sfp;
2986 CdRegionPtr crp;
2987 CodeBreakPtr cbp, prevcbp, nextcbp;
2988 RnaRefPtr rrp;
2989 tRNAPtr trp;
2990 SeqMgrFeatContext fcontext;
2991 ValNodePtr prods, vnp;
2992 BioseqContextPtr bcp;
2993 Boolean partial5, partial3, changed;
2994
2995 if (tobsp == NULL || to_id == NULL)
2996 {
2997 return;
2998 }
2999
3000 entityID = ObjMgrGetEntityIDForPointer (tobsp);
3001 if (entityID > 0 && SeqMgrFeaturesAreIndexed (entityID)) {
3002 sfp = NULL;
3003 while ((sfp = SeqMgrGetNextFeature (tobsp, sfp, 0, 0, &fcontext)) != NULL)
3004 {
3005 if (len > 0) {
3006 sfp->location = SeqLocInsert (sfp->location, to_id,pos, len, do_split, NULL);
3007 } else {
3008 changed = FALSE;
3009 partial5 = FALSE;
3010 partial3 = FALSE;
3011 sfp->location = SeqEdSeqLocDelete (sfp->location, tobsp, pos, pos - len - 1, FALSE, &changed, &partial5, &partial3);
3012 if (changed) {
3013 if (sfp->location == NULL) {
3014 sfp->idx.deleteme = TRUE;
3015 }
3016 SetSeqLocPartial (sfp->location, partial5, partial3);
3017 sfp->partial |= partial5 || partial3;
3018 }
3019 }
3020 switch (sfp->data.choice)
3021 {
3022 case SEQFEAT_CDREGION: /* cdregion */
3023 crp = (CdRegionPtr)(sfp->data.value.ptrvalue);
3024 prevcbp = NULL;
3025 for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp)
3026 {
3027 nextcbp = cbp->next;
3028 if (len > 0) {
3029 cbp->loc = SeqLocInsert (cbp->loc, to_id,pos, len, do_split, NULL);
3030 } else {
3031 changed = FALSE;
3032 partial5 = FALSE;
3033 partial3 = FALSE;
3034 cbp->loc = SeqEdSeqLocDelete (cbp->loc, tobsp, pos, pos - len + 1, FALSE, &changed, &partial5, &partial3);
3035 if (changed) {
3036 SetSeqLocPartial (cbp->loc, partial5, partial3);
3037 }
3038 }
3039 if (cbp->loc == NULL)
3040 {
3041 if (prevcbp != NULL)
3042 prevcbp->next = nextcbp;
3043 else
3044 crp->code_break = nextcbp;
3045 cbp->next = NULL;
3046 CodeBreakFree (cbp);
3047 }
3048 else
3049 prevcbp = cbp;
3050 }
3051 break;
3052 case SEQFEAT_RNA:
3053 rrp = (RnaRefPtr)(sfp->data.value.ptrvalue);
3054 if (rrp->ext.choice == 2) /* tRNA */
3055 {
3056 trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
3057 if (trp->anticodon != NULL)
3058 {
3059 if (len > 0) {
3060 trp->anticodon = SeqLocInsert (trp->anticodon, to_id,pos, len, do_split, NULL);
3061 } else {
3062 changed = FALSE;
3063 partial5 = FALSE;
3064 partial3 = FALSE;
3065 trp->anticodon = SeqEdSeqLocDelete (trp->anticodon, tobsp, pos, pos - len + 1, FALSE, &changed, &partial5, &partial3);
3066 if (changed) {
3067 SetSeqLocPartial (trp->anticodon, partial5, partial3);
3068 }
3069 }
3070 }
3071 }
3072 break;
3073 default:
3074 break;
3075 }
3076 }
3077
3078 /* adjust features pointing by product */
3079 prods = SeqMgrGetSfpProductList (tobsp);
3080 for (vnp = prods; vnp != NULL; vnp = vnp->next) {
3081 sfp = (SeqFeatPtr) vnp->data.ptrvalue;
3082 if (sfp == NULL) continue;
3083 sfp->product = SeqLocInsert (sfp->product, to_id,pos, len, do_split, NULL);
3084 }
3085
3086 } else {
3087 bcp = BioseqContextNew(tobsp);
3088 sfp = NULL;
3089 /* adjust features pointing by location */
3090 while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 0)) != NULL)
3091 {
3092 sfp->location = SeqLocInsert(sfp->location, to_id,pos, len, do_split, NULL);
3093 switch (sfp->data.choice)
3094 {
3095 case SEQFEAT_CDREGION: /* cdregion */
3096 crp = (CdRegionPtr)(sfp->data.value.ptrvalue);
3097 prevcbp = NULL;
3098 for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp)
3099 {
3100 nextcbp = cbp->next;
3101 cbp->loc = SeqLocInsert(cbp->loc, to_id,pos, len, do_split, NULL);
3102 if (cbp->loc == NULL)
3103 {
3104 if (prevcbp != NULL)
3105 prevcbp->next = nextcbp;
3106 else
3107 crp->code_break = nextcbp;
3108 cbp->next = NULL;
3109 CodeBreakFree(cbp);
3110 }
3111 else
3112 prevcbp = cbp;
3113 }
3114 break;
3115 case SEQFEAT_RNA:
3116 rrp = (RnaRefPtr)(sfp->data.value.ptrvalue);
3117 if (rrp->ext.choice == 2) /* tRNA */
3118 {
3119 trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
3120 if (trp->anticodon != NULL)
3121 {
3122 trp->anticodon = SeqLocInsert(trp->anticodon, to_id,pos, len, do_split, NULL);
3123 }
3124 }
3125 break;
3126 default:
3127 break;
3128 }
3129 }
3130
3131 sfp = NULL;
3132 /* adjust features pointing by product */
3133 while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 1)) != NULL)
3134 sfp->product = SeqLocInsert(sfp->product, to_id,pos, len, do_split, NULL);
3135 BioseqContextFree(bcp);
3136 }
3137 }
3138
3139 /*****************************************************************************
3140 *
3141 * BioseqInsert (from_id, from, to, strand, to_id, pos, from_feat, to_feat,
3142 * do_split)
3143 * Inserts a copy of the region "from"-"to" on "strand" of the Bioseq
3144 * identified by "from_id" into the Bioseq identified by "to_id"
3145 * before "pos".
3146 * if from_feat = TRUE, copies the feature table from "from" and updates
3147 * the locations to point to the proper residues in "to_id"
3148 * If to_feat = TRUE, updates feature table on "to_id" as well.
3149 * if do_split == TRUE, then splits features in "to_id" (to_feat must
3150 * be TRUE as well). Otherwise expands features at insertion.
3151 *
3152 * All operations are copies. "frombsp" is unchanged.
3153 * Insert will only occur between certain Bioseq.repr classes as below
3154 *
3155 * From Bioseq.repr To Bioseq.repr
3156 *
3157 * virtual raw segmented map
3158 * +---------------------------------------------------
3159 * virtual | length inst SeqLoc length
3160 * +---------------------------------------------------
3161 * raw | error copy SeqLoc error
3162 * +---------------------------------------------------
3163 * segmented | error inst SeqLoc* error
3164 * +---------------------------------------------------
3165 * map | error inst* SeqLoc copy
3166 * +---------------------------------------------------
3167 *
3168 * length = changes length of "to" by length of "from"
3169 * error = insertion not allowed
3170 * inst = "from" instantiated as residues ("N" or "X" for virtual "from")
3171 * inst* = as above, but a restriction map can instantiate other bases
3172 * than "N" for known restriction recognition sites.
3173 * copy = copy of "from" inserted into "to"
3174 * SeqLoc = a SeqLoc added to "to" which points to "from". No copy of residues.
3175 * SeqLoc* = as above, but note that "to" points to "from" directly, not
3176 * what "from" itself may point to.
3177 *
3178 *****************************************************************************/
3179 NLM_EXTERN Boolean LIBCALL BioseqInsert (SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos,
3180 Boolean from_feat, Boolean to_feat, Boolean do_split)
3181 {
3182 BioseqPtr tobsp, frombsp;
3183 Int4 len, i, ctr, tlen;
3184 Boolean from_type, to_type;
3185 Uint1 seqtype;
3186 SeqAnnotPtr sap, newsap;
3187 SeqFeatPtr sfp, newsfp, prevsfp, sfphead = NULL;
3188 BioseqContextPtr bcp;
3189 Boolean handled = FALSE;
3190 SeqPortPtr spp;
3191 Int2 residue;
3192 Boolean split, added = FALSE, do_bsadd = TRUE;
3193 SeqLocPtr newloc, curr, head, tloc, xloc, yloc, fake;
3194 SeqIntPtr sip;
3195 CdRegionPtr crp;
3196 CodeBreakPtr cbp, prevcbp, nextcbp;
3197 RnaRefPtr rrp;
3198 tRNAPtr trp;
3199 SeqEntryPtr oldscope;
3200
3201 if ((from_id == NULL) || (to_id == NULL)) return FALSE;
3202
3203 tobsp = BioseqFind(to_id);
3204 if (tobsp == NULL) {
3205 oldscope = SeqEntrySetScope (NULL);
3206 if (oldscope != NULL) {
3207 tobsp = BioseqFind(to_id);
3208 SeqEntrySetScope (oldscope);
3209 }
3210 }
3211 if (tobsp == NULL) return FALSE;
3212
3213 len = BioseqGetLen(tobsp);
3214
3215 if (pos == LAST_RESIDUE)
3216 pos = len - 1;
3217 else if (pos == APPEND_RESIDUE) {
3218 pos = len;
3219 }
3220
3221 if ((pos < 0) || (pos > len)) return FALSE;
3222
3223 frombsp = BioseqFind(from_id);
3224 if (frombsp == NULL) {
3225 oldscope = SeqEntrySetScope (NULL);
3226 if (oldscope != NULL) {
3227 frombsp = BioseqFind(from_id);
3228 SeqEntrySetScope (oldscope);
3229 }
3230 }
3231 if (frombsp == NULL) return FALSE;
3232
3233 from_type = ISA_na(frombsp->mol);
3234 to_type = ISA_na(tobsp->mol);
3235
3236 if (from_type != to_type) return FALSE;
3237
3238 len = BioseqGetLen(frombsp);
3239 if (to == LAST_RESIDUE)
3240 to = len - 1;
3241
3242 if ((from < 0) || (to >= len)) return FALSE;
3243
3244 len = to - from + 1;
3245
3246 if (tobsp->repr == Seq_repr_virtual)
3247 {
3248 if (frombsp->repr != Seq_repr_virtual)
3249 return FALSE;
3250
3251 handled = TRUE; /* just length and features */
3252 }
3253
3254 if (((tobsp->repr == Seq_repr_raw) || (tobsp->repr == Seq_repr_const)) && tobsp->seq_data_type != Seq_code_gap)
3255 {
3256 if (ISA_na(tobsp->mol))
3257 {
3258 seqtype = Seq_code_iupacna;
3259 }
3260 else
3261 {
3262 seqtype = Seq_code_ncbieaa;
3263 }
3264
3265 if (tobsp->seq_data_type != seqtype)
3266 BioseqRawConvert(tobsp, seqtype);
3267 BSSeek((ByteStorePtr) tobsp->seq_data, pos, SEEK_SET);
3268 if (do_bsadd) {
3269 Nlm_BSAdd((ByteStorePtr) tobsp->seq_data, len, FALSE);
3270 }
3271
3272 i = 0;
3273
3274 spp = SeqPortNew(frombsp, from, to, strand, seqtype);
3275 while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF)
3276 {
3277 if (! IS_residue(residue))
3278 {
3279 ErrPost(CTX_NCBIOBJ, 1, "Non-residue in BioseqInsert [%d]",
3280 (int)residue);
3281 }
3282 else
3283 {
3284 BSPutByte((ByteStorePtr) tobsp->seq_data, residue);
3285 i++;
3286 }
3287 }
3288 SeqPortFree(spp);
3289
3290 if (i != len)
3291 {
3292 ErrPost(CTX_NCBIOBJ, 1, "Tried to insert %ld residues but %ld went in",
3293 len, i);
3294 return FALSE;
3295 }
3296
3297 handled = TRUE;
3298 }
3299
3300 if ((tobsp->repr == Seq_repr_seg) || (tobsp->repr == Seq_repr_ref))
3301 {
3302 sip = SeqIntNew();
3303 sip->id = SeqIdDup(from_id);
3304 sip->from = from;
3305 sip->to = to;
3306 sip->strand = strand;
3307 tloc = ValNodeNew(NULL);
3308 tloc->choice = SEQLOC_INT;
3309 tloc->data.ptrvalue = (Pointer)sip;
3310 head = NULL;
3311 if (tobsp->repr == Seq_repr_seg)
3312 {
3313 fake = ValNodeNew(NULL);
3314 fake->choice = SEQLOC_MIX;
3315 fake->data.ptrvalue = (Pointer)(tobsp->seq_ext);
3316 }
3317 else
3318 fake = (SeqLocPtr)(tobsp->seq_ext);
3319 curr = NULL;
3320 ctr = 0;
3321 while ((curr = SeqLocFindNext(fake, curr)) != NULL)
3322 {
3323 if ((! added) && (ctr == pos))
3324 {
3325 newloc = SeqLocAdd(&head, tloc, TRUE, TRUE);
3326 added = TRUE;
3327 }
3328 tlen = SeqLocLen(curr);
3329 if ((! added) && ((ctr + tlen) > pos)) /* split interval */
3330 {
3331 yloc = NULL;
3332 xloc = SeqLocAdd(&yloc, curr, TRUE, TRUE);
3333 i = (pos - ctr) + SeqLocStart(curr);
3334 newloc = SeqLocInsert(xloc, SeqLocId(xloc), i, 0, TRUE, NULL);
3335 xloc = newloc;
3336 yloc = newloc->next;
3337 SeqLocAdd(&head, xloc, TRUE, TRUE);
3338 SeqLocAdd(&head, tloc, TRUE, TRUE);
3339 SeqLocAdd(&head, yloc, TRUE, TRUE);
3340 SeqLocFree(xloc);
3341 SeqLocFree(yloc);
3342 added = TRUE;
3343 }
3344 else
3345 newloc = SeqLocAdd(&head, curr, TRUE, TRUE);
3346 ctr += tlen;
3347 }
3348 if ((! added) && (ctr == pos))
3349 {
3350 newloc = SeqLocAdd(&head, tloc, TRUE, TRUE);
3351 added = TRUE;
3352 }
3353 SeqLocFree(tloc);
3354 SeqLocFree(fake);
3355 if (tobsp->repr == Seq_repr_seg)
3356 {
3357 tobsp->seq_ext = (Pointer)head;
3358 }
3359 else
3360 {
3361 tobsp->seq_ext = SeqLocPackage(head);
3362 }
3363 handled = TRUE;
3364 }
3365
3366 if (tobsp->repr == Seq_repr_map)
3367 {
3368 if (! ((frombsp->repr == Seq_repr_map) || (frombsp->repr == Seq_repr_virtual)))
3369 return FALSE;
3370
3371 prevsfp = NULL;
3372 for (sfp = (SeqFeatPtr)(tobsp->seq_ext); sfp != NULL; sfp = sfp->next)
3373 {
3374 sfp->location = SeqLocInsert(sfp->location, to_id, pos, len, TRUE, NULL);
3375 prevsfp = sfp;
3376 }
3377
3378 if (frombsp->repr == Seq_repr_map)
3379 {
3380 for (sfp = (SeqFeatPtr)(frombsp->seq_ext); sfp != NULL; sfp = sfp->next)
3381 {
3382 split = FALSE;
3383 newloc = SeqLocCopyRegion(to_id, sfp->location, frombsp, from, to, strand, &split);
3384 if (newloc != NULL) /* got one */
3385 {
3386 newsfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
3387 SeqLocFree(newsfp->location);
3388 newsfp->location = newloc;
3389 if (split)
3390 newsfp->partial = TRUE;
3391
3392 if (prevsfp == NULL)
3393 tobsp->seq_ext = (Pointer)newsfp;
3394 else
3395 prevsfp->next = newsfp;
3396 prevsfp = newsfp;
3397
3398 newsfp->location = SeqLocInsert(newsfp->location, to_id, 0,
3399 pos, TRUE, to_id);
3400 }
3401 }
3402 }
3403 handled = TRUE;
3404 }
3405
3406 if (! handled) return FALSE;
3407
3408 tobsp->length += len;
3409
3410 if (to_feat) /* fix up sourceid Bioseq feature table(s) */
3411 {
3412 AdjustFeaturesForInsertion (tobsp, to_id, pos, len, do_split);
3413 }
3414
3415 if (from_feat) /* add source Bioseq features to sourceid */
3416 {
3417 bcp = BioseqContextNew(frombsp);
3418 sfp = NULL; /* NOTE: should make NEW feature table */
3419 prevsfp = NULL;
3420 /* is there an old feature table to use? */
3421 for (newsap = tobsp->annot; newsap != NULL; newsap = newsap->next)
3422 {
3423 if (newsap->type == 1) /* feature table */
3424 break;
3425 }
3426 if (newsap != NULL)
3427 { /* create a new one if necessary */
3428 for (prevsfp = (SeqFeatPtr)(newsap->data); prevsfp != NULL;
3429 prevsfp = prevsfp->next)
3430 {
3431 if (prevsfp->next == NULL)
3432 break;
3433 }
3434 }
3435 /* get features by location */
3436 while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 0)) != NULL)
3437 { /* copy all old features */
3438 split = FALSE;
3439 newloc = SeqLocCopyRegion(to_id, sfp->location, frombsp, from, to, strand, &split);
3440 if (newloc != NULL) /* got one */
3441 {
3442 newsfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
3443 SeqLocFree(newsfp->location);
3444 newsfp->location = newloc;
3445
3446 if (split)
3447 newsfp->partial = TRUE;
3448
3449 if (prevsfp == NULL)
3450 sfphead = newsfp;
3451 else
3452 prevsfp->next = newsfp;
3453 prevsfp = newsfp;
3454
3455 newsfp->location = SeqLocInsert(newsfp->location, to_id, 0,
3456 pos, TRUE, to_id);
3457 switch (newsfp->data.choice)
3458 {
3459 case SEQFEAT_CDREGION: /* cdregion */
3460 crp = (CdRegionPtr)(newsfp->data.value.ptrvalue);
3461 prevcbp = NULL;
3462 for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp)
3463 {
3464 nextcbp = cbp->next;
3465 cbp->loc = SeqLocCopyRegion(to_id, cbp->loc, frombsp, from, to, strand, &split);
3466 if (cbp->loc == NULL)
3467 {
3468 if (prevcbp != NULL)
3469 prevcbp->next = nextcbp;
3470 else
3471 crp->code_break = nextcbp;
3472 cbp->next = NULL;
3473 CodeBreakFree(cbp);
3474 }
3475 else
3476 {
3477 cbp->loc = SeqLocInsert(cbp->loc, to_id, 0,
3478 pos, TRUE, to_id);
3479 prevcbp = cbp;
3480 }
3481 }
3482 break;
3483 case SEQFEAT_RNA:
3484 rrp = (RnaRefPtr)(newsfp->data.value.ptrvalue);
3485 if (rrp->ext.choice == 2) /* tRNA */
3486 {
3487 trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
3488 if (trp->anticodon != NULL)
3489 {
3490 trp->anticodon = SeqLocCopyRegion(to_id, trp->anticodon, frombsp, from, to, strand, &split);
3491 trp->anticodon = SeqLocInsert(trp->anticodon, to_id, 0,
3492 pos, TRUE, to_id);
3493 }
3494 }
3495 break;
3496 default:
3497 break;
3498 }
3499 }
3500 }
3501
3502 sfp = NULL;
3503 /* get features by product */
3504 while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 1)) != NULL)
3505 { /* copy all old features */
3506 split = FALSE;
3507 newloc = SeqLocCopyRegion(to_id, sfp->product, frombsp, from, to, strand, &split);
3508 if (newloc != NULL) /* got one */
3509 {
3510 newsfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
3511 SeqLocFree(newsfp->product);
3512 newsfp->product = newloc;
3513 if (split)
3514 newsfp->partial = TRUE;
3515
3516 if (prevsfp == NULL)
3517 sfphead = newsfp;
3518 else
3519 prevsfp->next = newsfp;
3520 prevsfp = newsfp;
3521
3522 newsfp->product = SeqLocInsert(newsfp->product, to_id, 0, pos,
3523 TRUE, to_id);
3524 }
3525 }
3526 BioseqContextFree(bcp);
3527
3528
3529 if (sfphead != NULL) /* orphan chain of seqfeats to attach */
3530 {
3531 if (newsap == NULL)
3532 {
3533 for (sap = tobsp->annot; sap != NULL; sap = sap->next)
3534 {
3535 if (sap->next == NULL)
3536 break;
3537 }
3538 newsap = SeqAnnotNew();
3539 newsap->type = 1;
3540 if (sap == NULL)
3541 tobsp->annot = newsap;
3542 else
3543 sap->next = newsap;
3544 }
3545
3546 newsap->data = (Pointer)sfphead;
3547 }
3548 }
3549
3550 return TRUE;
3551 }
3552
3553 /*****************************************************************************
3554 *
3555 * SeqLocInsert()
3556 * alters "head" by insert "len" residues before "pos" in any SeqLoc
3557 * on the Bioseq "target"
3558 * all SeqLocs not on "target" are unaltered
3559 * for SeqLocs on "target"
3560 * all SeqLocs before "pos" are unaltered
3561 * all SeqLocs >= "pos" are incremented by "len"
3562 * all SeqLocs spanning "pos"
3563 * if "split" == TRUE, are split into two SeqLocs, one to the
3564 * left of the insertion, the other to right
3565 * if "split" != TRUE, the SeqLoc is increased in length to cover
3566 * the insertion
3567 * returns altered head or NULL if nothing left.
3568 * if ("newid" != NULL) replaces "target" with "newid" whether the
3569 * SeqLoc is altered on not.
3570 *
3571 * Usage hints:
3572 * 1) To update a feature location on "target" when 10 residues of
3573 * sequence have been inserted before position 5
3574 * SeqFeatPtr->location = SeqLocInsert ( SeqFeatPtr->location ,
3575 * "target", 5, 10, TRUE, NULL); [for some feature types
3576 * you may want "split" equal FALSE]
3577 * 2) To insert the complete feature table from "source" into a
3578 * different Bioseq "dest" before position 20 in "dest"
3579 * SFP->location = SeqLocInsert(SFP->location, "source", 0, 20,
3580 * FALSE, "dest");
3581 *
3582 *
3583 *****************************************************************************/
3584 NLM_EXTERN SeqLocPtr LIBCALL SeqLocInsert (SeqLocPtr head, SeqIdPtr target, Int4 pos, Int4 len,
3585 Boolean split, SeqIdPtr newid)
3586 {
3587 SeqIntPtr sip, sip2;
3588 SeqPntPtr spp;
3589 PackSeqPntPtr pspp, pspp2;
3590 SeqBondPtr sbp;
3591 SeqLocPtr slp, tmp, prev, next, thead, tmp2;
3592 Int4 diff, numpnt, i, tpos;
3593 Uint1 oldchoice;
3594 ValNode vn;
3595 SeqIdPtr sidp;
3596
3597 if ((head == NULL) || (target == NULL))
3598 return head;
3599
3600 head->next = NULL; /* caller maintains chains */
3601
3602 diff = len;
3603
3604 switch (head->choice)
3605 {
3606 case SEQLOC_BOND: /* bond -- 2 seqs */
3607 vn.next = NULL;
3608 vn.choice = SEQLOC_PNT;
3609
3610 sbp = (SeqBondPtr)(head->data.ptrvalue);
3611 vn.data.ptrvalue = (Pointer)(sbp->a);
3612 SeqLocInsert(&vn, target, pos, len, split, newid);
3613 sbp->a = (SeqPntPtr)(vn.data.ptrvalue);
3614 if (sbp->b != NULL)
3615 {
3616 vn.data.ptrvalue = (Pointer)(sbp->b);
3617 SeqLocInsert(&vn, target, pos, len, split, newid);
3618 sbp->b = (SeqPntPtr)(vn.data.ptrvalue);
3619 }
3620 break;
3621 case SEQLOC_FEAT: /* feat -- can't track yet */
3622 case SEQLOC_NULL: /* NULL */
3623 break;
3624 case SEQLOC_EMPTY: /* empty */
3625 case SEQLOC_WHOLE: /* whole */
3626 if (newid != NULL)
3627 {
3628 sidp = (SeqIdPtr)(head->data.ptrvalue);
3629 if (SeqIdForSameBioseq(sidp, target))
3630 {
3631 SeqIdFree(sidp);
3632 sidp = SeqIdDup(newid);
3633 head->data.ptrvalue = (Pointer)sidp;
3634 }
3635 }
3636 break;
3637 case SEQLOC_MIX: /* mix -- more than one seq */
3638 case SEQLOC_EQUIV: /* equiv -- ditto */
3639 case SEQLOC_PACKED_INT: /* packed int */
3640 prev = NULL;
3641 thead = NULL;
3642 for (slp = (SeqLocPtr)(head->data.ptrvalue); slp != NULL; slp = next)
3643 {
3644 next = slp->next;
3645 oldchoice = slp->choice;
3646 tmp = SeqLocInsert(slp, target, pos, len, split, newid);
3647 if (tmp != NULL)
3648 {
3649 if ((head->choice != SEQLOC_EQUIV) &&
3650 (oldchoice != tmp->choice)) /* split interval? */
3651 {
3652 if ((oldchoice == SEQLOC_INT) &&
3653 (tmp->choice == SEQLOC_PACKED_INT))
3654 {
3655 tmp2 = tmp;
3656 tmp = (SeqLocPtr)(tmp2->data.ptrvalue);
3657 MemFree(tmp2);
3658 while (tmp->next != NULL)
3659 {
3660 if (prev != NULL)
3661 prev->next = tmp;
3662 else
3663 thead = tmp;
3664 prev = tmp;
3665 tmp = tmp->next;
3666 }
3667 }
3668 }
3669 if (prev != NULL)
3670 prev->next = tmp;
3671 else
3672 thead = tmp;
3673 prev = tmp;
3674 }
3675 }
3676 head->data.ptrvalue = thead;
3677 if (thead == NULL)
3678 head = SeqLocFree(head);
3679 break;
3680 case SEQLOC_INT: /* int */
3681 sip = (SeqIntPtr)(head->data.ptrvalue);
3682 if (SeqIdForSameBioseq(sip->id, target))
3683 {
3684 if (newid != NULL) /* change id? */
3685 {
3686 SeqIdFree(sip->id);
3687 sip->id = SeqIdDup(newid);
3688 }
3689
3690 if (sip->to < pos) /* completely before insertion */
3691 {
3692 break;
3693 }
3694
3695 if ((! split) || (sip->from >= pos)) /* interval unbroken */
3696 {
3697 if (sip->from >= pos)
3698 sip->from += len;
3699 sip->to += len;
3700 break;
3701 }
3702
3703 /* split interval */
3704 sip2 = SeqIntNew();
3705 slp = ValNodeNew(NULL);
3706 slp->choice = SEQLOC_INT;
3707 slp->data.ptrvalue = (Pointer)sip2;
3708 sip2->strand = sip->strand;
3709 sip2->id = SeqIdDup(sip->id);
3710
3711 sip2->to = sip->to + len;
3712 sip2->from = pos + len;
3713 sip2->if_to = sip->if_to;
3714 sip->if_to = NULL;
3715 sip->to = pos - 1;
3716 head->next = slp;
3717
3718 if (sip->strand == Seq_strand_minus) /* reverse order */
3719 {
3720 head->data.ptrvalue = (Pointer)sip2;
3721 slp->data.ptrvalue = (Pointer)sip;
3722 }
3723
3724 thead = head; /* make split interval into PACKED_INT */
3725 head = ValNodeNew(NULL);
3726 head->choice = SEQLOC_PACKED_INT;
3727 head->data.ptrvalue = thead;
3728
3729 }
3730 break;
3731 case SEQLOC_PNT: /* pnt */
3732 spp = (SeqPntPtr)(head->data.ptrvalue);
3733 if (SeqIdForSameBioseq(spp->id, target))
3734 {
3735 if (newid != NULL) /* change id? */
3736 {
3737 SeqIdFree(spp->id);
3738 spp->id = SeqIdDup(newid);
3739 }
3740
3741 if (spp->point >= pos)
3742 spp->point += len;
3743 }
3744 break;
3745 case SEQLOC_PACKED_PNT: /* packed pnt */
3746 pspp = (PackSeqPntPtr)(head->data.ptrvalue);
3747 if (SeqIdForSameBioseq(pspp->id, target))
3748 {
3749 if (newid != NULL) /* change id? */
3750 {
3751 SeqIdFree(pspp->id);
3752 pspp->id = SeqIdDup(newid);
3753 }
3754
3755 numpnt = PackSeqPntNum(pspp);
3756 pspp2 = PackSeqPntNew();
3757 head->data.ptrvalue = pspp2;
3758 for (i = 0; i < numpnt; i++)
3759 {
3760 tpos = PackSeqPntGet(pspp, i);
3761 if (tpos >= pos)
3762 tpos += len;
3763 PackSeqPntPut(pspp2, tpos);
3764 }
3765 pspp2->id = pspp->id;
3766 pspp->id = NULL;
3767 pspp2->fuzz = pspp->fuzz;
3768 pspp->fuzz = NULL;
3769 pspp2->strand = pspp->strand;
3770 PackSeqPntFree(pspp);
3771 }
3772 break;
3773 default:
3774 break;
3775 }
3776
3777 if (head == NULL)
3778 ErrPost(CTX_NCBIOBJ, 1, "SeqLocInsert: lost a SeqLoc");
3779
3780 return head;
3781 }
3782
3783 /*****************************************************************************
3784 *
3785 * SeqLocSubtract (SeqLocPtr head, SeqLocPtr piece)
3786 * Deletes piece from head.
3787 * head may be changed.
3788 * returns the changed head.
3789 *
3790 *****************************************************************************/
3791 NLM_EXTERN SeqLocPtr LIBCALL SeqLocSubtract (SeqLocPtr head, SeqLocPtr piece)
3792 {
3793 SeqLocPtr slp = NULL;
3794 SeqIdPtr sip;
3795 Int4 from, to;
3796 Boolean changed = FALSE;
3797
3798 if ((head == NULL) || (piece == NULL))
3799 return NULL;
3800
3801 while ((slp = SeqLocFindNext(piece, slp)) != NULL)
3802 {
3803 sip = SeqLocId(slp);
3804 from = SeqLocStart(slp);
3805 to = SeqLocStop(slp);
3806 head = SeqLocDelete(head, sip, from, to, FALSE, &changed);
3807 }
3808
3809 return head;
3810 }
3811
3812 /********************************************************************
3813 *
3814 * SeqLocReplaceID
3815 * replaces the Seq-Id in a Seq-Loc (slp) with a new Seq-Id (new_sip)
3816 *
3817 **********************************************************************/
3818 NLM_EXTERN SeqLocPtr SeqLocReplaceID (SeqLocPtr slp, SeqIdPtr new_sip)
3819 {
3820 SeqLocPtr curr;
3821 PackSeqPntPtr pspp;
3822 SeqIntPtr target_sit;
3823 SeqBondPtr sbp;
3824 SeqPntPtr spp;
3825
3826 switch (slp->choice) {
3827 case SEQLOC_PACKED_INT :
3828 case SEQLOC_MIX :
3829 case SEQLOC_EQUIV :
3830 curr = NULL;
3831 while ((curr = SeqLocFindNext (slp, curr)) != NULL) {
3832 curr = SeqLocReplaceID (curr, new_sip);
3833 }
3834 break;
3835 case SEQLOC_PACKED_PNT :
3836 pspp = (PackSeqPntPtr) slp->data.ptrvalue;
3837 if (pspp != NULL) {
3838 SeqIdFree (pspp->id);
3839 pspp->id = SeqIdDup (new_sip);
3840 }
3841 break;
3842 case SEQLOC_EMPTY :
3843 case SEQLOC_WHOLE :
3844 SeqIdFree ((SeqIdPtr) slp->data.ptrvalue);
3845 slp->data.ptrvalue = (Pointer) SeqIdDup (new_sip);
3846 break;
3847 case SEQLOC_INT :
3848 target_sit = (SeqIntPtr) slp->data.ptrvalue;
3849 SeqIdFree (target_sit->id);
3850 target_sit->id = SeqIdDup (new_sip);
3851 break;
3852 case SEQLOC_PNT :
3853 spp = (SeqPntPtr) slp->data.ptrvalue;
3854 SeqIdFree(spp->id);
3855 spp->id = SeqIdDup(new_sip);
3856 break;
3857 case SEQLOC_BOND :
3858 sbp = (SeqBondPtr) slp->data.ptrvalue;
3859 if (sbp == NULL || sbp->a == NULL || sbp->b == NULL) break;
3860 /* only do this if both ends bonded to same Seq-id */
3861 if (SeqIdMatch (sbp->a->id, sbp->b->id)) {
3862 spp = sbp->a;
3863 SeqIdFree(spp->id);
3864 spp->id = SeqIdDup(new_sip);
3865 spp = sbp->b;
3866 SeqIdFree(spp->id);
3867 spp->id = SeqIdDup(new_sip);
3868 }
3869 break;
3870 default :
3871 break;
3872 }
3873 return slp;
3874 }
3875
3876 /**********************************************************
3877 *
3878 * NLM_EXTERN SeqLocPtr LIBCALL GapToSeqLoc(range):
3879 *
3880 * Gets the size of gap and constructs SeqLoc block with
3881 * $(seqlitdbtag) value as Dbtag.db and Dbtag.tag.id = 0.
3882 *
3883 **********************************************************/
3884 NLM_EXTERN SeqLocPtr LIBCALL GapToSeqLoc(Int4 range)
3885 {
3886 SeqLocPtr slp;
3887 SeqIntPtr sip;
3888 SeqIdPtr sidp;
3889 DbtagPtr dp;
3890
3891 if(range < 0)
3892 return(NULL);
3893
3894 slp = ValNodeNew(NULL);
3895 if(range == 0)
3896 {
3897 slp->choice = SEQLOC_NULL;
3898 slp->data.ptrvalue = NULL;
3899 slp->next = NULL;
3900 return(slp);
3901 }
3902
3903 dp = DbtagNew();
3904 dp->db = StringSave(seqlitdbtag);
3905 dp->tag = ObjectIdNew();
3906 dp->tag->id = 0;
3907 dp->tag->str = NULL;
3908
3909 sidp = ValNodeNew(NULL);
3910 sidp->choice = SEQID_GENERAL;
3911 sidp->data.ptrvalue = dp;
3912
3913 sip = SeqIntNew();
3914 sip->from = 0;
3915 sip->to = range - 1;
3916 sip->id = sidp;
3917
3918 slp->choice = SEQLOC_INT;
3919 slp->data.ptrvalue = sip;
3920
3921 return(slp);
3922 }
3923
3924 /**********************************************************
3925 *
3926 * NLM_EXTERN SeqLocPtr LIBCALL GapToSeqLocEx(range, unknown):
3927 *
3928 * Gets the size of gap and constructs SeqLoc block with
3929 * $(seqlitdbtag) value as Dbtag.db and Dbtag.tag.id = 0.
3930 *
3931 **********************************************************/
3932 NLM_EXTERN SeqLocPtr LIBCALL GapToSeqLocEx(Int4 range, Boolean unknown)
3933 {
3934 SeqLocPtr slp;
3935 SeqIntPtr sip;
3936 SeqIdPtr sidp;
3937 DbtagPtr dp;
3938
3939 if(range < 0)
3940 return(NULL);
3941
3942 slp = ValNodeNew(NULL);
3943 if(range == 0)
3944 {
3945 slp->choice = SEQLOC_NULL;
3946 slp->data.ptrvalue = NULL;
3947 slp->next = NULL;
3948 return(slp);
3949 }
3950
3951 dp = DbtagNew();
3952 if(unknown == FALSE)
3953 dp->db = StringSave(seqlitdbtag);
3954 else
3955 dp->db = StringSave(unkseqlitdbtag);
3956 dp->tag = ObjectIdNew();
3957 dp->tag->id = 0;
3958 dp->tag->str = NULL;
3959
3960 sidp = ValNodeNew(NULL);
3961 sidp->choice = SEQID_GENERAL;
3962 sidp->data.ptrvalue = dp;
3963
3964 sip = SeqIntNew();
3965 sip->from = 0;
3966 sip->to = range - 1;
3967 sip->id = sidp;
3968
3969 slp->choice = SEQLOC_INT;
3970 slp->data.ptrvalue = sip;
3971
3972 return(slp);
3973 }
3974
3975 /**********************************************************
3976 *
3977 * NLM_EXTERN Boolean LIBCALL ISAGappedSeqLoc(slp):
3978 *
3979 * Looks at a single SeqLoc item. If it has the SeqId
3980 * of type GENERAL with Dbtag.db == $(seqlitdbtag) and
3981 * Dbtag.tag.id == 0, then returns TRUE, otherwise
3982 * returns FALSE.
3983 *
3984 **********************************************************/
3985 NLM_EXTERN Boolean LIBCALL ISAGappedSeqLoc(SeqLocPtr slp)
3986 {
3987 SeqIdPtr sip;
3988 DbtagPtr dp;
3989
3990 if(slp == NULL)
3991 return(FALSE);
3992
3993 sip = SeqLocId(slp);
3994 if(sip == NULL || sip->choice != SEQID_GENERAL)
3995 return(FALSE);
3996
3997 dp = (DbtagPtr) sip->data.ptrvalue;
3998 if(dp == NULL || dp->db == NULL || dp->tag == NULL)
3999 return(FALSE);
4000
4001 if((StringCmp(seqlitdbtag, dp->db) == 0 ||
4002 StringCmp(unkseqlitdbtag, dp->db) == 0) && dp->tag->id == 0)
4003 return(TRUE);
4004
4005 return(FALSE);
4006 }
4007
4008 /**********************************************************
4009 *
4010 * NLM_EXTERN DeltaSeqPtr LIBCALL GappedSeqLocsToDeltaSeqs(slp):
4011 *
4012 * This functions is used only in the case, if ISAGappedSeqLoc()
4013 * has returned TRUE.
4014 * Converts SeqLoc set to the sequence of DeltaSeqs.
4015 * Gbtag'ed SeqLocs it turns into SeqLits with the only "length"
4016 * element. The regular SeqLocs saves as they are. Returns
4017 * obtained DeltaSeq.
4018 *
4019 **********************************************************/
4020 NLM_EXTERN DeltaSeqPtr LIBCALL GappedSeqLocsToDeltaSeqs(SeqLocPtr slp)
4021 {
4022 DeltaSeqPtr res;
4023 DeltaSeqPtr dsp;
4024 SeqIntPtr sip;
4025 SeqLitPtr slip;
4026 SeqIdPtr id;
4027 DbtagPtr dp;
4028
4029 dsp = ValNodeNew(NULL);
4030 dsp->next = NULL;
4031 dsp->choice = 0;
4032 res = dsp;
4033 for(; slp != NULL; slp = slp->next)
4034 {
4035 if(ISAGappedSeqLoc(slp) != FALSE)
4036 {
4037 dsp->next = ValNodeNew(NULL);
4038 dsp = dsp->next;
4039 sip = slp->data.ptrvalue;
4040 slip = SeqLitNew();
4041 slip->length = sip->to - sip->from + 1;
4042 dsp->choice = 2;
4043 dsp->data.ptrvalue = slip;
4044 id = SeqLocId(slp);
4045 if(id != NULL)
4046 {
4047 dp = (DbtagPtr) id->data.ptrvalue;
4048 if(dp != NULL && dp->db != NULL &&
4049 StringCmp(unkseqlitdbtag, dp->db) == 0)
4050 {
4051 slip->fuzz = IntFuzzNew();
4052 slip->fuzz->choice = 4;
4053 }
4054 }
4055 }
4056 else
4057 {
4058 dsp->next = ValNodeNew(NULL);
4059 dsp = dsp->next;
4060 dsp->choice = 1;
4061 dsp->data.ptrvalue = AsnIoMemCopy((Pointer) slp,
4062 (AsnReadFunc) SeqLocAsnRead,
4063 (AsnWriteFunc) SeqLocAsnWrite);
4064 }
4065 }
4066 dsp = res->next;
4067 MemFree(res);
4068 return(dsp);
4069 }
4070
4071 /* This structure and the functions following it are used to track the prior locations
4072 * of features that were affected by the removal of nucleotides, so that they may be
4073 * returned to their original status in an undo.
4074 */
4075 typedef struct affectedfeat
4076 {
4077 SeqFeatPtr feat_before;
4078 SeqFeatPtr feat_after;
4079 } AffectedFeatData, PNTR AffectedFeatPtr;
4080
4081 static AffectedFeatPtr AffectedFeatNew (void)
4082 {
4083 AffectedFeatPtr afp;
4084
4085 afp = (AffectedFeatPtr) MemNew (sizeof (AffectedFeatData));
4086 if (afp != NULL)
4087 {
4088 afp->feat_before = NULL;
4089 afp->feat_after = NULL;
4090 }
4091 return afp;
4092 }
4093
4094 static AffectedFeatPtr AffectedFeatFree (AffectedFeatPtr afp)
4095 {
4096 if (afp == NULL) return NULL;
4097 afp->feat_before = SeqFeatFree (afp->feat_before);
4098 afp->feat_after = SeqFeatFree (afp->feat_after);
4099 afp = MemFree (afp);
4100 return NULL;
4101 }
4102
4103 static ValNodePtr SeqEdJournalAffectedFeatsFree (ValNodePtr vnp)
4104 {
4105 if (vnp == NULL) return NULL;
4106 vnp->next = SeqEdJournalAffectedFeatsFree (vnp->next);
4107 vnp->data.ptrvalue = AffectedFeatFree ((AffectedFeatPtr) (vnp->data.ptrvalue));
4108 ValNodeFree (vnp);
4109 return NULL;
4110 }
4111
4112 static Boolean SeqEdRecreateDeletedFeats (SeqEdJournalPtr sejp)
4113 {
4114 ValNodePtr vnp;
4115 AffectedFeatPtr afp = NULL;
4116 Boolean recreated_feats = FALSE;
4117 SeqEntryPtr sep = NULL;
4118 SeqFeatPtr sfp;
4119
4120 for (vnp = sejp->affected_feats; vnp != NULL && afp == NULL; vnp = vnp->next)
4121 {
4122 if (vnp->choice == 1 || vnp->data.ptrvalue == NULL) continue;
4123 afp = (AffectedFeatPtr) vnp->data.ptrvalue;
4124 if (afp->feat_after == NULL && afp->feat_before != NULL)
4125 {
4126 vnp->choice = 1;
4127 if (sep == NULL)
4128 {
4129 sep = SeqMgrGetSeqEntryForData (sejp->bsp);
4130 if (sep == NULL) return FALSE;
4131 }
4132 sfp = CreateNewFeature (sep, NULL, afp->feat_before->data.choice, afp->feat_before);
4133 afp->feat_before = NULL;
4134 recreated_feats = TRUE;
4135 }
4136 }
4137 return recreated_feats;
4138 }
4139
4140
4141 /* This section of code deals with inserting new characters into a Bioseq and adjusting the
4142 * locations of the affected features. It is adapted from code from SeqLocInsert.
4143 */
4144
4145 NLM_EXTERN void SeqEdInsertAdjustCdRgn
4146 (SeqFeatPtr sfp,
4147 BioseqPtr bsp,
4148 Int4 insert_pos,
4149 Int4 len,
4150 Boolean do_split)
4151 {
4152 CdRegionPtr crp;
4153 CodeBreakPtr prevcbp, cbp, nextcbp;
4154
4155 if (sfp == NULL || bsp == NULL) return;
4156 crp = (CdRegionPtr)(sfp->data.value.ptrvalue);
4157 if (crp == NULL) return;
4158
4159 prevcbp = NULL;
4160 for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp)
4161 {
4162 nextcbp = cbp->next;
4163 cbp->loc = SeqEdSeqLocInsert (cbp->loc, bsp, insert_pos, len, do_split, NULL);
4164 if (cbp->loc == NULL)
4165 {
4166 if (prevcbp != NULL)
4167 prevcbp->next = nextcbp;
4168 else
4169 crp->code_break = nextcbp;
4170 cbp->next = NULL;
4171 CodeBreakFree (cbp);
4172 }
4173 else
4174 {
4175 prevcbp = cbp;
4176 }
4177 }
4178 }
4179
4180 NLM_EXTERN void SeqEdInsertAdjustRNA
4181 (SeqFeatPtr sfp,
4182 BioseqPtr bsp,
4183 Int4 insert_pos,
4184 Int4 len,
4185 Boolean do_split)
4186 {
4187 RnaRefPtr rrp;
4188 tRNAPtr trp;
4189
4190 if (sfp == NULL || bsp == NULL) return;
4191 rrp = (RnaRefPtr)(sfp->data.value.ptrvalue);
4192 if (rrp == NULL) return;
4193 if (rrp->ext.choice == 2) /* tRNA */
4194 {
4195 trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
4196 if (trp->anticodon != NULL)
4197 {
4198 trp->anticodon = SeqEdSeqLocInsert (trp->anticodon, bsp, insert_pos, len, do_split, NULL);
4199 }
4200 }
4201 }
4202
4203
4204 static BioseqPtr
4205 GetParentForSegment
4206 (BioseqPtr bsp, Int4Ptr p_start, Int4Ptr p_stop)
4207 {
4208 BioseqSetPtr parts_bssp, seg_bssp;
4209 BioseqPtr master_bsp, other_part;
4210 SeqEntryPtr sep;
4211 Int4 offset = 0;
4212
4213 if (bsp == NULL || bsp->idx.parentptr == NULL || bsp->idx.parenttype != OBJ_BIOSEQSET) return NULL;
4214
4215 parts_bssp = (BioseqSetPtr) bsp->idx.parentptr;
4216 if (parts_bssp->_class != BioseqseqSet_class_parts
4217 || parts_bssp->idx.parentptr == NULL
4218 || parts_bssp->idx.parenttype != OBJ_BIOSEQSET)
4219 {
4220 return NULL;
4221 }
4222
4223 seg_bssp = (BioseqSetPtr) parts_bssp->idx.parentptr;
4224 if (seg_bssp->_class != BioseqseqSet_class_segset
4225 || seg_bssp->seq_set == NULL
4226 || !IS_Bioseq (seg_bssp->seq_set))
4227 {
4228 return NULL;
4229 }
4230
4231 master_bsp = (BioseqPtr) seg_bssp->seq_set->data.ptrvalue;
4232
4233 if (p_start != NULL || p_stop != NULL)
4234 {
4235 sep = parts_bssp->seq_set;
4236 while (sep != NULL && sep->data.ptrvalue != bsp)
4237 {
4238 if (IS_Bioseq (sep) && sep->data.ptrvalue != NULL)
4239 {
4240 other_part = sep->data.ptrvalue;
4241 offset += other_part->length;
4242 }
4243 sep = sep->next;
4244 }
4245 if (p_start != NULL)
4246 {
4247 *p_start = offset;
4248 }
4249 if (p_stop != NULL)
4250 {
4251 *p_stop = offset + bsp->length - 1;
4252 }
4253 }
4254
4255 return master_bsp;
4256 }
4257
4258
4259 static Boolean AdjustOffsetsForSegment (SeqIdPtr del_id, SeqIdPtr target_id, Int4Ptr from, Int4Ptr to)
4260 {
4261 BioseqPtr bsp_del, bsp_target, bsp_master;
4262 Int4 seg_offset = 0, seg_end = 0;
4263 Boolean rval = FALSE;
4264
4265 if (del_id == NULL || target_id == NULL || from == NULL || to == NULL)
4266 {
4267 return FALSE;
4268 }
4269
4270 bsp_del = BioseqFind (del_id);
4271 bsp_target = BioseqFind (target_id);
4272 if (bsp_del == NULL || bsp_target == NULL) return FALSE;
4273
4274 bsp_master = GetParentForSegment (bsp_del, &seg_offset, &seg_end);
4275 if (bsp_master != NULL)
4276 {
4277 if (bsp_master == bsp_target && seg_offset < *to)
4278 {
4279 /* loc to delete is in parent coordinates */
4280 if (*from > seg_end || *to < seg_offset)
4281 {
4282 /* loc to delete is entirely past this segment */
4283 }
4284 else
4285 {
4286 *from = MAX (0, *from - seg_offset);
4287 *to = MIN (bsp_target->length, *to - seg_offset);
4288 rval = TRUE;
4289 }
4290 }
4291 }
4292 return rval;
4293 }
4294
4295
4296 static void SeqEdInsertSeqPnt (SeqPntPtr spp, SeqIdPtr target_id, Int4 pos, Int4 len, SeqIdPtr newid)
4297 {
4298 Int4 to = pos + len;
4299 Boolean id_in_list;
4300
4301 if (spp == NULL) return;
4302
4303 if ((id_in_list = SeqIdIn(spp->id, target_id))
4304 || AdjustOffsetsForSegment (spp->id, target_id, &pos, &to))
4305 {
4306 if (id_in_list && newid != NULL) /* change id? */
4307 {
4308 SeqIdFree(spp->id);
4309 spp->id = SeqIdDup(newid);
4310 }
4311
4312 if (spp->point >= pos)
4313 {
4314 spp->point += len;
4315 }
4316 }
4317 }
4318
4319
4320 static void
4321 SeqEdInsertSeqInt
4322 (SeqIntPtr sip,
4323 SeqIdPtr target_id,
4324 Int4 pos,
4325 Int4 len,
4326 Boolean split,
4327 SeqIdPtr newid,
4328 SeqIntPtr PNTR split_end)
4329 {
4330 Int4 to = pos + len;
4331 Boolean id_in_list;
4332 SeqIntPtr sip2;
4333 SeqLocPtr slp;
4334
4335 if (sip == NULL || split_end == NULL) return;
4336
4337 if (!(id_in_list = SeqIdIn(sip->id, target_id))
4338 && ! AdjustOffsetsForSegment(sip->id, target_id, &pos, &to))
4339 {
4340 return;
4341 }
4342
4343 if (newid != NULL && id_in_list) /* change id? */
4344 {
4345 SeqIdFree(sip->id);
4346 sip->id = SeqIdDup(newid);
4347 }
4348
4349 if (sip->to < pos) /* completely before insertion */
4350 {
4351 return;
4352 }
4353
4354 if ((! split) || (sip->from >= pos)) /* interval unbroken */
4355 {
4356 if (sip->from >= pos)
4357 sip->from += len;
4358 sip->to += len;
4359 return;
4360 }
4361 /* split interval */
4362 sip2 = SeqIntNew();
4363 slp = ValNodeNew(NULL);
4364 slp->choice = SEQLOC_INT;
4365 slp->data.ptrvalue = (Pointer)sip2;
4366 sip2->strand = sip->strand;
4367 sip2->id = SeqIdDup(sip->id);
4368
4369 sip2->to = sip->to + len;
4370 sip2->from = pos + len;
4371 sip2->if_to = sip->if_to;
4372 sip->if_to = NULL;
4373 sip->to = pos - 1;
4374
4375 *split_end = sip2;
4376 }
4377
4378
4379 /*****************************************************************************
4380 *
4381 * SeqEdSeqLocInsert()
4382 * alters "head" by insert "len" residues before "pos" in any SeqLoc
4383 * on the Bioseq "target"
4384 * all SeqLocs not on "target" are unaltered
4385 * for SeqLocs on "target"
4386 * all SeqLocs before "pos" are unaltered
4387 * all SeqLocs >= "pos" are incremented by "len"
4388 * all SeqLocs spanning "pos"
4389 * if "split" == TRUE, are split into two SeqLocs, one to the
4390 * left of the insertion, the other to right
4391 * if "split" != TRUE, the SeqLoc is increased in length to cover
4392 * the insertion
4393 * returns altered head or NULL if nothing left.
4394 * if ("newid" != NULL) replaces "target" with "newid" whether the
4395 * SeqLoc is altered on not.
4396 *
4397 * Usage hints:
4398 * 1) To update a feature location on "target" when 10 residues of
4399 * sequence have been inserted before position 5
4400 * SeqFeatPtr->location = SeqLocInsert ( SeqFeatPtr->location ,
4401 * "target", 5, 10, TRUE, NULL); [for some feature types
4402 * you may want "split" equal FALSE]
4403 * 2) To insert the complete feature table from "source" into a
4404 * different Bioseq "dest" before position 20 in "dest"
4405 * SFP->location = SeqLocInsert(SFP->location, "source", 0, 20,
4406 * FALSE, "dest");
4407 *
4408 *
4409 *****************************************************************************/
4410 NLM_EXTERN SeqLocPtr LIBCALL SeqEdSeqLocInsert (SeqLocPtr head, BioseqPtr target, Int4 pos, Int4 len,
4411 Boolean split, SeqIdPtr newid)
4412 {
4413 SeqIntPtr sip, sip2;
4414 SeqPntPtr spp;
4415 PackSeqPntPtr pspp, pspp2;
4416 SeqBondPtr sbp;
4417 SeqLocPtr slp, tmp, prev, next, thead, tmp2;
4418 Int4 diff, numpnt, i, tpos;
4419 Uint1 oldchoice;
4420 SeqIdPtr sidp;
4421 Boolean id_in_list;
4422
4423 if ((head == NULL) || (target == NULL))
4424 return head;
4425
4426 head->next = NULL; /* caller maintains chains */
4427
4428 diff = len;
4429
4430 switch (head->choice)
4431 {
4432 case SEQLOC_BOND: /* bond -- 2 seqs */
4433 sbp = (SeqBondPtr)(head->data.ptrvalue);
4434 SeqEdInsertSeqPnt (sbp->a, target->id, pos, len, newid);
4435 SeqEdInsertSeqPnt (sbp->b, target->id, pos, len, newid);
4436 break;
4437 case SEQLOC_FEAT: /* feat -- can't track yet */
4438 case SEQLOC_NULL: /* NULL */
4439 break;
4440 case SEQLOC_EMPTY: /* empty */
4441 case SEQLOC_WHOLE: /* whole */
4442 if (newid != NULL)
4443 {
4444 sidp = (SeqIdPtr)(head->data.ptrvalue);
4445 if ( SeqIdIn(sidp, target->id))
4446 {
4447 SeqIdFree(sidp);
4448 sidp = SeqIdDup(newid);
4449 head->data.ptrvalue = (Pointer)sidp;
4450 }
4451 }
4452 break;
4453 case SEQLOC_MIX: /* mix -- more than one seq */
4454 case SEQLOC_EQUIV: /* equiv -- ditto */
4455 case SEQLOC_PACKED_INT: /* packed int */
4456 prev = NULL;
4457 thead = NULL;
4458 for (slp = (SeqLocPtr)(head->data.ptrvalue); slp != NULL; slp = next)
4459 {
4460 next = slp->next;
4461 oldchoice = slp->choice;
4462 tmp = SeqEdSeqLocInsert(slp, target, pos, len, split, newid);
4463 if (tmp != NULL)
4464 {
4465 if ((head->choice != SEQLOC_EQUIV) &&
4466 (oldchoice != tmp->choice)) /* split interval? */
4467 {
4468 if ((oldchoice == SEQLOC_INT) &&
4469 (tmp->choice == SEQLOC_PACKED_INT))
4470 {
4471 tmp2 = tmp;
4472 tmp = (SeqLocPtr)(tmp2->data.ptrvalue);
4473 MemFree(tmp2);
4474 while (tmp->next != NULL)
4475 {
4476 if (prev != NULL)
4477 prev->next = tmp;
4478 else
4479 thead = tmp;
4480 prev = tmp;
4481 tmp = tmp->next;
4482 }
4483 }
4484 }
4485 if (prev != NULL)
4486 prev->next = tmp;
4487 else
4488 thead = tmp;
4489 prev = tmp;
4490 }
4491 }
4492 head->data.ptrvalue = thead;
4493 if (thead == NULL)
4494 head = SeqLocFree(head);
4495 break;
4496 case SEQLOC_INT: /* int */
4497 sip = (SeqIntPtr)(head->data.ptrvalue);
4498 sip2 = NULL;
4499 SeqEdInsertSeqInt (sip, target->id, pos, len, split, newid, &sip2);
4500 if (sip2 != NULL)
4501 {
4502 thead = head; /* make split interval into PACKED_INT */
4503 head = ValNodeNew (NULL);
4504 head->choice = SEQLOC_PACKED_INT;
4505
4506 slp = ValNodeNew (NULL);
4507 slp->choice = SEQLOC_INT;
4508 slp->data.ptrvalue = sip2;
4509
4510 if (sip->strand == Seq_strand_minus) /* reverse order */
4511 {
4512 head->data.ptrvalue = slp;
4513 slp->next = thead;
4514 }
4515 else
4516 {
4517 head->data.ptrvalue = thead;
4518 thead->next = slp;
4519 }
4520 }
4521 break;
4522 case SEQLOC_PNT: /* pnt */
4523 spp = (SeqPntPtr)(head->data.ptrvalue);
4524 SeqEdInsertSeqPnt (spp, target->id, pos, len, newid);
4525 break;
4526 case SEQLOC_PACKED_PNT: /* packed pnt */
4527 pspp = (PackSeqPntPtr)(head->data.ptrvalue);
4528 if ((id_in_list = SeqIdIn(pspp->id, target->id))
4529 || AdjustOffsetsForSegment(pspp->id, target->id, &pos, NULL))
4530 {
4531 if (id_in_list && newid != NULL) /* change id? */
4532 {
4533 SeqIdFree(pspp->id);
4534 pspp->id = SeqIdDup(newid);
4535 }
4536
4537 numpnt = PackSeqPntNum(pspp);
4538 pspp2 = PackSeqPntNew();
4539 head->data.ptrvalue = pspp2;
4540 for (i = 0; i < numpnt; i++)
4541 {
4542 tpos = PackSeqPntGet(pspp, i);
4543 if (tpos >= pos)
4544 tpos += len;
4545 PackSeqPntPut(pspp2, tpos);
4546 }
4547 pspp2->id = pspp->id;
4548 pspp->id = NULL;
4549 pspp2->fuzz = pspp->fuzz;
4550 pspp->fuzz = NULL;
4551 pspp2->strand = pspp->strand;
4552 PackSeqPntFree(pspp);
4553 }
4554 break;
4555 default:
4556 break;
4557 }
4558
4559 if (head == NULL)
4560 ErrPost(CTX_NCBIOBJ, 1, "SeqEdSeqLocInsert: lost a SeqLoc");
4561
4562 return head;
4563 }
4564
4565
4566 /* return TRUE if spp should be deleted */
4567 static Boolean SeqEdDeleteFromSeqPnt (SeqPntPtr spp, SeqIdPtr target_id, Int4 from, Int4 to)
4568 {
4569 Boolean rval = FALSE;
4570 Int4 diff = to - from + 1;
4571
4572 if (spp == NULL) return FALSE;
4573
4574 if (SeqIdIn (spp->id, target_id)
4575 || AdjustOffsetsForSegment (spp->id, target_id, &from, &to))
4576 {
4577 if ((spp->point >= from) && (spp->point <= to))
4578 {
4579 rval = TRUE;
4580 }
4581 else if (spp->point > to)
4582 {
4583 spp->point -= diff;
4584 }
4585 }
4586 return rval;
4587 }
4588
4589
4590 static SeqLocPtr
4591 SeqEdDeleteFromSeqLocBond (SeqLocPtr head, SeqIdPtr target, Int4 from, Int4 to, Boolean merge, BoolPtr changed)
4592 {
4593 SeqBondPtr sbp;
4594
4595 if (head == NULL || target == NULL || head->choice != SEQLOC_BOND) return NULL;
4596 sbp = (SeqBondPtr)(head->data.ptrvalue);
4597
4598 if (SeqEdDeleteFromSeqPnt (sbp->a, target, from, to))
4599 {
4600 *changed = TRUE;
4601 sbp->a = SeqPntFree(sbp->a);
4602 }
4603
4604 if (SeqEdDeleteFromSeqPnt (sbp->b, target, from, to))
4605 {
4606 *changed = TRUE;
4607 sbp->b = SeqPntFree(sbp->b);
4608 }
4609
4610 if (sbp->a == NULL)
4611 {
4612 if (sbp->b != NULL) /* only a required */
4613 {
4614 sbp->a = sbp->b;
4615 sbp->b = NULL;
4616 }
4617 else
4618 {
4619 head = SeqLocFree(head);
4620 }
4621 }
4622 return head;
4623 }
4624
4625 static SeqLocPtr DeleteFromSeqLocWhole
4626 (SeqLocPtr head, BioseqPtr target, Int4 from, Int4 to, Boolean merge, BoolPtr changed)
4627 {
4628 SeqIdPtr sidp;
4629 SeqIntPtr sip;
4630 SeqLocPtr slp, tmp;
4631
4632 if (head == NULL || target == NULL || head->choice != SEQLOC_WHOLE) return NULL;
4633
4634 sidp = (SeqIdPtr)(head->data.ptrvalue);
4635
4636 if ( SeqIdIn(sidp, target->id))
4637 {
4638 if ((from == 0) && (to >= (target->length - 1)))
4639 { /* complete delete */
4640 head = SeqLocFree(head);
4641 *changed = TRUE;
4642 return head;
4643 }
4644
4645 if (! merge) /* split it up */
4646 {
4647 SeqIdFree(sidp);
4648 head->choice = SEQLOC_PACKED_INT;
4649 head->data.ptrvalue = NULL;
4650 slp = NULL;
4651 if (from != 0)
4652 {
4653 sip = SeqIntNew();
4654 sip->from = 0;
4655 sip->to = from - 1;
4656 sip->id = SeqIdDup(target->id);
4657 slp = ValNodeNew(NULL);
4658 slp->choice = SEQLOC_INT;
4659 slp->data.ptrvalue = sip;
4660 head->data.ptrvalue = slp;
4661 *changed = TRUE;
4662 }
4663 if (to < (target->length - 1))
4664 {
4665 sip = SeqIntNew();
4666 sip->from = to + 1;
4667 sip->to = target->length - 1;
4668 sip->id = SeqIdDup(target->id);
4669 tmp = ValNodeNew(NULL);
4670 tmp->choice = SEQLOC_INT;
4671 tmp->data.ptrvalue = sip;
4672 if (slp != NULL)
4673 slp->next = tmp;
4674 else
4675 head->data.ptrvalue = tmp;
4676 *changed = TRUE;
4677 }
4678 }
4679 }
4680 return head;
4681 }
4682
4683 static SeqLocPtr SeqEdDeleteFromSeqLocPackedInt
4684 (SeqLocPtr head, BioseqPtr target, Int4 from, Int4 to, Boolean merge, BoolPtr changed, BoolPtr partial5, BoolPtr partial3)
4685 {
4686 Boolean part5, part3, first;
4687 SeqLocPtr slp, tmp, prev, next, thead;
4688 SeqIntPtr sip, sip2;
4689
4690 if (head == NULL || target == NULL) return NULL;
4691 if (head->choice != SEQLOC_MIX && head->choice != SEQLOC_EQUIV && head->choice != SEQLOC_PACKED_INT)
4692 return NULL;
4693 prev = NULL;
4694 thead = NULL;
4695 part5 = FALSE;
4696 part3 = FALSE;
4697 first = TRUE;
4698 for (slp = (SeqLocPtr)(head->data.ptrvalue); slp != NULL; slp = next)
4699 {
4700 next = slp->next;
4701 tmp = SeqEdSeqLocDelete (slp, target, from, to, merge, changed, &part5, &part3);
4702 if (first)
4703 {
4704 if (partial5 != NULL)
4705 {
4706 *partial5 = part5;
4707 }
4708 }
4709 first = FALSE;
4710 if (tmp != NULL)
4711 {
4712 if (prev != NULL)
4713 {
4714 if ((merge) && (prev->choice == SEQLOC_INT) && (tmp->choice == SEQLOC_INT))
4715 {
4716 sip = (SeqIntPtr)(prev->data.ptrvalue);
4717 sip2 = (SeqIntPtr)(tmp->data.ptrvalue);
4718
4719 if (SeqIdForSameBioseq(sip->id, sip2->id))
4720 {
4721 /* merge intervals? */
4722 if ((sip->strand == Seq_strand_minus) &&
4723 (sip2->strand == Seq_strand_minus))
4724 {
4725 if (sip->from == (sip2->to + 1))
4726 {
4727 sip->from = sip2->from;
4728 sip->if_from = sip2->if_from;
4729 sip2->if_from = NULL;
4730 tmp = SeqLocFree(tmp);
4731 }
4732 }
4733 else if((sip->strand != Seq_strand_minus) &&
4734 (sip2->strand != Seq_strand_minus))
4735 {
4736 if (sip->to == (sip2->from - 1))
4737 {
4738 sip->to = sip2->to;
4739 sip->if_to = sip2->if_to;
4740 sip2->if_to = NULL;
4741 tmp = SeqLocFree(tmp);
4742 }
4743 }
4744 }
4745 }
4746 else if ((prev->choice == SEQLOC_NULL) && (tmp->choice == SEQLOC_NULL))
4747 {
4748 tmp = SeqLocFree(tmp);
4749 *changed = TRUE;
4750 }
4751 }
4752 else if (tmp->choice == SEQLOC_NULL)
4753 {
4754 tmp = SeqLocFree(tmp);
4755 *changed = TRUE;
4756 }
4757
4758 if (tmp != NULL) /* still have one? */
4759 {
4760 if (prev != NULL)
4761 prev->next = tmp;
4762 else
4763 thead = tmp;
4764 prev = tmp;
4765 }
4766 }
4767 else
4768 {
4769 *changed = TRUE;
4770 }
4771 }
4772 if (partial3 != NULL)
4773 {
4774 *partial3 = part3;
4775 }
4776 if (prev != NULL)
4777 {
4778 if (prev->choice == SEQLOC_NULL) /* ends with NULL */
4779 {
4780 prev = NULL;
4781 for (slp = thead; slp->next != NULL; slp = slp->next)
4782 {
4783 prev = slp;
4784 }
4785 if (prev != NULL)
4786 {
4787 prev->next = NULL;
4788 SeqLocFree(slp);
4789 }
4790 else
4791 {
4792 thead = SeqLocFree(thead);
4793 }
4794 *changed = TRUE;
4795 }
4796 }
4797 head->data.ptrvalue = thead;
4798 if (thead == NULL)
4799 head = SeqLocFree(head);
4800 return head;
4801 }
4802
4803
4804 static SeqLocPtr SeqEdDeleteFromSeqLocInt (SeqLocPtr head, BioseqPtr target, Int4 from, Int4 to, Boolean merge, BoolPtr changed, BoolPtr partial5, BoolPtr partial3)
4805 {
4806 Int4 diff;
4807 SeqIntPtr sip, sip2;
4808 SeqLocPtr slp, tmp;
4809
4810 if (head == NULL || target == NULL || head->choice != SEQLOC_INT) return NULL;
4811
4812 sip = (SeqIntPtr)(head->data.ptrvalue);
4813 if ( !SeqIdIn(sip->id, target->id)
4814 && ! AdjustOffsetsForSegment (sip->id, target->id, &from, &to))
4815 {
4816 return head;
4817 }
4818
4819 diff = to - from + 1;
4820
4821 if (sip->to < from) /* completely before cut */
4822 return head;
4823
4824 /* completely contained in cut */
4825 if ((sip->from >= from) && (sip->to <= to))
4826 {
4827 head = SeqLocFree(head);
4828 *changed = TRUE;
4829 return head;
4830 }
4831
4832 if (sip->from > to) /* completely past cut */
4833 {
4834 sip->from -= diff;
4835 sip->to -= diff;
4836 return head;
4837 }
4838 /* overlap here */
4839 if (sip->to > to)
4840 {
4841 sip->to -= diff;
4842 }
4843 else /* to inside cut, so partial delete */
4844 {
4845 sip->to = from - 1;
4846 *changed = TRUE;
4847 if (partial3 != NULL)
4848 {
4849 *partial3 = TRUE;
4850 }
4851 }
4852
4853 if (sip->from >= from) /* from inside cut, partial del */
4854 {
4855 *changed = TRUE;
4856 sip->from = to + 1;
4857 sip->from -= diff;
4858 if (partial5 != NULL)
4859 {
4860 *partial5 = TRUE;
4861 }
4862
4863 if (merge)
4864 return head;
4865
4866 /* interval spans cut.. only in non-merge */
4867 /* have to split */
4868
4869 if ((sip->from < from) && (sip->to > to))
4870 {
4871 *changed = TRUE;
4872 head->choice = SEQLOC_PACKED_INT;
4873 head->data.ptrvalue = NULL;
4874 tmp = ValNodeNew(NULL);
4875 tmp->choice = SEQLOC_INT;
4876 tmp->data.ptrvalue = sip;
4877
4878 sip2 = SeqIntNew();
4879 sip2->from = to + 1;
4880 sip2->to = sip->to;
4881 sip2->strand = sip->strand;
4882 sip2->if_to = sip->if_to;
4883 sip2->id = SeqIdDup(target->id);
4884 slp = ValNodeNew(NULL);
4885 slp->choice = SEQLOC_INT;
4886 slp->data.ptrvalue = sip2;
4887
4888 sip->if_to = NULL;
4889 sip->to = from - 1;
4890
4891 if (sip->strand == Seq_strand_minus)
4892 {
4893 head->data.ptrvalue = slp;
4894 slp->next = tmp;
4895 }
4896 else
4897 {
4898 head->data.ptrvalue = tmp;
4899 tmp->next = slp;
4900 }
4901
4902 }
4903 }
4904 return head;
4905 }
4906
4907 static SeqLocPtr SeqEdDeleteFromSeqLocPackedPnt
4908 (SeqLocPtr head, BioseqPtr target, Int4 from, Int4 to, Boolean merge, BoolPtr changed)
4909 {
4910 PackSeqPntPtr pspp, pspp2;
4911 Int4 i, diff, numpnt, tpos;
4912
4913 if (head == NULL || target == NULL || head->choice != SEQLOC_PACKED_PNT) return NULL;
4914
4915 pspp = (PackSeqPntPtr)(head->data.ptrvalue);
4916 if (!SeqIdIn (pspp->id, target->id)) return head;
4917
4918 diff = to - from + 1;
4919
4920 numpnt = PackSeqPntNum(pspp);
4921 pspp2 = PackSeqPntNew();
4922 head->data.ptrvalue = pspp2;
4923 for (i = 0; i < numpnt; i++)
4924 {
4925 tpos = PackSeqPntGet(pspp, i);
4926 if (tpos < from)
4927 {
4928 PackSeqPntPut(pspp2, tpos);
4929 }
4930 else
4931 {
4932 if (tpos > to)
4933 {
4934 if (merge)
4935 {
4936 tpos -= diff;
4937 }
4938 PackSeqPntPut(pspp2, tpos);
4939 }
4940 else
4941 {
4942 *changed = TRUE;
4943 }
4944 }
4945 }
4946 pspp2->id = pspp->id;
4947 pspp->id = NULL;
4948 pspp2->fuzz = pspp->fuzz;
4949 pspp->fuzz = NULL;
4950 pspp2->strand = pspp->strand;
4951 PackSeqPntFree(pspp);
4952 numpnt = PackSeqPntNum(pspp2);
4953 if (! numpnt)
4954 {
4955 head = SeqLocFree(head);
4956 }
4957 return head;
4958 }
4959
4960
4961 /*****************************************************************************
4962 *
4963 * SeqEdSeqLocDelete()
4964 * returns altered head or NULL if nothing left.
4965 * sets changed=TRUE if all or part of loc is deleted
4966 * does NOT set changed if location coordinates are only moved
4967 * if (merge) then corrects coordinates upstream of to
4968 * else
4969 * splits intervals covering from-to, does not correct upstream of to
4970 *
4971 *****************************************************************************/
4972 NLM_EXTERN SeqLocPtr SeqEdSeqLocDelete (SeqLocPtr head, BioseqPtr target, Int4 from, Int4 to, Boolean merge, BoolPtr changed, BoolPtr partial5, BoolPtr partial3)
4973 {
4974 SeqPntPtr spp;
4975 Int4 diff;
4976
4977 if ((head == NULL) || (target == NULL))
4978 return head;
4979
4980 head->next = NULL; /* caller maintains chains */
4981 diff = to - from + 1;
4982
4983 switch (head->choice)
4984 {
4985 case SEQLOC_BOND: /* bond -- 2 seqs */
4986 head = SeqEdDeleteFromSeqLocBond (head, target->id, from, to, merge, changed);
4987 break;
4988 case SEQLOC_FEAT: /* feat -- can't track yet */
4989 case SEQLOC_NULL: /* NULL */
4990 case SEQLOC_EMPTY: /* empty */
4991 break;
4992 case SEQLOC_WHOLE: /* whole */
4993 head = DeleteFromSeqLocWhole (head, target, from, to, merge, changed);
4994 break;
4995 case SEQLOC_MIX: /* mix -- more than one seq */
4996 case SEQLOC_EQUIV: /* equiv -- ditto */
4997 case SEQLOC_PACKED_INT: /* packed int */
4998 head = SeqEdDeleteFromSeqLocPackedInt (head, target, from, to, merge, changed, partial5, partial3);
4999 break;
5000 case SEQLOC_INT: /* int */
5001 head = SeqEdDeleteFromSeqLocInt (head, target, from, to, merge, changed, partial5, partial3);
5002 break;
5003 case SEQLOC_PNT: /* pnt */
5004 spp = (SeqPntPtr)(head->data.ptrvalue);
5005 if (SeqEdDeleteFromSeqPnt (spp, target->id, from, to))
5006 {
5007 head = SeqLocFree(head);
5008 *changed = TRUE;
5009 }
5010 break;
5011 case SEQLOC_PACKED_PNT: /* packed pnt */
5012 head = SeqEdDeleteFromSeqLocPackedPnt (head, target, from, to, merge, changed);
5013 break;
5014 default:
5015 break;
5016 }
5017
5018 return head;
5019 }
5020
5021
5022 NLM_EXTERN SeqFeatPtr
5023 SeqEdGetNextFeature
5024 (BioseqPtr bsp,
5025 SeqFeatPtr curr,
5026 Uint1 seqFeatChoice,
5027 Uint1 featDefChoice,
5028 SeqMgrFeatContext PNTR context,
5029 Boolean byLabel,
5030 Boolean byLocusTag,
5031 Uint2 entityID)
5032
5033 {
5034 SMFeatItemPtr PNTR array = NULL;
5035 BioseqExtraPtr bspextra;
5036 Int4 i;
5037 SMFeatItemPtr item;
5038 Int4 num = 0;
5039 ObjMgrDataPtr omdp;
5040 ObjMgrPtr omp;
5041 Uint1 seqfeattype;
5042
5043 if (context == NULL) return NULL;
5044
5045 /* if curr is NULL, initialize context fields (in user's stack) */
5046
5047 if (curr == NULL) {
5048 if (bsp == NULL) return NULL;
5049 omdp = (ObjMgrDataPtr) bsp->omdp;
5050 if (omdp == NULL)
5051 {
5052 omp = ObjMgrWriteLock ();
5053 omdp = ObjMgrFindByData (omp, bsp);
5054 ObjMgrUnlock ();
5055 bsp->omdp = (Pointer) omdp;
5056 }
5057 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return NULL;
5058
5059 context->omdp = (Pointer) omdp;
5060 context->index = 0;
5061 }
5062
5063 omdp = (ObjMgrDataPtr) context->omdp;
5064 if (omdp == NULL) return NULL;
5065 bspextra = (BioseqExtraPtr) omdp->extradata;
5066 if (bspextra == NULL) return NULL;
5067 if (byLocusTag) {
5068 array = bspextra->genesByLocusTag;
5069 num = bspextra->numgenes;
5070 } else if (byLabel) {
5071 array = bspextra->featsByLabel;
5072 num = bspextra->numfeats;
5073 } else {
5074 array = bspextra->featsByPos;
5075 num = bspextra->numfeats;
5076 }
5077 if (array == NULL || num < 1) return NULL;
5078
5079 i = context->index;
5080
5081 /* now look for next appropriate feature */
5082
5083 while (i < num) {
5084 item = array [i];
5085 if (item != NULL) {
5086 curr = item->sfp;
5087 i++;
5088 if (curr != NULL) {
5089 seqfeattype = curr->data.choice;
5090 if ((seqFeatChoice == 0 || seqfeattype == seqFeatChoice) &&
5091 (featDefChoice == 0 || item->subtype == featDefChoice) &&
5092 (! item->ignore)) {
5093 context->entityID = entityID;
5094 context->itemID = item->itemID;
5095 context->sfp = curr;
5096 context->sap = item->sap;
5097 context->bsp = item->bsp;
5098 context->label = item->label;
5099 context->left = item->left;
5100 context->right = item->right;
5101 context->dnaStop = item->dnaStop;
5102 context->partialL = item->partialL;
5103 context->partialR = item->partialR;
5104 context->farloc = item->farloc;
5105 context->strand = item->strand;
5106 context->seqfeattype = seqfeattype;
5107 context->featdeftype = item->subtype;
5108 context->numivals = item->numivals;
5109 context->ivals = item->ivals;
5110 context->userdata = NULL;
5111 context->omdp = (Pointer) omdp;
5112 if (byLocusTag) {
5113 context->index = i;
5114 } else if (byLabel) {
5115 context->index = i;
5116 } else {
5117 context->index = item->index + 1;
5118 }
5119 return curr;
5120 }
5121 }
5122 }
5123 }
5124
5125 return NULL;
5126 }
5127
5128 static void ReindexExtendedFeatures (SeqEdJournalPtr sejp)
5129 {
5130 ValNodePtr vnp;
5131 AffectedFeatPtr afp;
5132 SeqFeatPtr affected_sfp, real_sfp;
5133 SeqMgrFeatContext fcontext;
5134
5135 for (vnp = sejp->affected_feats; vnp != NULL; vnp = vnp->next)
5136 {
5137 if (vnp->choice == 1 && vnp->data.ptrvalue != NULL)
5138 {
5139 afp = (AffectedFeatPtr) vnp->data.ptrvalue;
5140 affected_sfp = afp->feat_after;
5141 if (affected_sfp != NULL)
5142 {
5143 real_sfp = SeqMgrGetDesiredFeature (sejp->entityID, sejp->bsp, affected_sfp->idx.itemID, 0, NULL, &fcontext);
5144 SeqEdReindexFeature (real_sfp, sejp->bsp);
5145 }
5146 }
5147 }
5148 }
5149
5150
5151 static Boolean DoesSeqFeatMatch (SeqFeatPtr a, SeqFeatPtr b)
5152 {
5153 if (a == b) return TRUE;
5154 if (a == NULL || b == NULL) return FALSE;
5155
5156 if (a->data.choice != b->data.choice) return FALSE;
5157 if (SeqLocCompare (a->location, b->location) != SLC_A_EQ_B)
5158 {
5159 return FALSE;
5160 }
5161 return TRUE;
5162 }
5163
5164
5165 static void SeqEdInsertAdjustFeat (SeqFeatPtr sfp, SeqEdJournalPtr sejp, Int4 insert_point)
5166 {
5167 ValNodePtr vnp;
5168 AffectedFeatPtr afp = NULL;
5169 SeqLocPtr tmp_loc;
5170 Boolean split_mode;
5171 BioseqPtr bsp;
5172 Int4 insert_offset;
5173
5174 if (sfp == NULL || sejp == NULL)
5175 {
5176 return;
5177 }
5178
5179 bsp = GetParentForSegment (sejp->bsp, &insert_offset, NULL);
5180 if (bsp == NULL)
5181 {
5182 bsp = sejp->bsp;
5183 }
5184 else
5185 {
5186 insert_point += insert_offset;
5187 }
5188
5189 for (vnp = sejp->affected_feats; vnp != NULL && afp == NULL; vnp = vnp->next)
5190 {
5191 afp = (AffectedFeatPtr) vnp->data.ptrvalue;
5192 if (afp != NULL && DoesSeqFeatMatch (afp->feat_after, sfp))
5193 {
5194 vnp->choice = 1;
5195 }
5196 else
5197 {
5198 afp = NULL;
5199 }
5200 }
5201
5202 /* if we're inserting a gap and the feature is a coding region, need to split location
5203 * regardless of mode */
5204 split_mode = sejp->spliteditmode;
5205 if (sejp->action == eSeqEdInsertGap || sejp->action == eSeqEdDeleteGap
5206 && sfp->data.choice == SEQFEAT_CDREGION)
5207 {
5208 split_mode = TRUE;
5209 }
5210
5211 if (afp != NULL)
5212 {
5213 tmp_loc = sfp->location;
5214 sfp->location = afp->feat_before->location;
5215 afp->feat_before->location = tmp_loc;
5216 }
5217 else
5218 {
5219 sfp->location = SeqEdSeqLocInsert (sfp->location, bsp, insert_point,
5220 sejp->num_chars, split_mode, NULL);
5221 }
5222 switch (sfp->data.choice)
5223 {
5224 case SEQFEAT_CDREGION: /* cdregion */
5225 SeqEdInsertAdjustCdRgn (sfp, bsp, insert_point, sejp->num_chars,
5226 split_mode);
5227 break;
5228 case SEQFEAT_RNA:
5229 SeqEdInsertAdjustRNA (sfp, bsp, insert_point, sejp->num_chars,
5230 split_mode);
5231 break;
5232 default:
5233 break;
5234 }
5235 }
5236
5237 extern Boolean IsDeltaSeqGap (DeltaSeqPtr dsp)
5238 {
5239 SeqLitPtr slip;
5240 if (dsp == NULL || dsp->choice != 2 || dsp->data.ptrvalue == NULL)
5241 {
5242 return FALSE;
5243 }
5244 slip = (SeqLitPtr) (dsp->data.ptrvalue);
5245 if (slip->seq_data == NULL || slip->seq_data_type == Seq_code_gap)
5246 {
5247 return TRUE;
5248 }
5249 else
5250 {
5251 return FALSE;
5252 }
5253 }
5254
5255 extern Boolean IsDeltaSeqUnknownGap (DeltaSeqPtr dsp)
5256 {
5257 SeqLitPtr slip;
5258 if (dsp == NULL || dsp->choice != 2 || dsp->data.ptrvalue == NULL)
5259 {
5260 return FALSE;
5261 }
5262 slip = (SeqLitPtr) (dsp->data.ptrvalue);
5263 if ((slip->seq_data == NULL || slip->seq_data_type == Seq_code_gap) &&
5264 slip->fuzz != NULL && slip->fuzz->choice == 4)
5265 {
5266 return TRUE;
5267 }
5268 else
5269 {
5270 return FALSE;
5271 }
5272 }
5273
5274
5275 extern Boolean IsDeltaSeqKnownGap (DeltaSeqPtr dsp)
5276 {
5277 SeqLitPtr slip;
5278 if (dsp == NULL || dsp->choice != 2 || dsp->data.ptrvalue == NULL)
5279 {
5280 return FALSE;
5281 }
5282 slip = (SeqLitPtr) (dsp->data.ptrvalue);
5283 if ((slip->seq_data == NULL || slip->seq_data_type == Seq_code_gap) &&
5284 slip->fuzz == NULL)
5285 {
5286 return TRUE;
5287 }
5288 else
5289 {
5290 return FALSE;
5291 }
5292 }
5293
5294
5295 extern Boolean DoesSeqLitHaveGapTypeOrLinkage (SeqLitPtr slip)
5296 {
5297 if (slip != NULL && slip->seq_data_type == Seq_code_gap) {
5298 return TRUE;
5299 } else {
5300 return FALSE;
5301 }
5302 }
5303
5304 extern Boolean DoesDeltaSeqHaveGapTypeOrLinkage (DeltaSeqPtr dsp)
5305 {
5306 if (dsp != NULL && dsp->choice == 2) {
5307 return DoesSeqLitHaveGapTypeOrLinkage ((SeqLitPtr) dsp->data.ptrvalue);
5308 } else {
5309 return FALSE;
5310 }
5311 }
5312
5313 static DeltaSeqPtr GetDeltaSeqForOffset (BioseqPtr bsp, Int4 offset, Int4Ptr seqstart)
5314 {
5315 Int4 curr_pos = 0;
5316 Boolean found = FALSE;
5317 SeqLocPtr slp;
5318 SeqLitPtr slip = NULL;
5319 DeltaSeqPtr dsp;
5320
5321 if (bsp == NULL || bsp->repr != Seq_repr_delta
5322 || bsp->seq_ext_type != 4 || bsp->seq_ext == NULL
5323 || offset < 0)
5324 {
5325 return NULL;
5326 }
5327
5328 if (seqstart != NULL)
5329 {
5330 *seqstart = 0;
5331 }
5332 dsp = (DeltaSeqPtr) bsp->seq_ext;
5333 while (dsp != NULL && !found)
5334 {
5335 if (dsp->data.ptrvalue == NULL) continue;
5336 if (dsp->choice == 1)
5337 { /* SeqLoc */
5338 slp = (SeqLocPtr)(dsp->data.ptrvalue);
5339 curr_pos += SeqLocLen (slp);
5340 }
5341 else if (dsp->choice == 2)
5342 {
5343 slip = (SeqLitPtr) (dsp->data.ptrvalue);
5344 curr_pos += slip->length;
5345 }
5346 if (curr_pos > offset
5347 || (curr_pos == offset
5348 && (dsp->next == NULL || ! IsDeltaSeqGap (dsp))))
5349 {
5350 found = TRUE;
5351 }
5352 else
5353 {
5354 if (seqstart != NULL)
5355 {
5356 *seqstart = curr_pos;
5357 }
5358 dsp=dsp->next;
5359 }
5360 }
5361
5362 return dsp;
5363 }
5364
5365 static Boolean
5366 SeqEdInsertByteStore
5367 (ByteStorePtr seq_data,
5368 Int4 insert_point,
5369 CharPtr char_data,
5370 Int4 num_chars,
5371 Uint1 moltype)
5372 {
5373 Char ch;
5374 Int4 i;
5375
5376 if (seq_data == NULL || insert_point < 0 || char_data == NULL || num_chars < 1)
5377 {
5378 return FALSE;
5379 }
5380 BSSeek(seq_data, insert_point, SEEK_SET);
5381 Nlm_BSAdd(seq_data, num_chars, FALSE);
5382 BSSeek(seq_data, insert_point, SEEK_SET);
5383 for (i = 0; i < num_chars; i++)
5384 {
5385 ch = TO_UPPER (char_data [i]);
5386 if ( ISA_na (moltype) ) {
5387 if (ch == 'U') ch = 'T';
5388 if (ch == 'X') ch = 'N';
5389 if ( StringChr ("EFIJLOPQXZ-.*", ch) == NULL ) {
5390 BSPutByte ( seq_data, (Int2) ch );
5391 }
5392 }
5393 else
5394 {
5395 if ( StringChr("JO-.", ch) == NULL ) {
5396 BSPutByte ( seq_data, (Int2) ch );
5397 }
5398 }
5399 }
5400 return TRUE;
5401 }
5402
5403 static Boolean SeqEdInsertRaw (SeqEdJournalPtr sejp, Int4 insert_point)
5404 {
5405 Boolean rval;
5406 BioseqPtr bsp;
5407
5408 if (sejp == NULL || sejp->bsp == NULL || sejp->bsp->repr != Seq_repr_raw
5409 || sejp->char_data == NULL || sejp->num_chars == 0 || insert_point < 0)
5410 {
5411 return FALSE;
5412 }
5413 if (sejp->bsp->seq_data_type == Seq_code_gap) return FALSE;
5414
5415 rval = SeqEdInsertByteStore ((ByteStorePtr) sejp->bsp->seq_data, insert_point,
5416 sejp->char_data, sejp->num_chars, sejp->moltype);
5417
5418 if (rval)
5419 {
5420 sejp->bsp->length += sejp->num_chars;
5421 bsp = GetParentForSegment (sejp->bsp, NULL, NULL);
5422 if (bsp != NULL)
5423 {
5424 bsp->length += sejp->num_chars;
5425 }
5426 }
5427 return rval;
5428 }
5429
5430 static Boolean
5431 SeqEdInsertIntoDeltaGap
5432 (DeltaSeqPtr dsp,
5433 SeqEdJournalPtr sejp,
5434 Int4 insert_point)
5435 {
5436 SeqLitPtr slip, slip_data, slip_second_gap;
5437 Boolean rval = FALSE;
5438 DeltaSeqPtr dsp_data, dsp_second_gap;
5439 IntFuzzPtr ifp = NULL;
5440
5441 if (dsp == NULL || dsp->choice != 2 || dsp->data.ptrvalue == NULL)
5442 {
5443 return rval;
5444 }
5445 slip = (SeqLitPtr) dsp->data.ptrvalue;
5446 if (slip->seq_data != NULL && slip->seq_data_type != Seq_code_gap)
5447 {
5448 return rval;
5449 }
5450
5451 if (slip->fuzz != NULL && slip->fuzz->choice == 4)
5452 {
5453 ifp = IntFuzzNew ();
5454 ifp->choice = 4;
5455 }
5456
5457 /* split the gap in two and create a new DeltaSeqPtr in the middle */
5458 slip_data = SeqLitNew ();
5459 slip_data->seq_data_type = Seq_code_iupacna;
5460 slip_data->seq_data = (SeqDataPtr) BSNew (sejp->num_chars);
5461 rval = SeqEdInsertByteStore ((ByteStorePtr) slip_data->seq_data, 0,
5462 sejp->char_data, sejp->num_chars, sejp->moltype);
5463 if (rval)
5464 {
5465 slip_data->length = sejp->num_chars;
5466 /* create second gap */
5467 slip_second_gap = SeqLitNew ();
5468 slip_second_gap->length = slip->length - insert_point;
5469 slip_second_gap->fuzz = ifp;
5470 /* truncate first gap */
5471 slip->length = insert_point;
5472 dsp_data = ValNodeNew (NULL);
5473 dsp_data->choice = 2;
5474 dsp_data->data.ptrvalue = slip_data;
5475 dsp_second_gap = ValNodeNew (NULL);
5476 dsp_second_gap->choice = 2;
5477 dsp_second_gap->data.ptrvalue = slip_second_gap;
5478 dsp_second_gap->next = dsp->next;
5479 dsp_data->next = dsp_second_gap;
5480 dsp->next = dsp_data;
5481 }
5482 return rval;
5483 }
5484
5485 static Boolean IsInsertAllNs (SeqEdJournalPtr sejp)
5486 {
5487 Int4 k;
5488
5489 if (sejp == NULL || sejp->char_data == NULL || sejp->num_chars < 1)
5490 {
5491 return FALSE;
5492 }
5493
5494 for (k = 0; k < sejp->num_chars; k++)
5495 {
5496 if (TO_LOWER (sejp->char_data [k]) != 'n')
5497 {
5498 return FALSE;
5499 }
5500 }
5501 return TRUE;
5502 }
5503
5504 static Boolean SeqEdInsertDelta (SeqEdJournalPtr sejp, Int4 insert_point)
5505 {
5506 DeltaSeqPtr dsp;
5507 SeqLitPtr slip;
5508 Int4 seqstart = 0;
5509 ByteStorePtr bs_new;
5510 Boolean rval;
5511
5512 if (sejp == NULL || sejp->bsp == NULL || sejp->bsp->repr != Seq_repr_delta
5513 || sejp->bsp->seq_ext_type != 4
5514 || sejp->char_data == NULL || sejp->num_chars == 0
5515 || insert_point < 0)
5516 {
5517 return FALSE;
5518 }
5519
5520 dsp = GetDeltaSeqForOffset (sejp->bsp, insert_point, &seqstart);
5521
5522 if (dsp == NULL || dsp->choice != 2 || dsp->data.ptrvalue == NULL)
5523 {
5524 return FALSE;
5525 }
5526
5527 slip = (SeqLitPtr) dsp->data.ptrvalue;
5528 insert_point -= seqstart;
5529
5530 if (IsDeltaSeqGap (dsp))
5531 {
5532 if (slip->fuzz == NULL && IsInsertAllNs (sejp))
5533 {
5534 slip->length += sejp->num_chars;
5535 rval = TRUE;
5536 }
5537 else
5538 {
5539 rval = SeqEdInsertIntoDeltaGap (dsp, sejp, insert_point);
5540 }
5541 }
5542 else
5543 {
5544 if (slip->seq_data_type != Seq_code_iupacna && slip->seq_data_type != Seq_code_gap)
5545 {
5546 bs_new = BSConvertSeq((ByteStorePtr) slip->seq_data, Seq_code_iupacna,
5547 slip->seq_data_type,
5548 slip->length);
5549 slip->seq_data_type = Seq_code_iupacna;
5550 slip->seq_data = (SeqDataPtr) bs_new;
5551 }
5552
5553 rval = SeqEdInsertByteStore ((ByteStorePtr) slip->seq_data, insert_point,
5554 sejp->char_data, sejp->num_chars,
5555 sejp->moltype);
5556 if (rval)
5557 {
5558 slip->length += sejp->num_chars;
5559 }
5560 }
5561
5562 if (rval)
5563 {
5564 sejp->bsp->length += sejp->num_chars;
5565 }
5566 return rval;
5567 }
5568
5569 static Boolean
5570 SeqEdInsertGap (SeqEdJournalPtr sejp, Int4 insert_point)
5571 {
5572 DeltaSeqPtr dsp, dsp_gap, dsp_after;
5573 Int4 seqstart = 0;
5574 SeqLitPtr slip, slip_before, slip_gap, slip_after;
5575 ByteStorePtr bs_new;
5576
5577 if (sejp == NULL || sejp->bsp == NULL || sejp->bsp->repr != Seq_repr_delta
5578 || sejp->bsp->seq_ext_type != 4
5579 || sejp->char_data == NULL || sejp->num_chars == 0
5580 || insert_point < 0)
5581 {
5582 return FALSE;
5583 }
5584
5585 dsp = GetDeltaSeqForOffset (sejp->bsp, insert_point, &seqstart);
5586
5587 if (dsp == NULL || dsp->choice != 2 || dsp->data.ptrvalue == NULL)
5588 {
5589 return FALSE;
5590 }
5591
5592 slip_gap = SeqLitNew ();
5593 slip_gap->seq_data_type = 0;
5594 slip_gap->seq_data = NULL;
5595 slip_gap->length = sejp->num_chars;
5596 if (sejp->unknown_gap)
5597 {
5598 slip_gap->fuzz = IntFuzzNew ();
5599 slip_gap->fuzz->choice = 4;
5600 }
5601
5602 slip = (SeqLitPtr) (dsp->data.ptrvalue);
5603
5604 /* make insert_point relative to start of this SeqLit */
5605 insert_point -= seqstart;
5606
5607 if (insert_point == 0)
5608 {
5609 /* insert gap before */
5610 dsp_after = ValNodeNew (NULL);
5611 dsp_after->choice = 2;
5612 dsp_after->data.ptrvalue = slip;
5613 dsp_after->next = dsp->next;
5614 dsp->next = dsp_after;
5615 dsp->data.ptrvalue = slip_gap;
5616 }
5617 else if (insert_point == slip->length)
5618 {
5619 /* insert gap after */
5620 dsp_after = ValNodeNew (NULL);
5621 dsp_after->choice = 2;
5622 dsp_after->data.ptrvalue = slip_gap;
5623 dsp_after->next = dsp->next;
5624 dsp->next = dsp_after;
5625 }
5626 else if (IsDeltaSeqUnknownGap (dsp))
5627 {
5628 /* can't insert gap inside gap of unknown length */
5629 slip_gap = SeqLitFree (slip_gap);
5630 return FALSE;
5631 }
5632 else if (IsDeltaSeqGap (dsp) && !sejp->unknown_gap)
5633 {
5634 slip_gap = SeqLitFree (slip_gap);
5635 slip->length += sejp->num_chars;
5636 }
5637 else
5638 {
5639 slip_before = SeqLitNew ();
5640 slip_before->seq_data_type = Seq_code_iupacna;
5641 slip_before->length = insert_point;
5642
5643 slip_after = SeqLitNew ();
5644 slip_after->seq_data_type = Seq_code_iupacna;
5645 slip_after->length = slip->length - insert_point;
5646
5647 if (slip->seq_data != NULL && slip->seq_data_type != Seq_code_gap)
5648 {
5649 if (slip->seq_data_type != Seq_code_iupacna && slip->seq_data_type != Seq_code_gap)
5650 {
5651 bs_new = BSConvertSeq((ByteStorePtr) slip->seq_data, Seq_code_iupacna,
5652 slip->seq_data_type,
5653 slip->length);
5654 slip->seq_data_type = Seq_code_iupacna;
5655 slip->seq_data = (SeqDataPtr) bs_new;
5656 }
5657 slip_before->seq_data = (SeqDataPtr) BSNew (slip_before->length);
5658 slip_after->seq_data = (SeqDataPtr) BSNew (slip_after->length);
5659
5660 BSSeek((ByteStorePtr) slip->seq_data, 0, SEEK_SET);
5661 BSInsertFromBS ((ByteStorePtr) slip_before->seq_data, (ByteStorePtr) slip->seq_data, slip_before->length);
5662 BSInsertFromBS ((ByteStorePtr) slip_after->seq_data, (ByteStorePtr) slip->seq_data, slip_after->length);
5663 }
5664
5665 dsp_after = ValNodeNew (NULL);
5666 dsp_after->choice = 2;
5667 dsp_after->data.ptrvalue = slip_after;
5668 dsp_after->next = dsp->next;
5669
5670 dsp_gap = ValNodeNew (NULL);
5671 dsp_gap->choice = 2;
5672 dsp_gap->data.ptrvalue = slip_gap;
5673 dsp_gap->next = dsp_after;
5674
5675 dsp->data.ptrvalue = slip_before;
5676 dsp->next = dsp_gap;
5677 slip = SeqLitFree (slip);
5678 }
5679
5680 sejp->bsp->length += sejp->num_chars;
5681
5682 return TRUE;
5683 }
5684
5685 NLM_EXTERN Boolean
5686 SeqEdInsert (SeqEdJournalPtr sejp)
5687 {
5688 Int4 len;
5689 SeqFeatPtr sfp;
5690 SeqMgrFeatContext fcontext;
5691 ValNodePtr prods, vnp;
5692 BioseqContextPtr bcp;
5693 Int4 insert_point;
5694 Boolean recreated_feats = FALSE;
5695 Boolean rval = FALSE;
5696 BioseqPtr bsp;
5697 Int4 insert_offset = 0;
5698
5699 if (sejp == NULL || sejp->bsp == NULL
5700 || sejp->char_data == NULL || sejp->num_chars == 0)
5701 {
5702 return FALSE;
5703 }
5704
5705 len = BioseqGetLen(sejp->bsp);
5706 insert_point = sejp->offset;
5707
5708 if (insert_point == LAST_RESIDUE)
5709 {
5710 insert_point = len - 1;
5711 }
5712 else if (insert_point == APPEND_RESIDUE)
5713 {
5714 insert_point = len;
5715 }
5716
5717 if ((insert_point < 0) || (insert_point > len)) return FALSE;
5718
5719 if (sejp->action == eSeqEdInsertGap || sejp->action == eSeqEdDeleteGap)
5720 {
5721 rval = SeqEdInsertGap (sejp, insert_point);
5722 }
5723 else if (sejp->bsp->repr == Seq_repr_raw)
5724 {
5725 rval = SeqEdInsertRaw (sejp, insert_point);
5726 }
5727 else if (sejp->bsp->repr == Seq_repr_delta)
5728 {
5729 rval = SeqEdInsertDelta (sejp, insert_point);
5730 }
5731
5732 if (!rval)
5733 {
5734 return FALSE;
5735 }
5736
5737 /* fix features */
5738 if (sejp->entityID > 0 && SeqMgrFeaturesAreIndexed (sejp->entityID))
5739 {
5740 sfp = NULL;
5741 bsp = GetParentForSegment (sejp->bsp, &insert_offset, NULL);
5742 if (bsp == NULL)
5743 {
5744 bsp = sejp->bsp;
5745 }
5746
5747 while ((sfp = SeqEdGetNextFeature (bsp, sfp, 0, 0, &fcontext, FALSE, FALSE, sejp->entityID)) != NULL)
5748 {
5749 SeqEdInsertAdjustFeat (sfp, sejp, insert_point);
5750 }
5751
5752 if (bsp != sejp->bsp)
5753 {
5754 insert_point += insert_offset;
5755 }
5756
5757 /* adjust features pointing by product */
5758 prods = SeqMgrGetSfpProductList (sejp->bsp);
5759 for (vnp = prods; vnp != NULL; vnp = vnp->next)
5760 {
5761 sfp = (SeqFeatPtr) vnp->data.ptrvalue;
5762 if (sfp == NULL) continue;
5763 sfp->product = SeqEdSeqLocInsert (sfp->product, bsp, insert_point, sejp->num_chars, sejp->spliteditmode, NULL);
5764 }
5765 } else {
5766 bcp = BioseqContextNew(sejp->bsp);
5767 sfp = NULL;
5768 /* adjust features pointing by location */
5769 while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 0)) != NULL)
5770 {
5771 SeqEdInsertAdjustFeat (sfp, sejp, insert_point);
5772 }
5773 sfp = NULL;
5774 /* adjust features pointing by product */
5775 while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 1)) != NULL)
5776 {
5777 sfp->product = SeqEdSeqLocInsert (sfp->product, sejp->bsp, insert_point, sejp->num_chars, sejp->spliteditmode, NULL);
5778 }
5779 BioseqContextFree(bcp);
5780 }
5781
5782 recreated_feats = SeqEdRecreateDeletedFeats (sejp);
5783
5784 if (recreated_feats)
5785 {
5786 SeqMgrIndexFeatures (sejp->entityID, NULL);
5787 }
5788 else
5789 {
5790 SeqEdReindexAffectedFeatures (sejp->offset, sejp->num_chars,
5791 sejp->spliteditmode, sejp->bsp);
5792 ReindexExtendedFeatures (sejp);
5793 }
5794 sejp->affected_feats = SeqEdJournalAffectedFeatsFree (sejp->affected_feats);
5795 return TRUE;
5796 }
5797
5798
5799 /* This section contains code for deleting from sequences and feature locations, adapted from
5800 * that found in edutil.c */
5801
5802 /*****************************************************************************
5803 *
5804 * SeqEdSeqFeatDelete()
5805 * 0 = no changes made to location or product
5806 * 1 = changes made but feature still has some location
5807 * 2 = all of sfp->location in deleted interval
5808 *
5809 * if (merge)
5810 * 1) correct numbers > to by subtraction
5811 * 2) do not split intervals spanning the deletion
5812 * else
5813 * 1) do not change numbers > to
5814 * 2) split intervals which span the deletions
5815 *
5816 *****************************************************************************/
5817 NLM_EXTERN Int2 LIBCALL SeqEdSeqFeatDelete (SeqFeatPtr sfp, BioseqPtr target, Int4 from, Int4 to, Boolean merge)
5818 {
5819 ValNode vn;
5820 SeqLocPtr tloc;
5821 SeqInt si;
5822 Boolean changed = FALSE, tmpbool = FALSE;
5823 CdRegionPtr crp;
5824 CodeBreakPtr cbp, prevcbp, nextcbp;
5825 RnaRefPtr rrp;
5826 tRNAPtr trp;
5827
5828 tloc = &vn;
5829 MemSet((Pointer)tloc, 0, sizeof(ValNode));
5830 MemSet((Pointer)&si, 0, sizeof(SeqInt));
5831 tloc->choice = SEQLOC_INT;
5832 tloc->data.ptrvalue = (Pointer)(&si);
5833 si.id = target->id;
5834 si.from = from;
5835 si.to = to;
5836
5837 sfp->location = SeqEdSeqLocDelete (sfp->location, target, from, to, merge, &changed, NULL, NULL);
5838 sfp->product = SeqEdSeqLocDelete(sfp->product, target, from, to, merge, &changed, NULL, NULL);
5839
5840 if (sfp->location == NULL)
5841 return 2;
5842
5843 switch (sfp->data.choice)
5844 {
5845 case SEQFEAT_CDREGION: /* cdregion */
5846 crp = (CdRegionPtr)(sfp->data.value.ptrvalue);
5847 prevcbp = NULL;
5848 for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp)
5849 {
5850 nextcbp = cbp->next;
5851 cbp->loc = SeqEdSeqLocDelete(cbp->loc, target, from, to, merge, &tmpbool, NULL, NULL);
5852 if (cbp->loc == NULL)
5853 {
5854 if (prevcbp != NULL)
5855 prevcbp->next = nextcbp;
5856 else
5857 crp->code_break = nextcbp;
5858 cbp->next = NULL;
5859 CodeBreakFree(cbp);
5860 }
5861 else
5862 prevcbp = cbp;
5863 }
5864 break;
5865 case SEQFEAT_RNA:
5866 rrp = (RnaRefPtr)(sfp->data.value.ptrvalue);
5867 if (rrp->ext.choice == 2) /* tRNA */
5868 {
5869 trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
5870 if (trp->anticodon != NULL)
5871 {
5872 trp->anticodon = SeqEdSeqLocDelete(trp->anticodon, target, from, to, merge, &tmpbool, NULL, NULL);
5873 }
5874 }
5875 break;
5876 default:
5877 break;
5878 }
5879
5880 if (changed)
5881 {
5882 return 1;
5883 }
5884 else
5885 return 0;
5886 }
5887
5888 /*
5889 static Boolean SeqEdDeleteFromDeltaSeq (DeltaSeqPtr dsp, Int4 from, Int4 to)
5890 {
5891 ByteStorePtr bs_new;
5892 SeqLitPtr slip;
5893
5894 if (dsp == NULL || dsp->choice != 2 || dsp->data.ptrvalue == NULL)
5895 {
5896 return FALSE;
5897 }
5898
5899 slip = (SeqLitPtr) dsp->data.ptrvalue;
5900
5901 if (from < 0 || to > slip->length)
5902 {
5903 return FALSE;
5904 }
5905 if (to < 0)
5906 {
5907 to = slip->length - 1;
5908 }
5909
5910 if (! IsDeltaSeqGap (dsp))
5911 {
5912 if (slip->seq_data_type != Seq_code_iupacna)
5913 {
5914 bs_new = BSConvertSeq(slip->seq_data, Seq_code_iupacna,
5915 slip->seq_data_type,
5916 slip->length);
5917 slip->seq_data_type = Seq_code_iupacna;
5918 slip->seq_data = bs_new;
5919 }
5920 BSSeek(slip->seq_data, from, SEEK_SET);
5921 Nlm_BSDelete (slip->seq_data, to - from + 1);
5922 }
5923 slip->length -= (to - from + 1);
5924
5925 return TRUE;
5926 }
5927 */
5928
5929 static void DeleteFromSeqLit (SeqLitPtr slip, Int4 from, Int4 to)
5930 {
5931 ByteStorePtr bs_new;
5932
5933 if (slip == NULL)
5934 {
5935 return;
5936 }
5937 if (from < 0)
5938 {
5939 from = 0;
5940 }
5941
5942 if (to > slip->length - 1 || to < 0)
5943 {
5944 to = slip->length - 1;
5945 }
5946
5947 if (slip->seq_data != NULL && slip->seq_data_type != Seq_code_gap)
5948 {
5949 if (slip->seq_data_type != Seq_code_iupacna && slip->seq_data_type != Seq_code_gap)
5950 {
5951 bs_new = BSConvertSeq((ByteStorePtr) slip->seq_data, Seq_code_iupacna,
5952 slip->seq_data_type,
5953 slip->length);
5954 slip->seq_data_type = Seq_code_iupacna;
5955 slip->seq_data = (SeqDataPtr) bs_new;
5956 }
5957 BSSeek((ByteStorePtr) slip->seq_data, from, SEEK_SET);
5958 Nlm_BSDelete ((ByteStorePtr) slip->seq_data, to - from + 1);
5959 }
5960 slip->length -= (to - from + 1);
5961 }
5962
5963 static Boolean SeqEdDeleteFromDeltaBsp (BioseqPtr bsp, Int4 from, Int4 to)
5964 {
5965 Boolean retval = FALSE;
5966 DeltaSeqPtr dsp, dsp_next, prev_dsp;
5967 SeqLitPtr slip;
5968 Int4 curr_pos = 0;
5969 Int4 del_to, del_from;
5970 Int4 piece_len;
5971 SeqLocPtr slp;
5972
5973 if (bsp == NULL || bsp->repr != Seq_repr_delta
5974 || bsp->seq_ext_type != 4 || bsp->seq_ext == NULL)
5975 {
5976 return retval;
5977 }
5978
5979 prev_dsp = NULL;
5980 dsp = (DeltaSeqPtr) bsp->seq_ext;
5981 while (dsp != NULL && curr_pos <= to)
5982 {
5983 dsp_next = dsp->next;
5984 piece_len = 0;
5985 /* remove empty dsps */
5986 if (dsp->data.ptrvalue == NULL)
5987 {
5988 /* skip */
5989 prev_dsp = dsp;
5990 }
5991 else if (dsp->choice == 1)
5992 { /* SeqLoc */
5993 slp = (SeqLocPtr)(dsp->data.ptrvalue);
5994 piece_len = SeqLocLen (slp);
5995 prev_dsp = dsp;
5996 }
5997 else if (dsp->choice == 2)
5998 {
5999 slip = (SeqLitPtr) (dsp->data.ptrvalue);
6000 piece_len = slip->length;
6001 if (curr_pos + piece_len > from)
6002 {
6003 if (from > curr_pos)
6004 {
6005 del_from = from - curr_pos;
6006 }
6007 else
6008 {
6009 del_from = 0;
6010 }
6011
6012 if (to - curr_pos < slip->length - 1)
6013 {
6014 del_to = to - curr_pos;
6015 }
6016 else
6017 {
6018 del_to = slip->length - 1;
6019 }
6020 DeleteFromSeqLit (slip, del_from, del_to);
6021
6022 /* remove empty delta seq parts */
6023 if (slip->length == 0)
6024 {
6025 if (prev_dsp == NULL)
6026 {
6027 bsp->seq_ext = dsp->next;
6028 }
6029 else
6030 {
6031 prev_dsp->next = dsp->next;
6032 }
6033 dsp->next = NULL;
6034 slip = SeqLitFree (slip);
6035 dsp = ValNodeFree (dsp);
6036 }
6037 else
6038 {
6039 prev_dsp = dsp;
6040 }
6041 }
6042 else
6043 {
6044 prev_dsp = dsp;
6045 }
6046 }
6047 curr_pos += piece_len;
6048 dsp = dsp_next;
6049 }
6050 return TRUE;
6051 }
6052
6053 static Boolean SeqEdDeleteFromSegOrDeltaBsp (BioseqPtr bsp, Int4 from, Int4 to)
6054 {
6055 SeqLocPtr tmp, head;
6056 DeltaSeqPtr tdsp = NULL;
6057 SeqLocPtr PNTR newheadptr;
6058 Int4 totlen, templen, tfrom, tto, diff1, diff2;
6059 SeqLocPtr slp, tloc, newhead, prev;
6060 Boolean retval = FALSE;
6061 SeqInt si;
6062 ValNode vn;
6063
6064 if (bsp == NULL) return retval;
6065 if (bsp->repr != Seq_repr_seg && bsp->repr != Seq_repr_delta) return retval;
6066
6067 head = ValNodeNew(NULL); /* allocate to facilitate SeqLocFree */
6068 head->choice = SEQLOC_MIX; /* make a SeqLoc out of the extension */
6069 if (bsp->repr == Seq_repr_seg)
6070 head->data.ptrvalue = bsp->seq_ext;
6071 else
6072 {
6073 tdsp = (DeltaSeqPtr)(bsp->seq_ext);
6074 head->data.ptrvalue = DeltaSeqsToSeqLocs(tdsp);
6075 }
6076
6077 newhead = NULL;
6078 newheadptr = &newhead;
6079
6080 tloc = &vn;
6081 MemSet((Pointer)tloc, 0, sizeof(ValNode));
6082 MemSet((Pointer)&si, 0, sizeof(SeqInt));
6083 tloc->choice = SEQLOC_INT;
6084 tloc->data.ptrvalue = (Pointer)(&si);
6085
6086 slp = NULL;
6087 totlen = 0;
6088 while ((slp = SeqLocFindNext(head, slp)) != NULL)
6089 {
6090 templen = SeqLocLen(slp);
6091 tfrom = SeqLocStart(slp);
6092 tto = SeqLocStop(slp);
6093
6094 if (((totlen + templen - 1) < from) || /* before cut */
6095 (totlen > to)) /* after cut */
6096 {
6097 tmp = SeqLocAdd(newheadptr, slp, TRUE, TRUE); /* add whole SeqLoc */
6098 }
6099 else
6100 {
6101 retval = TRUE; /* will modify or drop interval */
6102 diff1 = from - totlen; /* partial beginning? */
6103 diff2 = (templen + totlen - 1) - to; /* partial end? */
6104 si.id = SeqLocId(slp);
6105 si.strand = SeqLocStrand(slp);
6106
6107 if (diff1 > 0) /* partial start */
6108 {
6109 if (si.strand != Seq_strand_minus)
6110 {
6111 si.from = tfrom;
6112 si.to = tfrom + diff1 - 1;
6113 }
6114 else
6115 {
6116 si.from = tto - diff1 + 1;
6117 si.to = tto;
6118 }
6119 tmp = SeqLocAdd(newheadptr, tloc, TRUE, TRUE);
6120 }
6121
6122 if (diff2 > 0) /* partial end */
6123 {
6124 if (si.strand != Seq_strand_minus)
6125 {
6126 si.from = tto - diff2 + 1;
6127 si.to = tto;
6128 }
6129 else
6130 {
6131 si.from = tfrom;
6132 si.to = tfrom + diff2 - 1;
6133 }
6134 tmp = SeqLocAdd(newheadptr, tloc, TRUE, TRUE);
6135 }
6136 }
6137 totlen += templen;
6138 }
6139
6140 prev = NULL;
6141 for (tmp = newhead; tmp != NULL; tmp = tmp->next)
6142 {
6143 if (tmp->next == NULL) /* last one */
6144 {
6145 if (tmp->choice == SEQLOC_NULL)
6146 {
6147 if (prev != NULL)
6148 prev->next = NULL;
6149 else /* only a NULL left */
6150 {
6151 newhead = NULL;
6152 }
6153 MemFree(tmp);
6154 break;
6155 }
6156 }
6157 prev = tmp;
6158 }
6159
6160 if (bsp->repr == Seq_repr_seg)
6161 bsp->seq_ext = newhead;
6162 else
6163 {
6164 bsp->seq_ext = SeqLocsToDeltaSeqs(tdsp, newhead);
6165 DeltaSeqSetFree(tdsp);
6166 SeqLocSetFree(newhead);
6167 }
6168 SeqLocFree(head);
6169 return TRUE;
6170 }
6171
6172 static Boolean SeqEdDeleteFromMapBioseq (BioseqPtr bsp, Int4 from, Int4 to)
6173 {
6174 SeqFeatPtr sfpcurr, sfpnext, sfpprev;
6175 Int2 dropped;
6176
6177 if (bsp == NULL || bsp->repr != Seq_repr_map) return FALSE;
6178
6179 sfpprev = NULL;
6180 sfpnext = NULL;
6181 sfpcurr = (SeqFeatPtr)(bsp->seq_ext);
6182 bsp->seq_ext = NULL;
6183 for (; sfpcurr != NULL; sfpcurr = sfpnext)
6184 {
6185 sfpnext = sfpcurr->next;
6186 dropped = SeqEdSeqFeatDelete(sfpcurr, bsp, from, to, TRUE);
6187 if (dropped == 2) /* completely gone */
6188 {
6189 SeqFeatFree(sfpcurr);
6190 }
6191 else
6192 {
6193 if (sfpprev == NULL)
6194 bsp->seq_ext = (Pointer)sfpcurr;
6195 else
6196 sfpprev->next = sfpcurr;
6197 sfpcurr->next = NULL;
6198 sfpprev = sfpcurr;
6199 }
6200 }
6201 return TRUE;
6202 }
6203
6204 static SeqLocPtr FreeSeqLocList (SeqLocPtr slp)
6205 {
6206 if (slp == NULL)
6207 {
6208 return NULL;
6209 }
6210 slp->next = SeqLocFree (slp->next);
6211 slp = SeqLocFree (slp);
6212 return slp;
6213 }
6214
6215 static Boolean ReStitchLocation (Int4 delete_point, SeqFeatPtr sfp)
6216 {
6217 Int4 this_start, this_stop, next_start, next_stop;
6218 SeqLocPtr this_slp, next_slp, loc_list = NULL, tmp_slp, last_slp = NULL, tmp_next;
6219 SeqIdPtr this_id, next_id;
6220 Boolean merged = FALSE;
6221 Uint1 this_strand, next_strand;
6222
6223 if (sfp->location == NULL)
6224 {
6225 return FALSE;
6226 }
6227
6228 this_start = SeqLocStart (sfp->location);
6229 this_stop = SeqLocStop (sfp->location);
6230 if (delete_point <= this_start || delete_point >= this_stop)
6231 {
6232 return FALSE;
6233 }
6234
6235 this_slp = SeqLocFindNext (sfp->location, NULL);
6236 if (this_slp == NULL)
6237 {
6238 return FALSE;
6239 }
6240 next_slp = SeqLocFindNext (sfp->location, this_slp);
6241
6242 while (next_slp != NULL)
6243 {
6244 this_start = SeqLocStart (this_slp);
6245 this_stop = SeqLocStop (this_slp);
6246 this_id = SeqLocId (this_slp);
6247 this_strand = SeqLocStrand (this_slp);
6248 next_start = SeqLocStart (next_slp);
6249 next_stop = SeqLocStop (next_slp);
6250 next_id = SeqLocId (next_slp);
6251 next_strand = SeqLocStrand (next_slp);
6252 if (this_stop + 1 == next_start
6253 && next_start == delete_point
6254 && SeqIdComp (this_id, next_id) == SIC_YES
6255 && this_strand == next_strand)
6256 {
6257 tmp_slp = SeqLocIntNew (this_start, next_stop, this_strand, this_id);
6258 next_slp = SeqLocFindNext (sfp->location, next_slp);
6259 merged = TRUE;
6260 }
6261 else
6262 {
6263 tmp_next = this_slp->next;
6264 this_slp->next = NULL;
6265 tmp_slp = SeqLocCopy (this_slp);
6266 this_slp->next = tmp_next;
6267 }
6268 if (tmp_slp != NULL)
6269 {
6270 if (last_slp == NULL)
6271 {
6272 loc_list = tmp_slp;
6273 }
6274 else
6275 {
6276 last_slp->next = tmp_slp;
6277 }
6278 last_slp = tmp_slp;
6279 }
6280
6281 this_slp = next_slp;
6282 if (this_slp != NULL)
6283 {
6284 next_slp = SeqLocFindNext (sfp->location, this_slp);
6285 }
6286 }
6287 if (merged && loc_list != NULL)
6288 {
6289 if (this_slp != NULL)
6290 {
6291 this_start = SeqLocStart (this_slp);
6292 this_stop = SeqLocStop (this_slp);
6293 tmp_next = this_slp->next;
6294 this_slp->next = NULL;
6295 tmp_slp = SeqLocCopy (this_slp);
6296 this_slp->next = tmp_next;
6297 if (last_slp == NULL)
6298 {
6299 loc_list = tmp_slp;
6300 }
6301 else
6302 {
6303 last_slp->next = tmp_slp;
6304 }
6305 }
6306 if (loc_list->next == NULL)
6307 {
6308 sfp->location = SeqLocFree (sfp->location);
6309 sfp->location = loc_list;
6310 }
6311 else
6312 {
6313 /* already mix, just need to replace list */
6314 sfp->location->data.ptrvalue = FreeSeqLocList (sfp->location->data.ptrvalue);
6315 sfp->location->data.ptrvalue = loc_list;
6316 }
6317 return TRUE;
6318 }
6319 else
6320 {
6321 loc_list = FreeSeqLocList (loc_list);
6322 return FALSE;
6323 }
6324 }
6325
6326 /* ideally, this should take a SeqJournalEntry and perform the deletion.
6327 * We will always be deleting a contiguous section of characters.
6328 * This function will only delete from the specified Bioseq, so there should
6329 * be no need to call BioseqFind (which is expensive).
6330 */
6331 NLM_EXTERN Boolean SeqEdDeleteFromBsp (SeqEdJournalPtr sejp, BoolPtr pfeats_deleted)
6332 {
6333 Boolean retval = FALSE;
6334 Boolean feats_altered = FALSE;
6335 Int4 deleted;
6336 SeqFeatPtr sfp;
6337 SeqMgrFeatContext fcontext;
6338 BioseqContextPtr bcp;
6339 Int2 feat_change;
6340 Boolean feats_deleted = FALSE;
6341 SeqFeatPtr tmp_sfp;
6342 AffectedFeatPtr afp;
6343 Boolean merge_mode;
6344 Boolean location_restitched = FALSE, adjusted_master = FALSE;
6345 BioseqPtr bsp;
6346 Int4 cut_offset = 0, offset = 0;
6347
6348 if (sejp == NULL || sejp->bsp == NULL || sejp->offset < 0 || sejp->offset >= sejp->bsp->length
6349 || sejp->offset + sejp->num_chars + 1 < 0 || sejp->offset + sejp->num_chars > sejp->bsp->length
6350 || sejp->num_chars < 1)
6351 {
6352 return retval;
6353 }
6354
6355 if (sejp->affected_feats != NULL)
6356 {
6357 sejp->affected_feats = SeqEdJournalAffectedFeatsFree (sejp->affected_feats);
6358 }
6359
6360 bsp = GetParentForSegment (sejp->bsp, &offset, NULL);
6361 if (bsp == NULL)
6362 {
6363 bsp = sejp->bsp;
6364 cut_offset = sejp->offset;
6365 }
6366 else
6367 {
6368 cut_offset = sejp->offset + offset;
6369 }
6370
6371 /* fix features */
6372 if (sejp->entityID > 0 && SeqMgrFeaturesAreIndexed (sejp->entityID)) {
6373
6374 sfp = NULL;
6375 while ((sfp = SeqEdGetNextFeature (bsp, sfp, 0, 0, &fcontext, FALSE, FALSE, sejp->entityID)) != NULL)
6376 {
6377 if ((cut_offset <= fcontext.left && cut_offset + sejp->num_chars >= fcontext.left)
6378 || (cut_offset >= fcontext.left && cut_offset + sejp->num_chars <= fcontext.right)
6379 || (cut_offset <= fcontext.right && cut_offset + sejp->num_chars >= fcontext.right))
6380 {
6381 tmp_sfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
6382 }
6383 else
6384 {
6385 tmp_sfp = NULL;
6386 }
6387 /* if we're deleting a gap and the feature is a coding region, merge location
6388 * by default */
6389 merge_mode = sejp->spliteditmode;
6390 if (sejp->action == eSeqEdInsertGap || sejp->action == eSeqEdDeleteGap
6391 && sfp->data.choice == SEQFEAT_CDREGION)
6392 {
6393 merge_mode = TRUE;
6394 }
6395
6396 feat_change = SeqEdSeqFeatDelete (sfp, bsp, cut_offset,
6397 cut_offset + sejp->num_chars - 1,
6398 sejp->spliteditmode);
6399
6400 if (feat_change == 0 || feat_change == 1)
6401 {
6402 if (ReStitchLocation (cut_offset, sfp))
6403 {
6404 feat_change = 1;
6405 location_restitched = TRUE;
6406 }
6407 }
6408
6409 if (feat_change > 0)
6410 {
6411 if (feat_change == 2)
6412 {
6413 /* remove from index and SeqAnnot */
6414 sfp->idx.deleteme = TRUE;
6415 feats_deleted = TRUE;
6416 }
6417
6418 afp = AffectedFeatNew ();
6419 if (afp != NULL)
6420 {
6421 afp->feat_before = tmp_sfp;
6422 if (feat_change != 2)
6423 {
6424 afp->feat_after = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
6425 if (afp->feat_after != NULL)
6426 {
6427 afp->feat_after->idx.itemID = sfp->idx.itemID;
6428 }
6429 }
6430 }
6431 ValNodeAddPointer (&sejp->affected_feats, 0, afp);
6432 feats_altered = TRUE;
6433 }
6434 else
6435 {
6436 SeqFeatFree (tmp_sfp);
6437 }
6438 if (bsp != sejp->bsp)
6439 {
6440 adjusted_master = TRUE;
6441 }
6442 }
6443 } else {
6444 bcp = BioseqContextNew(sejp->bsp);
6445 sfp = NULL;
6446 /* adjust features pointing by location */
6447 while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 0)) != NULL)
6448 {
6449 tmp_sfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
6450 /* if we're deleting a gap and the feature is a coding region, merge location
6451 * by default */
6452 merge_mode = sejp->spliteditmode;
6453 if (sejp->action == eSeqEdInsertGap || sejp->action == eSeqEdDeleteGap
6454 && sfp->data.choice == SEQFEAT_CDREGION)
6455 {
6456 merge_mode = TRUE;
6457 }
6458 feat_change = SeqEdSeqFeatDelete (sfp, bsp, cut_offset,
6459 cut_offset + sejp->num_chars - 1,
6460 sejp->spliteditmode);
6461
6462 if (feat_change == 0 || feat_change == 1)
6463 {
6464 if (ReStitchLocation (cut_offset, sfp))
6465 {
6466 feat_change = 1;
6467 location_restitched = TRUE;
6468 }
6469 }
6470
6471 if (feat_change > 0)
6472 {
6473 if (feat_change == 2)
6474 {
6475 /* remove from index and SeqAnnot */
6476 sfp->idx.deleteme = TRUE;
6477 feats_deleted = TRUE;
6478 }
6479 afp = AffectedFeatNew ();
6480 if (afp != NULL)
6481 {
6482 afp->feat_before = tmp_sfp;
6483 afp->feat_after = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
6484 }
6485 ValNodeAddPointer (&sejp->affected_feats, 0, afp);
6486 feats_altered = TRUE;
6487 }
6488 else
6489 {
6490 SeqFeatFree (tmp_sfp);
6491 }
6492 }
6493 BioseqContextFree(bcp);
6494 }
6495
6496 /* now delete nucleotides from bioseq */
6497 switch (sejp->bsp->repr)
6498 {
6499 case Seq_repr_raw:
6500 case Seq_repr_const:
6501 if (sejp->bsp->seq_data_type != Seq_code_gap) {
6502 /* if actual sequence present */
6503 if (ISA_na(sejp->bsp->mol))
6504 {
6505 if (sejp->bsp->seq_data_type != Seq_code_iupacna) /* need 1 byte/base */
6506 BioseqRawConvert(sejp->bsp, Seq_code_iupacna);
6507 }
6508 else
6509 {
6510 if (sejp->bsp->seq_data_type != Seq_code_ncbieaa)
6511 BioseqRawConvert(sejp->bsp, Seq_code_ncbieaa);
6512 }
6513
6514 BSSeek((ByteStorePtr) sejp->bsp->seq_data, sejp->offset, SEEK_SET);
6515 deleted = BSDelete((ByteStorePtr) sejp->bsp->seq_data, sejp->num_chars);
6516 if (deleted != sejp->num_chars) /* error */
6517 ErrPost(CTX_NCBIOBJ, 1, "Delete of %ld residues failed", sejp->num_chars);
6518 else
6519 retval = TRUE;
6520 }
6521 break;
6522 case Seq_repr_seg:
6523 /* update segmented sequence */
6524 retval = SeqEdDeleteFromSegOrDeltaBsp (sejp->bsp, sejp->offset, sejp->offset + sejp->num_chars - 1);
6525 break;
6526 case Seq_repr_delta:
6527 /* update delta sequence */
6528 retval = SeqEdDeleteFromDeltaBsp (sejp->bsp, sejp->offset, sejp->offset + sejp->num_chars - 1);
6529 break;
6530 case Seq_repr_map:
6531 /* map bioseq */
6532 retval = SeqEdDeleteFromMapBioseq (sejp->bsp, sejp->offset, sejp->offset + sejp->num_chars - 1);
6533 break;
6534 case Seq_repr_virtual:
6535 retval = TRUE; /* nothing to do */
6536 break;
6537 }
6538
6539 if (retval)
6540 {
6541 sejp->bsp->length -= sejp->num_chars;
6542 if (bsp != sejp->bsp)
6543 {
6544 bsp->length -= sejp->num_chars;
6545 }
6546 }
6547
6548 if (feats_deleted)
6549 {
6550 DeleteMarkedObjects (sejp->entityID, 0, NULL);
6551 SeqMgrIndexFeatures (sejp->entityID, NULL);
6552 }
6553 else if (location_restitched || adjusted_master)
6554 {
6555 SeqMgrIndexFeatures (sejp->entityID, NULL);
6556 }
6557 else
6558 {
6559 SeqEdReindexAffectedFeatures (sejp->offset, 0 - sejp->num_chars,
6560 sejp->spliteditmode, sejp->bsp);
6561
6562 }
6563
6564 if (pfeats_deleted != NULL)
6565 {
6566 *pfeats_deleted = feats_deleted;
6567 }
6568
6569 return retval;
6570 }
6571
6572 /* this function will indicate whether the interval on the Bioseq specified contains
6573 * any gaps of unknown length.
6574 */
6575
6576 /*
6577 static Boolean DoesIntervalContainUnknownGap (BioseqPtr bsp, Int4 from, Int4 to)
6578 {
6579 DeltaSeqPtr from_dsp, to_dsp, this_dsp;
6580 Int4 from_start = 0, to_start = 0;
6581 Boolean unknown_gap = FALSE;
6582
6583 if (bsp == NULL || from < 0 || from >= bsp->length || to < 0 || to >= bsp->length)
6584 {
6585 return FALSE;
6586 }
6587
6588 from_dsp = GetDeltaSeqForOffset (bsp, from, &from_start);
6589 to_dsp = GetDeltaSeqForOffset (bsp, to, &to_start);
6590
6591 this_dsp = from_dsp;
6592 while (!unknown_gap && this_dsp != NULL && (to_dsp == NULL || this_dsp != to_dsp->next))
6593 {
6594 unknown_gap = IsDeltaSeqUnknownGap (this_dsp);
6595 this_dsp = this_dsp->next;
6596 }
6597
6598 return unknown_gap;
6599 }
6600 */
6601
6602 /* This section of code deals with editing the sequence by inserting and removing characters.
6603 * Functions are needed to change the indices for the affected features so that they will
6604 * display properly.
6605 */
6606 static void SeqEdFixExtraIndex
6607 (SMFeatItemPtr PNTR array,
6608 Int4 num,
6609 Int4 shift_start,
6610 Int4 shift_amt,
6611 Boolean split,
6612 BioseqPtr bsp)
6613 {
6614 SMFeatItemPtr item;
6615 Int4 i = 0, j, k, n;
6616 Int4Ptr newivals;
6617
6618 if (array == NULL || num < 1 || bsp == NULL) return;
6619 while (i < num) {
6620 item = array [i];
6621 i++;
6622 if (item != NULL) {
6623 if (item->right >= shift_start)
6624 {
6625 if (item->left > shift_start
6626 || (shift_amt > 0 && item->left == shift_start))
6627 {
6628 /* move left and right indexed endpoints */
6629 item->left += shift_amt;
6630 if (item->left < 0)
6631 {
6632 item->left = 0;
6633 }
6634 item->right += shift_amt;
6635 /* move all ivals */
6636 for (j = 0; j < item->numivals; j++)
6637 {
6638 item->ivals [2 * j] += shift_amt;
6639 if (item->ivals [2 * j] < 0)
6640 {
6641 item->ivals [2 * j] = 0;
6642 }
6643 item->ivals [2 * j + 1] += shift_amt;
6644 if (item->ivals [2 * j + 1] < 0)
6645 {
6646 item->ivals [2 * j + 1] = 0;
6647 }
6648 }
6649 }
6650 else
6651 {
6652 item->right += shift_amt;
6653 for (j = 0; j < item->numivals; j++)
6654 {
6655 if (item->ivals [2 * j] < shift_start && item->ivals[2 * j + 1] < shift_start)
6656 {
6657 /* upstream - we may safely ignore */
6658 }
6659 else if ((item->ivals [2 * j] > shift_start && item->ivals [2 * j + 1] > shift_start)
6660 || (shift_amt > 0 && item->ivals [2 * j] >= shift_start
6661 && item->ivals [2 * j + 1] >= shift_start))
6662 {
6663 /* downstream - shift both endpoints */
6664 item->ivals [2 * j] += shift_amt;
6665 item->ivals [2 * j + 1] += shift_amt;
6666 }
6667 else if (split)
6668 {
6669 /* create a new list of ivals */
6670 newivals = (Int4Ptr) MemNew (sizeof (Int4) * (item->numivals + 1) * 2);
6671 /* copy all ivals up to j into new list */
6672 for (k = 0; k < j; k++)
6673 {
6674 newivals [2 * k] = item->ivals [2 * k];
6675 newivals [2 * k + 1] = item->ivals [2 * k + 1];
6676 }
6677 /* create two intervals using split */
6678 if (item->ivals [2 * j] < item->ivals [2 * j + 1])
6679 {
6680 /* plus strand */
6681 newivals [2 * k] = item->ivals [2 * j];
6682 newivals [2 * k + 1] = shift_start - 1;
6683 k++;
6684 newivals [2 * k] = shift_start + shift_amt;
6685 newivals [2 * k + 1] = item->ivals [2 * j + 1] + shift_amt;
6686 k++;
6687 }
6688 else
6689 {
6690 /* minus strand */
6691 newivals [2 * k] = item->ivals [2 * j] + shift_amt;
6692 newivals [2 * k + 1] = shift_start + shift_amt;
6693 k++;
6694 newivals [2 * k] = shift_start - 1;
6695 newivals [2 * k + 1] = item->ivals [2 * j + 1];
6696 k++;
6697 }
6698 /* copy remaining intervals (they will be shifted later in the loop */
6699 n = j + 1;
6700 while (n < item->numivals)
6701 {
6702 newivals[2 * k] = item->ivals [2 * n];
6703 newivals[2 * k + 1] = item->ivals [2 * n + 1];
6704 k++;
6705 n++;
6706 }
6707 MemFree (item->ivals);
6708 item->ivals = newivals;
6709 item->numivals ++;
6710 /* increment j so that we will not re-increment the second interval */
6711 j++;
6712 }
6713 else
6714 {
6715 /* move only downstream endpoint */
6716 if (item->ivals [2 * j] > shift_start
6717 || (shift_amt > 0 && item->ivals [2 * j] == shift_start))
6718 {
6719 item->ivals [2 * j] += shift_amt;
6720 if (item->ivals [2 * j] < 0)
6721 {
6722 item->ivals [2 * j] = 0;
6723 }
6724 }
6725 else
6726 {
6727 item->ivals [2 * j + 1] += shift_amt;
6728 if (item->ivals [2 * j + 1] < 0)
6729 {
6730 item->ivals [2 * j + 1] = 0;
6731 }
6732 }
6733 }
6734 }
6735 }
6736 }
6737 }
6738 }
6739 }
6740
6741 NLM_EXTERN void SeqEdReindexAffectedFeatures (Int4 shift_start, Int4 shift_amt,
6742 Boolean split, BioseqPtr bsp)
6743 {
6744 ObjMgrDataPtr omdp;
6745 BioseqExtraPtr bspextra;
6746 ObjMgrPtr omp;
6747
6748 if (bsp == NULL) return;
6749
6750 omdp = (ObjMgrDataPtr) bsp->omdp;
6751 if (omdp == NULL)
6752 {
6753 omp = ObjMgrWriteLock ();
6754 omdp = ObjMgrFindByData (omp, bsp);
6755 ObjMgrUnlock ();
6756 bsp->omdp = (Pointer) omdp;
6757 }
6758 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return;
6759
6760 bspextra = (BioseqExtraPtr) omdp->extradata;
6761 if (bspextra == NULL) return;
6762
6763 SeqEdFixExtraIndex (bspextra->featsByPos, bspextra->numfeats,
6764 shift_start, shift_amt, split, bsp);
6765 }
6766
6767 NLM_EXTERN void SeqEdReindexFeature (SeqFeatPtr sfp, BioseqPtr bsp)
6768 {
6769 ObjMgrDataPtr omdp;
6770 BioseqExtraPtr bspextra;
6771 ObjMgrPtr omp;
6772 Int4 i;
6773 SeqLocPtr this_slp;
6774 SMFeatItemPtr item = NULL;
6775 Int4 numivals;
6776 Int4 start, stop;
6777 Int4 left, right;
6778
6779 if (sfp == NULL || bsp == NULL) return;
6780 omdp = (ObjMgrDataPtr) bsp->omdp;
6781 if (omdp == NULL)
6782 {
6783 omp = ObjMgrWriteLock ();
6784 omdp = ObjMgrFindByData (omp, bsp);
6785 ObjMgrUnlock ();
6786 bsp->omdp = (Pointer) omdp;
6787 }
6788 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return;
6789
6790 bspextra = (BioseqExtraPtr) omdp->extradata;
6791 if (bspextra == NULL) return;
6792
6793 for (i = 0; i < bspextra->numfeats; i++)
6794 {
6795 item = bspextra->featsByPos [i];
6796 if (item != NULL && item->itemID == sfp->idx.itemID)
6797 {
6798 /* first, find out how many intervals we have, so we can make sure our ivals
6799 * array is the right size */
6800 for (this_slp = SeqLocFindNext (sfp->location, NULL), numivals = 0;
6801 this_slp != NULL;
6802 this_slp = this_slp->next, numivals ++)
6803 {
6804
6805 }
6806 if (numivals != item->numivals)
6807 {
6808 item->ivals = MemFree (item->ivals);
6809 item->ivals = (Int4Ptr) MemNew (2 * numivals * sizeof (Int4));
6810 if (item->ivals == NULL) return;
6811 item->numivals = numivals;
6812 }
6813
6814 /* now populate the ivals */
6815
6816 left = -1;
6817 right = -1;
6818 for (this_slp = SeqLocFindNext (sfp->location, NULL), numivals = 0;
6819 this_slp != NULL;
6820 this_slp = this_slp->next, numivals ++)
6821 {
6822 start = GetOffsetInBioseq (this_slp, bsp, SEQLOC_START);
6823 stop = GetOffsetInBioseq (this_slp, bsp, SEQLOC_STOP);
6824 item->ivals [2 * numivals] = start;
6825 item->ivals [2 * numivals + 1] = stop;
6826 if (left == -1 || start < left)
6827 {
6828 left = start;
6829 }
6830 if (stop < left)
6831 {
6832 left = stop;
6833 }
6834 if (right == -1 || right < start)
6835 {
6836 right = start;
6837 }
6838 if (right < stop)
6839 {
6840 right = stop;
6841 }
6842 }
6843 item->left = left;
6844 item->right = right;
6845 }
6846 else
6847 {
6848 item = NULL;
6849 }
6850 }
6851 }
6852
6853
6854 /* This function will repair any problems with the interval order that
6855 * moving the feature interval around may have caused.
6856 */
6857 NLM_EXTERN void SeqEdRepairIntervalOrder (SeqFeatPtr sfp, BioseqPtr bsp)
6858 {
6859 Boolean hasNulls;
6860 SeqLocPtr gslp;
6861 Boolean noLeft, noRight;
6862
6863 hasNulls = LocationHasNullsBetween (sfp->location);
6864 gslp = SeqLocMerge (bsp, sfp->location, NULL, FALSE, FALSE, hasNulls);
6865 if (gslp != NULL)
6866 {
6867 CheckSeqLocForPartial (sfp->location, &noLeft, &noRight);
6868 sfp->location = SeqLocFree (sfp->location);
6869 sfp->location = gslp;
6870 if (bsp->repr == Seq_repr_seg)
6871 {
6872 gslp = SegLocToParts (bsp, sfp->location);
6873 sfp->location = SeqLocFree (sfp->location);
6874 sfp->location = gslp;
6875 }
6876 FreeAllFuzz (sfp->location);
6877 SetSeqLocPartial (sfp->location, noLeft, noRight);
6878 }
6879 }
6880
6881 /* This function recursively frees a list of SeqEdJournalPtr, working in the next direction,
6882 * and fixes the prev pointer for the previous entry in the SeqEdJournalPtr list (if there is one).
6883 */
6884 NLM_EXTERN void SeqEdJournalFree (SeqEdJournalPtr sejp)
6885 {
6886 SeqEdJournalPtr prev;
6887
6888 if (sejp == NULL) return;
6889 SeqEdJournalFree (sejp->next);
6890 sejp->slp = SeqLocFree (sejp->slp);
6891 MemFree (sejp->char_data);
6892 sejp->affected_feats = SeqEdJournalAffectedFeatsFree (sejp->affected_feats);
6893 prev = sejp->prev;
6894 if (prev != NULL)
6895 prev->next = NULL;
6896 MemFree (sejp);
6897 }
6898
6899 NLM_EXTERN SeqEdJournalPtr SeqEdJournalNewSeqEdit
6900 (ESeqEdJournalAction action,
6901 Int4 offset,
6902 Int4 num_chars,
6903 CharPtr char_data,
6904 Boolean spliteditmode,
6905 BioseqPtr bsp,
6906 Uint1 moltype,
6907 Uint2 entityID)
6908 {
6909 SeqEdJournalPtr sejp;
6910
6911 if (num_chars == 0) return NULL;
6912 sejp = (SeqEdJournalPtr) MemNew (sizeof (SeqEdJournalData));
6913 if (sejp == NULL) return NULL;
6914 sejp->action = action;
6915 sejp->offset = offset;
6916 sejp->num_chars = num_chars;
6917 sejp->spliteditmode = spliteditmode;
6918 sejp->affected_feats = NULL;
6919 sejp->sfp = NULL;
6920 sejp->slp = NULL;
6921 sejp->bsp = bsp;
6922 sejp->moltype = moltype;
6923 sejp->entityID = entityID;
6924 sejp->char_data = MemNew (sejp->num_chars + 1);
6925 if (char_data != NULL)
6926 {
6927 StringCpy (sejp->char_data, char_data);
6928 }
6929 sejp->prev = NULL;
6930 sejp->next = NULL;
6931 return sejp;
6932 }
6933
6934 NLM_EXTERN SeqEdJournalPtr SeqEdJournalNewFeatEdit
6935 (ESeqEdJournalAction action,
6936 SeqFeatPtr sfp,
6937 SeqLocPtr slp,
6938 BioseqPtr bsp,
6939 Uint1 moltype,
6940 Uint2 entityID)
6941 {
6942 SeqEdJournalPtr sejp;
6943
6944 if (sfp == NULL || slp == NULL) return NULL;
6945 sejp = (SeqEdJournalPtr) MemNew (sizeof (SeqEdJournalData));
6946 if (sejp == NULL) return NULL;
6947 sejp->action = action;
6948 sejp->offset = 0;
6949 sejp->num_chars = 0;
6950 sejp->spliteditmode = FALSE;
6951 sejp->sfp = sfp;
6952 sejp->slp = slp;
6953 sejp->bsp = bsp;
6954 sejp->affected_feats = NULL;
6955 sejp->moltype = moltype;
6956 sejp->entityID = entityID;
6957 sejp->char_data = NULL;
6958 sejp->prev = NULL;
6959 sejp->next = NULL;
6960 return sejp;
6961 }
6962
6963 /* This section of code contains functions used by the new sequence editor for moving feature
6964 * intervals.
6965 */
6966 static Boolean SeqEdAdjustFeatureInterval
6967 (SeqLocPtr slp, Int4 change, EMoveType move_type, Int4 interval_offset, BioseqPtr bsp)
6968 {
6969 SeqIntPtr sint;
6970 SeqPntPtr spp;
6971 SeqLocPtr this_slp;
6972 Boolean rval = FALSE;
6973
6974 if (slp == NULL || bsp == NULL) return rval;
6975
6976 if (slp->choice == SEQLOC_INT)
6977 {
6978 if (interval_offset != 0)
6979 {
6980 return rval;
6981 }
6982 sint = (SeqIntPtr)slp->data.ptrvalue;
6983 switch (move_type)
6984 {
6985 case eLeftEnd:
6986 if (sint->from + change < sint->to
6987 && sint->from + change > -1
6988 && sint->from + change < bsp->length)
6989 {
6990 sint->from += change;
6991 rval = TRUE;
6992 }
6993 break;
6994 case eRightEnd:
6995 if (sint->to + change > sint->from
6996 && sint->to + change > -1
6997 && sint->to + change < bsp->length)
6998 {
6999 sint->to += change;
7000 rval = TRUE;
7001 }
7002 break;
7003 case eSlide:
7004 if (sint->from + change > -1 && sint->from + change < bsp->length
7005 && sint->to + change > -1 && sint->to + change < bsp->length)
7006 {
7007 sint->from += change;
7008 sint->to += change;
7009 rval = TRUE;
7010 }
7011 }
7012 }
7013 else if (slp->choice == SEQLOC_PNT)
7014 {
7015 if (interval_offset != 0)
7016 {
7017 return rval;
7018 }
7019 spp = (SeqPntPtr)(slp->data.ptrvalue);
7020 if (spp->point + change > -1 && spp->point + change < bsp->length)
7021 {
7022 spp->point += change;
7023 rval = TRUE;
7024 }
7025 }
7026 else
7027 {
7028 for (this_slp = SeqLocFindNext (slp, NULL);
7029 this_slp != NULL && interval_offset > 0;
7030 this_slp = SeqLocFindNext (slp, this_slp), interval_offset --)
7031 {}
7032 if (this_slp != NULL && interval_offset == 0)
7033 {
7034 rval = SeqEdAdjustFeatureInterval (this_slp, change, move_type, interval_offset, bsp);
7035 }
7036 }
7037 return rval;
7038 }
7039
7040
7041 NLM_EXTERN Boolean SeqEdGetNthIntervalEndPoints
7042 (SeqLocPtr slp, Int4 n, Int4Ptr left, Int4Ptr right)
7043 {
7044 Boolean rval = FALSE;
7045 SeqIntPtr sintp;
7046 SeqPntPtr spp;
7047 SeqLocPtr this_slp;
7048
7049 if (slp == NULL || left == NULL || right == NULL || n < 0) return FALSE;
7050 switch (slp->choice)
7051 {
7052 case SEQLOC_INT:
7053 if (n == 0)
7054 {
7055 sintp = (SeqIntPtr) slp->data.ptrvalue;
7056 *left = sintp->from;
7057 *right = sintp->to;
7058 rval = TRUE;
7059 }
7060 break;
7061 case SEQLOC_PNT:
7062 if (n == 0)
7063 {
7064 spp = (SeqPntPtr) slp->data.ptrvalue;
7065 *left = spp->point;
7066 *right = spp->point;
7067 rval = TRUE;
7068 }
7069 break;
7070 default:
7071 for (this_slp = SeqLocFindNext (slp, NULL);
7072 this_slp != NULL && n > 0;
7073 this_slp = SeqLocFindNext (slp, this_slp), n --)
7074 {}
7075 if (this_slp != NULL && n == 0)
7076 {
7077 rval = SeqEdGetNthIntervalEndPoints (this_slp, n, left, right);
7078 }
7079 break;
7080 }
7081 return rval;
7082 }
7083
7084 static void
7085 SeqEdFixFeatureIndexForFeatureLocAdjust
7086 (BioseqPtr bsp,
7087 SeqFeatPtr sfp,
7088 Int4 change,
7089 Int4 move_type,
7090 Int4 interval_offset)
7091 {
7092 ObjMgrDataPtr omdp;
7093 BioseqExtraPtr bspextra;
7094 ObjMgrPtr omp;
7095 SMFeatItemPtr item;
7096 Int4 i, j;
7097 Int4 left, right;
7098
7099 if (bsp == NULL || sfp == NULL) return;
7100
7101 omdp = (ObjMgrDataPtr) bsp->omdp;
7102 if (omdp == NULL)
7103 {
7104 omp = ObjMgrWriteLock ();
7105 omdp = ObjMgrFindByData (omp, bsp);
7106 ObjMgrUnlock ();
7107 bsp->omdp = (Pointer) omdp;
7108 }
7109 if (omdp == NULL || omdp->datatype != OBJ_BIOSEQ) return;
7110
7111 bspextra = (BioseqExtraPtr) omdp->extradata;
7112 if (bspextra == NULL) return;
7113
7114 if (! SeqEdGetNthIntervalEndPoints (sfp->location, interval_offset, &left, &right))
7115 {
7116 return;
7117 }
7118
7119 i = 0;
7120 while (i < bspextra->numfeats) {
7121 item = bspextra->featsByPos [i];
7122 i++;
7123 if (item != NULL && item->itemID == sfp->idx.itemID)
7124 {
7125 if (interval_offset >= item->numivals || interval_offset < 0) return;
7126 if (item->ivals [ 2 * interval_offset] < item->ivals [2 * interval_offset + 1])
7127 {
7128 item->ivals [2 * interval_offset] = left;
7129 item->ivals [2 * interval_offset + 1] = right;
7130 }
7131 else
7132 {
7133 item->ivals [2 * interval_offset + 1] = left;
7134 item->ivals [2 * interval_offset] = right;
7135 }
7136 /* correct item left and right values */
7137 if (item->ivals [0] > item->ivals [1])
7138 {
7139 item->right = item->ivals [0];
7140 item->left = item->ivals [1];
7141 }
7142 else
7143 {
7144 item->left = item->ivals [0];
7145 item->right = item->ivals [1];
7146 }
7147 for (j = 1; j < item->numivals; j++)
7148 {
7149 if (item->left > item->ivals[2 * j])
7150 {
7151 item->left = item->ivals [2 * j];
7152 }
7153 if (item->left > item->ivals [2 * j + 1])
7154 {
7155 item->left = item->ivals [2 * j + 1];
7156 }
7157 if (item->right < item->ivals [2 * j])
7158 {
7159 item->right = item->ivals [2 * j];
7160 }
7161 if (item->right < item->ivals [2 * j + 1])
7162 {
7163 item->right = item->ivals [ 2 * j + 1];
7164 }
7165 }
7166 }
7167 }
7168 }
7169
7170
7171 NLM_EXTERN void SeqEdFeatureAdjust
7172 (SeqFeatPtr sfp,
7173 SeqLocPtr orig_loc,
7174 Int4 change,
7175 EMoveType move_type,
7176 Int4 interval_offset,
7177 BioseqPtr bsp)
7178 {
7179 SeqLocPtr new_loc;
7180 Boolean partial3, partial5;
7181
7182 if (sfp == NULL || bsp == NULL)
7183 {
7184 return;
7185 }
7186
7187 CheckSeqLocForPartial (orig_loc, &partial5, &partial3);
7188 new_loc = SeqLocMerge (bsp, orig_loc, NULL, FALSE, FALSE, FALSE);
7189 if (new_loc == NULL)
7190 {
7191 return;
7192 }
7193 SetSeqLocPartial (new_loc, partial5, partial3);
7194
7195 if (SeqEdAdjustFeatureInterval (new_loc, change, move_type, interval_offset, bsp))
7196 {
7197 SeqLocFree (sfp->location);
7198 sfp->location = new_loc;
7199
7200 /* need to reindex feature */
7201 SeqEdFixFeatureIndexForFeatureLocAdjust (bsp, sfp, change, move_type, interval_offset);
7202 }
7203 }
7204
7205
7206 NLM_EXTERN void
7207 AdjustFeatureForGapChange
7208 (SeqFeatPtr sfp,
7209 BioseqPtr bsp,
7210 Int4 offset,
7211 Int4 len_diff)
7212 {
7213 if (sfp == NULL || bsp == NULL || offset < 0 || len_diff == 0)
7214 {
7215 return;
7216 }
7217
7218 if (len_diff > 0)
7219 {
7220 SeqEdSeqFeatDelete (sfp, bsp, offset, offset + len_diff - 1, TRUE);
7221 }
7222 else
7223 {
7224 sfp->location = SeqEdSeqLocInsert (sfp->location, bsp, offset, -len_diff, FALSE, NULL);
7225 if (sfp->data.choice == SEQFEAT_CDREGION)
7226 {
7227 SeqEdInsertAdjustCdRgn (sfp, bsp, offset, -len_diff, FALSE);
7228 }
7229 else if (sfp->data.choice == SEQFEAT_RNA)
7230 {
7231 SeqEdInsertAdjustRNA (sfp, bsp, offset, -len_diff, FALSE);
7232 }
7233 }
7234 }
7235
7236
7237
7238
7239 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |