|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/sequin/sequin4.c |
source navigation diff markup identifier search freetext search file search |
1 /* sequin4.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: sequin4.c
27 *
28 * Author: Jonathan Kans
29 *
30 * Version Creation Date: 6/28/96
31 *
32 * $Revision: 6.437 $
33 *
34 * File Description:
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * Date Name Description of modification
39 * ------- ---------- -----------------------------------------------------
40 *
41 *
42 * ==========================================================================
43 */
44
45 #include "sequin.h"
46 #include <ncbilang.h>
47 #include <seqport.h>
48 #include <gather.h>
49 #include <objall.h>
50 #include <objcode.h>
51 #include <utilpub.h>
52 #include <vibrant.h>
53 #include <document.h>
54 #include <toasn3.h>
55 #include <asn2ffp.h>
56 #include <salfiles.h>
57 #include <salsap.h>
58 #include <saledit.h>
59 #include <salign.h>
60 #include <salptool.h>
61 #include <subutil.h>
62 #include <pobutil.h>
63 #include <tfuns.h>
64 #include <edutil.h>
65 #include <biosrc.h>
66 #include <seqgrphx.h>
67 #include <seqmtrx.h>
68 #include <bspview.h>
69 #include <vsmpriv.h>
70 #include <explore.h>
71 #include <alignval.h>
72 #include <alignmgr.h>
73 #include <alignmgr2.h>
74 #include <aliparse.h>
75 #include <spidey.h>
76 #include <ent2api.h>
77 #include <valid.h>
78 #include <sqnutils.h>
79 #include <seqpanel.h>
80 #include <salpanel.h>
81 #include <findrepl.h>
82 #include <macrodlg.h>
83 #include <macroapi.h>
84
85 static Int2 LIBCALLBACK CreateSegSet (Pointer data);
86 static Int2 LIBCALLBACK ConvertToDeltaSequence (Pointer data);
87 static Int2 LIBCALLBACK FeatToDeltaSeq (Pointer data);
88 static Int2 LIBCALLBACK CopyMasterSourceToSegments (Pointer data);
89
90 #define REGISTER_REMOVESET ObjMgrProcLoadEx (OMPROC_FILTER,"Remove Set","RemoveSet",0,0,0,0,NULL,RemoveSet,PROC_PRIORITY_DEFAULT, "Indexer")
91
92 #define REGISTER_REMOVESETSINSET ObjMgrProcLoadEx (OMPROC_FILTER,"Remove Sets in Selected Set","RemoveSetsInSelectedSet",0,0,0,0,NULL,RemoveSetsInSelectedSet,PROC_PRIORITY_DEFAULT, "Indexer")
93
94 #define REGISTER_REPACKAGE_PARTS ObjMgrProcLoadEx (OMPROC_FILTER,"Repackage Segmented Parts","RepackageParts",0,0,0,0,NULL,PackagePartsInPartsSet,PROC_PRIORITY_DEFAULT, "Indexer")
95
96 #define REGISTER_NORMALIZE_NUCPROT ObjMgrProcLoadEx (OMPROC_FILTER,"Normalize Nuc-Prot","NormalizeNucProts",0,0,0,0,NULL,NormalizeNucProts,PROC_PRIORITY_DEFAULT, "Indexer")
97
98 #define REGISTER_REMOVE_EXTRANEOUS ObjMgrProcLoadEx (OMPROC_FILTER,"Remove Extraneous Sets","RemoveExtraneousSets",0,0,0,0,NULL,RemoveExtraneousSets,PROC_PRIORITY_DEFAULT, "Indexer")
99
100 #define REGISTER_POPSET_WITHIN_GENBANK ObjMgrProcLoadEx (OMPROC_FILTER,"Add Popset Within GenBank Set","AddPopSetWithinGenBankSet",0,0,0,0,NULL,PopWithinGenBankSet,PROC_PRIORITY_DEFAULT, "Indexer")
101
102 #define REGISTER_PHYSET_WITHIN_GENBANK ObjMgrProcLoadEx (OMPROC_FILTER,"Add Physet Within GenBank Set","AddPhySetWithinGenBankSet",0,0,0,0,NULL,PhyWithinGenBankSet,PROC_PRIORITY_DEFAULT, "Indexer")
103
104 #define REGISTER_REMOVE_MESSEDUP ObjMgrProcLoadEx (OMPROC_FILTER,"Repair Messed Up Sets","RepairMessedUpSets",0,0,0,0,NULL,RepairMessedUpRecord,PROC_PRIORITY_DEFAULT, "Indexer")
105
106 #define REGISTER_UPDATE_SEQALIGN ObjMgrProcLoadEx (OMPROC_FILTER, "Update SeqAlign","UpdateSeqAlign",OBJ_SEQALIGN,0,OBJ_SEQALIGN,0,NULL,NewUpdateSeqAlign,PROC_PRIORITY_DEFAULT, "Alignment")
107
108 #define REGISTER_FIX_ALIGNMENT_OVER_GAPS ObjMgrProcLoadEx (OMPROC_FILTER, "Fix Alignment Over Gaps", "FixAlignmentOverGaps", 0,0,0,0,NULL,FixAlignmentOverGaps,PROC_PRIORITY_DEFAULT, "Alignment")
109
110 #define REGISTER_FIX_ALIGNMENT_GAP_GAPS ObjMgrProcLoadEx (OMPROC_FILTER, "Fix Alignment Gaps Containing Known Gaps", "FixKnownGapAlignmentGaps", 0,0,0,0,NULL,ConsolidateGapGaps, PROC_PRIORITY_DEFAULT, "Alignment")
111
112 #define REGISTER_DELETE_BY_TEXT ObjMgrProcLoadEx (OMPROC_FILTER, "Delete By Text","DeleteByText",0,0,0,0,NULL,CreateDeleteByTextWindow,PROC_PRIORITY_DEFAULT, "Indexer")
113
114 #define REGISTER_REORDER_BY_ID ObjMgrProcLoadEx (OMPROC_FILTER, "Reorder by ID","ReorderByID",0,0,0,0,NULL,ReorderSetByAccession,PROC_PRIORITY_DEFAULT, "Indexer")
115
116 #define REGISTER_CONVERTSEQALIGN ObjMgrProcLoadEx (OMPROC_FILTER,"Convert SeqAlign","ConvertSeqAlign",0,0,0,0,NULL,ConvertToTrueMultipleAlignment,PROC_PRIORITY_DEFAULT, "Alignment")
117
118 #define REGISTER_CONVERTTOSEGSETALIGN ObjMgrProcLoadEx (OMPROC_FILTER,"Convert to SegSetAlign","ConvertSegSetAlign",0,0,0,0,NULL,ConvertToSegSetAlignment,PROC_PRIORITY_DEFAULT, "Alignment")
119
120 #define REGISTER_MAKESEQALIGN ObjMgrProcLoadEx (OMPROC_FILTER,"Make SeqAlign","CreateSeqAlign",0,0,0,0,NULL,GenerateSeqAlignFromSeqEntry,PROC_PRIORITY_DEFAULT, "Alignment")
121
122 #define REGISTER_MAKESEQALIGNNEWBLAST ObjMgrProcLoadEx (OMPROC_FILTER,"Make SeqAlign, New BLAST","CreateSeqAlignNewBLAST",0,0,0,0,NULL,GenerateSeqAlignFromSeqEntryUseNewBlast,PROC_PRIORITY_DEFAULT, "Alignment")
123
124 #define REGISTER_MAKESEQALIGNMASTER ObjMgrProcLoadEx (OMPROC_FILTER,"Make SeqAlign Choose Master","CreateSeqAlignChooseMaster",0,0,0,0,NULL,GenerateSeqAlignFromSeqEntryChooseMaster,PROC_PRIORITY_DEFAULT, "Alignment")
125
126 #define REGISTER_MAKESEQALIGNMASTERNEWBLAST ObjMgrProcLoadEx (OMPROC_FILTER,"Make SeqAlign Choose Master, New BLAST","CreateSeqAlignChooseMasterNewBLAST",0,0,0,0,NULL,GenerateSeqAlignFromSeqEntryChooseMasterUseNewBlast,PROC_PRIORITY_DEFAULT, "Alignment")
127
128 #define REGISTER_MAKESEQALIGNP ObjMgrProcLoadEx (OMPROC_FILTER,"Make Protein SeqAlign","CreateSeqAlignProt",0,0,0,0,NULL,GenerateSeqAlignFromSeqEntryProt,PROC_PRIORITY_DEFAULT, "Alignment")
129
130 #define REGISTER_MAKESEQALIGNPNEWBLAST ObjMgrProcLoadEx (OMPROC_FILTER,"Make Protein SeqAlign, New BLAST","CreateSeqAlignProtNewBLAST",0,0,0,0,NULL,GenerateSeqAlignFromSeqEntryProtUseNewBlast,PROC_PRIORITY_DEFAULT, "Alignment")
131
132 #define REGISTER_NORMSEQALIGN ObjMgrProcLoadEx (OMPROC_FILTER,"Validate SeqAlign","ValidateSeqAlign",0,0,0,0,NULL,ValidateSeqAlignFromData,PROC_PRIORITY_DEFAULT, "Alignment")
133
134 #define REGISTER_NOMORESEGGAP ObjMgrProcLoadEx (OMPROC_FILTER,"Get Rid of Seg Gap","GetRidOfSegGap",0,0,0,0,NULL,NoMoreSegGap,PROC_PRIORITY_DEFAULT, "Alignment")
135
136 #define REGISTER_PARTSEQALIGNTOPARENT ObjMgrProcLoadEx (OMPROC_FILTER,"Part SeqAlign to Parent","PartSeqAlignToParent",0,0,0,0,NULL,PartSeqAlignToParent,PROC_PRIORITY_DEFAULT, "Alignment")
137
138 #define REGISTER_GROUP_EXPLODE ObjMgrProcLoadEx (OMPROC_FILTER, "Explode a group", "GroupExplode", OBJ_SEQFEAT, 0, OBJ_SEQFEAT, 0, NULL, GroupExplodeFunc, PROC_PRIORITY_DEFAULT, "Indexer")
139
140 #define REGISTER_INTERVAL_COMBINE ObjMgrProcLoadEx (OMPROC_FILTER, "Combine feature intervals", "IntervalCombine", OBJ_SEQFEAT, 0, OBJ_SEQFEAT, 0, NULL, IntervalCombineFunc, PROC_PRIORITY_DEFAULT, "Indexer")
141
142 #define REGISTER_INTERVAL_COMBINE_AND_FUSE ObjMgrProcLoadEx (OMPROC_FILTER, "Combine and fuse feature intervals", "IntervalCombineAndFuse", OBJ_SEQFEAT, 0, OBJ_SEQFEAT, 0, NULL, IntervalCombineAndFuseFunc, PROC_PRIORITY_DEFAULT, "Indexer")
143
144 #define REGISTER_MRNA_FROM_CDS ObjMgrProcLoadEx (OMPROC_FILTER, "mRNA from CDS", "mRNAfromCDS", OBJ_SEQFEAT, FEATDEF_CDS, OBJ_SEQFEAT, FEATDEF_CDS, NULL, MRnaFromCdsFunc, PROC_PRIORITY_DEFAULT, "Utilities")
145
146 #define REGISTER_SPLIT_BIOSEQ ObjMgrProcLoadEx (OMPROC_FILTER,"Split Bioseq Into Segments","SplitBioseqIntoSegments",0,0,0,0,NULL,SplitIntoSegmentedBioseq,PROC_PRIORITY_DEFAULT, "Indexer")
147
148 #define REGISTER_MAKE_GROUPS_OF_100 ObjMgrProcLoadEx (OMPROC_FILTER,"Split Into Groups of 100 Bioseqs","SplitIntoGroupsOf100Bioseqs",0,0,0,0,NULL,MakeGroupsOf200,PROC_PRIORITY_DEFAULT, "Indexer") /* NOTE(review): menu text and proc name say 100, but the registered callback is MakeGroupsOf200 - confirm which group size is actually intended */
149
150 #define REGISTER_MAP_TO_PROT ObjMgrProcLoadEx (OMPROC_FILTER,"Map to Prot","MapToProt",OBJ_SEQFEAT,0,OBJ_SEQFEAT,0,NULL,MapToProtFunc,PROC_PRIORITY_DEFAULT, "Utilities")
151
152 #define REGISTER_MAP_TO_NUC ObjMgrProcLoadEx (OMPROC_FILTER,"Map to Nuc","MapToNuc",OBJ_SEQFEAT,0,OBJ_SEQFEAT,0,NULL,MapToNucFunc,PROC_PRIORITY_DEFAULT, "Utilities")
153
154 #define REGISTER_BIOSEQ_ORF ObjMgrProcLoadEx (OMPROC_FILTER, "ORF Finder", "OrfFinder", OBJ_BIOSEQ, 0, OBJ_BIOSEQ, 0, NULL, OrfFindFunc, PROC_PRIORITY_DEFAULT, "Utilities")
155
156 #define REGISTER_BIOSEQ_REVCOMP_WITHFEAT ObjMgrProcLoadEx (OMPROC_FILTER, "Bioseq and Features RevComp", "BioseqFeatsRevComp", OBJ_BIOSEQ, 0, OBJ_BIOSEQ, 0, NULL, RevCompFuncFeat, PROC_PRIORITY_DEFAULT, "Utilities")
157 #define REGISTER_BIOSEQ_REVCOMP_NOTFEAT ObjMgrProcLoadEx (OMPROC_FILTER, "Bioseq only RevComp", "BioseqOnlyRevComp", OBJ_BIOSEQ, 0, OBJ_BIOSEQ, 0, NULL, RevCompFunc, PROC_PRIORITY_DEFAULT, "Utilities")
158 #define REGISTER_BIOSEQ_REVERSE ObjMgrProcLoadEx (OMPROC_FILTER, "Bioseq Reverse", "BioseqReverse", OBJ_BIOSEQ, 0, OBJ_BIOSEQ, 0, NULL, RevFunc, PROC_PRIORITY_DEFAULT, "Utilities")
159 #define REGISTER_BIOSEQ_COMPLEMENT ObjMgrProcLoadEx (OMPROC_FILTER, "Bioseq Complement", "BioseqComplement", OBJ_BIOSEQ, 0, OBJ_BIOSEQ, 0, NULL, CompFunc, PROC_PRIORITY_DEFAULT, "Utilities")
160 #define REGISTER_BIOSEQ_REVCOMP_BYID ObjMgrProcLoadEx (OMPROC_FILTER, "Bioseq and Features RevComp By ID", "RevCompByID", OBJ_BIOSEQ, 0, OBJ_BIOSEQ, 0, NULL, BioseqRevCompByID, PROC_PRIORITY_DEFAULT, "Utilities")
161
162 #define REGISTER_BIOSEQ_SEG_REPORT ObjMgrProcLoadEx (OMPROC_FILTER, "Bioseq Seg Report", "BioseqSegReport", OBJ_BIOSEQ, 0, OBJ_BIOSEQ, 0, NULL, ReportDeltaSegments, PROC_PRIORITY_DEFAULT, "Misc")
163
164 #define REGISTER_FIXUP_RBS ObjMgrProcLoadEx (OMPROC_FILTER,"Fixup RBS","FixupRBS",0,0,0,0,NULL,FixupRBS,PROC_PRIORITY_DEFAULT, "Indexer")
165
166 #define REGISTER_BSP_INDEX ObjMgrProcLoadEx (OMPROC_FILTER,"Bioseq Index","MakeBioseqIndex",0,0,0,0,NULL,DoBioseqIndexing,PROC_PRIORITY_DEFAULT, "Indexer")
167
168 #define REGISTER_FIND_NON_ACGT ObjMgrProcLoadEx (OMPROC_FILTER,"Find Non ACGT","FindNonACGT",0,0,0,0,NULL,FindNonACGT,PROC_PRIORITY_DEFAULT, "Indexer")
169
170 #define REGISTER_TRIM_GENES ObjMgrProcLoadEx (OMPROC_FILTER,"Trim Genes","TrimGenes",0,0,0,0,NULL,TrimGenes,PROC_PRIORITY_DEFAULT, "Indexer")
171
172 #define REGISTER_OPENALED ObjMgrProcLoadEx (OMPROC_FILTER,"Open Align Editor 1","Open Contiguous Protein Alignment", 0,0,0,0,NULL,LaunchAlignEditorFromDesktop, PROC_PRIORITY_DEFAULT, "Alignment")
173
174 #define REGISTER_OPENALED2 ObjMgrProcLoadEx (OMPROC_FILTER,"Open Align Editor 2","Open Interleave Protein Alignment", 0,0,0,0,NULL,LaunchAlignEditorFromDesktop2, PROC_PRIORITY_DEFAULT, "Alignment")
175
176 #define REGISTER_OPENALED3 ObjMgrProcLoadEx (OMPROC_FILTER,"Open Align Editor 3","AA2NASeqAlign", 0,0,0,0,NULL,LaunchAlignEditorFromDesktop3, PROC_PRIORITY_DEFAULT, "Alignment")
177
178 #define REGISTER_OPENALED4 ObjMgrProcLoadEx (OMPROC_FILTER,"Open Align Editor 4","Alignment Profile", 0,0,0,0,NULL,LaunchAlignEditorFromDesktop4, PROC_PRIORITY_DEFAULT, "Alignment")
179
180 #define REGISTER_MAKEEXONINTRON ObjMgrProcLoadEx (OMPROC_FILTER,"Make Exons and Introns","MakeExonIntron",OBJ_SEQFEAT,0,OBJ_SEQFEAT,0,NULL,MakeExonIntron,PROC_PRIORITY_DEFAULT, "Misc")
181
182 #define REGISTER_DETACH ObjMgrProcLoadEx (OMPROC_FILTER,"Detach After Bioseq","DetachBioseq",OBJ_SEQFEAT,0,OBJ_SEQFEAT,0,NULL,DetachBioseq,PROC_PRIORITY_DEFAULT, "Misc")
183
184 #define REGISTER_PROT_IDS_TO_GENE_SYN ObjMgrProcLoadEx (OMPROC_FILTER,"Protein SeqID to Gene Synonym","ProtLocalIDtoGeneSyn",0,0,0,0,NULL,ProtLocalIDtoGeneSyn,PROC_PRIORITY_DEFAULT, "Indexer")
185
186 #define REGISTER_DESKTOP_REPORT ObjMgrProcLoadEx (OMPROC_FILTER, "Desktop Report", "DesktopReport", 0, 0, 0, 0, NULL, DesktopReportFunc, PROC_PRIORITY_DEFAULT, "Indexer")
187
188 #define REGISTER_DESCRIPTOR_PROPAGATE ObjMgrProcLoadEx (OMPROC_FILTER, "Descriptor Propagate", "DescriptorPropagate", 0, 0, 0, 0, NULL, DescriptorPropagate, PROC_PRIORITY_DEFAULT, "Indexer")
189
190 #define REGISTER_DESCRIPTOR_COPY_TO_LIST ObjMgrProcLoadEx (OMPROC_FILTER, "Copy Descriptor to List", "CopyDescriptorToList", 0, 0, 0, 0, NULL, CopyDescriptorToList, PROC_PRIORITY_DEFAULT, "Indexer")
191
192 #define REGISTER_COPY_MASTER_SOURCE_TO_SEGMENTS ObjMgrProcLoadEx (OMPROC_FILTER, "Copy Master Source To Segments", "CopyMasterSourceToSegments", 0, 0, 0, 0, NULL, CopyMasterSourceToSegments, PROC_PRIORITY_DEFAULT, "Indexer")
193
194 #define REGISTER_CLEAR_SEQENTRYSCOPE ObjMgrProcLoadEx (OMPROC_FILTER, "Clear SeqEntry Scope", "ClearSeqEntryScope", 0, 0, 0, 0, NULL, DoClearSeqEntryScope, PROC_PRIORITY_DEFAULT, "Indexer")
195
196 #define REGISTER_SEQUIN_PROT_TITLES ObjMgrProcLoadEx (OMPROC_FILTER,"Sequin Style Protein Titles","SequinStyleProteinTitles",0,0,0,0,NULL,MakeSequinProteinTitles,PROC_PRIORITY_DEFAULT, "Misc")
197 extern Int2 LIBCALLBACK MakeSequinProteinTitles (Pointer data);
198
199 #define REGISTER_SEQUIN_NUC_TITLES ObjMgrProcLoadEx (OMPROC_FILTER,"Sequin Style Nucleotide Titles","SequinStyleNucleotideTitles",0,0,0,0,NULL,MakeSequinNucleotideTitles,PROC_PRIORITY_DEFAULT, "Misc")
200 extern Int2 LIBCALLBACK MakeSequinNucleotideTitles (Pointer data);
201
202 #define REGISTER_SEQUIN_FEAT_TABLE ObjMgrProcLoadEx (OMPROC_FILTER,"Sequin Style Feature Table","SequinStyleFeatureTable",0,0,0,0,NULL,MakeSequinFeatureTable,PROC_PRIORITY_DEFAULT, "Misc")
203 extern Int2 LIBCALLBACK MakeSequinFeatureTable (Pointer data);
204
205 #define REGISTER_SEQUIN_GI_TO_ACCN ObjMgrProcLoadEx (OMPROC_FILTER,"Convert Align GI to Accession","ConvertAlignGisToAccn",0,0,0,0,NULL,AlignGiToAccnProc,PROC_PRIORITY_DEFAULT, "Misc")
206 static Int2 LIBCALLBACK AlignGiToAccnProc (Pointer data);
207
208 #define REGISTER_SEQUIN_ACCN_TO_GI ObjMgrProcLoadEx (OMPROC_FILTER,"Convert Align Accession To Gi","ConvertAlignAccnsToGi",0,0,0,0,NULL,AlignAccnToGiProc,PROC_PRIORITY_DEFAULT, "Misc")
209 static Int2 LIBCALLBACK AlignAccnToGiProc (Pointer data);
210
211 #define REGISTER_SEQUIN_CACHE_ACCN ObjMgrProcLoadEx (OMPROC_FILTER,"Cache Accessions to Disk","CacheAccnsToDisk",0,0,0,0,NULL,CacheAccnsToDisk,PROC_PRIORITY_DEFAULT, "Misc")
212 static Int2 LIBCALLBACK CacheAccnsToDisk (Pointer data);
213
214 #define REGISTER_SEPARATE_MRNA_ALIGNS ObjMgrProcLoadEx (OMPROC_FILTER,"Separate mRNA Alignments from NR","SeparateMrnaAlignsFromNR",0,0,0,0,NULL,SeparateMrnaFromNrProc,PROC_PRIORITY_DEFAULT, "Misc")
215 static Int2 LIBCALLBACK SeparateMrnaFromNrProc (Pointer data);
216
217 #define REGISTER_REFGENEUSER_DESC_EDIT ObjMgrProcLoad(OMPROC_EDIT,"Edit RefGene UserTrack Desc","RefGene Tracking",OBJ_SEQDESC,Seq_descr_user,OBJ_SEQDESC,Seq_descr_user,NULL,RefGeneUserGenFunc,PROC_PRIORITY_DEFAULT)
218 extern Int2 LIBCALLBACK RefGeneUserGenFunc (Pointer data);
219
220 #define REGISTER_GENOMEPROJSDBUSER_DESC_EDIT ObjMgrProcLoad(OMPROC_EDIT,"Edit GenomeProjectsDB User Desc","GenomeProjectsDB",OBJ_SEQDESC,Seq_descr_user,OBJ_SEQDESC,Seq_descr_user,NULL,GenomeProjectsDBUserGenFunc,PROC_PRIORITY_DEFAULT)
221 extern Int2 LIBCALLBACK GenomeProjectsDBUserGenFunc (Pointer data);
222
223 #define REGISTER_DBLINKUSER_DESC_EDIT ObjMgrProcLoad(OMPROC_EDIT,"Edit DBLink User Desc","DBLink",OBJ_SEQDESC,Seq_descr_user,OBJ_SEQDESC,Seq_descr_user,NULL,DBlinkUserGenFunc,PROC_PRIORITY_DEFAULT)
224 extern Int2 LIBCALLBACK DBlinkUserGenFunc (Pointer data);
225
226 #define REGISTER_TPAASSEMBLYUSER_DESC_EDIT ObjMgrProcLoad(OMPROC_EDIT,"Edit Assembly User Desc","TPA Assembly",OBJ_SEQDESC,Seq_descr_user,OBJ_SEQDESC,Seq_descr_user,NULL,AssemblyUserGenFunc,PROC_PRIORITY_DEFAULT)
227 extern Int2 LIBCALLBACK AssemblyUserGenFunc (Pointer data);
228
229 #define REGISTER_STRUCTUREDCOMMENTUSER_DESC_EDIT ObjMgrProcLoad(OMPROC_EDIT,"Edit StructuredComment User Desc","Structured Comment",OBJ_SEQDESC,Seq_descr_user,OBJ_SEQDESC,Seq_descr_user,NULL,StruCommUserGenFunc,PROC_PRIORITY_DEFAULT)
230 extern Int2 LIBCALLBACK StruCommUserGenFunc (Pointer data);
231
232 #define REGISTER_CONVERT_TO_DELTA ObjMgrProcLoadEx (OMPROC_FILTER, "Convert to Delta Sequence", "ConvertToDelta", 0,0,0,0,NULL, ConvertToDeltaSequence, PROC_PRIORITY_DEFAULT, "Indexer")
233
234 #define REGISTER_FEAT_INTERVALS_TO_DELTA ObjMgrProcLoadEx (OMPROC_FILTER, "Feature Intervals to Delta Sequence", "FeatToDelta", 0,0,0,0,NULL, FeatToDeltaSeq, PROC_PRIORITY_DEFAULT, "Indexer")
235
236 #if defined(OS_UNIX) || defined(OS_MSWIN)
237 #define REGISTER_CORRECTRNASTRANDSMART ObjMgrProcLoadEx (OMPROC_FILTER, "Correct RNA Strand Use SMART BLAST Results","CorrectRNAStrandSMART",0,0,0,0,NULL,CorrectRNAStrandednessUseSmart,PROC_PRIORITY_DEFAULT, "Analysis")
238 #define REGISTER_CORRECTRNASTRAND ObjMgrProcLoadEx (OMPROC_FILTER, "Correct RNA Strand Use Local Database Search","CorrectRNAStrandLocalDatabase",0,0,0,0,NULL,CorrectRNAStrandedness,PROC_PRIORITY_DEFAULT, "Analysis")
239 #endif
240
241 /* commands for Desktop Segregate menu */
242 #define REGISTER_SEGREGATE_BY_TEXT ObjMgrProcLoadEx (OMPROC_FILTER, "Segregate By Text","SegregateByText",0,0,0,0,NULL,CreateSegregateByTextWindow,PROC_PRIORITY_DEFAULT, "Segregate")
243
244 #define REGISTER_SEGREGATE_BY_FIELD ObjMgrProcLoadEx (OMPROC_FILTER, "Segregate By Options","NewSegregate",0,0,0,0,NULL,SegregateSetsByField,PROC_PRIORITY_DEFAULT, "Segregate")
245
246 #define REGISTER_SEGREGATE_BY_FEATURE ObjMgrProcLoadEx (OMPROC_FILTER, "Segregate By Feature","SegregateByFeature",0,0,0,0,NULL,CreateSegregateByFeatureWindow,PROC_PRIORITY_DEFAULT, "Segregate")
247
248 #define REGISTER_SEGREGATE_BY_DESCRIPTOR ObjMgrProcLoadEx (OMPROC_FILTER, "Segregate By Descriptor","SegregateByDescriptor",0,0,0,0,NULL,CreateSegregateByDescriptorWindow,PROC_PRIORITY_DEFAULT, "Segregate")
249
250 #define REGISTER_SEGREGATE_BY_MOLECULE_TYPE ObjMgrProcLoadEx (OMPROC_FILTER, "Segregate By Molecule Type","SegregateByMoleculeType",0,0,0,0,NULL,CreateSegregateByMoleculeTypeWindow,PROC_PRIORITY_DEFAULT, "Segregate")
251
252 #define REGISTER_SEGREGATE_BY_ID ObjMgrProcLoadEx (OMPROC_FILTER, "Segregate By ID","SegregateByID",0,0,0,0,NULL,CreateSegregateByIdWindow,PROC_PRIORITY_DEFAULT, "Segregate")
253
254 #define REGISTER_SEQUESTER_SETS ObjMgrProcLoadEx (OMPROC_FILTER, "Sequester", "Sequester",0,0,0,0,NULL,SequesterSequences, PROC_PRIORITY_DEFAULT, "Segregate")
255 /* commands for Desktop SegmentedSets menu */
256 #define REGISTER_CREATESEGSET ObjMgrProcLoadEx (OMPROC_FILTER,"Create Segmented Set", "CreateSegSet", 0,0,0,0,NULL,CreateSegSet, PROC_PRIORITY_DEFAULT, "SegmentedSets")
257 #define REGISTER_UPDATESEGSET ObjMgrProcLoadEx (OMPROC_FILTER,"Update Segmented Set","UpdateSegSet",0,0,0,0,NULL,UpdateSegSet,PROC_PRIORITY_DEFAULT, "SegmentedSets")
258 #define REGISTER_NEWUPDATESEGSET ObjMgrProcLoadEx (OMPROC_FILTER,"New Update Segmented Set","NewUpdateSegSet",0,0,0,0,NULL,NewUpdateSegSet,PROC_PRIORITY_DEFAULT, "SegmentedSets")
259 #define REGISTER_ADJUSTMULTISEGSEQ ObjMgrProcLoadEx (OMPROC_FILTER,"Adjust SegSeq Length","AdjustSegLength",0,0,0,0,NULL,AdjustSegSeqLength,PROC_PRIORITY_DEFAULT, "SegmentedSets")
260 #define REGISTER_UNDOSEGSET ObjMgrProcLoadEx (OMPROC_FILTER,"Undo Segmented Set","UndoSegSet",0,0,0,0,NULL,UndoSegSet,PROC_PRIORITY_DEFAULT, "SegmentedSets")
261 #define REGISTER_SEGSETREMOVESETSINSET ObjMgrProcLoadEx (OMPROC_FILTER,"Remove Sets in Selected Set (Like Undo SegSet)","RemoveSetsInSelectedSet",0,0,0,0,NULL,RemoveSetsInSelectedSet,PROC_PRIORITY_DEFAULT, "SegmentedSets")
262
263
264 typedef struct { /* pairs a string with a SeqId; NOTE(review): presumably "old ID text -> replacement ID", but no user of this type is visible here - confirm */
265 CharPtr oldStr; /* string form of the old value (ownership not visible from here) */
266 SeqIdPtr newSip; /* SeqId associated with oldStr (ownership not visible from here) */
267 } ReplaceIDStruct, PNTR ReplaceIDStructPtr;
268
269 typedef struct _explodeStruct { /* node of a singly linked list; each node carries a SeqEntry and a feature pointer */
270 SeqEntryPtr topSep; /* SeqEntry recorded for this node; NOTE(review): name suggests the top-level entry - confirm */
271 SeqFeatPtr seqFeatPtr; /* feature recorded for this node */
272 struct _explodeStruct PNTR next; /* next node in the list, or NULL at the end */
273 } ExplodeStruct, PNTR ExplodeStructPtr;
274
275 typedef struct { /* dialog state for a form; NOTE(review): field names suggest a "change feature strand" dialog, but no user of this type is visible here - confirm */
276 DESCRIPTOR_FORM_BLOCK /* shared leading form fields supplied by this macro (defined elsewhere) */
277 PopuP fromPopup; /* popup control; presumably selects the strand to convert from */
278 PopuP toPopup; /* popup control; presumably selects the strand to convert to */
279 Int2 fromStrand; /* stored "from" strand value */
280 Int2 toStrand; /* stored "to" strand value */
281 ValNodePtr featlist; /* list of features (contents and ownership not visible from here) */
282 LisT feature; /* list control for choosing a feature */
283 Int2 featSubType; /* stored feature subtype value */
284 CharPtr findThisStr; /* stored search string */
285 TexT findThis; /* text control; presumably the source of findThisStr */
286 Boolean case_insensitive; /* stored case-insensitivity flag */
287 ButtoN case_insensitive_btn; /* button control paired with case_insensitive */
288 Boolean when_string_not_present; /* stored flag; presumably inverts the text match */
289 ButtoN when_string_not_present_btn; /* button control paired with when_string_not_present */
290 } EditStrand, PNTR EditStrandPtr;
291
292 static void AddBspToSegSet (BioseqPtr segseq, BioseqPtr bsp)
293
294 {
295 SeqIdPtr sip;
296 SeqLocPtr slp;
297
298 if (segseq == NULL) return;
299 slp = ValNodeNew ((ValNodePtr) segseq->seq_ext);
300 if (slp == NULL) return;
301 if (segseq->seq_ext == NULL) {
302 segseq->seq_ext = (Pointer) slp;
303 }
304 if (bsp != NULL && bsp->length > 0) {
305 segseq->length += bsp->length;
306 slp->choice = SEQLOC_WHOLE;
307 sip = SeqIdFindBest (bsp->id, 0);
308 slp->data.ptrvalue = (Pointer) SeqIdStripLocus (SeqIdDup (sip));
309 } else {
310 slp->choice = SEQLOC_NULL;
311 }
312 }
313
314 static void RemoveMolInfoDescriptors (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
315
316 {
317 BioseqPtr bsp;
318 BioseqSetPtr bssp;
319 ValNodePtr nextsdp;
320 Pointer PNTR prevsdp;
321 ValNodePtr sdp;
322
323 if (IS_Bioseq (sep)) {
324 bsp = (BioseqPtr) sep->data.ptrvalue;
325 sdp = bsp->descr;
326 prevsdp = (Pointer PNTR) &(bsp->descr);
327 } else if (IS_Bioseq_set (sep)) {
328 bssp = (BioseqSetPtr) sep->data.ptrvalue;
329 sdp = bssp->descr;
330 prevsdp = (Pointer PNTR) &(bssp->descr);
331 } else return;
332 while (sdp != NULL) {
333 nextsdp = sdp->next;
334 if (sdp->choice == Seq_descr_molinfo) {
335 *(prevsdp) = sdp->next;
336 sdp->next = NULL;
337 SeqDescFree (sdp);
338 } else {
339 prevsdp = (Pointer PNTR) &(sdp->next);
340 }
341 sdp = nextsdp;
342 }
343 }
344
345 static void MoveSegSetMolInfo (BioseqPtr segseq, BioseqSetPtr parts, BioseqSetPtr segset)
346
347 {
348 MolInfoPtr first;
349 MolInfoPtr mip;
350 SeqEntryPtr partssep;
351 ValNodePtr sdp;
352 SeqEntryPtr sep;
353 SeqEntryPtr tmp;
354
355 if (segseq == NULL || parts == NULL || parts->seq_set == NULL || segset == NULL) return;
356 sep = SeqMgrGetSeqEntryForData (segset);
357 if (sep == NULL) return;
358 sdp = SeqEntryGetSeqDescr (sep, Seq_descr_molinfo, NULL);
359 if (sdp != NULL) return;
360 first = NULL;
361 partssep = SeqMgrGetSeqEntryForData (parts);
362 if (partssep != NULL) {
363 sdp = SeqEntryGetSeqDescr (partssep, Seq_descr_molinfo, NULL);
364 if (sdp != NULL) {
365 first = (MolInfoPtr) sdp->data.ptrvalue;
366 }
367 }
368 for (tmp = parts->seq_set; tmp != NULL; tmp = tmp->next) {
369 sdp = SeqEntryGetSeqDescr (tmp, Seq_descr_molinfo, NULL);
370 if (sdp != NULL) {
371 if (first == NULL) {
372 first = (MolInfoPtr) sdp->data.ptrvalue;
373 } else {
374 mip = (MolInfoPtr) sdp->data.ptrvalue;
375 if (first != NULL && mip != NULL) {
376 if (mip->biomol != first->biomol) return;
377 }
378 }
379 }
380 }
381 if (first == NULL) return;
382 mip = MolInfoNew ();
383 if (mip == NULL) return;
384 sdp = CreateNewDescriptor (sep, Seq_descr_molinfo);
385 if (sdp == NULL) return;
386 sdp->data.ptrvalue = (Pointer) mip;
387 mip->biomol = first->biomol;
388 mip->tech = first->tech;
389 mip->completeness = first->completeness;
390 mip->techexp = StringSaveNoNull (first->techexp);
391 if (partssep != NULL) {
392 SeqEntryExplore (partssep, NULL, RemoveMolInfoDescriptors);
393 }
394 for (tmp = parts->seq_set; tmp != NULL; tmp = tmp->next) {
395 SeqEntryExplore (tmp, NULL, RemoveMolInfoDescriptors);
396 }
397 }
398
399
400 static Boolean DoDescriptorsMatch (SeqDescrPtr sdp1, SeqDescrPtr sdp2)
401 {
402 if (sdp1 == NULL || sdp2 == NULL || sdp1->choice != sdp2->choice)
403 {
404 return FALSE;
405 }
406
407 /* compare publications */
408 if (sdp1->choice == Seq_descr_pub
409 && PubdescContentMatch (sdp1->data.ptrvalue, sdp2->data.ptrvalue))
410 {
411 return TRUE;
412 }
413 /* compare sources */
414 else if (sdp1->choice == Seq_descr_source
415 && BioSourceMatch (sdp1->data.ptrvalue, sdp2->data.ptrvalue))
416 {
417 return TRUE;
418 }
419 /* compare update dates */
420 else if (sdp1->choice == Seq_descr_update_date
421 && DateMatch (sdp1->data.ptrvalue, sdp2->data.ptrvalue, TRUE))
422 {
423 return TRUE;
424 }
425 else
426 {
427 return FALSE;
428 }
429 }
430
431 static Boolean FindIdenticalDescriptorInEachBioseqInList (SeqDescrPtr sdp, SeqEntryPtr sep)
432 {
433 SeqDescrPtr check_sdp;
434 Boolean found_match = FALSE;
435 BioseqPtr bsp;
436
437 if (sdp == NULL)
438 {
439 return FALSE;
440 }
441 if (sep == NULL)
442 {
443 return TRUE;
444 }
445
446 if (!IS_Bioseq(sep) || sep->data.ptrvalue == NULL)
447 {
448 return FALSE;
449 }
450
451 bsp = (BioseqPtr) sep->data.ptrvalue;
452 check_sdp = bsp->descr;
453 while (check_sdp != NULL && !found_match)
454 {
455 found_match = DoDescriptorsMatch (check_sdp, sdp);
456 check_sdp = check_sdp->next;
457 }
458
459 if (found_match)
460 {
461 found_match = FindIdenticalDescriptorInEachBioseqInList (sdp, sep->next);
462 }
463
464 return found_match;
465 }
466
467
468 static void RemoveIdenticalDescriptorInEachBioseqInList (SeqDescrPtr sdp, SeqEntryPtr sep)
469 {
470 SeqDescrPtr check_sdp, prev = NULL;
471 BioseqPtr bsp;
472
473 if (sdp == NULL || sep == NULL || !IS_Bioseq(sep) || sep->data.ptrvalue == NULL)
474 {
475 return;
476 }
477
478 bsp = (BioseqPtr) sep->data.ptrvalue;
479 check_sdp = bsp->descr;
480 while (check_sdp != NULL)
481 {
482 if (DoDescriptorsMatch (check_sdp, sdp))
483 {
484 if (prev == NULL)
485 {
486 bsp->descr = check_sdp->next;
487 }
488 else
489 {
490 prev->next = check_sdp->next;
491 }
492 check_sdp->next = NULL;
493 check_sdp = SeqDescrFree (check_sdp);
494 }
495 else
496 {
497 prev = check_sdp;
498 check_sdp = check_sdp->next;
499 }
500 }
501
502 RemoveIdenticalDescriptorInEachBioseqInList (sdp, sep->next);
503 }
504
505
506 static void
507 MoveUpIdenticalSegSetDescriptors
508 (BioseqPtr segseq,
509 BioseqSetPtr parts,
510 BioseqSetPtr segset,
511 Int2 descriptor_choice)
512 {
513 SeqEntryPtr sep;
514 BioseqPtr bsp;
515 BioseqSetPtr bssp;
516 SeqDescrPtr sdp, sdp_next, prev = NULL;
517 SeqDescrPtr moved_list = NULL;
518
519 if (segseq == NULL || parts == NULL || parts->seq_set == NULL || segset == NULL
520 || ! IS_Bioseq (parts->seq_set) || parts->seq_set->data.ptrvalue == NULL)
521 {
522 return;
523 }
524
525 sep = GetBestTopParentForData (segseq->idx.entityID, segseq);
526 if (sep == NULL)
527 {
528 sep = SeqMgrGetSeqEntryForData (segset);
529 }
530
531 bsp = (BioseqPtr) parts->seq_set->data.ptrvalue;
532
533 sdp = bsp->descr;
534 while (sdp != NULL)
535 {
536 sdp_next = sdp->next;
537 if (sdp->choice == descriptor_choice
538 && FindIdenticalDescriptorInEachBioseqInList (sdp, parts->seq_set->next))
539 {
540 RemoveIdenticalDescriptorInEachBioseqInList (sdp, parts->seq_set->next);
541 if (prev == NULL)
542 {
543 bsp->descr = sdp->next;
544 }
545 else
546 {
547 prev->next = sdp->next;
548 }
549 sdp->next = NULL;
550 ValNodeLink (&moved_list, sdp);
551 }
552 else
553 {
554 prev = sdp;
555 }
556 sdp = sdp_next;
557 }
558
559 if (IS_Bioseq_set (sep))
560 {
561 bssp = (BioseqSetPtr) sep->data.ptrvalue;
562 ValNodeLink (&(bssp->descr), moved_list);
563 }
564 else if (IS_Bioseq (sep))
565 {
566 bsp = (BioseqPtr) sep->data.ptrvalue;
567 ValNodeLink (&(bsp->descr), moved_list);
568 }
569 }
570
571
572 static void DoUpdateSegSet (BioseqPtr segseq, BioseqSetPtr parts, Boolean ask, Boolean force_intersperse)
573
574 {
575 MsgAnswer ans;
576 BioseqPtr bsp;
577 Boolean notFirst;
578 Boolean nullsBetween;
579 SeqFeatPtr sfp;
580 SeqFeatPtr sfpnext;
581 SeqLocPtr slp;
582 SeqEntryPtr tmp;
583
584 if (segseq == NULL || parts == NULL || parts->seq_set == NULL) return;
585 tmp = parts->seq_set;
586 notFirst = FALSE;
587 nullsBetween = FALSE;
588 segseq->length = 0;
589 switch (segseq->seq_ext_type) {
590 case 1: /* seg-ext */
591 slp = (ValNodePtr) segseq->seq_ext;
592 while (slp != NULL) {
593 if (slp->choice == SEQLOC_NULL) {
594 nullsBetween = TRUE;
595 }
596 slp = slp->next;
597 }
598 SeqLocSetFree ((ValNodePtr) segseq->seq_ext);
599 break;
600 case 2: /* reference */
601 SeqLocFree ((ValNodePtr) segseq->seq_ext);
602 break;
603 case 3: /* map */
604 sfp = (SeqFeatPtr) segseq->seq_ext;
605 while (sfp != NULL) {
606 sfpnext = sfp->next;
607 SeqFeatFree (sfp);
608 sfp = sfpnext;
609 }
610 break;
611 default:
612 break;
613 }
614 segseq->seq_ext = NULL;
615 if (ask) {
616 if (nullsBetween) {
617 ans = Message (MSG_YN, "Intersperse intervals with gaps (currently has gaps)?");
618 } else {
619 ans = Message (MSG_YN, "Intersperse intervals with gaps (currently no gaps)?");
620 }
621 nullsBetween = (Boolean) (ans == ANS_YES);
622 }
623 else
624 {
625 nullsBetween |= force_intersperse;
626 }
627 while (tmp != NULL) {
628 if (nullsBetween && notFirst) {
629 AddBspToSegSet (segseq, NULL);
630 }
631 bsp = (BioseqPtr) tmp->data.ptrvalue;
632 if (bsp != NULL) {
633 AddBspToSegSet (segseq, bsp);
634 }
635 notFirst = TRUE;
636 tmp = tmp->next;
637 }
638 }
639
640 typedef struct updatesegstruc { /* results collected by FindSegSetComponentsCallback while walking a SeqEntry tree */
641 BioseqSetPtr parts; /* last BioseqSet seen whose _class is 4, or NULL */
642 BioseqPtr segseq; /* last Bioseq seen whose repr is Seq_repr_seg, or NULL */
643 BioseqSetPtr segset; /* last BioseqSet seen whose _class is 2, or NULL */
644 } UpdateSegStruc, PNTR UpdateSegStrucPtr;
645
646 static void FindSegSetComponentsCallback (SeqEntryPtr sep, Pointer mydata,
647 Int4 index, Int2 indent)
648
649 {
650 BioseqPtr bsp;
651 BioseqSetPtr bssp;
652 UpdateSegStrucPtr ussp;
653
654 if (sep != NULL && sep->data.ptrvalue && mydata != NULL) {
655 ussp = (UpdateSegStrucPtr) mydata;
656 if (sep->choice == 1) {
657 bsp = (BioseqPtr) sep->data.ptrvalue;
658 if (bsp->repr == Seq_repr_seg) {
659 ussp->segseq = bsp;
660 }
661 } else if (sep->choice == 2) {
662 bssp = (BioseqSetPtr) sep->data.ptrvalue;
663 if (bssp->_class == 2) {
664 ussp->segset = bssp;
665 } else if (bssp->_class == 4) {
666 ussp->parts = bssp;
667 }
668 }
669 }
670 }
671
672 static Int4 UpdateSegList (SeqEntryPtr sep, Pointer mydata,
673 SeqEntryFunc mycallback,
674 Int4 index, Int2 indent)
675
676 {
677 BioseqSetPtr bssp;
678
679 if (sep == NULL) return index;
680 if (mycallback != NULL)
681 (*mycallback) (sep, mydata, index, indent);
682 index++;
683 if (IS_Bioseq (sep)) return index;
684 if (Bioseq_set_class (sep) == 4) return index;
685 bssp = (BioseqSetPtr) sep->data.ptrvalue;
686 sep = bssp->seq_set;
687 indent++;
688 while (sep != NULL) {
689 index = UpdateSegList (sep, mydata, mycallback, index, indent);
690 sep = sep->next;
691 }
692 return index;
693 }
694
/*
 * Starts an UpdateSegList walk at index 0, indent 0.
 * The expansion is a plain expression with no trailing semicolon, so the
 * macro can be used as a single statement in an unbraced if/else and its
 * result can be used in an expression.
 */
#define UpdateSegExplore(a,b,c) UpdateSegList ((a), (b), (c), 0L, 0)
696
697 extern Int2 DoOneSegFixup (SeqEntryPtr sep, Boolean ask);
698 extern Int2 DoOneSegFixup (SeqEntryPtr sep, Boolean ask)
699
700 {
701 BioseqSetPtr bssp;
702 Uint1 choice;
703 Int2 count;
704 SeqEntryPtr insert;
705 SeqEntryPtr next;
706 ObjMgrDataPtr omdptop;
707 ObjMgrData omdata;
708 Uint2 parenttype;
709 Pointer parentptr;
710 UpdateSegStruc uss;
711 SeqEntryPtr tmp;
712
713 if (sep == NULL) return 0;
714 if (IS_Bioseq_set (sep)) {
715 bssp = (BioseqSetPtr) sep->data.ptrvalue;
716 if (bssp != NULL && (bssp->_class == BioseqseqSet_class_genbank
717 || IsPopPhyEtcSet (bssp->_class))) {
718 choice = 0;
719 for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
720 if (choice == 0) {
721 choice = tmp->choice;
722 } else if (choice != tmp->choice) {
723 return 0;
724 }
725 }
726 count = 0;
727 for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
728 count += DoOneSegFixup (sep, ask);
729 }
730 return count;
731 }
732 }
733 if (IS_Bioseq (sep) && sep->next != NULL) {
734 count = 0;
735 SaveSeqEntryObjMgrData (sep, &omdptop, &omdata);
736 GetSeqEntryParent (sep, &parentptr, &parenttype);
737 insert = sep->next;
738 sep->next = NULL;
739 while (insert != NULL) {
740 next = insert->next;
741 insert->next = NULL;
742 AddSeqEntryToSeqEntry (sep, insert, FALSE);
743 count++;
744 insert = next;
745 }
746 SeqMgrLinkSeqEntry (sep, parenttype, parentptr);
747 RestoreSeqEntryObjMgrData (sep, omdptop, &omdata);
748 uss.segseq = NULL;
749 uss.parts = NULL;
750 uss.segset = NULL;
751 UpdateSegExplore (sep, (Pointer) &uss, FindSegSetComponentsCallback);
752 if (uss.segseq != NULL && uss.parts != NULL && uss.segset != NULL) {
753 DoUpdateSegSet (uss.segseq, uss.parts, ask, FALSE);
754 MoveSegSetMolInfo (uss.segseq, uss.parts, uss.segset);
755
756 MoveUpIdenticalSegSetDescriptors (uss.segseq, uss.parts, uss.segset, Seq_descr_pub);
757 MoveUpIdenticalSegSetDescriptors (uss.segseq, uss.parts, uss.segset, Seq_descr_update_date);
758 MoveUpIdenticalSegSetDescriptors (uss.segseq, uss.parts, uss.segset, Seq_descr_source);
759 }
760 return count;
761 }
762 uss.segseq = NULL;
763 uss.parts = NULL;
764 uss.segset = NULL;
765 UpdateSegExplore (sep, (Pointer) &uss, FindSegSetComponentsCallback);
766 if (uss.segseq != NULL && uss.parts != NULL && uss.segset != NULL) {
767 DoUpdateSegSet (uss.segseq, uss.parts, ask, FALSE);
768 MoveSegSetMolInfo (uss.segseq, uss.parts, uss.segset);
769
770 MoveUpIdenticalSegSetDescriptors (uss.segseq, uss.parts, uss.segset, Seq_descr_pub);
771 MoveUpIdenticalSegSetDescriptors (uss.segseq, uss.parts, uss.segset, Seq_descr_update_date);
772 MoveUpIdenticalSegSetDescriptors (uss.segseq, uss.parts, uss.segset, Seq_descr_source);
773 return 1;
774 }
775 return 0;
776 }
777
778 static void
779 MoveNucProtSetFeaturesAndDescriptorsToNucSeg
780 (BioseqSetPtr bssp, BioseqPtr bsp)
781 {
782 SeqAnnotPtr last_sap;
783 SeqDescrPtr last_sdp;
784 if (bssp == NULL || bsp == NULL)
785 {
786 return;
787 }
788 last_sap = bsp->annot;
789 while (last_sap != NULL && last_sap->next != NULL)
790 {
791 last_sap = last_sap->next;
792 }
793 if (last_sap == NULL)
794 {
795 bsp->annot = bssp->annot;
796 }
797 else
798 {
799 last_sap->next = bssp->annot;
800 }
801 bssp->annot = NULL;
802
803 last_sdp = bsp->descr;
804 while (last_sdp != NULL && last_sdp->next != NULL)
805 {
806 last_sdp = last_sdp->next;
807 }
808 if (last_sdp == NULL)
809 {
810 bsp->descr = bssp->descr;
811 }
812 else
813 {
814 last_sdp->next = bssp->descr;
815 }
816 bssp->descr = NULL;
817 }
818
819 static void UpdateOneSegSet (BioseqSetPtr seg_bssp, Boolean intersperse_nulls)
820 {
821 BioseqPtr seg = NULL;
822 BioseqSetPtr parts = NULL;
823 SeqEntryPtr sep;
824 BioseqPtr bsp;
825 BioseqSetPtr bssp;
826
827 if (seg_bssp == NULL || seg_bssp->_class != BioseqseqSet_class_segset)
828 {
829 return;
830 }
831
832 for (sep = seg_bssp->seq_set; sep != NULL; sep = sep->next)
833 {
834 if (IS_Bioseq (sep))
835 {
836 bsp = (BioseqPtr) sep->data.ptrvalue;
837 if (bsp != NULL
838 && ISA_na (bsp->mol)
839 && bsp->repr == Seq_repr_seg)
840 {
841 seg = bsp;
842 }
843 } else if (IS_Bioseq_set (sep)) {
844 bssp = (BioseqSetPtr) sep->data.ptrvalue;
845 if (bssp != NULL
846 && bssp->_class == BioseqseqSet_class_parts)
847 {
848 parts = bssp;
849 }
850 }
851 }
852 DoUpdateSegSet (seg, parts, FALSE, intersperse_nulls);
853 MoveSegSetMolInfo (seg, parts, seg_bssp);
854 MoveUpIdenticalSegSetDescriptors (seg, parts, seg_bssp, Seq_descr_pub);
855 MoveUpIdenticalSegSetDescriptors (seg, parts, seg_bssp, Seq_descr_update_date);
856 MoveUpIdenticalSegSetDescriptors (seg, parts, seg_bssp, Seq_descr_source);
857 }
858
859 static void ConvertOneSetToSegSet (SeqEntryPtr sep, Boolean intersperse_nulls)
860 {
861 BioseqSetPtr bssp, this_bssp;
862 BioseqPtr seg;
863 SeqEntryPtr this_sep, next_sep;
864 SeqEntryPtr segment_list;
865 SeqEntryPtr nuc_list = NULL, last_nuc = NULL;
866 SeqEntryPtr prot_list = NULL, last_prot = NULL;
867 SeqEntryPtr nps_next, nuc_seg;
868 SeqEntryPtr nuc_sep, parts_sep, seg_sep;
869 BioseqSetPtr seg_bssp, parts;
870 BioseqPtr bsp;
871 SeqLocPtr slp;
872 ObjMgrDataPtr omdptop;
873 ObjMgrData omdata;
874 Uint2 parenttype;
875 Pointer parentptr;
876 BioseqSetPtr wrapper_bssp;
877 SeqEntryPtr tmp_sep;
878
879 if (sep == NULL || ! IS_Bioseq_set (sep))
880 {
881 return;
882 }
883 bssp = (BioseqSetPtr) sep->data.ptrvalue;
884 if (bssp == NULL
885 || bssp->_class == BioseqseqSet_class_nuc_prot
886 || bssp->_class == BioseqseqSet_class_segset)
887 {
888 return;
889 }
890
891 SaveSeqEntryObjMgrData (sep, &omdptop, &omdata);
892 GetSeqEntryParent (sep, &parentptr, &parenttype);
893
894
895 segment_list = bssp->seq_set;
896 bssp->seq_set = NULL;
897
898 for (this_sep = segment_list;
899 this_sep != NULL;
900 this_sep = next_sep)
901 {
902 next_sep = this_sep->next;
903 this_sep->next = NULL;
904 if (IS_Bioseq (this_sep))
905 {
906 if (nuc_list == NULL)
907 {
908 nuc_list = this_sep;
909 }
910 else
911 {
912 if (last_nuc == NULL)
913 {
914 last_nuc = nuc_list;
915 }
916 while (last_nuc->next != NULL)
917 {
918 last_nuc = last_nuc->next;
919 }
920 last_nuc->next = this_sep;
921 }
922 }
923 else if (IS_Bioseq_set (this_sep))
924 {
925 this_bssp = (BioseqSetPtr) this_sep->data.ptrvalue;
926 if (this_bssp != NULL
927 && this_bssp->_class == BioseqseqSet_class_nuc_prot
928 && this_bssp->seq_set != NULL)
929 {
930 nuc_seg = this_bssp->seq_set;
931 this_bssp->seq_set = NULL;
932 nps_next = nuc_seg->next;
933 nuc_seg->next = NULL;
934 /* move set features to segment */
935 bsp = (BioseqPtr) nuc_seg->data.ptrvalue;
936 MoveNucProtSetFeaturesAndDescriptorsToNucSeg (this_bssp, bsp);
937
938 /* add nuc_seg to nuc_list */
939 if (nuc_list == NULL)
940 {
941 nuc_list = nuc_seg;
942 }
943 else
944 {
945 if (last_nuc == NULL)
946 {
947 last_nuc = nuc_list;
948 }
949 while (last_nuc->next != NULL)
950 {
951 last_nuc = last_nuc->next;
952 }
953 last_nuc->next = nuc_seg;
954 }
955
956 /* add proteins to prot_list */
957 if (prot_list == NULL)
958 {
959 prot_list = nps_next;
960 }
961 else
962 {
963 /* add proteins to protein list */
964 if (last_prot == NULL)
965 {
966 last_prot = prot_list;
967 }
968 while (last_prot->next != NULL)
969 {
970 last_prot = last_prot->next;
971 }
972 last_prot->next = nps_next;
973 }
974 }
975
976 /* remove nuc-prot set */
977 SeqEntryFree (this_sep);
978 }
979 }
980
981 if (nuc_list == NULL)
982 {
983 return;
984 }
985
986 /* create segment and parts from nuc_list */
987 bsp = nuc_list->data.ptrvalue;
988 seg = BioseqNew ();
989 if (seg == NULL) return;
990 seg->mol = bsp->mol;
991 seg->repr = Seq_repr_seg;
992 seg->seq_ext_type = 1;
993 seg->length = 0;
994 seg->id = MakeUniqueSeqID ("segseq_");
995 SeqMgrAddToBioseqIndex (seg);
996
997 nuc_sep = SeqEntryNew ();
998 if (nuc_sep == NULL) return;
999 nuc_sep->choice = 1;
1000 nuc_sep->data.ptrvalue = (Pointer) seg;
1001
1002 parts = BioseqSetNew ();
1003 if (parts == NULL) return;
1004 parts->_class = 4;
1005
1006 parts_sep = SeqEntryNew ();
1007 if (parts_sep == NULL) return;
1008 parts_sep->choice = 2;
1009 parts_sep->data.ptrvalue = (Pointer) parts;
1010 nuc_sep->next = parts_sep;
1011
1012 parts->seq_set = nuc_list;
1013 for (this_sep = nuc_list; this_sep != NULL; this_sep = this_sep->next)
1014 {
1015 bsp = (BioseqPtr) this_sep->data.ptrvalue;
1016 if (bsp == NULL)
1017 {
1018 continue;
1019 }
1020 slp = ValNodeNew ((ValNodePtr) seg->seq_ext);
1021 if (slp == NULL)
1022 {
1023 continue;
1024 }
1025
1026 if (seg->seq_ext == NULL)
1027 {
1028 seg->seq_ext = (Pointer) slp;
1029 }
1030 if (bsp->length >= 0)
1031 {
1032 seg->length += bsp->length;
1033 slp->choice = SEQLOC_WHOLE;
1034 slp->data.ptrvalue = (Pointer) SeqIdStripLocus (SeqIdDup (SeqIdFindBest (bsp->id, 0)));
1035 } else {
1036 slp->choice = SEQLOC_NULL;
1037 }
1038 }
1039
1040 if (prot_list == NULL)
1041 {
1042 /* we just have nucleotides, so change the BioseqSet class to segset */
1043 bssp->_class = BioseqseqSet_class_segset;
1044 bssp->seq_set = nuc_sep;
1045 seg_bssp = bssp;
1046 }
1047 else
1048 {
1049 /* change the BioseqSet class to nucprot, create the segmented
1050 * set and put it in the nucleotide slot, and then add the proteins
1051 */
1052 bssp->_class = BioseqseqSet_class_nuc_prot;
1053 seg_sep = SeqEntryNew ();
1054 seg_bssp = BioseqSetNew ();
1055 seg_sep->choice = 2;
1056 seg_sep->data.ptrvalue = (Pointer) seg_bssp;
1057 seg_bssp->_class = BioseqseqSet_class_segset;
1058 seg_bssp->seq_set = nuc_sep;
1059
1060 bssp->seq_set = seg_sep;
1061 seg_sep->next = prot_list;
1062 }
1063
1064 /* if the set we have converted to a segset used to be our genbank wrapper,
1065 * create a new genbank wrapper
1066 */
1067 if (parentptr == NULL)
1068 {
1069 tmp_sep = SeqEntryNew ();
1070 tmp_sep->choice = sep->choice;
1071 tmp_sep->data.ptrvalue = sep->data.ptrvalue;
1072 wrapper_bssp = BioseqSetNew ();
1073 wrapper_bssp->_class = BioseqseqSet_class_genbank;
1074 wrapper_bssp->seq_set = tmp_sep;
1075 sep->choice = 2;
1076 sep->data.ptrvalue = wrapper_bssp;
1077 }
1078
1079 SeqMgrLinkSeqEntry (sep, parenttype, parentptr);
1080 RestoreSeqEntryObjMgrData (sep, omdptop, &omdata);
1081
1082 UpdateOneSegSet (seg_bssp, intersperse_nulls);
1083 }
1084
1085 static Boolean IsFlatNucProtSet (BioseqSetPtr bssp)
1086 {
1087 SeqEntryPtr this_sep;
1088
1089 if (bssp == NULL || bssp->seq_set == NULL
1090 || bssp->_class != BioseqseqSet_class_nuc_prot)
1091 {
1092 return FALSE;
1093 }
1094
1095 for (this_sep = bssp->seq_set; this_sep != NULL; this_sep = this_sep->next)
1096 {
1097 if (!IS_Bioseq (this_sep))
1098 {
1099 return FALSE;
1100 }
1101 }
1102 return TRUE;
1103 }
1104
1105 static void NewSegFixup (SeqEntryPtr sep, Boolean intersperse_nulls)
1106 {
1107 BioseqSetPtr bssp, this_bssp;
1108 SeqEntryPtr this_sep;
1109 Boolean can_convert = TRUE;
1110
1111 if (sep == NULL || IS_Bioseq (sep))
1112 {
1113 return;
1114 }
1115
1116 bssp = (BioseqSetPtr) sep->data.ptrvalue;
1117 if (bssp == NULL)
1118 {
1119 return;
1120 }
1121 else if (bssp->_class == BioseqseqSet_class_segset)
1122 {
1123 UpdateOneSegSet (bssp, intersperse_nulls);
1124 return;
1125 }
1126
1127 /* if all of the entries in bssp->seq_set are bioseqs or nuc_prot
1128 sets, then we can convert this to a segset */
1129 for (this_sep = bssp->seq_set;
1130 this_sep != NULL && can_convert;
1131 this_sep = this_sep->next)
1132 {
1133 if (IS_Bioseq_set (this_sep))
1134 {
1135 this_bssp = (BioseqSetPtr) this_sep->data.ptrvalue;
1136 if (!IsFlatNucProtSet (this_bssp))
1137 {
1138 can_convert = FALSE;
1139 }
1140 }
1141 else if (!IS_Bioseq (this_sep))
1142 {
1143 can_convert = FALSE;
1144 }
1145 }
1146
1147 if (can_convert)
1148 {
1149 ConvertOneSetToSegSet (sep, intersperse_nulls);
1150 }
1151 else
1152 {
1153 for (this_sep = bssp->seq_set;
1154 this_sep != NULL;
1155 this_sep = this_sep->next)
1156 {
1157 NewSegFixup (this_sep, intersperse_nulls);
1158 }
1159 }
1160 }
1161
1162 extern void DoFixupLocus (SeqEntryPtr sep);
1163 extern void DoFixupSegSet (SeqEntryPtr sep);
1164
1165 static Int2 LIBCALLBACK NewUpdateSegSet (Pointer data)
1166
1167 {
1168 OMProcControlPtr ompcp;
1169 SeqEntryPtr sep;
1170 ErrSev sev;
1171 ModalAcceptCancelData acd;
1172 WindoW w;
1173 GrouP h, g, c;
1174 ButtoN intersperse_nulls_btn;
1175 ButtoN fix_locus_btn;
1176 ButtoN tax_fix_cleanup_btn;
1177 ButtoN b;
1178
1179 ompcp = (OMProcControlPtr) data;
1180 if (ompcp == NULL || ompcp->proc == NULL) return OM_MSG_RET_ERROR;
1181 switch (ompcp->input_itemtype) {
1182 case OBJ_BIOSEQ :
1183 break;
1184 case OBJ_BIOSEQSET :
1185 break;
1186 case 0 :
1187 return OM_MSG_RET_ERROR;
1188 default :
1189 return OM_MSG_RET_ERROR;
1190 }
1191 if (ompcp->input_data == NULL) return OM_MSG_RET_ERROR;
1192 sep = GetTopSeqEntryForEntityID (ompcp->input_entityID);
1193 if (sep == NULL) return OM_MSG_RET_ERROR;
1194
1195 w = MovableModalWindow (-50, -33, -10, -10, "SegSet Conversion Options", NULL);
1196 h = HiddenGroup (w, -1, 0, NULL);
1197 SetGroupSpacing (h, 10, 10);
1198
1199 g = HiddenGroup (h, 0, 4, NULL);
1200 intersperse_nulls_btn = CheckBox (g, "Intersperse NULLS", NULL);
1201 SetStatus (intersperse_nulls_btn, TRUE);
1202 fix_locus_btn = CheckBox (g, "Force Locus Fixup", NULL);
1203 SetStatus (fix_locus_btn, TRUE);
1204 tax_fix_cleanup_btn = CheckBox (g, "Do Tax_Fix/Cleanup", NULL);
1205 SetStatus (tax_fix_cleanup_btn, TRUE);
1206
1207 acd.accepted = FALSE;
1208 acd.cancelled = FALSE;
1209 c = HiddenGroup (h, 4, 0, NULL);
1210 b = DefaultButton (c, "Accept", ModalAcceptButton);
1211 SetObjectExtra (b, &acd, NULL);
1212 b = PushButton (c, "Cancel", ModalCancelButton);
1213 SetObjectExtra (b, &acd, NULL);
1214
1215 AlignObjects (ALIGN_CENTER, (HANDLE) g, (HANDLE) c, NULL);
1216 RealizeWindow (w);
1217 Show (w);
1218 Update ();
1219
1220 while (!acd.accepted && !acd.cancelled)
1221 {
1222 ProcessExternalEvent ();
1223 Update ();
1224 }
1225 ProcessAnEvent ();
1226 Hide (w);
1227 if (acd.cancelled)
1228 {
1229 Remove (w);
1230 return OM_MSG_RET_DONE;
1231 }
1232
1233 NewSegFixup (sep, GetStatus (intersperse_nulls_btn));
1234
1235 if (GetStatus (fix_locus_btn))
1236 {
1237 sev = ErrSetMessageLevel (SEV_FATAL);
1238 DoFixupLocus (sep);
1239 DoFixupSegSet (sep);
1240 ErrSetMessageLevel (sev);
1241 }
1242
1243 if (GetStatus (tax_fix_cleanup_btn))
1244 {
1245 ForceCleanupEntityID (ompcp->input_entityID);
1246 }
1247 Remove (w);
1248
1249 ObjMgrSetDirtyFlag (ompcp->input_entityID, TRUE);
1250 ObjMgrSendMsg (OM_MSG_UPDATE, ompcp->input_entityID, 0, 0);
1251 Update ();
1252 return OM_MSG_RET_DONE;
1253 }
1254
1255 static Int2 LIBCALLBACK UpdateSegSet (Pointer data)
1256
1257 {
1258 OMProcControlPtr ompcp;
1259 SeqEntryPtr sep;
1260 ErrSev sev;
1261
1262 ompcp = (OMProcControlPtr) data;
1263 if (ompcp == NULL || ompcp->proc == NULL) return OM_MSG_RET_ERROR;
1264 switch (ompcp->input_itemtype) {
1265 case OBJ_BIOSEQ :
1266 break;
1267 case OBJ_BIOSEQSET :
1268 break;
1269 case 0 :
1270 return OM_MSG_RET_ERROR;
1271 default :
1272 return OM_MSG_RET_ERROR;
1273 }
1274 if (ompcp->input_data == NULL) return OM_MSG_RET_ERROR;
1275 sep = GetTopSeqEntryForEntityID (ompcp->input_entityID);
1276 if (sep == NULL) return OM_MSG_RET_ERROR;
1277
1278 DoOneSegFixup (sep, TRUE);
1279 if (Message (MSG_YN, "Do you want to Force Locus Fixup?") == ANS_YES) {
1280 sev = ErrSetMessageLevel (SEV_FATAL);
1281 DoFixupLocus (sep);
1282 DoFixupSegSet (sep);
1283 ErrSetMessageLevel (sev);
1284 }
1285
1286 ObjMgrSetDirtyFlag (ompcp->input_entityID, TRUE);
1287 ObjMgrSendMsg (OM_MSG_UPDATE, ompcp->input_entityID, 0, 0);
1288 Update ();
1289 return OM_MSG_RET_DONE;
1290 }
1291
1292 static void DoAdjustSegSeqLength (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
1293
1294 {
1295 BioseqPtr bsp;
1296 ValNode head;
1297 Int4 len;
1298 Int4 len2;
1299 ValNodePtr vnp;
1300
1301 if (! IS_Bioseq (sep)) return;
1302 bsp = (BioseqPtr) sep->data.ptrvalue;
1303 if (bsp == NULL || bsp->repr != Seq_repr_seg) return;
1304 head.choice = SEQLOC_MIX;
1305 head.data.ptrvalue = bsp->seq_ext;
1306 head.next = NULL;
1307 vnp = NULL;
1308 len = 0;
1309 while ((vnp = SeqLocFindNext (&head, vnp)) != NULL) {
1310 len2 = SeqLocLen (vnp);
1311 if (len2 > 0) {
1312 len += len2;
1313 }
1314 }
1315 bsp->length = len;
1316 }
1317
1318 static Int2 LIBCALLBACK AdjustSegSeqLength (Pointer data)
1319
1320 {
1321 OMProcControlPtr ompcp;
1322 SeqEntryPtr sep;
1323
1324 ompcp = (OMProcControlPtr) data;
1325 if (ompcp == NULL || ompcp->proc == NULL) return OM_MSG_RET_ERROR;
1326 switch (ompcp->input_itemtype) {
1327 case OBJ_BIOSEQ :
1328 break;
1329 case OBJ_BIOSEQSET :
1330 break;
1331 case 0 :
1332 return OM_MSG_RET_ERROR;
1333 default :
1334 return OM_MSG_RET_ERROR;
1335 }
1336 if (ompcp->input_data == NULL) return OM_MSG_RET_ERROR;
1337 sep = GetTopSeqEntryForEntityID (ompcp->input_entityID);
1338 if (sep == NULL) return OM_MSG_RET_ERROR;
1339 SeqEntryExplore (sep, NULL, DoAdjustSegSeqLength);
1340 ObjMgrSetDirtyFlag (ompcp->input_entityID, TRUE);
1341 ObjMgrSendMsg (OM_MSG_UPDATE, ompcp->input_entityID, 0, 0);
1342 return OM_MSG_RET_DONE;
1343 }
1344
1345
1346 static SeqLocPtr ReduceLocationToSingleBioseq (SeqLocPtr slp, BioseqPtr bsp)
1347 {
1348 SeqLocPtr this_slp, prev_slp, next_slp;
1349 BioseqPtr this_bsp;
1350 SeqIntPtr sint;
1351 ValNodePtr this_vnp, prev_vnp, next_vnp;
1352
1353 if (slp == NULL || bsp == NULL) return NULL;
1354
1355 if (slp->choice == SEQLOC_MIX)
1356 {
1357 this_slp = slp->data.ptrvalue;
1358 prev_slp = NULL;
1359 while (this_slp != NULL)
1360 {
1361 next_slp = this_slp->next;
1362 this_bsp = BioseqFind (SeqLocId (this_slp));
1363 if (this_bsp != bsp)
1364 {
1365 if (prev_slp == NULL)
1366 {
1367 slp->data.ptrvalue = next_slp;
1368 }
1369 else
1370 {
1371 prev_slp->next = next_slp;
1372 }
1373 this_slp->next = NULL;
1374 SeqLocFree (this_slp);
1375 }
1376 else
1377 {
1378 prev_slp = this_slp;
1379 }
1380 this_slp = next_slp;
1381 }
1382 if (slp->data.ptrvalue == NULL)
1383 {
1384 slp = SeqLocFree (slp);
1385 }
1386 else
1387 {
1388 this_slp = slp->data.ptrvalue;
1389 if (this_slp->next == NULL)
1390 {
1391 slp->data.ptrvalue = NULL;
1392 slp = SeqLocFree (slp);
1393 slp = this_slp;
1394 }
1395 }
1396 }
1397 else if (slp->choice == SEQLOC_PACKED_INT)
1398 {
1399 prev_vnp = NULL;
1400 this_vnp = slp->data.ptrvalue;
1401 while (this_vnp != NULL)
1402 {
1403 next_vnp = this_vnp->next;
1404 sint = this_vnp->data.ptrvalue;
1405 this_bsp = BioseqFind (sint->id);
1406 if (this_bsp != bsp)
1407 {
1408 if (prev_vnp == NULL) {
1409 slp->data.ptrvalue = next_vnp;
1410 } else {
1411 prev_vnp->next = next_vnp;
1412 }
1413 this_vnp->next = NULL;
1414 sint = SeqIntFree (sint);
1415 this_vnp = ValNodeFree (this_vnp);
1416 } else {
1417 prev_vnp = this_vnp;
1418 }
1419 this_vnp = next_vnp;
1420 }
1421 if (slp->data.ptrvalue == NULL)
1422 {
1423 slp = SeqLocFree (slp);
1424 }
1425 }
1426 else
1427 {
1428 this_bsp = BioseqFind (SeqLocId (slp));
1429 if (this_bsp != bsp)
1430 {
1431 slp = SeqLocFree (slp);
1432 }
1433 }
1434 return slp;
1435 }
1436
1437 static SeqLocPtr RemoveBioseqFromLocation (SeqLocPtr slp, BioseqPtr bsp)
1438 {
1439 SeqLocPtr this_slp, prev_slp, next_slp;
1440 BioseqPtr this_bsp;
1441 SeqIntPtr sint;
1442 ValNodePtr this_vnp, prev_vnp, next_vnp;
1443
1444 if (slp == NULL || bsp == NULL) return NULL;
1445
1446 if (slp->choice == SEQLOC_MIX)
1447 {
1448 this_slp = slp->data.ptrvalue;
1449 prev_slp = NULL;
1450 while (this_slp != NULL)
1451 {
1452 next_slp = this_slp->next;
1453 this_bsp = BioseqFind (SeqLocId (this_slp));
1454 if (this_bsp == bsp)
1455 {
1456 if (prev_slp == NULL)
1457 {
1458 slp->data.ptrvalue = next_slp;
1459 }
1460 else
1461 {
1462 prev_slp->next = next_slp;
1463 }
1464 this_slp->next = NULL;
1465 SeqLocFree (this_slp);
1466 }
1467 else
1468 {
1469 prev_slp = this_slp;
1470 }
1471 this_slp = next_slp;
1472 }
1473 if (slp->data.ptrvalue == NULL)
1474 {
1475 slp = SeqLocFree (slp);
1476 }
1477 else
1478 {
1479 this_slp = slp->data.ptrvalue;
1480 if (this_slp->next == NULL)
1481 {
1482 slp->data.ptrvalue = NULL;
1483 slp = SeqLocFree (slp);
1484 slp = this_slp;
1485 }
1486 }
1487 }
1488 else if (slp->choice == SEQLOC_PACKED_INT)
1489 {
1490 prev_vnp = NULL;
1491 this_vnp = slp->data.ptrvalue;
1492 while (this_vnp != NULL)
1493 {
1494 next_vnp = this_vnp->next;
1495 sint = this_vnp->data.ptrvalue;
1496 this_bsp = BioseqFind (sint->id);
1497 if (this_bsp == bsp)
1498 {
1499 if (prev_vnp == NULL) {
1500 slp->data.ptrvalue = next_vnp;
1501 } else {
1502 prev_vnp->next = next_vnp;
1503 }
1504 this_vnp->next = NULL;
1505 sint = SeqIntFree (sint);
1506 this_vnp = ValNodeFree (this_vnp);
1507 } else {
1508 prev_vnp = this_vnp;
1509 }
1510 this_vnp = next_vnp;
1511 }
1512 if (slp->data.ptrvalue == NULL)
1513 {
1514 slp = SeqLocFree (slp);
1515 }
1516 }
1517 else
1518 {
1519 this_bsp = BioseqFind (SeqLocId (slp));
1520 if (this_bsp == bsp)
1521 {
1522 slp = SeqLocFree (slp);
1523 }
1524 }
1525 return slp;
1526 }
1527
1528 static void PushFeaturesDownToBioseq (SeqAnnotPtr annot, SeqEntryPtr sep)
1529 {
1530 BioseqSetPtr bssp;
1531 BioseqPtr bsp;
1532 SeqFeatPtr sfp, prev_sfp, last_sfp, next_sfp;
1533
1534 if (annot == NULL || annot->type != 1 || sep == NULL) return;
1535
1536 if (IS_Bioseq_set (sep)) {
1537 bssp = sep->data.ptrvalue;
1538 for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
1539 PushFeaturesDownToBioseq (annot, sep);
1540 }
1541 return;
1542 }
1543
1544 if (! IS_Bioseq (sep)) return;
1545
1546 bsp = sep->data.ptrvalue;
1547
1548 sfp = annot->data;
1549 prev_sfp = NULL;
1550 while (sfp != NULL) {
1551 next_sfp = sfp->next;
1552 while (sfp != NULL && ! SeqIdIn (SeqLocId (sfp->location), bsp->id)) {
1553 prev_sfp = sfp;
1554 sfp = sfp->next;
1555 if (sfp != NULL) {
1556 next_sfp = sfp->next;
1557 }
1558 }
1559 if (sfp != NULL) {
1560 if (bsp->annot == NULL) {
1561 bsp->annot = SeqAnnotNew ();
1562 bsp->annot->type = 1;
1563 bsp->annot->data = sfp;
1564 } else {
1565 if (bsp->annot->data == NULL) {
1566 bsp->annot->data = sfp;
1567 } else {
1568 last_sfp = bsp->annot->data;
1569 while (last_sfp->next != NULL) {
1570 last_sfp = last_sfp->next;
1571 }
1572 last_sfp->next = sfp;
1573 }
1574 }
1575 if (prev_sfp == NULL) {
1576 annot->data = sfp->next;
1577 } else {
1578 prev_sfp->next = sfp->next;
1579 }
1580 sfp->next = NULL;
1581 sfp = next_sfp;
1582 }
1583 }
1584 }
1585
1586 static void
1587 SplitSegmentedProduct
1588 (BioseqPtr old_protein,
1589 BioseqPtr new_protein,
1590 Int4 protein_offset)
1591 {
1592 SeqFeatPtr sfp, copy_sfp, new_sfp;
1593 SeqMgrFeatContext fcontext;
1594 Int4 right_end;
1595 SeqLoc subtract_loc;
1596 SeqInt sint;
1597 Boolean partial3;
1598 Boolean partial5;
1599
1600 if (new_protein == NULL)
1601 {
1602 return;
1603 }
1604
1605 /* delete full length feature on new protein */
1606 sfp = SeqMgrGetNextFeature (new_protein, NULL, 0, 0, &fcontext);
1607 if (sfp != NULL)
1608 {
1609 sfp->idx.deleteme = TRUE;
1610 }
1611
1612 right_end = protein_offset + new_protein->length - 1;
1613
1614 /* if there are features on the original protein product
1615 * other than the full-length feature, we need to find the offsets
1616 * so that we can determine whether they belong in this product.
1617 */
1618 sfp = SeqMgrGetNextFeature (old_protein, NULL, 0, 0, &fcontext);
1619 while (sfp != NULL)
1620 {
1621 new_sfp = NULL;
1622 if ((fcontext.left == 0 && fcontext.right == old_protein->length - 1)
1623 || (fcontext.left <= protein_offset && fcontext.right >= right_end))
1624 {
1625 copy_sfp = SeqFeatCopy (sfp);
1626 new_sfp = CreateNewFeatureOnBioseq (new_protein, copy_sfp->data.choice, NULL);
1627 new_sfp->data.value.ptrvalue = copy_sfp->data.value.ptrvalue;
1628 copy_sfp->data.value.ptrvalue = NULL;
1629 copy_sfp = SeqFeatFree (copy_sfp);
1630 }
1631 else if ((fcontext.left <= protein_offset && fcontext.right >= protein_offset)
1632 || (fcontext.left <= right_end && fcontext.right >= right_end)
1633 || (fcontext.left >= protein_offset && fcontext.right <= protein_offset))
1634 {
1635 copy_sfp = SeqFeatCopy (sfp);
1636 new_sfp = CreateNewFeatureOnBioseq (new_protein, copy_sfp->data.choice, NULL);
1637 new_sfp->data.value.ptrvalue = copy_sfp->data.value.ptrvalue;
1638 copy_sfp->data.value.ptrvalue = NULL;
1639 copy_sfp = SeqFeatFree (copy_sfp);
1640 subtract_loc.next = NULL;
1641 subtract_loc.choice = SEQLOC_INT;
1642 subtract_loc.data.ptrvalue = &sint;
1643 sint.id = new_protein->id;
1644 sint.strand = Seq_strand_plus;
1645 sint.if_from = NULL;
1646 sint.if_to = NULL;
1647
1648 /* chop off left end if needed */
1649 if (fcontext.left > protein_offset)
1650 {
1651 sint.from = 0;
1652 sint.to = fcontext.left - protein_offset - 1;
1653 new_sfp->location = SeqLocSubtract (new_sfp->location, &subtract_loc);
1654 }
1655 /* chop off right end if needed */
1656 if (fcontext.right < right_end)
1657 {
1658 sint.from = fcontext.right - protein_offset + 1;
1659 sint.to = new_protein->length - 1;
1660 new_sfp->location = SeqLocSubtract (new_sfp->location, &subtract_loc);
1661 }
1662 }
1663 if (new_sfp != NULL)
1664 {
1665 partial3 = FALSE;
1666 partial5 = FALSE;
1667 if (fcontext.left < protein_offset)
1668 {
1669 partial5 = TRUE;
1670 }
1671 if (fcontext.right > right_end)
1672 {
1673 partial3 = TRUE;
1674 }
1675
1676 if (partial3 || partial5)
1677 {
1678 SetSeqLocPartial (new_sfp->location, partial5, partial3);
1679 new_sfp->partial = TRUE;
1680 }
1681
1682 }
1683 sfp = SeqMgrGetNextFeature (old_protein, sfp, 0, 0, &fcontext);
1684 }
1685 }
1686
1687 static void SplitSegmentedFeatsOnOneSet (BioseqSetPtr set)
1688 {
1689 SeqAnnotPtr annot, prev_annot, next_annot;
1690 SeqFeatPtr sfp, new_sfp, prev_sfp, next_sfp;
1691 SeqEntryPtr sep, set_sep;
1692 BioseqPtr bsp, last_bsp;
1693 SeqLocPtr loc, slp;
1694 Int4 product_offset, protein_offset;
1695 Boolean is_first = TRUE;
1696 SeqIdPtr sip;
1697 CdRegionPtr crp;
1698 Boolean partial3, partial5;
1699 Int4 len_modulo, new_frame;
1700
1701 if (set == NULL || set->annot == NULL) return;
1702
1703 set_sep = SeqMgrGetSeqEntryForData (set);
1704
1705 prev_annot = NULL;
1706 annot = set->annot;
1707 next_annot = annot->next;
1708 while (annot != NULL) {
1709 next_annot = annot->next;
1710 if (annot->type != 1) {
1711 prev_annot = annot;
1712 annot = next_annot;
1713 continue;
1714 }
1715 sfp = annot->data;
1716 while (sfp != NULL) {
1717 next_sfp = sfp->next;
1718 prev_sfp = sfp;
1719 loc = SeqLocFindNext (sfp->location, NULL);
1720 sip = SeqLocId (loc);
1721 last_bsp = BioseqFind (sip);
1722
1723 product_offset = 0;
1724 slp = SeqLocFindNext (sfp->location, loc);
1725 while (slp != NULL) {
1726 bsp = BioseqFind (SeqLocId (slp));
1727 if (bsp != last_bsp && last_bsp != NULL) {
1728 new_sfp = SeqFeatCopy (sfp);
1729 new_sfp->location = ReduceLocationToSingleBioseq (new_sfp->location, last_bsp);
1730 sfp->location = RemoveBioseqFromLocation (sfp->location, last_bsp);
1731 if (is_first)
1732 {
1733 CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
1734 SetSeqLocPartial (sfp->location, TRUE, partial3);
1735 is_first = FALSE;
1736 sfp->partial = TRUE;
1737 SetSeqLocPartial (new_sfp->location, partial5, TRUE);
1738 new_sfp->partial = TRUE;
1739 }
1740 else
1741 {
1742 SetSeqLocPartial (new_sfp->location, TRUE, TRUE);
1743 new_sfp->partial = TRUE;
1744 }
1745
1746 if (sfp->data.choice == SEQFEAT_CDREGION && sfp->product != NULL)
1747 {
1748 /* now figure out protein offset, to use for copying protein features */
1749 if (SeqLocStrand (new_sfp->location) == Seq_strand_minus)
1750 {
1751 /* if on minus strand, offset is length of remaining location */
1752 protein_offset = SeqLocLen (sfp->location) / 3;
1753 len_modulo = SeqLocLen (sfp->location) % 3;
1754 }
1755 else
1756 {
1757 /* if on plus strand, offset is locations already removed */
1758 protein_offset = product_offset / 3;
1759 len_modulo = (product_offset + SeqLocLen (new_sfp->location)) % 3;
1760 }
1761 crp = (CdRegionPtr) sfp->data.value.ptrvalue;
1762 if (crp != NULL && (crp->frame == 2 || crp->frame == 3))
1763 {
1764 protein_offset --;
1765 }
1766
1767 /* correct frame */
1768 if (len_modulo == 0)
1769 {
1770 new_frame = crp->frame;
1771 }
1772 else if (crp->frame == 0 || crp->frame == 1)
1773 {
1774 if (len_modulo == 1)
1775 {
1776 new_frame = 2;
1777 }
1778 else
1779 {
1780 /* len_modulo == 2 */
1781 new_frame = 3;
1782 }
1783 }
1784 else if (len_modulo == 1)
1785 {
1786 if (crp->frame == 2)
1787 {
1788 new_frame = 1;
1789 }
1790 else
1791 {
1792 /* crp->frame == 3 */
1793 new_frame = 2;
1794 }
1795
1796 }
1797 else if (len_modulo == 2)
1798 {
1799 if (crp->frame == 2)
1800 {
1801 new_frame = 3;
1802 }
1803 else
1804 {
1805 /* crp->frame == 3 */
1806 new_frame = 1;
1807 }
1808 }
1809 crp->frame = new_frame;
1810
1811 new_sfp->product = SeqLocFree (new_sfp->product);
1812 SeqEdTranslateOneCDS (new_sfp, last_bsp, sfp->idx.entityID, Sequin_GlobalAlign2Seq);
1813
1814 SplitSegmentedProduct (BioseqFindFromSeqLoc (sfp->product),
1815 BioseqFindFromSeqLoc (new_sfp->product),
1816 protein_offset);
1817 ResynchCDSPartials (new_sfp, NULL);
1818 }
1819
1820 prev_sfp->next = new_sfp;
1821 new_sfp->next = next_sfp;
1822 prev_sfp = new_sfp;
1823 product_offset += SeqLocLen (slp);
1824 slp = SeqLocFindNext (sfp->location, NULL);
1825 last_bsp = bsp;
1826 } else {
1827 product_offset += SeqLocLen (slp);
1828 slp = SeqLocFindNext (sfp->location, slp);
1829 }
1830 }
1831 if (sfp->location == NULL)
1832 {
1833 sfp->idx.deleteme = TRUE;
1834 }
1835 else if (!is_first)
1836 {
1837 CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
1838 SetSeqLocPartial (sfp->location, TRUE, partial3);
1839 sfp->partial = TRUE;
1840 if (sfp->data.choice == SEQFEAT_CDREGION && sfp->product != NULL)
1841 {
1842 SeqEdTranslateOneCDS (sfp, last_bsp, sfp->idx.entityID, Sequin_GlobalAlign2Seq);
1843
1844 ResynchCDSPartials (sfp, NULL);
1845 }
1846 }
1847 sfp = next_sfp;
1848 }
1849
1850 /* push features to appropriate bioseqs */
1851 for (sep = set->seq_set; sep != NULL; sep = sep->next) {
1852 PushFeaturesDownToBioseq (annot, sep);
1853 }
1854 if (prev_annot == NULL) {
1855 set->annot = annot->next;
1856 } else {
1857 prev_annot->next = annot->next;
1858 }
1859 annot->next = NULL;
1860 SeqAnnotFree (annot);
1861 annot = next_annot;
1862 }
1863 }
1864
1865 static void SplitSegmentedFeats (SeqEntryPtr sep, Uint2 entityID)
1866 {
1867 BioseqSetPtr bssp;
1868
1869 if (IS_Bioseq_set (sep)) {
1870 bssp = (BioseqSetPtr) sep->data.ptrvalue;
1871 SplitSegmentedFeatsOnOneSet (bssp);
1872 for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
1873 SplitSegmentedFeats (sep, entityID);
1874 }
1875 }
1876 DeleteMarkedObjects (entityID, 0, NULL);
1877 }
1878
1879 extern void SplitSegmentedFeatsMenuItem (IteM i)
1880 {
1881 BaseFormPtr bfp;
1882 SeqEntryPtr sep;
1883
1884 #ifdef WIN_MAC
1885 bfp = currentFormDataPtr;
1886 #else
1887 bfp = GetObjectExtra (i);
1888 #endif
1889 if (bfp == NULL) return;
1890 sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
1891 if (sep == NULL) return;
1892 SplitSegmentedFeats (sep, bfp->input_entityID);
1893 ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE);
1894 ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0);
1895
1896 }
1897
1898 static void DoSegSetUndo (BioseqPtr segseq, BioseqSetPtr parts, BioseqSetPtr segset, Uint2 entityID)
1899
1900 {
1901 SeqAnnotPtr annot;
1902 ValNodePtr descr;
1903 SeqAnnotPtr sap;
1904 SeqEntryPtr tmp;
1905 SeqEntryPtr split_sep;
1906 Uint2 parenttype;
1907 Pointer parentptr;
1908 BioseqSetPtr parent_bssp = NULL;
1909
1910 if (segseq == NULL || parts == NULL || parts->seq_set == NULL || segset == NULL) return;
1911
1912 /* split segmented features before undoing segset */
1913 split_sep = SeqMgrGetSeqEntryForData (segset);
1914 GetSeqEntryParent (split_sep, &parentptr, &parenttype);
1915 if (parenttype == OBJ_BIOSEQSET && parentptr != NULL)
1916 {
1917 parent_bssp = (BioseqSetPtr) parentptr;
1918 if (parent_bssp->_class == BioseqseqSet_class_nuc_prot)
1919 {
1920 split_sep = SeqMgrGetSeqEntryForData (parent_bssp);
1921 }
1922 }
1923 SplitSegmentedFeats (split_sep, entityID);
1924
1925 segset->_class = 7;
1926 parts->_class = 14;
1927 annot = segseq->annot;
1928 segseq->annot = NULL;
1929 if (segset->annot == NULL) {
1930 segset->annot = annot;
1931 annot = NULL;
1932 } else {
1933 sap = segset->annot;
1934 while (sap->next != NULL) {
1935 sap = sap->next;
1936 }
1937 sap->next = annot;
1938 }
1939 descr = segseq->descr;
1940 segseq->descr = NULL;
1941 ValNodeLink (&(segset->descr), descr);
1942 tmp = segset->seq_set;
1943 if (tmp != NULL && IS_Bioseq (tmp)) {
1944 segset->seq_set = tmp->next;
1945 tmp->next = NULL;
1946 SeqEntryFree (tmp);
1947 }
1948
1949 /* propagate the descriptors on the segset to the parts in the set */
1950 SetDescriptorPropagate (segset);
1951 SetDescriptorPropagate (parts);
1952
1953 }
1954
1955 static Int2 DoOneSegUndo (SeqEntryPtr sep, Uint2 entityID)
1956
1957 {
1958 BioseqSetPtr bssp;
1959 Int2 count = 0;
1960 UpdateSegStruc uss;
1961
1962 if (sep == NULL) return 0;
1963 if (IS_Bioseq_set (sep)) {
1964 bssp = (BioseqSetPtr) sep->data.ptrvalue;
1965 if (bssp != NULL && (bssp->_class == 7 ||
1966 (IsPopPhyEtcSet (bssp->_class)))) {
1967 for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
1968 count += DoOneSegUndo (sep, entityID);
1969 }
1970 return count;
1971 }
1972 }
1973
1974 uss.segseq = NULL;
1975 uss.parts = NULL;
1976 uss.segset = NULL;
1977 UpdateSegExplore (sep, (Pointer) &uss, FindSegSetComponentsCallback);
1978 if (uss.segseq != NULL && uss.parts != NULL && uss.segset != NULL) {
1979 DoSegSetUndo (uss.segseq, uss.parts, uss.segset, entityID);
1980 return 1;
1981 }
1982 return 0;
1983 }
1984
1985 static SeqEntryPtr BioseqExtract (SeqEntryPtr top, BioseqPtr bsp)
1986
1987 {
1988 BioseqSetPtr bssp;
1989 SeqEntryPtr next;
1990 SeqEntryPtr PNTR prev;
1991 SeqEntryPtr rsult = NULL;
1992 SeqEntryPtr sep;
1993
1994 if (top == NULL || bsp == NULL) return NULL;
1995 if (! IS_Bioseq_set (top)) return NULL;
1996 bssp = (BioseqSetPtr) top->data.ptrvalue;
1997 if (bssp == NULL) return NULL;
1998 prev = &(bssp->seq_set);
1999 sep = bssp->seq_set;
2000 while (sep != NULL) {
2001 next = sep->next;
2002 if (IS_Bioseq_set (sep)) {
2003 rsult = BioseqExtract (sep, bsp);
2004 if (rsult != NULL) {
2005 return rsult;
2006 }
2007 prev = &(sep->next);
2008 sep = next;
2009 } else if (IS_Bioseq (sep) && sep->data.ptrvalue == (Pointer) bsp) {
2010 *(prev) = next;
2011 sep->next = NULL;
2012 return sep;
2013 } else {
2014 prev = &(sep->next);
2015 sep = next;
2016 }
2017 }
2018 return NULL;
2019 }
2020
2021 static void DoRepairPartsSet (SeqEntryPtr sep)
2022
2023 {
2024 BioseqPtr bsp;
2025 BioseqSetPtr bssp;
2026 ValNode head;
2027 BioseqSetPtr parts;
2028 BioseqPtr segseq;
2029 SeqLocPtr slp;
2030 SeqEntryPtr tmp;
2031 UpdateSegStruc uss;
2032
2033 if (sep == NULL) return;
2034 if (IS_Bioseq (sep)) return;
2035 if (IS_Bioseq_set (sep)) {
2036 bssp = (BioseqSetPtr) sep->data.ptrvalue;
2037 if (bssp == NULL) return;
2038 if (bssp->_class == 7 ||
2039 (IsPopPhyEtcSet (bssp->_class))) {
2040 for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
2041 DoRepairPartsSet (tmp);
2042 }
2043 return;
2044 }
2045 }
2046 uss.segseq = NULL;
2047 uss.parts = NULL;
2048 uss.segset = NULL;
2049 UpdateSegExplore (sep, (Pointer) &uss, FindSegSetComponentsCallback);
2050 if (uss.segseq == NULL || uss.parts == NULL || uss.segset == NULL) return;
2051 segseq = uss.segseq;
2052 parts = uss.parts;
2053 if (segseq->repr != Seq_repr_seg ||
2054 segseq->seq_ext_type != 1 ||
2055 segseq->seq_ext == NULL) return;
2056 head.choice = SEQLOC_MIX;
2057 head.data.ptrvalue = segseq->seq_ext;
2058 head.next = NULL;
2059 slp = NULL;
2060 while ((slp = SeqLocFindNext (&head, slp)) != NULL) {
2061 bsp = BioseqFind (SeqLocId (slp));
2062 if (bsp != NULL) {
2063 tmp = BioseqExtract (sep, bsp);
2064 if (tmp != NULL) {
2065 ValNodeLink (&parts->seq_set, tmp);
2066 }
2067 }
2068 }
2069 }
2070
2071
2072 static void RepackageParksInPartsSetForEntityID (Uint2 entityID)
2073 {
2074 ObjMgrDataPtr omdptop;
2075 ObjMgrData omdata;
2076 Uint2 parenttype;
2077 Pointer parentptr;
2078 SeqEntryPtr sep;
2079
2080 sep = GetTopSeqEntryForEntityID (entityID);
2081 if (sep == NULL) return;
2082 SaveSeqEntryObjMgrData (sep, &omdptop, &omdata);
2083 GetSeqEntryParent (sep, &parentptr, &parenttype);
2084 DoRepairPartsSet (sep);
2085 SeqMgrLinkSeqEntry (sep, parenttype, parentptr);
2086 RestoreSeqEntryObjMgrData (sep, omdptop, &omdata);
2087 ObjMgrSetDirtyFlag (entityID, TRUE);
2088 ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
2089 }
2090
2091
2092 static Int2 LIBCALLBACK PackagePartsInPartsSet (Pointer data)
2093
2094 {
2095 OMProcControlPtr ompcp;
2096
2097 ompcp = (OMProcControlPtr) data;
2098 if (ompcp == NULL || ompcp->proc == NULL) return OM_MSG_RET_ERROR;
2099 switch (ompcp->input_itemtype) {
2100 case OBJ_BIOSEQ :
2101 break;
2102 case OBJ_BIOSEQSET :
2103 break;
2104 case 0 :
2105 return OM_MSG_RET_ERROR;
2106 default :
2107 return OM_MSG_RET_ERROR;
2108 }
2109 if (ompcp->input_data == NULL) return OM_MSG_RET_ERROR;
2110 RepackageParksInPartsSetForEntityID (ompcp->input_entityID);
2111 return OM_MSG_RET_DONE;
2112 }
2113
2114
2115 NLM_EXTERN void RepackagePartsMenuItem (IteM i)
2116 {
2117 BaseFormPtr bfp;
2118
2119 #ifdef WIN_MAC
2120 bfp = currentFormDataPtr;
2121 #else
2122 bfp = GetObjectExtra (i);
2123 #endif
2124 if (bfp == NULL) return;
2125 RepackageParksInPartsSetForEntityID (bfp->input_entityID);
2126 }
2127
2128
2129 NLM_EXTERN void RemoveDupGenBankSets (SeqEntryPtr sep)
2130
2131 {
2132 SeqAnnotPtr annot;
2133 BioseqSetPtr bssp;
2134 ValNodePtr descr;
2135 SeqAnnotPtr sap;
2136 SeqEntryPtr tmp;
2137 BioseqSetPtr tmpbssp;
2138
2139 if (sep == NULL || ! IS_Bioseq_set (sep)) return;
2140 bssp = (BioseqSetPtr) sep->data.ptrvalue;
2141 if (bssp == NULL || bssp->_class != 7) return;
2142 tmp = bssp->seq_set;
2143 if (tmp == NULL || ! IS_Bioseq_set (tmp) || tmp->next != NULL) return;
2144 tmpbssp = (BioseqSetPtr) tmp->data.ptrvalue;
2145 if (tmpbssp == NULL || tmpbssp->_class != 7 || tmpbssp->seq_set == NULL) return;
2146 annot = tmpbssp->annot;
2147 tmpbssp->annot = NULL;
2148 if (bssp->annot == NULL) {
2149 bssp->annot = annot;
2150 annot = NULL;
2151 } else {
2152 sap = bssp->annot;
2153 while (sap->next != NULL) {
2154 sap = sap->next;
2155 }
2156 sap->next = annot;
2157 }
2158 descr = tmpbssp->descr;
2159 tmpbssp->descr = NULL;
2160 ValNodeLink (&(bssp->descr), descr);
2161 bssp->seq_set = tmpbssp->seq_set;
2162 tmpbssp->seq_set = NULL;
2163 SeqEntryFree (tmp);
2164 }
2165
2166 static Int2 LIBCALLBACK UndoSegSet (Pointer data)
2167
2168 {
2169 Int2 count;
2170 ObjMgrDataPtr omdptop;
2171 ObjMgrData omdata;
2172 OMProcControlPtr ompcp;
2173 Uint2 parenttype;
2174 Pointer parentptr;
2175 SeqEntryPtr sep;
2176
2177 ompcp = (OMProcControlPtr) data;
2178 if (ompcp == NULL || ompcp->proc == NULL) return OM_MSG_RET_ERROR;
2179 switch (ompcp->input_itemtype) {
2180 case OBJ_BIOSEQ :
2181 break;
2182 case OBJ_BIOSEQSET :
2183 break;
2184 case 0 :
2185 return OM_MSG_RET_ERROR;
2186 default :
2187 break;
2188 }
2189 if (ompcp->input_data == NULL) return OM_MSG_RET_ERROR;
2190 sep = GetTopSeqEntryForEntityID (ompcp->input_entityID);
2191 if (sep == NULL) return OM_MSG_RET_ERROR;
2192 SaveSeqEntryObjMgrData (sep, &omdptop, &omdata);
2193 GetSeqEntryParent (sep, &parentptr, &parenttype);
2194 count = DoOneSegUndo (sep, ompcp->input_entityID);
2195 RemoveDupGenBankSets (sep);
2196 SeqMgrLinkSeqEntry (sep, parenttype, parentptr);
2197 RestoreSeqEntryObjMgrData (sep, omdptop, &omdata);
2198 if (count > 0) {
2199 PropagateFromGenBankBioseqSet (sep, FALSE);
2200 SeqMgrClearFeatureIndexes (ompcp->input_entityID, NULL);
2201 SeqMgrIndexFeatures (ompcp->input_entityID, NULL);
2202 SplitSegmentedFeats (sep, ompcp->input_entityID);
2203 ObjMgrSetDirtyFlag (ompcp->input_entityID, TRUE);
2204 ObjMgrSendMsg (OM_MSG_UPDATE, ompcp->input_entityID, 0, 0);
2205 }
2206 return OM_MSG_RET_DONE;
2207 }
2208
2209 static Boolean
2210 ProteinUsedForThisSegmentInThisAnnotList
2211 (SeqEntryPtr seg_sep,
2212 SeqEntryPtr prot_sep,
2213 SeqAnnotPtr sap)
2214 {
2215 SeqFeatPtr sfp;
2216 BioseqPtr seg_bsp, prot_bsp;
2217 SeqIdPtr loc_id, prot_id;
2218
2219 if (seg_sep == NULL || ! IS_Bioseq (seg_sep) || seg_sep->data.ptrvalue == NULL
2220 || prot_sep == NULL || ! IS_Bioseq (prot_sep) || prot_sep->data.ptrvalue == NULL
2221 || sap == NULL)
2222 {
2223 return FALSE;
2224 }
2225
2226 seg_bsp = seg_sep->data.ptrvalue;
2227 prot_bsp = prot_sep->data.ptrvalue;
2228
2229 if (sap->type == 1)
2230 {
2231 sfp = sap->data;
2232 while (sfp != NULL)
2233 {
2234 loc_id = SeqLocId (sfp->location);
2235 prot_id = SeqLocId (sfp->product);
2236 if (SeqIdIn (loc_id, seg_bsp->id) && SeqIdIn (prot_id, prot_bsp->id))
2237 {
2238 return TRUE;
2239 }
2240 sfp = sfp->next;
2241 }
2242 }
2243 return ProteinUsedForThisSegmentInThisAnnotList (seg_sep, prot_sep, sap->next);
2244 }
2245
2246 static void RemoveIntermediateGenBankWrapper (BioseqSetPtr bssp)
2247 {
2248 SeqEntryPtr this_sep, next_sep, prev_sep, last_mem_sep;
2249 Uint2 parenttype;
2250 Pointer parentptr;
2251 BioseqSetPtr parent_bssp;
2252
2253 if (bssp == NULL || bssp->_class != BioseqseqSet_class_genbank)
2254 {
2255 return;
2256 }
2257 this_sep = SeqMgrGetSeqEntryForData (bssp);
2258 GetSeqEntryParent (this_sep, &parentptr, &parenttype);
2259 if (parenttype != OBJ_BIOSEQSET || parentptr == NULL)
2260 {
2261 return;
2262 }
2263 parent_bssp = (BioseqSetPtr) parentptr;
2264
2265 if (parent_bssp->_class != BioseqseqSet_class_genbank)
2266 {
2267 return;
2268 }
2269
2270 /* propagate descriptors on our original set to its members */
2271 SetDescriptorPropagate (bssp);
2272
2273 /* put the set members in the parent list where the set was */
2274 last_mem_sep = bssp->seq_set;
2275 while (last_mem_sep != NULL && last_mem_sep->next != NULL)
2276 {
2277 last_mem_sep = last_mem_sep->next;
2278 }
2279
2280 next_sep = this_sep->next;
2281 this_sep->next = NULL;
2282
2283 if (parent_bssp->seq_set == this_sep)
2284 {
2285 if (last_mem_sep == NULL)
2286 {
2287 parent_bssp->seq_set = next_sep;
2288 }
2289 else
2290 {
2291 parent_bssp->seq_set = bssp->seq_set;
2292 last_mem_sep->next = next_sep;
2293 }
2294 }
2295 else
2296 {
2297 prev_sep = parent_bssp->seq_set;
2298 while (prev_sep->next != this_sep)
2299 {
2300 prev_sep = prev_sep->next;
2301 }
2302 if (last_mem_sep == NULL)
2303 {
2304 prev_sep->next = next_sep;
2305 }
2306 else
2307 {
2308 prev_sep->next = bssp->seq_set;
2309 last_mem_sep->next = next_sep;
2310 }
2311 }
2312
2313 bssp->seq_set = NULL;
2314 SeqEntryFree (this_sep);
2315 }
2316
2317 static Boolean SetContainsProteins (BioseqSetPtr bssp)
2318 {
2319 BioseqPtr bsp;
2320 SeqEntryPtr sep;
2321 Boolean has_proteins = FALSE;
2322
2323 if (bssp == NULL || bssp->seq_set == NULL)
2324 {
2325 return FALSE;
2326 }
2327
2328 for (sep = bssp->seq_set; sep != NULL && !has_proteins; sep = sep->next)
2329 {
2330 if (sep->data.ptrvalue == NULL)
2331 {
2332 continue;
2333 }
2334 if (IS_Bioseq (sep))
2335 {
2336 bsp = (BioseqPtr) sep->data.ptrvalue;
2337 if (ISA_aa (bsp->mol))
2338 {
2339 has_proteins = TRUE;
2340 }
2341 }
2342 else if (IS_Bioseq_set (sep))
2343 {
2344 has_proteins |= SetContainsProteins (sep->data.ptrvalue);
2345 }
2346 }
2347 return has_proteins;
2348 }
2349
2350 static void
2351 RemoveOneSegSet
2352 (SeqEntryPtr this_sep,
2353 BioseqSetPtr parent_bssp,
2354 BioseqSetPtr target_bssp,
2355 Uint2 entityID)
2356 {
2357 SeqEntryPtr protein_list = NULL;
2358 SeqEntryPtr seg_list, seg_sep, prot_sep;
2359 SeqEntryPtr prev_prot_sep, next_prot_sep;
2360 BioseqPtr seg_bsp;
2361 SeqEntryPtr this_prot_list, last_this;
2362 BioseqSetPtr seg_nuc_prot, seg_bssp;
2363 SeqEntryPtr next_seg;
2364
2365 if (this_sep == NULL)
2366 {
2367 return;
2368 }
2369
2370 DoOneSegUndo (this_sep, entityID);
2371 if (this_sep->choice != 2 || this_sep->data.ptrvalue == NULL)
2372 {
2373 return;
2374 }
2375
2376 target_bssp = (BioseqSetPtr) this_sep->data.ptrvalue;
2377
2378 if (parent_bssp != NULL
2379 && parent_bssp->_class == BioseqseqSet_class_nuc_prot
2380 && !SetContainsProteins (target_bssp))
2381 {
2382 seg_list = target_bssp->seq_set;
2383 target_bssp->seq_set = NULL;
2384
2385 protein_list = parent_bssp->seq_set->next;
2386 parent_bssp->seq_set->next = NULL;
2387
2388 /* parent_bssp->seq_set is this_sep */
2389 /* we've already moved everything out of this_sep, so we can free it now */
2390 parent_bssp->seq_set = SeqEntryFree (parent_bssp->seq_set);
2391 this_sep = NULL;
2392 target_bssp = NULL;
2393
2394 if (IS_Bioseq_set (seg_list))
2395 {
2396 this_sep = seg_list;
2397 target_bssp = seg_list->data.ptrvalue;
2398 seg_list = target_bssp->seq_set;
2399 target_bssp->seq_set = NULL;
2400 this_sep = SeqEntryFree (this_sep);
2401 target_bssp = NULL;
2402 }
2403
2404 for (seg_sep = seg_list; seg_sep != NULL; seg_sep = next_seg)
2405 {
2406 next_seg = seg_sep->next;
2407 if (!IS_Bioseq (seg_sep) || seg_sep->data.ptrvalue == NULL)
2408 {
2409 seg_bssp = (BioseqSetPtr) seg_sep->data.ptrvalue;
2410 continue;
2411 }
2412 seg_bsp = (BioseqPtr) seg_sep->data.ptrvalue;
2413 /* get the proteins from the parent set that go with this segment */
2414 this_prot_list = NULL;
2415 last_this = NULL;
2416 prot_sep = protein_list;
2417 prev_prot_sep = NULL;
2418 while (prot_sep != NULL)
2419 {
2420 next_prot_sep = prot_sep->next;
2421 if (ProteinUsedForThisSegmentInThisAnnotList (seg_sep, prot_sep, parent_bssp->annot)
2422 || ProteinUsedForThisSegmentInThisAnnotList (seg_sep, prot_sep, seg_bsp->annot))
2423 {
2424 /* remove from total list */
2425 if (prev_prot_sep == NULL)
2426 {
2427 protein_list = prot_sep->next;
2428 }
2429 else
2430 {
2431 prev_prot_sep->next = prot_sep->next;
2432 }
2433 prot_sep->next = NULL;
2434
2435 /* add to list for this nuc-prot set */
2436 if (last_this == NULL)
2437 {
2438 this_prot_list = prot_sep;
2439 }
2440 else
2441 {
2442 last_this->next = prot_sep;
2443 }
2444 last_this = prot_sep;
2445 }
2446 else
2447 {
2448 prev_prot_sep = prot_sep;
2449 }
2450 prot_sep = next_prot_sep;
2451 }
2452
2453 if (this_prot_list != NULL)
2454 {
2455 seg_nuc_prot = BioseqSetNew ();
2456 seg_nuc_prot->_class = BioseqseqSet_class_nuc_prot;
2457 seg_nuc_prot->seq_set = SeqEntryNew ();
2458 seg_nuc_prot->seq_set->choice = 1;
2459 seg_nuc_prot->seq_set->data.ptrvalue = seg_bsp;
2460 seg_nuc_prot->seq_set->next = this_prot_list;
2461 seg_sep->choice = 2;
2462 seg_sep->data.ptrvalue = seg_nuc_prot;
2463 }
2464 }
2465 parent_bssp->_class = BioseqseqSet_class_genbank;
2466 parent_bssp->seq_set = seg_list;
2467 }
2468
2469 if (protein_list != NULL && parent_bssp != NULL)
2470 {
2471 seg_sep = parent_bssp->seq_set;
2472 while (seg_sep != NULL && seg_sep->next != NULL)
2473 {
2474 seg_sep = seg_sep->next;
2475 }
2476 if (seg_sep == NULL)
2477 {
2478 parent_bssp->seq_set = protein_list;
2479 }
2480 else
2481 {
2482 seg_sep->next = protein_list;
2483 }
2484 }
2485
2486 /* if we have just put a genbank set inside another genbank set, move the nuc-prot sets
2487 * up. */
2488 RemoveIntermediateGenBankWrapper (parent_bssp);
2489 }
2490
2491 static void
2492 RemoveOneUnsegmentedSet
2493 (SeqEntryPtr this_sep,
2494 BioseqSetPtr target_bssp,
2495 BioseqSetPtr parent_bssp)
2496 {
2497 SeqEntryPtr last_sep, prev_sep, target_sep;
2498
2499 if (this_sep == NULL || target_bssp == NULL || parent_bssp == NULL)
2500 {
2501 return;
2502 }
2503
2504 /* find last seqentry in target */
2505 last_sep = target_bssp->seq_set;
2506 while (last_sep != NULL && last_sep->next != NULL)
2507 {
2508 last_sep = last_sep->next;
2509 }
2510
2511 /* find target in parent set, put seqentries in its place */
2512 prev_sep = NULL;
2513 target_sep = parent_bssp->seq_set;
2514 while (target_sep != this_sep)
2515 {
2516 prev_sep = target_sep;
2517 target_sep = target_sep->next;
2518 }
2519 if (last_sep != NULL)
2520 {
2521 if (prev_sep == NULL)
2522 {
2523 parent_bssp->seq_set = target_bssp->seq_set;
2524 }
2525 else
2526 {
2527 prev_sep->next = target_bssp->seq_set;
2528 }
2529 last_sep->next = this_sep->next;
2530 }
2531 this_sep->next = NULL;
2532 target_bssp->seq_set = NULL;
2533 SeqEntryFree (this_sep);
2534 }
2535
2536 /* This function will remove a set and move the sequences in the set
2537 * to the parent set.
2538 */
2539 static Int2 LIBCALLBACK RemoveSet (Pointer data)
2540
2541 {
2542 ObjMgrDataPtr omdptop;
2543 ObjMgrData omdata;
2544 OMProcControlPtr ompcp;
2545 Uint2 parenttype, top_parenttype;
2546 Pointer parentptr, top_parentptr;
2547 SeqEntryPtr top_sep, this_sep;
2548 BioseqSetPtr target_bssp, parent_bssp;
2549
2550 ompcp = (OMProcControlPtr) data;
2551 if (ompcp == NULL
2552 || ompcp->input_itemtype != OBJ_BIOSEQSET
2553 || ompcp->input_data == NULL)
2554 {
2555 return OM_MSG_RET_ERROR;
2556 }
2557
2558 top_sep = GetTopSeqEntryForEntityID (ompcp->input_entityID);
2559 if (top_sep == NULL) return OM_MSG_RET_ERROR;
2560 this_sep = SeqMgrGetSeqEntryForData (ompcp->input_data);
2561 GetSeqEntryParent (this_sep, &parentptr, &parenttype);
2562
2563 if (parenttype != OBJ_BIOSEQSET || parentptr == NULL)
2564 {
2565 Message (MSG_ERROR, "Can't remove top set!");
2566 return OM_MSG_RET_ERROR;
2567 }
2568
2569 SaveSeqEntryObjMgrData (top_sep, &omdptop, &omdata);
2570 GetSeqEntryParent (top_sep, &top_parentptr, &top_parenttype);
2571
2572 target_bssp = (BioseqSetPtr) ompcp->input_data;
2573 parent_bssp = (BioseqSetPtr) parentptr;
2574
2575 if (parent_bssp->_class == BioseqseqSet_class_not_set
2576 || parent_bssp->_class == BioseqseqSet_class_segset
2577 || parent_bssp->_class == BioseqseqSet_class_parts)
2578 {
2579 Message (MSG_ERROR, "Can't move sequences up into parent");
2580 return OM_MSG_RET_ERROR;
2581 }
2582
2583 if (target_bssp->_class == BioseqseqSet_class_not_set
2584 || target_bssp->_class == BioseqseqSet_class_nuc_prot
2585 || target_bssp->_class == BioseqseqSet_class_parts)
2586 {
2587 Message (MSG_ERROR, "Can't disassemble this set");
2588 return OM_MSG_RET_ERROR;
2589 }
2590
2591 if (target_bssp->_class == BioseqseqSet_class_segset)
2592 {
2593 RemoveOneSegSet (this_sep, parent_bssp, target_bssp, ompcp->input_entityID);
2594 /* propagate the descriptors on the set to the sequences in the set */
2595 SetDescriptorPropagate (parent_bssp);
2596 DoFixupLocus (top_sep);
2597 DoFixupSegSet (top_sep);
2598 }
2599 else if (parent_bssp->_class == BioseqseqSet_class_nuc_prot
2600 && ! SetContainsProteins (target_bssp))
2601 {
2602 Message (MSG_ERROR, "Can't move sequences up into parent");
2603 return OM_MSG_RET_ERROR;
2604 }
2605 else
2606 {
2607 /* propagate the descriptors on the set to the sequences in the set */
2608 SetDescriptorPropagate (target_bssp);
2609
2610 RemoveOneUnsegmentedSet (this_sep, target_bssp, parent_bssp);
2611 }
2612
2613 SeqMgrLinkSeqEntry (top_sep, top_parenttype, top_parentptr);
2614
2615 SeqMgrClearFeatureIndexes (ompcp->input_entityID, NULL);
2616 SeqMgrIndexFeatures (ompcp->input_entityID, NULL);
2617
2618 RestoreSeqEntryObjMgrData (top_sep, omdptop, &omdata);
2619
2620 SeqMgrClearFeatureIndexes (ompcp->input_entityID, NULL);
2621 SeqMgrIndexFeatures (ompcp->input_entityID, NULL);
2622
2623 ObjMgrSetDirtyFlag (ompcp->input_entityID, TRUE);
2624 ObjMgrSendMsg (OM_MSG_UPDATE, ompcp->input_entityID, 0, 0);
2625
2626 ForceCleanupEntityID (ompcp->input_entityID);
2627
2628 ObjMgrSetDirtyFlag (ompcp->input_entityID, TRUE);
2629 ObjMgrSendMsg (OM_MSG_UPDATE, ompcp->input_entityID, 0, 0);
2630
2631 return OM_MSG_RET_DONE;
2632 }
2633
2634 static Boolean
2635 RemoveSetsInNucProtSet
2636 (BioseqSetPtr parent_bssp,
2637 Uint2 entityID)
2638 {
2639 Boolean rval = FALSE;
2640 SeqEntryPtr this_sep;
2641 BioseqSetPtr target_bssp;
2642
2643 if (parent_bssp == NULL || parent_bssp->_class != BioseqseqSet_class_nuc_prot)
2644 {
2645 return FALSE;
2646 }
2647
2648 this_sep = parent_bssp->seq_set;
2649 if (this_sep != NULL
2650 && IS_Bioseq_set (this_sep)
2651 && this_sep->data.ptrvalue != NULL)
2652 {
2653 target_bssp = (BioseqSetPtr) this_sep->data.ptrvalue;
2654 if (target_bssp->_class == BioseqseqSet_class_segset)
2655 {
2656 RemoveOneSegSet (this_sep, parent_bssp, target_bssp, entityID);
2657 /* propagate the descriptors on the set to the sequences in the set */
2658 SetDescriptorPropagate (parent_bssp);
2659 rval = TRUE;
2660 }
2661 }
2662 return rval;
2663 }
2664
2665
2666 static Int2 LIBCALLBACK RemoveSetsInBioseqSet (BioseqSetPtr parent_bssp, Uint2 entityID)
2667 {
2668 ObjMgrDataPtr omdptop;
2669 ObjMgrData omdata;
2670 Uint2 top_parenttype;
2671 Pointer top_parentptr;
2672 SeqEntryPtr top_sep, this_sep, next_sep;
2673 BioseqSetPtr target_bssp;
2674 Boolean need_locus_fixup = FALSE;
2675
2676
2677 if (parent_bssp == NULL) {
2678 return OM_MSG_RET_ERROR;
2679 }
2680
2681 top_sep = GetTopSeqEntryForEntityID (entityID);
2682 if (top_sep == NULL) return OM_MSG_RET_ERROR;
2683 SaveSeqEntryObjMgrData (top_sep, &omdptop, &omdata);
2684 GetSeqEntryParent (top_sep, &top_parentptr, &top_parenttype);
2685
2686 if (parent_bssp->_class == BioseqseqSet_class_not_set
2687 || parent_bssp->_class == BioseqseqSet_class_segset
2688 || parent_bssp->_class == BioseqseqSet_class_parts)
2689 {
2690 Message (MSG_ERROR, "Can't move sequences up into parent");
2691 return OM_MSG_RET_ERROR;
2692 }
2693 else if (parent_bssp->_class == BioseqseqSet_class_nuc_prot)
2694 {
2695 if (! RemoveSetsInNucProtSet (parent_bssp, entityID))
2696 {
2697 Message (MSG_ERROR, "Can't move sequences up into parent");
2698 return OM_MSG_RET_ERROR;
2699 }
2700 else
2701 {
2702 need_locus_fixup = TRUE;
2703 }
2704 }
2705 else
2706 {
2707 for (this_sep = parent_bssp->seq_set; this_sep != NULL; this_sep = next_sep)
2708 {
2709 next_sep = this_sep->next;
2710 if (!IS_Bioseq_set (this_sep) || this_sep->data.ptrvalue == NULL)
2711 {
2712 continue;
2713 }
2714 target_bssp = (BioseqSetPtr) this_sep->data.ptrvalue;
2715 if (target_bssp->_class == BioseqseqSet_class_nuc_prot)
2716 {
2717 need_locus_fixup |= RemoveSetsInNucProtSet (target_bssp, entityID);
2718 }
2719 else if (target_bssp->_class == BioseqseqSet_class_segset)
2720 {
2721 RemoveOneSegSet (this_sep, parent_bssp, target_bssp, entityID);
2722 }
2723 else if (target_bssp->_class != BioseqseqSet_class_not_set
2724 && target_bssp->_class != BioseqseqSet_class_parts)
2725 {
2726 /* propagate the descriptors on the set to the sequences in the set */
2727 SetDescriptorPropagate (target_bssp);
2728
2729 RemoveOneUnsegmentedSet (this_sep, target_bssp, parent_bssp);
2730 }
2731 }
2732 }
2733
2734 if (need_locus_fixup)
2735 {
2736 DoFixupLocus (top_sep);
2737 DoFixupSegSet (top_sep);
2738 }
2739
2740 SeqMgrLinkSeqEntry (top_sep, top_parenttype, top_parentptr);
2741
2742 SeqMgrClearFeatureIndexes (entityID, NULL);
2743 SeqMgrIndexFeatures (entityID, NULL);
2744
2745 RestoreSeqEntryObjMgrData (top_sep, omdptop, &omdata);
2746
2747 SeqMgrClearFeatureIndexes (entityID, NULL);
2748 SeqMgrIndexFeatures (entityID, NULL);
2749
2750 ObjMgrSetDirtyFlag (entityID, TRUE);
2751 ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
2752
2753 ForceCleanupEntityID (entityID);
2754
2755 ObjMgrSetDirtyFlag (entityID, TRUE);
2756 ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
2757
2758 return OM_MSG_RET_DONE;
2759 }
2760
2761 static Int2 LIBCALLBACK RemoveSetsInSelectedSet (Pointer data)
2762 {
2763 OMProcControlPtr ompcp;
2764
2765 ompcp = (OMProcControlPtr) data;
2766 if (ompcp == NULL
2767 || ompcp->input_itemtype != OBJ_BIOSEQSET
2768 || ompcp->input_data == NULL)
2769 {
2770 return OM_MSG_RET_ERROR;
2771 }
2772
2773 return RemoveSetsInBioseqSet((BioseqSetPtr) ompcp->input_data, ompcp->input_entityID);
2774
2775 }
2776
2777
2778 extern void RemoveSetsInSetMenuItem (IteM i)
2779 {
2780 BaseFormPtr bfp;
2781 BioseqSetPtr bssp;
2782 SeqEntryPtr sep;
2783
2784 #ifdef WIN_MAC
2785 bfp = currentFormDataPtr;
2786 #else
2787 bfp = GetObjectExtra (i);
2788 #endif
2789 if (bfp == NULL) return;
2790
2791 sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
2792 if (sep == NULL || !IS_Bioseq_set (sep)) {
2793 Message (MSG_ERROR, "This record does not have a top-levelset!");
2794 } else {
2795 bssp = FindTopLevelSetForDesktopFunction((BioseqSetPtr) sep->data.ptrvalue);
2796 RemoveSetsInBioseqSet(bssp, bfp->input_entityID);
2797 }
2798 }
2799
2800
2801 static void DoPartSeqAlignToParent (DenseSegPtr dsp)
2802
2803 {
2804 BioseqPtr bsp, part;
2805 SeqMgrSegmentContext context;
2806 Int2 i, j, k;
2807 SeqIdPtr sip, next;
2808 SeqIdPtr PNTR prev;
2809 Int4 val;
2810
2811 if (dsp == NULL || dsp->ids == NULL || dsp->starts == NULL) return;
2812 for (i = 0, sip = dsp->ids, prev = &(dsp->ids);
2813 i < dsp->dim && sip != NULL;
2814 i++, sip = next) {
2815 next = sip->next;
2816 part = BioseqFind (sip);
2817 if (part == NULL) continue;
2818 bsp = SeqMgrGetParentOfPart (part, &context);
2819 if (bsp == NULL) continue;
2820 *prev = NULL;
2821 sip->next = NULL;
2822 SeqIdFree (sip);
2823 sip = SeqIdDup (SeqIdFindBest (bsp->id, 0));
2824 *prev = sip;
2825 sip->next = next;
2826 prev = &(sip->next);
2827 for (j = 0; j < dsp->numseg; j++) {
2828 k = (dsp->dim * j) + i;
2829 val = dsp->starts [k];
2830 if (val != -1) {
2831 if (context.strand == Seq_strand_minus) {
2832 dsp->starts [k] = context.cumOffset + (context.to - val);
2833 } else {
2834 dsp->starts [k] = context.cumOffset + (val - context.from);
2835 }
2836 }
2837 }
2838 }
2839 }
2840
2841 static Int2 LIBCALLBACK PartSeqAlignToParent (Pointer data)
2842
2843 {
2844 DenseSegPtr dsp;
2845 OMProcControlPtr ompcp;
2846 SeqAlignPtr sap;
2847
2848 ompcp = (OMProcControlPtr) data;
2849 if (ompcp == NULL || ompcp->proc == NULL) return OM_MSG_RET_ERROR;
2850 switch (ompcp->input_itemtype) {
2851 case OBJ_SEQALIGN :
2852 sap = (SeqAlignPtr) ompcp->input_data;
2853 break;
2854 case 0 :
2855 return OM_MSG_RET_ERROR;
2856 default :
2857 return OM_MSG_RET_ERROR;
2858 }
2859 if (sap == NULL) return OM_MSG_RET_ERROR;
2860
2861 if (sap->segtype == SAS_DENSEG) {
2862 dsp = (DenseSegPtr) sap->segs;
2863 if (dsp != NULL) {
2864 DoPartSeqAlignToParent (dsp);
2865 }
2866 }
2867
2868 ObjMgrSetDirtyFlag (ompcp->input_entityID, TRUE);
2869 ObjMgrSendMsg (OM_MSG_UPDATE, ompcp->input_entityID, 0, 0);
2870
2871 return OM_MSG_RET_DONE;
2872 }
2873
2874 static Int2 LIBCALLBACK ConvertToTrueMultipleAlignment (Pointer data)
2875
2876 {
2877 OMProcControlPtr ompcp;
2878 SeqAlignPtr sap;
2879 SeqAnnotPtr sanp;
2880
2881 ompcp = (OMProcControlPtr) data;
2882 if (ompcp == NULL || ompcp->proc == NULL) return OM_MSG_RET_ERROR;
2883 switch (ompcp->input_itemtype) {
2884 case OBJ_SEQALIGN :
2885 break;
2886 case OBJ_SEQANNOT :
2887 break;
2888 case 0 :
2889 return OM_MSG_RET_ERROR;
2890 default :
2891 return OM_MSG_RET_ERROR;
2892 }
2893 if (ompcp->input_data == NULL) return OM_MSG_RET_ERROR;
2894 sap = NULL;
2895 if (ompcp->input_itemtype == OBJ_SEQALIGN)
2896 sap = (SeqAlignPtr) ompcp->input_data;
2897 else
2898 {
2899 sanp = (SeqAnnotPtr) ompcp->input_data;
2900 if (sanp->type == 2)
2901 sap = (SeqAlignPtr)(sanp->data);
2902 }
2903 if (sap == NULL) return OM_MSG_RET_ERROR;
2904
2905 ConvertPairwiseToMultipleAlignment (sap);
2906
2907 ObjMgrSetDirtyFlag (ompcp->input_entityID, TRUE);
2908 ObjMgrSendMsg (OM_MSG_UPDATE, ompcp->input_entityID, 0, 0);
2909
2910 return OM_MSG_RET_DONE;
2911 }
2912
2913 static Int2 LIBCALLBACK ConvertToSegSetAlignment (Pointer data)
2914
2915 {
2916 OMProcControlPtr ompcp;
2917 SeqAlignPtr salp, sap;
2918 SeqAnnotPtr sanp = NULL, new_sanp;
2919
2920 ompcp = (OMProcControlPtr) data;
2921 if (ompcp == NULL || ompcp->proc == NULL) return OM_MSG_RET_ERROR;
2922 switch (ompcp->input_itemtype) {
2923 case OBJ_SEQALIGN :
2924 break;
2925 case OBJ_SEQANNOT :
2926 break;
2927 case 0 :
2928 default :
2929 Message (MSG_ERROR, "Must select alignment to convert!");
2930 return OM_MSG_RET_ERROR;
2931 }
2932 if (ompcp->input_data == NULL) return OM_MSG_RET_ERROR;
2933 sap = NULL;
2934 if (ompcp->input_itemtype == OBJ_SEQALIGN)
2935 {
2936 sap = (SeqAlignPtr) ompcp->input_data;
2937 sanp = GetSeqAnnotForAlignment (sap);
2938 }
2939 else
2940 {
2941 sanp = (SeqAnnotPtr) ompcp->input_data;
2942 if (sanp->type == 2)
2943 sap = (SeqAlignPtr)(sanp->data);
2944 }
2945 if (sap == NULL || sanp == NULL) return OM_MSG_RET_ERROR;
2946 if (sap->next == NULL)
2947 {
2948 return OM_MSG_RET_DONE;
2949 }
2950
2951 salp = sap->next;
2952 sap->next = NULL;
2953 if (sap->saip != NULL)
2954 {
2955 SeqAlignIndexFree(sap->saip);
2956 sap->saip = NULL;
2957 }
2958 AlnMgr2IndexSeqAlignEx(sap, FALSE);
2959 /* AlnMgr2IndexSeqAlign (sap); */
2960
2961 while (salp != NULL)
2962 {
2963 new_sanp = SeqAnnotNew ();
2964 new_sanp->type = 2;
2965 new_sanp->data = salp;
2966 new_sanp->next = sanp->next;
2967 sanp->next = new_sanp;
2968 sap = salp->next;
2969 salp->next = NULL;
2970 if (salp->saip != NULL)
2971 {
2972 SeqAlignIndexFree(salp->saip);
2973 salp->saip = NULL;
2974 }
2975 AlnMgr2IndexSeqAlign (salp);
2976
2977 salp = sap;
2978 }
2979
2980 ObjMgrSetDirtyFlag (ompcp->input_entityID, TRUE);
2981 ObjMgrSendMsg (OM_MSG_UPDATE, ompcp->input_entityID, 0, 0);
2982
2983 return OM_MSG_RET_DONE;
2984 }
2985
2986
2987 static void NoMoreSegGapForOneAlignment (SeqAlignPtr sap, Pointer userdata)
2988 {
2989 SeqAlignPtr salp;
2990
2991 salp = sap;
2992 while (salp != NULL)
2993 {
2994 if (salp->saip != NULL)
2995 {
2996 SeqAlignIndexFree(salp->saip);
2997 salp->saip = NULL;
2998 } else {
2999 AlnMgr2IndexSingleChildSeqAlign(salp); /* make sure it's dense-seg */
3000 SeqAlignIndexFree(salp->saip);
3001 salp->saip = NULL;
3002 }
3003 CleanUpSegGap(salp);
3004 AlnMgr2IndexSingleChildSeqAlign(salp);
3005 salp = salp->next;
3006 }
3007 }
3008
3009
3010 static Int2 LIBCALLBACK NoMoreSegGap (Pointer data)
3011 {
3012 OMProcControlPtr ompcp;
3013 SeqAlignPtr sap;
3014 SeqAnnotPtr sanp;
3015
3016 ompcp = (OMProcControlPtr) data;
3017 if (ompcp == NULL || ompcp->proc == NULL) return OM_MSG_RET_ERROR;
3018 switch (ompcp->input_itemtype) {
3019 case OBJ_SEQALIGN :
3020 sap = (SeqAlignPtr) ompcp->input_data;
3021 if (sap == NULL)
3022 {
3023 return OM_MSG_RET_ERROR;
3024 }
3025 else
3026 {
3027 NoMoreSegGapForOneAlignment (sap, NULL);
3028 }
3029 break;
3030 case OBJ_SEQANNOT :
3031 sanp = (SeqAnnotPtr) ompcp->input_data;
3032 if (sanp->type != 2)
3033 {
3034 return OM_MSG_RET_ERROR;
3035 }
3036 else if ((sap = (SeqAlignPtr)(sanp->data)) == NULL)
3037 {
3038 return OM_MSG_RET_ERROR;
3039 }
3040 else
3041 {
3042 NoMoreSegGapForOneAlignment (sap, NULL);
3043 }
3044 break;
3045 case OBJ_BIOSEQ :
3046 VisitAlignmentsOnBsp (ompcp->input_data, NULL, NoMoreSegGapForOneAlignment);
3047 break;
3048 case OBJ_BIOSEQSET :
3049 VisitAlignmentsInSet (ompcp->input_data, NULL, NoMoreSegGapForOneAlignment);
3050 break;
3051 case 0 :
3052 return OM_MSG_RET_ERROR;
3053 break;
3054 default :
3055 return OM_MSG_RET_ERROR;
3056 break;
3057 }
3058 ObjMgrSetDirtyFlag (ompcp->input_entityID, TRUE);
3059 ObjMgrSendMsg (OM_MSG_UPDATE, ompcp->input_entityID, 0, 0);
3060 return OM_MSG_RET_DONE;
3061 }
3062
3063
3064 extern void GetRidOfSegGapMenuItem (IteM i)
3065 {
3066 BaseFormPtr bfp;
3067 SeqEntryPtr sep;
3068
3069 #ifdef WIN_MAC
3070 bfp = currentFormDataPtr;
3071 #else
3072 bfp = GetObjectExtra (i);
3073 #endif
3074 if (bfp == NULL) return;
3075
3076 sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
3077 VisitAlignmentsInSep (sep, NULL, NoMoreSegGapForOneAlignment);
3078 ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE);
3079 ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0);
3080 }
3081
3082
3083 static void SqnSeqAlignDeleteInSeqEntryCallBack (SeqEntryPtr sep, Pointer mydata,
3084 Int4 index, Int2 indent)
3085 {
3086 BioseqPtr bsp;
3087 BioseqSetPtr bssp;
3088 SeqAnnotPtr sap,
3089 pre;
3090 BoolPtr dirtyp;
3091
3092 if (sep != NULL && sep->data.ptrvalue && mydata != NULL) {
3093 dirtyp = (BoolPtr)mydata;
3094 if (IS_Bioseq(sep)) {
3095 bsp = (BioseqPtr) sep->data.ptrvalue;
3096 if (bsp!=NULL) {
3097 sap=bsp->annot;
3098 pre=NULL;
3099 while (sap) {
3100 if (sap->type == 2) {
3101 if (pre==NULL) {
3102 bsp->annot = sap->next;
3103 sap->next=NULL;
3104 sap = SeqAnnotFree (sap);
3105 if (bsp->annot)
3106 sap=bsp->annot->next;
3107 }
3108 else {
3109 pre=sap->next;
3110 sap->next=NULL;
3111 sap = SeqAnnotFree (sap);
3112 if (pre)
3113 sap=pre->next;
3114 }
3115 *dirtyp=TRUE;
3116 }
3117 else {
3118 pre=sap;
3119 sap=sap->next;
3120 }
3121 }
3122 }
3123 }
3124 else if(IS_Bioseq_set(sep)) {
3125 bssp = (BioseqSetPtr)sep->data.ptrvalue;
3126 if (bssp!=NULL) {
3127 sap=bssp->annot;
3128 pre=NULL;
3129 while (sap) {
3130 if (sap->type == 2) {
3131 if (pre==NULL) {
3132 bssp->annot = sap->next;
3133 sap->next=NULL;
3134 sap = SeqAnnotFree (sap);
3135 if (bssp->annot)
3136 sap=bssp->annot->next;
3137 }
3138 else {
3139 pre=sap->next;
3140 sap->next=NULL;
3141 sap = SeqAnnotFree (sap);
3142 if (pre)
3143 sap=pre->next;
3144 }
3145 *dirtyp=TRUE;
3146 }
3147 else {
3148 pre=sap;
3149 sap=sap->next;
3150 }
3151 }
3152 }
3153 }
3154 }
3155 }
3156
3157 typedef struct alignmentoptionsform
3158 {
3159 Boolean accepted;
3160 Boolean done;
3161 } AlignmentOptionsFormData, PNTR AlignmentOptionsFormPtr;
3162
3163 static void AcceptAlignmentOptions (ButtoN b)
3164 {
3165 AlignmentOptionsFormPtr aofp;
3166
3167 aofp = (AlignmentOptionsFormPtr) GetObjectExtra (b);
3168 if (aofp == NULL) return;
3169 aofp->accepted = TRUE;
3170 aofp->done = TRUE;
3171 }
3172
3173 static void CancelAlignmentOptions (ButtoN b)
3174 {
3175 AlignmentOptionsFormPtr aofp;
3176
3177 aofp = (AlignmentOptionsFormPtr) GetObjectExtra (b);
3178 if (aofp == NULL) return;
3179 aofp->accepted = FALSE;
3180 aofp->done = TRUE;
3181 }
3182
3183
3184 extern TSequenceInfoPtr GetAlignmentOptions (Uint1Ptr moltype, TSequenceInfoPtr sequence_info)
3185 {
3186 ButtoN b;
3187 GrouP c, h;
3188 WindoW w;
3189 AlignmentOptionsFormData aofd;
3190 DialoG d;
3191 ValNodePtr err_list;
3192
3193 aofd.accepted = FALSE;
3194 aofd.done = FALSE;
3195 w = ModalWindow (-50, -33, -10, -10, NULL);
3196
3197 h = HiddenGroup (w, -1, 0, NULL);
3198 SetGroupSpacing (h, 10, 10);
3199
3200 d = AlnSettingsDlg (h, moltype == NULL ? FALSE : TRUE);
3201 PointerToDialog (d, sequence_info);
3202
3203 c = HiddenGroup (h, 4, 0, NULL);
3204 b = DefaultButton (c, "Accept", AcceptAlignmentOptions);
3205 SetObjectExtra (b, &aofd, NULL);
3206 b = PushButton (c, "Cancel", CancelAlignmentOptions);
3207 SetObjectExtra (b, &aofd, NULL);
3208
3209 AlignObjects (ALIGN_CENTER, (HANDLE) d, (HANDLE) c, NULL);
3210 RealizeWindow (w);
3211 Show (w);
3212 Update ();
3213
3214 while (!aofd.done) {
3215 while (!aofd.done)
3216 {
3217 ProcessExternalEvent ();
3218 Update ();
3219 }
3220 ProcessAnEvent ();
3221 if (!aofd.accepted)
3222 {
3223 Remove (w);
3224 return NULL;
3225 }
3226
3227 err_list = TestDialog (d);
3228 if (err_list != NULL) {
3229 Message (MSG_ERROR, err_list->data.ptrvalue);
3230 err_list = ValNodeFreeData (err_list);
3231 aofd.done = FALSE;
3232 aofd.accepted = FALSE;
3233 }
3234 }
3235 sequence_info = DialogToPointer (d);
3236 if (sequence_info == NULL) return NULL;
3237 if (StringCmp (sequence_info->alphabet, protein_alphabet) == 0) {
3238 if (moltype != NULL) {
3239 *moltype = Seq_mol_aa;
3240 }
3241 } else {
3242 if (moltype != NULL) {
3243 *moltype = Seq_mol_na;
3244 }
3245 }
3246
3247 Remove (w);
3248 return sequence_info;
3249 }
3250
3251 static void FixAlignmentIdsOkCancel (ButtoN b)
3252 {
3253 BoolPtr bp;
3254
3255 bp = (BoolPtr) GetObjectExtra (b);
3256 if (bp != NULL)
3257 {
3258 *bp = TRUE;
3259 }
3260 }
3261
3262 static void EnableTextID (GrouP g)
3263 {
3264 TexT id_text;
3265
3266 id_text = (TexT) GetObjectExtra (g);
3267 if (id_text != NULL)
3268 {
3269 if (GetValue (g) > 5)
3270 {
3271 Enable (id_text);
3272 }
3273 else
3274 {
3275 Disable (id_text);
3276 }
3277 }
3278 }
3279
3280 static Boolean ReplaceAlignmentIDsFromFile (TAlignmentFilePtr afp, Int4 index)
3281 {
3282 ReadBufferData rbd;
3283 Char path [PATH_MAX];
3284 CharPtr line, cp, first_id, second_id;
3285 Int4 k;
3286 Boolean found_id;
3287 ValNodePtr err_list = NULL;
3288 CharPtr err_msg = NULL;
3289 CharPtr err_msg_prefix = "Unable to find ";
3290 CharPtr err_msg_suffix = " from file in alignment";
3291 Int4 err_msg_len = 0;
3292 ValNodePtr vnp;
3293
3294 if (afp == NULL || index < -1 || index >= afp->num_sequences)
3295 {
3296 return FALSE;
3297 }
3298
3299 rbd.fp = NULL;
3300 while (rbd.fp == NULL)
3301 {
3302 if (!GetInputFileName (path, sizeof (path), NULL, NULL))
3303 {
3304 return FALSE;
3305 }
3306 rbd.fp = FileOpen (path, "r");
3307 if (rbd.fp == NULL)
3308 {
3309 Message (MSG_ERROR, "Unable to open %s", path);
3310 }
3311 }
3312
3313 rbd.current_data = NULL;
3314
3315 line = AbstractReadFunction (&rbd);
3316 while (line != NULL)
3317 {
3318 cp = line;
3319 while (isspace ((Int4)(*cp)))
3320 {
3321 cp++;
3322 }
3323 if (*cp != 0)
3324 {
3325 first_id = cp;
3326 while (!isspace ((Int4)(*cp)) && *cp != 0)
3327 {
3328 cp++;
3329 }
3330 while (isspace ((Int4)(*cp)))
3331 {
3332 *cp = 0;
3333 cp++;
3334 }
3335 second_id = cp;
3336 TrimSpacesAroundString (second_id);
3337 if (*second_id != 0)
3338 {
3339 found_id = FALSE;
3340 for (k = index; k < afp->num_sequences && ! found_id; k++)
3341 {
3342 if (StringCmp (afp->ids[k], first_id) == 0)
3343 {
3344 found_id = TRUE;
3345 MemFree (afp->ids[k]);
3346 afp->ids[k] = StringSave (second_id);
3347 }
3348 else if (StringCmp (afp->ids[k], second_id) == 0)
3349 {
3350 found_id = TRUE;
3351 MemFree (afp->ids[k]);
3352 afp->ids[k] = StringSave (first_id);
3353 }
3354 }
3355 if (!found_id)
3356 {
3357 ValNodeAddPointer (&err_list, 0, StringSave (first_id));
3358 ValNodeAddPointer (&err_list, 0, StringSave (second_id));
3359 err_msg_len += StringLen (first_id) + StringLen (second_id) + 8;
3360 }
3361 }
3362 }
3363
3364 line = AbstractReadFunction (&rbd);
3365 }
3366 FileClose (rbd.fp);
3367 if (err_list != NULL)
3368 {
3369 err_msg_len += StringLen (err_msg_prefix) + StringLen (err_msg_suffix) + 6;
3370 err_msg = (CharPtr) MemNew (err_msg_len * sizeof (Char));
3371 if (err_msg != NULL)
3372 {
3373 sprintf (err_msg, err_msg_prefix);
3374 vnp = err_list;
3375 while (vnp != NULL && vnp->next != NULL)
3376 {
3377 StringCat (err_msg, (CharPtr) vnp->data.ptrvalue);
3378 StringCat (err_msg, " or ");
3379 StringCat (err_msg, (CharPtr) vnp->next->data.ptrvalue);
3380
3381 if (vnp->next->next != NULL)
3382 {
3383 StringCat (err_msg, ", ");
3384 }
3385 if (vnp->next->next != NULL && vnp->next->next->next != NULL && vnp->next->next->next->next == NULL)
3386 {
3387 StringCat (err_msg, " and ");
3388 }
3389 vnp = vnp->next->next;
3390 }
3391 StringCat (err_msg, err_msg_suffix);
3392 Message (MSG_ERROR, err_msg);
3393 MemFree (err_msg);
3394 }
3395 }
3396 return TRUE;
3397 }
3398
3399 static void FixToFarPointer (TAlignmentFilePtr afp, Int4 index)
3400 {
3401 CharPtr tmp_id_str;
3402
3403 if (afp == NULL || index < -1 || index >= afp->num_sequences)
3404 {
3405 return;
3406 }
3407 tmp_id_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (afp->ids [index]) + 4));
3408 if (tmp_id_str == NULL)
3409 {
3410 return;
3411 }
3412 sprintf (tmp_id_str, "acc%s", afp->ids [index]);
3413 MemFree (afp->ids [index]);
3414 afp->ids[index] = tmp_id_str;
3415 }
3416
3417
3418 static void RemoveNthSequenceFromAlignment (TAlignmentFilePtr afp, Int4 n)
3419 {
3420 Int4 i;
3421
3422 if (afp == NULL || n < 0) {
3423 return;
3424 }
3425
3426 if (afp->deflines != NULL && n < afp->num_deflines) {
3427 afp->deflines[n] = MemFree (afp->deflines[n]);
3428 for (i = n + 1; i < afp->num_deflines; i++) {
3429 afp->deflines[i - 1] = afp->deflines[i];
3430 }
3431 afp->deflines[afp->num_deflines - 1] = NULL;
3432 afp->num_deflines--;
3433 }
3434
3435 if (afp->organisms != NULL && n < afp->num_organisms) {
3436 afp->organisms[n] = MemFree (afp->organisms[n]);
3437 for (i = n + 1; i < afp->num_organisms; i++) {
3438 afp->organisms[i - 1] = afp->organisms[i];
3439 }
3440 afp->organisms[afp->num_organisms - 1] = NULL;
3441 afp->num_organisms--;
3442 }
3443
3444 if (afp->sequences != NULL && n < afp->num_sequences) {
3445 afp->sequences[n] = MemFree (afp->sequences[n]);
3446 afp->ids[n] = MemFree (afp->ids[n]);
3447 for (i = n + 1; i < afp->num_sequences; i++) {
3448 afp->sequences[i - 1] = afp->sequences[i];
3449 afp->ids[i - 1] = afp->ids[i];
3450 }
3451 afp->sequences[afp->num_sequences - 1] = NULL;
3452 afp->ids[afp->num_sequences - 1] = NULL;
3453 afp->num_sequences--;
3454 }
3455 }
3456
3457
3458 static Boolean FixAlignmentIDs (TAlignmentFilePtr afp, Int4 index, BoolPtr all_far, BoolPtr all_skip, BoolPtr removed)
3459 {
3460 WindoW w;
3461 GrouP h, choice_grp, c;
3462 ButtoN b;
3463 Boolean done = FALSE;
3464 Boolean cancelled = FALSE;
3465 PrompT p;
3466 CharPtr prompt_str;
3467 CharPtr prompt_str_fmt = "Unable to find sequence %s from alignment in set.";
3468 TexT id_text;
3469 CharPtr id_str;
3470 Int2 fix_choice;
3471
3472 if (afp == NULL || index < -1 || index >= afp->num_sequences)
3473 {
3474 return FALSE;
3475 }
3476
3477 id_str = afp->ids[index];
3478 if (StringHasNoText (id_str))
3479 {
3480 return FALSE;
3481 }
3482
3483 w = MovableModalWindow (-20, -13, -10, -10, "Source Assistant", NULL);
3484 h = HiddenGroup(w, -1, 0, NULL);
3485 SetGroupSpacing (h, 10, 10);
3486
3487 prompt_str = (CharPtr) MemNew ((StringLen (prompt_str_fmt) + StringLen (id_str)) * sizeof (Char));
3488 if (prompt_str != NULL)
3489 {
3490 sprintf (prompt_str, prompt_str_fmt, id_str);
3491 }
3492 p = StaticPrompt (h, prompt_str, 0, dialogTextHeight, systemFont, 'c');
3493 choice_grp = HiddenGroup (h, 0, 7, EnableTextID);
3494 RadioButton (choice_grp, "This is a far pointer");
3495 RadioButton (choice_grp, "All unmatched sequences are far pointers");
3496 RadioButton (choice_grp, "Read in a file that maps alignment IDs to sequence IDs");
3497 RadioButton (choice_grp, "Remove this sequence from the alignment");
3498 RadioButton (choice_grp, "Remove all unmatched sequences from the alignment");
3499 RadioButton (choice_grp, "Use this ID for this sequence");
3500 id_text = DialogText (choice_grp, "", 20, NULL);
3501 Disable (id_text);
3502 SetValue (choice_grp, 1);
3503 SetObjectExtra (choice_grp, id_text, NULL);
3504 c = HiddenGroup (h, 2, 0, NULL);
3505 b = PushButton(c, "OK", FixAlignmentIdsOkCancel);
3506 SetObjectExtra (b, &done, NULL);
3507 b = PushButton(c, "Cancel", FixAlignmentIdsOkCancel);
3508 SetObjectExtra (b, &cancelled, NULL);
3509 AlignObjects (ALIGN_CENTER, (HANDLE) p, (HANDLE) choice_grp, (HANDLE) c, NULL);
3510
3511 Show(w);
3512 Select (w);
3513 while (!done && !cancelled)
3514 {
3515 while (!done && !cancelled)
3516 {
3517 ProcessExternalEvent ();
3518 Update ();
3519 }
3520 ProcessAnEvent ();
3521 if (!cancelled)
3522 {
3523 fix_choice = GetValue (choice_grp);
3524 switch (fix_choice)
3525 {
3526 case 1:
3527 /* far pointer */
3528 FixToFarPointer (afp, index);
3529 break;
3530 case 2:
3531 /* all far pointers */
3532 if (all_far != NULL)
3533 {
3534 *all_far = TRUE;
3535 }
3536 FixToFarPointer (afp, index);
3537 break;
3538 case 3:
3539 /* read in file with replacements */
3540 if (!ReplaceAlignmentIDsFromFile (afp, index))
3541 {
3542 done = FALSE;
3543 }
3544 break;
3545 case 4:
3546 /* skip */
3547 RemoveNthSequenceFromAlignment (afp, index);
3548 if (removed != NULL) {
3549 *removed = TRUE;
3550 }
3551 break;
3552 case 5:
3553 /* skip all */
3554 RemoveNthSequenceFromAlignment (afp, index);
3555 if (all_skip != NULL)
3556 {
3557 *all_skip = TRUE;
3558 }
3559 if (removed != NULL) {
3560 *removed = TRUE;
3561 }
3562 break;
3563 case 6:
3564 /* use single replacement */
3565 id_str = SaveStringFromText (id_text);
3566 if (StringHasNoText (id_str))
3567 {
3568 MemFree (id_str);
3569 Message (MSG_ERROR, "You did not specify text for the ID!");
3570 done = FALSE;
3571 }
3572 else
3573 {
3574 MemFree (afp->ids [index]);
3575 afp->ids [index] = id_str;
3576 }
3577 break;
3578 }
3579 }
3580 }
3581 Remove (w);
3582 if (cancelled)
3583 {
3584 return FALSE;
3585 }
3586 else
3587 {
3588 return TRUE;
3589 }
3590 }
3591
3592
3593 static Boolean CorrectAlignmentIDs (TAlignmentFilePtr afp, Uint1 moltype)
3594 {
3595 Int4 index;
3596 CharPtr seq_data;
3597 SeqIdPtr sip;
3598 BioseqPtr bsp;
3599 CharPtr tmp_id_str;
3600 Char prot_str[200];
3601 Boolean all_far = FALSE;
3602 Boolean all_skip = FALSE;
3603 Boolean removed;
3604 SeqEntryPtr nucprot_sep;
3605 BioseqSetPtr nucprot_bssp;
3606
3607 for (index = 0; index < afp->num_sequences; index++) {
3608 seq_data = AlignmentStringToSequenceString (afp->sequences [index], moltype);
3609 if (! StringHasNoText (seq_data))
3610 {
3611 sip = MakeSeqID (afp->ids [index]);
3612 sip->next = SeqIdFree (sip->next);
3613 if (StringNCmp (afp->ids[index], "acc", 3) != 0)
3614 {
3615 bsp = BioseqFind (sip);
3616 if (bsp == NULL && StringChr (afp->ids[index], '|') == NULL)
3617 {
3618 sip = SeqIdFree (sip);
3619 tmp_id_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (afp->ids [index]) + 4));
3620 sprintf (tmp_id_str, "gb|%s", afp->ids [index]);
3621 sip = MakeSeqID (tmp_id_str);
3622 MemFree (tmp_id_str);
3623 bsp = BioseqFind (sip);
3624 }
3625
3626 if (bsp != NULL && moltype == Seq_mol_aa && !ISA_aa (bsp->mol))
3627 {
3628 /* IDs in alignment are for nucleotide sequences but this is
3629 * protein sequence - if single prot in nuc-prot set, replace
3630 * with protein sequence ID. Otherwise set to NULL - don't
3631 * want this ID in our alignment */
3632 nucprot_sep = GetBestTopParentForData (bsp->idx.entityID, bsp);
3633 bsp = NULL;
3634 if (nucprot_sep != NULL && IS_Bioseq_set (nucprot_sep) && nucprot_sep->data.ptrvalue != NULL)
3635 {
3636 nucprot_bssp = nucprot_sep->data.ptrvalue;
3637 if (nucprot_bssp->seq_set != NULL
3638 && nucprot_bssp->seq_set->next != NULL
3639 && nucprot_bssp->seq_set->next->next == NULL
3640 && IS_Bioseq (nucprot_bssp->seq_set->next))
3641 {
3642 sip = SeqIdFree (sip);
3643 bsp = (BioseqPtr) nucprot_bssp->seq_set->next->data.ptrvalue;
3644 SeqIdWrite (SeqIdFindBest (bsp->id, 0), prot_str, PRINTID_FASTA_LONG, sizeof (prot_str) - 1);
3645 afp->ids[index] = MemFree (afp->ids[index]);
3646 afp->ids[index] = StringSave (prot_str);
3647 }
3648 }
3649 }
3650
3651 if (bsp == NULL)
3652 {
3653 if (all_far)
3654 {
3655 FixToFarPointer (afp, index);
3656 }
3657 else if (all_skip)
3658 {
3659 RemoveNthSequenceFromAlignment(afp, index);
3660 index--;
3661 }
3662 else
3663 {
3664 removed = FALSE;
3665 if (!FixAlignmentIDs (afp, index, &all_far, &all_skip, &removed))
3666 {
3667 /* bail - user does not want to fix IDs */
3668 return FALSE;
3669 }
3670 if (removed) {
3671 index--;
3672 }
3673 }
3674 }
3675
3676 }
3677 sip = SeqIdFree (sip);
3678 }
3679 MemFree (seq_data);
3680 }
3681 return TRUE;
3682 }
3683
3684
3685 /* This function creates one SeqAnnot for each alignment in a chain of
3686 * alignments, breaking the chain for each alignment.
3687 * This is used to create the separate alignment annotations for a
3688 * discontiguous set of alignments, especially one created for a set
3689 * that contains delta sequences.
3690 */
3691 static void
3692 CreateSeqAnnotsForDiscontiguousAlignments
3693 (SeqAlignPtr salp_mult,
3694 Uint2 entityID,
3695 BioseqSetPtr set_for_alignment)
3696 {
3697 SeqAlignPtr salp, salp_next;
3698 SeqAnnotPtr sap;
3699 SeqEntryPtr sep;
3700 SeqAnnotPtr PNTR sapp = NULL;
3701 BioseqSetPtr bssp;
3702 BioseqPtr bsp;
3703 SeqAnnotPtr curr;
3704
3705 if (salp_mult == NULL)
3706 {
3707 return;
3708 }
3709 for (salp = salp_mult; salp != NULL; salp = salp_next)
3710 {
3711 salp_next = salp->next;
3712 salp->next = NULL;
3713 sap = SeqAnnotForSeqAlign(salp);
3714
3715 if (sap != NULL) {
3716 if (set_for_alignment == NULL)
3717 {
3718 sep = GetTopSeqEntryForEntityID (entityID);
3719 if (sep != NULL && sep->data.ptrvalue != NULL) {
3720 sapp = NULL;
3721 if (IS_Bioseq (sep)) {
3722 bsp = (BioseqPtr) sep->data.ptrvalue;
3723 sapp = &(bsp->annot);
3724 } else if (IS_Bioseq_set (sep)) {
3725 bssp = (BioseqSetPtr) sep->data.ptrvalue;
3726 sapp = &(bssp->annot);
3727 }
3728 }
3729 }
3730 else
3731 {
3732 sapp = &(set_for_alignment->annot);
3733 }
3734
3735 if (sapp != NULL) {
3736 if (*sapp != NULL) {
3737 curr = *sapp;
3738 while (curr->next != NULL) {
3739 curr = curr->next;
3740 }
3741 curr->next = sap;
3742 } else {
3743 *sapp = sap;
3744 }
3745 }
3746 }
3747 }
3748 }
3749
3750
3751 static Int2 LIBCALLBACK UpdateSeqAlignForSeqEntry (SeqEntryPtr sep)
3752
3753 {
3754 Char path [PATH_MAX];
3755 FILE *fp;
3756 SeqAlignPtr salp=NULL,
3757 salpnew;
3758 SeqEntryPtr sepnew=NULL;
3759 Uint2 entityID,
3760 itemID;
3761 MsgAnswer ans;
3762 Boolean ok = TRUE,
3763 dirty = FALSE;
3764 TSequenceInfoPtr sequence_info;
3765 ReadBufferData rbd;
3766 TErrorInfoPtr error_list;
3767 TAlignmentFilePtr afp;
3768 Uint1 moltype;
3769 ErrSev sev;
3770 SeqEntryPtr scope;
3771 SeqAlignPtr salp_copy;
3772 BioseqSetPtr bssp = NULL;
3773
3774 if (sep==NULL)
3775 return OM_MSG_RET_ERROR;
3776 entityID = ObjMgrGetEntityIDForChoice (sep);
3777 if (entityID < 1)
3778 return OM_MSG_RET_ERROR;
3779
3780 if (IS_Bioseq_set (sep)) {
3781 bssp = sep->data.ptrvalue;
3782 }
3783
3784 if (GetInputFileName (path, sizeof (path), NULL, "TEXT")) {
3785 fp = FileOpen (path, "r");
3786 if (fp != NULL) {
3787 sequence_info = GetAlignmentOptions (&moltype, NULL);
3788 if (sequence_info == NULL) return OM_MSG_RET_ERROR;
3789 error_list = NULL;
3790
3791 rbd.fp = fp;
3792 rbd.current_data = NULL;
3793 afp = ReadAlignmentFile ( AbstractReadFunction,
3794 (Pointer) &rbd,
3795 AbstractReportError,
3796 (Pointer) &error_list,
3797 sequence_info);
3798 if (afp != NULL)
3799 {
3800 SeqEntrySetScope (sep);
3801 if (CorrectAlignmentIDs (afp, moltype))
3802 {
3803 sepnew = MakeSequinDataFromAlignmentEx (afp, moltype, TRUE);
3804 }
3805 }
3806 ProduceAlignmentNotes (afp, error_list);
3807 ErrorInfoFree (error_list);
3808 SequenceInfoFree (sequence_info);
3809 AlignmentFileFree (afp);
3810 Update ();
3811 }
3812 }
3813 if (sepnew)
3814 {
3815 salpnew = (SeqAlignPtr) FindSeqAlignInSeqEntry (sepnew, OBJ_SEQALIGN);
3816 if (salpnew) {
3817 sev = ErrSetMessageLevel (SEV_FATAL);
3818
3819 scope = SeqEntrySetScope (NULL);
3820 /* adjust the start positions for the sequences read in from the alignments. */
3821 CalculateAlignmentOffsets (sepnew, sep);
3822 /* ValidateSeqAlignandACCInSeqEntry will readjust the start positions for
3823 * the alignments for far pointer sequences.
3824 */
3825 SeqEntrySetScope (scope);
3826 ok = ValidateSeqAlignandACCInSeqEntry (sepnew, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE);
3827
3828 ErrSetMessageLevel (sev);
3829
3830 if (ok) {
3831 AlnMgr2IndexSeqAlignEx(salpnew, FALSE);
3832 ok = CheckAlignmentSequenceLengths (salpnew);
3833 }
3834
3835 if (ok) {
3836 /* if there is already an alignment, ask if the user wants to remove the old
3837 * one before adding the new alignment.
3838 */
3839 salp = (SeqAlignPtr) FindSeqAlignInSeqEntry (sep, OBJ_SEQALIGN);
3840 if (salp)
3841 {
3842 ans = Message (MSG_OKC, "Do you wish to replace (OK) or add (Cancel) the alignment in your SeqEntry?");
3843 if (ans == ANS_OK)
3844 {
3845 SeqEntryExplore (sep, &dirty, SqnSeqAlignDeleteInSeqEntryCallBack);
3846 }
3847 }
3848
3849 /* make a copy of the alignment in sepnew - the alignment in sepnew will
3850 * be freed when sepnew is freed.
3851 */
3852 salp_copy = (SeqAlignPtr) AsnIoMemCopy (salpnew, (AsnReadFunc) SeqAlignAsnRead,
3853 (AsnWriteFunc) SeqAlignAsnWrite);
3854 /* the copy function doesn't copy the index, so the alignment must be
3855 * indexed now.
3856 */
3857 AlnMgr2IndexSeqAlignEx(salp_copy, FALSE);
3858 /* break the alignment at gaps, if the set contains any delta sequences */
3859 salp_copy = MakeDiscontiguousAlignments (salp_copy);
3860
3861 /* break the chain of alignments and create separate SeqAnnots for each one */
3862 CreateSeqAnnotsForDiscontiguousAlignments (salp_copy, entityID, bssp);
3863 ObjMgrSetDirtyFlag (entityID, TRUE);
3864 itemID = GetItemIDGivenPointer (entityID, OBJ_SEQENTRY, (Pointer) sep);
3865 ObjMgrSendMsg (OM_MSG_UPDATE, entityID, itemID, OBJ_SEQENTRY);
3866 }
3867 }
3868 ObjMgrFree (OBJ_SEQENTRY, (Pointer)sepnew);
3869 sepnew=NULL;
3870 }
3871 return OM_MSG_RET_OK;
3872 }
3873
3874 static Int2 LIBCALLBACK NewUpdateSeqAlign (Pointer data)
3875 {
3876 OMProcControlPtr ompcp;
3877 SeqEntryPtr sep = NULL;
3878 BioseqSetPtr bssp;
3879 SeqSubmitPtr ssp;
3880
3881 ompcp = (OMProcControlPtr) data;
3882 if (ompcp == NULL || ompcp->proc == NULL) return OM_MSG_RET_ERROR;
3883
3884 if (ompcp->input_data == NULL) return OM_MSG_RET_ERROR;
3885
3886 switch(ompcp->input_itemtype)
3887 {
3888 case OBJ_BIOSEQ :
3889 sep = SeqMgrGetSeqEntryForData (ompcp->input_data);
3890 break;
3891 case OBJ_BIOSEQSET :
3892 sep = SeqMgrGetSeqEntryForData (ompcp->input_data);
3893 bssp = (BioseqSetPtr) ompcp->input_data;
3894 break;
3895 case OBJ_SEQENTRY :
3896 sep = ompcp->input_data;
3897 break;
3898 case OBJ_SEQSUB :
3899 ssp = ompcp->input_data;
3900 if(ssp->datatype==1)
3901 sep = (SeqEntryPtr)ssp->data;
3902 break;
3903 case 0 :
3904 return OM_MSG_RET_ERROR;
3905 default :
3906 return OM_MSG_RET_ERROR;
3907 }
3908 if (sep==NULL)
3909 return OM_MSG_RET_ERROR;
3910
3911 return UpdateSeqAlignForSeqEntry (sep);
3912
3913 }
3914
3915 extern void UpdateSeqAlignMenuItem (IteM i)
3916 {
3917 BaseFormPtr bfp;
3918 BioseqSetPtr bssp;
3919 SeqEntryPtr sep;
3920
3921 #ifdef WIN_MAC
3922 bfp = currentFormDataPtr;
3923 #else
3924 bfp = GetObjectExtra (i);
3925 #endif
3926 if (bfp == NULL) return;
3927
3928 sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
3929 if (sep == NULL || !IS_Bioseq_set (sep)) {
3930 Message (MSG_ERROR, "This record does not have a top-levelset!");
3931 } else {
3932 bssp = FindTopLevelSetForDesktopFunction((BioseqSetPtr) sep->data.ptrvalue);
3933 sep = SeqMgrGetSeqEntryForData (bssp);
3934 UpdateSeqAlignForSeqEntry (sep);
3935 }
3936 }
3937
3938
3939 static void DenseSegSwitchSeqPos (DenseSegPtr dsp)
3940 {
3941 SeqIdPtr sip, sip_next;
3942 Int4 tmp_start, i;
3943 Uint1 tmp_strand;
3944
3945 if (dsp == NULL || dsp->dim != 2) {
3946 return;
3947 }
3948
3949 /* switch the IDs */
3950 sip = dsp->ids;
3951 sip_next = dsp->ids->next;
3952 sip_next->next = sip;
3953 sip->next = NULL;
3954 dsp->ids = sip_next;
3955
3956 /* switch the starts and strands*/
3957 for (i = 0; i < dsp->numseg; i++) {
3958 tmp_start = dsp->starts[(i * dsp->dim)];
3959 dsp->starts[(i * dsp->dim)] = dsp->starts[(i * dsp->dim) + 1];
3960 dsp->starts[(i * dsp->dim) + 1] = tmp_start;
3961 }
3962
3963 if (dsp->strands != NULL) {
3964 for (i = 0; i < dsp->numseg; i++) {
3965 tmp_strand = dsp->strands[(i * dsp->dim)];
3966 dsp->strands[(i * dsp->dim)] = dsp->strands[(i * dsp->dim) + 1];
3967 dsp->strands[(i * dsp->dim) + 1] = tmp_strand;
3968 }
3969 }
3970 }
3971
3972
3973 static BioseqPtr FindBioseqForSeqHist (SeqAlignPtr salp)
3974 {
3975 SeqIdPtr sip;
3976 BioseqPtr bsp;
3977 DenseSegPtr dsp;
3978
3979 if (salp == NULL || salp->dim != 2 || salp->segtype != SAS_DENSEG) {
3980 return NULL;
3981 }
3982
3983 dsp = (DenseSegPtr) salp->segs;
3984
3985 sip = dsp->ids->next;
3986
3987 bsp = BioseqFind (sip);
3988 if (bsp == NULL) {
3989 /* if we find the bioseq here, need to switch the positions */
3990 sip = dsp->ids;
3991 bsp = BioseqFind (sip);
3992 if (bsp != NULL) {
3993 DenseSegSwitchSeqPos (dsp);
3994 }
3995 }
3996 return bsp;
3997 }
3998
3999
4000 static SeqIdPtr CorrectIDsForSeqHistAlign (TAlignmentFilePtr afp, SeqEntryPtr sep)
4001 {
4002 SeqIdPtr sip;
4003 Boolean need_switch = FALSE;
4004 char *tmp;
4005
4006 if (afp == NULL || afp->num_sequences != 2) {
4007 return NULL;
4008 }
4009 if (StringNICmp (afp->ids[1], "acc", 3) == 0) {
4010 /* already have far pointer in place */
4011 sip = CreateSeqIdFromText (afp->ids[0], sep);
4012 } else if (StringNICmp (afp->ids[0], "acc", 3) == 0) {
4013 need_switch = TRUE;
4014 sip = CreateSeqIdFromText (afp->ids[1], sep);
4015 } else {
4016 /* first position should be primary */
4017 sip = CreateSeqIdFromText (afp->ids[0], sep);
4018 if (sip == NULL) {
4019 sip = CreateSeqIdFromText (afp->ids[1], sep);
4020 if (sip == NULL) {
4021 /* can't find match in record */
4022 } else {
4023 /* second position is primary, put acc on first position */
4024 need_switch = TRUE;
4025 tmp = malloc (StringLen (afp->ids[0]) + 4);
4026 sprintf (tmp, "acc%s", afp->ids[0]);
4027 free (afp->ids[0]);
4028 afp->ids[0] = tmp;
4029 }
4030 } else {
4031 tmp = malloc (StringLen (afp->ids[1]) + 4);
4032 sprintf (tmp, "acc%s", afp->ids[1]);
4033 free (afp->ids[1]);
4034 afp->ids[1] = tmp;
4035 }
4036 }
4037
4038 if (sip != NULL) {
4039 if (need_switch) {
4040 /* note - not bothering to switch deflines and organism names because
4041 * we don't need them for the seq-hist assembly
4042 */
4043 tmp = afp->ids[0];
4044 afp->ids[0] = afp->ids[1];
4045 afp->ids[1] = tmp;
4046
4047 tmp = afp->sequences[0];
4048 afp->sequences[0] = afp->sequences[1];
4049 afp->sequences[1] = tmp;
4050 }
4051 }
4052 return sip;
4053 }
4054
4055
4056 extern void ImportAlignmentForSeqHistInterval (IteM i)
4057 {
4058 BaseFormPtr bfp;
4059 Char path [PATH_MAX];
4060 SeqAlignPtr salp=NULL,
4061 salpnew;
4062 SeqEntryPtr sep=NULL,
4063 sepnew=NULL;
4064 Boolean ok = TRUE;
4065 TSequenceInfoPtr sequence_info;
4066 ReadBufferData rbd;
4067 TErrorInfoPtr error_list;
4068 TAlignmentFilePtr afp;
4069 Uint1 moltype;
4070 ErrSev sev;
4071 SeqEntryPtr scope;
4072 SeqAlignPtr salp_copy;
4073 BioseqSetPtr bssp = NULL;
4074 BioseqPtr bsp = NULL;
4075 SeqIdPtr sip = NULL;
4076 FILE *fp;
4077
4078 #ifdef WIN_MAC
4079 bfp = currentFormDataPtr;
4080 #else
4081 bfp = GetObjectExtra (i);
4082 #endif
4083 if (bfp == NULL) return;
4084 sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
4085 if (sep == NULL) return;
4086
4087 SeqEntrySetScope (sep);
4088
4089 if (GetInputFileName (path, sizeof (path), NULL, "TEXT")) {
4090 fp = FileOpen (path, "r");
4091 if (fp != NULL) {
4092 sequence_info = GetAlignmentOptions (&moltype, NULL);
4093 if (sequence_info == NULL) return;
4094 error_list = NULL;
4095
4096 rbd.fp = fp;
4097 rbd.current_data = NULL;
4098 afp = ReadAlignmentFile ( AbstractReadFunction,
4099 (Pointer) &rbd,
4100 AbstractReportError,
4101 (Pointer) &error_list,
4102 sequence_info);
4103 if (afp == NULL) {
4104 ProduceAlignmentNotes (afp, error_list);
4105 } else if (afp->num_sequences != 2) {
4106 Message (MSG_ERROR, "Must import alignment containing just the primary and TPA sequences");
4107 } else if ((sip = CorrectIDsForSeqHistAlign (afp, sep)) == NULL
4108 || (bsp = BioseqFind (sip)) == NULL) {
4109 Message (MSG_ERROR, "Unable to locate TPA sequence in alignment");
4110 } else {
4111 sepnew = MakeSequinDataFromAlignmentEx (afp, moltype, TRUE);
4112 if (sepnew != NULL) {
4113 salpnew = (SeqAlignPtr) FindSeqAlignInSeqEntry (sepnew, OBJ_SEQALIGN);
4114 if (salpnew == NULL) {
4115 Message (MSG_ERROR, "No alignment created");
4116 } else if (salpnew->dim != 2) {
4117 Message (MSG_ERROR, "Must import alignment containing just the primary and TPA sequences");
4118 } else {
4119 sev = ErrSetMessageLevel (SEV_FATAL);
4120
4121 scope = SeqEntrySetScope (NULL);
4122 /* adjust the start positions for the sequences read in from the alignments. */
4123 CalculateAlignmentOffsets (sepnew, sep);
4124 /* ValidateSeqAlignandACCInSeqEntry will readjust the start positions for
4125 * the alignments for far pointer sequences.
4126 */
4127 SeqEntrySetScope (scope);
4128 AlnMgr2IndexSingleChildSeqAlign(salpnew);
4129 UpdateOneSeqAlignFarPointer (salpnew, 1);
4130
4131 ErrSetMessageLevel (sev);
4132
4133 if (ok) {
4134 AlnMgr2IndexSingleChildSeqAlign(salpnew);
4135 ok = CheckAlignmentSequenceLengths (salpnew);
4136 }
4137 if (ok) {
4138 /* find the bioseq that this interval should be added to */
4139 scope = SeqEntrySetScope (sep);
4140
4141 /* make a copy of the alignment in sepnew - the alignment in sepnew will
4142 * be freed when sepnew is freed.
4143 */
4144 salp_copy = (SeqAlignPtr) AsnIoMemCopy (salpnew, (AsnReadFunc) SeqAlignAsnRead,
4145 (AsnWriteFunc) SeqAlignAsnWrite);
4146 /* the copy function doesn't copy the index, so the alignment must be
4147 * indexed now.
4148 */
4149 AlnMgr2IndexSingleChildSeqAlign(salp_copy);
4150
4151 /* add to Bioseq-hist assembly */
4152 if (bsp->hist == NULL) {
4153 bsp->hist = SeqHistNew ();
4154 }
4155 salp_copy->next = bsp->hist->assembly;
4156 bsp->hist->assembly = salp_copy;
4157 ObjMgrSetDirtyFlag (bsp->idx.entityID, TRUE);
4158 ObjMgrSendMsg (OM_MSG_UPDATE, bsp->idx.entityID, 0, 0);
4159 }
4160 }
4161 ObjMgrFree (OBJ_SEQENTRY, (Pointer)sepnew);
4162 sepnew=NULL;
4163 }
4164 }
4165 sip = SeqIdFree (sip);
4166 ErrorInfoFree (error_list);
4167 SequenceInfoFree (sequence_info);
4168 AlignmentFileFree (afp);
4169 }
4170 }
4171 }
4172
4173
4174 static Boolean IsIntervalGap (BioseqPtr bsp, Int4 start, Int4 stop)
4175 {
4176 DeltaSeqPtr dsp;
4177 SeqLitPtr litp;
4178 SeqLocPtr loc;
4179 Int4 seq_offset = 0, len;
4180 Boolean this_is_gap;
4181
4182 if (bsp == NULL
4183 || bsp->repr != Seq_repr_delta
4184 || bsp->seq_ext_type != 4
4185 || start < 0
4186 || stop >= bsp->length)
4187 {
4188 return FALSE;
4189 }
4190
4191 for (dsp = (DeltaSeqPtr) bsp->seq_ext;
4192 dsp != NULL && dsp->next != NULL && seq_offset < stop;
4193 dsp = dsp->next)
4194 {
4195 len = 0;
4196 this_is_gap = FALSE;
4197 if (dsp->choice == 1)
4198 {
4199 loc = (SeqLocPtr) dsp->data.ptrvalue;
4200 len = SeqLocLen (loc);
4201 }
4202 else if (dsp->choice == 2)
4203 {
4204 litp = (SeqLitPtr) dsp->data.ptrvalue;
4205 if (litp != NULL)
4206 {
4207 len = litp->length;
4208 if (IsDeltaSeqGap (dsp))
4209 {
4210 this_is_gap = TRUE;
4211 }
4212 }
4213 }
4214
4215 if (seq_offset <= start && seq_offset + len >= stop)
4216 {
4217 return this_is_gap;
4218 }
4219 else if ((seq_offset <= start && seq_offset + len > start)
4220 || (seq_offset < stop && seq_offset + len > stop)
4221 || (seq_offset >= start && seq_offset + len <= stop))
4222 {
4223 if (!this_is_gap)
4224 {
4225 return FALSE;
4226 }
4227 }
4228 seq_offset += len;
4229 }
4230 return TRUE;
4231 }
4232
4233 static Boolean
4234 CollapseTwoGapSegments
4235 (DenseSegPtr dsp,
4236 Int4 seg_num_first,
4237 BioseqPtr PNTR bsparray)
4238 {
4239 Int4Ptr newstarts;
4240 Int4Ptr newlens;
4241 Uint1Ptr newstrands = NULL;
4242 Int4 row_num, seg_num;
4243
4244 if (dsp == NULL || seg_num_first > dsp->numseg - 2)
4245 {
4246 return FALSE;
4247 }
4248
4249 if (dsp->lens [seg_num_first] != dsp->lens [seg_num_first + 1])
4250 {
4251 return FALSE;
4252 }
4253
4254 for (row_num = 0; row_num < dsp->dim; row_num++)
4255 {
4256 if (dsp->starts [seg_num_first * dsp->dim + row_num] != -1
4257 && dsp->starts [(seg_num_first + 1) * dsp->dim + row_num] != -1)
4258 {
4259 return FALSE;
4260 }
4261
4262 /* segment must be over a gap for all rows not in gap */
4263 /* for the first segment */
4264 if (dsp->starts [seg_num_first * dsp->dim + row_num] != -1
4265 && ! IsIntervalGap (bsparray[row_num],
4266 dsp->starts [seg_num_first * dsp->dim + row_num],
4267 dsp->starts [seg_num_first * dsp->dim + row_num] + dsp->lens[seg_num_first]))
4268
4269 {
4270 return FALSE;
4271 }
4272 /* and for the second segment */
4273 if (dsp->starts [(seg_num_first + 1) * dsp->dim + row_num] != -1
4274 && ! IsIntervalGap (bsparray[row_num],
4275 dsp->starts [(seg_num_first + 1) * dsp->dim + row_num],
4276 dsp->starts [(seg_num_first + 1) * dsp->dim + row_num] + dsp->lens[seg_num_first + 1]))
4277
4278 {
4279 return FALSE;
4280 }
4281 }
4282
4283 newstarts = (Int4Ptr) MemNew (sizeof (Int4) * dsp->dim * (dsp->numseg - 1));
4284 newlens = (Int4Ptr) MemNew (sizeof (Int4) * (dsp->numseg - 1));
4285 if (dsp->strands != NULL)
4286 {
4287 newstrands = (Uint1Ptr) MemNew (sizeof (Uint1) * dsp->dim * (dsp->numseg - 1));
4288 }
4289
4290 /* copy the portion of the alignment before the segments to be collapsed */
4291 for (seg_num = 0; seg_num < seg_num_first; seg_num++)
4292 {
4293 for (row_num = 0; row_num < dsp->dim; row_num++)
4294 {
4295 newstarts [seg_num * dsp->dim + row_num] = dsp->starts [seg_num * dsp->dim + row_num];
4296 if (dsp->strands != NULL)
4297 {
4298 newstrands [seg_num * dsp->dim + row_num] = dsp->strands[seg_num * dsp->dim + row_num];
4299 }
4300 }
4301 newlens[seg_num] = dsp->lens[seg_num];
4302 }
4303
4304 /* collapse the two segments */
4305 for (row_num = 0; row_num < dsp->dim; row_num++)
4306 {
4307 if (dsp->starts [seg_num * dsp->dim + row_num] == -1)
4308 {
4309 newstarts [seg_num * dsp->dim + row_num] = dsp->starts [(seg_num + 1) * dsp->dim + row_num];
4310 if (dsp->strands != NULL)
4311 {
4312 newstrands [seg_num * dsp->dim + row_num] = dsp->strands [(seg_num + 1) * dsp->dim + row_num];
4313 }
4314 }
4315 else
4316 {
4317 newstarts [seg_num * dsp->dim + row_num] = dsp->starts [seg_num * dsp->dim + row_num];
4318 if (dsp->strands != NULL)
4319 {
4320 newstrands [seg_num * dsp->dim + row_num] = dsp->strands [seg_num * dsp->dim + row_num];
4321 }
4322 }
4323 }
4324 newlens [seg_num] = dsp->lens [seg_num];
4325 seg_num++;
4326
4327 /* copy the remaining segments after the collapsed pair */
4328 while (seg_num < dsp->numseg - 1)
4329 {
4330 for (row_num = 0; row_num < dsp->dim; row_num++)
4331 {
4332 newstarts [seg_num * dsp->dim + row_num] = dsp->starts [(seg_num + 1) * dsp->dim + row_num];
4333 if (dsp->strands != NULL)
4334 {
4335 newstrands [seg_num * dsp->dim + row_num] = dsp->strands[(seg_num + 1) * dsp->dim + row_num];
4336 }
4337 }
4338 newlens[seg_num] = dsp->lens[(seg_num + 1)];
4339 seg_num++;
4340 }
4341
4342 /* replace the starts, strands, lens, and numseg in dsp */
4343 dsp->starts = MemFree (dsp->starts);
4344 dsp->starts = newstarts;
4345 dsp->strands = MemFree (dsp->strands);
4346 dsp->strands = newstrands;
4347 dsp->lens = MemFree (dsp->lens);
4348 dsp->lens = newlens;
4349 dsp->numseg--;
4350 return TRUE;
4351 }
4352
4353 extern void ConsolidateSegmentsOverKnownLengthGaps (SeqAlignPtr salp)
4354 {
4355 DenseSegPtr dsp;
4356 SeqIdPtr sip;
4357 BioseqPtr PNTR bsparray;
4358 Int4 row_num, seg_num;
4359
4360 if (salp == NULL || salp->segtype != SAS_DENSEG || salp->segs == NULL)
4361 {
4362 return;
4363 }
4364
4365 if (salp->saip != NULL)
4366 {
4367 SeqAlignIndexFree(salp->saip);
4368 salp->saip = NULL;
4369 }
4370
4371 dsp = (DenseSegPtr) salp->segs;
4372
4373 bsparray = (BioseqPtr PNTR) MemNew (sizeof (BioseqPtr) * dsp->dim);
4374
4375
4376 for (sip = dsp->ids, row_num = 0;
4377 sip != NULL && row_num < dsp->dim;
4378 sip = sip->next, row_num++)
4379 {
4380 bsparray [row_num] = BioseqFind (sip);
4381 }
4382
4383 seg_num = 0;
4384 while (seg_num < dsp->numseg - 1)
4385 {
4386 if (!CollapseTwoGapSegments (dsp, seg_num, bsparray))
4387 {
4388 seg_num++;
4389 }
4390 }
4391
4392 AlnMgr2IndexSeqAlignEx(salp, FALSE);
4393 bsparray = MemFree (bsparray);
4394 }
4395
4396 static Int2 LIBCALLBACK ConsolidateGapGaps (Pointer data)
4397
4398 {
4399 OMProcControlPtr ompcp;
4400 SeqAlignPtr salp=NULL;
4401 Uint2 entityID;
4402 SeqAnnotPtr sanp_old;
4403
4404 ompcp = (OMProcControlPtr) data;
4405 if (ompcp == NULL || ompcp->proc == NULL) return OM_MSG_RET_ERROR;
4406
4407 if (ompcp->input_data == NULL) return OM_MSG_RET_ERROR;
4408
4409 switch(ompcp->input_itemtype)
4410 {
4411 case OBJ_SEQALIGN :
4412 salp = (SeqAlignPtr) ompcp->input_data;
4413 break;
4414 case OBJ_SEQANNOT:
4415 sanp_old = (SeqAnnotPtr) ompcp->input_data;
4416 if (sanp_old->type == 2)
4417 {
4418 salp = sanp_old->data;
4419 }
4420 break;
4421 default :
4422 return OM_MSG_RET_ERROR;
4423 }
4424 if (salp==NULL)
4425 return OM_MSG_RET_ERROR;
4426 entityID = ompcp->input_entityID;
4427 if (entityID < 1)
4428 return OM_MSG_RET_ERROR;
4429
4430 ConsolidateSegmentsOverKnownLengthGaps (salp);
4431
4432 ObjMgrSetDirtyFlag (entityID, TRUE);
4433
4434 ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
4435
4436 return OM_MSG_RET_OK;
4437 }
4438
4439
4440 /* NOTE - need to call DeleteMarkedObjects after calling this function */
4441 extern void FixOneAlignmentOverGaps (SeqAlignPtr salp, Uint2 entityID)
4442 {
4443 SeqAnnotPtr sanp_old, sanp_new;
4444 SeqAlignPtr salp_copy, salp_tmp;
4445 BioseqSetPtr bssp = NULL;
4446
4447 sanp_old = GetSeqAnnotForAlignment (salp);
4448 if (sanp_old == NULL)
4449 {
4450 return;
4451 }
4452
4453 if (sanp_old->idx.parenttype == OBJ_BIOSEQSET)
4454 {
4455 bssp = (BioseqSetPtr) sanp_old->idx.parentptr;
4456 }
4457
4458 /* make a copy of the alignment */
4459 salp_copy = (SeqAlignPtr) AsnIoMemCopy (salp, (AsnReadFunc) SeqAlignAsnRead,
4460 (AsnWriteFunc) SeqAlignAsnWrite);
4461 /* the copy function doesn't copy the index, so the alignment must be
4462 * indexed now.
4463 */
4464 AlnMgr2IndexSeqAlignEx(salp_copy, FALSE);
4465 /* break the alignment at gaps, if the set contains any delta sequences */
4466 salp_copy = MakeDiscontiguousAlignments (salp_copy);
4467
4468 for (salp_tmp = salp_copy; salp_tmp != NULL; salp_tmp = salp_tmp->next)
4469 {
4470 NoMoreSegGapForOneAlignment (salp_tmp, NULL);
4471 }
4472
4473 /* break the chain of alignments and create separate SeqAnnots for each one */
4474 CreateSeqAnnotsForDiscontiguousAlignments (salp_copy, entityID, bssp);
4475
4476 /* need to index the alignments before we can find the new alignment and remove the old */
4477 ObjMgrSetDirtyFlag (entityID, TRUE);
4478 ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
4479
4480 sanp_new = GetSeqAnnotForAlignment (salp_copy);
4481
4482 if (sanp_new != NULL)
4483 {
4484 salp->idx.deleteme = TRUE;
4485 }
4486 }
4487
4488 static Int2 LIBCALLBACK FixAlignmentOverGaps (Pointer data)
4489
4490 {
4491 OMProcControlPtr ompcp;
4492 SeqAlignPtr salp=NULL;
4493 Uint2 entityID;
4494 SeqAnnotPtr sanp_old;
4495
4496 ompcp = (OMProcControlPtr) data;
4497 if (ompcp == NULL || ompcp->proc == NULL) return OM_MSG_RET_ERROR;
4498
4499 if (ompcp->input_data == NULL) return OM_MSG_RET_ERROR;
4500
4501 switch(ompcp->input_itemtype)
4502 {
4503 case OBJ_SEQALIGN :
4504 salp = (SeqAlignPtr) ompcp->input_data;
4505 break;
4506 case OBJ_SEQANNOT:
4507 sanp_old = (SeqAnnotPtr) ompcp->input_data;
4508 if (sanp_old->type == 2)
4509 {
4510 salp = sanp_old->data;
4511 }
4512 break;
4513 default :
4514 return OM_MSG_RET_ERROR;
4515 }
4516 if (salp==NULL)
4517 return OM_MSG_RET_ERROR;
4518 entityID = ompcp->input_entityID;
4519 if (entityID < 1)
4520 return OM_MSG_RET_ERROR;
4521
4522 FixOneAlignmentOverGaps (salp, entityID);
4523 DeleteMarkedObjects (entityID, 0, NULL);
4524
4525 ObjMgrSetDirtyFlag (entityID, TRUE);
4526
4527 ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
4528
4529 return OM_MSG_RET_OK;
4530 }
4531
4532
4533 typedef struct sqn_bsp {
4534 BioseqPtr bsp;
4535 struct sqn_bsp PNTR next;
4536 } SQNBsp, PNTR SQNBspPtr;
4537
4538 static void SQNGetBioseqsProt(SeqEntryPtr sep, Pointer userdata, Int4 index, Int2 indent)
4539 {
4540 BioseqPtr bsp = NULL;
4541 SQNBspPtr sbp;
4542
4543 if (sep == NULL || sep->data.ptrvalue == NULL || userdata == NULL)
4544 return;
4545 sbp = (SQNBspPtr)userdata;
4546 if (IS_Bioseq(sep))
4547 bsp = (BioseqPtr)(sep->data.ptrvalue);
4548 if (IS_Bioseq(sep) && ISA_aa(bsp->mol))
4549 {
4550 if (sbp->bsp == NULL)
4551 sbp->bsp = (BioseqPtr)(sep->data.ptrvalue);
4552 else
4553 {
4554 while (sbp->next != NULL)
4555 {
4556 sbp = sbp->next;
4557 }
4558 sbp->next = (SQNBspPtr)MemNew(sizeof(SQNBsp));
4559 sbp->next->bsp = (BioseqPtr)(sep->data.ptrvalue);
4560 }
4561 }
4562 }
4563
4564 typedef struct masterseqform
4565 {
4566 Boolean listBoxAccept;
4567 Boolean listBoxUp;
4568 DialoG seq_dlg;
4569 GrouP flipFeatGrp;
4570 } MasterSeqFormData, PNTR MasterSeqFormPtr;
4571
4572 static void AcceptMasterSeqMessage (ButtoN b)
4573
4574 {
4575 MasterSeqFormPtr mp;
4576
4577 mp = (MasterSeqFormPtr) GetObjectExtra (b);
4578 if (mp == NULL) return;
4579 mp->listBoxAccept = TRUE;
4580 mp->listBoxUp = FALSE;
4581 }
4582
4583 static void CancelMasterSeqMessage (ButtoN b)
4584
4585 {
4586 MasterSeqFormPtr mp;
4587
4588 mp = (MasterSeqFormPtr) GetObjectExtra (b);
4589 if (mp == NULL) return;
4590 mp->listBoxAccept = FALSE;
4591 mp->listBoxUp = FALSE;
4592 }
4593
4594 static void EnableMasterSeqChoice (GrouP g)
4595 {
4596 MasterSeqFormPtr mp;
4597
4598 mp = (MasterSeqFormPtr) GetObjectExtra (g);
4599 if (mp == NULL) return;
4600 if (GetValue (g) == 1)
4601 {
4602 Enable (mp->seq_dlg);
4603 Enable (mp->flipFeatGrp);
4604 }
4605 else
4606 {
4607 Disable (mp->seq_dlg);
4608 Disable (mp->flipFeatGrp);
4609 }
4610 }
4611
4612 static CharPtr SeqNameFromValNodeProc (ValNodePtr vnp)
4613 {
4614 CharPtr str;
4615 Int4 buf_size = 41;
4616 SeqIdPtr sip;
4617
4618 if (vnp == NULL || vnp->data.ptrvalue == NULL)
4619 {
4620 return NULL;
4621 }
4622
4623 str = (CharPtr) MemNew (buf_size * sizeof (Char));
4624 if (str != NULL)
4625 {
4626 sip = (SeqIdPtr) vnp->data.ptrvalue;
4627 SeqIdWrite (sip, str, PRINTID_REPORT, buf_size - 1);
4628 }
4629 return str;
4630 }
4631
4632 static void FreeSeqIdValNode (ValNodePtr vnp)
4633 {
4634 SeqIdPtr sip;
4635 if (vnp != NULL)
4636 {
4637 sip = (SeqIdPtr) vnp->data.ptrvalue;
4638 SeqIdFree (sip);
4639 vnp->data.ptrvalue = NULL;
4640 }
4641 }
4642
4643
4644 static ValNodePtr CopySeqIDValNode (ValNodePtr vnp)
4645 {
4646 ValNodePtr vnp_new;
4647
4648 vnp_new = ValNodeNew (NULL);
4649 if (vnp_new != NULL)
4650 {
4651 vnp_new->choice = vnp->choice;
4652 vnp_new->data.ptrvalue = SeqIdDup ((SeqIdPtr)vnp->data.ptrvalue);
4653 }
4654 return vnp_new;
4655 }
4656
4657 static Boolean MatchSeqIDValNode (ValNodePtr vnp1, ValNodePtr vnp2)
4658 {
4659 if (vnp1 == NULL && vnp2 == NULL)
4660 {
4661 return TRUE;
4662 }
4663 else if (vnp1 == NULL || vnp2 == NULL)
4664 {
4665 return FALSE;
4666 }
4667 else if (SeqIdComp ((SeqIdPtr) vnp1->data.ptrvalue,
4668 (SeqIdPtr) vnp1->data.ptrvalue) == SIC_YES)
4669 {
4670 return TRUE;
4671 }
4672 else
4673 {
4674 return FALSE;
4675 }
4676
4677 }
4678
4679
4680 static Boolean GetMasterStrandSeq
4681 (ValNodePtr good_list, BioseqPtr PNTR master_bsp, BoolPtr flip_for_aln, BoolPtr flip_feat)
4682
4683 {
4684 GrouP c;
4685 GrouP g;
4686 PrompT ppt1, ppt2, ppt3, ppt4;
4687 WindoW w;
4688 MasterSeqFormData md;
4689 ButtoN b;
4690 GrouP flipGrp;
4691 ValNodePtr vnp;
4692
4693 if (good_list == NULL || master_bsp == NULL || flip_for_aln == NULL || flip_feat == NULL)
4694 {
4695 return FALSE;
4696 }
4697 md.listBoxUp = TRUE;
4698 md.listBoxAccept = FALSE;
4699 w = ModalWindow (-50, -20, -20, -20, NULL);
4700 if (w != NULL) {
4701 g = HiddenGroup (w, -1, 0, NULL);
4702 SetGroupSpacing (g, 10, 10);
4703 ppt1 = StaticPrompt (g, "The alignment you are attempting to create contains mixed strands.",
4704 0, 0, programFont, 'c');
4705 ppt2 = StaticPrompt (g, "Do you wish to reverse complement the sequences to form the alignment?",
4706 0, 0, programFont, 'c');
4707 flipGrp = HiddenGroup (g, 2, 0, EnableMasterSeqChoice);
4708 SetObjectExtra (flipGrp, &md, NULL);
4709 RadioButton (flipGrp, "Yes");
4710 RadioButton (flipGrp, "No");
4711 SetValue (flipGrp, 2);
4712
4713 ppt3 = StaticPrompt (g, "Reverse the strands for the features as well?", 0, 0, programFont, 'c');
4714 md.flipFeatGrp = HiddenGroup (g, 2, 0, NULL);
4715 RadioButton (md.flipFeatGrp, "Yes");
4716 RadioButton (md.flipFeatGrp, "No");
4717 SetValue (md.flipFeatGrp, 1);
4718 Disable (md.flipFeatGrp);
4719
4720 ppt4 = StaticPrompt (g, "Choose a sequence with the correct strand", 0, 0, programFont, 'c');
4721 md.seq_dlg = ValNodeSelectionDialog (g, good_list, 8,
4722 SeqNameFromValNodeProc,
4723 FreeSeqIdValNode,
4724 CopySeqIDValNode,
4725 MatchSeqIDValNode,
4726 "sequence",
4727 NULL, NULL, FALSE);
4728 Disable (md.seq_dlg);
4729 c = HiddenGroup (g, 2, 0, NULL);
4730 b = DefaultButton (c, "Accept", AcceptMasterSeqMessage);
4731 SetObjectExtra (b, &md, NULL);
4732 b = PushButton (c, "Cancel", CancelMasterSeqMessage);
4733 SetObjectExtra (b, &md, NULL);
4734 AlignObjects (ALIGN_CENTER, (HANDLE) ppt1, (HANDLE) ppt2, (HANDLE) flipGrp,
4735 (HANDLE) ppt3, (HANDLE) md.flipFeatGrp,
4736 (HANDLE) ppt4, (HANDLE) md.seq_dlg,
4737 (HANDLE) c, NULL);
4738 RealizeWindow (w);
4739 Show (w);
4740 Select (w);
4741 while (md.listBoxUp) {
4742 ProcessExternalEvent ();
4743 Update ();
4744 }
4745 ProcessAnEvent ();
4746
4747 if (GetValue (flipGrp) == 1)
4748 {
4749 *flip_for_aln = TRUE;
4750 vnp = DialogToPointer (md.seq_dlg);
4751 if (vnp == NULL)
4752 {
4753 md.listBoxAccept = FALSE;
4754 }
4755 else
4756 {
4757 *master_bsp = BioseqFind ((SeqIdPtr)vnp->data.ptrvalue);
4758 }
4759 if (GetValue (md.flipFeatGrp) == 1)
4760 {
4761 *flip_feat = TRUE;
4762 }
4763 else
4764 {
4765 *flip_feat = FALSE;
4766 }
4767 }
4768 else
4769 {
4770 *flip_for_aln = FALSE;
4771 }
4772 Remove (w);
4773 }
4774 return md.listBoxAccept;
4775 }
4776
4777 static ValNodePtr FreeSequenceIDValNodeList (ValNodePtr seqlist)
4778 {
4779 ValNodePtr vnp;
4780 SeqIdPtr sip;
4781
4782 if (seqlist == NULL) return NULL;
4783 for (vnp = seqlist; vnp != NULL; vnp = vnp->next)
4784 {
4785 sip = (SeqIdPtr) vnp->data.ptrvalue;
4786 SeqIdFree (sip);
4787 }
4788 ValNodeFree (seqlist);
4789 return NULL;
4790 }
4791
4792 static void GetBadSequencesAndReversals
4793 (SQNBspPtr sbp,
4794 Uint2 entityID,
4795 Boolean use_new_blast,
4796 BoolPtr some_reversed,
4797 Int4Ptr num_seqs,
4798 ValNodePtr PNTR good_list_ptr,
4799 ValNodePtr PNTR bad_list_ptr)
4800 {
4801 Boolean revcomp = FALSE;
4802 SQNBspPtr sbp_idx;
4803 SeqAlignPtr salp;
4804 ValNodePtr bad_list = NULL, vnp;
4805 SeqIdPtr tmp_id;
4806 Boolean dirty;
4807 ValNodePtr good_list = NULL;
4808
4809 if (sbp == NULL) return;
4810 if (some_reversed != NULL)
4811 {
4812 *some_reversed = FALSE;
4813 }
4814
4815 if (num_seqs != NULL)
4816 {
4817 *num_seqs = 1;
4818 }
4819 for (sbp_idx = sbp->next; sbp_idx != NULL; sbp_idx = sbp_idx->next)
4820 {
4821 if (num_seqs != NULL)
4822 {
4823 (*num_seqs) ++;
4824 }
4825 revcomp = FALSE;
4826 if (use_new_blast)
4827 {
4828 salp = Sequin_GlobalAlign2Seq(sbp->bsp, sbp_idx->bsp, &revcomp);
4829 }
4830 else
4831 {
4832 salp = Sqn_GlobalAlign2Seq(sbp->bsp, sbp_idx->bsp, &revcomp);
4833 }
4834 if (salp == NULL || ! ValidateSeqAlign (salp, entityID, FALSE, FALSE, TRUE, FALSE, FALSE, &dirty))
4835 {
4836 tmp_id = SeqIdDup (SeqIdFindBest (sbp_idx->bsp->id, 0));
4837 if (tmp_id != NULL)
4838 {
4839 vnp = ValNodeNew (bad_list);
4840 if (vnp != NULL)
4841 {
4842 vnp->data.ptrvalue = tmp_id;
4843 }
4844 if (bad_list == NULL)
4845 {
4846 bad_list = vnp;
4847 }
4848 }
4849 }
4850 else
4851 {
4852 if (good_list == NULL)
4853 {
4854 /* add master sequence to good list */
4855 tmp_id = SeqIdDup (SeqIdFindBest (sbp->bsp->id, 0));
4856 ValNodeAddPointer (&good_list, 0, tmp_id);
4857 }
4858 tmp_id = SeqIdDup (SeqIdFindBest (sbp_idx->bsp->id, 0));
4859 if (tmp_id != NULL)
4860 {
4861 vnp = ValNodeNew (good_list);
4862 if (vnp != NULL)
4863 {
4864 vnp->data.ptrvalue = tmp_id;
4865 }
4866 if (good_list == NULL)
4867 {
4868 good_list = vnp;
4869 }
4870 }
4871 if (revcomp)
4872 {
4873 if (some_reversed != NULL)
4874 {
4875 *some_reversed = TRUE;
4876 }
4877 }
4878 }
4879 SeqAlignFree (salp);
4880 if (revcomp)
4881 {
4882 BioseqRevComp (sbp_idx->bsp);
4883 ReverseBioseqFeatureStrands (sbp_idx->bsp);
4884 }
4885 }
4886 if (good_list_ptr != NULL)
4887 {
4888 *good_list_ptr = good_list;
4889 }
4890 else
4891 {
4892 FreeSequenceIDValNodeList (good_list);
4893 }
4894 if (bad_list_ptr != NULL)
4895 {
4896 *bad_list_ptr = bad_list;
4897 }
4898 else
4899 {
4900 FreeSequenceIDValNodeList (bad_list);
4901 }
4902 }
4903
4904 static MsgAnswer ContinueWithBadSequences (ValNodePtr bad_list, Int4 num_seqs)
4905 {
4906 Int4 num_bad_seq;
4907 Char buf [41];
4908 CharPtr msg_start = "BLAST is unable to create valid pairwise "
4909 "alignments for the following sequences:\n";
4910 CharPtr msg_end = "Do you want to create this alignment without "
4911 "these sequences?";
4912 CharPtr msg;
4913 Int4 msg_len;
4914 ValNodePtr vnp;
4915 SeqIdPtr tmp_id;
4916 MsgAnswer ans = ANS_YES;
4917
4918 if (bad_list == NULL) return ANS_YES;
4919 num_bad_seq = ValNodeLen (bad_list);
4920 if (num_bad_seq == 0) return ANS_YES;
4921 if (num_bad_seq == num_seqs)
4922 {
4923 Message (MSG_ERROR, "BLAST is unable to create valid pairwise alignments "
4924 "for any of the sequences. No alignment will be created.");
4925 return ANS_NO;
4926 }
4927 msg_len = StringLen (msg_start) + StringLen (msg_end)
4928 + num_bad_seq * (sizeof (buf) + 2) + 3;
4929
4930 msg = (CharPtr) MemNew (msg_len * sizeof (Char));
4931 if (msg != NULL)
4932 {
4933 StringCat (msg, msg_start);
4934 for (vnp = bad_list; vnp != NULL; vnp = vnp->next)
4935 {
4936 tmp_id = (SeqIdPtr) vnp->data.ptrvalue;
4937 SeqIdWrite (tmp_id, buf, PRINTID_REPORT, sizeof (buf) - 1);
4938 StringCat (msg, buf);
4939 if (vnp->next != NULL)
4940 {
4941 StringCat (msg, ", ");
4942 }
4943 }
4944 StringCat (msg, msg_end);
4945 ans = Message (MSG_YN, msg);
4946 MemFree (msg);
4947 }
4948 return ans;
4949 }
4950
4951 /* In order to create alignments for segmented sets, first need to change
4952 * method of collecting bioseqs to collect one set per segment, then
4953 * need to to process each set individually.
4954 */
4955 static void FindSegSetsCallback (BioseqSetPtr bssp, Pointer userdata)
4956 {
4957 SeqEntryPtr sep;
4958 BioseqSetPtr segment_set = NULL;
4959 ValNodePtr vnp;
4960 ValNodePtr PNTR seg_list;
4961 SQNBspPtr sbp;
4962
4963 if (bssp == NULL || bssp->_class != BioseqseqSet_class_segset
4964 || userdata == NULL) return;
4965
4966 for (sep = bssp->seq_set; sep != NULL && segment_set == NULL; sep = sep->next)
4967 {
4968 if (IS_Bioseq_set (sep))
4969 {
4970 segment_set = sep->data.ptrvalue;
4971 if (segment_set != NULL && segment_set->_class != BioseqseqSet_class_parts)
4972 {
4973 segment_set = NULL;
4974 }
4975 }
4976 }
4977 if (segment_set != NULL)
4978 {
4979 seg_list = (ValNodePtr PNTR) userdata;
4980 vnp = *seg_list;
4981 for (sep = segment_set->seq_set; sep != NULL; sep = sep->next)
4982 {
4983 if (!IS_Bioseq (sep)) continue;
4984 if (vnp == NULL)
4985 {
4986 vnp = ValNodeNew (*seg_list);
4987 if (*seg_list == NULL)
4988 {
4989 *seg_list = vnp;
4990 }
4991 sbp = MemNew (sizeof (SQNBsp));
4992 sbp->bsp = (BioseqPtr)sep->data.ptrvalue;
4993 sbp->next = NULL;
4994 vnp->data.ptrvalue = sbp;
4995 }
4996 else
4997 {
4998 sbp = (SQNBspPtr) vnp->data.ptrvalue;
4999 if (sbp == NULL)
5000 {
5001 sbp = MemNew (sizeof (SQNBsp));
5002 sbp->bsp = (BioseqPtr)sep->data.ptrvalue;
5003 sbp->next = NULL;
5004 vnp->data.ptrvalue = sbp;
5005 }
5006 else
5007 {
5008 while (sbp->next != NULL)
5009 {
5010 sbp = sbp->next;
5011 }
5012 sbp->next = (SQNBspPtr)MemNew(sizeof(SQNBsp));
5013 sbp->next->bsp = (BioseqPtr)(sep->data.ptrvalue);
5014 }
5015 }
5016 vnp = vnp->next;
5017 }
5018 }
5019 }
5020
5021 static void FindNucBioseqsCallback (BioseqPtr bsp, Pointer userdata)
5022 {
5023 ValNodePtr PNTR seg_list;
5024 ValNodePtr vnp;
5025 SQNBspPtr sbp;
5026
5027 if (bsp == NULL || ! ISA_na(bsp->mol) || userdata == NULL)
5028 return;
5029
5030 seg_list = (ValNodePtr PNTR) userdata;
5031 if (*seg_list == NULL)
5032 {
5033 vnp = ValNodeNew (*seg_list);
5034 if (vnp == NULL) return;
5035 *seg_list = vnp;
5036 }
5037 else
5038 {
5039 vnp = *seg_list;
5040 }
5041 sbp = (SQNBspPtr)vnp->data.ptrvalue;
5042 if (sbp == NULL)
5043 {
5044 sbp = MemNew (sizeof (SQNBsp));
5045 if (sbp == NULL) return;
5046 sbp->bsp = bsp;
5047 sbp->next = NULL;
5048 vnp->data.ptrvalue = sbp;
5049 }
5050 else
5051 {
5052 while (sbp->next != NULL)
5053 {
5054 sbp = sbp->next;
5055 }
5056 sbp->next = (SQNBspPtr)MemNew(sizeof(SQNBsp));
5057 sbp->next->bsp = bsp;
5058 }
5059 }
5060
5061 static ValNodePtr GetAlignmentSegmentsList (SeqEntryPtr sep)
5062 {
5063 ValNodePtr seg_list = NULL;
5064 if (sep == NULL) return NULL;
5065
5066 VisitSetsInSep (sep, &seg_list, FindSegSetsCallback);
5067 if (seg_list == NULL)
5068 {
5069 VisitBioseqsInSep (sep, &seg_list, FindNucBioseqsCallback);
5070 }
5071 return seg_list;
5072 }
5073
5074 /* This function finds the zero-based index of the single sequence in an alignment
5075 * that extends past the left side of an alignment (like a sore thumb).
5076 */
5077 static Int4 FindAlignmentLeftThumb (SeqAlignPtr salp)
5078 {
5079 DenseSegPtr dsp;
5080 Int4 k;
5081 Int4 sore_thumb = -1; /* because the sequence in question sticks out... */
5082 BioseqPtr bsp;
5083 SeqIdPtr sip;
5084
5085 if (salp == NULL || salp->segtype != SAS_DENSEG || salp->segs == NULL)
5086 {
5087 return -1;
5088 }
5089
5090 /* we need to examine the alignment, to see if there is a single sequence
5091 * that extends before the beginning of the alignment or past the end of
5092 * the alignment, so that we can insert additional segments with gaps for
5093 * all other sequences.
5094 */
5095
5096 dsp = (DenseSegPtr) salp->segs;
5097
5098 /* check left end of alignment */
5099 for (k = 0, sip = dsp->ids; k < dsp->dim; k++, sip = sip->next)
5100 {
5101 if (dsp->strands [k] == Seq_strand_minus)
5102 {
5103 bsp = BioseqFind (sip);
5104 if (bsp != NULL
5105 && dsp->starts [k] + dsp->lens [0] < bsp->length
5106 && dsp->starts [k] > -1)
5107 {
5108 if (sore_thumb != -1)
5109 {
5110 /* can only do this when only one sequence extends past the end */
5111 return -1;
5112 }
5113 else
5114 {
5115 sore_thumb = k;
5116 }
5117 }
5118 }
5119 else if (dsp->starts [k] > 0)
5120 {
5121 if (sore_thumb != -1)
5122 {
5123 /* can only do this when only one sequence extends past the end */
5124 return -1;
5125 }
5126 else
5127 {
5128 sore_thumb = k;
5129 }
5130 }
5131 }
5132
5133 return sore_thumb;
5134 }
5135
5136 /* This function finds the zero-based index of the single sequence in an alignment
5137 * that extends past the right side of an alignment (like a sore thumb).
5138 */
5139 static Int4 FindAlignmentRightThumb (SeqAlignPtr salp)
5140 {
5141 DenseSegPtr dsp;
5142 Int4 k, start_index;
5143 Int4 sore_thumb = -1; /* because the sequence in question sticks out... */
5144 BioseqPtr bsp;
5145 SeqIdPtr sip;
5146
5147 if (salp == NULL || salp->segtype != SAS_DENSEG || salp->segs == NULL)
5148 {
5149 return -1;
5150 }
5151
5152 /* we need to examine the alignment, to see if there is a single sequence
5153 * that extends before the beginning of the alignment or past the end of
5154 * the alignment, so that we can insert additional segments with gaps for
5155 * all other sequences.
5156 */
5157
5158 dsp = (DenseSegPtr) salp->segs;
5159
5160 /* check right end of alignment */
5161 for (k = 0, sip = dsp->ids; k < dsp->dim; k++, sip = sip->next)
5162 {
5163 start_index = (dsp->dim * (dsp->numseg - 1)) + k ;
5164 if (dsp->strands [start_index] == Seq_strand_minus)
5165 {
5166 if (dsp->starts [start_index] > 0)
5167 {
5168 if (sore_thumb != -1)
5169 {
5170 /* can only do this when only one sequence extends past the end */
5171 return -1;
5172 }
5173 else
5174 {
5175 sore_thumb = k;
5176 }
5177 }
5178 }
5179 else
5180 {
5181 bsp = BioseqFind (sip);
5182 if (bsp != NULL
5183 && dsp->starts [start_index] > -1
5184 && dsp->starts [start_index] + dsp->lens [dsp->numseg - 1] < bsp->length)
5185 {
5186 if (sore_thumb != -1)
5187 {
5188 /* can only do this when only one sequence extends past the end */
5189 return -1;
5190 }
5191 else
5192 {
5193 sore_thumb = k;
5194 }
5195 }
5196 }
5197 }
5198
5199 return sore_thumb;
5200 }
5201
5202 /* This function looks for a single sequence extending past the left side of the
5203 * alignment and/or a single sequence extending past the right side of the alignment
5204 * and extends the alignment to cover these sequences, with gaps in the alignment for
5205 * all of the other sequences.
5206 * The function is unable to extend the alignment if more than one sequence extends
5207 * past the end of the alignment on that side.
5208 */
5209 static void FixAlignmentEndStubs (SeqAlignPtr salp)
5210 {
5211 DenseSegPtr dsp, dsp_new;
5212 Int4 k;
5213 Int4 left_thumb, right_thumb; /* because the sequence in question sticks out... */
5214 BioseqPtr bsp;
5215 Int4 extra_segs = 0, seg_offset = 0;
5216 SeqIdPtr sip;
5217 Int4 new_index, old_index;
5218 Int4 thumb_len;
5219
5220 if (salp == NULL || salp->segtype != SAS_DENSEG || salp->segs == NULL)
5221 {
5222 return;
5223 }
5224
5225 /* we need to examine the alignment, to see if there is a single sequence
5226 * that extends before the beginning of the alignment or past the end of
5227 * the alignment, so that we can insert additional segments with gaps for
5228 * all other sequences.
5229 */
5230
5231 dsp = (DenseSegPtr) salp->segs;
5232
5233 left_thumb = FindAlignmentLeftThumb (salp);
5234 right_thumb = FindAlignmentRightThumb (salp);
5235
5236 if (left_thumb == -1 && right_thumb == 1)
5237 {
5238 return;
5239 }
5240
5241 if (left_thumb != -1)
5242 {
5243 extra_segs ++;
5244 }
5245 if (right_thumb != -1)
5246 {
5247 extra_segs ++;
5248 }
5249
5250 /* insert sequence for the thumb and gap for all of the other sequences at the
5251 * beginning of the alignment or end of the alignment.
5252 */
5253
5254 dsp_new = DenseSegNew();
5255 dsp_new->dim = dsp->dim;
5256 dsp_new->numseg = dsp->numseg + extra_segs;
5257 dsp_new->starts = (Int4Ptr) MemNew(dsp_new->dim * dsp_new->numseg * sizeof(Int4));
5258 dsp_new->lens = (Int4Ptr) MemNew(dsp_new->numseg * sizeof(Int4));
5259 dsp_new->strands = (Uint1Ptr) MemNew (dsp_new->dim * dsp_new->numseg * sizeof(Int4));
5260 dsp_new->ids = dsp->ids;
5261 dsp->ids = NULL;
5262
5263 if (left_thumb != -1)
5264 {
5265 for (k = 0, sip = dsp_new->ids; k < dsp_new->dim; k++, sip = sip->next)
5266 {
5267 if (k == left_thumb)
5268 {
5269 if (dsp->strands [k] == Seq_strand_minus)
5270 {
5271 bsp = BioseqFind (sip);
5272 if (bsp == NULL)
5273 {
5274 dsp->ids = dsp_new->ids;
5275 dsp_new->ids = NULL;
5276 DenseSegFree (dsp_new);
5277 return;
5278 }
5279 thumb_len = bsp->length - dsp->starts [k] - dsp->lens [0];
5280 dsp_new->starts [k] = bsp->length - thumb_len;
5281 dsp_new->lens [0] = thumb_len;
5282 dsp_new->strands [k] = Seq_strand_minus;
5283 }
5284 else
5285 {
5286 dsp_new->starts [k] = 0;
5287 dsp_new->lens [0] = dsp->starts [k];
5288 dsp_new->strands [k] = Seq_strand_plus;
5289 }
5290 }
5291 else
5292 {
5293 dsp_new->starts [k] = -1;
5294 /* keep strand consistent with first segment */
5295 dsp_new->strands [k] = dsp->strands [k];
5296 }
5297 }
5298 seg_offset ++;
5299 }
5300
5301 /* copy middle alignment starts and strands */
5302 for (k = 0; k < dsp->dim * dsp->numseg; k++)
5303 {
5304 dsp_new->starts [k + (dsp->dim * seg_offset)] = dsp->starts [k];
5305 dsp_new->strands [k + (dsp->dim * seg_offset)] = dsp->strands [k];
5306 }
5307 /* copy middle alignment lens */
5308 for (k = 0; k < dsp->numseg; k++)
5309 {
5310 dsp_new->lens [k + seg_offset] = dsp->lens [k];
5311 }
5312
5313 /* add final segment */
5314 if (right_thumb != -1)
5315 {
5316 for (k = 0, sip = dsp_new->ids; k < dsp->dim; k++, sip = sip->next)
5317 {
5318 new_index = dsp_new->dim * (dsp_new->numseg - 1) + k;
5319 old_index = dsp->dim * (dsp->numseg - 1) + k;
5320
5321 if (k == right_thumb)
5322 {
5323 if (dsp->strands [old_index] == Seq_strand_minus)
5324 {
5325 dsp_new->starts [new_index] = 0;
5326 dsp_new->strands [new_index] = Seq_strand_minus;
5327 dsp_new->lens [dsp_new->numseg - 1] = dsp->starts [old_index];
5328 }
5329 else
5330 {
5331 bsp = BioseqFind (sip);
5332 thumb_len = bsp->length - dsp->starts [old_index] - dsp->lens [dsp->numseg - 1];
5333 dsp_new->starts [new_index] = bsp->length - thumb_len;
5334 dsp_new->lens [dsp_new->numseg - 1] = thumb_len;
5335 dsp_new->strands [new_index] = Seq_strand_plus;
5336 }
5337 }
5338 else
5339 {
5340 dsp_new->starts [new_index] = -1;
5341 /* keep strands consistent */
5342 dsp_new->strands [new_index] = dsp->strands [old_index];
5343 }
5344 }
5345 }
5346
5347 /* free the old alignment */
5348 dsp = DenseSegFree(dsp);
5349
5350 /* replace it with the new alignment */
5351 salp->segs = (Pointer)(dsp_new);
5352
5353 /* reindex the alignment */
5354 SAIndex2Free2(salp->saip);
5355 salp->saip = NULL;
5356 AlnMgr2IndexSingleChildSeqAlign(salp);
5357 }
5358
5359 static Int2 CreateOneAlignment
5360 (SQNBspPtr sbp,
5361 Uint2 entityID,
5362 BioseqSetPtr bssp,
5363 Boolean use_new_blast,
5364 FILE *fp,
5365 BoolPtr errors_in_log)
5366 {
5367 BioseqPtr master_bsp;
5368 Boolean some_reversed;
5369 ValNodePtr bad_list, good_list;
5370 MsgAnswer continue_with_bad;
5371 Int4 num_seqs = 0;
5372 Boolean flip_for_aln = FALSE;
5373 Boolean flip_feat = FALSE;
5374 SeqAlignPtr salp, salp_head, salp_prev, salp_tmp, salp_mult;
5375 Boolean revcomp = FALSE;
5376 SeqIdPtr sip, sip1, sip2;
5377 Char buf [41];
5378 BioseqPtr bsp;
5379 SQNBspPtr sbp_prev;
5380 Boolean dirty;
5381 DenseSegPtr dsp;
5382 ValNodePtr vnp;
5383 Int4 num_reversed = 0;
5384
5385 if (sbp == NULL || errors_in_log == NULL) return OM_MSG_RET_ERROR;
5386
5387 master_bsp = sbp->bsp;
5388 GetBadSequencesAndReversals (sbp, entityID, use_new_blast, &some_reversed, &num_seqs, &good_list, &bad_list);
5389 continue_with_bad = ContinueWithBadSequences(bad_list, num_seqs);
5390 if (continue_with_bad != ANS_YES)
5391 {
5392 bad_list = FreeSequenceIDValNodeList (bad_list);
5393 return OM_MSG_RET_DONE;
5394 }
5395 if (some_reversed)
5396 {
5397 if (! GetMasterStrandSeq (good_list, &master_bsp, &flip_for_aln, &flip_feat))
5398 {
5399 bad_list = FreeSequenceIDValNodeList (bad_list);
5400 return OM_MSG_RET_ERROR;
5401 }
5402 }
5403
5404 if (bad_list != NULL)
5405 {
5406 fprintf (fp, "The following sequences were omitted from the alignment:\n");
5407 for (vnp = bad_list; vnp != NULL; vnp = vnp->next)
5408 {
5409 sip = (SeqIdPtr) vnp->data.ptrvalue;
5410 SeqIdWrite (sip, buf, PRINTID_REPORT, sizeof (buf) - 1);
5411 fprintf (fp, "%s\n", buf);
5412 }
5413 bad_list = FreeSequenceIDValNodeList (bad_list);
5414 *errors_in_log = TRUE;
5415 }
5416
5417 if (flip_for_aln && master_bsp != sbp->bsp)
5418 {
5419 /* we align the master with the top of the list - this will reverse the strandedness
5420 * of the top of the list if necessary.
5421 */
5422 revcomp = FALSE;
5423 if (use_new_blast)
5424 {
5425 salp = Sequin_GlobalAlign2Seq(master_bsp, sbp->bsp, &revcomp);
5426 }
5427 else
5428 {
5429 salp = Sqn_GlobalAlign2Seq(master_bsp, sbp->bsp, &revcomp);
5430 }
5431 SeqAlignFree (salp);
5432 if (revcomp)
5433 {
5434 if (!flip_feat)
5435 {
5436 ReverseBioseqFeatureStrands (sbp->bsp);
5437 }
5438 fprintf (fp, "The following sequences were reversed in order to "
5439 "construct the alignment:\n");
5440 sip = SeqIdFindBest(sbp->bsp->id, 0);
5441 SeqIdWrite (sip, buf, PRINTID_REPORT, sizeof (buf) - 1);
5442 fprintf (fp, "%s\n", buf);
5443 num_reversed ++;
5444 }
5445 }
5446
5447 bsp = sbp->bsp;
5448 sbp_prev = sbp;
5449 sbp = sbp->next;
5450 MemFree(sbp_prev);
5451 salp_head = salp_prev = NULL;
5452
5453
5454 num_seqs = 1;
5455 while (sbp != NULL)
5456 {
5457 if (ISA_na(sbp->bsp->mol))
5458 {
5459 sip1 = SeqIdDup(bsp->id);
5460 sip2 = SeqIdDup(sbp->bsp->id);
5461 revcomp = FALSE;
5462 if (use_new_blast)
5463 {
5464 salp = Sequin_GlobalAlign2Seq(bsp, sbp->bsp, &revcomp);
5465 }
5466 else
5467 {
5468 salp = Sqn_GlobalAlign2Seq(bsp, sbp->bsp, &revcomp);
5469 }
5470
5471 /* count the number of sequences we are trying to align */
5472 num_seqs ++;
5473
5474 if (salp != NULL
5475 && ! ValidateSeqAlign (salp, entityID, FALSE, FALSE, TRUE, FALSE, FALSE, &dirty))
5476 {
5477 /* if an alignment was created and the sequence was reversed to create the alignment,
5478 * but the new alignment wasn't valid, un-reverse the sequence and don't add the
5479 * sequence to the list of sequences reversed.
5480 */
5481 salp = SeqAlignFree (salp);
5482 if (revcomp)
5483 {
5484 BioseqRevComp (sbp->bsp);
5485 ReverseBioseqFeatureStrands (sbp->bsp);
5486 revcomp = FALSE;
5487 }
5488 }
5489
5490 if (salp != NULL)
5491 {
5492 if (revcomp) {
5493 if (flip_for_aln)
5494 {
5495 if (!flip_feat)
5496 {
5497 ReverseBioseqFeatureStrands (sbp->bsp);
5498 }
5499 if (num_reversed == 0)
5500 {
5501 fprintf (fp, "The following sequences were reversed in order to "
5502 "construct the alignment:\n");
5503 }
5504 sip = SeqIdFindBest(sbp->bsp->id, 0);
5505 SeqIdWrite (sip, buf, PRINTID_REPORT, sizeof (buf) - 1);
5506 fprintf (fp, "%s\n", buf);
5507 num_reversed ++;
5508 }
5509 else
5510 {
5511 /* un-reverse the sequence */
5512 BioseqRevComp (sbp->bsp);
5513 ReverseBioseqFeatureStrands (sbp->bsp);
5514 /* change the alignment to show the reversed strand */
5515 ReverseAlignmentStrand (salp, 2);
5516 }
5517 revcomp = FALSE;
5518 }
5519
5520 dsp = (DenseSegPtr)(salp->segs);
5521 SeqIdSetFree(dsp->ids);
5522 dsp->ids = sip1;
5523 dsp->ids->next = sip2;
5524 if (salp != NULL && salp_head != NULL) {
5525 salp_prev->next = salp;
5526 salp_prev = salp;
5527 } else if (salp != NULL) {
5528 salp_head = salp_prev = salp;
5529 }
5530 }
5531 }
5532 sbp_prev = sbp;
5533 sbp = sbp->next;
5534 MemFree(sbp_prev);
5535 }
5536 if (salp_head != NULL)
5537 {
5538 salp_tmp = salp_head;
5539 while (salp_tmp != NULL)
5540 {
5541 if (salp_tmp->saip != NULL)
5542 {
5543 SeqAlignIndexFree(salp_tmp->saip);
5544 salp_tmp->saip = NULL;
5545 }
5546 salp_tmp = salp_tmp->next;
5547 }
5548 AlnMgr2IndexSeqAlignEx(salp_head, FALSE);
5549 salp_mult = AlnMgr2GetSubAlign(salp_head, 0, -1, 0, TRUE);
5550 salp_mult->dim = AlnMgr2GetNumRows(salp_head);
5551 salp_mult->type = SAT_PARTIAL;
5552
5553 FixAlignmentEndStubs (salp_mult);
5554
5555 ValidateSeqAlign (salp_mult, entityID, TRUE, FALSE, TRUE, FALSE, FALSE, &dirty);
5556 SeqAlignSetFree(salp_head);
5557
5558 /* index multiple alignment */
5559 AlnMgr2IndexSeqAlignEx(salp_mult, FALSE);
5560
5561 /* break up alignment if it covers gaps of unknown length */
5562 salp_mult = MakeDiscontiguousAlignments (salp_mult);
5563
5564 } else
5565 salp_mult = NULL;
5566
5567 if (salp_mult != NULL)
5568 {
5569 /* create separate SeqAnnots for each alignment in the chain */
5570 CreateSeqAnnotsForDiscontiguousAlignments (salp_mult, entityID, bssp);
5571 ObjMgrSetDirtyFlag (entityID, TRUE);
5572 ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
5573 }
5574 if (num_reversed > 0)
5575 {
5576 fprintf (fp, "%d out of %d sequences were reversed.\n", num_reversed, num_seqs);
5577 *errors_in_log = TRUE;
5578 }
5579 return OM_MSG_RET_DONE;
5580 }
5581
5582 static void ReOrderOneListForMaster (ValNodePtr one_list, Int4 master_pos)
5583 {
5584 SQNBspPtr sbp, sbp_prev;
5585 Int4 seg_pos;
5586
5587 if (one_list == NULL || one_list->data.ptrvalue == NULL || master_pos < 2)
5588 {
5589 return;
5590 }
5591
5592 sbp = (SQNBspPtr) one_list->data.ptrvalue;
5593
5594 sbp_prev = sbp;
5595 sbp = sbp->next;
5596 seg_pos = 2;
5597 while (sbp != NULL && seg_pos != master_pos)
5598 {
5599 sbp_prev = sbp;
5600 sbp = sbp->next;
5601 seg_pos++;
5602 }
5603
5604 if (sbp != NULL)
5605 {
5606 sbp_prev->next = sbp->next;
5607 sbp->next = (SQNBspPtr) one_list->data.ptrvalue;
5608 one_list->data.ptrvalue = sbp;
5609 }
5610 }
5611
5612 static Boolean GetMasterSeq (ValNodePtr seg_aln_list)
5613
5614 {
5615 GrouP c;
5616 GrouP g;
5617 PrompT ppt1;
5618 WindoW w;
5619 MasterSeqFormData md;
5620 ButtoN b;
5621 ValNodePtr choice_name_list = NULL, vnp;
5622 SQNBspPtr sbp, seg_list;
5623 Char buf [41];
5624 Int4 master_pos;
5625 SeqIdPtr sip;
5626
5627 if (seg_aln_list == NULL || seg_aln_list->data.ptrvalue == NULL)
5628 {
5629 return FALSE;
5630 }
5631 md.listBoxUp = TRUE;
5632 md.listBoxAccept = FALSE;
5633 w = ModalWindow (-50, -20, -20, -20, NULL);
5634 if (w == NULL)
5635 {
5636 return FALSE;
5637 }
5638
5639 g = HiddenGroup (w, -1, 0, NULL);
5640 SetGroupSpacing (g, 10, 10);
5641 ppt1 = StaticPrompt (g, "Select the master sequence for this alignment.",
5642 0, 0, programFont, 'c');
5643
5644 seg_list = (SQNBspPtr) seg_aln_list->data.ptrvalue;
5645 for (sbp = seg_list; sbp != NULL; sbp = sbp->next)
5646 {
5647 sip = SeqIdFindBest (sbp->bsp->id, 0);
5648 SeqIdWrite (sip, buf, PRINTID_REPORT, sizeof (buf) - 1);
5649 ValNodeAddPointer (&choice_name_list, 0, StringSave (buf));
5650 }
5651 md.seq_dlg = SelectionDialog (g, NULL, NULL, FALSE, "sequence", choice_name_list, 8);
5652 choice_name_list = ValNodeFreeData (choice_name_list);
5653
5654 c = HiddenGroup (g, 2, 0, NULL);
5655 b = DefaultButton (c, "Accept", AcceptMasterSeqMessage);
5656 SetObjectExtra (b, &md, NULL);
5657 b = PushButton (c, "Cancel", CancelMasterSeqMessage);
5658 SetObjectExtra (b, &md, NULL);
5659 AlignObjects (ALIGN_CENTER, (HANDLE) ppt1, (HANDLE) md.seq_dlg,
5660 (HANDLE) c, NULL);
5661 RealizeWindow (w);
5662 Show (w);
5663 Select (w);
5664 while (md.listBoxUp) {
5665 ProcessExternalEvent ();
5666 Update ();
5667 }
5668 ProcessAnEvent ();
5669
5670 vnp = DialogToPointer (md.seq_dlg);
5671 if (vnp == NULL || vnp->data.intvalue == 0)
5672 {
5673 md.listBoxAccept = FALSE;
5674 }
5675 else
5676 {
5677 master_pos = vnp->data.intvalue;
5678 for (vnp = seg_aln_list; vnp != NULL; vnp = vnp->next)
5679 {
5680 ReOrderOneListForMaster (vnp, master_pos);
5681 }
5682 }
5683 Remove (w);
5684 return md.listBoxAccept;
5685 }
5686
5687
5688 extern Int2 AddSeqAlignForSeqEntry (SeqEntryPtr sep, Uint2 entityID, Boolean choose_master, Boolean use_new_blast)
5689 {
5690 SQNBspPtr sbp;
5691 Char path [PATH_MAX]; /* path for log of sequences reversed during alignment */
5692 FILE *fp; /* file pointer for sequence reverse log */
5693 Boolean errors_in_log = FALSE;
5694 ValNodePtr seg_aln_list, vnp;
5695 BioseqSetPtr bssp = NULL;
5696
5697 if (sep == NULL || sep->data.ptrvalue == NULL || !IS_Bioseq_set (sep)) return OM_MSG_RET_ERROR;
5698
5699 bssp = (BioseqSetPtr) sep->data.ptrvalue;
5700 seg_aln_list = GetAlignmentSegmentsList (sep);
5701
5702 if (choose_master)
5703 {
5704 if (!GetMasterSeq (seg_aln_list))
5705 {
5706 return OM_MSG_RET_ERROR;
5707 }
5708 }
5709
5710 TmpNam (path);
5711 fp = FileOpen (path, "wb");
5712 if (fp == NULL) return OM_MSG_RET_ERROR;
5713
5714 for (vnp = seg_aln_list; vnp != NULL; vnp = vnp->next)
5715 {
5716 sbp = vnp->data.ptrvalue;
5717 if (sbp == NULL) continue;
5718 CreateOneAlignment (sbp, entityID, bssp, use_new_blast, fp, &errors_in_log);
5719 }
5720
5721 FileClose (fp);
5722 if (errors_in_log > 0) {
5723 LaunchGeneralTextViewer (path, "Alignment Notes");
5724 }
5725 FileRemove (path);
5726 return OM_MSG_RET_DONE;
5727 }
5728
5729
5730 extern void GenerateSeqAlignMenuItem (IteM i)
5731 {
5732 BaseFormPtr bfp;
5733 BioseqSetPtr bssp;
5734 SeqEntryPtr sep;
5735
5736 #ifdef WIN_MAC
5737 bfp = currentFormDataPtr;
5738 #else
5739 bfp = GetObjectExtra (i);
5740 #endif
5741 if (bfp == NULL) return;
5742
5743 sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
5744 if (sep == NULL || !IS_Bioseq_set (sep)) {
5745 Message (MSG_ERROR, "This record does not have a top-levelset!");
5746 } else {
5747 bssp = FindTopLevelSetForDesktopFunction((BioseqSetPtr) sep->data.ptrvalue);
5748 sep = SeqMgrGetSeqEntryForData (bssp);
5749 AddSeqAlignForSeqEntry (sep, bfp->input_entityID, FALSE, TRUE);
5750 }
5751 }
5752
5753 static Int2 LIBCALLBACK
5754 GenerateSeqAlignFromSeqEntryMasterOption
5755 (Pointer data,
5756 Boolean choose_master,
5757 Boolean use_new_blast)
5758
5759 {
5760 OMProcControlPtr ompcp;
5761 SeqEntryPtr sep;
5762 BioseqSetPtr bssp = NULL;
5763 BioseqPtr bsp = NULL;
5764
5765 ompcp = (OMProcControlPtr) data;
5766 if (ompcp == NULL || ompcp->proc == NULL) return OM_MSG_RET_ERROR;
5767 switch (ompcp->input_itemtype) {
5768 case OBJ_BIOSEQ :
5769 Message (MSG_ERROR, "Must select BioseqSet to create alignment");
5770 return OM_MSG_RET_ERROR;
5771 break;
5772 case OBJ_BIOSEQSET :
5773 bssp = (BioseqSetPtr) ompcp->input_data;
5774 if (bssp == NULL) {
5775 Message (MSG_ERROR, "Must select BioseqSet to create alignment");
5776 return OM_MSG_RET_ERROR;
5777 }
5778 break;
5779 case 0 :
5780 return OM_MSG_RET_ERROR;
5781 default :
5782 return OM_MSG_RET_ERROR;
5783 }
5784 if (ompcp->input_data == NULL) return OM_MSG_RET_ERROR;
5785 sep = SeqMgrGetSeqEntryForData (ompcp->input_data);
5786 if (sep == NULL) return OM_MSG_RET_ERROR;
5787
5788 return AddSeqAlignForSeqEntry (sep, ompcp->input_entityID, choose_master, use_new_blast);
5789 }
5790
5791 static Int2 LIBCALLBACK GenerateSeqAlignFromSeqEntry (Pointer data)
5792
5793 {
5794 return GenerateSeqAlignFromSeqEntryMasterOption (data, FALSE, FALSE);
5795 }
5796
5797 static Int2 LIBCALLBACK GenerateSeqAlignFromSeqEntryChooseMaster (Pointer data)
5798
5799 {
5800 return GenerateSeqAlignFromSeqEntryMasterOption (data, TRUE, FALSE);
5801 }
5802
5803 static Int2 LIBCALLBACK GenerateSeqAlignFromSeqEntryUseNewBlast (Pointer data)
5804
5805 {
5806 return GenerateSeqAlignFromSeqEntryMasterOption (data, FALSE, TRUE);
5807 }
5808
5809 static Int2 LIBCALLBACK GenerateSeqAlignFromSeqEntryChooseMasterUseNewBlast (Pointer data)
5810
5811 {
5812 return GenerateSeqAlignFromSeqEntryMasterOption (data, TRUE, TRUE);
5813 }
5814
5815 static Int2 LIBCALLBACK GenerateSeqAlignFromSeqEntryProtEx (Pointer data, Boolean use_new_blast)
5816
5817 {
5818 BioseqPtr bsp;
5819 BioseqSetPtr bssp;
5820 SeqAnnotPtr curr;
5821 OMProcControlPtr ompcp;
5822 SeqAlignPtr salp;
5823 SeqAlignPtr salp_head;
5824 SeqAlignPtr salp_mult;
5825 SeqAlignPtr salp_prev;
5826 SeqAlignPtr salp_tmp;
5827 SeqAnnotPtr sap;
5828 SeqAnnotPtr PNTR sapp;
5829 SQNBspPtr sbp;
5830 SQNBspPtr sbp_prev;
5831 SeqEntryPtr sep;
5832
5833 ompcp = (OMProcControlPtr) data;
5834 if (ompcp == NULL || ompcp->proc == NULL) return OM_MSG_RET_ERROR;
5835 switch (ompcp->input_itemtype) {
5836 case OBJ_BIOSEQ :
5837 break;
5838 case OBJ_BIOSEQSET :
5839 break;
5840 case 0 :
5841 return OM_MSG_RET_ERROR;
5842 default :
5843 return OM_MSG_RET_ERROR;
5844 }
5845 if (ompcp->input_data == NULL) return OM_MSG_RET_ERROR;
5846 sep = SeqMgrGetSeqEntryForData (ompcp->input_data);
5847 if (sep == NULL) return OM_MSG_RET_ERROR;
5848 sbp = (SQNBspPtr)MemNew(sizeof(SQNBsp));
5849 SeqEntryExplore(sep, sbp, SQNGetBioseqsProt);
5850 bsp = sbp->bsp;
5851 sbp_prev = sbp;
5852 sbp = sbp->next;
5853 MemFree(sbp_prev);
5854 salp_head = salp_prev = NULL;
5855 while (sbp != NULL)
5856 {
5857 if (use_new_blast)
5858 {
5859 salp = Sequin_GlobalAlign2Seq(bsp, sbp->bsp, FALSE);
5860 }
5861 else
5862 {
5863 salp = Sqn_GlobalAlign2Seq(bsp, sbp->bsp, FALSE);
5864 }
5865 if (salp_head != NULL && salp != NULL)
5866 {
5867 salp_prev->next = salp;
5868 salp_prev = salp;
5869 } else if (salp != NULL)
5870 salp_head = salp_prev = salp;
5871 sbp_prev = sbp;
5872 sbp = sbp->next;
5873 MemFree(sbp_prev);
5874 }
5875 if (salp_head != NULL)
5876 {
5877 salp_tmp = salp_head;
5878 while (salp_tmp != NULL)
5879 {
5880 if (salp_tmp->saip != NULL)
5881 {
5882 SeqAlignIndexFree(salp_tmp->saip);
5883 salp_tmp->saip = NULL;
5884 }
5885 salp_tmp = salp_tmp->next;
5886 }
5887 AlnMgr2IndexSeqAlign(salp_head);
5888 salp_mult = AlnMgr2GetSubAlign(salp_head, 0, -1, 0, TRUE);
5889 salp_mult->dim = AlnMgr2GetNumRows(salp_head);
5890 salp_mult->type = SAT_PARTIAL;
5891 FixAlignmentEndStubs (salp_mult);
5892 SeqAlignSetFree(salp_head);
5893 sap = SeqAnnotForSeqAlign(salp_mult);
5894 } else
5895 sap = NULL;
5896 if (sap != NULL) {
5897
5898 sep = GetTopSeqEntryForEntityID (ompcp->input_entityID);
5899 if (sep != NULL && sep->data.ptrvalue != NULL) {
5900 sapp = NULL;
5901 if (IS_Bioseq (sep)) {
5902 bsp = (BioseqPtr) sep->data.ptrvalue;
5903 sapp = &(bsp->annot);
5904 } else if (IS_Bioseq_set (sep)) {
5905 bssp = (BioseqSetPtr) sep->data.ptrvalue;
5906 sapp = &(bssp->annot);
5907 }
5908 if (sapp != NULL) {
5909 if (*sapp != NULL) {
5910 curr = *sapp;
5911 while (curr->next != NULL) {
5912 curr = curr->next;
5913 }
5914 curr->next = sap;
5915 } else {
5916 *sapp = sap;
5917 }
5918 }
5919 ObjMgrSetDirtyFlag (ompcp->input_entityID, TRUE);
5920 ObjMgrSendMsg (OM_MSG_UPDATE, ompcp->input_entityID, 0, 0);
5921 }
5922 }
5923 return OM_MSG_RET_DONE;
5924 }
5925
5926
5927 static Int2 LIBCALLBACK GenerateSeqAlignFromSeqEntryProt (Pointer data)
5928 {
5929 return GenerateSeqAlignFromSeqEntryProtEx (data, FALSE);
5930 }
5931
5932 static Int2 LIBCALLBACK GenerateSeqAlignFromSeqEntryProtUseNewBlast (Pointer data)
5933 {
5934 return GenerateSeqAlignFromSeqEntryProtEx (data, TRUE);
5935 }
5936
5937 static Boolean RawSeqLaunchFunc (GatherContextPtr gcp)
5938
5939 {
5940 BioseqPtr bsp;
5941 Int2 handled;
5942
5943 if (gcp == NULL) return TRUE;
5944 bsp = (BioseqPtr) gcp->userdata;
5945 if (bsp == NULL) return TRUE;
5946 if (gcp->thistype == OBJ_BIOSEQ) {
5947 if (bsp == (BioseqPtr) gcp->thisitem) {
5948 WatchCursor ();
5949 handled = GatherProcLaunch (OMPROC_EDIT, FALSE, gcp->entityID, gcp->itemID,
5950 OBJ_BIOSEQ, 0, 0, OBJ_BIOSEQ, 0);
5951 ArrowCursor ();
5952 if (handled != OM_MSG_RET_DONE || handled == OM_MSG_RET_NOPROC) {
5953 /*
5954 Message (MSG_ERROR, "Unable to launch editor on sequence.");
5955 */
5956 }
5957 return FALSE;
5958 }
5959 }
5960 return TRUE;
5961 }
5962
5963 extern Int2 LIBCALLBACK BioseqSegEditFunc (Pointer data)
5964
5965 {
5966 BioseqPtr bsp;
5967 GatherScope gs;
5968 SeqIdPtr sip;
5969 SeqLocPtr slp = NULL;
5970 OMProcControlPtr ompcp;
5971
5972 ompcp = (OMProcControlPtr) data;
5973 slp = NULL;
5974 if (ompcp == NULL || ompcp->proc == NULL) return OM_MSG_RET_ERROR;
5975 switch (ompcp->input_itemtype) {
5976 case OBJ_BIOSEQ_SEG :
5977 slp = (SeqLocPtr) ompcp->input_data;
5978 break;
5979 case 0 :
5980 return OM_MSG_RET_ERROR;
5981 default :
5982 return OM_MSG_RET_ERROR;
5983 }
5984 if (slp == NULL) return OM_MSG_RET_ERROR;
5985 sip = SeqLocId (slp);
5986 if (sip == NULL) return OM_MSG_RET_ERROR;
5987 bsp = BioseqFind (sip);
5988 if (bsp == NULL) return OM_MSG_RET_ERROR;
5989 MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
5990 gs.seglevels = 1;
5991 gs.get_feats_location = TRUE;
5992 MemSet((Pointer)(gs.ignore), (int)(TRUE), (size_t)(OBJ_MAX * sizeof(Boolean)));
5993 gs.ignore[OBJ_BIOSEQ] = FALSE;
5994 gs.ignore[OBJ_BIOSEQ_SEG] = FALSE;
5995 GatherEntity (ompcp->input_entityID, (Pointer) bsp, RawSeqLaunchFunc, &gs);
5996
5997 return OM_MSG_RET_DONE;
5998 }
5999
6000 static SeqLocPtr SeqLocCopyOne (SeqLocPtr slp)
6001 {
6002 SeqLocPtr slpnew, slptemp;
6003
6004 slptemp = slp->next;
6005 slp->next = NULL;
6006 slpnew = AsnIoMemCopy ((Pointer) slp, (AsnReadFunc) SeqLocAsnRead,
6007 (AsnWriteFunc) SeqLocAsnWrite);
6008 slp->next = slptemp;
6009 return slpnew;
6010 }
6011
6012 extern SeqFeatPtr SeqFeatCopy (SeqFeatPtr sfp)
6013 {
6014 SeqFeatPtr sfpnew;
6015
6016 sfpnew = AsnIoMemCopy ((Pointer) sfp, (AsnReadFunc) SeqFeatAsnRead,
6017 (AsnWriteFunc) SeqFeatAsnWrite);
6018 return sfpnew;
6019 }
6020
6021 static void
6022 SetExplodedProtein
6023 (BioseqPtr orig_prot,
6024 BioseqSetPtr nucprot_bssp,
6025 BioseqPtr nucbsp,
6026 SeqFeatPtr sfp,
6027 CharPtr prot_id_str,
6028 Int4 cum_offset)
6029 {
6030 SeqIdPtr prot_sip;
6031 BioseqPtr new_prot;
6032 SeqEntryPtr prot_sep, sep_last;
6033 Int4 frame_shift, prot_start, prot_stop;
6034 Int4 loc_len, adjusted_len = 0, prot_len;
6035 CdRegionPtr crp;
6036 ValNodePtr vnp;
6037 MolInfoPtr mip;
6038 Boolean partial5, partial3;
6039
6040 if (orig_prot == NULL || nucprot_bssp == NULL
6041 || sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION
6042 || StringHasNoText (prot_id_str))
6043 {
6044 return;
6045 }
6046
6047 crp = (CdRegionPtr) sfp->data.value.ptrvalue;
6048 if (crp == NULL)
6049 {
6050 return;
6051 }
6052
6053 prot_sip = MakeUniqueSeqID (prot_id_str);
6054 frame_shift = cum_offset % 3;
6055 if (frame_shift == 0
6056 || (crp->frame == 3 && frame_shift == 1)
6057 || (crp->frame == 2 && frame_shift == 2))
6058 {
6059 prot_start = cum_offset / 3;
6060 }
6061 else
6062 {
6063 prot_start = cum_offset / 3 + 1;
6064 }
6065 loc_len = SeqLocLen (sfp->location);
6066 if (crp->frame == 1 || crp->frame == 0)
6067 {
6068 adjusted_len = loc_len - frame_shift;
6069 }
6070 else if (crp->frame == 2)
6071 {
6072 adjusted_len = loc_len - frame_shift - 1;
6073 }
6074 else if (crp->frame == 3)
6075 {
6076 adjusted_len = loc_len - frame_shift - 2;
6077 }
6078 prot_len = adjusted_len / 3;
6079 if (adjusted_len % 3 == 2)
6080 {
6081 prot_len ++;
6082 }
6083 prot_stop = prot_start + prot_len - 1;
6084 if (prot_stop > orig_prot->length - 1)
6085 {
6086 prot_stop = orig_prot->length - 1;
6087 }
6088 new_prot = BioseqCopyEx (prot_sip, orig_prot,
6089 prot_start,
6090 prot_stop,
6091 Seq_strand_plus, TRUE);
6092 /* add to nuc-prot set */
6093 prot_sep = SeqEntryNew ();
6094 prot_sep->choice = 1;
6095 prot_sep->data.ptrvalue = new_prot;
6096 sep_last = nucprot_bssp->seq_set;
6097 while (sep_last != NULL && sep_last->next != NULL)
6098 {
6099 sep_last = sep_last->next;
6100 }
6101 if (sep_last == NULL)
6102 {
6103 nucprot_bssp->seq_set = prot_sep;
6104 }
6105 else
6106 {
6107 sep_last->next = prot_sep;
6108 }
6109
6110 SeqMgrAddToBioseqIndex (new_prot);
6111
6112 /* set partials */
6113 CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
6114 if (cum_offset > 0)
6115 {
6116 partial5 = TRUE;
6117 }
6118 if (prot_stop < orig_prot->length - 1)
6119 {
6120 partial3 = TRUE;
6121 }
6122 SetSeqLocPartial (sfp->location, partial5, partial3);
6123 sfp->partial = partial5 || partial3;
6124
6125 /* add MolInfo descriptor */
6126 vnp = SeqEntryGetSeqDescr (prot_sep, Seq_descr_molinfo, NULL);
6127 if (vnp == NULL) {
6128 vnp = CreateNewDescriptor (prot_sep, Seq_descr_molinfo);
6129 }
6130 if (vnp != NULL)
6131 {
6132 mip = (MolInfoPtr) vnp->data.ptrvalue;
6133 if (mip == NULL)
6134 {
6135 mip = MolInfoNew ();
6136 vnp->data.ptrvalue = (Pointer) mip;
6137 }
6138 if (mip != NULL) {
6139 mip->biomol = 8;
6140 mip->tech = 13;
6141 if (partial5 && partial3) {
6142 mip->completeness = 5;
6143 } else if (partial5) {
6144 mip->completeness = 3;
6145 } else if (partial3) {
6146 mip->completeness = 4;
6147 } else {
6148 mip->completeness = 0;
6149 }
6150 }
6151 }
6152
6153 /* make feature product point to new Bioseq */
6154 sfp->product = ValNodeNew (NULL);
6155 sfp->product->choice = SEQLOC_WHOLE;
6156 sfp->product->data.ptrvalue = prot_sip;
6157
6158 /* adjust frame */
6159 if (frame_shift != 0)
6160 {
6161 switch (crp->frame)
6162 {
6163 case 0:
6164 case 1:
6165 if (frame_shift == 1)
6166 {
6167 crp->frame = 3;
6168 }
6169 else if (frame_shift == 2)
6170 {
6171 crp->frame = 2;
6172 }
6173 break;
6174 case 2:
6175 if (frame_shift == 1)
6176 {
6177 crp->frame = 1;
6178 }
6179 else if (frame_shift == 2)
6180 {
6181 crp->frame = 3;
6182 }
6183 break;
6184 case 3:
6185 if (frame_shift == 1)
6186 {
6187 crp->frame = 2;
6188 }
6189 else if (frame_shift == 2)
6190 {
6191 crp->frame = 1;
6192 }
6193 break;
6194 }
6195 }
6196
6197 /* retranslate coding region */
6198 SeqEdTranslateOneCDS (sfp, nucbsp, nucbsp->idx.entityID, Sequin_GlobalAlign2Seq);
6199 }
6200
6201 static ObjectIdPtr ObjectIdFromString (CharPtr str)
6202 {
6203 CharPtr cp;
6204 ObjectIdPtr oip;
6205
6206 oip = ObjectIdNew ();
6207
6208 if (!StringHasNoText (str)) {
6209 cp = str;
6210 while (*cp != 0 && isdigit (*cp)) {
6211 cp++;
6212 }
6213 if (*cp == 0) {
6214 oip->id = atoi (str);
6215 } else {
6216 oip->str = StringSave (str);
6217 }
6218 }
6219 return oip;
6220 }
6221
6222
6223 static void IncrementObjectId (ObjectIdPtr oip)
6224 {
6225 Int4 len;
6226
6227 if (oip == NULL) {
6228 return;
6229 }
6230
6231 if (!StringHasNoText (oip->str)) {
6232 len = StringLen (oip->str);
6233 *(oip->str + len - 1) = *(oip->str + len - 1) + 1;
6234 } else {
6235 oip->id++;
6236 }
6237
6238 }
6239
6240
6241 static void DecrementObjectId (ObjectIdPtr oip)
6242 {
6243 Int4 len;
6244
6245 if (oip == NULL) {
6246 return;
6247 }
6248
6249 if (!StringHasNoText (oip->str)) {
6250 len = StringLen (oip->str);
6251 *(oip->str + len - 1) = *(oip->str + len - 1) - 1;
6252 } else {
6253 oip->id--;
6254 }
6255
6256 }
6257
6258
6259 static CharPtr ObjectIdLabel (ObjectIdPtr oip)
6260 {
6261 Char buf[15];
6262
6263 if (oip == NULL) {
6264 return NULL;
6265 }
6266 if (!StringHasNoText (oip->str)) {
6267 return StringSave (oip->str);
6268 } else {
6269 sprintf (buf, "%d", oip->id);
6270 return StringSave (buf);
6271 }
6272 }
6273
6274
6275 static Boolean ExplodeGroup (SeqEntryPtr sep, SeqFeatPtr sfp)
6276
6277 {
6278 SeqFeatPtr sfpnew, sfpold, sfplast;
6279 ImpFeatPtr ifp;
6280 SeqLocPtr slphead, slp;
6281 GBQualPtr gbq;
6282 ObjectIdPtr exon_count = NULL;
6283 BioseqPtr orig_prot = NULL, nucbsp;
6284 Int4 cum_offset = 0;
6285 Char prot_id_str [128];
6286 Char prot_id_str_prefix [255];
6287 SeqEntryPtr nucprot_sep = NULL;
6288 BioseqSetPtr nucprot_bssp = NULL;
6289 ObjMgrDataPtr omdptop;
6290 ObjMgrData omdata;
6291 Uint2 parenttype;
6292 Pointer parentptr;
6293 Int4 feat_num = 1;
6294
6295 if (sfp == NULL || sfp->location == NULL) return FALSE;
6296
6297 /* save the seqloc (chain) */
6298 slphead = sfp->location;
6299 slp = SeqLocFindNext (slphead, NULL);
6300 if (slp == NULL) return FALSE;
6301
6302 /* trash the loc info in the impfeat */
6303 if (sfp->data.choice == SEQFEAT_IMP) {
6304 ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
6305 if (ifp != NULL) {
6306 ifp->loc = MemFree (ifp->loc);
6307 }
6308 }
6309
6310 /* if coding region, get copy of original protein, to use when
6311 * retranslating coding regions
6312 */
6313 if (sfp->data.choice == SEQFEAT_CDREGION && sfp->product != NULL)
6314 {
6315 orig_prot = BioseqFindFromSeqLoc (sfp->product);
6316 if (orig_prot != NULL)
6317 {
6318 nucbsp = BioseqFindFromSeqLoc (sfp->location);
6319 if (nucbsp != NULL)
6320 {
6321 nucprot_sep = GetBestTopParentForData (nucbsp->idx.entityID, nucbsp);
6322 if (nucprot_sep != NULL && IS_Bioseq_set (nucprot_sep))
6323 {
6324 nucprot_bssp = (BioseqSetPtr) nucprot_sep->data.ptrvalue;
6325 if (nucprot_bssp != NULL
6326 && nucprot_bssp->_class == BioseqseqSet_class_nuc_prot)
6327 {
6328 sfp->product = NULL;
6329 SeqIdWrite (SeqIdFindBest (orig_prot->id, SEQID_LOCAL), prot_id_str,
6330 PRINTID_REPORT, sizeof (prot_id_str) - 1);
6331 }
6332 }
6333 }
6334 if (nucprot_bssp == NULL)
6335 {
6336 orig_prot = NULL;
6337 }
6338 else
6339 {
6340 SaveSeqEntryObjMgrData (nucprot_sep, &omdptop, &omdata);
6341 GetSeqEntryParent (nucprot_sep, &parentptr, &parenttype);
6342 }
6343 }
6344 }
6345
6346
6347 /* orig sfp is copied then orig sfp data is replaced */
6348 sfplast = sfp->next;
6349 sfpnew = SeqFeatCopy (sfp);
6350
6351 /* if exon, increment /number qualifier */
6352 gbq = NULL;
6353 if (sfpnew != NULL && sfpnew->data.choice == SEQFEAT_IMP) {
6354 ifp = (ImpFeatPtr) sfpnew->data.value.ptrvalue;
6355 if (ifp != NULL) {
6356 if (StringICmp (ifp->key, "exon") == 0) {
6357 gbq = sfpnew->qual;
6358 while (gbq != NULL && StringICmp (gbq->qual, "number") != 0) {
6359 gbq = gbq->next;
6360 }
6361 if (gbq != NULL) {
6362 exon_count = ObjectIdFromString (gbq->val);
6363 }
6364 }
6365 }
6366 }
6367
6368 sfp->location = SeqLocCopyOne (slp);
6369 if (sfp->data.choice == SEQFEAT_CDREGION && orig_prot != NULL)
6370 {
6371 sprintf (prot_id_str_prefix, "%s%d", prot_id_str, feat_num);
6372 SetExplodedProtein (orig_prot, nucprot_bssp, nucbsp, sfp, prot_id_str_prefix, cum_offset);
6373 /* adjust cum_offset */
6374 cum_offset += SeqLocLen (sfp->location);
6375 feat_num++;
6376 }
6377 sfpold = sfp;
6378 slp = SeqLocFindNext (slphead, slp);
6379
6380 /* clone as many more as required */
6381 while (slp != NULL) {
6382 if (slp->choice != SEQLOC_NULL) {
6383 if (gbq != NULL) {
6384 gbq->val = MemFree (gbq->val);
6385 IncrementObjectId (exon_count);
6386 gbq->val = ObjectIdLabel (exon_count);
6387 }
6388 sfp = SeqFeatCopy (sfpnew);
6389 sfp->location = SeqLocFree (sfp->location);
6390 sfp->location = SeqLocCopyOne (slp);
6391 sfp->partial = CheckSeqLocForPartial (sfp->location, NULL, NULL);
6392
6393 if (sfp->data.choice == SEQFEAT_CDREGION && orig_prot != NULL)
6394 {
6395 sprintf (prot_id_str_prefix, "%s%d", prot_id_str, feat_num);
6396 SetExplodedProtein (orig_prot, nucprot_bssp, nucbsp, sfp, prot_id_str_prefix, cum_offset);
6397 /* adjust cum_offset */
6398 cum_offset += SeqLocLen (sfp->location);
6399 feat_num++;
6400 }
6401
6402 sfpold->next = sfp;
6403 sfpold = sfp;
6404 }
6405 slp = SeqLocFindNext (slphead, slp);
6406 }
6407 sfpold->next = sfplast;
6408 sfpnew = SeqFeatFree (sfpnew);
6409 slphead = SeqLocFree (slphead);
6410
6411 if (orig_prot != NULL)
6412 {
6413 /* mark orig_prot for deletion */
6414 orig_prot->idx.deleteme = TRUE;
6415
6416 /* relink nucprot set parent */
6417 SeqMgrLinkSeqEntry (nucprot_sep, parenttype, parentptr);
6418 RestoreSeqEntryObjMgrData (nucprot_sep, omdptop, &omdata);
6419 }
6420
6421 return TRUE;
6422 }
6423
6424 static Int2 LIBCALLBACK GroupExplodeFunc (Pointer data)
6425
6426 {
6427 OMProcControlPtr ompcp;
6428 SelStructPtr ssp;
6429 Boolean isDirty = FALSE;
6430 Boolean isFirstSsp;
6431 ExplodeStructPtr esp;
6432 ExplodeStructPtr firstEsp = NULL;
6433 ExplodeStructPtr lastEsp;
6434 Boolean isFirstEsp;
6435
6436 /* Check the parameter */
6437
6438 ompcp = (OMProcControlPtr) data;
6439 if (ompcp == NULL || ompcp->input_itemtype == 0)
6440 return OM_MSG_RET_ERROR;
6441
6442 /* Get the linked of list of selected items */
6443
6444 ssp = ObjMgrGetSelected();
6445
6446 /* Go through the list and save pointers */
6447 /* to the items themselves. */
6448
6449 isFirstEsp = TRUE;
6450 isFirstSsp = TRUE;
6451
6452 while (NULL != ssp) {
6453
6454 if (!isFirstSsp) {
6455 ompcp->input_entityID = ssp->entityID;
6456 ompcp->input_itemID = ssp->itemID;
6457 ompcp->input_itemtype = ssp->itemtype;
6458
6459 GatherDataForProc (ompcp, FALSE);
6460 }
6461
6462 switch (ssp->itemtype)
6463 {
6464 case OBJ_SEQFEAT:
6465
6466 esp = (ExplodeStructPtr) MemNew (sizeof (ExplodeStruct));
6467 esp->seqFeatPtr = (SeqFeatPtr) ompcp->input_data;
6468 esp->topSep = GetTopSeqEntryForEntityID (ssp->entityID);
6469
6470 if (isFirstEsp) {
6471 firstEsp = esp;
6472 isFirstEsp = FALSE;
6473 }
6474 else
6475 lastEsp->next = esp;
6476
6477 lastEsp = esp;
6478 lastEsp->next = NULL;
6479 break;
6480 default:
6481 break;
6482 }
6483
6484 isFirstSsp = FALSE;
6485 ssp = ssp->next;
6486 }
6487
6488 /* Loop through all the selected items */
6489 /* and explode each one. */
6490
6491 esp = firstEsp;
6492 while (NULL != esp) {
6493 if (ExplodeGroup (esp->topSep, esp->seqFeatPtr))
6494 isDirty = TRUE;
6495 esp = esp->next;
6496 }
6497
6498 /* If any actual exploding was done then */
6499 /* force an update to be done. */
6500
6501 if (isDirty)
6502 {
6503 /* remove any protein sequences that were marked for deletion */
6504 DeleteMarkedObjects (ompcp->input_entityID, 0, NULL);
6505
6506 ObjMgrSetDirtyFlag (ompcp->input_entityID, TRUE);
6507 ObjMgrSendMsg (OM_MSG_UPDATE, ompcp->input_entityID,
6508 ompcp->input_itemID, ompcp->input_itemtype);
6509 return OM_MSG_RET_DONE;
6510 }
6511 else
6512 return OM_MSG_RET_ERROR;
6513 }
6514
6515 extern void GroupExplodeToolBtn (ButtoN b)
6516 {
6517 BaseFormPtr bfp;
6518 SelStructPtr ssp;
6519 Boolean isDirty = FALSE;
6520 SeqEntryPtr sep;
6521 SeqFeatPtr sfp;
6522 SeqMgrFeatContext context;
6523
6524 bfp = (BaseFormPtr) GetObjectExtra (b);
6525 if (bfp == NULL) return;
6526
6527 ssp = ObjMgrGetSelected();
6528 while (NULL != ssp) {
6529 if (ssp->itemtype == OBJ_SEQFEAT)
6530 {
6531 sep = GetTopSeqEntryForEntityID (ssp->entityID);
6532
6533 sfp = SeqMgrGetDesiredFeature (ssp->entityID, NULL, ssp->itemID, 0, NULL, &context);
6534 if (sfp != NULL && ExplodeGroup (sep, sfp))
6535 {
6536 isDirty = TRUE;
6537 }
6538 }
6539 ssp = ssp->next;
6540 }
6541
6542 /* If any actual exploding was done then */
6543 /* force an update to be done. */
6544
6545 if (isDirty)
6546 {
6547 /* remove any protein sequences that were marked for deletion */
6548 DeleteMarkedObjects (bfp->input_entityID, 0, NULL);
6549 ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE);
6550 ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID,
6551 bfp->input_itemID, bfp->input_itemtype);
6552 }
6553 }
6554
6555
6556 static Boolean MakeExonsAndIntronsFromFeature (SeqEntryPtr sep, BioseqPtr bsp,
6557 SeqLocPtr location,
6558 SeqFeatPtr putafterhere,
6559 Boolean MakeIntrons,
6560 ObjectIdPtr first_exon_number)
6561
6562 {
6563 SeqFeatPtr curr;
6564 Boolean first;
6565 Int2 fuzz_from;
6566 Int2 fuzz_to;
6567 ImpFeatPtr ifp;
6568 Int4 last;
6569 SeqLocPtr next;
6570 SeqFeatPtr putbeforehere;
6571 SeqFeatPtr sfp;
6572 SeqLocPtr slp;
6573 Int4 start;
6574 Int4 stop;
6575 Uint1 strand;
6576 Int4 tmp;
6577 Boolean partial5, partial3;
6578 GBQualPtr gbqual;
6579 ObjectIdPtr part_number;
6580 ValNodePtr merge_to_parts_list = NULL;
6581 ValNodePtr vnp;
6582
6583 if (sep == NULL || bsp == NULL || location == NULL || putafterhere == NULL) return FALSE;
6584 putbeforehere = putafterhere->next;
6585 curr = putafterhere;
6586 slp = SeqLocFindNext (location, NULL);
6587 if (slp == NULL) return FALSE;
6588 first = TRUE;
6589 last = 0;
6590 part_number = ObjectIdDup (first_exon_number);
6591 while (slp != NULL) {
6592 CheckSeqLocForPartial (slp, &partial5, &partial3);
6593 next = SeqLocFindNext (location, slp);
6594 if (slp->choice != SEQLOC_NULL) {
6595 start = GetOffsetInBioseq (slp, bsp, SEQLOC_START);
6596 stop = GetOffsetInBioseq (slp, bsp, SEQLOC_STOP);
6597 strand = SeqLocStrand (slp);
6598 if (strand > Seq_strand_both_rev && strand != Seq_strand_other) {
6599 strand = Seq_strand_unknown;
6600 }
6601 fuzz_from = -1;
6602 fuzz_to = -1;
6603 if (start > stop) {
6604 tmp = start;
6605 start = stop;
6606 stop = tmp;
6607 }
6608 if (! first && MakeIntrons) {
6609 sfp = SeqFeatNew ();
6610 if (sfp != NULL) {
6611 sfp->data.choice = SEQFEAT_IMP;
6612 if (strand == Seq_strand_minus) {
6613 AddIntToSeqFeat (sfp, stop + 1, last - 1, bsp,
6614 fuzz_from, fuzz_to, strand);
6615 } else {
6616 AddIntToSeqFeat (sfp, last + 1, start - 1, bsp,
6617 fuzz_from, fuzz_to, strand);
6618 }
6619 ifp = ImpFeatNew ();
6620 if (ifp != NULL) {
6621 sfp->data.value.ptrvalue = (Pointer) ifp;
6622 ifp->key = StringSave ("intron");
6623 }
6624 gbqual = GBQualNew ();
6625 if (gbqual != NULL)
6626 {
6627 /* need to use the previous value */
6628 DecrementObjectId (part_number);
6629 gbqual->qual = StringSave ("number");
6630 gbqual->val = ObjectIdLabel (part_number);
6631 /* put back to original value */
6632 IncrementObjectId (part_number);
6633 gbqual->next = sfp->qual;
6634 sfp->qual = gbqual;
6635 }
6636 if (bsp->repr == Seq_repr_seg)
6637 {
6638 ValNodeAddPointer (&merge_to_parts_list, 0, sfp);
6639 }
6640 curr->next = sfp;
6641 curr = sfp;
6642 }
6643 }
6644 first = FALSE;
6645 if (strand == Seq_strand_minus) {
6646 last = start;
6647 } else {
6648 last = stop;
6649 }
6650 sfp = SeqFeatNew ();
6651 if (sfp != NULL) {
6652 sfp->data.choice = SEQFEAT_IMP;
6653 AddIntToSeqFeat (sfp, start, stop, bsp,
6654 fuzz_from, fuzz_to, strand);
6655 ifp = ImpFeatNew ();
6656 if (ifp != NULL) {
6657 sfp->data.value.ptrvalue = (Pointer) ifp;
6658 ifp->key = StringSave ("exon");
6659 }
6660 SetSeqLocPartial (sfp->location, partial5, partial3);
6661 gbqual = GBQualNew ();
6662 if (gbqual != NULL)
6663 {
6664 gbqual->qual = StringSave ("number");
6665 gbqual->val = ObjectIdLabel (part_number);
6666 gbqual->next = sfp->qual;
6667 sfp->qual = gbqual;
6668 }
6669 IncrementObjectId (part_number);
6670 if (bsp->repr == Seq_repr_seg)
6671 {
6672 ValNodeAddPointer (&merge_to_parts_list, 0, sfp);
6673 }
6674 curr->next = sfp;
6675 curr = sfp;
6676 }
6677 }
6678 slp = next;
6679 }
6680 curr->next = putbeforehere;
6681
6682 for (vnp = merge_to_parts_list; vnp != NULL; vnp = vnp->next)
6683 {
6684 sfp = vnp->data.ptrvalue;
6685 MergeFeatureIntervalsToParts (sfp, FALSE);
6686 }
6687 ValNodeFree (merge_to_parts_list);
6688
6689 part_number = ObjectIdFree (part_number);
6690
6691 return TRUE;
6692 }
6693
6694
/* State shared by the "Make Exons from ..." form and its callbacks. */
6695 typedef struct makeexondata {
6696 FEATURE_FORM_BLOCK
6697
/* check box read with GetStatus when the form is accepted */
6698 ButtoN make_introns_button;
/* text field read with GetTitle to get the first exon number */
6699 TexT exon_number_field;
/* button enabled/disabled by CheckExonNumberText */
6700 ButtoN accept;
6701
/* top SeqEntry of input_entityID, set when the form is accepted */
6702 SeqEntryPtr sep;
/* TRUE to create introns between the exons */
6703 Boolean make_introns;
/* number given to the first exon made from each feature */
6704 ObjectIdPtr first_exon_number;
/* compared with sfp->idx.subtype to pick the features to process */
6705 Uint1 feature_type;
6706 } MakeExonData, PNTR MakeExonPtr;
6707
6708 static void MakeExonsFromFeatureIntervalsVisitFunc (SeqFeatPtr sfp, Pointer userdata)
6709 {
6710 MakeExonPtr mep;
6711 BioseqPtr bsp;
6712
6713 if (sfp == NULL || (mep = (MakeExonPtr) userdata) == NULL || sfp->idx.subtype != mep->feature_type)
6714 {
6715 return;
6716 }
6717
6718 mep = (MakeExonPtr) userdata;
6719 bsp = BioseqFindFromSeqLoc (sfp->location);
6720
6721 MakeExonsAndIntronsFromFeature (mep->sep, bsp, sfp->location, sfp,
6722 mep->make_introns, mep->first_exon_number);
6723
6724 }
6725
6726
6727 static void DoMakeExonsFromFeatureIntervals (ButtoN b)
6728 {
6729 MakeExonPtr mep;
6730 Char exon_number_str [256];
6731
6732 if (b == NULL || (mep = (MakeExonPtr) GetObjectExtra (b)) == NULL) return;
6733
6734 Hide (mep->form);
6735
6736 WatchCursor ();
6737 Update ();
6738
6739 mep->sep = GetTopSeqEntryForEntityID (mep->input_entityID);
6740 mep->make_introns = GetStatus (mep->make_introns_button);
6741 GetTitle (mep->exon_number_field,
6742 exon_number_str,
6743 sizeof (exon_number_str) - 1 );
6744
6745 mep->first_exon_number = ObjectIdFromString (exon_number_str);
6746
6747 VisitFeaturesInSep (mep->sep, mep,
6748 MakeExonsFromFeatureIntervalsVisitFunc);
6749 mep->first_exon_number = ObjectIdFree (mep->first_exon_number);
6750 ObjMgrSetDirtyFlag (mep->input_entityID, TRUE);
6751 ObjMgrSendMsg (OM_MSG_UPDATE, mep->input_entityID, 0, 0);
6752 ArrowCursor ();
6753 Update ();
6754 }
6755
6756 static void CheckExonNumberText (TexT number_field)
6757 {
6758 MakeExonPtr mep;
6759 Char exon_number_str [256];
6760
6761 if (number_field == NULL || (mep = (MakeExonPtr)GetObjectExtra (number_field)) == NULL) return;
6762 if (StringHasNoText (exon_number_str)) {
6763 Disable (mep->accept);
6764 } else {
6765 Enable (mep->accept);
6766 }
6767 }
6768
6769 static void CommonMakeExonsFromFeatureIntervals (
6770 IteM i,
6771 Boolean make_introns,
6772 Uint1 feature_type
6773 )
6774 {
6775 BaseFormPtr bfp;
6776 MakeExonPtr mep;
6777 WindoW w;
6778 GrouP h, p, c;
6779
6780 #ifdef WIN_MAC
6781 bfp = currentFormDataPtr;
6782 #else
6783 bfp = GetObjectExtra (i);
6784 #endif
6785
6786 if (bfp == NULL) return;
6787
6788 mep = MemNew (sizeof (MakeExonData));
6789 if (mep == NULL) return;
6790 mep->input_entityID = bfp->input_entityID;
6791 mep->feature_type = feature_type;
6792
6793 if (feature_type == FEATDEF_CDS)
6794 {
6795 w = FixedWindow (-50, -33, -10, -10, "Make Exons from CDS", NULL);
6796 }
6797 else if (feature_type == FEATDEF_mRNA)
6798 {
6799 w = FixedWindow (-50, -33, -10, -10, "Make Exons from mRNA", NULL);
6800 }
6801 else
6802 {
6803 w = FixedWindow (-50, -33, -10, -10, "Make Exons from Feature", NULL);
6804 }
6805
6806 SetObjectExtra (w, mep, StdCleanupFormProc);
6807 mep->form = (ForM) w;
6808
6809 h = HiddenGroup (w, -1, 0, NULL);
6810 SetGroupSpacing (h, 10, 10);
6811
6812 p = HiddenGroup (h, 2, 0, NULL);
6813 StaticPrompt (p, "First Exon Number", 0, 0, programFont, 'c');
6814 mep->exon_number_field = DialogText (p, "1", 3, CheckExonNumberText);
6815 SetObjectExtra (mep->exon_number_field, mep, NULL);
6816 mep->make_introns_button = CheckBox (p, "Make Introns", NULL);
6817
6818 c = HiddenGroup (h, 4, 0, NULL);
6819 mep->accept = DefaultButton (c, "Accept", DoMakeExonsFromFeatureIntervals);
6820 SetObjectExtra (mep->accept, mep, NULL);
6821 PushButton (c, "Cancel", StdCancelButtonProc);
6822 AlignObjects (ALIGN_CENTER, (HANDLE) p, (HANDLE) c, NULL);
6823 RealizeWindow (w);
6824 Show (w);
6825 Update ();
6826 }
6827
6828 extern void MakeExonsFromCDSIntervals (IteM i)
6829 {
6830 CommonMakeExonsFromFeatureIntervals (i, FALSE, FEATDEF_CDS);
6831 }
6832
6833 extern void MakeExonsFromMRNAIntervals (IteM i)
6834 {
6835 CommonMakeExonsFromFeatureIntervals (i, TRUE, FEATDEF_mRNA);
6836 }
6837
6838 static Int2 LIBCALLBACK MakeExonIntron (Pointer data)
6839
6840 {
6841 BioseqPtr nbsp;
6842 SeqEntryPtr nsep;
6843 OMProcControlPtr ompcp;
6844 SeqFeatPtr sfp;
6845 SeqEntryPtr sep;
6846 ObjectIdPtr oip;
6847 Int2 rval = OM_MSG_RET_ERROR;
6848
6849 ompcp = (OMProcControlPtr) data;
6850 if (ompcp == NULL || ompcp->input_itemtype == 0 || ompcp->input_data == NULL)
6851 return OM_MSG_RET_ERROR;
6852
6853 switch (ompcp->input_itemtype)
6854 {
6855 case OBJ_SEQFEAT:
6856 sfp = (SeqFeatPtr) ompcp->input_data;
6857 if (sfp->data.choice != SEQFEAT_CDREGION && sfp->data.choice != SEQFEAT_RNA) {
6858 return OM_MSG_RET_ERROR;
6859 }
6860 break;
6861 default:
6862 return OM_MSG_RET_ERROR;
6863 }
6864
6865 sep = GetBestTopParentForItemID (ompcp->input_entityID,
6866 ompcp->input_itemID,
6867 ompcp->input_itemtype);
6868 nsep = FindNucSeqEntry (sep);
6869 if (nsep == NULL || nsep->choice != 1) return OM_MSG_RET_ERROR;
6870 nbsp = (BioseqPtr) nsep->data.ptrvalue;
6871 if (nbsp == NULL) return OM_MSG_RET_ERROR;
6872
6873 oip = ObjectIdNew ();
6874 oip->id = 1;
6875 if (MakeExonsAndIntronsFromFeature (sep, nbsp, sfp->location, sfp, TRUE, oip))
6876 {
6877 ObjMgrSetDirtyFlag (ompcp->input_entityID, TRUE);
6878 ObjMgrSendMsg (OM_MSG_UPDATE, ompcp->input_entityID, ompcp->input_itemID,
6879 ompcp->input_itemtype);
6880 rval = OM_MSG_RET_DONE;
6881 }
6882 oip = ObjectIdFree (oip);
6883
6884 return rval;
6885 }
6886
6887 static Int2 LIBCALLBACK DetachBioseq (Pointer data)
6888
6889 {
6890 BioseqPtr bsp;
6891 BioseqSetPtr bssp;
6892 OMProcControlPtr ompcp;
6893 SeqEntryPtr sep;
6894
6895 ompcp = (OMProcControlPtr) data;
6896 if (ompcp == NULL || ompcp->input_itemtype == 0 || ompcp->input_data == NULL)
6897 return OM_MSG_RET_ERROR;
6898
6899 switch (ompcp->input_itemtype)
6900 {
6901 case OBJ_BIOSEQ:
6902 bsp = (BioseqPtr) ompcp->input_data;
6903 if (bsp != NULL) {
6904 sep = bsp->seqentry;
6905 if (sep != NULL) {
6906 sep->next = NULL;
6907 ObjMgrSetDirtyFlag (ompcp->input_entityID, TRUE);
6908 ObjMgrSendMsg (OM_MSG_UPDATE, ompcp->input_entityID, ompcp->input_itemID, ompcp->input_itemtype);
6909 return OM_MSG_RET_DONE;
6910 }
6911 }
6912 break;
6913 case OBJ_BIOSEQSET:
6914 bssp = (BioseqSetPtr) ompcp->input_data;
6915 if (bssp != NULL) {
6916 sep = bssp->seqentry;
6917 if (sep != NULL) {
6918 sep->next = NULL;
6919 ObjMgrSetDirtyFlag (ompcp->input_entityID, TRUE);
6920 ObjMgrSendMsg (OM_MSG_UPDATE, ompcp->input_entityID, ompcp->input_itemID, ompcp->input_itemtype);
6921 return OM_MSG_RET_DONE;
6922 }
6923 }
6924 break;
6925 default:
6926 return OM_MSG_RET_ERROR;
6927 }
6928
6929 return OM_MSG_RET_ERROR;
6930 }
6931
6932 static Int2 LIBCALLBACK ProtLocalIDtoGeneSyn (Pointer data)
6933
6934 {
6935 BioseqPtr bsp = NULL;
6936 Char buf [41];
6937 SeqMgrFeatContext ccontext;
6938 SeqFeatPtr cds;
6939 SeqMgrFeatContext gcontext;
6940 SeqFeatPtr gene;
6941 GeneRefPtr grp;
6942 OMProcControlPtr ompcp;
6943 BioseqPtr pbsp;
6944 SeqEntryPtr sep;
6945 SeqIdPtr sip;
6946
6947 ompcp = (OMProcControlPtr) data;
6948 if (ompcp == NULL || ompcp->input_itemtype == 0 || ompcp->input_data == NULL)
6949 return OM_MSG_RET_ERROR;
6950
6951 switch (ompcp->input_itemtype)
6952 {
6953 case OBJ_BIOSEQ:
6954 bsp = (BioseqPtr) ompcp->input_data;
6955 break;
6956 default:
6957 return OM_MSG_RET_ERROR;
6958 }
6959
6960 if (bsp == NULL) {
6961 return OM_MSG_RET_ERROR;
6962 }
6963 sep = SeqMgrGetSeqEntryForData (bsp);
6964 if (sep == NULL) {
6965 return OM_MSG_RET_ERROR;
6966 }
6967
6968 cds = NULL;
6969 while ((cds = SeqMgrGetNextFeature (bsp, cds, SEQFEAT_CDREGION, 0, &ccontext)) != NULL) {
6970 sip = SeqLocId (cds->product);
6971 if (sip != NULL) {
6972 pbsp = BioseqFind (sip);
6973 if (pbsp != NULL) {
6974 sip = SeqIdFindBest (pbsp->id, SEQID_LOCAL);
6975 if (sip != NULL) {
6976 SeqIdWrite (sip, buf, PRINTID_REPORT, sizeof (buf) - 1);
6977 grp = SeqMgrGetGeneXref (cds);
6978 if (grp != NULL && SeqMgrGeneIsSuppressed (grp)) {
6979 continue;
6980 }
6981 if (grp == NULL) {
6982 gene = SeqMgrGetOverlappingGene (cds->location, &gcontext);
6983 if (gene == NULL) {
6984 gene = CreateNewFeature (sep, NULL, SEQFEAT_GENE, NULL);
6985 if (gene != NULL) {
6986 grp = GeneRefNew ();
6987 gene->data.value.ptrvalue = (Pointer) grp;
6988 gene->location = SeqLocFree (gene->location);
6989 gene->location = AsnIoMemCopy ((Pointer) cds->location,
6990 (AsnReadFunc) SeqLocAsnRead,
6991 (AsnWriteFunc) SeqLocAsnWrite);
6992 }
6993 }
6994 if (gene != NULL) {
6995 grp = (GeneRefPtr) gene->data.value.ptrvalue;
6996 }
6997 }
6998 if (grp != NULL) {
6999 ValNodeCopyStr (&(grp->syn), 0, buf);
7000 }
7001 }
7002 }
7003 }
7004 }
7005
7006 BasicSeqEntryCleanup (sep);
7007 SeriousSeqEntryCleanup (sep, NULL, NULL);
7008
7009 ObjMgrSetDirtyFlag (ompcp->input_entityID, TRUE);
7010 ObjMgrSendMsg (OM_MSG_UPDATE, ompcp->input_entityID, ompcp->input_itemID,
7011 ompcp->input_itemtype);
7012 return OM_MSG_RET_DONE;
7013 }
7014
/* Integer colour codes accepted by SetIntColor, which maps each one to
   the corresponding colour-selection call. */
7015 #define BLACK 0
7016 #define RED 4
7017 #define GREEN 2
7018 #define BLUE 1
7019 #define CYAN 3
7020 #define MAGENTA 5
7021 #define YELLOW 6
7022 #define WHITE 15
7023 #define GRAY 8
7024 #define LTGRAY 7
7025
/* Codes for the darker shades; SetIntColor dispatches them to
   dkCyan, dkGreen and dkBlue. */
7026 #define DKCYAN 21
7027 #define DKGREEN 22
7028 #define DKBLUE 23
7029
/* NOTE(review): not referenced in the nearby ORF viewer code - confirm
   whether it is still used elsewhere in the file. */
7030 #define ORF_LENGTH 10
7031
7032
/* State of one ORF viewer window: the graphical six-frame panel, the
   textual ORF list, and the parameters used to compute the ORFs. */
7033 typedef struct orfviewform
7034 {
7035 FORM_MESSAGE_BLOCK
7036 IcoN icon; /* panel on which the frames are drawn */
7037 WindoW w;
7038 DoC doc; /* orf list doc */
7039 BioseqPtr bsp; /* sequence being scanned */
7040 Int2 gcode; /* genetic code id passed to GeneticCodeFind */
7041 ValNodePtr orfs; /* ORF list: choice = frame, data.ptrvalue = SeqLoc */
7042 Int2 frame, strand; /* frame and strand of the currently selected ORF */
7043 Int4 from, to; /* interval of the currently selected ORF; 0/0 disables highlighting */
7044 double dx, dy; /* drawing scale; dx is (frame width - 1) / sequence length */
7045 RecT mi; /* inner drawing rectangle, recomputed in DrawIcon from mi0 */
7046 Boolean orf_only; /* TRUE: draw ORF boxes only; FALSE: draw codon marks */
7047 SeqLocPtr select_orf; /* copy of the selected ORF location */
7048 Uint2 bsp_entityID; /* entity and item IDs passed to ObjMgrSelect */
7049 Uint4 bsp_itemID;
7050 Uint2 len; /* minimum length of the ORF shown */
7051 Boolean standAlone; /* when FALSE, selections are sent through ObjMgrSelect */
7052 Boolean alt_start; /* TRUE when the start-codon choice is not the first option */
7053 ParData par; /* pardata for orf list doc */
7054 GrouP orf_order; /* sort choice: value 1 sorts by length, otherwise by start */
7055 GrouP start_choice; /* start-codon choice read in AltProc */
7056 ButtoN show_partial_btn; /* checkbox whose status is copied into allow_partial */
7057 Boolean allow_partial; /* passed to ListOrfs as allow_partial */
7058 } OrfViewForm, PNTR OrfViewFormPtr;
7059
/* Template for the inner drawing rectangle; DrawIcon adds the icon's
   left/top origin to these values to obtain OrfViewForm.mi. */
7060 static RecT mi0 = { 24, 56, 460, 200 };
7061
/* Maps a three-residue codon to an amino-acid character using the
   supplied genetic-code string. */
7062 Uint1 AAForCodon (Uint1Ptr codon, CharPtr codes); /* in seqport.c */
7063
/* Select the dark-cyan drawing colour (RGB 0, 203, 196). */
static void dkCyan (void)
{
  SelectColor (0,
               203,
               196);
}
7068
/* Select the dark-blue drawing colour (RGB 0, 0, 196). */
static void dkBlue (void)
{
  SelectColor (0,
               0,
               196);
}
7073
/* Select the dark-green drawing colour (RGB 0, 203, 0). */
static void dkGreen (void)
{
  SelectColor (0,
               203,
               0);
}
7078
7079 static void SetIntColor (int color)
7080 {
7081 switch (color) {
7082 case BLACK: Black(); break;
7083 case RED: Red(); break;
7084 case GREEN: Green(); break;
7085 case BLUE: Blue(); break;
7086 case CYAN: Cyan(); break;
7087 case MAGENTA: Magenta(); break;
7088 case YELLOW: Yellow(); break;
7089 case WHITE: White(); break;
7090 case GRAY: Gray(); break;
7091 case LTGRAY: LtGray(); break;
7092 case DKCYAN: dkCyan(); break;
7093 case DKBLUE: dkBlue(); break;
7094 case DKGREEN: dkGreen(); break;
7095 }
7096 }
7097
7098 static void Frame2Rect (RecT PNTR a, int color1, int color2)
7099 {
7100 MoveTo(a->left, a->top);
7101 SetIntColor(color1);
7102 LineTo(a->right, a->top);
7103 LineTo(a->right, a->bottom);
7104 SetIntColor(color2);
7105 LineTo(a->left, a->bottom);
7106 LineTo(a->left, a->top);
7107 }
7108
7109 static void Frame3d (RecT PNTR a)
7110 {
7111 int i, in = 3;
7112
7113 SetIntColor(LTGRAY);
7114 PaintRect(a);
7115 for (i=0; i < in; i++) {
7116 if (i > 0) {
7117 InsetRect(a, 1, 1);
7118 }
7119 Frame2Rect(a, WHITE, GRAY);
7120 }
7121 for (i=0; i < in; i++) {
7122 InsetRect(a, 1, 1);
7123 }
7124 for (i=0; i < in; i++) {
7125 InsetRect(a, 1, 1);
7126 Frame2Rect(a, GRAY, WHITE);
7127 }
7128 SelectColor(0, 203, 196);
7129 PaintRect(a);
7130 }
7131
/* Draw a straight line from (x0, y0) to (x1, y1) in the given colour code. */
static void DrawColorLine (int x0, int y0, int x1, int y1, int color)
{
  SetIntColor (color);

  MoveTo (x0, y0);
  LineTo (x1, y1);
}
7138
7139 static void Rect3d (RecT PNTR a, int color)
7140 {
7141 Frame2Rect(a, GRAY, WHITE);
7142 InsetRect(a, 1, 1);
7143 Frame2Rect(a, GRAY, WHITE);
7144 InsetRect(a, 1, 1);
7145 if (color != -1) {
7146 SetIntColor(color);
7147 PaintRect(a);
7148 }
7149 }
7150
7151
7152
7153 static void OrfQuitProc (ButtoN b)
7154
7155 {
7156 QuitProgram ();
7157 }
7158
7159 static void CloseProc (ButtoN b)
7160 {
7161 WindoW w;
7162
7163 w = ParentWindow (b);
7164 Remove (w);
7165 }
7166
7167 static void draw_rect(SeqPortPtr spp, Int2 ir, RecT PNTR frect, CharPtr vals, CharPtr codes, Boolean paint, OrfViewFormPtr ovp, Int2 strand)
7168 {
7169 Int4 pos;
7170 Uint1 codon[3], aa;
7171 Int4 len;
7172 double x;
7173 RecT r;
7174
7175 SeqPortSeek(spp, ir, SEEK_SET);
7176 frect->bottom = frect->top + 15;
7177 len = spp->totlen;
7178 if (paint) {
7179 dkGreen();
7180 } else {
7181 dkCyan();
7182 }
7183 dkCyan();
7184 PaintRect(frect);
7185 Black();
7186 FrameRect(frect);
7187 for (pos=0; pos < len-2; pos += 3) {
7188 codon[0] = SeqPortGetResidue(spp);
7189 codon[1] = SeqPortGetResidue(spp);
7190 codon[2] = SeqPortGetResidue(spp);
7191 aa = AAForCodon(codon, codes);
7192 if (aa == '*') {
7193 if (strand == Seq_strand_plus) {
7194 x = frect->left + (pos+ir)*ovp->dx;
7195 } else {
7196 x = frect->left + (len+2-(pos+ir))*ovp->dx;
7197 }
7198 DrawColorLine(x, frect->top+1, x, frect->bottom-1, RED);
7199 DrawColorLine(x+1, frect->top+1, x+1, frect->bottom-1, RED);
7200 }
7201 if (ovp->alt_start == TRUE) {
7202 aa = AAForCodon(codon, vals);
7203 }
7204 if (aa == 'M') {
7205 if (strand == Seq_strand_plus) {
7206 x = frect->left + (pos+ir)*ovp->dx;
7207 } else {
7208 x = frect->left + (len+2-(pos+ir))*ovp->dx;
7209 }
7210 DrawColorLine(x, frect->top+1, x, frect->bottom-1, WHITE);
7211 DrawColorLine(x+1, frect->top+1, x+1, frect->bottom-1, WHITE);
7212 }
7213 }
7214 if (paint) {
7215 r.top = frect->top + 1;
7216 r.bottom = frect->bottom - 1;
7217 r.left = frect->left + ovp->from*ovp->dx + 2;
7218 r.right = frect->left + ovp->to*ovp->dx - 2;
7219 Magenta();
7220 PaintRect(&r);
7221 }
7222 frect->top = frect->bottom + 4;
7223 }
7224
7225 static void draw_frame(Int2 ir, RecT PNTR frect, Boolean paint, OrfViewFormPtr ovp, Int2 strand)
7226 {
7227 RecT r; /* for ORF */
7228 ValNodePtr vnp;
7229 SeqLocPtr slp;
7230 SeqIntPtr sip;
7231
7232 frect->bottom = frect->top + 15;
7233 LtGray();
7234 PaintRect(frect);
7235 Black();
7236 FrameRect(frect);
7237 for (vnp = ovp->orfs; vnp; vnp=vnp->next) {
7238 if (vnp->choice == ir) {
7239 slp = vnp->data.ptrvalue;
7240 if (slp == NULL) {
7241 continue;
7242 }
7243 sip = slp->data.ptrvalue;
7244 if (sip == NULL) {
7245 continue;
7246 }
7247 if (sip->strand == strand) {
7248 r.top = frect->top + 1;
7249 r.bottom = frect->bottom - 1;
7250 r.left = frect->left + sip->from*ovp->dx;
7251 r.right = frect->left + sip->to*ovp->dx;
7252 dkCyan();
7253 PaintRect(&r);
7254 Black();
7255 r.top = frect->top;
7256 r.bottom = frect->bottom;
7257 FrameRect(&r);
7258 }
7259 if (paint) {
7260 r.top = frect->top + 1;
7261 r.bottom = frect->bottom - 1;
7262 r.left = frect->left + ovp->from*ovp->dx + 2;
7263 r.right = frect->left + ovp->to*ovp->dx - 2;
7264 Magenta();
7265 PaintRect(&r);
7266 }
7267 }
7268 }
7269 frect->top = frect->bottom + 4;
7270 return;
7271 }
7272
7273 static void draw_strands(OrfViewFormPtr ovp)
7274 {
7275 Int2 ir, gcode;
7276 RecT frect;
7277 SeqPortPtr spp;
7278 GeneticCodePtr gcp;
7279 CharPtr vals, codes;
7280 ValNodePtr vnp;
7281 Boolean paint;
7282 RecT PNTR r;
7283
7284 r = &(ovp->mi);
7285 gcode = ovp->gcode;
7286 gcp = GeneticCodeFind(gcode, NULL); /* use universal */
7287 vals = NULL;
7288 codes = NULL;
7289 for (vnp = (ValNodePtr)gcp->data.ptrvalue; vnp != NULL; vnp = vnp->next)
7290 {
7291 if (vnp->choice == 6) /* sncbieaa */
7292 vals = (CharPtr)vnp->data.ptrvalue;
7293 else if (vnp->choice == 3) /* ncbieaa */
7294 codes = (CharPtr)vnp->data.ptrvalue;
7295 }
7296 if (vals == NULL) {
7297 vals = codes;
7298 }
7299 frect.left = r->left + 11;
7300 frect.right = r->right - 11;
7301 frect.top = r->top + 4;
7302 ovp->dx = (frect.right - frect.left - 1.) / (ovp->bsp)->length;
7303 ovp->dy = (r->bottom - r->top - 1.) / 6.;
7304 spp = SeqPortNew(ovp->bsp, 0, -1, Seq_strand_plus, Seq_code_ncbi4na);
7305 for (ir=0; ir < 3; ir++) {
7306 if (ovp->from == 0 && ovp->to == 0) {
7307 paint = FALSE;
7308 } else if (ovp->strand == Seq_strand_plus && ovp->frame == ir) {
7309 paint = TRUE;
7310 } else {
7311 paint = FALSE;
7312 }
7313 if (ovp->orf_only) {
7314 draw_frame(ir, &frect, paint, ovp, Seq_strand_plus);
7315 } else {
7316 draw_rect(spp, ir, &frect, vals, codes, paint, ovp, Seq_strand_plus);
7317 }
7318 }
7319 SeqPortFree(spp);
7320 spp = SeqPortNew(ovp->bsp, 0, -1, Seq_strand_minus, Seq_code_ncbi4na);
7321 frect.top += 7;
7322 for (ir=0; ir < 3; ir++) {
7323 if (ovp->from == 0 && ovp->to == 0) {
7324 paint = FALSE;
7325 } else if (ovp->strand == Seq_strand_minus && ovp->frame == ir) {
7326 paint = TRUE;
7327 } else {
7328 paint = FALSE;
7329 }
7330 if (ovp->orf_only) {
7331 draw_frame(ir, &frect, paint, ovp, Seq_strand_minus);
7332 } else {
7333 draw_rect(spp, ir, &frect, vals, codes, paint, ovp, Seq_strand_minus);
7334 }
7335 }
7336
7337 SeqPortFree(spp);
7338 }
7339
7340 static void DrawIcon(IcoN ic0)
7341 {
7342 RecT r;
7343 OrfViewFormPtr ovp;
7344
7345 ovp = (OrfViewFormPtr) GetObjectExtra (ic0);
7346 ObjectRect(ovp->icon, &r);
7347 ovp->mi.left = mi0.left + r.left;
7348 ovp->mi.right = mi0.right + r.left;
7349 ovp->mi.top = mi0.top + r.top;
7350 ovp->mi.bottom = mi0.bottom + r.top;
7351 Frame3d(&r);
7352 Rect3d(&(ovp->mi), LTGRAY);
7353 draw_strands(ovp);
7354 }
7355
7356 static void notify( DoC d, Int2 item, Int2 raw, Int2 col, Boolean event)
7357 {
7358 ValNodePtr vnp;
7359 SeqLocPtr slp, slptmp;
7360 SeqIntPtr sip;
7361 Int2 i;
7362 Int2 itemOld1;
7363 Int2 itemOld2;
7364 Boolean status;
7365 OrfViewFormPtr ovp;
7366 Int2 top, bottom;
7367 BaR sb;
7368 Int2 startsAt;
7369
7370 if( item == 0) {
7371 return;
7372 }
7373 ovp = (OrfViewFormPtr) GetObjectExtra (d);
7374 for (vnp = ovp->orfs, i = 1; i < item && vnp; i++, vnp=vnp->next) continue;
7375 if (vnp == NULL) {
7376 return;
7377 }
7378 if (ItemIsVisible (d, item, &top, &bottom, NULL) == FALSE) {
7379 GetItemParams (d, item, &startsAt, NULL, NULL, NULL, NULL);
7380 sb = GetSlateVScrollBar ((SlatE) d);
7381 CorrectBarValue (sb, startsAt);
7382 }
7383 GetDocHighlight(d, &itemOld1, &itemOld2);
7384 SetDocHighlight(d, item, item);
7385 UpdateDocument(d, itemOld1, itemOld2);
7386 UpdateDocument(d, item, item);
7387
7388 ovp->frame = vnp->choice;
7389 slp = vnp->data.ptrvalue;
7390 sip = slp->data.ptrvalue;
7391 ovp->strand = sip->strand;
7392 ovp->from = sip->from;
7393 ovp->to = sip->to;
7394 DrawIcon(ovp->icon);
7395 status = GetStatus(ovp->icon);
7396 SetStatus(ovp->icon, !status);
7397
7398 if (! ovp->standAlone) {
7399 slptmp = AsnIoMemCopy(slp, (AsnReadFunc) SeqLocAsnRead,
7400 (AsnWriteFunc) SeqLocAsnWrite);
7401 ovp->select_orf = slptmp;
7402 ObjMgrSelect (ovp->bsp_entityID, ovp->bsp_itemID, OBJ_BIOSEQ,
7403 OM_REGION_SEQLOC, slptmp);
7404 }
7405
7406 Update();
7407 }
7408
7409 static void LaunchOrfFindCDSEditor (OrfViewFormPtr ovp)
7410 {
7411 OMProcControl ompc;
7412 ObjMgrProcPtr ompp;
7413 ObjMgrPtr omp;
7414 Int2 retval;
7415
7416 if (ovp == NULL)
7417 {
7418 return;
7419 }
7420 omp = ObjMgrGet ();
7421 if (omp == NULL)
7422 {
7423 return;
7424 }
7425 ompp = NULL;
7426 while ((ompp = ObjMgrProcFindNext (omp, OMPROC_EDIT,
7427 OBJ_SEQFEAT, 0, ompp)) != NULL)
7428
7429 {
7430 if (ompp->subinputtype == FEATDEF_CDS)
7431 {
7432 break;
7433 }
7434 }
7435 if (ompp == NULL) return;
7436 MemSet ((Pointer) (&ompc), 0, sizeof (OMProcControl));
7437 ompc.input_entityID = ovp->bsp_entityID;
7438 ompc.input_itemID = ovp->bsp_itemID;
7439 ompc.input_itemtype = OBJ_BIOSEQ;
7440 GatherDataForProc (&ompc, FALSE);
7441 ompc.proc = ompp;
7442 retval = (*(ompp->func)) (&ompc);
7443 if (retval == OM_MSG_RET_ERROR)
7444 {
7445 ErrShow ();
7446 }
7447 }
7448
/*
 * Shared mouse handler for the ORF panel.
 *
 *   ic                 the panel icon; its object extra is the OrfViewForm
 *   pt                 mouse position
 *   edit_on_dblclick   when TRUE the handler acts only on a double click
 *                      (Nlm_dblClick) and, after selecting, launches the
 *                      CDS editor
 *
 * The point is mapped to one of the six frame bars (three plus-strand
 * bars, a 7-pixel gap, three minus-strand bars) and to a sequence
 * position; the first ORF of that frame and strand that strictly
 * contains the position becomes the current selection.  The list
 * highlight is moved to it and, unless the viewer is stand-alone, the
 * selection is sent to ObjMgrSelect.  A click on a bar outside every
 * ORF beeps.
 * NOTE(review): ovp is not checked for NULL before use.
 */
7449 static void myprocessmousepos (IcoN ic, PoinT pt, Boolean edit_on_dblclick)
7450 {
7451 RecT r;
7452 Int2 ir;
7453 Int4 pos;
7454 ValNodePtr vnp;
7455 SeqLocPtr slp;
7456 SeqIntPtr sip;
7457 Boolean status;
7458 OrfViewFormPtr ovp;
7459 Int2 item;
7460 Int2 top, bottom;
7461 BaR sb;
7462 Int2 startsAt;
7463 DoC doc;
7464
7465 if (edit_on_dblclick && ! Nlm_dblClick)
7466 {
7467 return;
7468 }
7469
7470 ovp = GetObjectExtra((IcoN) ic);
7471 ObjectRect(ic, &r);
/* ignore clicks outside the inner drawing rectangle */
7472 if (!PtInRect(pt, &(ovp->mi))) {
7473 return;
7474 }
7475 doc = ovp->doc;
/* same bar geometry and scale as draw_strands */
7476 r.left = ovp->mi.left + 11;
7477 r.right = ovp->mi.right - 11;
7478 r.top = ovp->mi.top + 4;
7479 ovp->dx = (r.right - r.left - 1.) / (ovp->bsp)->length;
7480 ovp->dy = (ovp->mi.bottom - ovp->mi.top - 1.) / 6.;
/* plus strand: find the bar containing pt.y; ir == 3 means none */
7481 for (ir=0; ir < 3; ir++) {
7482 r.bottom = r.top + 15;
7483 if (pt.y < r.bottom && pt.y > r.top) {
7484 break;
7485 }
7486 r.top = r.bottom + 4;
7487 }
7488 if (ir < 3) {
/* convert the x coordinate to a sequence position */
7489 pos = (pt.x - r.left)/ovp->dx - ir;
/* item tracks the 1-based index of vnp in the list */
7490 for (vnp=ovp->orfs, item=1; vnp; vnp=vnp->next, item++) {
7491 slp = vnp->data.ptrvalue;
7492 if (slp == NULL) continue;
7493 sip = slp->data.ptrvalue;
7494 if (sip == NULL) continue;
7495 if (vnp->choice != ir || sip->strand != Seq_strand_plus) {
7496 continue;
7497 }
7498 if (pos < sip->to && pos > sip->from) {
7499 break;
7500 }
7501 }
7502 if (vnp == NULL) {
7503 Beep();
7504 return;
7505 }
7506 if (slp == NULL) return;
7507 if (sip == NULL) return;
/* record the hit as the current selection */
7508 ovp->frame = ir;
7509 ovp->strand = sip->strand;
7510 ovp->from = sip->from;
7511 ovp->to = sip->to;
7512 ovp->select_orf = AsnIoMemCopy(slp, (AsnReadFunc) SeqLocAsnRead,
7513 (AsnWriteFunc) SeqLocAsnWrite);
7514 status = GetStatus(ic);
7515 SetStatus(ic, !status);
7516 Update();
7517 Select(doc);
7518 SetDocHighlight(doc, item, item);
/* scroll the list so the highlighted item is visible */
7519 if (ItemIsVisible (doc, item, &top, &bottom, NULL) == FALSE) {
7520 GetItemParams (doc, item, &startsAt, NULL, NULL, NULL, NULL);
7521 sb = GetSlateVScrollBar ((SlatE) doc);
7522 CorrectBarValue (sb, startsAt);
7523 }
7524 UpdateDocument(doc, 0, 0);
7525
7526 if (! ovp->standAlone) {
7527 if (ovp->select_orf != NULL) {
7528 ObjMgrSelect (ovp->bsp_entityID, ovp->bsp_itemID, OBJ_BIOSEQ,
7529 OM_REGION_SEQLOC, ovp->select_orf);
7530 if (edit_on_dblclick)
7531 {
7532 /* launch CDS Editor */
7533 LaunchOrfFindCDSEditor (ovp);
7534 }
7535 }
7536 }
7537 Update();
7538 return;
7539 }
/* minus strand: r.top is already past the three plus bars; skip the gap */
7540 r.top += 7;
7541 for (ir=0; ir < 3; ir++) {
7542 r.bottom = r.top + 15;
7543 if (pt.y < r.bottom && pt.y > r.top) {
7544 break;
7545 }
7546 r.top = r.bottom + 4;
7547 }
7548 if (ir < 3) {
7549 pos = (pt.x - r.left)/ovp->dx - ir;
7550 for (vnp=ovp->orfs, item=1; vnp; vnp=vnp->next, item++) {
7551 slp = vnp->data.ptrvalue;
7552 if (slp == NULL) continue;
7553 sip = slp->data.ptrvalue;
7554 if (sip == NULL) continue;
7555 if (vnp->choice != ir || sip->strand != Seq_strand_minus) {
7556 continue;
7557 }
7558 if (pos < sip->to && pos > sip->from) {
7559 break;
7560 }
7561 }
7562 if (vnp == NULL) {
7563 Beep();
7564 return;
7565 }
7566 if (slp == NULL) return;
7567 if (sip == NULL) return;
7568 ovp->frame = ir;
7569 ovp->strand = sip->strand;
7570 ovp->from = sip->from;
7571 ovp->to = sip->to;
7572 ovp->select_orf = AsnIoMemCopy(slp, (AsnReadFunc) SeqLocAsnRead,
7573 (AsnWriteFunc) SeqLocAsnWrite);
7574 status = GetStatus(ic);
7575 SetStatus(ic, !status);
7576 Update();
7577 Select(doc);
7578 SetDocHighlight(doc, item, item);
7579 if (ItemIsVisible (doc, item, &top, &bottom, NULL) == FALSE) {
7580 GetItemParams (doc, item, &startsAt, NULL, NULL, NULL, NULL);
7581 sb = GetSlateVScrollBar ((SlatE) doc);
7582 CorrectBarValue (sb, startsAt);
7583 }
7584 UpdateDocument(doc, 0, 0);
7585 }
/* NOTE(review): this is also reached when the click hit no bar at all
   (ir == 3), in which case the previously stored select_orf is
   re-selected - confirm that is intended. */
7586 if (! ovp->standAlone) {
7587 if (ovp->select_orf != NULL) {
7588 ObjMgrSelect (ovp->bsp_entityID, ovp->bsp_itemID, OBJ_BIOSEQ,
7589 OM_REGION_SEQLOC, ovp->select_orf);
7590 if (edit_on_dblclick)
7591 {
7592 /* launch CDS Editor */
7593 LaunchOrfFindCDSEditor (ovp);
7594 }
7595 }
7596 }
7597
7598 Update();
7599 return;
7600 }
7601
7602 static void myrelease(IcoN ic, PoinT pt)
7603 {
7604 myprocessmousepos (ic, pt, FALSE);
7605 }
7606
7607 static void myclick(IcoN ic, PoinT pt)
7608 {
7609 myprocessmousepos (ic, pt, TRUE);
7610 }
7611
7612
7613 /* show all ORF from List without starts and stops */
7614 static void ORFProc(ButtoN b)
7615 {
7616 OrfViewFormPtr ovp;
7617 Boolean status;
7618
7619 if ((ovp = (OrfViewFormPtr) GetObjectExtra (b)) == NULL) {
7620 return;
7621 }
7622 ovp->orf_only = !ovp->orf_only;
7623 draw_strands(ovp);
7624 status = GetStatus(ovp->icon);
7625 SetStatus(ovp->icon, !status);
7626 Update();
7627 }
7628 static void AddOrfListToDoc(DoC doc, ValNodePtr list)
7629 {
7630 ParPtr par;
7631 ValNodePtr vnp;
7632 SeqLocPtr slp, tmp;
7633 SeqIntPtr sip;
7634 Boolean minus;
7635 Int2 l, i;
7636 CharPtr buf, str;
7637 OrfViewFormPtr ovp;
7638
7639 Reset(doc);
7640 ovp = (OrfViewFormPtr) GetObjectExtra(doc);
7641 par = (ParPtr) MemNew(sizeof (ParData));
7642 par->openSpace = FALSE;
7643 par->keepWithNext = 1;
7644 par->keepTogether = 1;
7645 par->newPage = 1;
7646 par->tabStops = 1;
7647 for (vnp = list, i=0; vnp; vnp=vnp->next, i++) {
7648 minus = FALSE;
7649 tmp = (SeqLocPtr) vnp->data.ptrvalue;
7650 slp = AsnIoMemCopy ((Pointer) tmp, (AsnReadFunc) SeqLocAsnRead,
7651 (AsnWriteFunc) SeqLocAsnWrite);
7652 if (slp == NULL) {
7653 continue;
7654 }
7655 slp->next = NULL;
7656 sip = (SeqIntPtr) slp->data.ptrvalue;
7657 if (sip->strand == Seq_strand_minus) {
7658 sip->strand = Seq_strand_plus;
7659 minus = TRUE;
7660 }
7661 str = FlatLoc(ovp->bsp, slp);
7662 MemFree(slp);
7663 l = StringLen(str);
7664 if (minus == TRUE) {
7665 buf = MemNew(l+4);
7666 sprintf(buf, "c(%s)", str);
7667 AppendText(doc, buf, par, NULL, NULL);
7668 MemFree(buf);
7669 sip->strand = Seq_strand_minus;
7670 } else {
7671 AppendText(doc, str, par, NULL, NULL);
7672 }
7673 }
7674 UpdateDocument(doc, 0, 0);
7675 }
7676
7677 static ValNodePtr ValNodeSeqLocListFree (ValNodePtr vnp)
7678 {
7679 if (vnp != NULL) {
7680 vnp->next = ValNodeSeqLocListFree(vnp->next);
7681 vnp->data.ptrvalue = SeqLocFree (vnp->data.ptrvalue);
7682 vnp = ValNodeFree (vnp);
7683 }
7684 return vnp;
7685 }
7686
/* Forward declarations for routines defined further down but used by
   the control callbacks that follow. */
7687 static void PopulateOrfList (OrfViewFormPtr ovp);
7688 static ValNodePtr ListOrfs (BioseqPtr bsp, Boolean altstart, Int4 min_len, Boolean allow_partial);
7689 static void SortOrfs (OrfViewFormPtr ovp);
7690
7691 static void AltProc(GrouP g)
7692 {
7693 OrfViewFormPtr ovp;
7694 Boolean status;
7695
7696 if ((ovp = (OrfViewFormPtr) GetObjectExtra (g)) == NULL) {
7697 return;
7698 }
7699
7700 ovp->orfs = ValNodeSeqLocListFree(ovp->orfs);
7701 if (GetValue (ovp->start_choice) == 1) {
7702 ovp->alt_start = FALSE;
7703 } else {
7704 ovp->alt_start = TRUE;
7705 }
7706 ovp->orfs = ListOrfs(ovp->bsp, ovp->alt_start, ovp->len, ovp->allow_partial);
7707 SortOrfs(ovp);
7708 PopulateOrfList (ovp);
7709 /* AddOrfListToDoc(ovp->doc, ovp->orfs); */
7710 ovp->from = 0;
7711 ovp->to = 0;
7712 draw_strands(ovp);
7713 status = GetStatus(ovp->icon);
7714 SetStatus(ovp->icon, !status);
7715 Update();
7716 }
7717
7718 static void ChangeAAcutoff(PopuP p)
7719 {
7720 OrfViewFormPtr ovp;
7721 Int2 i;
7722 Boolean status;
7723
7724 ovp = (OrfViewFormPtr) GetObjectExtra(p);
7725 i = GetValue((PopuP) p);
7726 switch (i) {
7727 case 1:
7728 ovp->len = 10;
7729 break;
7730 case 2:
7731 ovp->len = 50;
7732 break;
7733 case 3:
7734 ovp->len = 100;
7735 break;
7736 default:
7737 ovp->len = 3;
7738 break;
7739 }
7740 ovp->orfs = ValNodeSeqLocListFree(ovp->orfs);
7741 ovp->orfs = ListOrfs(ovp->bsp, ovp->alt_start, ovp->len, ovp->allow_partial);
7742 SortOrfs(ovp);
7743 PopulateOrfList (ovp);
7744
7745 draw_strands(ovp);
7746 status = GetStatus(ovp->icon);
7747 SetStatus(ovp->icon, !status);
7748 Update();
7749 }
7750
7751 static void ShowPartialOrfs (ButtoN b)
7752 {
7753 OrfViewFormPtr ovp;
7754
7755 ovp = (OrfViewFormPtr) GetObjectExtra(b);
7756
7757 if (ovp == NULL)
7758 {
7759 return;
7760 }
7761
7762 ovp->allow_partial = GetStatus (ovp->show_partial_btn);
7763
7764 ovp->orfs = ValNodeSeqLocListFree(ovp->orfs);
7765 ovp->orfs = ListOrfs(ovp->bsp, ovp->alt_start, ovp->len, ovp->allow_partial);
7766 SortOrfs(ovp);
7767 PopulateOrfList (ovp);
7768
7769 draw_strands(ovp);
7770
7771 Update();
7772 }
7773
7774
7775 static Int2 LIBCALLBACK OrfViewerMsgFunc (OMMsgStructPtr ommsp)
7776
7777 {
7778 ObjMgrDataPtr omdp;
7779 OMUserDataPtr omudp;
7780 OrfViewFormPtr ovp;
7781
7782 omudp = (OMUserDataPtr)(ommsp->omuserdata);
7783 if (omudp == NULL) return OM_MSG_RET_ERROR;
7784 ovp = (OrfViewFormPtr) omudp->userdata.ptrvalue;
7785 if (ovp == NULL) return OM_MSG_RET_ERROR;
7786 switch (ommsp->message) {
7787 case OM_MSG_DEL:
7788 omdp = ObjMgrGetData (ommsp->entityID);
7789 if (omdp != NULL) {
7790 if (ObjMgrWholeEntity (omdp, ommsp->itemID, ommsp->itemtype)) {
7791 if (ovp != NULL) {
7792 Remove (ovp->form);
7793 }
7794 return OM_MSG_RET_OK;
7795 }
7796 }
7797 break;
7798 default :
7799 break;
7800 }
7801 return OM_MSG_RET_OK;
7802 }
7803
7804 static void CleanupOrfViewer (GraphiC g, VoidPtr data)
7805
7806 {
7807 OrfViewFormPtr ovp;
7808
7809 ovp = (OrfViewFormPtr) data;
7810 if (ovp != NULL && ovp->input_entityID > 0) {
7811 ObjMgrFreeUserData (ovp->input_entityID, ovp->procid, ovp->proctype, ovp->userkey);
7812 }
7813 StdCleanupFormProc (g, data);
7814 }
7815
7816 static void PopulateOrfList (OrfViewFormPtr ovp)
7817 {
7818 ValNodePtr vnp;
7819 Int4 i, l, select_pos = 0;
7820 Boolean minus;
7821 SeqLocPtr tmp, slp;
7822 SeqIntPtr sip;
7823 CharPtr str, buf;
7824
7825 if (ovp == NULL) {
7826 return;
7827 }
7828
7829 Reset (ovp->doc);
7830 for (vnp = ovp->orfs, i=0; vnp; vnp=vnp->next, i++) {
7831 minus = FALSE;
7832 tmp = (SeqLocPtr) vnp->data.ptrvalue;
7833 if (ovp->select_orf != NULL && SeqLocCompare (ovp->select_orf, tmp) == SLC_A_EQ_B) {
7834 select_pos = i + 1;
7835 }
7836 slp = AsnIoMemCopy ((Pointer) tmp, (AsnReadFunc) SeqLocAsnRead,
7837 (AsnWriteFunc) SeqLocAsnWrite);
7838 if (slp == NULL) {
7839 continue;
7840 }
7841 slp->next = NULL;
7842 sip = (SeqIntPtr) slp->data.ptrvalue;
7843 if (sip->strand == Seq_strand_minus) {
7844 sip->strand = Seq_strand_plus;
7845 minus = TRUE;
7846 }
7847 str = FlatLoc(ovp->bsp, slp);
7848 MemFree(slp);
7849 l = StringLen(str);
7850 if (minus == TRUE) {
7851 buf = MemNew(l+4);
7852 sprintf(buf, "c(%s)", str);
7853 AppendText(ovp->doc, buf, &(ovp->par), NULL, NULL);
7854 MemFree(buf);
7855 sip->strand = Seq_strand_minus;
7856 } else {
7857 AppendText(ovp->doc, str, &(ovp->par), NULL, NULL);
7858 }
7859 }
7860 notify (ovp->doc, select_pos, 0, 0, FALSE);
7861 UpdateDocument (ovp->doc, 0, 0);
7862 }
7863
7864 static int LIBCALLBACK SortOrfsByStart (VoidPtr vp1, VoidPtr vp2)
7865 {
7866 ValNodePtr vnp1, vnp2;
7867 ValNodePtr PNTR vnpp1;
7868 ValNodePtr PNTR vnpp2;
7869 Int4 l1, l2;
7870
7871 vnpp1 = (ValNodePtr PNTR) vp1;
7872 vnpp2 = (ValNodePtr PNTR) vp2;
7873 vnp1 = *vnpp1;
7874 vnp2 = *vnpp2;
7875
7876 if (vnp1->data.ptrvalue == NULL) {
7877 l1 = 2;
7878 } else if (vnp2->data.ptrvalue == NULL) {
7879 l2 = 2;
7880 }
7881 l1 = SeqLocStart((SeqLocPtr) vnp1->data.ptrvalue);
7882 l2 = SeqLocStart((SeqLocPtr) vnp2->data.ptrvalue);
7883
7884 if (l1 < l2)
7885 return -1;
7886 else if (l1 > l2)
7887 return 1;
7888 else
7889 return 0;
7890 }
7891
7892 static int LIBCALLBACK SortOrfsByLength (VoidPtr vp1, VoidPtr vp2)
7893 {
7894 ValNodePtr vnp1, vnp2;
7895 ValNodePtr PNTR vnpp1;
7896 ValNodePtr PNTR vnpp2;
7897 Int4 l1, l2;
7898
7899 vnpp1 = (ValNodePtr PNTR) vp1;
7900 vnpp2 = (ValNodePtr PNTR) vp2;
7901 vnp1 = *vnpp1;
7902 vnp2 = *vnpp2;
7903 l1 = SeqLocLen((SeqLocPtr) vnp1->data.ptrvalue);
7904 l2 = SeqLocLen((SeqLocPtr) vnp2->data.ptrvalue);
7905
7906 if (l1 > l2)
7907 return -1;
7908 else if (l1 < l2)
7909 return 1;
7910 else
7911 return 0;
7912 }
7913
7914
7915 static void SortOrfs (OrfViewFormPtr ovp)
7916 {
7917 Int4 sort_choice;
7918
7919 if (ovp == NULL) return;
7920
7921 sort_choice = GetValue (ovp->orf_order);
7922 if (sort_choice == 1) {
7923 VnpHeapSort(&(ovp->orfs), SortOrfsByLength);
7924 } else {
7925 VnpHeapSort(&(ovp->orfs), SortOrfsByStart);
7926 }
7927 }
7928
7929 static void ReorderOrfs (GrouP g)
7930 {
7931 OrfViewFormPtr ovp;
7932
7933 ovp = (OrfViewFormPtr) GetObjectExtra (g);
7934 if (ovp == NULL) return;
7935 SortOrfs (ovp);
7936 PopulateOrfList (ovp);
7937 }
7938
/* Working state for the LookForOrfs scan callback.  The six-element
   arrays are indexed by reading frame: 0..2 for the plus strand and
   frame + 3 for the minus strand. */
7939 typedef struct orfdata {
7940 Int4 curlen [6], currstart [6], sublen [6]; /* per-frame running length, start position and sub-length */
7941 ValNodePtr lastvnp[6];
7942 Boolean inorf [6], altstart; /* inorf: currently inside an ORF; altstart: accept alternative start codons */
7943 Int4 min_len; /* an ORF is recorded only when its curlen reaches this value */
7944 Int4 bioseq_len; /* sequence length, used to clamp ORF ends and detect the sequence end */
7945 ValNodePtr orf_list; /* result list: choice = frame, data = SeqLoc interval */
7946 SeqIdPtr sip; /* sequence id used when building the SeqLoc intervals */
7947 Boolean allow_partial_orf; /* treat the ends of the sequence like stop codons */
7948 Boolean partial_other_end[6]; /* per-frame partial flag for the end opposite the one being closed */
7949 } OrfData, PNTR OrfDataPtr;
7950
7951
7952 static Boolean TreatLikeStop (Int2 frame, Int4 pos, Uint1 strand, Int4 len)
7953 {
7954 Int4 remainder = len % 3;
7955 Boolean like_stop = FALSE;
7956
7957 if (strand == Seq_strand_minus) {
7958 if (pos < 3) {
7959 like_stop = TRUE;
7960 }
7961 } else {
7962 if (pos >= len - remainder - 3) {
7963 like_stop = TRUE;
7964 }
7965 }
7966 return like_stop;
7967 }
7968
7969
7970 static void LIBCALLBACK LookForOrfs (
7971 Int4 position,
7972 Char residue,
7973 Boolean atgStart,
7974 Boolean altStart,
7975 Boolean orfStop,
7976 Int2 frame,
7977 Uint1 strand,
7978 Pointer userdata
7979 )
7980
7981 {
7982 Int2 idx;
7983 OrfDataPtr odp;
7984 SeqLocPtr slp, tmp;
7985 Boolean start_of_seq = FALSE;
7986 Boolean partial_this_end = FALSE;
7987
7988 odp = (OrfDataPtr) userdata;
7989
7990 if (strand == Seq_strand_plus) {
7991
7992 /* top strand */
7993
7994 idx = frame;
7995 if (position == 0 && (atgStart || (altStart && odp->altstart)))
7996 {
7997 /* not partial at 5' end */
7998 odp->partial_other_end[idx] = FALSE;
7999 }
8000
8001 if (odp->inorf [idx]) {
8002 if (!orfStop) {
8003 /* treat the end of the sequence like a stop codon */
8004 if (odp->allow_partial_orf && TreatLikeStop(frame, position, strand, odp->bioseq_len)) {
8005 position += 3;
8006 (odp->curlen[idx])++;
8007 orfStop = TRUE;
8008 partial_this_end = TRUE;
8009 }
8010 }
8011 if (orfStop) {
8012 odp->inorf [idx] = FALSE;
8013 if (odp->curlen[idx] >= odp->min_len) {
8014 slp = SeqLocIntNew (odp->currstart [idx] + idx,
8015 MIN (position + 2, (Int4) odp->bioseq_len - 1),
8016 strand, odp->sip);
8017 SetSeqLocPartial (slp, odp->partial_other_end[idx], partial_this_end);
8018 ValNodeAddPointer (&(odp->orf_list), frame, slp);
8019 }
8020 } else {
8021 (odp->curlen [idx])++;
8022 }
8023 } else if (atgStart || (altStart && odp->altstart)) {
8024 odp->inorf [idx] = TRUE;
8025 odp->curlen [idx] = 1;
8026 odp->currstart [idx] = position - frame;
8027 odp->partial_other_end [idx] = FALSE;
8028 }
8029 } else {
8030
8031 /* bottom strand */
8032
8033 idx = frame + 3;
8034
8035 if (!orfStop && odp->allow_partial_orf) {
8036 start_of_seq = TreatLikeStop (frame, position, strand, odp->bioseq_len);
8037 }
8038 if (orfStop) {
8039 odp->curlen [idx] = 0;
8040 odp->sublen [idx] = 0;
8041 odp->currstart [idx] = position - frame;
8042 odp->partial_other_end[idx] = FALSE;
8043 } else if (start_of_seq) {
8044 odp->curlen [idx] = 1;
8045 odp->sublen [idx] = 1;
8046 odp->currstart [idx] = position - frame - 3;
8047 } else if (atgStart || (altStart && odp->altstart) || (odp->allow_partial_orf && position >= odp->bioseq_len - 5)) {
8048 (odp->sublen [idx])++;
8049 odp->curlen [idx] = odp->sublen [idx];
8050 if (odp->allow_partial_orf && position >= odp->bioseq_len - 5)
8051 {
8052 /* also include partial codon at partial end */
8053 (odp->curlen[idx]) ++;
8054 if (!atgStart && (!altStart || !odp->altstart))
8055 {
8056 partial_this_end = TRUE;
8057 }
8058 }
8059 if (odp->curlen[idx] >= odp->min_len) {
8060 slp = SeqLocIntNew (MAX ((Int4) odp->currstart [idx] + idx - 3, (Int4) 0),
8061 MIN (odp->currstart [idx] + idx - 3 + (odp->curlen [idx]) * 3 + 2, odp->bioseq_len - 1),
8062 Seq_strand_minus, odp->sip);
8063 SetSeqLocPartial (slp, partial_this_end, odp->partial_other_end[idx]);
8064 if (odp->lastvnp[idx] != NULL) {
8065 tmp = (SeqLocPtr) odp->lastvnp[idx]->data.ptrvalue;
8066 if (SeqLocStart (tmp) == SeqLocStart (slp)) {
8067 tmp = SeqLocFree (tmp);
8068 odp->lastvnp[idx]->data.ptrvalue = slp;
8069 } else {
8070 odp->lastvnp[idx] = ValNodeAddPointer (&(odp->orf_list), frame, slp);
8071 }
8072 } else {
8073 odp->lastvnp[idx] = ValNodeAddPointer (&(odp->orf_list), frame, slp);
8074 }
8075 }
8076 } else {
8077 (odp->sublen [idx])++;
8078 }
8079 }
8080 }
8081
8082 static ValNodePtr ListOrfs (
8083 BioseqPtr bsp,
8084 Boolean altstart,
8085 Int4 min_len,
8086 Boolean allow_partial
8087 )
8088
8089 {
8090 Int2 i;
8091 OrfData od;
8092 TransTablePtr tbl;
8093 Int2 genCode;
8094
8095 if (bsp == NULL) return NULL;
8096
8097 od.sip = SeqIdFindBest (bsp->id, 0);
8098 genCode = GetGcodeFromBioseq(bsp);
8099
8100
8101 for (i = 0; i < 6; i++) {
8102 od.curlen [i] = INT4_MIN;
8103 od.currstart [i] = 0;
8104 od.sublen [i] = INT4_MIN;
8105 od.inorf [i] = FALSE;
8106 od.lastvnp [i] = NULL;
8107 od.partial_other_end [i] = allow_partial;
8108 }
8109
8110 if (allow_partial) {
8111 /* allow 5' partial ORFs */
8112 for (i = 0; i < 3; i++) {
8113 od.inorf[i] = TRUE;
8114 od.curlen [i] = 1;
8115 od.currstart [i] = 0 - i;
8116 }
8117 }
8118
8119 od.altstart = altstart;
8120 od.orf_list = NULL;
8121 od.min_len = min_len;
8122 od.bioseq_len = bsp->length;
8123 od.allow_partial_orf = allow_partial;
8124
8125 /* use simultaneous 6-frame translation finite state machine */
8126
8127 tbl = PersistentTransTableByGenCode (genCode);
8128 if (tbl != NULL) {
8129 TransTableProcessBioseq (tbl, LookForOrfs, (Pointer) &od, bsp);
8130 }
8131
8132 return od.orf_list;
8133 }
8134
8135 extern void LaunchOrfViewer (BioseqPtr bsp, Uint2 entityID, Uint4 itemID, Boolean standAlone)
8136 {
8137 ButtoN qu, b1;
8138 GrouP g, k;
8139 OrfViewFormPtr ovp;
8140 WindoW w;
8141 IcoN ic;
8142 PopuP pu;
8143 ObjMgrPtr omp;
8144 ObjMgrProcPtr ompp;
8145 OMUserDataPtr omudp;
8146
8147 WatchCursor ();
8148 Update ();
8149 ovp = (OrfViewFormPtr) MemNew (sizeof (OrfViewForm));
8150 ovp->bsp = bsp;
8151 ovp->select_orf = NULL;
8152 ovp->input_entityID = entityID;
8153 ovp->bsp_entityID = entityID;
8154 ovp->bsp_itemID = itemID;
8155 ovp->len = 10;
8156 ovp->alt_start = FALSE;
8157 ovp->allow_partial = TRUE;
8158 #if 1
8159 ovp->orfs = ListOrfs (bsp, ovp->alt_start, ovp->len, ovp->allow_partial);
8160 #else
8161 ovp->orfs = GetOrfList(bsp, ORF_LENGTH);
8162 #endif
8163 ovp->orf_only = TRUE;
8164 ovp->gcode = 1;
8165 ovp->standAlone = standAlone;
8166 w = FixedWindow(-50, -33, -10, -10, "Orf Finder", NULL);
8167 SetObjectExtra (w, ovp, CleanupOrfViewer);
8168 ovp->form = (ForM) w;
8169 g = HiddenGroup (w, 6, 0, NULL);
8170 SetGroupSpacing (g, 6, 0);
8171 if (ovp->standAlone) {
8172 qu = PushButton(g, "Quit", OrfQuitProc);
8173 } else {
8174 qu = PushButton(g, "Close", CloseProc);
8175 }
8176 b1 = PushButton(g, "ORF", ORFProc);
8177 SetObjectExtra (b1, ovp, NULL);
8178 ovp->start_choice = NormalGroup (g, 2, 0, "Initiation Codon", programFont, AltProc);
8179 SetObjectExtra (ovp->start_choice, ovp, NULL);
8180 RadioButton (ovp->start_choice, "Standard");
8181 RadioButton (ovp->start_choice, "Alternative");
8182 SetValue (ovp->start_choice, 1);
8183 StaticPrompt(g, "ORF length", 0, 16, systemFont, '1');
8184 pu = PopupList(g, TRUE, ChangeAAcutoff );
8185 PopupItem(pu, "10");
8186 PopupItem(pu, "50");
8187 PopupItem(pu, "100");
8188 SetValue(pu, 1);
8189 SetObjectExtra (pu, ovp, NULL);
8190
8191 ovp->show_partial_btn = CheckBox (g, "Show Partial ORFs", ShowPartialOrfs);
8192 SetObjectExtra (ovp->show_partial_btn, ovp, NULL);
8193 SetStatus (ovp->show_partial_btn, TRUE);
8194
8195 g = HiddenGroup (w, 2, 0, NULL);
8196 ic = IconButton(g, 500, 240,
8197 DrawIcon, NULL, myclick, NULL, NULL, myrelease);
8198 if (ovp->select_orf != NULL) {
8199 }
8200 ovp->icon = ic;
8201 SetObjectExtra (ic, ovp, NULL);
8202 k = HiddenGroup (g, 0, 2, NULL);
8203 ovp->orf_order = HiddenGroup (k, 2, 0, ReorderOrfs);
8204 RadioButton (ovp->orf_order, "Order by Length");
8205 RadioButton (ovp->orf_order, "Order by Start");
8206 SetValue (ovp->orf_order, 1);
8207 SortOrfs (ovp);
8208 SetObjectExtra (ovp->orf_order, ovp, NULL);
8209 ovp->doc = DocumentPanel(k, 10 * stdCharWidth, 240);
8210 SetObjectExtra (ovp->doc, ovp, NULL);
8211 /****/
8212 ovp->par.openSpace = FALSE;
8213 ovp->par.keepWithNext = 1;
8214 ovp->par.keepTogether = 1;
8215 ovp->par.newPage = 1;
8216 ovp->par.tabStops = 1;
8217
8218 PopulateOrfList (ovp);
8219 /****/
8220 SetDocNotify(ovp->doc, notify);
8221 /*ovp = (OrfViewFormPtr) GetObjectExtra(ic);*/
8222 omp = ObjMgrGet ();
8223 if (omp != NULL) {
8224 ompp = ObjMgrProcFind (omp, 0, "ORF Finder", OMPROC_FILTER);
8225 if (ompp != NULL) {
8226 ovp->procid = ompp->procid;
8227 ovp->proctype = OMPROC_FILTER;
8228 ovp->userkey = OMGetNextUserKey ();
8229 omudp = ObjMgrAddUserData (ovp->input_entityID, ompp->procid,
8230 OMPROC_FILTER, ovp->userkey);
8231 if (omudp != NULL) {
8232 omudp->userdata.ptrvalue = (Pointer) ovp;
8233 omudp->messagefunc = OrfViewerMsgFunc;
8234 }
8235 }
8236 }
8237 RealizeWindow (w);
8238 Show(w);
8239 ArrowCursor ();
8240 Update ();
8241 if (ovp->standAlone) {
8242 ProcessEvents();
8243 } else {
8244 return;
8245 }
8246 }
8247
8248 static Int2 LIBCALLBACK OrfFindFunc (Pointer data)
8249
8250 {
8251 BioseqPtr bsp;
8252 OMProcControlPtr ompcp;
8253
8254
8255 ompcp = (OMProcControlPtr) data; /* always do this cast */
8256
8257 if (ompcp == NULL || ompcp->input_itemtype == 0)
8258 return OM_MSG_RET_ERROR;
8259
8260 switch (ompcp->input_itemtype)
8261 {
8262 case OBJ_BIOSEQ:
8263 bsp = (BioseqPtr) ompcp->input_data;
8264 break;
8265 default:
8266 return OM_MSG_RET_ERROR;
8267 }
8268
8269 LaunchOrfViewer (bsp, ompcp->input_entityID, ompcp->input_itemID, FALSE);
8270 return OM_MSG_RET_DONE;
8271 }
8272
8273 typedef struct mergedata {
8274 SeqLocPtr slp;
8275 Boolean fuse;
8276 } MergeData, PNTR MergeDataPtr;
8277
8278 static Boolean AddToSeqLoc (GatherContextPtr gcp)
8279
8280 {
8281 BioseqPtr bsp;
8282 MergeDataPtr mdp;
8283 SeqFeatPtr sfp;
8284 SeqLocPtr slp;
8285
8286 mdp = (MergeDataPtr) gcp->userdata;
8287 if (mdp == NULL) return TRUE;
8288 if (gcp->thistype != OBJ_SEQFEAT) return TRUE;
8289 sfp = (SeqFeatPtr) gcp->thisitem;
8290 if (sfp == NULL || sfp->location == NULL) return TRUE;
8291 bsp = GetBioseqGivenSeqLoc (sfp->location, gcp->entityID);
8292 if (bsp == NULL) return TRUE;
8293 slp = SeqLocMerge (bsp, sfp->location, mdp->slp, FALSE, mdp->fuse, FALSE);
8294 mdp->slp = SeqLocFree (mdp->slp);
8295 mdp->slp = slp;
8296 return TRUE;
8297 }
8298
8299 static SeqLocPtr MergeSelectedFeatureIntervals (Boolean fuse)
8300
8301 {
8302 MergeData md;
8303 SelStructPtr sel;
8304
8305 md.slp = NULL;
8306 md.fuse = fuse;
8307 for (sel = ObjMgrGetSelected (); sel != NULL; sel = sel->next) {
8308 GatherItem (sel->entityID, sel->itemID, sel->itemtype,
8309 (Pointer) &md, AddToSeqLoc);
8310 }
8311 return md.slp;
8312 }
8313
8314 static Int2 LIBCALLBACK IntervalCombineFunc (Pointer data)
8315
8316 {
8317 OMProcControlPtr ompcp;
8318 SeqLocPtr slp;
8319
8320 ompcp = (OMProcControlPtr) data;
8321 if (ompcp == NULL) return OM_MSG_RET_ERROR;
8322 slp = MergeSelectedFeatureIntervals (FALSE);
8323 if (slp == NULL) return OM_MSG_RET_ERROR;
8324 ObjMgrRegister (OBJ_SEQLOC, (Pointer) slp);
8325 return OM_MSG_RET_DONE;
8326 }
8327
8328 static Int2 LIBCALLBACK IntervalCombineAndFuseFunc (Pointer data)
8329
8330 {
8331 OMProcControlPtr ompcp;
8332 SeqLocPtr slp;
8333
8334 ompcp = (OMProcControlPtr) data;
8335 if (ompcp == NULL) return OM_MSG_RET_ERROR;
8336 slp = MergeSelectedFeatureIntervals (TRUE);
8337 if (slp == NULL) return OM_MSG_RET_ERROR;
8338 ObjMgrRegister (OBJ_SEQLOC, (Pointer) slp);
8339 return OM_MSG_RET_DONE;
8340 }
8341
8342 static CharPtr objmgrtypestrs [] = {
8343 "OBJ_ALL", "OBJ_SEQENTRY", "OBJ_BIOSEQ", "OBJ_BIOSEQSET", "OBJ_SEQDESC",
8344 "OBJ_SEQANNOT", "OBJ_ANNOTDESC", "OBJ_SEQFEAT", "OBJ_SEQALIGN", "OBJ_SEQGRAPH",
8345 "OBJ_SEQSUB", "OBJ_SUBMIT_BLOCK", "OBJ_SEQSUB_CONTACT", "13", "OBJ_BIOSEQ_MAPFEAT",
8346 "OBJ_BIOSEQ_SEG", "OBJ_SEQHIST", "OBJ_SEQHIST_ALIGN", "OBJ_BIOSEQ_DELTA", "19",
8347 "OBJ_PUB", "OBJ_SEQFEAT_CIT", "OBJ_SEQSUB_CIT", "OBJ_MEDLINE_ENTRY", "OBJ_PUB_SET",
8348 "OBJ_SEQLOC", "OBJ_SEQID", "OBJ_SEQCODE", "OBJ_SEQCODE_SET", "OBJ_GENETIC_CODE",
8349 "OBJ_GENETIC_CODE_SET", "OBJ_TEXT_REPORT", "OBJ_FASTA", "OBJ_VIBRANT_PICTURE", "OBJ_PROJECT"
8350 };
8351
8352 static CharPtr temploadstrs [] = {
8353 "TL_NOT_TEMP", "TL_LOADED", "TL_CACHED"
8354 };
8355
8356 static CharPtr proctypestrs [] = {
8357 "0", "OMPROC_OPEN", "OMPROC_DELETE", "OMPROC_VIEW", "OMPROC_EDIT",
8358 "OMPROC_SAVE", "OMPROC_CUT", "OMPROC_COPY", "OMPROC_PASTE", "OMPROC_ANALYZE",
8359 "OMPROC_FIND", "OMPROC_REPLACE", "OMPROC_FILTER", "OMPROC_FETCH",
8360 };
8361
8362 static void PrintABool (FILE *fp, CharPtr str, Boolean val)
8363
8364 {
8365 if (val) {
8366 fprintf (fp, "%s TRUE\n", str);
8367 } else {
8368 fprintf (fp, "%s FALSE\n", str);
8369 }
8370 }
8371
8372 Int2 LIBCALLBACK VSMPictMsgFunc PROTO((OMMsgStructPtr ommsp));
8373
8374 static void ReportOnEntity (ObjMgrDataPtr omdp, ObjMgrPtr omp, Boolean selected, Uint4 itemID,
8375 Uint2 itemtype, Int2 index, FILE *fp)
8376
8377 {
8378 BioseqPtr bsp;
8379 BioseqSetPtr bssp;
8380 Char buf [50];
8381 OMUserDataPtr omudp;
8382 VSMPictPtr vsmpp;
8383
8384 if (omdp == NULL || fp == NULL) return;
8385 if (selected) {
8386 fprintf (fp, "Data Element\n\n");
8387 fprintf (fp, " EntityID %d selected\n", (int) omdp->EntityID);
8388 fprintf (fp, " ItemID %d, Itemtype %d\n", (int) itemID, (int) itemtype);
8389 } else if (omdp->parentptr == NULL) {
8390 fprintf (fp, "Top Data Element %d\n\n", (int) index);
8391 fprintf (fp, " EntityID %d\n", (int) omdp->EntityID);
8392 } else {
8393 fprintf (fp, "Inner Data Element %d\n\n", (int) index);
8394 fprintf (fp, " EntityID %d\n", (int) omdp->EntityID);
8395 }
8396 if (omdp->datatype < OBJ_MAX) {
8397 fprintf (fp, " Datatype %s", objmgrtypestrs [omdp->datatype]);
8398 if (omdp->datatype == OBJ_BIOSEQ) {
8399 bsp = (BioseqPtr) omdp->dataptr;
8400 if (bsp != NULL) {
8401 SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf) - 1);
8402 fprintf (fp, " %s, length %ld", buf, (long) bsp->length);
8403 }
8404 } else if (omdp->datatype == OBJ_BIOSEQSET) {
8405 bssp = (BioseqSetPtr) omdp->dataptr;
8406 if (bssp != NULL) {
8407 fprintf (fp, " class %d", (int) bssp->_class);
8408 }
8409 }
8410 fprintf (fp, "\n");
8411 } else {
8412 fprintf (fp, " Unregistered datatype %d\n", (int) omdp->datatype);
8413 }
8414 fprintf (fp, " Lockcnt %d\n", (int) omdp->lockcnt);
8415 if (omdp->tempload < 3) {
8416 fprintf (fp, " Tempload %s\n", temploadstrs [omdp->tempload]);
8417 } else {
8418 fprintf (fp, " Unrecognized tempload %d\n", (int) omdp->tempload);
8419 }
8420 PrintABool (fp, " Clipboard", omdp->clipboard);
8421 PrintABool (fp, " Dirty", omdp->dirty);
8422 PrintABool (fp, " Being_freed", omdp->being_freed);
8423 PrintABool (fp, " Free", omdp->free);
8424 fprintf (fp, "\n");
8425 for (omudp = omdp->userdata; omudp != NULL; omudp = omudp->next) {
8426 if (omudp->proctype <= OMPROC_MAX) {
8427 fprintf (fp, " Proctype %s\n", proctypestrs [omudp->proctype]);
8428 } else {
8429 fprintf (fp, " Unrecognized proctype %d\n", (int) omudp->proctype);
8430 }
8431 fprintf (fp, " Procid %d\n", (int) omudp->procid);
8432 fprintf (fp, " Userkey %d\n", (int) omudp->userkey);
8433 if (omudp->messagefunc == VSMPictMsgFunc) {
8434 vsmpp = (VSMPictPtr) omudp->userdata.ptrvalue;
8435 if (vsmpp != NULL) {
8436 if (vsmpp->s != NULL) {
8437 fprintf (fp, " VSMPictPtr segment not NULL\n");
8438 } else {
8439 fprintf (fp, " VSMPictPtr segment is NULL\n");
8440 }
8441 }
8442 }
8443 fprintf (fp, "\n");
8444 }
8445 }
8446
8447 static Int2 LIBCALLBACK DesktopReportFunc (Pointer data)
8448
8449 {
8450 FILE *fp;
8451 Uint4 j;
8452 Uint4 num;
8453 ObjMgrPtr omp;
8454 ObjMgrDataPtr omdp;
8455 ObjMgrDataPtr PNTR omdpp;
8456 Char path [PATH_MAX];
8457 SelStructPtr sel;
8458
8459 omp = ObjMgrGet ();
8460 if (omp == NULL) return OM_MSG_RET_DONE;
8461 TmpNam (path);
8462 fp = FileOpen (path, "w");
8463 fprintf (fp, "Object Manager\n\n");
8464 fprintf (fp, " HighestEntityID %d\n", (int) omp->HighestEntityID);
8465 fprintf (fp, " Totobj %d\n", (int) omp->totobj);
8466 fprintf (fp, " Currobj %d\n", (int) omp->currobj);
8467 fprintf (fp, " Maxtemp %d\n", (int) omp->maxtemp);
8468 fprintf (fp, " Tempcnt %d\n", (int) omp->tempcnt);
8469 fprintf (fp, " Hold %d\n", (int) omp->hold);
8470 PrintABool (fp, " Reaping", omp->reaping);
8471 PrintABool (fp, " Is_write_locked", omp->is_write_locked);
8472 fprintf (fp, "\n");
8473 sel = ObjMgrGetSelected ();
8474 if (sel != NULL) {
8475 omdp = ObjMgrGetData (sel->entityID);
8476 ReportOnEntity (omdp, omp, TRUE, sel->itemID, sel->itemtype, 0, fp);
8477 } else {
8478 num = omp->currobj;
8479 for (j = 0, omdpp = omp->datalist; j < num && omdpp != NULL; j++, omdpp++) {
8480 omdp = *omdpp;
8481 if (omdp->parentptr == NULL) {
8482 ReportOnEntity (omdp, omp, FALSE, 0, 0, j + 1, fp);
8483 }
8484 }
8485 for (j = 0, omdpp = omp->datalist; j < num && omdpp != NULL; j++, omdpp++) {
8486 omdp = *omdpp;
8487 if (omdp->parentptr != NULL) {
8488 ReportOnEntity (omdp, omp, FALSE, 0, 0, j + 1, fp);
8489 }
8490 }
8491 }
8492 FileClose (fp);
8493 LaunchGeneralTextViewer (path, "Object Manager Report");
8494 FileRemove (path);
8495 return OM_MSG_RET_DONE;
8496 }
8497
8498 static void ConvertGiToAccn (SeqIdPtr sip)
8499
8500 {
8501 Int4 gi;
8502 SeqIdPtr newsip;
8503
8504 if (sip == NULL) return;
8505 if (sip->choice != SEQID_GI) return;
8506 gi = sip->data.intvalue;
8507 newsip = GetSeqIdForGI (gi);
8508 if (newsip == NULL) return;
8509 if (newsip->choice == SEQID_GIBBSQ ||
8510 newsip->choice == SEQID_GIBBMT ||
8511 newsip->choice == SEQID_GI) {
8512 SeqIdFree (newsip);
8513 return;
8514 }
8515 sip->choice = newsip->choice;
8516 sip->data.ptrvalue = newsip->data.ptrvalue;
8517 newsip->choice = SEQID_NOT_SET;
8518 newsip->data.ptrvalue = NULL;
8519 SeqIdFree (newsip);
8520 }
8521
8522 static Boolean GiToAccnAlignCallback (GatherContextPtr gcp)
8523
8524 {
8525 SeqAlignPtr align;
8526 DenseDiagPtr ddp;
8527 DenseSegPtr dsp;
8528 SeqIdPtr sip;
8529 StdSegPtr ssp;
8530 SeqLocPtr tloc;
8531
8532 if (gcp == NULL) return TRUE;
8533 switch (gcp->thistype) {
8534 case OBJ_SEQALIGN :
8535 case OBJ_SEQHIST_ALIGN :
8536 align = (SeqAlignPtr) gcp->thisitem;
8537 sip = NULL;
8538 if (align->segtype == 1) {
8539 ddp = (DenseDiagPtr) align->segs;
8540 if (ddp != NULL) {
8541 for (sip = ddp->id; sip != NULL; sip = sip->next) {
8542 ConvertGiToAccn (sip);
8543 }
8544 }
8545 } else if (align->segtype == 2) {
8546 dsp = (DenseSegPtr) align->segs;
8547 if (dsp != NULL) {
8548 for (sip = dsp->ids; sip != NULL; sip = sip->next) {
8549 ConvertGiToAccn (sip);
8550 }
8551 }
8552 } else if (align->segtype == 3) {
8553 ssp = (StdSegPtr) align->segs;
8554 if (ssp != NULL) {
8555 for (tloc = ssp->loc; tloc != NULL; tloc = tloc->next) {
8556 sip = SeqLocId (tloc);
8557 ConvertGiToAccn (sip);
8558 }
8559 }
8560 }
8561 break;
8562 default :
8563 break;
8564 }
8565 return TRUE;
8566 }
8567
8568 static Int2 LIBCALLBACK AlignGiToAccnProc (Pointer data)
8569
8570 {
8571 MsgAnswer ans;
8572 GatherScope gs;
8573 OMProcControlPtr ompcp;
8574 SeqEntryPtr sep;
8575
8576 ompcp = (OMProcControlPtr) data;
8577 if (ompcp == NULL) return OM_MSG_RET_ERROR;
8578 ans = Message (MSG_OKC, "Are you sure you want to convert alignment GIs to accessions?");
8579 if (ans == ANS_CANCEL) return OM_MSG_RET_DONE;
8580 sep = GetTopSeqEntryForEntityID (ompcp->input_entityID);
8581 LookupFarSeqIDs (sep, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE);
8582 MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
8583 gs.seglevels = 1;
8584 gs.get_feats_location = FALSE;
8585 MemSet((Pointer)(gs.ignore), (int)(TRUE), (size_t)(OBJ_MAX * sizeof(Boolean)));
8586 gs.ignore[OBJ_BIOSEQ] = FALSE;
8587 gs.ignore[OBJ_BIOSEQ_SEG] = FALSE;
8588 gs.ignore[OBJ_SEQALIGN] = FALSE;
8589 gs.ignore[OBJ_SEQHIST_ALIGN] = FALSE;
8590 gs.ignore[OBJ_SEQANNOT] = FALSE;
8591 GatherEntity (ompcp->input_entityID, NULL, GiToAccnAlignCallback, &gs);
8592 return OM_MSG_RET_DONE;
8593 }
8594
8595 static void ConvertAccnToGi (SeqIdPtr sip)
8596
8597 {
8598 Int4 gi;
8599 SeqIdPtr newsip;
8600 Char str [42];
8601
8602 if (sip == NULL) return;
8603 if (sip->choice == SEQID_GI) return;
8604 gi = GetGIForSeqId (sip);
8605 if (gi < 1) return;
8606 sprintf (str, "gi|%ld", (long) gi);
8607 newsip = SeqIdParse (str);
8608 if (newsip == NULL) return;
8609 sip->choice = newsip->choice;
8610 sip->data.ptrvalue = newsip->data.ptrvalue;
8611 newsip->choice = SEQID_NOT_SET;
8612 newsip->data.ptrvalue = NULL;
8613 SeqIdFree (newsip);
8614 }
8615
8616 static Boolean AccnToGiAlignCallback (GatherContextPtr gcp)
8617
8618 {
8619 SeqAlignPtr align;
8620 DenseDiagPtr ddp;
8621 DenseSegPtr dsp;
8622 SeqIdPtr sip;
8623 StdSegPtr ssp;
8624 SeqLocPtr tloc;
8625
8626 if (gcp == NULL) return TRUE;
8627 switch (gcp->thistype) {
8628 case OBJ_SEQALIGN :
8629 case OBJ_SEQHIST_ALIGN :
8630 align = (SeqAlignPtr) gcp->thisitem;
8631 sip = NULL;
8632 if (align->segtype == 1) {
8633 ddp = (DenseDiagPtr) align->segs;
8634 if (ddp != NULL) {
8635 for (sip = ddp->id; sip != NULL; sip = sip->next) {
8636 ConvertAccnToGi (sip);
8637 }
8638 }
8639 } else if (align->segtype == 2) {
8640 dsp = (DenseSegPtr) align->segs;
8641 if (dsp != NULL) {
8642 for (sip = dsp->ids; sip != NULL; sip = sip->next) {
8643 ConvertAccnToGi (sip);
8644 }
8645 }
8646 } else if (align->segtype == 3) {
8647 ssp = (StdSegPtr) align->segs;
8648 if (ssp != NULL) {
8649 for (tloc = ssp->loc; tloc != NULL; tloc = tloc->next) {
8650 sip = SeqLocId (tloc);
8651 ConvertAccnToGi (sip);
8652 }
8653 }
8654 }
8655 break;
8656 default :
8657 break;
8658 }
8659 return TRUE;
8660 }
8661
8662 static Int2 LIBCALLBACK AlignAccnToGiProc (Pointer data)
8663
8664 {
8665 MsgAnswer ans;
8666 GatherScope gs;
8667 OMProcControlPtr ompcp;
8668 SeqEntryPtr sep;
8669
8670 ompcp = (OMProcControlPtr) data;
8671 if (ompcp == NULL) return OM_MSG_RET_ERROR;
8672 ans = Message (MSG_OKC, "Are you sure you want to convert alignment accessions to GIs?");
8673 if (ans == ANS_CANCEL) return OM_MSG_RET_DONE;
8674 sep = GetTopSeqEntryForEntityID (ompcp->input_entityID);
8675 LookupFarSeqIDs (sep, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE);
8676 MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
8677 gs.seglevels = 1;
8678 gs.get_feats_location = FALSE;
8679 MemSet((Pointer)(gs.ignore), (int)(TRUE), (size_t)(OBJ_MAX * sizeof(Boolean)));
8680 gs.ignore[OBJ_BIOSEQ] = FALSE;
8681 gs.ignore[OBJ_BIOSEQ_SEG] = FALSE;
8682 gs.ignore[OBJ_SEQALIGN] = FALSE;
8683 gs.ignore[OBJ_SEQHIST_ALIGN] = FALSE;
8684 gs.ignore[OBJ_SEQANNOT] = FALSE;
8685 GatherEntity (ompcp->input_entityID, NULL, AccnToGiAlignCallback, &gs);
8686 return OM_MSG_RET_DONE;
8687 }
8688
8689 static Boolean IsSipMrna (SeqIdPtr sip)
8690
8691 {
8692 Char buf [45];
8693 BioseqPtr bsp;
8694 Int4 count;
8695 Entrez2BooleanReplyPtr e2br;
8696 Entrez2RequestPtr e2rq;
8697 Entrez2ReplyPtr e2ry;
8698 Int4 gi;
8699 Char query [128];
8700 E2ReplyPtr reply;
8701
8702 if (sip == NULL) return FALSE;
8703 bsp = BioseqFind (sip);
8704 if (bsp != NULL) return FALSE;
8705 if (sip->choice == SEQID_GI) {
8706 gi = sip->data.intvalue;
8707 sip = GetSeqIdForGI (gi);
8708 }
8709 if (sip == NULL) return FALSE;
8710 SeqIdWrite (sip, buf, PRINTID_TEXTID_ACCESSION, 41);
8711 sprintf (query, "biomol_mrna [PROP] AND %s [ACCN]", buf);
8712 e2rq = EntrezCreateBooleanRequest (FALSE, FALSE, "nucleotide", query, 0, 0, NULL, 0, 0);
8713 e2ry = EntrezSynchronousQuery (e2rq);
8714 e2rq = Entrez2RequestFree (e2rq);
8715 if (e2ry == NULL) return FALSE;
8716 reply = e2ry->reply;
8717 if (reply == NULL || reply->choice != E2Reply_eval_boolean) return FALSE;
8718 e2br = EntrezExtractBooleanReply (e2ry);
8719 if (e2br == NULL) return FALSE;
8720 count = e2br->count;
8721 Entrez2BooleanReplyFree (e2br);
8722 if (count > 0) return TRUE;
8723 return FALSE;
8724 }
8725
8726 static Boolean IsMrnaAlignment (SeqAlignPtr align)
8727
8728 {
8729 DenseDiagPtr ddp;
8730 DenseSegPtr dsp;
8731 SeqIdPtr sip;
8732 StdSegPtr ssp;
8733 SeqLocPtr tloc;
8734
8735 if (align == NULL) return FALSE;
8736 sip = NULL;
8737 if (align->segtype == 1) {
8738 ddp = (DenseDiagPtr) align->segs;
8739 if (ddp != NULL) {
8740 for (sip = ddp->id; sip != NULL; sip = sip->next) {
8741 if (IsSipMrna (sip)) return TRUE;
8742 }
8743 }
8744 } else if (align->segtype == 2) {
8745 dsp = (DenseSegPtr) align->segs;
8746 if (dsp != NULL) {
8747 for (sip = dsp->ids; sip != NULL; sip = sip->next) {
8748 if (IsSipMrna (sip)) return TRUE;
8749 }
8750 }
8751 } else if (align->segtype == 3) {
8752 ssp = (StdSegPtr) align->segs;
8753 if (ssp != NULL) {
8754 for (tloc = ssp->loc; tloc != NULL; tloc = tloc->next) {
8755 sip = SeqLocId (tloc);
8756 if (IsSipMrna (sip)) return TRUE;
8757 }
8758 }
8759 }
8760 return FALSE;
8761 }
8762
8763 static SeqAnnotPtr ExtractBlastMrna (SeqAlignPtr sap, Pointer PNTR prevlink)
8764
8765 {
8766 AnnotDescrPtr adp;
8767 SeqAnnotPtr annot = NULL;
8768 SeqAlignPtr next;
8769 ObjectIdPtr oip;
8770 UserFieldPtr ufp;
8771 UserObjectPtr uop;
8772
8773 while (sap != NULL) {
8774 next = sap->next;
8775
8776 if (IsMrnaAlignment (sap)) {
8777 *prevlink = sap->next;
8778 sap->next = NULL;
8779
8780 if (annot == NULL) {
8781 annot = SeqAnnotNew ();
8782 if (annot != NULL) {
8783 annot->type = 2;
8784 adp = AnnotDescrNew (NULL);
8785 adp->choice = Annot_descr_user;
8786 annot->desc = adp;
8787 uop = UserObjectNew ();
8788 adp->data.ptrvalue = uop;
8789 oip = ObjectIdNew ();
8790 oip->str = StringSave ("Blast Type");
8791 ufp = UserFieldNew ();
8792 uop->type = oip;
8793 uop->data = ufp;
8794 oip = ObjectIdNew ();
8795 oip->str = StringSave ("BLASTN - mrna");
8796 ufp->label = oip;
8797 ufp->choice = 2;
8798 ufp->data.intvalue = 1;
8799 }
8800 }
8801 if (annot != NULL) {
8802 sap->next = annot->data;
8803 annot->data = sap;
8804 }
8805
8806 } else {
8807 sap->idx.prevlink = prevlink;
8808 prevlink = (Pointer PNTR) &(sap->next);
8809 }
8810
8811 sap = next;
8812 }
8813
8814 return annot;
8815 }
8816
8817 static void FindBlastNR (SeqAnnotPtr sap, Pointer userdata)
8818
8819 {
8820 AnnotDescrPtr adp;
8821 SeqAnnotPtr annot;
8822 ObjectIdPtr oip;
8823 UserFieldPtr ufp;
8824 UserObjectPtr uop;
8825
8826 if (sap == NULL || sap->type != 2) return;
8827 for (adp = sap->desc; adp != NULL; adp = adp->next) {
8828 if (adp->choice != Annot_descr_user) continue;
8829 for (uop = adp->data.ptrvalue; uop != NULL; uop = uop->next) {
8830 oip = uop->type;
8831 if (oip == NULL) continue;
8832 if (StringCmp (oip->str, "Blast Type") == 0) {
8833 ufp = uop->data;
8834 if (ufp == NULL) continue;
8835 oip = ufp->label;
8836 if (oip == NULL) continue;
8837 if (StringCmp (oip->str, "BLASTN - nr") == 0) {
8838 annot = ExtractBlastMrna ((SeqAlignPtr) sap->data, (Pointer PNTR) &(sap->data));
8839 if (annot != NULL) {