|
NCBI C++ ToolKit
|
00001 /* $Id: bam_ui_data_source.cpp 25315 2012-02-23 16:34:03Z wuliangs $ 00002 * =========================================================================== 00003 * 00004 * PUBLIC DOMAIN NOTICE 00005 * National Center for Biotechnology Information 00006 * 00007 * This software/database is a "United States Government Work" under the 00008 * terms of the United States Copyright Act. It was written as part of 00009 * the author's official duties as a United States Government employee and 00010 * thus cannot be copyrighted. This software/database is freely available 00011 * to the public for use. The National Library of Medicine and the U.S. 00012 * Government have not placed any restriction on its use or reproduction. 00013 * 00014 * Although all reasonable efforts have been taken to ensure the accuracy 00015 * and reliability of the software and data, the NLM and the U.S. 00016 * Government do not and cannot warrant the performance or results that 00017 * may be obtained by using this software or data. The NLM and the U.S. 00018 * Government disclaim all warranties, express or implied, including 00019 * warranties of performance, merchantability or fitness for any particular 00020 * purpose. 00021 * 00022 * Please cite the author in any work or product based on this material. 00023 * 00024 * =========================================================================== 00025 * 00026 * Authors: Liangshou Wu 00027 * 00028 */ 00029 00030 #include <ncbi_pch.hpp> 00031 00032 #include <corelib/ncbiexec.hpp> 00033 #include <corelib/ncbi_process.hpp> 00034 #include <corelib/ncbi_system.hpp> 00035 #include <corelib/ncbifile.hpp> 00036 00037 #include "bam_load_option_panel.hpp" 00038 #include "bam_indexing_panel.hpp" 00039 #include "bam_coverage_graph_panel.hpp" 00040 00041 #include <gui/packages/pkg_sequence/bam_ui_data_source.hpp> 00042 #include <gui/core/app_tasks.hpp> 00043 #include <gui/core/app_explorer_service.hpp> 00044 #include <gui/core/loading_app_job.hpp> 00045 00046 #include <gui/framework/workbench.hpp> 00047 #include <gui/framework/app_task_service.hpp> 00048 #include <gui/framework/app_job_task.hpp> 00049 00050 #include <gui/widgets/wx/wx_utils.hpp> 00051 #include <gui/widgets/wx/fileartprov.hpp> 00052 #include <gui/widgets/wx/ui_command.hpp> 00053 #include <gui/widgets/wx/message_box.hpp> 00054 00055 #include <gui/objects/LoaderDescriptor.hpp> 00056 #include <gui/objutils/registry.hpp> 00057 #include <gui/core/assm_info.hpp> 00058 #include <gui/utils/extension_impl.hpp> 00059 #include <gui/utils/system_path.hpp> 00060 00061 #include <wx/menu.h> 00062 00063 #include <objmgr/object_manager.hpp> 00064 #include <objects/general/Object_id.hpp> 00065 #include <objects/seq/Seq_annot.hpp> 00066 #include <objects/seq/Annot_descr.hpp> 00067 #include <objects/seq/Annotdesc.hpp> 00068 #include <objects/seqres/Seq_graph.hpp> 00069 00070 #include <serial/objistr.hpp> 00071 #include <serial/objostr.hpp> 00072 #include <serial/serial.hpp> 00073 00074 #include <sra/data_loaders/bam/bamloader.hpp> 00075 #include <sra/readers/bam/bamgraph.hpp> 00076 #include <sra/readers/bam/bamread.hpp> 00077 #include <sra/readers/ncbi_traces_path.hpp> 00078 #include <objtools/readers/idmapper.hpp> 00079 00080 #include <sra/data_loaders/csra/csraloader.hpp> 00081 00082 #include <util/format_guess.hpp> 00083 #include <corelib/ncbi_system.hpp> 00084 #include <corelib/ncbiexec.hpp> 00085 00086 #include <sstream> 00087 00088 BEGIN_NCBI_SCOPE 00089 USING_SCOPE(objects); 00090 00091 static const char* kLoaderDataType_File = "BAM file"; 00092 static const char* kLoaderDataType_SRZ = "BAM SRZ"; 00093 static const char* kLoaderDataType_CSra_File = "CSRA file"; 00094 static const char* kBamLoadOption = "BAM/CSRA files"; 00095 static const char* kBamPath = "path"; 00096 static const char* kBamFile = "files"; 00097 static const char* kCSraFile = "file"; 00098 static const char* kBamIndexFile = "index"; 00099 static const char* kTargetAssm = "context"; 00100 static const char* kSRZAccession = "SRZ"; 00101 static const int kLoaderPriority = 98; 00102 00103 00104 /////////////////////////////////////////////////////////////////////////////// 00105 /// CBamLoadingJob 00106 class CBamLoadingJob : public CDataLoadingAppJob 00107 { 00108 public: 00109 struct SBamFile 00110 { 00111 string m_BamFile; 00112 string m_IndexFile; 00113 string m_CoverageGraph; 00114 }; 00115 00116 typedef vector<SBamFile> TBamFiles; 00117 typedef set<string> TCSraFiles; 00118 typedef set<string> TSrzAccs; 00119 00120 CBamLoadingJob(CProjectService* service); 00121 00122 void SetSrzAccs(const TSrzAccs& accs) { m_SrzAccs = accs; } 00123 void SetBamFiles(const TBamFiles& files) { m_BamFiles = files; } 00124 void SetCSraFiles(const TCSraFiles& files) { m_CSraFiles = files; } 00125 void SetSamtoolsPath(const string& samtools) { m_SamtoolsPath = samtools; } 00126 void SetGraphBinSize(int size) { m_BinSize = size; } 00127 00128 const string& GetTargetAssembly() const { return m_TargetAssm; } 00129 void SetTargetAssembly(const string& assm) { m_TargetAssm = assm; } 00130 00131 protected: 00132 virtual void x_CreateProjectItems(); 00133 00134 private: 00135 TLoaderRef x_CreateDataLoader(SBamFile& bam_input); 00136 00137 CRef<objects::CSeq_annot> x_CreateCoverageGraphs(const string& path, 00138 const string& bam_file, const string& index_file, 00139 const string& annot_name); 00140 00141 private: 00142 TSrzAccs m_SrzAccs; // SRZ accessions 00143 TBamFiles m_BamFiles; 00144 TCSraFiles m_CSraFiles; 00145 string m_TargetAssm; 00146 string m_SamtoolsPath; 00147 int m_BinSize; 00148 }; 00149 00150 00151 /////////////////////////////////////////////////////////////////////////////// 00152 /// CBamDSEvtHandler - wxEvtHandler-derived adapter for Bam data source. 00153 00154 class CBamDSEvtHandler : public wxEvtHandler 00155 { 00156 DECLARE_EVENT_TABLE(); 00157 public: 00158 CBamDSEvtHandler(IWorkbench* workbench) 00159 : m_Workbench(workbench) { 00160 } 00161 void OnLoadBamFile(wxCommandEvent& event) 00162 { 00163 if(m_Workbench) { 00164 COpenDlgTask* task = new COpenDlgTask(m_Workbench, kBamLoadOption); 00165 00166 CAppTaskService* task_srv = m_Workbench->GetServiceByType<CAppTaskService>(); 00167 task_srv->AddTask(*task); 00168 } 00169 } 00170 protected: 00171 IWorkbench* m_Workbench; 00172 }; 00173 00174 00175 BEGIN_EVENT_TABLE(CBamDSEvtHandler, wxEvtHandler) 00176 EVT_MENU(eCmdLoadBamFile, CBamDSEvtHandler::OnLoadBamFile) 00177 END_EVENT_TABLE(); 00178 00179 00180 /////////////////////////////////////////////////////////////////////////////// 00181 /// CBamUIDataSource 00182 00183 static const char* kBAM_DS_Icon = "icon::bam_data_source"; 00184 00185 CBamUIDataSource::CBamUIDataSource(CBamUIDataSourceType& type) 00186 : m_Type(&type), 00187 m_Descr("BAM", kBAM_DS_Icon), 00188 m_SrvLocator(NULL), 00189 m_Open(false) 00190 { 00191 } 00192 00193 CBamUIDataSource::~CBamUIDataSource() 00194 { 00195 } 00196 00197 00198 string CBamUIDataSource::GetExtensionIdentifier() const 00199 { 00200 return "bam_data_source"; 00201 } 00202 00203 00204 string CBamUIDataSource::GetExtensionLabel() const 00205 { 00206 return "Bam Data Source"; 00207 } 00208 00209 00210 void CBamUIDataSource::SetServiceLocator(IServiceLocator* locator) 00211 { 00212 m_SrvLocator = locator; 00213 } 00214 00215 00216 IExplorerItemCmdContributor::TContribution 00217 CBamUIDataSource::GetMenu(TItemRefVector& items, 00218 CAppExplorerService& app_service) 00219 { 00220 IExplorerItemCmdContributor::TContribution contrib; 00221 00222 /// this is not a good solution, but simple 00223 IWorkbench* workbench = dynamic_cast<IWorkbench*>(m_SrvLocator); 00224 00225 if(items.size() != 1 || workbench == NULL) { 00226 return contrib; // return empty object - nothin to contribute 00227 } 00228 00229 CUICommandRegistry& cmd_reg = CUICommandRegistry::GetInstance(); 00230 00231 CExplorerItem* item = items[0].GetPointer(); 00232 int type = app_service.GetItemType(*item); 00233 00234 if(type == CAppExplorerService::eDataSource) { 00235 CAppExplorerService::TDataSourceTreeItem* ds_item = 00236 dynamic_cast<CAppExplorerService::TDataSourceTreeItem*>(item); 00237 00238 if(ds_item) { 00239 CIRef<IUIDataSource> ds = ds_item->GetData(); 00240 CBamUIDataSource* bam_ds = dynamic_cast<CBamUIDataSource*>(ds.GetPointer()); 00241 00242 if(bam_ds) { 00243 wxMenu* menu = new wxMenu; 00244 menu->Append(wxID_SEPARATOR, wxT("Top Actions")); 00245 cmd_reg.AppendMenuItem(*menu, eCmdLoadBamFile); 00246 00247 contrib.first = menu; 00248 contrib.second = new CBamDSEvtHandler(workbench); 00249 } 00250 } 00251 } 00252 return contrib; 00253 } 00254 00255 00256 IUIDataSourceType& CBamUIDataSource::GetType() const 00257 { 00258 return *m_Type; 00259 } 00260 00261 00262 const IUIObject& CBamUIDataSource::GetDescr() 00263 { 00264 return m_Descr; 00265 } 00266 00267 00268 bool CBamUIDataSource::IsOpen() 00269 { 00270 return m_Open; 00271 } 00272 00273 //static const char* kCmdExtPoint = "scoped_objects::cmd_contributor"; 00274 static const char* kAppExpCmdExtPoint = "project_tree_view::context_menu::item_cmd_contributor"; 00275 00276 bool CBamUIDataSource::Open() 00277 { 00278 if (m_Open) { 00279 LOG_POST(Error << "CBamUIDataSource::Open(): " 00280 "attempt to open already open data source"); 00281 return false; 00282 } 00283 00284 CStopWatch sw; 00285 sw.Start(); 00286 00287 /// register itself as menu contributor 00288 CIRef<IExtensionRegistry> reg = CExtensionRegistry::GetInstance(); 00289 reg->AddExtension(kAppExpCmdExtPoint, *this); 00290 00291 m_Open = true; 00292 return true; 00293 } 00294 00295 00296 bool CBamUIDataSource::Close() 00297 { 00298 if (m_Open) { 00299 /// remove itself from menu contribution points 00300 CIRef<IExtensionRegistry> reg = CExtensionRegistry::GetInstance(); 00301 reg->RemoveExtension(kAppExpCmdExtPoint, *this); 00302 00303 m_Open = false; 00304 return true; 00305 } 00306 00307 return false; 00308 } 00309 00310 00311 void CBamUIDataSource::EditProperties() 00312 { 00313 //TODO 00314 } 00315 00316 00317 IUIToolManager* CBamUIDataSource::GetLoadManager() 00318 { 00319 // TODO may need to link the manager to this particular datasource 00320 return new CBamUILoadManager(); 00321 } 00322 00323 00324 string CBamUIDataSource::AddDataLoader(const objects::CUser_object& obj) 00325 { 00326 string loader_name = kEmptyStr; 00327 00328 // figure out the loader name and data from object 00329 if (obj.GetType().IsStr()) { 00330 const string& loader_type = obj.GetType().GetStr(); 00331 if (loader_type == kLoaderDataType_File) { 00332 string path, file, index, target_assm; 00333 if (x_GetBamInputs(obj, path, file, index, target_assm)) { 00334 // make sure the given BAM file exists 00335 if ( !CFile(path + file).Exists() ) { 00336 string msg = "Can't find the bam file: " + path + file; 00337 NCBI_THROW(CException, eUnknown, msg); 00338 } 00339 00340 if ( !CFile(path + index).Exists() ) { 00341 string msg = "Can't find the bam index file: " + path + index; 00342 NCBI_THROW(CException, eUnknown, msg); 00343 } 00344 00345 CBAMDataLoader::SLoaderParams params; 00346 params.m_DirPath = path; 00347 params.m_BamFiles.push_back(CBAMDataLoader::SBamFileName(file, index)); 00348 00349 if ( !target_assm.empty() ) { 00350 //params.m_IdMapper.reset(new CIdMapperBuiltin(target_assm, false)); 00351 string mapper_str; 00352 CAssemblyInfo::GetIdMapperString(target_assm, mapper_str); 00353 std::stringstream mapper_stream(mapper_str); 00354 params.m_IdMapper.reset(new CIdMapperConfig(mapper_stream, target_assm)); 00355 } 00356 CRef<objects::CObjectManager> obj_mgr = CObjectManager::GetInstance(); 00357 CBAMDataLoader::TRegisterLoaderInfo info = 00358 CBAMDataLoader::RegisterInObjectManager(*obj_mgr, params, 00359 CObjectManager::eNonDefault, kLoaderPriority); 00360 loader_name = info.GetLoader()->GetName(); 00361 } 00362 } else if (loader_type == kLoaderDataType_CSra_File) { 00363 string file; 00364 if (x_GetCSraInputs(obj, file)) { 00365 // make sure the given BAM file exists 00366 if ( !CFile(file).Exists() ) { 00367 string msg = "Can't find the CSRA file: " + file; 00368 NCBI_THROW(CException, eUnknown, msg); 00369 } 00370 00371 CRef<objects::CObjectManager> obj_mgr = CObjectManager::GetInstance(); 00372 CCSRADataLoader::TRegisterLoaderInfo info = 00373 CCSRADataLoader::RegisterInObjectManager 00374 (*obj_mgr, "", file, 00375 CObjectManager::eNonDefault, kLoaderPriority); 00376 loader_name = info.GetLoader()->GetName(); 00377 } 00378 } else if (loader_type == kLoaderDataType_SRZ) { 00379 string srz; 00380 if (x_GetSRZAccession(obj, srz)) { 00381 CRef<objects::CObjectManager> obj_mgr = CObjectManager::GetInstance(); 00382 try { 00383 CBAMDataLoader::TRegisterLoaderInfo info = 00384 CBAMDataLoader::RegisterInObjectManager(*obj_mgr, srz, 00385 CObjectManager::eNonDefault, kLoaderPriority); 00386 loader_name = info.GetLoader()->GetName(); 00387 } catch (CException&) { 00388 objects::CCSRADataLoader::TRegisterLoaderInfo info; 00389 info = objects::CCSRADataLoader::RegisterInObjectManager(*obj_mgr, srz, 00390 objects::CObjectManager::eNonDefault); 00391 loader_name = info.GetLoader()->GetName(); 00392 } 00393 } 00394 } 00395 } 00396 00397 return loader_name; 00398 } 00399 00400 00401 string CBamUIDataSource::GetLoaderName(const objects::CUser_object& obj) const 00402 { 00403 string loader_name; 00404 00405 if (obj.GetType().IsStr()) { 00406 const string& loader_type = obj.GetType().GetStr(); 00407 if (loader_type == kLoaderDataType_File) { 00408 string path, file, index, target_assm; 00409 if (x_GetBamInputs(obj, path, file, index, target_assm)) { 00410 vector<CBAMDataLoader::SBamFileName> bam_files; 00411 bam_files.push_back(CBAMDataLoader::SBamFileName(file, index)); 00412 loader_name = 00413 CBAMDataLoader::GetLoaderNameFromArgs(path, bam_files); 00414 } 00415 } else if (loader_type == kLoaderDataType_SRZ) { 00416 string srz; 00417 if (x_GetSRZAccession(obj, srz)) { 00418 loader_name = CBAMDataLoader::GetLoaderNameFromArgs(srz); 00419 } 00420 } 00421 } 00422 00423 return loader_name; 00424 } 00425 00426 00427 bool CBamUIDataSource::x_GetBamInputs(const objects::CUser_object& obj, 00428 string& path, string& file, 00429 string& idx, string& target_assm) const 00430 { 00431 CConstRef<CUser_field> field_path = obj.GetFieldRef(kBamPath); 00432 CConstRef<CUser_field> field_file = obj.GetFieldRef(kBamFile); 00433 CConstRef<CUser_field> field_index = obj.GetFieldRef(kBamIndexFile); 00434 CConstRef<CUser_field> field_target_assm = obj.GetFieldRef(kTargetAssm); 00435 00436 if (field_path && field_file && field_index && field_target_assm) { 00437 path = field_path->GetData().GetStr(); 00438 file = field_file->GetData().GetStr(); 00439 idx = field_index->GetData().GetStr(); 00440 target_assm = field_target_assm->GetData().GetStr(); 00441 00442 return true; 00443 } 00444 00445 return false; 00446 } 00447 00448 00449 bool CBamUIDataSource::x_GetSRZAccession(const objects::CUser_object& obj, 00450 string& srz) const 00451 { 00452 CConstRef<CUser_field> field_srz = obj.GetFieldRef(kSRZAccession); 00453 if (field_srz) { 00454 srz = field_srz->GetData().GetStr(); 00455 return true; 00456 } 00457 return false; 00458 } 00459 00460 00461 bool CBamUIDataSource::x_GetCSraInputs(const objects::CUser_object& obj, 00462 string& file) const 00463 { 00464 CConstRef<CUser_field> field_file = obj.GetFieldRef(kCSraFile); 00465 00466 if (field_file) { 00467 file = field_file->GetData().GetStr(); 00468 00469 return true; 00470 } 00471 00472 return false; 00473 } 00474 00475 00476 /////////////////////////////////////////////////////////////////////////////// 00477 /// CBamUIDataSourceType 00478 CBamUIDataSourceType::CBamUIDataSourceType() 00479 : m_Descr("Bam File data Loader", "") 00480 { 00481 wxFileArtProvider* provider = GetDefaultFileArtProvider(); 00482 provider->RegisterFileAlias(ToWxString(kBAM_DS_Icon), 00483 wxT("bam_data_source.png")); 00484 } 00485 00486 00487 const IUIObject& CBamUIDataSourceType::GetDescr() 00488 { 00489 return m_Descr; 00490 } 00491 00492 00493 IUIDataSource* CBamUIDataSourceType::CreateDataSource() 00494 { 00495 return new CBamUIDataSource(*this); 00496 } 00497 00498 00499 bool CBamUIDataSourceType::AutoCreateDefaultDataSource() 00500 { 00501 return true; // we want to create default "Bam" datasource 00502 } 00503 00504 00505 string CBamUIDataSourceType::GetExtensionIdentifier() const 00506 { 00507 static string ext_id("bam_data_source_type"); 00508 return ext_id; 00509 } 00510 00511 00512 string CBamUIDataSourceType::GetExtensionLabel() const 00513 { 00514 return m_Descr.GetLabel(); 00515 } 00516 00517 00518 00519 /////////////////////////////////////////////////////////////////////////////// 00520 /// CBamUILoadManager 00521 CBamUILoadManager::CBamUILoadManager() 00522 : m_SrvLocator(NULL), 00523 m_ParentWindow(NULL), 00524 m_Descriptor(kBamLoadOption, ""), 00525 m_State(eInvalid), 00526 m_AssmWizard(CGCAssemblySearchWizard::eIdMapping), 00527 m_OptionPanel(NULL), 00528 m_IndexingPanel(NULL), 00529 m_GraphPanel(NULL), 00530 m_ProjectSelPanel(NULL) 00531 { 00532 m_ProjectParams.m_EnableDecideLater = false; 00533 } 00534 00535 00536 void CBamUILoadManager::SetServiceLocator(IServiceLocator* srv_locator) 00537 { 00538 m_SrvLocator = srv_locator; 00539 } 00540 00541 00542 void CBamUILoadManager::SetParentWindow(wxWindow* parent) 00543 { 00544 m_ParentWindow = parent; 00545 m_AssmWizard.SetParentWindow(parent); 00546 } 00547 00548 00549 const IUIObject& CBamUILoadManager::GetDescriptor() const 00550 { 00551 return m_Descriptor; 00552 } 00553 00554 00555 void CBamUILoadManager::InitUI() 00556 { 00557 m_State = eSelectBam; 00558 m_AssmWizard.InitUI(); 00559 } 00560 00561 00562 void CBamUILoadManager::CleanUI() 00563 { 00564 m_State = eInvalid; 00565 if(m_OptionPanel) { 00566 m_SavedInput = m_OptionPanel->GetInput(); 00567 m_OptionPanel = NULL; // window is destroyed by the system 00568 } 00569 if (m_IndexingPanel) { 00570 m_SamtoolsPath = m_IndexingPanel->GetSamtoolsPath(); 00571 m_IndexingPanel = NULL; 00572 } 00573 m_GraphPanel = NULL; 00574 m_ProjectSelPanel = NULL; 00575 m_AssmWizard.CleanUI(); 00576 } 00577 00578 00579 wxPanel* CBamUILoadManager::GetCurrentPanel() 00580 { 00581 if (m_State == eSelectBam) { 00582 if (m_OptionPanel == NULL) { 00583 m_OptionPanel = new CBamLoadOptionPanel(m_ParentWindow); 00584 m_OptionPanel->SetInput(m_SavedInput); 00585 } 00586 return m_OptionPanel; 00587 } else if (m_State == eBamIndex) { 00588 if (m_IndexingPanel == NULL) { 00589 m_IndexingPanel = new CBamIndexingPanel(m_ParentWindow); 00590 m_IndexingPanel->SetSamtoolsPath(m_SamtoolsPath); 00591 m_IndexingPanel->SetBamFilePath(m_OptionPanel->GetBamFilePath()); 00592 } 00593 return m_IndexingPanel; 00594 } else if (m_State == eCoverageGraph) { 00595 if (m_GraphPanel == NULL) { 00596 m_GraphPanel = new CBamCoverageGraphPanel(m_ParentWindow); 00597 m_GraphPanel->SetBamFiles(m_OptionPanel->GetBamFiles()); 00598 } 00599 return m_GraphPanel; 00600 } else if (m_State == eIdMapping) { 00601 return m_AssmWizard.GetCurrentPanel(); 00602 } else if (m_State == eSelectProject) { 00603 if(m_ProjectSelPanel == NULL) { 00604 CIRef<CProjectService> srv = m_SrvLocator->GetServiceByType<CProjectService>(); 00605 m_ProjectSelPanel = new CProjectSelectorPanel(m_ParentWindow); 00606 m_ProjectSelPanel->SetProjectService(srv); 00607 m_ProjectSelPanel->SetParams(m_ProjectParams); 00608 m_ProjectSelPanel->TransferDataToWindow(); 00609 } 00610 return m_ProjectSelPanel; 00611 } 00612 return NULL; 00613 } 00614 00615 00616 bool CBamUILoadManager::CanDo(EAction action) 00617 { 00618 switch(m_State) { 00619 case eSelectBam: 00620 return action == eNext; 00621 case eBamIndex: 00622 case eCoverageGraph: 00623 case eIdMapping: 00624 case eSelectProject: 00625 return action == eBack || action == eNext; 00626 case eCompleted: 00627 return false; // nothing left to do 00628 default: 00629 _ASSERT(false); 00630 return false; 00631 } 00632 } 00633 00634 00635 bool CBamUILoadManager::IsFinalState() 00636 { 00637 return m_State == eSelectProject; 00638 } 00639 00640 00641 bool CBamUILoadManager::IsCompletedState() 00642 { 00643 return m_State == eCompleted; 00644 } 00645 00646 00647 bool CBamUILoadManager::DoTransition(EAction action, wxPanel* currPanel) 00648 { 00649 if (m_State == eSelectBam && action == eNext) { 00650 if (m_OptionPanel->IsInputValid()) { 00651 if (m_OptionPanel->AllSRZ()) { 00652 m_State = eSelectProject; 00653 return true; 00654 } else { 00655 if (m_OptionPanel->HasIndexFile()) { 00656 m_State = eCoverageGraph; 00657 } else { 00658 m_State = eBamIndex; 00659 } 00660 return true; 00661 } 00662 } 00663 return false; 00664 } else if (m_State == eBamIndex) { 00665 if (action == eBack) { 00666 m_State = eSelectBam; 00667 return true; 00668 } else if (action == eNext) { 00669 if (m_IndexingPanel->IsInputValid()) { 00670 m_State = eCoverageGraph; 00671 return true; 00672 } 00673 return false; 00674 } 00675 } else if (m_State == eCoverageGraph) { 00676 if (action == eBack) { 00677 if (m_OptionPanel->HasIndexFile()) { 00678 m_State = eSelectBam; 00679 } else { 00680 m_State = eBamIndex; 00681 } 00682 return true; 00683 } else if (action == eNext) { 00684 if (m_GraphPanel->IsInputValid()) { 00685 m_State = eIdMapping; 00686 return true; 00687 } 00688 return false; 00689 } 00690 } else if (m_State == eIdMapping) { 00691 if (action == eSkip) { 00692 m_State = eSelectProject; 00693 m_AssmAcc.clear(); 00694 return true; 00695 } else if (action == eBack) { 00696 if (m_AssmWizard.IsInitialState()) { 00697 m_State = eCoverageGraph; 00698 return true; 00699 } else { 00700 return m_AssmWizard.DoTransition(action, currPanel); 00701 } 00702 } else if (action == eNext) { 00703 if ( !m_AssmWizard.CanDo(action) ) { 00704 vector<string> assm_accs = m_AssmWizard.GetAssmAccs(); 00705 if ( !assm_accs.empty() ) { 00706 m_AssmAcc = assm_accs[0]; 00707 m_State = eSelectProject; 00708 return true; 00709 } 00710 return false; 00711 } else { 00712 return m_AssmWizard.DoTransition(action, currPanel); 00713 } 00714 } 00715 } else if (m_State == eSelectProject) { 00716 if (action == eBack) { 00717 if (m_OptionPanel->AllSRZ()) { 00718 m_State = eSelectBam; 00719 } else { 00720 m_State = eIdMapping; 00721 } 00722 return true; 00723 } else if (action == eNext) { 00724 if(m_ProjectSelPanel->TransferDataFromWindow()) { 00725 m_State = eCompleted; 00726 return true; 00727 } 00728 return false; 00729 } 00730 } 00731 _ASSERT(false); 00732 return false; 00733 } 00734 00735 00736 IAppTask* CBamUILoadManager::GetTask() 00737 { 00738 // create loading Job 00739 m_ProjectSelPanel->GetParams(m_ProjectParams); 00740 00741 CIRef<CProjectService> srv = m_SrvLocator->GetServiceByType<CProjectService>(); 00742 CBamLoadingJob* job = new CBamLoadingJob(srv.GetPointer()); 00743 // create a wrapper task 00744 auto_ptr<CDataLoadingAppTask> task(new CDataLoadingAppTask(*job)); 00745 task->SetReportErrors(false); // we handle errors ourselves 00746 00747 CDataLoadingOptions& options = job->GetOptions(); 00748 m_ProjectParams.ToLoadingOptions(options); 00749 00750 CBamLoadingJob::TBamFiles bam_files; 00751 ITERATE (CBamLoadOptionPanel::TBamFiles, iter, m_OptionPanel->GetBamFiles()) { 00752 CBamLoadingJob::SBamFile bam_input; 00753 bam_input.m_BamFile = *iter; 00754 if (CFile(bam_input.m_BamFile + ".bai").Exists()) { 00755 string bam_file, ext; 00756 CFile::SplitPath(bam_input.m_BamFile, NULL, &bam_file, &ext); 00757 bam_input.m_IndexFile = bam_file + ext + ".bai"; 00758 } 00759 if (m_GraphPanel) { 00760 bam_input.m_CoverageGraph = m_GraphPanel->GetGraphFile(*iter); 00761 } 00762 bam_files.push_back(bam_input); 00763 } 00764 00765 job->SetSrzAccs(m_OptionPanel->GetSrzAccs()); 00766 job->SetBamFiles(bam_files); 00767 job->SetCSraFiles(m_OptionPanel->GetCSraFiles()); 00768 if (m_IndexingPanel) { 00769 job->SetSamtoolsPath(m_IndexingPanel->GetSamtoolsPath()); 00770 } 00771 if (m_GraphPanel) { 00772 job->SetGraphBinSize(m_GraphPanel->GetGraphBinSize()); 00773 } 00774 00775 job->SetTargetAssembly(m_AssmAcc); 00776 00777 return task.release(); 00778 } 00779 00780 00781 void CBamUILoadManager::SetRegistryPath(const string& path) 00782 { 00783 m_RegPath = path; // store for later use 00784 m_AssmWizard.SetRegistryPath(path); 00785 } 00786 00787 00788 static const char* kBamInputTag = "BamInput"; 00789 static const char* kSamtoolsPathTag = "SamtoolsPath"; 00790 static const char* kProjectParamsTag = "ProjectParams"; 00791 00792 00793 void CBamUILoadManager::SaveSettings() const 00794 { 00795 if ( !m_RegPath.empty() ) { 00796 CGuiRegistry& gui_reg = CGuiRegistry::GetInstance(); 00797 CGuiRegistry::TReadWriteView view = gui_reg.GetReadWriteView(m_RegPath); 00798 00799 /// remember the selected Format (only if m_OptionPanel exists) 00800 if (m_OptionPanel) { 00801 m_SavedInput = m_OptionPanel->GetInput(); 00802 } 00803 if (m_SavedInput.size() < 10000) { 00804 // do not save large texts 00805 view.Set(kBamInputTag, NStr::URLEncode(m_SavedInput)); 00806 } 00807 00808 if (m_IndexingPanel) { 00809 m_SamtoolsPath = m_IndexingPanel->GetSamtoolsPath(); 00810 } 00811 00812 if ( !m_SamtoolsPath.empty() ) { 00813 view.Set(kSamtoolsPathTag, m_SamtoolsPath); 00814 } 00815 /// save Project Panel settings 00816 m_ProjectParams.SaveSettings(view, kProjectParamsTag); 00817 m_AssmWizard.SaveSettings(); 00818 } 00819 } 00820 00821 00822 void CBamUILoadManager::LoadSettings() 00823 { 00824 if( ! m_RegPath.empty()) { 00825 CGuiRegistry& gui_reg = CGuiRegistry::GetInstance(); 00826 CGuiRegistry::TReadView view = gui_reg.GetReadView(m_RegPath); 00827 00828 m_SavedInput = view.GetString(kBamInputTag, kEmptyStr); 00829 NStr::URLDecodeInPlace(m_SavedInput); 00830 00831 m_SamtoolsPath = view.GetString(kSamtoolsPathTag, kEmptyStr); 00832 00833 /// load Project Panel settings 00834 m_ProjectParams.LoadSettings(view, kProjectParamsTag); 00835 m_AssmWizard.LoadSettings(); 00836 } 00837 } 00838 00839 00840 /// CBamLoadingJob 00841 00842 CBamLoadingJob::CBamLoadingJob(CProjectService* service) 00843 : CDataLoadingAppJob(service) 00844 , m_BinSize(1000) 00845 { 00846 CFastMutexGuard lock(m_Mutex); 00847 m_Descr = "Loading BAM file"; 00848 } 00849 00850 inline string s_QuotedPath(const string& path) 00851 { 00852 #if defined __WXMSW__ 00853 return "\"" + path + "\""; 00854 #else 00855 return path; 00856 #endif 00857 } 00858 00859 string s_GetTempSortedBamFile(const string& sorted_file, int file_i) 00860 { 00861 string i_str = NStr::IntToString(file_i); 00862 if (i_str.length() < 4) { 00863 i_str.insert((size_t)0, size_t(4 - i_str.length()), '0'); 00864 } 00865 return sorted_file + "." + i_str + ".bam"; 00866 } 00867 00868 00869 00870 string s_GetAnnotName(const CSeq_annot& annot) 00871 { 00872 if (annot.IsSetDesc()) { 00873 ITERATE (CSeq_annot::TDesc::Tdata, iter, annot.GetDesc().Get()) { 00874 if ((*iter)->IsName()) { 00875 return (*iter)->GetName(); 00876 } 00877 } 00878 } 00879 00880 return ""; 00881 } 00882 00883 00884 CRef<CSeq_annot> s_LoadSeqAnnot(const string& file, const string& annot_name) 00885 { 00886 // Currently, only ASN.1, Binary ASN.1 or XML files are acceptible 00887 CNcbiIfstream istr(file.c_str(), ios::binary); 00888 ESerialDataFormat sfmt = eSerial_None; 00889 CFormatGuess fg; 00890 CFormatGuess::EFormat f = fg.Format(istr); 00891 switch (f) { 00892 case CFormatGuess::eBinaryASN: 00893 sfmt = eSerial_AsnBinary; 00894 break; 00895 case CFormatGuess::eTextASN: 00896 sfmt = eSerial_AsnText; 00897 break; 00898 case CFormatGuess::eXml: 00899 sfmt = eSerial_Xml; 00900 break; 00901 default: 00902 {{ 00903 string msg("File format could not be determined:\n"); 00904 msg += file; 00905 LOG_POST(Info << msg); 00906 }} 00907 break; 00908 } 00909 if (sfmt == eSerial_None) { 00910 string msg = "The graph file format is not supported!"; 00911 msg += "\nGraph file: " + file; 00912 NCBI_THROW(CException, eUnknown, msg); 00913 } 00914 00915 istr.seekg(0); 00916 auto_ptr<CObjectIStream> obj_str(CObjectIStream::Open(sfmt, istr)); 00917 CRef<CSeq_annot> annot(new CSeq_annot); 00918 try { 00919 // We only consider seq-annot 00920 *obj_str >> *annot; 00921 } catch (CException& e) { 00922 string msg = "Can't deserialize the graph file. A seq-annot is expected. "; 00923 msg += "Error: " + e.GetMsg(); 00924 msg += "\nGraph file: " + file; 00925 NCBI_THROW(CException, eUnknown, msg); 00926 } 00927 00928 if ( !annot->IsSetData() || !annot->IsGraph()) { 00929 string msg = "The input graph file doesn't cotain a graph!"; 00930 msg += "\nGraph file: " + file; 00931 NCBI_THROW(CException, eUnknown, msg); 00932 } 00933 00934 if ( s_GetAnnotName(*annot) != annot_name) { 00935 string msg = "The graph annotatin name (" + s_GetAnnotName(*annot); 00936 msg += ") needs to match with BAM file: " + annot_name; 00937 msg += "\nGraph file: " + file; 00938 NCBI_THROW(CException, eUnknown, msg); 00939 } 00940 00941 return annot; 00942 } 00943 00944 00945 void CBamLoadingJob::x_CreateProjectItems() 00946 { 00947 NON_CONST_ITERATE (TBamFiles, iter, m_BamFiles) { 00948 TLoaderRef bam_loader = x_CreateDataLoader(*iter); 00949 if (x_IsCanceled() ) { 00950 return; 00951 } 00952 00953 if ( !bam_loader ) continue; 00954 00955 // skip the one that doesn't have an index file 00956 if (iter->m_IndexFile.empty()) continue; 00957 00958 const string& bam_file = iter->m_BamFile; 00959 string path, file, ext; 00960 CFile::SplitPath(bam_file, &path, &file, &ext); 00961 CRef<CSeq_annot> annot; 00962 00963 LOG_POST(Info << "Create Project Items for BAM file: " << bam_file); 00964 00965 if ( !CFile(iter->m_CoverageGraph).Exists() ) { 00966 // use Bam2Graph app to create a coverage graph on the fly 00967 // prepare parameters 00968 x_SetStatusText( "Creating BAM coverage graphs..." ); 00969 annot = x_CreateCoverageGraphs(path, file + ext, iter->m_IndexFile, file); 00970 if ( annot && !iter->m_CoverageGraph.empty() ) { 00971 // save to a file 00972 auto_ptr<CObjectOStream> 00973 out(CObjectOStream::Open(eSerial_AsnBinary, iter->m_CoverageGraph)); 00974 *out << *annot; 00975 } 00976 } else { 00977 annot = s_LoadSeqAnnot(iter->m_CoverageGraph, file); 00978 } 00979 00980 if (!annot) { 00981 return; 00982 } 00983 00984 CRef<CProjectItem> item(new CProjectItem()); 00985 item->SetObject(*annot); 00986 string label = item->GetLabelByData(m_Scope.GetPointer()); 00987 if (label.empty()) { 00988 label = file; 00989 } 00990 00991 if (label.find("coverage") == string::npos) { 00992 label += " (coverage graph)"; 00993 } 00994 00995 item->SetLabel(label); 00996 m_Items.push_back(item); 00997 00998 m_Loaders.insert(TLoaders::value_type(item.GetPointer(), bam_loader)); 00999 } 01000 01001 // add data loader from SRZ accessions 01002 ITERATE (TSrzAccs, iter, m_SrzAccs) { 01003 if (x_IsCanceled()) { 01004 return; 01005 } 01006 01007 CRef<CLoaderDescriptor> loader(new CLoaderDescriptor()); 01008 CBamUIDataSourceType data_source_type; 01009 01010 loader->SetLabel(*iter); 01011 loader->SetPriority(kLoaderPriority); 01012 loader->SetLoader_type(data_source_type.GetExtensionIdentifier()); 01013 01014 CRef<CLoaderDescriptor::TData> bam_data(new CLoaderDescriptor::TData); 01015 bam_data->SetType().SetStr(kLoaderDataType_SRZ); 01016 bam_data->AddField(kSRZAccession, *iter); 01017 01018 loader->SetData(*bam_data); 01019 m_Loaders.insert(TLoaders::value_type( 01020 (const ncbi::objects::CProjectItem*)NULL, loader)); 01021 } 01022 01023 // add data loader from CSRA files 01024 ITERATE (TCSraFiles, iter, m_CSraFiles) { 01025 if (x_IsCanceled()) { 01026 return; 01027 } 01028 01029 CRef<CLoaderDescriptor> loader(new CLoaderDescriptor()); 01030 CBamUIDataSourceType data_source_type; 01031 01032 loader->SetLabel(*iter); 01033 loader->SetPriority(kLoaderPriority); 01034 loader->SetLoader_type(data_source_type.GetExtensionIdentifier()); 01035 01036 CRef<CLoaderDescriptor::TData> bam_data(new CLoaderDescriptor::TData); 01037 bam_data->SetType().SetStr(kLoaderDataType_CSra_File); 01038 bam_data->AddField(kCSraFile, *iter); 01039 01040 loader->SetData(*bam_data); 01041 m_Loaders.insert(TLoaders::value_type( 01042 (const ncbi::objects::CProjectItem*)NULL, loader)); 01043 } 01044 } 01045 01046 01047 CBamLoadingJob::TLoaderRef 01048 CBamLoadingJob::x_CreateDataLoader(CBamLoadingJob::SBamFile& bam_input) 01049 { 01050 TLoaderRef bam_loader; 01051 const string& bam_file = bam_input.m_BamFile; 01052 if (bam_file.empty()) { 01053 LOG_POST(Error << "CBamLoadingJob::x_CreateDataLoader(): " 01054 "Invalid (empty) bam file."); 01055 return bam_loader; 01056 } 01057 01058 // bam file 01059 string path, file_name, ext; 01060 CDirEntry::SplitPath(bam_file, &path, &file_name, &ext); 01061 01062 if (path.empty() || file_name.empty()) { 01063 return bam_loader; 01064 } 01065 01066 if (bam_input.m_IndexFile.empty()) { 01067 LOG_POST(Info << "Generating Bam index file for " << bam_file); 01068 x_SetStatusText("Generating Bam index file"); 01069 01070 // need to create an index file 01071 if (m_SamtoolsPath.empty()) { 01072 return bam_loader; 01073 } 01074 01075 string index_file = bam_file + ".bai"; 01076 01077 CExec::CResult result = CExec::SpawnL(CExec::eNoWait, 01078 m_SamtoolsPath.c_str(), "index", 01079 s_QuotedPath(bam_file).c_str(), 01080 s_QuotedPath(index_file).c_str(), NULL); 01081 01082 // wait here until it finishes 01083 CProcess proc(result.GetProcessHandle()); 01084 while(proc.IsAlive()) { 01085 if (x_IsCanceled() && proc.Kill()) { 01086 // need clean up the possible generated index file? 01087 CFile t_file(index_file); 01088 if (t_file.Exists()) { 01089 t_file.Remove(); 01090 } 01091 return bam_loader; 01092 } 01093 proc.Wait(500); 01094 } 01095 01096 // if failed, try sorting the Bam file, and do it again 01097 if ( !CFile(index_file).Exists() ) { 01098 LOG_POST(Warning << "Maybe the input Bam file is not sorted. " 01099 << "Try sorting it and do indexing again ..."); 01100 01101 x_SetStatusText("Sorting Bam file"); 01102 // sorting BAM file 01103 file_name += ".sorted"; 01104 string sorted_file = path + file_name; 01105 01106 // wait here until it finishes 01107 result = CExec::SpawnL(CExec::eNoWait, 01108 m_SamtoolsPath.c_str(), "sort", 01109 s_QuotedPath(bam_file).c_str(), 01110 s_QuotedPath(sorted_file).c_str(), NULL); 01111 CProcess sort_proc(result.GetProcessHandle()); 01112 01113 while(sort_proc.IsAlive()) { 01114 if (x_IsCanceled() && sort_proc.Kill()) { 01115 // need clean up the possible generated index file? 01116 int file_i = 0; 01117 string t_file = s_GetTempSortedBamFile(sorted_file, file_i); 01118 while (file_i < 10000 && CFile(t_file).Exists()) { 01119 CFile(t_file).Remove(); 01120 ++file_i; 01121 t_file = s_GetTempSortedBamFile(sorted_file, file_i); 01122 } 01123 return bam_loader; 01124 } 01125 sort_proc.Wait(2000); 01126 } 01127 01128 // samtools automatically append '.bam' to the given sorted file name. 01129 // so we need to adjust it 01130 sorted_file += ".bam"; 01131 if (CFile(sorted_file).Exists()) { 01132 x_SetStatusText("Generating Bam index file"); 01133 01134 // use the sorted Bam file instead of the original user-provided one 01135 index_file = sorted_file + ".bai"; 01136 result = CExec::SpawnL(CExec::eNoWait, 01137 m_SamtoolsPath.c_str(), "index", 01138 s_QuotedPath(sorted_file).c_str(), 01139 s_QuotedPath(index_file).c_str(), NULL); 01140 01141 // wait here until it finishes 01142 CProcess index_proc(result.GetProcessHandle()); 01143 while(index_proc.IsAlive()) { 01144 if (x_IsCanceled() && index_proc.Kill()) { 01145 // need clean up the possible generated index file 01146 CFile t_file(index_file); 01147 if (t_file.Exists()) { 01148 t_file.Remove(); 01149 } 01150 return bam_loader; 01151 } 01152 index_proc.Wait(500); 01153 } 01154 01155 if (CFile(index_file).Exists()) { 01156 bam_input.m_BamFile = sorted_file; 01157 } else { 01158 string msg = "Failed to generate index file for " + bam_file; 01159 NCBI_THROW(CException, eUnknown, msg); 01160 } 01161 } else { 01162 string msg = "Failed to sort the Bam file for " + bam_file; 01163 NCBI_THROW(CException, eUnknown, msg); 01164 } 01165 } 01166 01167 bam_input.m_IndexFile = file_name + ext + ".bai"; 01168 } 01169 01170 x_SetStatusText("Creating Bam data loader"); 01171 file_name += ext; 01172 bam_loader.Reset(new CLoaderDescriptor()); 01173 CBamUIDataSourceType data_source_type; 01174 01175 bam_loader->SetLabel(file_name); 01176 bam_loader->SetPriority(kLoaderPriority); 01177 bam_loader->SetLoader_type(data_source_type.GetExtensionIdentifier()); 01178 01179 CRef<CLoaderDescriptor::TData> bam_data(new CLoaderDescriptor::TData); 01180 bam_data->SetType().SetStr(kLoaderDataType_File); 01181 01182 bam_data->AddField(kBamFile, file_name); 01183 bam_data->AddField(kBamPath, path); 01184 bam_data->AddField(kBamIndexFile, file_name + ".bai"); 01185 bam_data->AddField(kTargetAssm, GetTargetAssembly()); 01186 01187 bam_loader->SetData(*bam_data); 01188 return bam_loader; 01189 } 01190 01191 01192 static 01193 CRef<CSeq_id> s_GetRefSeq_id(const string& id_str) 01194 { 01195 CRef<CSeq_id> seq_id; 01196 try { 01197 seq_id = new CSeq_id(id_str); 01198 } catch ( CException&) { 01199 // ignored 01200 } 01201 01202 if ( !seq_id && id_str.find('|') != NPOS ) { 01203 try { 01204 CBioseq::TId ids; 01205 CSeq_id::ParseIDs(ids, id_str); 01206 if ( !ids.empty() ) { 01207 seq_id = *ids.begin(); 01208 } 01209 } catch ( CException& /*ignored*/ ) { 01210 } 01211 } 01212 01213 if ( !seq_id || (seq_id->IsGi() && seq_id->GetGi() < 1000) ) { 01214 seq_id = new CSeq_id(CSeq_id::e_Local, id_str); 01215 } 01216 return seq_id; 01217 } 01218 01219 01220 CRef<CSeq_annot> 01221 CBamLoadingJob::x_CreateCoverageGraphs(const string& path, 01222 const string& bam_file, 01223 const string& index_file, 01224 const string& annot_name) 01225 { 01226 CRef<CSeq_annot> annot; 01227 CBamMgr mgr; 01228 01229 // Get a list reference sequence ids 01230 vector<string> ref_ids; 01231 CBamDb db(mgr, path + bam_file, path + index_file); 01232 for (CBamRefSeqIterator sit(db); sit; ++sit) { 01233 if (x_IsCanceled()) { 01234 return annot; 01235 } 01236 ref_ids.push_back(sit.GetRefSeqId()); 01237 } 01238 01239 int i = 0; 01240 bool need_id_mapping = false; 01241 ITERATE (vector<string>, ref_it, ref_ids) { 01242 if (x_IsCanceled()) { 01243 annot.Reset(NULL); 01244 break; 01245 } 01246 string msg = "Creating BAM coverage graphs " + 01247 NStr::IntToString(i++) + "/" + 01248 NStr::SizetToString(ref_ids.size()) + " ..."; 01249 x_SetStatusText(msg); 01250 01251 CBam2Seq_graph cvt; 01252 cvt.SetRefLabel(*ref_it); 01253 CRef<CSeq_id> seq_id = s_GetRefSeq_id(*ref_it); 01254 if (seq_id->IsLocal()) { 01255 need_id_mapping = true; 01256 } 01257 cvt.SetRefId(*seq_id); 01258 cvt.SetAnnotName(annot_name); 01259 cvt.SetGraphTitle(annot_name + " coverage graph"); 01260 cvt.SetGraphValueType(cvt.eGraphValueTyps_int); 01261 cvt.SetGraphBinSize(m_BinSize); 01262 //cvt.SetOutlierMax(args["max"].AsDouble()); 01263 CRef<CSeq_annot> annot_t = cvt.MakeSeq_annot(db, path + bam_file); 01264 01265 if (annot_t) { 01266 if (annot) { 01267 // move all graphs to one annotation since 01268 // they all share the same name and title 01269 CSeq_annot::TData::TGraph& target = annot->SetData().SetGraph(); 01270 CSeq_annot::TData::TGraph& src = annot_t->SetData().SetGraph(); 01271 std::copy(src.begin(), src.end(), back_inserter(target)); 01272 } else { 01273 annot = annot_t; 01274 } 01275 } 01276 } 01277 01278 if (annot) { 01279 annot->SetTitleDesc(annot_name + " coverage graph"); 01280 if (need_id_mapping && !m_TargetAssm.empty() ) { 01281 //CIdMapperBuiltin mapper(m_TargetAssm); 01282 string mapper_str; 01283 CAssemblyInfo::GetIdMapperString(m_TargetAssm, mapper_str); 01284 std::stringstream mapper_stream(mapper_str); 01285 CIdMapperConfig mapper(mapper_stream, m_TargetAssm); 01286 mapper.MapObject(*annot); 01287 } 01288 } 01289 01290 return annot; 01291 } 01292 01293 01294 END_NCBI_SCOPE
1.7.5.1
Modified on Wed May 23 13:05:10 2012 by modify_doxy.py rev. 337098