From 966e4d41228c5dfd8fc1e1bf172406741d2e4a05 Mon Sep 17 00:00:00 2001 From: Stephen Smith Date: Mon, 21 Jan 2013 14:52:37 -0500 Subject: [PATCH 01/10] cleanup --- PHLAWDGU/PHLAWDGU.pro | 18 ---- PHLAWDGU/PHLAWDGU.pro.user | 167 ------------------------------------- PHLAWDGU/main.cpp | 11 --- PHLAWDGU/mainwindow.cpp | 57 ------------- PHLAWDGU/mainwindow.h | 28 ------- PHLAWDGU/mainwindow.ui | 135 ------------------------------ 6 files changed, 416 deletions(-) delete mode 100644 PHLAWDGU/PHLAWDGU.pro delete mode 100644 PHLAWDGU/PHLAWDGU.pro.user delete mode 100644 PHLAWDGU/main.cpp delete mode 100644 PHLAWDGU/mainwindow.cpp delete mode 100644 PHLAWDGU/mainwindow.h delete mode 100644 PHLAWDGU/mainwindow.ui diff --git a/PHLAWDGU/PHLAWDGU.pro b/PHLAWDGU/PHLAWDGU.pro deleted file mode 100644 index bcc4a74..0000000 --- a/PHLAWDGU/PHLAWDGU.pro +++ /dev/null @@ -1,18 +0,0 @@ -#------------------------------------------------- -# -# Project created by QtCreator 2011-04-21T10:55:41 -# -#------------------------------------------------- - -QT += core gui sql - -TARGET = PHLAWDGU -TEMPLATE = app - - -SOURCES += main.cpp\ - mainwindow.cpp - -HEADERS += mainwindow.h - -FORMS += mainwindow.ui diff --git a/PHLAWDGU/PHLAWDGU.pro.user b/PHLAWDGU/PHLAWDGU.pro.user deleted file mode 100644 index 3c96a2d..0000000 --- a/PHLAWDGU/PHLAWDGU.pro.user +++ /dev/null @@ -1,167 +0,0 @@ - - - - ProjectExplorer.Project.ActiveTarget - 0 - - - ProjectExplorer.Project.EditorSettings - - Default - - - - ProjectExplorer.Project.Target.0 - - Desktop - - Qt4ProjectManager.Target.DesktopTarget - 0 - 0 - 0 - - - - qmake - - QtProjectManager.QMakeBuildStep - - false - - - Make - - Qt4ProjectManager.MakeStep - false - - - - 2 - Build - - ProjectExplorer.BuildSteps.Build - - - - Make - - Qt4ProjectManager.MakeStep - true - - clean - - - - 1 - Clean - - ProjectExplorer.BuildSteps.Clean - - 2 - false - - Debug - - Qt4ProjectManager.Qt4BuildConfiguration - 2 - 
/home/smitty/Dropbox/programming/cpp/PHLAWD/PHLAWDGU-build-desktop - 2 - 0 - true - - - - - qmake - - QtProjectManager.QMakeBuildStep - - false - - - Make - - Qt4ProjectManager.MakeStep - false - - - - 2 - Build - - ProjectExplorer.BuildSteps.Build - - - - Make - - Qt4ProjectManager.MakeStep - true - - clean - - - - 1 - Clean - - ProjectExplorer.BuildSteps.Clean - - 2 - false - - Release - - Qt4ProjectManager.Qt4BuildConfiguration - 0 - /home/smitty/Dropbox/programming/cpp/PHLAWD/PHLAWDGU-build-desktop - 2 - 0 - true - - 2 - - - 0 - Deploy - - ProjectExplorer.BuildSteps.Deploy - - 1 - No deployment - - ProjectExplorer.DefaultDeployConfiguration - - 1 - - PHLAWDGU - - Qt4ProjectManager.Qt4RunConfiguration - 2 - - PHLAWDGU.pro - false - false - - false - - 3768 - true - false - - 1 - - - - ProjectExplorer.Project.TargetCount - 1 - - - ProjectExplorer.Project.Updater.EnvironmentId - {2691950f-6e7f-4094-9195-6fc2a593cdfc} - - - ProjectExplorer.Project.Updater.FileVersion - 8 - - diff --git a/PHLAWDGU/main.cpp b/PHLAWDGU/main.cpp deleted file mode 100644 index 9ae175b..0000000 --- a/PHLAWDGU/main.cpp +++ /dev/null @@ -1,11 +0,0 @@ -#include -#include "mainwindow.h" - -int main(int argc, char *argv[]) -{ - QApplication a(argc, argv); - MainWindow w; - w.show(); - - return a.exec(); -} diff --git a/PHLAWDGU/mainwindow.cpp b/PHLAWDGU/mainwindow.cpp deleted file mode 100644 index 961b618..0000000 --- a/PHLAWDGU/mainwindow.cpp +++ /dev/null @@ -1,57 +0,0 @@ -#include "mainwindow.h" -#include "ui_mainwindow.h" -#include -#include -using namespace std; - -#include -#include -#include - -MainWindow::MainWindow(QWidget *parent) : QMainWindow(parent),ui(new Ui::MainWindow){ - ui->setupUi(this); -} - -MainWindow::~MainWindow(){ - delete ui; -} - -/* - could allow for multiple connections but not doing that for now - */ -void MainWindow::load_sequencesqlite(){ - if (sdb.connectionName().length() != 0){ - cout << "existing connection" << endl; - 
QSqlDatabase::database(sdb.databaseName()).close(); - QSqlDatabase::removeDatabase(sdb.databaseName()); - } - QString filename = QFileDialog::getOpenFileName( this, tr("Open Document"), QDir::currentPath(), tr("Database files (*.db *.sql *.sqlite);;All files (*.*)"), 0, QFileDialog::DontUseNativeDialog ); - cout << filename.toStdString() << endl; - sdb = QSqlDatabase::addDatabase("QSQLITE"); - sdb.setDatabaseName(filename); - if (!sdb.open()) - cout << "open failed" << endl; - else{ - ui->statusBar->showMessage("sequence database opened"); - ui->actionDB_stats->setEnabled(true); - ui->actionDB_stats->setText("get stats: "+filename); - ui->statusBar->showMessage("current: "+filename); - } - QSqlQuery q; - cout << q.exec("SELECT Count(*) FROM sequence;") << endl; - if (q.next()){ - QString numvalues (q.value(0).toString()); - cout << q.value(0).toString().toStdString() << endl; - ui->textBrowser->setText("Number of sequences: "+numvalues); - } -} - -void MainWindow::get_sequencedb_stats(){ - QSqlQuery q; - - cout << q.exec("SELECT Count(*) FROM sequence;") << endl; - if (q.next()){ - cout << q.value(0).toString().toStdString()<< endl; - } - ui->textBrowser->setText(""); -} diff --git a/PHLAWDGU/mainwindow.h b/PHLAWDGU/mainwindow.h deleted file mode 100644 index 2a42cc0..0000000 --- a/PHLAWDGU/mainwindow.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef MAINWINDOW_H -#define MAINWINDOW_H - -#include -#include - -namespace Ui { - class MainWindow; -} - -class MainWindow : public QMainWindow -{ - Q_OBJECT - -public: - explicit MainWindow(QWidget *parent = 0); - ~MainWindow(); - -private: - Ui::MainWindow *ui; - QSqlDatabase sdb; - -private slots: - void load_sequencesqlite(); - void get_sequencedb_stats(); -}; - -#endif // MAINWINDOW_H diff --git a/PHLAWDGU/mainwindow.ui b/PHLAWDGU/mainwindow.ui deleted file mode 100644 index 23c3f81..0000000 --- a/PHLAWDGU/mainwindow.ui +++ /dev/null @@ -1,135 +0,0 @@ - - - MainWindow - - - - 0 - 0 - 400 - 300 - - - - MainWindow - - - - - - 0 
- 0 - 401 - 261 - - - - - - - - 0 - 0 - 400 - 23 - - - - - Load - - - - - - - - Get - - - - - - - - - - Sequence DB - - - - - Alignments DB - - - - - Close - - - - - false - - - DB stats - - - - - - - - actionSequence_DB - triggered() - MainWindow - load_sequencesqlite() - - - -1 - -1 - - - 199 - 149 - - - - - actionDB_stats - triggered() - MainWindow - get_sequencedb_stats() - - - -1 - -1 - - - 199 - 149 - - - - - actionClose - triggered() - MainWindow - close() - - - -1 - -1 - - - 199 - 149 - - - - - - load_sequencesqlite() - get_sequencedb_stats() - - From 27e3f927ab893e4962b1c6c56fcdc0fcbcc8e6ab Mon Sep 17 00:00:00 2001 From: Jonathan Chang Date: Tue, 10 Dec 2013 18:54:29 -0800 Subject: [PATCH 02/10] Hack to include daily updates. This doesn't do any filtering by division, unlike the genbank releases. --- src/SQLiteDBController.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/SQLiteDBController.cpp b/src/SQLiteDBController.cpp index 08cdfd7..6e23bb3 100644 --- a/src/SQLiteDBController.cpp +++ b/src/SQLiteDBController.cpp @@ -393,6 +393,17 @@ void SQLiteDBController::load_seqs(string div, string ref, bool downl) { } } + // download daily updates + if (downl == true) { + string fnameString = "nc*.flat.gz"; + string cmd = "wget -nv ftp://ftp.ncbi.nih.gov/genbank/daily-nc/" + fnameString; + cout << "downloading dailies with wget" << endl; + system(cmd.c_str()); + cmd = "gunzip -d " + fnameString; + cout << "uncompressing dailies" << endl; + system(cmd.c_str()); + } + // get the names of the files to use vector file_names; cout << "getting file names for gb flat files" << endl; @@ -401,6 +412,8 @@ void SQLiteDBController::load_seqs(string div, string ref, bool downl) { for (int j = 0; j < groups.size(); j++) { if (file_names[i].find("gb" + groups[j]) != string::npos && file_names[i].substr(file_names[i].size() - 4, 4) == ".seq") { filesToProcess.push_back(file_names[i]); + } else if (file_names[i].find("nc") != string::npos && 
file_names[i].substr(file_names[i].size() - 5, 5) == ".flat") { + filesToProcess.push_back(file_names[i]); } } } From 05ecf78bcac627f2b782a8ea92dd5b6c7caff450 Mon Sep 17 00:00:00 2001 From: Jonathan Chang Date: Tue, 2 Dec 2014 17:39:43 -0800 Subject: [PATCH 03/10] Fix overflow in genome shrink This was causing infinite loops during certain large alignments. `std::string::size` returns a `size_t`, which is 8 bytes wide on 64-bit Linux but `int` is only 4 bytes wide (and signed, to boot). --- src/SQLiteConstructor.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/SQLiteConstructor.cpp b/src/SQLiteConstructor.cpp index 4b3addb..f1505b7 100644 --- a/src/SQLiteConstructor.cpp +++ b/src/SQLiteConstructor.cpp @@ -1655,12 +1655,12 @@ void SQLiteConstructor::clean_shrunken_genomes() { string trimmed_genomes = genefoldername + "genome_shrink_aln"; fu.read_aligned_fasta_into(tempalseqs, trimmed_genomes); // assuming this is aligned - int seqlength = tempalseqs[0].get_sequence().size(); + size_t seqlength = tempalseqs[0].get_sequence().size(); float fseql = float(tempalseqs.size()); - vector removeem; - for (int j = 0; j < seqlength; j++) { - int gaps = 0; - for (int i = 0; i < tempalseqs.size(); i++) { + vector removeem; + for (size_t j = 0; j < seqlength; j++) { + size_t gaps = 0; + for (size_t i = 0; i < tempalseqs.size(); i++) { if (tempalseqs[i].get_sequence()[j] == '-' || tempalseqs[i].get_sequence()[j] == 'N' || tempalseqs[i].get_sequence()[j] == 'n') gaps += 1; } @@ -1669,9 +1669,9 @@ void SQLiteConstructor::clean_shrunken_genomes() { removeem.push_back(j); } } - for (int i = 0; i < tempalseqs.size(); i++) { + for (size_t i = 0; i < tempalseqs.size(); i++) { string a; - for (int j = 0; j < seqlength; j++) { + for (size_t j = 0; j < seqlength; j++) { if (count(removeem.begin(), removeem.end(), j) == 0) a += tempalseqs[i].get_sequence()[j]; } From 70a784b91b61933779836d691e464d69c389fbd3 Mon Sep 17 00:00:00 2001 From: Jonathan Chang 
Date: Thu, 27 Apr 2017 17:51:24 -0700 Subject: [PATCH 04/10] use appropriate TEXT type instead of VARCHAR --- src/SQLiteDBController.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/SQLiteDBController.cpp b/src/SQLiteDBController.cpp index 6e23bb3..fea160c 100644 --- a/src/SQLiteDBController.cpp +++ b/src/SQLiteDBController.cpp @@ -106,7 +106,7 @@ bool SQLiteDBController::initiate() { Database conn(db_name); Query query(conn); query.get_result( - "create table taxonomy (id INTEGER PRIMARY KEY,ncbi_id INTEGER,name VARCHAR(255),name_class VARCHAR(32),node_rank VARCHAR(32),parent_ncbi_id INTEGER,edited_name VARCHAR(255),left_value INTEGER,right_value INTEGER);"); + "create table taxonomy (id INTEGER PRIMARY KEY,ncbi_id INTEGER,name TEXT,name_class TEXT,node_rank TEXT,parent_ncbi_id INTEGER,edited_name TEXT,left_value INTEGER,right_value INTEGER);"); query.free_result(); query.get_result("CREATE INDEX taxonomy_left_value on taxonomy(left_value);"); query.free_result(); @@ -122,7 +122,7 @@ bool SQLiteDBController::initiate() { query.free_result(); query.get_result( - "create table sequence (id INTEGER PRIMARY KEY,ncbi_id INTEGER,accession_id VARCHAR(128),identifier VARCHAR(40),description TEXT,seq LONGTEXT);"); + "create table sequence (id INTEGER PRIMARY KEY,ncbi_id INTEGER,accession_id TEXT,identifier TEXT,description TEXT,seq LONGTEXT);"); query.free_result(); query.get_result("CREATE INDEX sequence_ncbi_id on sequence(ncbi_id);"); query.free_result(); @@ -131,7 +131,7 @@ bool SQLiteDBController::initiate() { query.get_result("CREATE INDEX sequence_identifier on sequence(identifier);"); query.free_result(); - query.get_result("create table information (id INTEGER PRIMARY KEY, name VARCHAR(128), value VARCHAR(128));"); + query.get_result("create table information (id INTEGER PRIMARY KEY, name TEXT, value TEXT);"); query.free_result(); return ret; } From dc89f6509c6209cd5a7279721eb0be9a20bd22c2 Mon Sep 17 00:00:00 2001 From: Jonathan 
Chang Date: Thu, 27 Apr 2017 17:51:43 -0700 Subject: [PATCH 05/10] NCBI no longer uses genbank IDs, prefer accession.version --- src/GenBankReader.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/GenBankReader.cpp b/src/GenBankReader.cpp index 4340cfc..3ab032c 100644 --- a/src/GenBankReader.cpp +++ b/src/GenBankReader.cpp @@ -69,11 +69,7 @@ void GenBankReader::parse_file(string fl, string db_name){ continue; } if(tokens[0] == "VERSION"){ - gin = tokens[2]; - vector t2; - string del2(":"); - Tokenize(tokens[2],t2,del2); - gin = t2[1]; + gin = tokens[1]; continue; } if(tokens[0].find("/db_xref=\"taxon:")!= string::npos){ From 2b9479e59da5af1c7c481daa0b63f6edff59e839 Mon Sep 17 00:00:00 2001 From: Jonathan Chang Date: Thu, 27 Apr 2017 18:16:12 -0700 Subject: [PATCH 06/10] update README --- README | 39 ++++++--------------------------------- 1 file changed, 6 insertions(+), 33 deletions(-) diff --git a/README b/README index 33dcb94..5792175 100644 --- a/README +++ b/README @@ -1,36 +1,9 @@ -## Follow these commands to install on Mac +PHLAWD is available via Homebrew. -## Install homebrew for compilers and dependencies -ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" -brew tap homebrew/versions -brew tap homebrew/science +1. 
Install homebrew (if you haven't already) at https://brew.sh -## Get gcc with support for openmp (this can take a while) -brew install gcc --without-multilib - -## Set gcc as default compiler (rather than clang), and build things from source -## To make this permanent, put in ~/.bashrc -export HOMEBREW_CC=gcc-5 -export HOMEBREW_CXX=g++-5 -export HOMEBREW_BUILD_FROM_SOURCE=TRUE - -## Check -brew --env - -## Some general dependencies -brew install automake -brew install autoconf -brew install libtool - -## PHLAWD dependencies -brew install muscle -brew install quicktree -brew install mafft - -## Compile PHLAWD -cd phlawd/src -autoreconf -fi -./configure CC=gcc-5 CXX=g++-5 -make -sudo make install +2. Tap the Homebrew/science tap + brew tap homebrew/science +3. Download, compile, and install PHLAWD and its dependencies + brew install phlawd From ed00041a8b2674a595f6f73a785c412c680dafee Mon Sep 17 00:00:00 2001 From: Jonathan Chang Date: Thu, 27 Apr 2017 18:21:57 -0700 Subject: [PATCH 07/10] Make it clear that this version is unofficial --- src/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.cpp b/src/main.cpp index afe441b..89d312f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -42,7 +42,7 @@ using namespace std; int main(int argc, char* argv[]) { if(argc != 3) { - cout << "PHLAWD 3.4a" << endl; + cout << "PHLAWD 3.4a (UNOFFICIAL FORK)" << endl; cout << "you need more arguments." << endl; cout << "usage: PHLAWD task configfile" << endl; cout << "possible tasks include:" << endl; From 2063a9095df35412615112f3280347731b410dbe Mon Sep 17 00:00:00 2001 From: Jonathan Chang Date: Thu, 27 Apr 2017 18:40:10 -0700 Subject: [PATCH 08/10] update README --- README | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/README b/README index 5792175..3c820c7 100644 --- a/README +++ b/README @@ -1,9 +1,35 @@ -PHLAWD is available via Homebrew. +PHLAWD +====== -1. 
Install homebrew (if you haven't already) at https://brew.sh +There are several versions of phlawd floating around: -2. Tap the Homebrew/science tap - brew tap homebrew/science +* The official version of phlawd at https://github.com/blackrim/phlawd + entered maintenance mode Sep 2012 and has not had its code updated + since then. -3. Download, compile, and install PHLAWD and its dependencies - brew install phlawd +* The "chinchliff" fork of phlawd was last updated in Mar 2016 and + contains a number of bug fixes and features, including the ability + to use whole genome sequences and "shrinking" to the target gene. + +This is an unofficial fork, which updates the code for the GenBank ID change +(https://www.ncbi.nlm.nih.gov/news/03-02-2016-phase-out-of-GI-numbers/) +and downloads GenBank daily updates in addition to the regular releases. + +Requirements +------------ + +phlawd requires the GCC compiler, as well as wget, mafft, muscle, +quicktree, and sqlite to be in the user's path. These can be installed via +Homebrew (https://brew.sh) on macOS or Linuxbrew (https://linuxbrew.sh). + +Installation +------------ + +```sh +git clone https://github.com/jonchang/phlawd.git +cd phlawd/ +cd src/ +./configure +make +install PHLAWD /usr/local/bin # or wherever... +``` From ce255e075bb6c05c7856a195fe178fd25eb46119 Mon Sep 17 00:00:00 2001 From: Jonathan Chang Date: Tue, 9 May 2017 14:18:40 -0700 Subject: [PATCH 09/10] Update README --- README | 2 -- 1 file changed, 2 deletions(-) diff --git a/README b/README index 3c820c7..9cd3006 100644 --- a/README +++ b/README @@ -25,11 +25,9 @@ Homebrew (https://brew.sh) on macOS or Linuxbrew (https://linuxbrew.sh). Installation ------------ -```sh git clone https://github.com/jonchang/phlawd.git cd phlawd/ cd src/ ./configure make install PHLAWD /usr/local/bin # or wherever... 
-``` From f86752341ce944ea9350239ea8053e92e2a3d1f5 Mon Sep 17 00:00:00 2001 From: Jonathan Chang Date: Tue, 1 Aug 2017 17:35:17 -0700 Subject: [PATCH 10/10] bump version for homebrew --- src/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.cpp b/src/main.cpp index 89d312f..2748e46 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -42,7 +42,7 @@ using namespace std; int main(int argc, char* argv[]) { if(argc != 3) { - cout << "PHLAWD 3.4a (UNOFFICIAL FORK)" << endl; + cout << "PHLAWD 3.4b (UNOFFICIAL FORK)" << endl; cout << "you need more arguments." << endl; cout << "usage: PHLAWD task configfile" << endl; cout << "possible tasks include:" << endl;