diff --git a/PHLAWDGU/PHLAWDGU.pro b/PHLAWDGU/PHLAWDGU.pro
deleted file mode 100644
index bcc4a74..0000000
--- a/PHLAWDGU/PHLAWDGU.pro
+++ /dev/null
@@ -1,18 +0,0 @@
-#-------------------------------------------------
-#
-# Project created by QtCreator 2011-04-21T10:55:41
-#
-#-------------------------------------------------
-
-QT += core gui sql
-
-TARGET = PHLAWDGU
-TEMPLATE = app
-
-
-SOURCES += main.cpp\
- mainwindow.cpp
-
-HEADERS += mainwindow.h
-
-FORMS += mainwindow.ui
diff --git a/PHLAWDGU/PHLAWDGU.pro.user b/PHLAWDGU/PHLAWDGU.pro.user
deleted file mode 100644
index 3c96a2d..0000000
--- a/PHLAWDGU/PHLAWDGU.pro.user
+++ /dev/null
@@ -1,167 +0,0 @@
-
-
-
- ProjectExplorer.Project.ActiveTarget
- 0
-
-
- ProjectExplorer.Project.EditorSettings
-
- Default
-
-
-
- ProjectExplorer.Project.Target.0
-
- Desktop
-
- Qt4ProjectManager.Target.DesktopTarget
- 0
- 0
- 0
-
-
-
- qmake
-
- QtProjectManager.QMakeBuildStep
-
- false
-
-
- Make
-
- Qt4ProjectManager.MakeStep
- false
-
-
-
- 2
- Build
-
- ProjectExplorer.BuildSteps.Build
-
-
-
- Make
-
- Qt4ProjectManager.MakeStep
- true
-
- clean
-
-
-
- 1
- Clean
-
- ProjectExplorer.BuildSteps.Clean
-
- 2
- false
-
- Debug
-
- Qt4ProjectManager.Qt4BuildConfiguration
- 2
- /home/smitty/Dropbox/programming/cpp/PHLAWD/PHLAWDGU-build-desktop
- 2
- 0
- true
-
-
-
-
- qmake
-
- QtProjectManager.QMakeBuildStep
-
- false
-
-
- Make
-
- Qt4ProjectManager.MakeStep
- false
-
-
-
- 2
- Build
-
- ProjectExplorer.BuildSteps.Build
-
-
-
- Make
-
- Qt4ProjectManager.MakeStep
- true
-
- clean
-
-
-
- 1
- Clean
-
- ProjectExplorer.BuildSteps.Clean
-
- 2
- false
-
- Release
-
- Qt4ProjectManager.Qt4BuildConfiguration
- 0
- /home/smitty/Dropbox/programming/cpp/PHLAWD/PHLAWDGU-build-desktop
- 2
- 0
- true
-
- 2
-
-
- 0
- Deploy
-
- ProjectExplorer.BuildSteps.Deploy
-
- 1
- No deployment
-
- ProjectExplorer.DefaultDeployConfiguration
-
- 1
-
- PHLAWDGU
-
- Qt4ProjectManager.Qt4RunConfiguration
- 2
-
- PHLAWDGU.pro
- false
- false
-
- false
-
- 3768
- true
- false
-
- 1
-
-
-
- ProjectExplorer.Project.TargetCount
- 1
-
-
- ProjectExplorer.Project.Updater.EnvironmentId
- {2691950f-6e7f-4094-9195-6fc2a593cdfc}
-
-
- ProjectExplorer.Project.Updater.FileVersion
- 8
-
-
diff --git a/PHLAWDGU/main.cpp b/PHLAWDGU/main.cpp
deleted file mode 100644
index 9ae175b..0000000
--- a/PHLAWDGU/main.cpp
+++ /dev/null
@@ -1,11 +0,0 @@
-#include
-#include "mainwindow.h"
-
-int main(int argc, char *argv[])
-{
- QApplication a(argc, argv);
- MainWindow w;
- w.show();
-
- return a.exec();
-}
diff --git a/PHLAWDGU/mainwindow.cpp b/PHLAWDGU/mainwindow.cpp
deleted file mode 100644
index 961b618..0000000
--- a/PHLAWDGU/mainwindow.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-#include "mainwindow.h"
-#include "ui_mainwindow.h"
-#include
-#include
-using namespace std;
-
-#include
-#include
-#include
-
-MainWindow::MainWindow(QWidget *parent) : QMainWindow(parent),ui(new Ui::MainWindow){
- ui->setupUi(this);
-}
-
-MainWindow::~MainWindow(){
- delete ui;
-}
-
-/*
- could allow for multiple connections but not doing that for now
- */
-void MainWindow::load_sequencesqlite(){
- if (sdb.connectionName().length() != 0){
- cout << "existing connection" << endl;
- QSqlDatabase::database(sdb.databaseName()).close();
- QSqlDatabase::removeDatabase(sdb.databaseName());
- }
- QString filename = QFileDialog::getOpenFileName( this, tr("Open Document"), QDir::currentPath(), tr("Database files (*.db *.sql *.sqlite);;All files (*.*)"), 0, QFileDialog::DontUseNativeDialog );
- cout << filename.toStdString() << endl;
- sdb = QSqlDatabase::addDatabase("QSQLITE");
- sdb.setDatabaseName(filename);
- if (!sdb.open())
- cout << "open failed" << endl;
- else{
- ui->statusBar->showMessage("sequence database opened");
- ui->actionDB_stats->setEnabled(true);
- ui->actionDB_stats->setText("get stats: "+filename);
- ui->statusBar->showMessage("current: "+filename);
- }
- QSqlQuery q;
- cout << q.exec("SELECT Count(*) FROM sequence;") << endl;
- if (q.next()){
- QString numvalues (q.value(0).toString());
- cout << q.value(0).toString().toStdString() << endl;
- ui->textBrowser->setText("Number of sequences: "+numvalues);
- }
-}
-
-void MainWindow::get_sequencedb_stats(){
- QSqlQuery q;
-
- cout << q.exec("SELECT Count(*) FROM sequence;") << endl;
- if (q.next()){
- cout << q.value(0).toString().toStdString()<< endl;
- }
- ui->textBrowser->setText("");
-}
diff --git a/PHLAWDGU/mainwindow.h b/PHLAWDGU/mainwindow.h
deleted file mode 100644
index 2a42cc0..0000000
--- a/PHLAWDGU/mainwindow.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef MAINWINDOW_H
-#define MAINWINDOW_H
-
-#include
-#include
-
-namespace Ui {
- class MainWindow;
-}
-
-class MainWindow : public QMainWindow
-{
- Q_OBJECT
-
-public:
- explicit MainWindow(QWidget *parent = 0);
- ~MainWindow();
-
-private:
- Ui::MainWindow *ui;
- QSqlDatabase sdb;
-
-private slots:
- void load_sequencesqlite();
- void get_sequencedb_stats();
-};
-
-#endif // MAINWINDOW_H
diff --git a/PHLAWDGU/mainwindow.ui b/PHLAWDGU/mainwindow.ui
deleted file mode 100644
index 23c3f81..0000000
--- a/PHLAWDGU/mainwindow.ui
+++ /dev/null
@@ -1,135 +0,0 @@
-
-
- MainWindow
-
-
-
- 0
- 0
- 400
- 300
-
-
-
- MainWindow
-
-
-
-
-
- 0
- 0
- 401
- 261
-
-
-
-
-
-
-
-
- Sequence DB
-
-
-
-
- Alignments DB
-
-
-
-
- Close
-
-
-
-
- false
-
-
- DB stats
-
-
-
-
-
-
-
- actionSequence_DB
- triggered()
- MainWindow
- load_sequencesqlite()
-
-
- -1
- -1
-
-
- 199
- 149
-
-
-
-
- actionDB_stats
- triggered()
- MainWindow
- get_sequencedb_stats()
-
-
- -1
- -1
-
-
- 199
- 149
-
-
-
-
- actionClose
- triggered()
- MainWindow
- close()
-
-
- -1
- -1
-
-
- 199
- 149
-
-
-
-
-
- load_sequencesqlite()
- get_sequencedb_stats()
-
-
diff --git a/README b/README
index 33dcb94..9cd3006 100644
--- a/README
+++ b/README
@@ -1,36 +1,33 @@
-## Follow these commands to install on Mac
-
-## Install homebrew for compilers and dependencies
-ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
-brew tap homebrew/versions
-brew tap homebrew/science
-
-## Get gcc with support for openmp (this can take a while)
-brew install gcc --without-multilib
-
-## Set gcc as default compiler (rather than clang), and build things from source
-## To make this permanent, put in ~/.bashrc
-export HOMEBREW_CC=gcc-5
-export HOMEBREW_CXX=g++-5
-export HOMEBREW_BUILD_FROM_SOURCE=TRUE
-
-## Check
-brew --env
-
-## Some general dependencies
-brew install automake
-brew install autoconf
-brew install libtool
-
-## PHLAWD dependencies
-brew install muscle
-brew install quicktree
-brew install mafft
-
-## Compile PHLAWD
-cd phlawd/src
-autoreconf -fi
-./configure CC=gcc-5 CXX=g++-5
-make
-sudo make install
+PHLAWD
+======
+
+There are several versions of phlawd floating around:
+
+* The official version of phlawd at https://github.com/blackrim/phlawd
+ entered maintenance mode Sep 2012 and has not had its code updated
+ since then.
+
+* The "chinchliff" fork of phlawd was last updated in Mar 2016 and
+ contains a number of bug fixes and features, including the ability
+ to use whole genome sequences and "shrinking" to the target gene.
+
+This is an unofficial fork, which updates the code for the GenBank ID change
+(https://www.ncbi.nlm.nih.gov/news/03-02-2016-phase-out-of-GI-numbers/)
+and downloads GenBank daily updates in addition to the regular releases.
+Requirements
+------------
+
+phlawd requires the GCC compiler, as well as wget, mafft, muscle,
+quicktree, and sqlite to be in the user's path. These can be installed via
+Homebrew (https://brew.sh) on macOS or Linuxbrew (https://linuxbrew.sh).
+
+Installation
+------------
+
+git clone https://github.com/jonchang/phlawd.git
+cd phlawd/
+cd src/
+./configure
+make
+install PHLAWD /usr/local/bin # or wherever...
diff --git a/src/GenBankReader.cpp b/src/GenBankReader.cpp
index 4340cfc..3ab032c 100644
--- a/src/GenBankReader.cpp
+++ b/src/GenBankReader.cpp
@@ -69,11 +69,7 @@ void GenBankReader::parse_file(string fl, string db_name){
continue;
}
if(tokens[0] == "VERSION"){
- gin = tokens[2];
- vector t2;
- string del2(":");
- Tokenize(tokens[2],t2,del2);
- gin = t2[1];
+ gin = tokens[1];
continue;
}
if(tokens[0].find("/db_xref=\"taxon:")!= string::npos){
diff --git a/src/SQLiteConstructor.cpp b/src/SQLiteConstructor.cpp
index 81bdb94..72b173b 100644
--- a/src/SQLiteConstructor.cpp
+++ b/src/SQLiteConstructor.cpp
@@ -1680,12 +1680,12 @@ void SQLiteConstructor::clean_shrunken_genomes() {
string trimmed_genomes = genefoldername + "genome_shrink_aln";
fu.read_aligned_fasta_into(tempalseqs, trimmed_genomes); // assuming this is aligned
- int seqlength = tempalseqs[0].get_sequence().size();
+ size_t seqlength = tempalseqs[0].get_sequence().size();
float fseql = float(tempalseqs.size());
- vector removeem;
- for (int j = 0; j < seqlength; j++) {
- int gaps = 0;
- for (int i = 0; i < tempalseqs.size(); i++) {
+ vector removeem;
+ for (size_t j = 0; j < seqlength; j++) {
+ size_t gaps = 0;
+ for (size_t i = 0; i < tempalseqs.size(); i++) {
if (tempalseqs[i].get_sequence()[j] == '-' || tempalseqs[i].get_sequence()[j] == 'N' || tempalseqs[i].get_sequence()[j] == 'n')
gaps += 1;
}
@@ -1694,9 +1694,9 @@ void SQLiteConstructor::clean_shrunken_genomes() {
removeem.push_back(j);
}
}
- for (int i = 0; i < tempalseqs.size(); i++) {
+ for (size_t i = 0; i < tempalseqs.size(); i++) {
string a;
- for (int j = 0; j < seqlength; j++) {
+ for (size_t j = 0; j < seqlength; j++) {
if (count(removeem.begin(), removeem.end(), j) == 0)
a += tempalseqs[i].get_sequence()[j];
}
diff --git a/src/SQLiteDBController.cpp b/src/SQLiteDBController.cpp
index 08cdfd7..fea160c 100644
--- a/src/SQLiteDBController.cpp
+++ b/src/SQLiteDBController.cpp
@@ -106,7 +106,7 @@ bool SQLiteDBController::initiate() {
Database conn(db_name);
Query query(conn);
query.get_result(
- "create table taxonomy (id INTEGER PRIMARY KEY,ncbi_id INTEGER,name VARCHAR(255),name_class VARCHAR(32),node_rank VARCHAR(32),parent_ncbi_id INTEGER,edited_name VARCHAR(255),left_value INTEGER,right_value INTEGER);");
+ "create table taxonomy (id INTEGER PRIMARY KEY,ncbi_id INTEGER,name TEXT,name_class TEXT,node_rank TEXT,parent_ncbi_id INTEGER,edited_name TEXT,left_value INTEGER,right_value INTEGER);");
query.free_result();
query.get_result("CREATE INDEX taxonomy_left_value on taxonomy(left_value);");
query.free_result();
@@ -122,7 +122,7 @@ bool SQLiteDBController::initiate() {
query.free_result();
query.get_result(
- "create table sequence (id INTEGER PRIMARY KEY,ncbi_id INTEGER,accession_id VARCHAR(128),identifier VARCHAR(40),description TEXT,seq LONGTEXT);");
+ "create table sequence (id INTEGER PRIMARY KEY,ncbi_id INTEGER,accession_id TEXT,identifier TEXT,description TEXT,seq LONGTEXT);");
query.free_result();
query.get_result("CREATE INDEX sequence_ncbi_id on sequence(ncbi_id);");
query.free_result();
@@ -131,7 +131,7 @@ bool SQLiteDBController::initiate() {
query.get_result("CREATE INDEX sequence_identifier on sequence(identifier);");
query.free_result();
- query.get_result("create table information (id INTEGER PRIMARY KEY, name VARCHAR(128), value VARCHAR(128));");
+ query.get_result("create table information (id INTEGER PRIMARY KEY, name TEXT, value TEXT);");
query.free_result();
return ret;
}
@@ -393,6 +393,17 @@ void SQLiteDBController::load_seqs(string div, string ref, bool downl) {
}
}
+ // download daily updates
+ if (downl == true) {
+ string fnameString = "nc*.flat.gz";
+ string cmd = "wget -nv ftp://ftp.ncbi.nih.gov/genbank/daily-nc/" + fnameString;
+ cout << "downloading dailies with wget" << endl;
+ system(cmd.c_str());
+ cmd = "gunzip -d " + fnameString;
+ cout << "uncompressing dailies" << endl;
+ system(cmd.c_str());
+ }
+
// get the names of the files to use
vector file_names;
cout << "getting file names for gb flat files" << endl;
@@ -401,6 +412,8 @@ void SQLiteDBController::load_seqs(string div, string ref, bool downl) {
for (int j = 0; j < groups.size(); j++) {
if (file_names[i].find("gb" + groups[j]) != string::npos && file_names[i].substr(file_names[i].size() - 4, 4) == ".seq") {
filesToProcess.push_back(file_names[i]);
+ } else if (file_names[i].find("nc") != string::npos && file_names[i].substr(file_names[i].size() - 5, 5) == ".flat") {
+ filesToProcess.push_back(file_names[i]);
}
}
}
diff --git a/src/main.cpp b/src/main.cpp
index afe441b..2748e46 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -42,7 +42,7 @@ using namespace std;
int main(int argc, char* argv[]) {
if(argc != 3) {
- cout << "PHLAWD 3.4a" << endl;
+ cout << "PHLAWD 3.4b (UNOFFICIAL FORK)" << endl;
cout << "you need more arguments." << endl;
cout << "usage: PHLAWD task configfile" << endl;
cout << "possible tasks include:" << endl;