From 741fd8f336ac25764ab73a886cf2940a1d1f4a90 Mon Sep 17 00:00:00 2001 From: gmmh Date: Tue, 16 Dec 2014 14:30:21 -0800 Subject: [PATCH 01/21] little fixes and clean-up --- Cruise_Size_distribution.R | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/Cruise_Size_distribution.R b/Cruise_Size_distribution.R index 006d0dc..ea0d034 100644 --- a/Cruise_Size_distribution.R +++ b/Cruise_Size_distribution.R @@ -1,8 +1,8 @@ #[gwennm@bloom DeepDOM/Cell_Division] ##Run this script using the commented out code just below (paste directly into the command line), should be in the directory listed above -# Rscript ~/DeepDOM/scripts/Cruise_Size_distribution_sqlite_GMH.R d1 d2 prochloro DeepDOM | qsub -lwalltime=00:30:00,nodes=1:ppn=1 -N DeepDOM -d. +# Rscript ~/DeepDOM/scripts/Cruise_Size_distribution_sqlite_GMH.R d1 d2 prochloro DeepDOM -#note edit line above according to changes to script: make day input an integer number range (ie: 1,2 for 1:2). check with Chris to make sure the script above will work... esp. wall time +#note edit line above according to changes to script: make day input an integer number range (ie: 1,2 for 1:2). #allowing arguments to be input from the command line input commented out above @@ -12,6 +12,7 @@ d2 <- as.numeric(args[2]) #2nd argument, day number to end phyto <- as.character(args[3]) # 2nd arg phyto to calc size dist cruise <- as.character(args[4]) # 3rd arg cruise name + library(popcycle) library(stats) @@ -19,32 +20,34 @@ library(stats) ### BATCH FILE inputs ### ######################### #globals necessary for running on bloom -# home <- '~/DeepDOM/Cell_Division/' +# home <- '~/DeepDOM/Cell_Division/' #change to take from input, so not hardcoded in # folder <- NULL # root <- "/misc/seaflow/" +#db.location <- "~/popcycle" #change to make variable? #globals necessary for running on local machine connected to bloom home <- "/Users/gwen/Desktop/Cruises/DeepDOM_2013/seaflow/" folder <- "Cell_Division/" root <- "/Volumes/seaflow/" -cruise <- "DeepDOM" -phyto <- 'prochloro' +db.location <- "/Volumes/gwennm/popcycle" d1 <- 1 #start day -d2 <- 1 # end day +d2 <- 2 # end day +phyto <- 'prochloro' +cruise <- "DeepDOM" #Globals necessary for popcycle commands: consider making an input variable to the script set.evt.location(paste(root, cruise, sep="")) -set.project.location("/Volumes/gwennm/popcycle") +set.project.location(db.location) set.cruise.id("march2013") #parameters for making smoothed distributions para <- "fsc_small" n.breaks <- 2^10 # 1024 -concat <- 4 # 3-minute file x 4 = 12 min chunks +#concat <- 4 # note this does not work here, make as an option later? out <- NULL -#write a new function to query sqlite database by parameter and population +# new function to query sqlite database by parameter and population and file.name get.param.by.pop <- function(file.name, para, phyto, db= db.name){ #this sqlite query will return a table with columns: file, param, pop (only phyto) sql <- paste0("SELECT opp.file, opp.", para, ", vct.pop @@ -148,7 +151,7 @@ for(n in d1:d2){ #Note changed the start range of density from 1 to 0 for smaller pro #shouldn't this range be dependent on the phyto we are trying to model? maybe this should be a range determined by the max of the slice #could add an if statement, if phyto = prochloro then this range for dens - dens <- density(log10(slice[,para]), n=n.breaks,from=0, to=3.5, bw="SJ", kernel='gaussian',na.rm=T) + dens <- density(log10(slice[,para]), n=n.breaks,from=1, to=3.5, bw="SJ", kernel='gaussian',na.rm=T) freq.dist <- dens$y*diff(dens$x)[1] size.dist <- round(freq.dist * ntot) stages <- round(10^dens$x,3) From 060400cb5e9758aa342cd553e4c860ce75768f6d Mon Sep 17 00:00:00 2001 From: gmmh Date: Tue, 16 Dec 2014 14:38:15 -0800 Subject: [PATCH 02/21] oops, range needs to start at 0, not 1 --- Cruise_Size_distribution.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cruise_Size_distribution.R b/Cruise_Size_distribution.R index ea0d034..d40633e 100644 --- a/Cruise_Size_distribution.R +++ b/Cruise_Size_distribution.R @@ -151,7 +151,7 @@ for(n in d1:d2){ #Note changed the start range of density from 1 to 0 for smaller pro #shouldn't this range be dependent on the phyto we are trying to model? maybe this should be a range determined by the max of the slice #could add an if statement, if phyto = prochloro then this range for dens - dens <- density(log10(slice[,para]), n=n.breaks,from=1, to=3.5, bw="SJ", kernel='gaussian',na.rm=T) + dens <- density(log10(slice[,para]), n=n.breaks,from=0, to=3.5, bw="SJ", kernel='gaussian',na.rm=T) freq.dist <- dens$y*diff(dens$x)[1] size.dist <- round(freq.dist * ntot) stages <- round(10^dens$x,3) From 1c60a3608048b2feaabfaa66d1a346a0759931f0 Mon Sep 17 00:00:00 2001 From: gmmh Date: Tue, 16 Dec 2014 15:08:34 -0800 Subject: [PATCH 03/21] fixed ntot= 1/opp_evt_ratio * n_count --- Cruise_Size_distribution.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cruise_Size_distribution.R b/Cruise_Size_distribution.R index d40633e..fd39578 100644 --- a/Cruise_Size_distribution.R +++ b/Cruise_Size_distribution.R @@ -144,8 +144,8 @@ for(n in d1:d2){ print("no beads found") } - #insert total abundance of this pop from the correct file, ** is this correct?? - ntot <- stat[which(stat$file == file), "abundance"] + #insert ntot from 1/opp_evt_ratio * n_count from stats + ntot <- 1/stat[which(stat$file == file), "opp_evt_ratio"] * stat[which(stat$file == file), "n_count"] time.class <- rep(time, n.breaks) #make vector of time to go with dist #Note changed the start range of density from 1 to 0 for smaller pro From ca7bf4ed9a976634641b4ff363ffe5976d14f75e Mon Sep 17 00:00:00 2001 From: gmmh Date: Tue, 16 Dec 2014 15:45:46 -0800 Subject: [PATCH 04/21] clean-up --- Cruise_Size_distribution.R | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/Cruise_Size_distribution.R b/Cruise_Size_distribution.R index fd39578..dcfc450 100644 --- a/Cruise_Size_distribution.R +++ b/Cruise_Size_distribution.R @@ -1,8 +1,9 @@ #[gwennm@bloom DeepDOM/Cell_Division] ##Run this script using the commented out code just below (paste directly into the command line), should be in the directory listed above -# Rscript ~/DeepDOM/scripts/Cruise_Size_distribution_sqlite_GMH.R d1 d2 prochloro DeepDOM +# Rscript ~/DeepDOM/scripts/Cruise_Size_distribution_sqlite_GMH.R 1 40 prochloro DeepDOM -#note edit line above according to changes to script: make day input an integer number range (ie: 1,2 for 1:2). + +#Note: this code requires popcycle package, and a flag file called "flag_file.txt" in the same directory as the db.location. It produces a plot of beads across the cruise with a smoothed spline fit to estimate the ave value of the parameter (fsc_small). It also produces a .csv file for each day containing the distributions of fsc_small for the phyto called for each 3-min file. #allowing arguments to be input from the command line input commented out above @@ -65,14 +66,6 @@ get.param.by.pop <- function(file.name, para, phyto, db= db.name){ return(opp.slice) } -#new function to retrieve opp.evt.table from sqlite database, suggest adding to popcycle -get.opp.evt.ratio.table <- function(db=db.name){ - sql <- paste0("SELECT * FROM ", opp.evt.ratio.table.name) - con <- dbConnect(SQLite(), dbname= db) - table <- dbGetQuery(con, sql) - dbDisconnect(con) - return (table) -} ########################### @@ -93,14 +86,13 @@ all.pop <- subset(all.stat, pop == phyto) all.files <- all.pop$file julian.day <- unique(basename(dirname(all.files))) -#load opp.evt.ratios to correct for true cell density, so far not necessary because I use abundance from stats table -#oer <- get.opp.evt.ratio.table() ######################## ### INSTRUMENT DRIFT ### ######################## #need this code to normalize to beads fsc_small across the whole cruise + #this also plots the bead distribution for the whole cruise with smoothed spline spar <- 0.45 beads <- subset(all.stat, pop == "beads" & fsc_small < 60000) @@ -131,8 +123,7 @@ for(n in d1:d2){ size.class <- NULL for(file in filenames){ - #still need to figure out how to implement concat to put together 12 min chunks - #do we need to concat at this step? or can it wait until the model? + #select parameter for phytoplankton from each file slice <- get.param.by.pop(file, para, phyto) #find smoothed beads from same time stamp to normalize param of interest @@ -148,8 +139,7 @@ for(n in d1:d2){ ntot <- 1/stat[which(stat$file == file), "opp_evt_ratio"] * stat[which(stat$file == file), "n_count"] time.class <- rep(time, n.breaks) #make vector of time to go with dist - #Note changed the start range of density from 1 to 0 for smaller pro - #shouldn't this range be dependent on the phyto we are trying to model? maybe this should be a range determined by the max of the slice + #shouldn't this range be dependent on the phyto we are trying to model? #could add an if statement, if phyto = prochloro then this range for dens dens <- density(log10(slice[,para]), n=n.breaks,from=0, to=3.5, bw="SJ", kernel='gaussian',na.rm=T) freq.dist <- dens$y*diff(dens$x)[1] From 69cdfcb736d75185b3d07421113e5d3cbe01b9e6 Mon Sep 17 00:00:00 2001 From: gmmh Date: Tue, 6 Jan 2015 15:04:46 -0800 Subject: [PATCH 05/21] version 1 used to make my distributions for Deep DOM on Dec. 17, 2014 --- Cruise_Size_distribution.R | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/Cruise_Size_distribution.R b/Cruise_Size_distribution.R index dcfc450..4c3b6d8 100644 --- a/Cruise_Size_distribution.R +++ b/Cruise_Size_distribution.R @@ -1,6 +1,6 @@ #[gwennm@bloom DeepDOM/Cell_Division] ##Run this script using the commented out code just below (paste directly into the command line), should be in the directory listed above -# Rscript ~/DeepDOM/scripts/Cruise_Size_distribution_sqlite_GMH.R 1 40 prochloro DeepDOM +# Rscript ~/DeepDOM/ssPopModel/Cruise_Size_distribution.R 1 40 prochloro DeepDOM #Note: this code requires popcycle package, and a flag file called "flag_file.txt" in the same directory as the db.location. It produces a plot of beads across the cruise with a smoothed spline fit to estimate the ave value of the parameter (fsc_small). It also produces a .csv file for each day containing the distributions of fsc_small for the phyto called for each 3-min file. @@ -21,20 +21,20 @@ library(stats) ### BATCH FILE inputs ### ######################### #globals necessary for running on bloom -# home <- '~/DeepDOM/Cell_Division/' #change to take from input, so not hardcoded in -# folder <- NULL -# root <- "/misc/seaflow/" -#db.location <- "~/popcycle" #change to make variable? +home <- '~/DeepDOM/Cell_Division/' #change to take from input, so not hardcoded in +folder <- NULL +root <- "/misc/seaflow/" +db.location <- "~/popcycle" #change to make variable? #globals necessary for running on local machine connected to bloom -home <- "/Users/gwen/Desktop/Cruises/DeepDOM_2013/seaflow/" -folder <- "Cell_Division/" -root <- "/Volumes/seaflow/" -db.location <- "/Volumes/gwennm/popcycle" -d1 <- 1 #start day -d2 <- 2 # end day -phyto <- 'prochloro' -cruise <- "DeepDOM" +# home <- "/Users/gwen/Desktop/Cruises/DeepDOM_2013/seaflow/" +# folder <- "Cell_Division/" +# root <- "/Volumes/seaflow/" +# db.location <- "/Volumes/gwennm/popcycle" +# d1 <- 1 #start day +# d2 <- 2 # end day +# phyto <- 'prochloro' +# cruise <- "DeepDOM" #Globals necessary for popcycle commands: consider making an input variable to the script set.evt.location(paste(root, cruise, sep="")) From ce4f472f960793cc7d32c907fa8eea1c24e9893d Mon Sep 17 00:00:00 2001 From: gmmh Date: Wed, 7 Jan 2015 11:27:24 -0800 Subject: [PATCH 06/21] changes to make code work for my directory, need to still make generally useful --- Viz_Size_Dist.R | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/Viz_Size_Dist.R b/Viz_Size_Dist.R index 53855bb..486ef4c 100644 --- a/Viz_Size_Dist.R +++ b/Viz_Size_Dist.R @@ -1,12 +1,17 @@ +## Script for visualizing the HD.size.class files as 3D histograms + + library(rgl) library(zoo) jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F", "yellow", "#FF7F00", "red", "#7F0000")) - home <- "~/Documents/DATA/SeaFlow/Thompson/" - home <- "/Volumes/ribalet/Cell_Division/" - cruise <- "Rhodomonas_Feb2014" # "Med4_TimeCourse" - phyto <- "crypto" +#Change according to your own directory structure, cruise and phyto of interest +#note: should implement a command line input for this + #home <- "~/Documents/DATA/SeaFlow/Thompson/" #local + home <- "/Volumes/gwennm/DeepDOM/Cell_Division/" #on server + cruise <- "DeepDOM" + phyto <- "prochloro" ############################ ## OLD SIZE DISTRIBUTION ### @@ -23,20 +28,23 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F" ## FULL SIZE DISTRIBUTION ### ############################# -list <- list.files(paste(home,cruise,"/",sep=""),pattern=paste("HD.size.class_",cruise,"_",phyto,sep="")) +list <- list.files(home, pattern=paste("HD.size.class_",cruise,"_",phyto,sep="")) Size <- NULL for(l in list){ print(l) - s <- read.csv(paste(home,cruise,"/",l,sep="")) + s <- read.csv(paste(home,l,sep="")) Size <- rbind(Size, s) } Size$time <- as.POSIXct(Size$time, tz="GMT") Size$num.time <- as.numeric(Size$time) Size[Size[,"size.dist"] == 0,"freq.dist"] <- 0 + Size.cut <- subset(Size, freq.dist > 0) + +percentile <- cut(Size[,"freq.dist"], 100); plot3d(x=log10(Size$stages/Size$fsc_beads), y=Size$time, z=Size$freq.dist, col=jet.colors(100)[percentile], type='p', lwd=1, xlab="Size distribution", ylab="Frequency", zlab="Time",axes=T) -percentile <- cut(Size[,"freq.dist"], 100); plot3d(x=log10(Size$stages/Size$fsc_beads), y=Size$time, z=Size$freq.dist, col=jet.colors(100)[percentile], type='p', lwd=1, xlab="Size distribution", ylab="Frequency", zlab="Time",axes=F) +percentile <- cut(Size.cut[,"freq.dist"], 100); plot3d(x=log10(Size.cut$stages/Size.cut$fsc_beads), y=Size.cut$time, z=Size.cut$freq.dist, col=jet.colors(100)[percentile], type='p', lwd=1, xlab="Size distribution", ylab="Frequency", zlab="Time",axes=T) percentile <- cut(Size[,"freq.dist"], 100); plot3d(x=Size$stages/Size$fsc_beads, y=Size$time, z=Size$freq.dist, col=jet.colors(100)[percentile], type='p', lwd=1, xlab="Size distribution", ylab="Frequency", zlab="Time",axes=F) From aa5308b98f6a806581a08ee13d655dd468963fe4 Mon Sep 17 00:00:00 2001 From: gmmh Date: Wed, 7 Jan 2015 12:33:06 -0800 Subject: [PATCH 07/21] made directory structure for this script more general and changed the input arguments --- Conversion_Size_Dist.R | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/Conversion_Size_Dist.R b/Conversion_Size_Dist.R index 49219b4..1ea19f9 100644 --- a/Conversion_Size_Dist.R +++ b/Conversion_Size_Dist.R @@ -1,20 +1,22 @@ -# [ribalet@bloom Cell_Division] -# for i in $(seq 6 1 8); do echo "Rscript Conversion_Size_Dist.R $i prochloro Med4_TimeCourse_July2012" | qsub -lwalltime=1:00:00,nodes=1:ppn=1 -N pro_conv$i -d.; done +# This script takes HD.size.class files and makes concatenated distributions with the calibrated cell volume from forward scatter +#arguments of this script: (1) distributuion file location, (2) cat (number of files to concatenate?), (3) phytoplankton group, (4) cruise +# for i in $(seq 6 1 8); do echo "Rscript Conversion_Size_Dist.R ~/DeepDOM/Cell_Division $i prochloro DeepDOM" | qsub -lwalltime=1:00:00,nodes=1:ppn=1 -N pro_conv$i -d.; done args <- commandArgs(TRUE) -cat <- as.numeric(args[1]) -phyto <- as.character(args[2]) -cruise <- as.character(args[3]) +home <- as.character(args[1]) +cat <- as.numeric(args[2]) +phyto <- as.character(args[3]) +cruise <- as.character(args[4]) + -home <- '~/Cell_Division/' #library(rgl) library(zoo) -# home <- "/Volumes/ribalet/Cell_division/" +# home <- "/Volumes/gwennm/DeepDOM/Cell_division/" # cruise <- "Med4_TimeCourse_July2012" # phyto <- "prochloro" @@ -27,12 +29,12 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F" ## SIZE DISTRIBUTION ## ####################### - list <- list.files(paste(home,cruise,"/",sep=""),pattern=paste("HD.size.class_",cruise,"_",phyto,sep="")) + list <- list.files(home,pattern=paste("HD.size.class_",cruise,"_",phyto,sep="")) Size <- NULL for(l in list){ print(l) - s <- read.csv(paste(home,cruise,"/",l,sep="")) + s <- read.csv(paste(home,l,sep="")) Size <- rbind(Size, s) } @@ -161,5 +163,5 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F" distribution[[1]] <- Vhists distribution[[2]] <- N_dist - save(distribution, file=paste(home,cruise,"/", phyto,"_dist_Ncat",m,"_",cruise,sep="")) - print(paste("saving ", home,cruise,"/", phyto,"_dist_Ncat",m,"_",cruise,sep="")) \ No newline at end of file + save(distribution, file=paste(home, phyto,"_dist_Ncat",m,"_",cruise,sep="")) + print(paste("saving ", home, phyto,"_dist_Ncat",m,"_",cruise,sep="")) \ No newline at end of file From 5e9d14ec048ee0e64fe9acf668ebbccde1c99242 Mon Sep 17 00:00:00 2001 From: gmmh Date: Wed, 7 Jan 2015 17:43:12 -0800 Subject: [PATCH 08/21] minor edits to clarify comments --- Conversion_Size_Dist.R | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Conversion_Size_Dist.R b/Conversion_Size_Dist.R index 1ea19f9..4cec2b6 100644 --- a/Conversion_Size_Dist.R +++ b/Conversion_Size_Dist.R @@ -1,5 +1,5 @@ # This script takes HD.size.class files and makes concatenated distributions with the calibrated cell volume from forward scatter -#arguments of this script: (1) distributuion file location, (2) cat (number of files to concatenate?), (3) phytoplankton group, (4) cruise +#arguments of this script: (1) distributuion file location, (2) cat (2^cat number of bins), (3) phytoplankton group, (4) cruise # for i in $(seq 6 1 8); do echo "Rscript Conversion_Size_Dist.R ~/DeepDOM/Cell_Division $i prochloro DeepDOM" | qsub -lwalltime=1:00:00,nodes=1:ppn=1 -N pro_conv$i -d.; done @@ -17,7 +17,7 @@ cruise <- as.character(args[4]) library(zoo) # home <- "/Volumes/gwennm/DeepDOM/Cell_division/" -# cruise <- "Med4_TimeCourse_July2012" +# cruise <- "DeepDOM" # phyto <- "prochloro" @@ -60,9 +60,7 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F" if(phyto == "synecho" | phyto == "pico" | phyto == "prochloro"){ Size$volume <- 10^(0.524*log10(Size$stages/Size$fsc_beads) + 0.283) # Size$volume <- 10^(0.5*log10(Size$stages/Size$fsc_beads))# MIE THEORY - } - - else{ + }else{ Size$volume <- 10^(1.682*log10(Size$stages/Size$fsc_beads) + 0.961) } From 4a2ddfd36cb5e3b6344f5a2ddf31ca714e7974f3 Mon Sep 17 00:00:00 2001 From: gmmh Date: Fri, 9 Jan 2015 11:36:14 -0800 Subject: [PATCH 09/21] fixed minor bug in reading HD.files --- Conversion_Size_Dist.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Conversion_Size_Dist.R b/Conversion_Size_Dist.R index 4cec2b6..b535311 100644 --- a/Conversion_Size_Dist.R +++ b/Conversion_Size_Dist.R @@ -34,7 +34,7 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F" for(l in list){ print(l) - s <- read.csv(paste(home,l,sep="")) + s <- read.csv(paste(home,l,sep="/")) Size <- rbind(Size, s) } @@ -161,5 +161,5 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F" distribution[[1]] <- Vhists distribution[[2]] <- N_dist - save(distribution, file=paste(home, phyto,"_dist_Ncat",m,"_",cruise,sep="")) - print(paste("saving ", home, phyto,"_dist_Ncat",m,"_",cruise,sep="")) \ No newline at end of file + save(distribution, file=paste(home,"/",phyto,"_dist_Ncat",m,"_",cruise,sep="")) + print(paste("saving ", home,"/",phyto,"_dist_Ncat",m,"_",cruise,sep="")) \ No newline at end of file From f252301ff41f615331e99d6564b0a03aca160eb5 Mon Sep 17 00:00:00 2001 From: gmmh Date: Fri, 9 Jan 2015 11:44:42 -0800 Subject: [PATCH 10/21] changed input arguments to allow for different directory structure for Model scripts, input files and output files. I also made the script dependant on popcycle by using get.sfl.table to access the PAR values from the popcycle.db. --- Model_HD_Division_Rate.R | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/Model_HD_Division_Rate.R b/Model_HD_Division_Rate.R index c1df6d9..ba98b7d 100644 --- a/Model_HD_Division_Rate.R +++ b/Model_HD_Division_Rate.R @@ -1,24 +1,18 @@ -# [ribalet@bloom Cell_Division] -# for i in $(seq 0 1 24); do echo "Rscript Model_HD_Division_Rate.R $i synecho Thompson_4" | qsub -lwalltime=24:00:00,nodes=1:ppn=1 -N synGR$i -d.; done -# for i in $(seq 0 1 24); do echo "Rscript Model_HD_Division_Rate.R $i prochloro Thompson_4" | qsub -lwalltime=24:00:00,nodes=1:ppn=1 -N proGR$i -d.; done -# for i in $(seq 0 1 24); do echo "Rscript Model_HD_Division_Rate.R $i synecho MBARI_1" | qsub -lwalltime=24:00:00,nodes=1:ppn=1 -N synGR$i -d.; done -# for i in $(seq 0 1 24); do echo "Rscript Model_HD_Division_Rate.R $i pico MBARI_1" | qsub -lwalltime=24:00:00,nodes=1:ppn=1 -N proGR$i -d.; done -# for i in $(seq 0 1 24); do echo "Rscript Model_HD_Division_Rate.R $i synecho Thompson_10" | qsub -lwalltime=24:00:00,nodes=1:ppn=1 -N synGR$i -d.; done -# for i in $(seq 0 1 24); do echo "Rscript Model_HD_Division_Rate.R $i crypto Crypto_TimeCourse_June2013" | qsub -lwalltime=24:00:00,nodes=1:ppn=1 -N cryptoGR$i -d.; done -# for i in $(seq 0 1 24); do echo "Rscript Model_HD_Division_Rate.R $i crypto Rhodomonas_Feb2014" | qsub -lwalltime=24:00:00,nodes=1:ppn=1 -N cryptoGR$i -d.; done -# for i in $(seq 0 1 24); do echo "Rscript Model_HD_Division_Rate.R $i prochloro Med4_TimeCourse_July2012" | qsub -lwalltime=24:00:00,nodes=1:ppn=1 -N proGR$i -d.; done - +# This script does model optimization on the phyto +# for i in $(seq 0 1 24); do echo "Rscript Model_HD_Division_Rate.R $i prochloro DeepDOM ~/DeepDOM/ssPopModel ~/DeepDOM/Cell_Division ~/DeepDOM/Cell_Division" | qsub -lwalltime=24:00:00,nodes=1:ppn=1 -N synGR$i -d.; done # library(rgl) library(DEoptim) library(zoo) +library(popcycle) -#home <- "/Volumes/ribalet/Cell_division/"; folder <- NULL; cruise <- "Crypto_TimeCourse_June2013" +# script.home <- "/Volumes/gwennm/DeepDOM/ssPopModel" +# in.dir <-"/Volumes/gwennm/DeepDOM/Cell_Division" +# out.dir <- "/Volumes/gwennm/DeepDOM/Cell_Division" -home <- '~/Cell_Division/'; folder <- NULL -source(paste(home,'functions_modelHD.R',sep=""), chdir = TRUE) +source(paste(script.home,'functions_modelHD.R',sep="/"), chdir = TRUE) jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F", "yellow", "#FF7F00", "red", "#7F0000")) @@ -27,6 +21,9 @@ args <- commandArgs(TRUE) t <- as.numeric(args[1]) phyto <- as.character(args[2]) cruise <- as.character(args[3]) +script.home <- as.character(args[4]) +in.dir <-as.character(args[5]) +out.dir <- as.character(args[6]) @@ -52,10 +49,10 @@ m <- 2^6 # number of size class ############## ## PAR DATA ## ############## - - Par.path <- paste(home, folder,cruise,"/Par_",cruise,sep="") - Par <- read.csv(Par.path, sep=",") - Par$time <- as.POSIXct(Par$time, tz="GMT") + #db.name = "/Volumes/gwennm/popcycle/sqlite/popcycle.db") + sfl <- get.sfl.table(db.name) + Par <- sfl[,c("date", "par")] + Par$time <- strptime(Par$date, "%Y-%m-%dT%H:%M:%S", tz="GMT") Par$num.time <- as.numeric(Par$time) @@ -64,12 +61,12 @@ m <- 2^6 # number of size class ####################### # t <- 0 - # phyto <- "crypto" + # phyto <- "prochloro" print(paste("time delay:", t)) print(paste("phytoplankton population:",phyto)) - load(paste(home,folder,cruise,"/", phyto,"_dist_Ncat",m,"_",cruise,sep="")) + load(paste(in.dir,"/", phyto,"_dist_Ncat",m,"_",cruise,sep="")) Vhists <- distribution[[1]] Vhists <- sweep(Vhists, 2, colSums(Vhists), '/') # Normalize each column of VHists to 1 N_dist <- distribution[[2]] @@ -133,7 +130,7 @@ m <- 2^6 # number of size class if(class(proj) !='try-error'){ model <- matrix(cbind(as.array(model), as.array(proj)), nrow=4,ncol=ncol(model)+1) - save(model, file=paste(home,folder,cruise,"/",phyto,"_modelHD_growth_",cruise,"_Ncat",m,"_t",t, sep="")) + save(model, file=paste(out.dir,"/",phyto,"_modelHD_growth_",cruise,"_Ncat",m,"_t",t, sep="")) }else{print("error during optimization")} } From 16c9f4e87bbd32ebddfcd03966e9ee4e055b99f7 Mon Sep 17 00:00:00 2001 From: gmmh Date: Mon, 12 Jan 2015 10:18:31 -0800 Subject: [PATCH 11/21] changed Model script so that they wouldn't try to access the db at the same time while running in parallel, this meant adding another script to make a PAR file --- Conversion_Size_Dist.R | 4 +++- Make_PAR_table.R | 21 +++++++++++++++++++++ Model_HD_Division_Rate.R | 7 +++---- 3 files changed, 27 insertions(+), 5 deletions(-) create mode 100644 Make_PAR_table.R diff --git a/Conversion_Size_Dist.R b/Conversion_Size_Dist.R index b535311..de02ac2 100644 --- a/Conversion_Size_Dist.R +++ b/Conversion_Size_Dist.R @@ -16,7 +16,7 @@ cruise <- as.character(args[4]) #library(rgl) library(zoo) -# home <- "/Volumes/gwennm/DeepDOM/Cell_division/" +# home <- "/Volumes/gwennm/DeepDOM/Cell_division" # cruise <- "DeepDOM" # phyto <- "prochloro" @@ -91,6 +91,8 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F" # percentile <- cut(Size.phyto[,"freq.dist"], 100); plot3d(x=(Size.phyto$volume), y=Size.phyto$num.time, z=Size.phyto$freq.dist, col=jet.colors(100)[percentile], type='l', lwd=2) n.day <- round(diff(range(Size.phyto$time))); print(paste("Number of days in the dataset:",n.day)) + + #broke right here after printing the number of days in the dataset, exit with no error start <- min(Size.phyto$time) ############################## diff --git a/Make_PAR_table.R b/Make_PAR_table.R new file mode 100644 index 0000000..5264d19 --- /dev/null +++ b/Make_PAR_table.R @@ -0,0 +1,21 @@ +### Make_PAR_table.R arg1 arg2 arg3 +## run this script before running Model_HD_Division_Rate +## arg 1= db.name +## arg 2= output/directory +## arg 3= cruise + +library(popcycle) + +args <- commandArgs(TRUE) +db.name <- as.character(args[1]) +out.dir <- as.character(args[2]) +cruise <- as.character(args[3]) + +# db.name = "/Volumes/gwennm/popcycle/sqlite/popcycle.db" +# out.dir = "/Volumes/gwennm/DeepDOM/Cell_Division" +# cruise = "DeepDOM" + + sfl <- get.sfl.table(db.name) + Par <- sfl[,c("date", "par")] + Par$time <- strptime(Par$date, "%Y-%m-%dT%H:%M:%S", tz="GMT") +write.csv(Par[,c("par", "time")], paste0(out.dir, "/PAR_", cruise)) \ No newline at end of file diff --git a/Model_HD_Division_Rate.R b/Model_HD_Division_Rate.R index ba98b7d..0204ff6 100644 --- a/Model_HD_Division_Rate.R +++ b/Model_HD_Division_Rate.R @@ -49,10 +49,9 @@ m <- 2^6 # number of size class ############## ## PAR DATA ## ############## - #db.name = "/Volumes/gwennm/popcycle/sqlite/popcycle.db") - sfl <- get.sfl.table(db.name) - Par <- sfl[,c("date", "par")] - Par$time <- strptime(Par$date, "%Y-%m-%dT%H:%M:%S", tz="GMT") + Par.path <- paste(in.dir, "/PAR_"cruise) + Par <- read.csv(Par.path, sep=",") + Par$time <- as.POSIXct(Par$time, tz= "GMT") Par$num.time <- as.numeric(Par$time) From 41a781ac9f7bd8a237edf14dea4a55287b3e7b20 Mon Sep 17 00:00:00 2001 From: gmmh Date: Mon, 12 Jan 2015 10:23:39 -0800 Subject: [PATCH 12/21] now modified correctly --- Model_HD_Division_Rate.R | 1 - 1 file changed, 1 deletion(-) diff --git a/Model_HD_Division_Rate.R b/Model_HD_Division_Rate.R index 0204ff6..277a338 100644 --- a/Model_HD_Division_Rate.R +++ b/Model_HD_Division_Rate.R @@ -5,7 +5,6 @@ # library(rgl) library(DEoptim) library(zoo) -library(popcycle) # script.home <- "/Volumes/gwennm/DeepDOM/ssPopModel" # in.dir <-"/Volumes/gwennm/DeepDOM/Cell_Division" From 841f21553430e309d423302229efde4339ea9271 Mon Sep 17 00:00:00 2001 From: gmmh Date: Mon, 12 Jan 2015 10:25:46 -0800 Subject: [PATCH 13/21] fixed bug in ordering of args --- Model_HD_Division_Rate.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Model_HD_Division_Rate.R b/Model_HD_Division_Rate.R index 277a338..5d143db 100644 --- a/Model_HD_Division_Rate.R +++ b/Model_HD_Division_Rate.R @@ -6,6 +6,14 @@ library(DEoptim) library(zoo) +args <- commandArgs(TRUE) +t <- as.numeric(args[1]) +phyto <- as.character(args[2]) +cruise <- as.character(args[3]) +script.home <- as.character(args[4]) +in.dir <-as.character(args[5]) +out.dir <- as.character(args[6]) + # script.home <- "/Volumes/gwennm/DeepDOM/ssPopModel" # in.dir <-"/Volumes/gwennm/DeepDOM/Cell_Division" # out.dir <- "/Volumes/gwennm/DeepDOM/Cell_Division" @@ -16,14 +24,6 @@ source(paste(script.home,'functions_modelHD.R',sep="/"), chdir = TRUE) jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F", "yellow", "#FF7F00", "red", "#7F0000")) -args <- commandArgs(TRUE) -t <- as.numeric(args[1]) -phyto <- as.character(args[2]) -cruise <- as.character(args[3]) -script.home <- as.character(args[4]) -in.dir <-as.character(args[5]) -out.dir <- as.character(args[6]) - ############################### From 3a388b56d4855b47bbddb34028e91191a811e13e Mon Sep 17 00:00:00 2001 From: gmmh Date: Mon, 12 Jan 2015 10:40:40 -0800 Subject: [PATCH 14/21] fixed small bugs --- Model_HD_Division_Rate.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Model_HD_Division_Rate.R b/Model_HD_Division_Rate.R index 5d143db..35b6582 100644 --- a/Model_HD_Division_Rate.R +++ b/Model_HD_Division_Rate.R @@ -1,5 +1,5 @@ # This script does model optimization on the phyto -# for i in $(seq 0 1 24); do echo "Rscript Model_HD_Division_Rate.R $i prochloro DeepDOM ~/DeepDOM/ssPopModel ~/DeepDOM/Cell_Division ~/DeepDOM/Cell_Division" | qsub -lwalltime=24:00:00,nodes=1:ppn=1 -N synGR$i -d.; done +# for i in $(seq 0 1 24); do echo "Rscript ~/DeepDOM/ssPopModel/Model_HD_Division_Rate.R $i prochloro DeepDOM ~/DeepDOM/ssPopModel ~/DeepDOM/Cell_Division ~/DeepDOM/Cell_Division" | qsub -lwalltime=24:00:00,nodes=1:ppn=1 -N proGR$i -d.; done # library(rgl) @@ -14,6 +14,9 @@ script.home <- as.character(args[4]) in.dir <-as.character(args[5]) out.dir <- as.character(args[6]) +# t = 1 +# phyto= "prochloro" +# cruise = "DeepDOM" # script.home <- "/Volumes/gwennm/DeepDOM/ssPopModel" # in.dir <-"/Volumes/gwennm/DeepDOM/Cell_Division" # out.dir <- "/Volumes/gwennm/DeepDOM/Cell_Division" @@ -48,7 +51,7 @@ m <- 2^6 # number of size class ############## ## PAR DATA ## ############## - Par.path <- paste(in.dir, "/PAR_"cruise) + Par.path <- paste0(in.dir,"/PAR_",cruise) Par <- read.csv(Par.path, sep=",") Par$time <- as.POSIXct(Par$time, tz= "GMT") Par$num.time <- as.numeric(Par$time) @@ -100,7 +103,7 @@ m <- 2^6 # number of size class next } print(paste("calculating growth projection from ",start , "to",end)) - + #plot(Par$time, Par$par, type='o'); points(c(start, end),c(0,0), col='red',pch=16, cex=2) From 3be697c537888b975bbd8761ce0ec18b9f993bdb Mon Sep 17 00:00:00 2001 From: Francois Ribalet Date: Tue, 13 Jan 2015 12:31:29 -0800 Subject: [PATCH 15/21] add script for visualization of model output --- ModelOutput_Viz.R | 83 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 ModelOutput_Viz.R diff --git a/ModelOutput_Viz.R b/ModelOutput_Viz.R new file mode 100644 index 0000000..6f86598 --- /dev/null +++ b/ModelOutput_Viz.R @@ -0,0 +1,83 @@ +## MODEL +cruise <- "Thompson_9" +location.model <- "/Volumes/ribalet/Cell_Division/" +phyto <- 'prochloro' +cat <- 64 # number of size bin + + +all.filelist <- list.files(paste(location.model,cruise,sep="/"),pattern=paste(phyto,"_modelHD_growth_",cruise,"_Ncat",cat,sep="")) +filelist <- all.filelist[grep(pattern=paste(phyto), all.filelist)] + +n <- c <- 1 +Conc.all <- N.proj.all <- V.hist.all <- div.rate <- para.all <- Col <- NULL + +for(file in filelist){ + #file <- filelist[11] + load(paste(location.model ,cruise,file, sep="/")) + print(file) + print(n) + dim <- conc.proj.all <- n.proj.all <- v.hist.all <- dr.all <- p.all <- NULL + for(i in seq(2,as.numeric(n.day),by=1)){ + n.proj <- model[4,i][[1]] + n.proj.all <- cbind(n.proj.all, n.proj) + + conc.proj <- cbind(as.numeric(colnames(n.proj)), as.numeric(colSums(n.proj))) + conc.proj.all <- rbind(conc.proj.all, conc.proj) + + dr <- model[2,i][[1]] + h.dr <- cbind(as.numeric(colnames(dr)), as.numeric(dr)) + dr.all <- rbind(dr.all, h.dr) + + v.proj <- model[3,i][[1]] + v.hist.all <- cbind(v.hist.all, v.proj) + + para <- model[1,i][[1]] + param <- cbind(time=as.numeric(colnames(n.proj)), para) + p.all <- rbind(p.all, param) + } + + + div.rate <- rbind(div.rate, dr.all) + N.proj.all <- cbind(N.proj.all, n.proj.all) + Conc.all <- rbind(Conc.all, conc.proj.all) + V.hist.all <- cbind(V.hist.all, v.hist.all) + para.all <- rbind(para.all, p.all) + + col <- rep(c, nrow(dr.all)) + Col <- c(Col,col) + + leg <- unlist(list(strsplit(filelist,"_t")))[seq(2,length(filelist[1:n])*2,2)] + + layout(matrix(c(1,1,2:7),4,2, byrow=T)) + par(pty='m') + plot(div.rate, ylab="Div Rate", xlab="time",col=Col) + #abline(v=night$UNIXtime,col='lightgrey');points(div.rate,col=Col) + legend("topleft",legend=leg, col=1:c, ncol=length(leg), pch=1) + plot(para.all[,"time"], para.all[,"gmax"], ylab="gmax", xlab="time",col = Col) + plot(para.all[,"time"], para.all[,"dmax"],ylab="dmax", xlab="time",col = Col) + plot(para.all[,"time"], para.all[,"a"],ylab="a", xlab="time",col = Col) + plot(para.all[,"time"], para.all[,"b"],ylab="b", xlab="time",col = Col) + plot(para.all[,"time"], para.all[,"E_star"],ylab="E_star", xlab="time",col = Col) + plot(para.all[,"time"], para.all[,"resnorm"],ylab="resnorm", xlab="time",col = Col) + + # # + # names(para) <- c("gmax","a","b","E_star","dmax","resnorm") + # par(mfrow=c(4,2)) + # barplot(d.GR, col='grey', main="GR") + # for(i in 1:6) barplot(para[,i], main=colnames(para)[i]) +n <- n + 1 +c <- c + 1 +} + +### ORDERING DATA by TIME + + Div.rate <- div.rate[order(div.rate[,1]),] + Nproj <- N.proj.all[,order(as.numeric(colnames(N.proj.all)))] + Conc.proj <- Conc.all[order(Conc.all[,1]),] + Vproj <- V.hist.all[,order(as.numeric(colnames(V.hist.all)))] + Para.all <- para.all[order(para.all[,"time"]),] + + ### VISUALIZATION + para <- Vproj + percentile <- cut(unlist(para), 100); plot3d(rep(1:dim(para)[1], dim(para)[2]), rep(1:dim(para)[2], each=dim(para)[1]), z=matrix(para), col=jet.colors(100)[percentile], type='l', lwd=3, xlab="size class", ylab="time", zlab="Frequency") + From 9f220b05a15714692c5244e60d8fa60c2b4e0074 Mon Sep 17 00:00:00 2001 From: gmmh Date: Tue, 13 Jan 2015 15:45:33 -0800 Subject: [PATCH 16/21] minor edits --- Viz_Size_Dist.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Viz_Size_Dist.R b/Viz_Size_Dist.R index 486ef4c..2854f3b 100644 --- a/Viz_Size_Dist.R +++ b/Viz_Size_Dist.R @@ -57,7 +57,7 @@ m <- 64 print(paste("phytoplankton population:",phyto)) - load(paste(home,cruise,"/", phyto,"_dist_Ncat",m,"_",cruise,sep="")) + load(paste(home,"/", phyto,"_dist_Ncat",m,"_",cruise,sep="")) Vhists <- distribution[[1]] Vhists <- sweep(Vhists, 2, colSums(Vhists), '/') # Normalize each column of VHists to 1 From 1dacbc4b123e1829619d17e092bb5e61b4185abe Mon Sep 17 00:00:00 2001 From: gmmh Date: Tue, 13 Jan 2015 16:28:49 -0800 Subject: [PATCH 17/21] playing around with script, needed to make minor changes to use --- ModelOutput_Viz.R | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/ModelOutput_Viz.R b/ModelOutput_Viz.R index 6f86598..81a5df0 100644 --- a/ModelOutput_Viz.R +++ b/ModelOutput_Viz.R @@ -1,19 +1,23 @@ ## MODEL -cruise <- "Thompson_9" -location.model <- "/Volumes/ribalet/Cell_Division/" +cruise <- "DeepDOM" +location.model <- "/Volumes/gwennm/DeepDOM/Cell_Division" phyto <- 'prochloro' -cat <- 64 # number of size bin +cat <- 2^6 # number of size bin +n.day <- 39 #number of days in dataset, need for below because it did not find in loading model, may be a bug somewhere? +library(rgl) -all.filelist <- list.files(paste(location.model,cruise,sep="/"),pattern=paste(phyto,"_modelHD_growth_",cruise,"_Ncat",cat,sep="")) + +all.filelist <- list.files(paste0(location.model,"/"),pattern=paste0(phyto,"_modelHD_growth_",cruise,"_Ncat",cat)) filelist <- all.filelist[grep(pattern=paste(phyto), all.filelist)] n <- c <- 1 Conc.all <- N.proj.all <- V.hist.all <- div.rate <- para.all <- Col <- NULL + for(file in filelist){ - #file <- filelist[11] - load(paste(location.model ,cruise,file, sep="/")) + #file <- filelist[2] + load(paste(location.model,file, sep="/")) print(file) print(n) dim <- conc.proj.all <- n.proj.all <- v.hist.all <- dr.all <- p.all <- NULL @@ -77,7 +81,8 @@ c <- c + 1 Vproj <- V.hist.all[,order(as.numeric(colnames(V.hist.all)))] Para.all <- para.all[order(para.all[,"time"]),] - ### VISUALIZATION + ### VISUALIZATION + jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F", "yellow", "#FF7F00", "red", "#7F0000")) para <- Vproj percentile <- cut(unlist(para), 100); plot3d(rep(1:dim(para)[1], dim(para)[2]), rep(1:dim(para)[2], each=dim(para)[1]), z=matrix(para), col=jet.colors(100)[percentile], type='l', lwd=3, xlab="size class", ylab="time", zlab="Frequency") From 7d9f05850c9b2fe6c8ad6dff0361ca0bdf85801d Mon Sep 17 00:00:00 2001 From: gmmh Date: Wed, 14 Jan 2015 12:08:26 -0800 Subject: [PATCH 18/21] commented out Na.approx function which fails when interpolating longer gaps or time series which start with NA --- Conversion_Size_Dist.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Conversion_Size_Dist.R b/Conversion_Size_Dist.R index de02ac2..abfcb56 100644 --- a/Conversion_Size_Dist.R +++ b/Conversion_Size_Dist.R @@ -1,6 +1,6 @@ # This script takes HD.size.class files and makes concatenated distributions with the calibrated cell volume from forward scatter #arguments of this script: (1) distributuion file location, (2) cat (2^cat number of bins), (3) phytoplankton group, (4) cruise -# for i in $(seq 6 1 8); do echo "Rscript Conversion_Size_Dist.R ~/DeepDOM/Cell_Division $i prochloro DeepDOM" | qsub -lwalltime=1:00:00,nodes=1:ppn=1 -N pro_conv$i -d.; done +# for i in $(seq 6 1 8); do echo "Rscript ~/DeepDOM/ssPopModel/Conversion_Size_Dist.R ~/DeepDOM/Cell_Division $i prochloro DeepDOM" | qsub -lwalltime=8:00:00,nodes=1:ppn=1 -N pro_conv$i -d.; done args <- commandArgs(TRUE) @@ -149,12 +149,12 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F" N_dist <- t(tapply(Size.volume$HD.size, list(h,Size.volume$HD.volume), mean)) ### NA interpolation - Vhists <- try(t(apply(Vhists, 1, function(x) na.approx(x, na.rm=F)))) - N_dist <- try(t(apply(N_dist, 1, function(x) na.approx(x, na.rm=F)))) + # Vhists <- try(t(apply(Vhists, 1, function(x) na.approx(x, na.rm=F)))) + # N_dist <- try(t(apply(N_dist, 1, function(x) na.approx(x, na.rm=F)))) - id <- findInterval(h.time, na.approx(time, na.rm=F)) + # id <- findInterval(h.time, na.approx(time, na.rm=F)) - colnames(Vhists) <- colnames(N_dist) <- h.time[id] + # colnames(Vhists) <- colnames(N_dist) <- h.time[id] # para <- Vhists; percentile <- cut(unlist(para), 100); plot3d(log(rep(as.numeric(row.names(para)), dim(para)[2])), rep(as.numeric(colnames(para)), each=dim(para)[1]) , Vhists , col=jet.colors(100)[percentile], type='l', lwd=6, xlab="size class", ylab="time", zlab="Frequency") From 9419c85d2c648bd25d3fcc01c2432e49c62b28e1 Mon Sep 17 00:00:00 2001 From: gmmh Date: Wed, 14 Jan 2015 15:03:35 -0800 Subject: [PATCH 19/21] Fixed an important bug that could result in reodering of some of the distributions in time --- Conversion_Size_Dist.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Conversion_Size_Dist.R b/Conversion_Size_Dist.R index abfcb56..bcefccf 100644 --- a/Conversion_Size_Dist.R +++ b/Conversion_Size_Dist.R @@ -40,6 +40,7 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F" Size$time <- as.POSIXct(Size$time, tz="GMT") Size$num.time <- as.numeric(Size$time) + Size <- Size[order(Size$num.time),] @@ -99,7 +100,7 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F" ## CELL VOLUME DISTRIBUTION ## ############################## -# cat <- 8 +# cat <- 6 ############################### m <- 2^cat # number of Size class @@ -132,9 +133,11 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F" HD.volume <- as.vector(rep(volbins, length(unique(Size.phyto$time)))) HD.time <- rep(unique(Size.phyto$time), each=m) HD.hist <- tapply(Size.phyto$freq.dist, list(HD,Size.phyto$time), mean) - HD.hist <- as.vector(apply(HD.hist, 2, function(x) na.approx(x, na.rm=F))) + HD.hist <- as.vector(HD.hist) + # HD.hist <- as.vector(apply(HD.hist, 2, function(x) na.approx(x, na.rm=F))) HD.size <- tapply(Size.phyto$size.dist, list(HD,Size.phyto$time), mean) - HD.size <- as.vector(apply(HD.size, 2, function(x) na.approx(x, na.rm=F))) + HD.size <- as.vector(HD.size) + # HD.size <- as.vector(apply(HD.size, 2, function(x) na.approx(x, na.rm=F))) # para <- HD.hist; percentile <- cut(para, 100); plot3d(log(HD.volume), HD.time, HD.hist, col=jet.colors(100)[percentile], type='l', lwd=2, xlab="size class", ylab="time", zlab="Frequency") @@ -154,7 +157,7 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F" # id <- findInterval(h.time, na.approx(time, na.rm=F)) - # colnames(Vhists) <- colnames(N_dist) <- h.time[id] + colnames(Vhists) <- colnames(N_dist) <- time # para <- Vhists; percentile <- cut(unlist(para), 100); plot3d(log(rep(as.numeric(row.names(para)), dim(para)[2])), rep(as.numeric(colnames(para)), each=dim(para)[1]) , Vhists , col=jet.colors(100)[percentile], type='l', lwd=6, xlab="size class", ylab="time", zlab="Frequency") From 03c9691ac521caa21556d99c2d18cfa9fbbaf03d Mon Sep 17 00:00:00 2001 From: gmmh Date: Fri, 16 Jan 2015 10:47:42 -0800 Subject: [PATCH 20/21] worked to make these scripts handle NAs and gaps in data without causing bugs or prematurely ending the modeling process. --- Conversion_Size_Dist.R | 2 ++ Model_HD_Division_Rate.R | 16 ++++++++++++---- Viz_Size_Dist.R | 4 ++-- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/Conversion_Size_Dist.R b/Conversion_Size_Dist.R index bcefccf..447d881 100644 --- a/Conversion_Size_Dist.R +++ b/Conversion_Size_Dist.R @@ -165,6 +165,8 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F" distribution <- list() distribution[[1]] <- Vhists distribution[[2]] <- N_dist + # para <- distribution[[1]]; percentile <- cut(unlist(para), 100); plot3d(log(rep(as.numeric(row.names(para)), dim(para)[2])), rep(as.numeric(colnames(para)), each=dim(para)[1]) , Vhists , col=jet.colors(100)[percentile], type='l', lwd=6, xlab="size class", ylab="time", zlab="Frequency") + save(distribution, file=paste(home,"/",phyto,"_dist_Ncat",m,"_",cruise,sep="")) print(paste("saving ", home,"/",phyto,"_dist_Ncat",m,"_",cruise,sep="")) \ No newline at end of file diff --git a/Model_HD_Division_Rate.R b/Model_HD_Division_Rate.R index 35b6582..9db1dce 100644 --- a/Model_HD_Division_Rate.R +++ b/Model_HD_Division_Rate.R @@ -1,5 +1,5 @@ # This script does model optimization on the phyto -# for i in $(seq 0 1 24); do echo "Rscript ~/DeepDOM/ssPopModel/Model_HD_Division_Rate.R $i prochloro DeepDOM ~/DeepDOM/ssPopModel ~/DeepDOM/Cell_Division ~/DeepDOM/Cell_Division" | qsub -lwalltime=24:00:00,nodes=1:ppn=1 -N proGR$i -d.; done +# for i in $(seq 0 1 24); do echo "Rscript ~/DeepDOM/ssPopModel/Model_HD_Division_Rate.R $i prochloro DeepDOM ~/DeepDOM/ssPopModel ~/DeepDOM/Cell_Division ~/DeepDOM/Cell_Division" | qsub -lwalltime=30:00:00,nodes=1:ppn=1 -N proGR$i -d.; done # library(rgl) @@ -69,7 +69,7 @@ m <- 2^6 # number of size class load(paste(in.dir,"/", phyto,"_dist_Ncat",m,"_",cruise,sep="")) Vhists <- distribution[[1]] - Vhists <- sweep(Vhists, 2, colSums(Vhists), '/') # Normalize each column of VHists to 1 + #Vhists <- sweep(Vhists, 2, colSums(Vhists), '/') # Normalize each column of VHists to 1 #this caused a bug erasing the first section of my data, because colSums = NA N_dist <- distribution[[2]] volbins <- as.numeric(row.names(Vhists)) @@ -77,7 +77,7 @@ m <- 2^6 # number of size class time.numc <- as.numeric(colnames(Vhists)) time <- as.POSIXct(time.numc, origin="1970-01-01" ,tz="GMT") - n.day <- round(diff(range(time))); print(paste("Number of days in the dataset:",n.day)) + n.day <- round(diff(range(na.omit(time)))); print(paste("Number of days in the dataset:",n.day)) # para <- Vhists; percentile <- cut(unlist(para), 100); plot3d(log(rep(as.numeric(row.names(para)), dim(para)[2])), rep(as.numeric(colnames(para)), each=dim(para)[1]) , Vhists , col=jet.colors(100)[percentile], type='l', lwd=6, xlab="size class", ylab="time", zlab="Frequency") @@ -94,7 +94,7 @@ m <- 2^6 # number of size class for(i in seq(1,length(time)-24, 24)){ print(paste("starting hour:",i+t)) - #i <- 96 + #i <- 120 start <- time[i+t] end <- time[(i+t)+24] @@ -102,6 +102,14 @@ m <- 2^6 # number of size class print("cycle is less than 24h") next } + if(is.na(start)){ + print("NA in start time, skip ahead") + next + } + if(sum(is.na(time[i:(i+24)+t]))>4){ + print("more than 4 hours missing from time period, skip ahead") + next + } print(paste("calculating growth projection from ",start , "to",end)) diff --git a/Viz_Size_Dist.R b/Viz_Size_Dist.R index 2854f3b..79c0b35 100644 --- a/Viz_Size_Dist.R +++ b/Viz_Size_Dist.R @@ -57,10 +57,10 @@ m <- 64 print(paste("phytoplankton population:",phyto)) - load(paste(home,"/", phyto,"_dist_Ncat",m,"_",cruise,sep="")) + load(paste(home, phyto,"_dist_Ncat",m,"_",cruise,sep="")) Vhists <- distribution[[1]] - Vhists <- sweep(Vhists, 2, colSums(Vhists), '/') # Normalize each column of VHists to 1 + #Vhists <- sweep(Vhists, 2, colSums(Vhists), '/') # Normalize each column of VHists to 1 N_dist <- distribution[[2]] volbins <- as.numeric(row.names(Vhists)) From f04f5fe7e3a2388339bdfcd430e577e20826af43 Mon Sep 17 00:00:00 2001 From: gmmh Date: Tue, 20 Jan 2015 16:20:51 -0800 Subject: [PATCH 21/21] I used this version of the code to run the model on my cell distributions. Had to modify treatment of NAs because I had some empty bins in my distributions that were causing an error. --- ModelOutput_Viz.R | 64 ++++++++++++++++++++++++++++++---------- Model_HD_Division_Rate.R | 8 ++++- functions_modelHD.R | 8 +++-- 3 files changed, 61 insertions(+), 19 deletions(-) diff --git a/ModelOutput_Viz.R b/ModelOutput_Viz.R index 81a5df0..52f2e07 100644 --- a/ModelOutput_Viz.R +++ b/ModelOutput_Viz.R @@ -2,10 +2,12 @@ cruise <- "DeepDOM" location.model <- "/Volumes/gwennm/DeepDOM/Cell_Division" phyto <- 'prochloro' +out.dir <- "/Volumes/gwennm/DeepDOM/Cell_Division" cat <- 2^6 # number of size bin n.day <- 39 #number of days in dataset, need for below because it did not find in loading model, may be a bug somewhere? library(rgl) +library(ggplot2) all.filelist <- list.files(paste0(location.model,"/"),pattern=paste0(phyto,"_modelHD_growth_",cruise,"_Ncat",cat)) @@ -22,20 +24,20 @@ for(file in filelist){ print(n) dim <- conc.proj.all <- n.proj.all <- v.hist.all <- dr.all <- p.all <- NULL for(i in seq(2,as.numeric(n.day),by=1)){ - n.proj <- model[4,i][[1]] + try(n.proj <- model[4,i][[1]]) n.proj.all <- cbind(n.proj.all, n.proj) conc.proj <- cbind(as.numeric(colnames(n.proj)), as.numeric(colSums(n.proj))) conc.proj.all <- rbind(conc.proj.all, conc.proj) - dr <- model[2,i][[1]] + try(dr <- model[2,i][[1]]) h.dr <- cbind(as.numeric(colnames(dr)), as.numeric(dr)) dr.all <- rbind(dr.all, h.dr) - v.proj <- model[3,i][[1]] + try(v.proj <- model[3,i][[1]]) v.hist.all <- cbind(v.hist.all, v.proj) - para <- model[1,i][[1]] + try(para <- model[1,i][[1]]) param <- cbind(time=as.numeric(colnames(n.proj)), para) p.all <- rbind(p.all, param) } @@ -52,17 +54,17 @@ for(file in filelist){ leg <- unlist(list(strsplit(filelist,"_t")))[seq(2,length(filelist[1:n])*2,2)] - layout(matrix(c(1,1,2:7),4,2, byrow=T)) - par(pty='m') - plot(div.rate, ylab="Div Rate", xlab="time",col=Col) - #abline(v=night$UNIXtime,col='lightgrey');points(div.rate,col=Col) - legend("topleft",legend=leg, col=1:c, ncol=length(leg), pch=1) - plot(para.all[,"time"], para.all[,"gmax"], ylab="gmax", xlab="time",col = Col) - plot(para.all[,"time"], para.all[,"dmax"],ylab="dmax", xlab="time",col = Col) - plot(para.all[,"time"], para.all[,"a"],ylab="a", xlab="time",col = Col) - plot(para.all[,"time"], para.all[,"b"],ylab="b", xlab="time",col = Col) - plot(para.all[,"time"], para.all[,"E_star"],ylab="E_star", xlab="time",col = Col) - plot(para.all[,"time"], para.all[,"resnorm"],ylab="resnorm", xlab="time",col = Col) + # layout(matrix(c(1,1,2:7),4,2, byrow=T)) + # par(pty='m') + # plot(div.rate, ylab="Div Rate", xlab="time",col=Col) + # #abline(v=night$UNIXtime,col='lightgrey');points(div.rate,col=Col) + # legend("topleft",legend=leg, col=1:c, ncol=length(leg), pch=1) + # plot(para.all[,"time"], para.all[,"gmax"], ylab="gmax", xlab="time",col = Col) + # plot(para.all[,"time"], para.all[,"dmax"],ylab="dmax", xlab="time",col = Col) + # plot(para.all[,"time"], para.all[,"a"],ylab="a", xlab="time",col = Col) + # plot(para.all[,"time"], para.all[,"b"],ylab="b", xlab="time",col = Col) + # plot(para.all[,"time"], para.all[,"E_star"],ylab="E_star", xlab="time",col = Col) + # plot(para.all[,"time"], para.all[,"resnorm"],ylab="resnorm", xlab="time",col = Col) # # # names(para) <- c("gmax","a","b","E_star","dmax","resnorm") @@ -86,3 +88,35 @@ c <- c + 1 para <- Vproj percentile <- cut(unlist(para), 100); plot3d(rep(1:dim(para)[1], dim(para)[2]), rep(1:dim(para)[2], each=dim(para)[1]), z=matrix(para), col=jet.colors(100)[percentile], type='l', lwd=3, xlab="size class", ylab="time", zlab="Frequency") + +#Average estimates for division rate from same time point and make new table with Ave, SE and number of estimates +Div.rate.ave <- as.data.frame(matrix(data= NA, nrow=length(na.omit(unique(Div.rate[,1]))) , ncol=5)) +colnames(Div.rate.ave)=c("time", "div.ave", "div.sd", "div.se", "div.n") +Div.rate.ave$time <- na.omit(unique(Div.rate[,1])) + for(n in 1:length(Div.rate.ave$time)){ + sum = sum(na.omit(Div.rate[,1] == Div.rate.ave$time[n])) + index <- which(Div.rate[,1] == Div.rate.ave$time[n]) + if(sum <2){ + Div.rate.ave$div.ave[n] <-Div.rate[index,2] + Div.rate.ave$div.sd[n] <- NA + Div.rate.ave$div.n[n] <- 1 + Div.rate.ave$div.se[n] <- NA + next + } + sub <- Div.rate[index,] + Div.rate.ave$div.ave[n] <- mean(na.omit(sub[,2])) + Div.rate.ave$div.sd[n] <- sd(na.omit(sub[,2])) + Div.rate.ave$div.n[n] <- sum(!is.na(sub[,2])) + Div.rate.ave$div.se[n] <- Div.rate.ave$div.sd[n]/sqrt(Div.rate.ave$div.n[n]) + } + +#plot the division rates +- SD +plot(Div.rate.ave$time, Div.rate.ave$div.ave, "l", ylim=c(0, max(na.omit(Div.rate.ave$div.ave + Div.rate.ave$div.sd))), ylab="Division rate (hr-1)", xlab= "Time") +title(main=paste("Model Growth Rate of", phyto), sub=paste("Cruise:", cruise)) +#polygon(c(Div.rate.ave$time, rev(Div.rate.ave$time)), c(Div.rate.ave$div.ave, rev(Div.rate.ave$div.ave + Div.rate.ave$div.sd)), col= "grey", border=NA) + arrows(Div.rate.ave$time, Div.rate.ave$div.ave, Div.rate.ave$time, Div.rate.ave$div.ave + Div.rate.ave$div.sd, angle= 90, length=0.01, lwd= 0.15) + arrows(Div.rate.ave$time, Div.rate.ave$div.ave, Div.rate.ave$time, Div.rate.ave$div.ave - Div.rate.ave$div.sd, angle= 90, length=0.01, lwd= 0.15) + + + +write.table(Div.rate.ave, paste0(out.dir, "/", phyto, "Model_Division_summary.csv"), sep=",", row.names=F) \ No newline at end of file diff --git a/Model_HD_Division_Rate.R b/Model_HD_Division_Rate.R index 9db1dce..36048be 100644 --- a/Model_HD_Division_Rate.R +++ b/Model_HD_Division_Rate.R @@ -94,7 +94,7 @@ m <- 2^6 # number of size class for(i in seq(1,length(time)-24, 24)){ print(paste("starting hour:",i+t)) - #i <- 120 + #i <- 25 start <- time[i+t] end <- time[(i+t)+24] @@ -118,6 +118,12 @@ m <- 2^6 # number of size class ### SELECT SIZE DISTRIBUTION for DAY i V.hists <- Vhists[,c(i:(i+24)+t)] N.dist <- N_dist[,c(i:(i+24)+t)] + + # #NAs break this part and need to be made into zeros + mk.zero <- which(is.na(V.hists)) + V.hists[mk.zero] <- 0 + mk.zero <- which(is.na(N.dist)) + N.dist[mk.zero] <- 0 # para <- V.hists; percentile <- cut(unlist(para), 100); plot3d(log(rep(as.numeric(row.names(para))), dim(para)[2]), rep(as.numeric(colnames(para)), each=dim(para)[1]), para , col=jet.colors(100)[percentile], type='l', lwd=6, xlab="size class", ylab="time", zlab="Frequency") diff --git a/functions_modelHD.R b/functions_modelHD.R index 168954a..27905e8 100644 --- a/functions_modelHD.R +++ b/functions_modelHD.R @@ -94,13 +94,14 @@ matrix.conct.fast <- function(hr, Einterp, volbins, gmax, dmax, a, b, E_star){ for(hr in 1:24){ - B <- matrix.conct.fast(hr=hr-1, Einterp=Einterp, volbins=volbins, gmax=as.numeric(params[1]), dmax=as.numeric(params[2]), a=as.numeric(params[3]),b=as.numeric(params[4]), E_star=as.numeric(params[5])) + B <- matrix.conct.fast(hr=hr-1, Einterp=Einterp, volbins=volbins, gmax=as.numeric(params[1]), dmax=as.numeric(params[2]), a=as.numeric(params[3]),b=as.numeric(params[4]), E_star=as.numeric(params[5])) + wt <- B %*% V.hists[,hr] # calculate the projected size-frequency distribution wt.norm <- wt/sum(wt, na.rm=T) # normalize distribution sigma[,hr] <- (round(N.dist[, hr+1] - TotN[hr+1]*wt.norm)^2) #observed value - fitted value #sigma[,hr] <- abs(V.hists[, hr+1] - wt.norm) #observed value - fitted value } - sigma <- colSums(sigma)/colSums(N.dist[,-1]) + sigma <- colSums(sigma)/colSums(na.omit(N.dist[,-1])) sigma <- sum(sigma, na.rm=T) return(sigma) @@ -121,7 +122,8 @@ determine.opt.para <- function(V.hists,N.dist,Edata,volbins){ dt <- 1/(resol/10) # dt <- 1/6; breaks <- 25 ## MATLAB - TotN <- matrix(colSums(N.dist), ncol=breaks) + + TotN <- matrix(colSums(na.omit(N.dist)), ncol=breaks) ti <- seq(min(Edata[,1],na.rm=T),max(Edata[,1],na.rm=T), length.out=breaks/dt) ep <- data.frame(spline(Edata[,1], Edata[,2], xout=ti)) #interpolate E data according to dt resolution Einterp <- ep$y