diff --git a/Conversion_Size_Dist.R b/Conversion_Size_Dist.R index 49219b4..447d881 100644 --- a/Conversion_Size_Dist.R +++ b/Conversion_Size_Dist.R @@ -1,21 +1,23 @@ -# [ribalet@bloom Cell_Division] -# for i in $(seq 6 1 8); do echo "Rscript Conversion_Size_Dist.R $i prochloro Med4_TimeCourse_July2012" | qsub -lwalltime=1:00:00,nodes=1:ppn=1 -N pro_conv$i -d.; done +# This script takes HD.size.class files and makes concatenated distributions with the calibrated cell volume from forward scatter +#arguments of this script: (1) distributuion file location, (2) cat (2^cat number of bins), (3) phytoplankton group, (4) cruise +# for i in $(seq 6 1 8); do echo "Rscript ~/DeepDOM/ssPopModel/Conversion_Size_Dist.R ~/DeepDOM/Cell_Division $i prochloro DeepDOM" | qsub -lwalltime=8:00:00,nodes=1:ppn=1 -N pro_conv$i -d.; done args <- commandArgs(TRUE) -cat <- as.numeric(args[1]) -phyto <- as.character(args[2]) -cruise <- as.character(args[3]) +home <- as.character(args[1]) +cat <- as.numeric(args[2]) +phyto <- as.character(args[3]) +cruise <- as.character(args[4]) + -home <- '~/Cell_Division/' #library(rgl) library(zoo) -# home <- "/Volumes/ribalet/Cell_division/" -# cruise <- "Med4_TimeCourse_July2012" +# home <- "/Volumes/gwennm/DeepDOM/Cell_division" +# cruise <- "DeepDOM" # phyto <- "prochloro" @@ -27,17 +29,18 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F" ## SIZE DISTRIBUTION ## ####################### - list <- list.files(paste(home,cruise,"/",sep=""),pattern=paste("HD.size.class_",cruise,"_",phyto,sep="")) + list <- list.files(home,pattern=paste("HD.size.class_",cruise,"_",phyto,sep="")) Size <- NULL for(l in list){ print(l) - s <- read.csv(paste(home,cruise,"/",l,sep="")) + s <- read.csv(paste(home,l,sep="/")) Size <- rbind(Size, s) } Size$time <- as.POSIXct(Size$time, tz="GMT") Size$num.time <- as.numeric(Size$time) + Size <- Size[order(Size$num.time),] @@ -58,9 +61,7 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F" if(phyto == "synecho" | phyto == "pico" | phyto == "prochloro"){ Size$volume <- 10^(0.524*log10(Size$stages/Size$fsc_beads) + 0.283) # Size$volume <- 10^(0.5*log10(Size$stages/Size$fsc_beads))# MIE THEORY - } - - else{ + }else{ Size$volume <- 10^(1.682*log10(Size$stages/Size$fsc_beads) + 0.961) } @@ -91,13 +92,15 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F" # percentile <- cut(Size.phyto[,"freq.dist"], 100); plot3d(x=(Size.phyto$volume), y=Size.phyto$num.time, z=Size.phyto$freq.dist, col=jet.colors(100)[percentile], type='l', lwd=2) n.day <- round(diff(range(Size.phyto$time))); print(paste("Number of days in the dataset:",n.day)) + + #broke right here after printing the number of days in the dataset, exit with no error start <- min(Size.phyto$time) ############################## ## CELL VOLUME DISTRIBUTION ## ############################## -# cat <- 8 +# cat <- 6 ############################### m <- 2^cat # number of Size class @@ -130,9 +133,11 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F" HD.volume <- as.vector(rep(volbins, length(unique(Size.phyto$time)))) HD.time <- rep(unique(Size.phyto$time), each=m) HD.hist <- tapply(Size.phyto$freq.dist, list(HD,Size.phyto$time), mean) - HD.hist <- as.vector(apply(HD.hist, 2, function(x) na.approx(x, na.rm=F))) + HD.hist <- as.vector(HD.hist) + # HD.hist <- as.vector(apply(HD.hist, 2, function(x) na.approx(x, na.rm=F))) HD.size <- tapply(Size.phyto$size.dist, list(HD,Size.phyto$time), mean) - HD.size <- as.vector(apply(HD.size, 2, function(x) na.approx(x, na.rm=F))) + HD.size <- as.vector(HD.size) + # HD.size <- as.vector(apply(HD.size, 2, function(x) na.approx(x, na.rm=F))) # para <- HD.hist; percentile <- cut(para, 100); plot3d(log(HD.volume), HD.time, HD.hist, col=jet.colors(100)[percentile], type='l', lwd=2, xlab="size class", ylab="time", zlab="Frequency") @@ -147,12 +152,12 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F" N_dist <- t(tapply(Size.volume$HD.size, list(h,Size.volume$HD.volume), mean)) ### NA interpolation - Vhists <- try(t(apply(Vhists, 1, function(x) na.approx(x, na.rm=F)))) - N_dist <- try(t(apply(N_dist, 1, function(x) na.approx(x, na.rm=F)))) + # Vhists <- try(t(apply(Vhists, 1, function(x) na.approx(x, na.rm=F)))) + # N_dist <- try(t(apply(N_dist, 1, function(x) na.approx(x, na.rm=F)))) - id <- findInterval(h.time, na.approx(time, na.rm=F)) + # id <- findInterval(h.time, na.approx(time, na.rm=F)) - colnames(Vhists) <- colnames(N_dist) <- h.time[id] + colnames(Vhists) <- colnames(N_dist) <- time # para <- Vhists; percentile <- cut(unlist(para), 100); plot3d(log(rep(as.numeric(row.names(para)), dim(para)[2])), rep(as.numeric(colnames(para)), each=dim(para)[1]) , Vhists , col=jet.colors(100)[percentile], type='l', lwd=6, xlab="size class", ylab="time", zlab="Frequency") @@ -160,6 +165,8 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F" distribution <- list() distribution[[1]] <- Vhists distribution[[2]] <- N_dist + # para <- distribution[[1]]; percentile <- cut(unlist(para), 100); plot3d(log(rep(as.numeric(row.names(para)), dim(para)[2])), rep(as.numeric(colnames(para)), each=dim(para)[1]) , Vhists , col=jet.colors(100)[percentile], type='l', lwd=6, xlab="size class", ylab="time", zlab="Frequency") + - save(distribution, file=paste(home,cruise,"/", phyto,"_dist_Ncat",m,"_",cruise,sep="")) - print(paste("saving ", home,cruise,"/", phyto,"_dist_Ncat",m,"_",cruise,sep="")) \ No newline at end of file + save(distribution, file=paste(home,"/",phyto,"_dist_Ncat",m,"_",cruise,sep="")) + print(paste("saving ", home,"/",phyto,"_dist_Ncat",m,"_",cruise,sep="")) \ No newline at end of file diff --git a/Cruise_Size_distribution.R b/Cruise_Size_distribution.R index 006d0dc..4c3b6d8 100644 --- a/Cruise_Size_distribution.R +++ b/Cruise_Size_distribution.R @@ -1,8 +1,9 @@ #[gwennm@bloom DeepDOM/Cell_Division] ##Run this script using the commented out code just below (paste directly into the command line), should be in the directory listed above -# Rscript ~/DeepDOM/scripts/Cruise_Size_distribution_sqlite_GMH.R d1 d2 prochloro DeepDOM | qsub -lwalltime=00:30:00,nodes=1:ppn=1 -N DeepDOM -d. +# Rscript ~/DeepDOM/ssPopModel/Cruise_Size_distribution.R 1 40 prochloro DeepDOM -#note edit line above according to changes to script: make day input an integer number range (ie: 1,2 for 1:2). check with Chris to make sure the script above will work... esp. wall time + +#Note: this code requires popcycle package, and a flag file called "flag_file.txt" in the same directory as the db.location. It produces a plot of beads across the cruise with a smoothed spline fit to estimate the ave value of the parameter (fsc_small). It also produces a .csv file for each day containing the distributions of fsc_small for the phyto called for each 3-min file. #allowing arguments to be input from the command line input commented out above @@ -12,6 +13,7 @@ d2 <- as.numeric(args[2]) #2nd argument, day number to end phyto <- as.character(args[3]) # 2nd arg phyto to calc size dist cruise <- as.character(args[4]) # 3rd arg cruise name + library(popcycle) library(stats) @@ -19,32 +21,34 @@ library(stats) ### BATCH FILE inputs ### ######################### #globals necessary for running on bloom -# home <- '~/DeepDOM/Cell_Division/' -# folder <- NULL -# root <- "/misc/seaflow/" +home <- '~/DeepDOM/Cell_Division/' #change to take from input, so not hardcoded in +folder <- NULL +root <- "/misc/seaflow/" +db.location <- "~/popcycle" #change to make variable? #globals necessary for running on local machine connected to bloom -home <- "/Users/gwen/Desktop/Cruises/DeepDOM_2013/seaflow/" -folder <- "Cell_Division/" -root <- "/Volumes/seaflow/" -cruise <- "DeepDOM" -phyto <- 'prochloro' -d1 <- 1 #start day -d2 <- 1 # end day +# home <- "/Users/gwen/Desktop/Cruises/DeepDOM_2013/seaflow/" +# folder <- "Cell_Division/" +# root <- "/Volumes/seaflow/" +# db.location <- "/Volumes/gwennm/popcycle" +# d1 <- 1 #start day +# d2 <- 2 # end day +# phyto <- 'prochloro' +# cruise <- "DeepDOM" #Globals necessary for popcycle commands: consider making an input variable to the script set.evt.location(paste(root, cruise, sep="")) -set.project.location("/Volumes/gwennm/popcycle") +set.project.location(db.location) set.cruise.id("march2013") #parameters for making smoothed distributions para <- "fsc_small" n.breaks <- 2^10 # 1024 -concat <- 4 # 3-minute file x 4 = 12 min chunks +#concat <- 4 # note this does not work here, make as an option later? out <- NULL -#write a new function to query sqlite database by parameter and population +# new function to query sqlite database by parameter and population and file.name get.param.by.pop <- function(file.name, para, phyto, db= db.name){ #this sqlite query will return a table with columns: file, param, pop (only phyto) sql <- paste0("SELECT opp.file, opp.", para, ", vct.pop @@ -62,14 +66,6 @@ get.param.by.pop <- function(file.name, para, phyto, db= db.name){ return(opp.slice) } -#new function to retrieve opp.evt.table from sqlite database, suggest adding to popcycle -get.opp.evt.ratio.table <- function(db=db.name){ - sql <- paste0("SELECT * FROM ", opp.evt.ratio.table.name) - con <- dbConnect(SQLite(), dbname= db) - table <- dbGetQuery(con, sql) - dbDisconnect(con) - return (table) -} ########################### @@ -90,14 +86,13 @@ all.pop <- subset(all.stat, pop == phyto) all.files <- all.pop$file julian.day <- unique(basename(dirname(all.files))) -#load opp.evt.ratios to correct for true cell density, so far not necessary because I use abundance from stats table -#oer <- get.opp.evt.ratio.table() ######################## ### INSTRUMENT DRIFT ### ######################## #need this code to normalize to beads fsc_small across the whole cruise + #this also plots the bead distribution for the whole cruise with smoothed spline spar <- 0.45 beads <- subset(all.stat, pop == "beads" & fsc_small < 60000) @@ -128,8 +123,7 @@ for(n in d1:d2){ size.class <- NULL for(file in filenames){ - #still need to figure out how to implement concat to put together 12 min chunks - #do we need to concat at this step? or can it wait until the model? + #select parameter for phytoplankton from each file slice <- get.param.by.pop(file, para, phyto) #find smoothed beads from same time stamp to normalize param of interest @@ -141,12 +135,11 @@ for(n in d1:d2){ print("no beads found") } - #insert total abundance of this pop from the correct file, ** is this correct?? - ntot <- stat[which(stat$file == file), "abundance"] + #insert ntot from 1/opp_evt_ratio * n_count from stats + ntot <- 1/stat[which(stat$file == file), "opp_evt_ratio"] * stat[which(stat$file == file), "n_count"] time.class <- rep(time, n.breaks) #make vector of time to go with dist - #Note changed the start range of density from 1 to 0 for smaller pro - #shouldn't this range be dependent on the phyto we are trying to model? maybe this should be a range determined by the max of the slice + #shouldn't this range be dependent on the phyto we are trying to model? #could add an if statement, if phyto = prochloro then this range for dens dens <- density(log10(slice[,para]), n=n.breaks,from=0, to=3.5, bw="SJ", kernel='gaussian',na.rm=T) freq.dist <- dens$y*diff(dens$x)[1] diff --git a/Make_PAR_table.R b/Make_PAR_table.R new file mode 100644 index 0000000..5264d19 --- /dev/null +++ b/Make_PAR_table.R @@ -0,0 +1,21 @@ +### Make_PAR_table.R arg1 arg2 arg3 +## run this script before running Model_HD_Division_Rate +## arg 1= db.name +## arg 2= output/directory +## arg 3= cruise + +library(popcycle) + +args <- commandArgs(TRUE) +db.name <- as.character(args[1]) +out.dir <- as.character(args[2]) +cruise <- as.character(args[3]) + +# db.name = "/Volumes/gwennm/popcycle/sqlite/popcycle.db" +# out.dir = "/Volumes/gwennm/DeepDOM/Cell_Division" +# cruise = "DeepDOM" + + sfl <- get.sfl.table(db.name) + Par <- sfl[,c("date", "par")] + Par$time <- strptime(Par$date, "%Y-%m-%dT%H:%M:%S", tz="GMT") +write.csv(Par[,c("par", "time")], paste0(out.dir, "/PAR_", cruise)) \ No newline at end of file diff --git a/ModelOutput_Viz.R b/ModelOutput_Viz.R new file mode 100644 index 0000000..52f2e07 --- /dev/null +++ b/ModelOutput_Viz.R @@ -0,0 +1,122 @@ +## MODEL +cruise <- "DeepDOM" +location.model <- "/Volumes/gwennm/DeepDOM/Cell_Division" +phyto <- 'prochloro' +out.dir <- "/Volumes/gwennm/DeepDOM/Cell_Division" +cat <- 2^6 # number of size bin +n.day <- 39 #number of days in dataset, need for below because it did not find in loading model, may be a bug somewhere? + +library(rgl) +library(ggplot2) + + +all.filelist <- list.files(paste0(location.model,"/"),pattern=paste0(phyto,"_modelHD_growth_",cruise,"_Ncat",cat)) +filelist <- all.filelist[grep(pattern=paste(phyto), all.filelist)] + +n <- c <- 1 +Conc.all <- N.proj.all <- V.hist.all <- div.rate <- para.all <- Col <- NULL + + +for(file in filelist){ + #file <- filelist[2] + load(paste(location.model,file, sep="/")) + print(file) + print(n) + dim <- conc.proj.all <- n.proj.all <- v.hist.all <- dr.all <- p.all <- NULL + for(i in seq(2,as.numeric(n.day),by=1)){ + try(n.proj <- model[4,i][[1]]) + n.proj.all <- cbind(n.proj.all, n.proj) + + conc.proj <- cbind(as.numeric(colnames(n.proj)), as.numeric(colSums(n.proj))) + conc.proj.all <- rbind(conc.proj.all, conc.proj) + + try(dr <- model[2,i][[1]]) + h.dr <- cbind(as.numeric(colnames(dr)), as.numeric(dr)) + dr.all <- rbind(dr.all, h.dr) + + try(v.proj <- model[3,i][[1]]) + v.hist.all <- cbind(v.hist.all, v.proj) + + try(para <- model[1,i][[1]]) + param <- cbind(time=as.numeric(colnames(n.proj)), para) + p.all <- rbind(p.all, param) + } + + + div.rate <- rbind(div.rate, dr.all) + N.proj.all <- cbind(N.proj.all, n.proj.all) + Conc.all <- rbind(Conc.all, conc.proj.all) + V.hist.all <- cbind(V.hist.all, v.hist.all) + para.all <- rbind(para.all, p.all) + + col <- rep(c, nrow(dr.all)) + Col <- c(Col,col) + + leg <- unlist(list(strsplit(filelist,"_t")))[seq(2,length(filelist[1:n])*2,2)] + + # layout(matrix(c(1,1,2:7),4,2, byrow=T)) + # par(pty='m') + # plot(div.rate, ylab="Div Rate", xlab="time",col=Col) + # #abline(v=night$UNIXtime,col='lightgrey');points(div.rate,col=Col) + # legend("topleft",legend=leg, col=1:c, ncol=length(leg), pch=1) + # plot(para.all[,"time"], para.all[,"gmax"], ylab="gmax", xlab="time",col = Col) + # plot(para.all[,"time"], para.all[,"dmax"],ylab="dmax", xlab="time",col = Col) + # plot(para.all[,"time"], para.all[,"a"],ylab="a", xlab="time",col = Col) + # plot(para.all[,"time"], para.all[,"b"],ylab="b", xlab="time",col = Col) + # plot(para.all[,"time"], para.all[,"E_star"],ylab="E_star", xlab="time",col = Col) + # plot(para.all[,"time"], para.all[,"resnorm"],ylab="resnorm", xlab="time",col = Col) + + # # + # names(para) <- c("gmax","a","b","E_star","dmax","resnorm") + # par(mfrow=c(4,2)) + # barplot(d.GR, col='grey', main="GR") + # for(i in 1:6) barplot(para[,i], main=colnames(para)[i]) +n <- n + 1 +c <- c + 1 +} + +### ORDERING DATA by TIME + + Div.rate <- div.rate[order(div.rate[,1]),] + Nproj <- N.proj.all[,order(as.numeric(colnames(N.proj.all)))] + Conc.proj <- Conc.all[order(Conc.all[,1]),] + Vproj <- V.hist.all[,order(as.numeric(colnames(V.hist.all)))] + Para.all <- para.all[order(para.all[,"time"]),] + + ### VISUALIZATION + jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F", "yellow", "#FF7F00", "red", "#7F0000")) + para <- Vproj + percentile <- cut(unlist(para), 100); plot3d(rep(1:dim(para)[1], dim(para)[2]), rep(1:dim(para)[2], each=dim(para)[1]), z=matrix(para), col=jet.colors(100)[percentile], type='l', lwd=3, xlab="size class", ylab="time", zlab="Frequency") + + +#Average estimates for division rate from same time point and make new table with Ave, SE and number of estimates +Div.rate.ave <- as.data.frame(matrix(data= NA, nrow=length(na.omit(unique(Div.rate[,1]))) , ncol=5)) +colnames(Div.rate.ave)=c("time", "div.ave", "div.sd", "div.se", "div.n") +Div.rate.ave$time <- na.omit(unique(Div.rate[,1])) + for(n in 1:length(Div.rate.ave$time)){ + sum = sum(na.omit(Div.rate[,1] == Div.rate.ave$time[n])) + index <- which(Div.rate[,1] == Div.rate.ave$time[n]) + if(sum <2){ + Div.rate.ave$div.ave[n] <-Div.rate[index,2] + Div.rate.ave$div.sd[n] <- NA + Div.rate.ave$div.n[n] <- 1 + Div.rate.ave$div.se[n] <- NA + next + } + sub <- Div.rate[index,] + Div.rate.ave$div.ave[n] <- mean(na.omit(sub[,2])) + Div.rate.ave$div.sd[n] <- sd(na.omit(sub[,2])) + Div.rate.ave$div.n[n] <- sum(!is.na(sub[,2])) + Div.rate.ave$div.se[n] <- Div.rate.ave$div.sd[n]/sqrt(Div.rate.ave$div.n[n]) + } + +#plot the division rates +- SD +plot(Div.rate.ave$time, Div.rate.ave$div.ave, "l", ylim=c(0, max(na.omit(Div.rate.ave$div.ave + Div.rate.ave$div.sd))), ylab="Division rate (hr-1)", xlab= "Time") +title(main=paste("Model Growth Rate of", phyto), sub=paste("Cruise:", cruise)) +#polygon(c(Div.rate.ave$time, rev(Div.rate.ave$time)), c(Div.rate.ave$div.ave, rev(Div.rate.ave$div.ave + Div.rate.ave$div.sd)), col= "grey", border=NA) + arrows(Div.rate.ave$time, Div.rate.ave$div.ave, Div.rate.ave$time, Div.rate.ave$div.ave + Div.rate.ave$div.sd, angle= 90, length=0.01, lwd= 0.15) + arrows(Div.rate.ave$time, Div.rate.ave$div.ave, Div.rate.ave$time, Div.rate.ave$div.ave - Div.rate.ave$div.sd, angle= 90, length=0.01, lwd= 0.15) + + + +write.table(Div.rate.ave, paste0(out.dir, "/", phyto, "Model_Division_summary.csv"), sep=",", row.names=F) \ No newline at end of file diff --git a/Model_HD_Division_Rate.R b/Model_HD_Division_Rate.R index c1df6d9..36048be 100644 --- a/Model_HD_Division_Rate.R +++ b/Model_HD_Division_Rate.R @@ -1,33 +1,32 @@ -# [ribalet@bloom Cell_Division] -# for i in $(seq 0 1 24); do echo "Rscript Model_HD_Division_Rate.R $i synecho Thompson_4" | qsub -lwalltime=24:00:00,nodes=1:ppn=1 -N synGR$i -d.; done -# for i in $(seq 0 1 24); do echo "Rscript Model_HD_Division_Rate.R $i prochloro Thompson_4" | qsub -lwalltime=24:00:00,nodes=1:ppn=1 -N proGR$i -d.; done -# for i in $(seq 0 1 24); do echo "Rscript Model_HD_Division_Rate.R $i synecho MBARI_1" | qsub -lwalltime=24:00:00,nodes=1:ppn=1 -N synGR$i -d.; done -# for i in $(seq 0 1 24); do echo "Rscript Model_HD_Division_Rate.R $i pico MBARI_1" | qsub -lwalltime=24:00:00,nodes=1:ppn=1 -N proGR$i -d.; done -# for i in $(seq 0 1 24); do echo "Rscript Model_HD_Division_Rate.R $i synecho Thompson_10" | qsub -lwalltime=24:00:00,nodes=1:ppn=1 -N synGR$i -d.; done -# for i in $(seq 0 1 24); do echo "Rscript Model_HD_Division_Rate.R $i crypto Crypto_TimeCourse_June2013" | qsub -lwalltime=24:00:00,nodes=1:ppn=1 -N cryptoGR$i -d.; done -# for i in $(seq 0 1 24); do echo "Rscript Model_HD_Division_Rate.R $i crypto Rhodomonas_Feb2014" | qsub -lwalltime=24:00:00,nodes=1:ppn=1 -N cryptoGR$i -d.; done -# for i in $(seq 0 1 24); do echo "Rscript Model_HD_Division_Rate.R $i prochloro Med4_TimeCourse_July2012" | qsub -lwalltime=24:00:00,nodes=1:ppn=1 -N proGR$i -d.; done - +# This script does model optimization on the phyto +# for i in $(seq 0 1 24); do echo "Rscript ~/DeepDOM/ssPopModel/Model_HD_Division_Rate.R $i prochloro DeepDOM ~/DeepDOM/ssPopModel ~/DeepDOM/Cell_Division ~/DeepDOM/Cell_Division" | qsub -lwalltime=30:00:00,nodes=1:ppn=1 -N proGR$i -d.; done # library(rgl) library(DEoptim) library(zoo) -#home <- "/Volumes/ribalet/Cell_division/"; folder <- NULL; cruise <- "Crypto_TimeCourse_June2013" +args <- commandArgs(TRUE) +t <- as.numeric(args[1]) +phyto <- as.character(args[2]) +cruise <- as.character(args[3]) +script.home <- as.character(args[4]) +in.dir <-as.character(args[5]) +out.dir <- as.character(args[6]) -home <- '~/Cell_Division/'; folder <- NULL +# t = 1 +# phyto= "prochloro" +# cruise = "DeepDOM" +# script.home <- "/Volumes/gwennm/DeepDOM/ssPopModel" +# in.dir <-"/Volumes/gwennm/DeepDOM/Cell_Division" +# out.dir <- "/Volumes/gwennm/DeepDOM/Cell_Division" -source(paste(home,'functions_modelHD.R',sep=""), chdir = TRUE) + +source(paste(script.home,'functions_modelHD.R',sep="/"), chdir = TRUE) jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F", "yellow", "#FF7F00", "red", "#7F0000")) -args <- commandArgs(TRUE) -t <- as.numeric(args[1]) -phyto <- as.character(args[2]) -cruise <- as.character(args[3]) - ############################### @@ -52,10 +51,9 @@ m <- 2^6 # number of size class ############## ## PAR DATA ## ############## - - Par.path <- paste(home, folder,cruise,"/Par_",cruise,sep="") + Par.path <- paste0(in.dir,"/PAR_",cruise) Par <- read.csv(Par.path, sep=",") - Par$time <- as.POSIXct(Par$time, tz="GMT") + Par$time <- as.POSIXct(Par$time, tz= "GMT") Par$num.time <- as.numeric(Par$time) @@ -64,14 +62,14 @@ m <- 2^6 # number of size class ####################### # t <- 0 - # phyto <- "crypto" + # phyto <- "prochloro" print(paste("time delay:", t)) print(paste("phytoplankton population:",phyto)) - load(paste(home,folder,cruise,"/", phyto,"_dist_Ncat",m,"_",cruise,sep="")) + load(paste(in.dir,"/", phyto,"_dist_Ncat",m,"_",cruise,sep="")) Vhists <- distribution[[1]] - Vhists <- sweep(Vhists, 2, colSums(Vhists), '/') # Normalize each column of VHists to 1 + #Vhists <- sweep(Vhists, 2, colSums(Vhists), '/') # Normalize each column of VHists to 1 #this caused a bug erasing the first section of my data, because colSums = NA N_dist <- distribution[[2]] volbins <- as.numeric(row.names(Vhists)) @@ -79,7 +77,7 @@ m <- 2^6 # number of size class time.numc <- as.numeric(colnames(Vhists)) time <- as.POSIXct(time.numc, origin="1970-01-01" ,tz="GMT") - n.day <- round(diff(range(time))); print(paste("Number of days in the dataset:",n.day)) + n.day <- round(diff(range(na.omit(time)))); print(paste("Number of days in the dataset:",n.day)) # para <- Vhists; percentile <- cut(unlist(para), 100); plot3d(log(rep(as.numeric(row.names(para)), dim(para)[2])), rep(as.numeric(colnames(para)), each=dim(para)[1]) , Vhists , col=jet.colors(100)[percentile], type='l', lwd=6, xlab="size class", ylab="time", zlab="Frequency") @@ -96,7 +94,7 @@ m <- 2^6 # number of size class for(i in seq(1,length(time)-24, 24)){ print(paste("starting hour:",i+t)) - #i <- 96 + #i <- 25 start <- time[i+t] end <- time[(i+t)+24] @@ -104,14 +102,28 @@ m <- 2^6 # number of size class print("cycle is less than 24h") next } + if(is.na(start)){ + print("NA in start time, skip ahead") + next + } + if(sum(is.na(time[i:(i+24)+t]))>4){ + print("more than 4 hours missing from time period, skip ahead") + next + } print(paste("calculating growth projection from ",start , "to",end)) - + #plot(Par$time, Par$par, type='o'); points(c(start, end),c(0,0), col='red',pch=16, cex=2) ### SELECT SIZE DISTRIBUTION for DAY i V.hists <- Vhists[,c(i:(i+24)+t)] N.dist <- N_dist[,c(i:(i+24)+t)] + + # #NAs break this part and need to be made into zeros + mk.zero <- which(is.na(V.hists)) + V.hists[mk.zero] <- 0 + mk.zero <- which(is.na(N.dist)) + N.dist[mk.zero] <- 0 # para <- V.hists; percentile <- cut(unlist(para), 100); plot3d(log(rep(as.numeric(row.names(para))), dim(para)[2]), rep(as.numeric(colnames(para)), each=dim(para)[1]), para , col=jet.colors(100)[percentile], type='l', lwd=6, xlab="size class", ylab="time", zlab="Frequency") @@ -133,7 +145,7 @@ m <- 2^6 # number of size class if(class(proj) !='try-error'){ model <- matrix(cbind(as.array(model), as.array(proj)), nrow=4,ncol=ncol(model)+1) - save(model, file=paste(home,folder,cruise,"/",phyto,"_modelHD_growth_",cruise,"_Ncat",m,"_t",t, sep="")) + save(model, file=paste(out.dir,"/",phyto,"_modelHD_growth_",cruise,"_Ncat",m,"_t",t, sep="")) }else{print("error during optimization")} } diff --git a/Viz_Size_Dist.R b/Viz_Size_Dist.R index 53855bb..79c0b35 100644 --- a/Viz_Size_Dist.R +++ b/Viz_Size_Dist.R @@ -1,12 +1,17 @@ +## Script for visualizing the HD.size.class files as 3D histograms + + library(rgl) library(zoo) jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F", "yellow", "#FF7F00", "red", "#7F0000")) - home <- "~/Documents/DATA/SeaFlow/Thompson/" - home <- "/Volumes/ribalet/Cell_Division/" - cruise <- "Rhodomonas_Feb2014" # "Med4_TimeCourse" - phyto <- "crypto" +#Change according to your own directory structure, cruise and phyto of interest +#note: should implement a command line input for this + #home <- "~/Documents/DATA/SeaFlow/Thompson/" #local + home <- "/Volumes/gwennm/DeepDOM/Cell_Division/" #on server + cruise <- "DeepDOM" + phyto <- "prochloro" ############################ ## OLD SIZE DISTRIBUTION ### @@ -23,20 +28,23 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F" ## FULL SIZE DISTRIBUTION ### ############################# -list <- list.files(paste(home,cruise,"/",sep=""),pattern=paste("HD.size.class_",cruise,"_",phyto,sep="")) +list <- list.files(home, pattern=paste("HD.size.class_",cruise,"_",phyto,sep="")) Size <- NULL for(l in list){ print(l) - s <- read.csv(paste(home,cruise,"/",l,sep="")) + s <- read.csv(paste(home,l,sep="")) Size <- rbind(Size, s) } Size$time <- as.POSIXct(Size$time, tz="GMT") Size$num.time <- as.numeric(Size$time) Size[Size[,"size.dist"] == 0,"freq.dist"] <- 0 + Size.cut <- subset(Size, freq.dist > 0) + +percentile <- cut(Size[,"freq.dist"], 100); plot3d(x=log10(Size$stages/Size$fsc_beads), y=Size$time, z=Size$freq.dist, col=jet.colors(100)[percentile], type='p', lwd=1, xlab="Size distribution", ylab="Frequency", zlab="Time",axes=T) -percentile <- cut(Size[,"freq.dist"], 100); plot3d(x=log10(Size$stages/Size$fsc_beads), y=Size$time, z=Size$freq.dist, col=jet.colors(100)[percentile], type='p', lwd=1, xlab="Size distribution", ylab="Frequency", zlab="Time",axes=F) +percentile <- cut(Size.cut[,"freq.dist"], 100); plot3d(x=log10(Size.cut$stages/Size.cut$fsc_beads), y=Size.cut$time, z=Size.cut$freq.dist, col=jet.colors(100)[percentile], type='p', lwd=1, xlab="Size distribution", ylab="Frequency", zlab="Time",axes=T) percentile <- cut(Size[,"freq.dist"], 100); plot3d(x=Size$stages/Size$fsc_beads, y=Size$time, z=Size$freq.dist, col=jet.colors(100)[percentile], type='p', lwd=1, xlab="Size distribution", ylab="Frequency", zlab="Time",axes=F) @@ -49,10 +57,10 @@ m <- 64 print(paste("phytoplankton population:",phyto)) - load(paste(home,cruise,"/", phyto,"_dist_Ncat",m,"_",cruise,sep="")) + load(paste(home, phyto,"_dist_Ncat",m,"_",cruise,sep="")) Vhists <- distribution[[1]] - Vhists <- sweep(Vhists, 2, colSums(Vhists), '/') # Normalize each column of VHists to 1 + #Vhists <- sweep(Vhists, 2, colSums(Vhists), '/') # Normalize each column of VHists to 1 N_dist <- distribution[[2]] volbins <- as.numeric(row.names(Vhists)) diff --git a/functions_modelHD.R b/functions_modelHD.R index 168954a..27905e8 100644 --- a/functions_modelHD.R +++ b/functions_modelHD.R @@ -94,13 +94,14 @@ matrix.conct.fast <- function(hr, Einterp, volbins, gmax, dmax, a, b, E_star){ for(hr in 1:24){ - B <- matrix.conct.fast(hr=hr-1, Einterp=Einterp, volbins=volbins, gmax=as.numeric(params[1]), dmax=as.numeric(params[2]), a=as.numeric(params[3]),b=as.numeric(params[4]), E_star=as.numeric(params[5])) + B <- matrix.conct.fast(hr=hr-1, Einterp=Einterp, volbins=volbins, gmax=as.numeric(params[1]), dmax=as.numeric(params[2]), a=as.numeric(params[3]),b=as.numeric(params[4]), E_star=as.numeric(params[5])) + wt <- B %*% V.hists[,hr] # calculate the projected size-frequency distribution wt.norm <- wt/sum(wt, na.rm=T) # normalize distribution sigma[,hr] <- (round(N.dist[, hr+1] - TotN[hr+1]*wt.norm)^2) #observed value - fitted value #sigma[,hr] <- abs(V.hists[, hr+1] - wt.norm) #observed value - fitted value } - sigma <- colSums(sigma)/colSums(N.dist[,-1]) + sigma <- colSums(sigma)/colSums(na.omit(N.dist[,-1])) sigma <- sum(sigma, na.rm=T) return(sigma) @@ -121,7 +122,8 @@ determine.opt.para <- function(V.hists,N.dist,Edata,volbins){ dt <- 1/(resol/10) # dt <- 1/6; breaks <- 25 ## MATLAB - TotN <- matrix(colSums(N.dist), ncol=breaks) + + TotN <- matrix(colSums(na.omit(N.dist)), ncol=breaks) ti <- seq(min(Edata[,1],na.rm=T),max(Edata[,1],na.rm=T), length.out=breaks/dt) ep <- data.frame(spline(Edata[,1], Edata[,2], xout=ti)) #interpolate E data according to dt resolution Einterp <- ep$y