Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
741fd8f
little fixes and clean-up
gmmh Dec 16, 2014
060400c
oops, range needs to start at 0, not 1
gmmh Dec 16, 2014
1c60a36
fixed ntot= 1/opp_evt_ratio * n_count
gmmh Dec 16, 2014
ca7bf4e
clean-up
gmmh Dec 16, 2014
69cdfcb
version 1 used to make my distributions for Deep DOM on Dec. 17, 2014
gmmh Jan 6, 2015
ce4f472
changes to make code work for my directory, need to still make genera…
gmmh Jan 7, 2015
aa5308b
made directory structure for this script more general and changed the
gmmh Jan 7, 2015
5e9d14e
minor edits to clarify comments
gmmh Jan 8, 2015
4a2ddfd
fixed minor bug in reading HD.files
gmmh Jan 9, 2015
f252301
changed input arguments to allow for different directory structure for
gmmh Jan 9, 2015
16c9f4e
changed Model script so that they wouldn't try to access the db at the
gmmh Jan 12, 2015
41a781a
now modified correctly
gmmh Jan 12, 2015
841f215
fixed bug in ordering of args
gmmh Jan 12, 2015
3a388b5
fixed small bugs
gmmh Jan 12, 2015
3be697c
add script for visualization of model output
fribalet Jan 13, 2015
944f9f8
Merge pull request #1 from armbrustlab/master
gmmh Jan 13, 2015
fd4c861
Merge pull request #2 from gmmh/master
gmmh Jan 13, 2015
9f220b0
minor edits
gmmh Jan 13, 2015
223f595
Merge branch 'sqlite' of https://github.com/gmmh/ssPopModel into sqlite
gmmh Jan 13, 2015
1dacbc4
playing around with script, needed to make minor changes to use
gmmh Jan 14, 2015
7d9f058
commented out Na.approx function which fails when interpolating longer
gmmh Jan 14, 2015
9419c85
Fixed an important bug that could result in reodering of some of the
gmmh Jan 14, 2015
03c9691
worked to make these scripts handle NAs and gaps in data without caus…
gmmh Jan 16, 2015
f04f5fe
I used this version of the code to run the model on my cell distribut…
gmmh Jan 21, 2015
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 29 additions & 22 deletions Conversion_Size_Dist.R
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
# [ribalet@bloom Cell_Division]
# for i in $(seq 6 1 8); do echo "Rscript Conversion_Size_Dist.R $i prochloro Med4_TimeCourse_July2012" | qsub -lwalltime=1:00:00,nodes=1:ppn=1 -N pro_conv$i -d.; done
# This script takes HD.size.class files and makes concatenated distributions with the calibrated cell volume from forward scatter
#arguments of this script: (1) distributuion file location, (2) cat (2^cat number of bins), (3) phytoplankton group, (4) cruise
# for i in $(seq 6 1 8); do echo "Rscript ~/DeepDOM/ssPopModel/Conversion_Size_Dist.R ~/DeepDOM/Cell_Division $i prochloro DeepDOM" | qsub -lwalltime=8:00:00,nodes=1:ppn=1 -N pro_conv$i -d.; done


args <- commandArgs(TRUE)
cat <- as.numeric(args[1])
phyto <- as.character(args[2])
cruise <- as.character(args[3])
home <- as.character(args[1])
cat <- as.numeric(args[2])
phyto <- as.character(args[3])
cruise <- as.character(args[4])


home <- '~/Cell_Division/'



#library(rgl)
library(zoo)

# home <- "/Volumes/ribalet/Cell_division/"
# cruise <- "Med4_TimeCourse_July2012"
# home <- "/Volumes/gwennm/DeepDOM/Cell_division"
# cruise <- "DeepDOM"
# phyto <- "prochloro"


Expand All @@ -27,17 +29,18 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F"
## SIZE DISTRIBUTION ##
#######################

list <- list.files(paste(home,cruise,"/",sep=""),pattern=paste("HD.size.class_",cruise,"_",phyto,sep=""))
list <- list.files(home,pattern=paste("HD.size.class_",cruise,"_",phyto,sep=""))
Size <- NULL

for(l in list){
print(l)
s <- read.csv(paste(home,cruise,"/",l,sep=""))
s <- read.csv(paste(home,l,sep="/"))
Size <- rbind(Size, s)
}

Size$time <- as.POSIXct(Size$time, tz="GMT")
Size$num.time <- as.numeric(Size$time)
Size <- Size[order(Size$num.time),]



Expand All @@ -58,9 +61,7 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F"
if(phyto == "synecho" | phyto == "pico" | phyto == "prochloro"){
Size$volume <- 10^(0.524*log10(Size$stages/Size$fsc_beads) + 0.283)
# Size$volume <- 10^(0.5*log10(Size$stages/Size$fsc_beads))# MIE THEORY
}

else{
}else{
Size$volume <- 10^(1.682*log10(Size$stages/Size$fsc_beads) + 0.961)
}

Expand Down Expand Up @@ -91,13 +92,15 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F"
# percentile <- cut(Size.phyto[,"freq.dist"], 100); plot3d(x=(Size.phyto$volume), y=Size.phyto$num.time, z=Size.phyto$freq.dist, col=jet.colors(100)[percentile], type='l', lwd=2)

n.day <- round(diff(range(Size.phyto$time))); print(paste("Number of days in the dataset:",n.day))

#broke right here after printing the number of days in the dataset, exit with no error
start <- min(Size.phyto$time)

##############################
## CELL VOLUME DISTRIBUTION ##
##############################

# cat <- 8
# cat <- 6

###############################
m <- 2^cat # number of Size class
Expand Down Expand Up @@ -130,9 +133,11 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F"
HD.volume <- as.vector(rep(volbins, length(unique(Size.phyto$time))))
HD.time <- rep(unique(Size.phyto$time), each=m)
HD.hist <- tapply(Size.phyto$freq.dist, list(HD,Size.phyto$time), mean)
HD.hist <- as.vector(apply(HD.hist, 2, function(x) na.approx(x, na.rm=F)))
HD.hist <- as.vector(HD.hist)
# HD.hist <- as.vector(apply(HD.hist, 2, function(x) na.approx(x, na.rm=F)))
HD.size <- tapply(Size.phyto$size.dist, list(HD,Size.phyto$time), mean)
HD.size <- as.vector(apply(HD.size, 2, function(x) na.approx(x, na.rm=F)))
HD.size <- as.vector(HD.size)
# HD.size <- as.vector(apply(HD.size, 2, function(x) na.approx(x, na.rm=F)))

# para <- HD.hist; percentile <- cut(para, 100); plot3d(log(HD.volume), HD.time, HD.hist, col=jet.colors(100)[percentile], type='l', lwd=2, xlab="size class", ylab="time", zlab="Frequency")

Expand All @@ -147,19 +152,21 @@ jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F"
N_dist <- t(tapply(Size.volume$HD.size, list(h,Size.volume$HD.volume), mean))

### NA interpolation
Vhists <- try(t(apply(Vhists, 1, function(x) na.approx(x, na.rm=F))))
N_dist <- try(t(apply(N_dist, 1, function(x) na.approx(x, na.rm=F))))
# Vhists <- try(t(apply(Vhists, 1, function(x) na.approx(x, na.rm=F))))
# N_dist <- try(t(apply(N_dist, 1, function(x) na.approx(x, na.rm=F))))

id <- findInterval(h.time, na.approx(time, na.rm=F))
# id <- findInterval(h.time, na.approx(time, na.rm=F))

colnames(Vhists) <- colnames(N_dist) <- h.time[id]
colnames(Vhists) <- colnames(N_dist) <- time

# para <- Vhists; percentile <- cut(unlist(para), 100); plot3d(log(rep(as.numeric(row.names(para)), dim(para)[2])), rep(as.numeric(colnames(para)), each=dim(para)[1]) , Vhists , col=jet.colors(100)[percentile], type='l', lwd=6, xlab="size class", ylab="time", zlab="Frequency")


distribution <- list()
distribution[[1]] <- Vhists
distribution[[2]] <- N_dist
# para <- distribution[[1]]; percentile <- cut(unlist(para), 100); plot3d(log(rep(as.numeric(row.names(para)), dim(para)[2])), rep(as.numeric(colnames(para)), each=dim(para)[1]) , Vhists , col=jet.colors(100)[percentile], type='l', lwd=6, xlab="size class", ylab="time", zlab="Frequency")


save(distribution, file=paste(home,cruise,"/", phyto,"_dist_Ncat",m,"_",cruise,sep=""))
print(paste("saving ", home,cruise,"/", phyto,"_dist_Ncat",m,"_",cruise,sep=""))
save(distribution, file=paste(home,"/",phyto,"_dist_Ncat",m,"_",cruise,sep=""))
print(paste("saving ", home,"/",phyto,"_dist_Ncat",m,"_",cruise,sep=""))
55 changes: 24 additions & 31 deletions Cruise_Size_distribution.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#[gwennm@bloom DeepDOM/Cell_Division]
##Run this script using the commented out code just below (paste directly into the command line), should be in the directory listed above
# Rscript ~/DeepDOM/scripts/Cruise_Size_distribution_sqlite_GMH.R d1 d2 prochloro DeepDOM | qsub -lwalltime=00:30:00,nodes=1:ppn=1 -N DeepDOM -d.
# Rscript ~/DeepDOM/ssPopModel/Cruise_Size_distribution.R 1 40 prochloro DeepDOM

#note edit line above according to changes to script: make day input an integer number range (ie: 1,2 for 1:2). check with Chris to make sure the script above will work... esp. wall time

#Note: this code requires popcycle package, and a flag file called "flag_file.txt" in the same directory as the db.location. It produces a plot of beads across the cruise with a smoothed spline fit to estimate the ave value of the parameter (fsc_small). It also produces a .csv file for each day containing the distributions of fsc_small for the phyto called for each 3-min file.


#allowing arguments to be input from the command line input commented out above
Expand All @@ -12,39 +13,42 @@ d2 <- as.numeric(args[2]) #2nd argument, day number to end
phyto <- as.character(args[3]) # 2nd arg phyto to calc size dist
cruise <- as.character(args[4]) # 3rd arg cruise name


library(popcycle)
library(stats)

#########################
### BATCH FILE inputs ###
#########################
#globals necessary for running on bloom
# home <- '~/DeepDOM/Cell_Division/'
# folder <- NULL
# root <- "/misc/seaflow/"
home <- '~/DeepDOM/Cell_Division/' #change to take from input, so not hardcoded in
folder <- NULL
root <- "/misc/seaflow/"
db.location <- "~/popcycle" #change to make variable?

#globals necessary for running on local machine connected to bloom
home <- "/Users/gwen/Desktop/Cruises/DeepDOM_2013/seaflow/"
folder <- "Cell_Division/"
root <- "/Volumes/seaflow/"
cruise <- "DeepDOM"
phyto <- 'prochloro'
d1 <- 1 #start day
d2 <- 1 # end day
# home <- "/Users/gwen/Desktop/Cruises/DeepDOM_2013/seaflow/"
# folder <- "Cell_Division/"
# root <- "/Volumes/seaflow/"
# db.location <- "/Volumes/gwennm/popcycle"
# d1 <- 1 #start day
# d2 <- 2 # end day
# phyto <- 'prochloro'
# cruise <- "DeepDOM"

#Globals necessary for popcycle commands: consider making an input variable to the script
set.evt.location(paste(root, cruise, sep=""))
set.project.location("/Volumes/gwennm/popcycle")
set.project.location(db.location)
set.cruise.id("march2013")

#parameters for making smoothed distributions
para <- "fsc_small"
n.breaks <- 2^10 # 1024
concat <- 4 # 3-minute file x 4 = 12 min chunks
#concat <- 4 # note this does not work here, make as an option later?
out <- NULL


#write a new function to query sqlite database by parameter and population
# new function to query sqlite database by parameter and population and file.name
get.param.by.pop <- function(file.name, para, phyto, db= db.name){
#this sqlite query will return a table with columns: file, param, pop (only phyto)
sql <- paste0("SELECT opp.file, opp.", para, ", vct.pop
Expand All @@ -62,14 +66,6 @@ get.param.by.pop <- function(file.name, para, phyto, db= db.name){
return(opp.slice)
}

#new function to retrieve opp.evt.table from sqlite database, suggest adding to popcycle
get.opp.evt.ratio.table <- function(db=db.name){
sql <- paste0("SELECT * FROM ", opp.evt.ratio.table.name)
con <- dbConnect(SQLite(), dbname= db)
table <- dbGetQuery(con, sql)
dbDisconnect(con)
return (table)
}


###########################
Expand All @@ -90,14 +86,13 @@ all.pop <- subset(all.stat, pop == phyto)
all.files <- all.pop$file
julian.day <- unique(basename(dirname(all.files)))

#load opp.evt.ratios to correct for true cell density, so far not necessary because I use abundance from stats table
#oer <- get.opp.evt.ratio.table()


########################
### INSTRUMENT DRIFT ###
########################
#need this code to normalize to beads fsc_small across the whole cruise
#this also plots the bead distribution for the whole cruise with smoothed spline

spar <- 0.45
beads <- subset(all.stat, pop == "beads" & fsc_small < 60000)
Expand Down Expand Up @@ -128,8 +123,7 @@ for(n in d1:d2){
size.class <- NULL

for(file in filenames){
#still need to figure out how to implement concat to put together 12 min chunks
#do we need to concat at this step? or can it wait until the model?
#select parameter for phytoplankton from each file
slice <- get.param.by.pop(file, para, phyto)

#find smoothed beads from same time stamp to normalize param of interest
Expand All @@ -141,12 +135,11 @@ for(n in d1:d2){
print("no beads found")
}

#insert total abundance of this pop from the correct file, ** is this correct??
ntot <- stat[which(stat$file == file), "abundance"]
#insert ntot from 1/opp_evt_ratio * n_count from stats
ntot <- 1/stat[which(stat$file == file), "opp_evt_ratio"] * stat[which(stat$file == file), "n_count"]
time.class <- rep(time, n.breaks) #make vector of time to go with dist

#Note changed the start range of density from 1 to 0 for smaller pro
#shouldn't this range be dependent on the phyto we are trying to model? maybe this should be a range determined by the max of the slice
#shouldn't this range be dependent on the phyto we are trying to model?
#could add an if statement, if phyto = prochloro then this range for dens
dens <- density(log10(slice[,para]), n=n.breaks,from=0, to=3.5, bw="SJ", kernel='gaussian',na.rm=T)
freq.dist <- dens$y*diff(dens$x)[1]
Expand Down
21 changes: 21 additions & 0 deletions Make_PAR_table.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
### Make_PAR_table.R arg1 arg2 arg3
## run this script before running Model_HD_Division_Rate
## arg 1= db.name
## arg 2= output/directory
## arg 3= cruise

library(popcycle)

args <- commandArgs(TRUE)
db.name <- as.character(args[1])
out.dir <- as.character(args[2])
cruise <- as.character(args[3])

# db.name = "/Volumes/gwennm/popcycle/sqlite/popcycle.db"
# out.dir = "/Volumes/gwennm/DeepDOM/Cell_Division"
# cruise = "DeepDOM"

sfl <- get.sfl.table(db.name)
Par <- sfl[,c("date", "par")]
Par$time <- strptime(Par$date, "%Y-%m-%dT%H:%M:%S", tz="GMT")
write.csv(Par[,c("par", "time")], paste0(out.dir, "/PAR_", cruise))
122 changes: 122 additions & 0 deletions ModelOutput_Viz.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
## MODEL
cruise <- "DeepDOM"
location.model <- "/Volumes/gwennm/DeepDOM/Cell_Division"
phyto <- 'prochloro'
out.dir <- "/Volumes/gwennm/DeepDOM/Cell_Division"
cat <- 2^6 # number of size bin
n.day <- 39 #number of days in dataset, need for below because it did not find in loading model, may be a bug somewhere?

library(rgl)
library(ggplot2)


all.filelist <- list.files(paste0(location.model,"/"),pattern=paste0(phyto,"_modelHD_growth_",cruise,"_Ncat",cat))
filelist <- all.filelist[grep(pattern=paste(phyto), all.filelist)]

n <- c <- 1
Conc.all <- N.proj.all <- V.hist.all <- div.rate <- para.all <- Col <- NULL


for(file in filelist){
#file <- filelist[2]
load(paste(location.model,file, sep="/"))
print(file)
print(n)
dim <- conc.proj.all <- n.proj.all <- v.hist.all <- dr.all <- p.all <- NULL
for(i in seq(2,as.numeric(n.day),by=1)){
try(n.proj <- model[4,i][[1]])
n.proj.all <- cbind(n.proj.all, n.proj)

conc.proj <- cbind(as.numeric(colnames(n.proj)), as.numeric(colSums(n.proj)))
conc.proj.all <- rbind(conc.proj.all, conc.proj)

try(dr <- model[2,i][[1]])
h.dr <- cbind(as.numeric(colnames(dr)), as.numeric(dr))
dr.all <- rbind(dr.all, h.dr)

try(v.proj <- model[3,i][[1]])
v.hist.all <- cbind(v.hist.all, v.proj)

try(para <- model[1,i][[1]])
param <- cbind(time=as.numeric(colnames(n.proj)), para)
p.all <- rbind(p.all, param)
}


div.rate <- rbind(div.rate, dr.all)
N.proj.all <- cbind(N.proj.all, n.proj.all)
Conc.all <- rbind(Conc.all, conc.proj.all)
V.hist.all <- cbind(V.hist.all, v.hist.all)
para.all <- rbind(para.all, p.all)

col <- rep(c, nrow(dr.all))
Col <- c(Col,col)

leg <- unlist(list(strsplit(filelist,"_t")))[seq(2,length(filelist[1:n])*2,2)]

# layout(matrix(c(1,1,2:7),4,2, byrow=T))
# par(pty='m')
# plot(div.rate, ylab="Div Rate", xlab="time",col=Col)
# #abline(v=night$UNIXtime,col='lightgrey');points(div.rate,col=Col)
# legend("topleft",legend=leg, col=1:c, ncol=length(leg), pch=1)
# plot(para.all[,"time"], para.all[,"gmax"], ylab="gmax", xlab="time",col = Col)
# plot(para.all[,"time"], para.all[,"dmax"],ylab="dmax", xlab="time",col = Col)
# plot(para.all[,"time"], para.all[,"a"],ylab="a", xlab="time",col = Col)
# plot(para.all[,"time"], para.all[,"b"],ylab="b", xlab="time",col = Col)
# plot(para.all[,"time"], para.all[,"E_star"],ylab="E_star", xlab="time",col = Col)
# plot(para.all[,"time"], para.all[,"resnorm"],ylab="resnorm", xlab="time",col = Col)

# #
# names(para) <- c("gmax","a","b","E_star","dmax","resnorm")
# par(mfrow=c(4,2))
# barplot(d.GR, col='grey', main="GR")
# for(i in 1:6) barplot(para[,i], main=colnames(para)[i])
n <- n + 1
c <- c + 1
}

### ORDERING DATA by TIME

Div.rate <- div.rate[order(div.rate[,1]),]
Nproj <- N.proj.all[,order(as.numeric(colnames(N.proj.all)))]
Conc.proj <- Conc.all[order(Conc.all[,1]),]
Vproj <- V.hist.all[,order(as.numeric(colnames(V.hist.all)))]
Para.all <- para.all[order(para.all[,"time"]),]

### VISUALIZATION
jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F", "yellow", "#FF7F00", "red", "#7F0000"))
para <- Vproj
percentile <- cut(unlist(para), 100); plot3d(rep(1:dim(para)[1], dim(para)[2]), rep(1:dim(para)[2], each=dim(para)[1]), z=matrix(para), col=jet.colors(100)[percentile], type='l', lwd=3, xlab="size class", ylab="time", zlab="Frequency")


#Average estimates for division rate from same time point and make new table with Ave, SE and number of estimates
Div.rate.ave <- as.data.frame(matrix(data= NA, nrow=length(na.omit(unique(Div.rate[,1]))) , ncol=5))
colnames(Div.rate.ave)=c("time", "div.ave", "div.sd", "div.se", "div.n")
Div.rate.ave$time <- na.omit(unique(Div.rate[,1]))
for(n in 1:length(Div.rate.ave$time)){
sum = sum(na.omit(Div.rate[,1] == Div.rate.ave$time[n]))
index <- which(Div.rate[,1] == Div.rate.ave$time[n])
if(sum <2){
Div.rate.ave$div.ave[n] <-Div.rate[index,2]
Div.rate.ave$div.sd[n] <- NA
Div.rate.ave$div.n[n] <- 1
Div.rate.ave$div.se[n] <- NA
next
}
sub <- Div.rate[index,]
Div.rate.ave$div.ave[n] <- mean(na.omit(sub[,2]))
Div.rate.ave$div.sd[n] <- sd(na.omit(sub[,2]))
Div.rate.ave$div.n[n] <- sum(!is.na(sub[,2]))
Div.rate.ave$div.se[n] <- Div.rate.ave$div.sd[n]/sqrt(Div.rate.ave$div.n[n])
}

#plot the division rates +- SD
plot(Div.rate.ave$time, Div.rate.ave$div.ave, "l", ylim=c(0, max(na.omit(Div.rate.ave$div.ave + Div.rate.ave$div.sd))), ylab="Division rate (hr-1)", xlab= "Time")
title(main=paste("Model Growth Rate of", phyto), sub=paste("Cruise:", cruise))
#polygon(c(Div.rate.ave$time, rev(Div.rate.ave$time)), c(Div.rate.ave$div.ave, rev(Div.rate.ave$div.ave + Div.rate.ave$div.sd)), col= "grey", border=NA)
arrows(Div.rate.ave$time, Div.rate.ave$div.ave, Div.rate.ave$time, Div.rate.ave$div.ave + Div.rate.ave$div.sd, angle= 90, length=0.01, lwd= 0.15)
arrows(Div.rate.ave$time, Div.rate.ave$div.ave, Div.rate.ave$time, Div.rate.ave$div.ave - Div.rate.ave$div.sd, angle= 90, length=0.01, lwd= 0.15)



write.table(Div.rate.ave, paste0(out.dir, "/", phyto, "Model_Division_summary.csv"), sep=",", row.names=F)
Loading