From fbfe237bbb3112186dfdd341ff4344bc4c882ac3 Mon Sep 17 00:00:00 2001 From: mrpozzi Date: Sat, 13 Aug 2016 17:19:30 -0700 Subject: [PATCH] Partial rewriting of the functions --- NA_replacer.R | 104 +++++++++++++++++++++----------------------------- main.R | 62 +++++++++++++----------------- 2 files changed, 70 insertions(+), 96 deletions(-) diff --git a/NA_replacer.R b/NA_replacer.R index d71e8ff..1dd4625 100755 --- a/NA_replacer.R +++ b/NA_replacer.R @@ -1,61 +1,45 @@ -NA_replacer <- function(df,BIC_matrix) { - -# Ths function finds and replaces NA in df and dfs (maize, wheat and soybeans 2005-2016 prices -# and soybeans 1994-2005 prices) using ARIMA models. -# First df column must contain date. - - -## DATA SCALING ## - -for (i in 2:dim(df)[2]) { - df[,i] <- df[,i]/100 -} - - - -## MEMORY PRE-ALLOCATION ## - -NA_list <- rep(list(NA),dim(df)[2]-1) - - - -## NA SEEK ## - -for (i in 2:dim(df)[2]) { - NA_list[[i-1]] <- which(is.na(df[,i])) -} - - - -## NA REPLACER ## - -# Models fitting and BIC estimation -for (i in 2:dim(df)[2]) { - for (j in 1:length(NA_list[[i-1]])) { - for (p in 1:dim(BIC_matrix)[1]) { - for (q in 1:dim(BIC_matrix)[2]) { - BIC_matrix[p,q] <- BIC(arima(df[1:NA_list[[i-1]][j],i],c(p,0,q))) - } - } - df[NA_list[[i-1]][j],i] <- as.numeric(predict(arima(df[1:NA_list[[i-1]][j],i],c(which(BIC_matrix == min(BIC_matrix), arr.ind = TRUE)[1],0,which(BIC_matrix == min(BIC_matrix), arr.ind = TRUE)[2])),n.ahead=1))[1] - } -} - - -## DATA RESCALING ## - -for (i in 2:dim(df)[2]) { - df[,i] <- df[,i]*100 -} - - - - -return(df) - -} - - - - +# This function finds and replaces NA in df and dfs (maize, wheat and soybeans 2005-2016 prices +# and soybeans 1994-2005 prices) using ARIMA(p,0,q) models, with p up to pq[1] and q up to pq[2] chosen by lowest BIC for each NA. +# df must contain a column named 'Date'; every other column is treated as a series to impute.
+ +NA_replacer <- function(df, pq=c(1,1)) { + + if(!is.data.frame(df)){ + df <- data.frame(df) + } + commodity.columns <- colnames(df)[colnames(df)!='Date'] + + ## NA SEEK & REPLACE ## + # Models fitting and BIC estimation + data.frame('Date'=df[,'Date'], + sapply(df[,commodity.columns, drop=FALSE], function(column){ # drop=FALSE keeps a single commodity column as a data.frame + + ## DATA SCALING ## + column <- column / 100 + NA.ind <- which(is.na(column)) + + for (j in seq_along(NA.ind)) { # seq_along: the loop is skipped when the column has no NA + min.BIC <- Inf # reset here so the order is selected again for each NA + opt.pq <- c(NA, NA) + for (p in 1:pq[1]) { + for (q in 1:pq[2]) { + bic <- BIC(arima(column[1:NA.ind[j]],c(p,0,q))) + if(bic < min.BIC){ + min.BIC <- bic + opt.pq <- c(p, q) + } + } + } + column[NA.ind[j]] <- as.numeric( + predict(arima(column[1:NA.ind[j]], c(opt.pq[1], 0, opt.pq[2])), n.ahead=1))[1] + } + ## DATA RESCALING ## + column * 100 + }) + ) +} + + + + \ No newline at end of file diff --git a/main.R b/main.R index f1ed295..7920ac5 100755 --- a/main.R +++ b/main.R @@ -1,36 +1,26 @@ -## PACKAGES ## - -## CLEAR ALL ## - -rm(list = ls()) - - - -## DATA LOADING ## - -df<-read.csv("data_adam.csv") # Main dataframe which contains 2005-2016 weekly maize, wheat and soybeans prices - - -## NA ## - -BIC_matrix <- matrix(nrow=5,ncol=1,NA) # Change p,q of ARIMA by changing p rows and q columns of the BIC matrix -source("NA_replacer.r") # Replaces NA values using ARIMA predictions -df <- NA_replacer(df,BIC_matrix) - - - -## ANALYSIS ## -# analysis[[1]] : Price and Returns plots -# analysis[[2]] : Summaries -# analysis[[3]] : Boxplots -# analysis[[4]] and following : Scatterplots - -w <- 12 # Number of weeks to be aggregated in the boxplots -source("EDA.r") # Runs an Exploratory Data Analysis -analysis <- EDA(df,w) - - - - - - +## PACKAGES ## +## CLEAR ALL ## +rm(list = ls()) +gc() + + +source("NA_replacer.R") +source("EDA.R") + +## DATA LOADING ## +df<-read.csv("data_adam.csv") # Main dataframe which contains 2005-2016 weekly maize, wheat and soybeans prices + +## DATA IMPUTATION ## +df <- NA_replacer(df, c(5,1)) # Replaces NA values
using ARIMA predictions + + + +## ANALYSIS ## +# analysis[[1]] : Price and Returns plots +# analysis[[2]] : Summaries +# analysis[[3]] : Boxplots +# analysis[[4]] and following : Scatterplots + +w <- 12 # Number of weeks to be aggregated in the boxplots +analysis <- EDA(df, w) # Runs an Exploratory Data Analysis +