EST-Team-Adam · mrpozzi · Aug 14, 2016
diff --git a/NA_replacer.R b/NA_replacer.R
@@ -1,61 +1,45 @@
-NA_replacer <- function(df,BIC_matrix) {
-
-# Ths function finds and replaces NA in df and dfs (maize, wheat and soybeans 2005-2016 prices
-# and soybeans 1994-2005 prices) using ARIMA models.
-# First df column must contain date.
-
-
-## DATA SCALING ##
-
-for (i in 2:dim(df)[2]) {
-   df[,i] <- df[,i]/100
-}
-
-
-
-## MEMORY PRE-ALLOCATION ##
-
-NA_list <- rep(list(NA),dim(df)[2]-1)
-
-
-
-## NA SEEK ##
-
-for (i in 2:dim(df)[2]) {
-  NA_list[[i-1]] <- which(is.na(df[,i]))
-}
-
-
-
-## NA REPLACER ##
-
-# Models fitting and BIC estimation
-for (i in 2:dim(df)[2]) {
-   for (j in 1:length(NA_list[[i-1]])) {
-      for (p in 1:dim(BIC_matrix)[1]) {
-      for (q in 1:dim(BIC_matrix)[2]) {
-         BIC_matrix[p,q] <- BIC(arima(df[1:NA_list[[i-1]][j],i],c(p,0,q)))
-      }
-      }
-      df[NA_list[[i-1]][j],i] <- as.numeric(predict(arima(df[1:NA_list[[i-1]][j],i],c(which(BIC_matrix == min(BIC_matrix), arr.ind = TRUE)[1],0,which(BIC_matrix == min(BIC_matrix), arr.ind = TRUE)[2])),n.ahead=1))[1]
-   }
-}
-
-
-## DATA RESCALING ##
-
-for (i in 2:dim(df)[2]) {
-   df[,i] <- df[,i]*100
-}
-
-
-
-
-return(df)
-
-}
-
-
-
-
+# Finds the NA values in every non-'Date' column of df (maize, wheat and soybeans
+# prices) and replaces each one with the one-step-ahead forecast of the
+# ARMA(p, q) model with the lowest BIC. df must contain a column named 'Date'.
+#   pq: c(max p, max q); every order p in 1..pq[1], q in 1..pq[2] is tried.
+# Returns a data frame with 'Date' first, followed by the imputed columns.
+NA_replacer <- function(df, pq=c(1,1)) {
+  if(!is.data.frame(df)){
+    df <- data.frame(df)
+  }
+  commodity.columns <- colnames(df)[colnames(df)!='Date']
+
+  ## NA SEEK & REPLACE ##
+  # drop=FALSE and lapply keep one entry per column, even for a single column
+  data.frame('Date'=df[,'Date'],
+        lapply(df[,commodity.columns,drop=FALSE], function(column){
+          ## DATA SCALING ##
+          column <- column / 100
+          for (na.pos in which(is.na(column))) {
+            # Fit on the values before the gap so that n.ahead=1 lands on the gap
+            history <- column[seq_len(na.pos - 1)]
+            # Restart the search for every gap; Inf lets the first fitted model win
+            min.BIC <- Inf
+            best.fit <- NULL
+            for (p in seq_len(pq[1])) {
+              for (q in seq_len(pq[2])) {
+                fit <- arima(history, c(p,0,q))
+                bic <- BIC(fit)
+                if(bic < min.BIC){
+                  min.BIC <- bic
+                  best.fit <- fit
+                }
+              }
+            }
+            column[na.pos] <- as.numeric(predict(best.fit, n.ahead=1)$pred)[1]
+          }
+          ## DATA RESCALING ##
+          column * 100
+          })
+        )
+}
+
+
+
+
 
diff --git a/main.R b/main.R
@@ -1,36 +1,26 @@
-## PACKAGES ##
-
-## CLEAR ALL ##
-
-rm(list = ls())
-
-
-
-## DATA LOADING ##
-
-df<-read.csv("data_adam.csv")       # Main dataframe which contains 2005-2016 weekly maize, wheat and soybeans prices
-
-
-## NA ##
-
-BIC_matrix <- matrix(nrow=5,ncol=1,NA)   # Change p,q of ARIMA by changing p rows and q columns of the BIC matrix
-source("NA_replacer.r")                  # Replaces NA values using ARIMA predictions
-df <- NA_replacer(df,BIC_matrix)
-
-
-
-## ANALYSIS ##
-# analysis[[1]] : Price and Returns plots
-# analysis[[2]] : Summaries
-# analysis[[3]] : Boxplots
-# analysis[[4]] and following : Scatterplots
-
-w <- 12                     # Number of weeks to be aggregated in the boxplots
-source("EDA.r")             # Runs an Exploratory Data Analysis
-analysis <- EDA(df,w)
-
-
-
-
-
-
+## PACKAGES ##
+## CLEAR ALL: remove every object from the workspace, then garbage-collect ##
+rm(list = ls())
+gc()
+
+## FUNCTIONS: sourced after the clean-up, so rm() above does not remove them ##
+source("NA_replacer.R")
+source("EDA.R") 
+
+## DATA LOADING ##
+df<-read.csv("data_adam.csv")       # Main dataframe which contains 2005-2016 weekly maize, wheat and soybeans prices
+
+## DATA IMPUTATION ##
+df <- NA_replacer(df, c(5,1))   # Replaces NA values with ARIMA forecasts; c(5,1) = search p in 1..5, q = 1
+
+
+
+## ANALYSIS ##
+# analysis[[1]] : Price and Returns plots
+# analysis[[2]] : Summaries
+# analysis[[3]] : Boxplots
+# analysis[[4]] and following : Scatterplots
+
+w <- 12                     # Number of weeks to be aggregated in the boxplots
+analysis <- EDA(df, w)      # Runs an Exploratory Data Analysis
+