Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 44 additions & 60 deletions NA_replacer.R
Original file line number Diff line number Diff line change
@@ -1,61 +1,45 @@
#' Replace missing values with ARIMA forecasts
#'
#' Finds the NA entries in every column of `df` except the first (which must
#' contain the date) and replaces them, in row order, with a forecast from an
#' ARMA(p, q) model fitted to the rows up to that gap. For every gap all orders
#' p = 1..nrow(BIC_matrix) and q = 1..ncol(BIC_matrix) are tried and the order
#' with the lowest BIC is used for the forecast.
#'
#' @param df Data frame whose first column is the date and whose remaining
#'   columns are numeric series (the original header mentions maize, wheat
#'   and soybean prices).
#' @param BIC_matrix Matrix whose dimensions define the (p, q) search grid.
#'   Its contents are overwritten locally; only its shape matters.
#' @return `df` with the NA entries of the value columns replaced. Columns
#'   without NA are returned numerically unchanged (up to the /100 * 100
#'   round trip the original code also performed).
NA_replacer <- function(df, BIC_matrix) {
  # seq_len()[-1] is empty for a one-column frame, unlike 2:ncol(df), which
  # would count backwards (2, 1).
  value_cols <- seq_len(ncol(df))[-1]
  max_p <- nrow(BIC_matrix)
  max_q <- ncol(BIC_matrix)

  for (i in value_cols) {
    # The series is modelled on a 1/100 scale and scaled back at the end,
    # as in the original implementation (presumably to help the optimiser).
    series <- df[[i]] / 100

    # Iterating over which() directly does nothing for a complete column;
    # 1:length(...) would have run with indices 1 and 0 and crashed.
    for (k in which(is.na(series))) {
      # NOTE(review): the window ends at the missing observation itself, so
      # predict(n.ahead = 1) appears to forecast the step *after* the gap.
      # Kept as in the original; confirm whether it should stop at k - 1.
      history <- series[seq_len(k)]

      for (p in seq_len(max_p)) {
        for (q in seq_len(max_q)) {
          # A candidate that cannot be fitted is excluded from the search
          # instead of aborting the whole imputation.
          BIC_matrix[p, q] <- tryCatch(
            BIC(arima(history, order = c(p, 0, q))),
            error = function(e) Inf
          )
        }
      }
      if (!any(is.finite(BIC_matrix))) {
        stop("NA_replacer: no ARMA model could be fitted for column ", i,
             ", row ", k, call. = FALSE)
      }

      # which.min() returns a single position even when several orders tie,
      # so the (p, q) pair is always taken from the same cell.
      best <- arrayInd(which.min(BIC_matrix), dim(BIC_matrix))
      fit <- arima(history, order = c(best[1, 1], 0, best[1, 2]))
      series[k] <- as.numeric(predict(fit, n.ahead = 1)$pred)[1]
    }

    df[[i]] <- series * 100
  }

  df
}




#' Replace missing values with ARIMA forecasts
#'
#' This function finds and replaces NA in every column of `df` other than
#' "Date" (the original header mentions maize, wheat and soybean prices)
#' using ARIMA models. `df` must contain a column named "Date".
#'
#' For each NA, in row order, ARMA(p, q) models with p = 1..pq[1] and
#' q = 1..pq[2] are fitted to the rows up to that gap; the order with the
#' lowest BIC is refitted and its forecast replaces the NA.
#'
#' @param df Data frame (or an object `data.frame()` can convert) with a
#'   "Date" column and numeric value columns.
#' @param pq Length-2 vector: the largest AR order and the largest MA order
#'   to try.
#' @return A data frame with "Date" first, followed by the imputed value
#'   columns.
NA_replacer <- function(df, pq = c(1, 1)) {

  if (!is.data.frame(df)) {
    df <- data.frame(df)
  }
  if (!"Date" %in% colnames(df)) {
    stop("NA_replacer: `df` must contain a 'Date' column", call. = FALSE)
  }
  commodity.columns <- colnames(df)[colnames(df) != "Date"]

  # Imputes one numeric series; NAs are filled front to back so that later
  # gaps are modelled on already-completed history.
  impute.column <- function(column) {
    ## DATA SCALING ##
    column <- column / 100

    # Looping over which() directly is a no-op for a complete column;
    # 1:length(NA.ind) would run with indices 1 and 0 and crash.
    for (k in which(is.na(column))) {
      # NOTE(review): the window ends at the missing observation itself, so
      # predict(n.ahead = 1) appears to forecast the step *after* the gap.
      # Kept as in the original; confirm whether it should stop at k - 1.
      history <- column[seq_len(k)]

      # Reset for every gap. Starting from -1 (and never resetting) left
      # opt.pq as NA whenever all BICs were above -1, and otherwise compared
      # against a stale BIC from a shorter window.
      min.BIC <- Inf
      opt.pq <- c(NA_integer_, NA_integer_)

      for (p in seq_len(pq[1])) {
        for (q in seq_len(pq[2])) {
          # A candidate that cannot be fitted is skipped, not fatal.
          bic <- tryCatch(
            BIC(arima(history, order = c(p, 0, q))),
            error = function(e) Inf
          )
          if (is.finite(bic) && bic < min.BIC) {
            min.BIC <- bic
            opt.pq <- c(p, q)
          }
        }
      }
      if (anyNA(opt.pq)) {
        stop("NA_replacer: no ARMA model could be fitted at row ", k,
             call. = FALSE)
      }

      fit <- arima(history, order = c(opt.pq[1], 0, opt.pq[2]))
      column[k] <- as.numeric(predict(fit, n.ahead = 1)$pred)[1]
    }

    ## DATA RESCALING ##
    column * 100
  }

  ## NA SEEK & REPLACE ##
  # df[cols] keeps a data frame even for a single column; df[, cols] would
  # drop to a vector and the function would be applied cell by cell.
  imputed <- lapply(df[commodity.columns], impute.column)
  data.frame(Date = df[["Date"]], imputed)
}





62 changes: 26 additions & 36 deletions main.R
Original file line number Diff line number Diff line change
@@ -1,36 +1,26 @@
## PACKAGES ##

## CLEAR ALL ##

# NOTE(review): this wipes every object in the calling environment when the
# script is sourced; kept because the rest of the script was written with a
# clean workspace in mind.
rm(list = ls())



## DATA LOADING ##

# Main dataframe which contains 2005-2016 weekly maize, wheat and soybeans prices
df <- read.csv("data_adam.csv")


## NA ##

# The shape of BIC_matrix defines the ARIMA search grid: rows are the AR
# orders p, columns are the MA orders q. Change p, q by changing its size.
BIC_matrix <- matrix(nrow = 5, ncol = 1, NA)
# The file is named "NA_replacer.R"; the lower-case ".r" spelling fails on
# case-sensitive filesystems.
source("NA_replacer.R") # Replaces NA values using ARIMA predictions
df <- NA_replacer(df, BIC_matrix)



## ANALYSIS ##
# analysis[[1]] : Price and Returns plots
# analysis[[2]] : Summaries
# analysis[[3]] : Boxplots
# analysis[[4]] and following : Scatterplots

w <- 12 # Number of weeks to be aggregated in the boxplots
# NOTE(review): confirm the on-disk case of this file name as well.
source("EDA.r") # Runs an Exploratory Data Analysis
analysis <- EDA(df, w)






## PACKAGES ##
## CLEAR ALL ##
# Start from an empty workspace. This must run before the source() calls
# below, otherwise the functions they define would be removed again.
rm(list = ls())
gc()


# Load the project functions NA_replacer() and EDA() used further down.
source("NA_replacer.R")
source("EDA.R")

## DATA LOADING ##
df<-read.csv("data_adam.csv") # Main dataframe which contains 2005-2016 weekly maize, wheat and soybeans prices

## DATA IMPUTATION ##
# c(5,1) is passed as `pq`: the largest AR order and the largest MA order
# that NA_replacer() tries when choosing a model by BIC.
df <- NA_replacer(df, c(5,1)) # Replaces NA values using ARIMA predictions



## ANALYSIS ##
# Layout of the list returned by EDA() (as described by the original author;
# EDA.R itself is not visible here):
# analysis[[1]] : Price and Returns plots
# analysis[[2]] : Summaries
# analysis[[3]] : Boxplots
# analysis[[4]] and following : Scatterplots

w <- 12 # Number of weeks to be aggregated in the boxplots
analysis <- EDA(df, w) # Runs an Exploratory Data Analysis