From fc5d03474f29be9a5b78c3a36925600080f3d46b Mon Sep 17 00:00:00 2001 From: JustGlowing Date: Sat, 19 Sep 2015 10:15:50 +0100 Subject: [PATCH 1/4] Create README.md --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..4afddc8 --- /dev/null +++ b/README.md @@ -0,0 +1,16 @@ +# machinelearningregression +boilerplate code, scripts, modules, data for Regression Analysis workshop + +Before starting check that your working environment is ready running the `runme.py`: + +``` +$ python runme.py +``` + +If the following message appears: + +``` +Good to go, all packages installed ok, ready to code. +``` + +You're ready fot the workshop! From 75de81ad1ca977f4703d66174343326c22cfa8d6 Mon Sep 17 00:00:00 2001 From: JustGlowing Date: Sun, 20 Sep 2015 15:45:17 +0100 Subject: [PATCH 2/4] Update README.md --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 4afddc8..145b6cb 100644 --- a/README.md +++ b/README.md @@ -14,3 +14,7 @@ Good to go, all packages installed ok, ready to code. ``` You're ready fot the workshop! + +Otherwise, checkout one of these videos to setup your environment: +- if you have Windows: https://www.youtube.com/watch?v=pzV8Se7Fqgc +- if you have OSX: https://www.youtube.com/watch?v=UMftbOgUON8 From 18d78f240b6fd64e39d05be794b69e263f2885ca Mon Sep 17 00:00:00 2001 From: JustGlowing Date: Mon, 21 Sep 2015 14:38:15 +0100 Subject: [PATCH 3/4] Update reame --- README.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/README.md b/README.md index 145b6cb..4afddc8 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,3 @@ Good to go, all packages installed ok, ready to code. ``` You're ready fot the workshop! - -Otherwise, checkout one of these videos to setup your environment: -- if you have Windows: https://www.youtube.com/watch?v=pzV8Se7Fqgc -- if you have OSX: https://www.youtube.com/watch?v=UMftbOgUON8 From 780d2a692ef0f7f8bab63a0a89c20bbf386a3eb5 Mon Sep 17 00:00:00 2001 From: Dimitry Foures Date: Sat, 26 Sep 2015 07:13:22 +0100 Subject: [PATCH 4/4] last minute setup --- functions.py | 64 +++++++------------------------------------------- functions.pyc | Bin 6098 -> 0 bytes module3.py | 14 ++++------- module4.py | 27 +++++++-------------- module5.py | 18 ++++++-------- module6.py | 30 +++++++++-------------- module7.py | 33 ++++++++------------------ runme.py | 14 ++++------- 8 files changed, 55 insertions(+), 145 deletions(-) delete mode 100644 functions.pyc diff --git a/functions.py b/functions.py index 151a533..47777e1 100644 --- a/functions.py +++ b/functions.py @@ -1,4 +1,4 @@ -#cca regression functions +# Advanced functions from sklearn.pipeline import make_pipeline from sklearn.linear_model import LinearRegression from sklearn.preprocessing import StandardScaler, PolynomialFeatures @@ -23,23 +23,20 @@ def model_plot_3d(ax, model, x1, x2): positions = np.vstack([X1.ravel(), X2.ravel()]).T y = model.predict(positions) Y = y.reshape(X1.shape) - ax.plot_surface(X1, X2, Y, alpha = 0.2 , cmap = 'jet') - -def save_figure(name, i): - plt.savefig('../figures/'+name+str(i), bbox_inches='tight') - plt.gcf().clear() + ax.plot_surface(X1, X2, Y, alpha = 0.2 , cmap = 'jet',\ + linewidth=0.5, rstride=1, cstride=1, shade=True) def PolynomialRegression(degree = 1): - return make_pipeline(PolynomialFeatures(degree = degree), LinearRegression()) + return make_pipeline(PolynomialFeatures(degree = degree,\ + include_bias = False), LinearRegression()) def PolynomialRidge(degree = 1, alpha = 1): - return make_pipeline(PolynomialFeatures(degree = degree), StandardScaler(), Ridge(alpha = alpha)) + return make_pipeline(PolynomialFeatures(degree = degree,\ + include_bias = False), StandardScaler(), Ridge(alpha = alpha)) def PolynomialLasso(degree = 1, alpha = 1): - return make_pipeline(PolynomialFeatures(degree = degree), StandardScaler(), Lasso(alpha = alpha)) - - - + return make_pipeline(PolynomialFeatures(degree = degree,\ + include_bias = False), StandardScaler(), Lasso(alpha = alpha)) def polynomial_residual(degree, X, y): polynomial_regression= PolynomialRegression(degree = degree) @@ -48,17 +45,7 @@ def polynomial_residual(degree, X, y): mae = mean_absolute_error(y, y_pred) return mae - def organize_data(to_forecast, window, horizon): - """ - Input: - to_forecast, univariate time series organized as numpy array - window, number of items to use in the forecast window - horizon, horizon of the forecast - Output: - X, a matrix where each row contains a forecast window - y, the target values for each row of X - """ shape = to_forecast.shape[:-1] + (to_forecast.shape[-1] - window + 1, window) strides = to_forecast.strides + (to_forecast.strides[-1],) X = np.lib.stride_tricks.as_strided(to_forecast, @@ -66,36 +53,3 @@ def organize_data(to_forecast, window, horizon): strides=strides) y = np.array([X[i+horizon][-1] for i in range(len(X)-horizon)]) return X[:-horizon], y - - -class NonLinearRegression(object): - - def __init__(self, fun): - self.fun = fun - self._find_n_params() - - def fit(self, X, y): - self.params = basinhopping(lambda p: self._get_residual(X,y,p), np.random.randn(self._n_params), niter = 10000, niter_success = 50)['x'] - - def predict(self, X): - return self.fun(X.flatten(), self.params) - - def get_params(self, deep=True): - return {"fun": self.fun} - - def set_params(self, **parameters): - for parameter, value in parameters.items(): - self.setattr(parameter, value) - return self - - def _get_residual(self, X, y, p): - return np.mean((self.fun(X.flatten(), p)-y)**2) - - def _find_n_params(self): - for n_params in range(1,100)[::-1]: - try: - self.fun(1,np.random.rand(n_params)) - except: - n_params += 1 - break - self._n_params = n_params diff --git a/functions.pyc b/functions.pyc deleted file mode 100644 index 4fd1ff39c791e3513c1ecb0880560ce3ac232f98..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6098 zcmcgw+j1O78UANir7#} zudNMo6;(2Xd#>OWcoH6g=ir)x@B4eTl2hhR%1YhS{ZCJK|L3o}`S0_ye|`HupT{!# zP2hPOrT)SbN$~GUCX##7@x)`lAz4H6rgWOF-;``Z@=57TNDMy{qQPa|ymV8D! zGmbhbSxY*zIxpJiWOq(F=e0W}*#*hxr8Dn_PD{2R`9&U6IZ! zl1@s}l4Mps;NzTo%(=&T_qZS*bgs(&yJD`&?xGxuY&=?)-#4VQB%Nh3_?4o9A&1mmq5B#X2YV)(D&?B>JgR-0#(S$- zv~99F{n99H_!3%XdZtKLyJcZ@9QlZ_(5g*sW?m$XXNxB=s)aIxvW`;!5tS9$_2i%- z2TeI_P^r%*z6ZSLJt zZ$;mdhuqXVy(&#C<@}_!QM{+~RM8WYSz5$rb{ciRGD#X+Z9|tkQQs-hwWzv&R@$&0 zRNW{x$IC?fI$d5Sn2zi2zwaJ5^zp#;9^A0pSih`O3&W~);i1+K+jt>b)jvmT?ep4H z_1bPUT-zw)L2jVFwe|Hoag>;RxRyup4$L%zp%$>AYHX})-9ZuSmQ=wbPX*)FN@#-`h(^colXut&*K19H}ngoI|Xry z&H5bb)z!6b+M_?LS@yTL%l$Ad;vG|~q-@&Tv8qtjSwC|!ub~for?lEu`s~GB{Uj#0 z&bzoy5#>fjm!65}xM}VtbZlH>m&=OxhL_S{Vpc3;xiQ2*EZCHj+o5=n9E1aB+0u<~ zs{JL@mTA*g0~86g#i`WygYUA%7pH9wS1{*sz&eg_KIk<~K#r|}j0+RPMuYT$P=tG62A%3H^e0a)i=+jW|?4cSm@Kk{R&fug@paD1 zHM*${F5(%?plWkv)v|i&gMcn%8G!oZoEaVmF7=FI$N;Q!<&ilPA7EmAVFPuV45DlW ziA^ob!=gu@9Fo>-P{?W_|uXDf$EZ6W8`_8FtIc#8GWm{6UdEiK;ZR#Tgl%IC!$sD)$Ra8R zHIdNyEYs8or7+WOY7}!(Yfvb^5t1>AUHw?o*_nx<9*g0(WS1Tz z-m-Va(_4ZgQK%il`xv1{&_`k9=uSYnCy;?^FC##Ww3|`@j#YI>&cjlU-t>M+FOYJu1`~tG>$L$YO0`Al*S2^28p7YR+MZ ztCB&6ZtyV%XoS-kp-g;M4NUMU>7TQr;Dg(!j#aL$Q;e{;F#9i2l!K{8YogU`HRqbG z$yT%d7U-52gY}-?x2zsr8S2^AUODlG7l~0t-b@zIcJ#b6{;o3ps?>y3^a7fdJNZ+~ z0WHiWGU9B9EP0j!-sRm5046SU*mC(uo536Wh>^37)BbR^>;t_6uLm>{`zpWgXQ3_2 zY%jI-DmtaTnXK~q?`9GZ1z4VwkM@Uv0^C~b=}onCytX@%;g*JQ@aQfv)T=T=V$Hq8 z{yJ{Y4E2eyq?)hddkU$7*D*Ap>VoU6zQ<~X74viOGgQi4xxH&*8&D@_rg7caht3FZ uQ5E&~Q7z!;cNKqgIR3G{(3pc2PwU?-V&26@Yq~Wt*PL(8Pqk)Rm;VPgi9zfD diff --git a/module3.py b/module3.py index 20f3db6..58eefde 100644 --- a/module3.py +++ b/module3.py @@ -1,16 +1,10 @@ -from functions import save_figure -from os.path import splitext -figname = splitext(__file__)[0]+'_' -ifig = 0 +# Module 3: Linear regression +# New imports import pandas as pd import numpy as np import matplotlib.pyplot as plt from sklearn.linear_model import LinearRegression +from sklearn import metrics -bikes_df = pd.read_csv('./data/bikes_subsampled.csv') - -# We select the variables temperature and bikes_count - -temperature = bikes_df['temperature'].values -bikes_count = bikes_df['count'].values +# Code after this diff --git a/module4.py b/module4.py index 9dd7177..e92f9a4 100644 --- a/module4.py +++ b/module4.py @@ -1,27 +1,18 @@ -from functions import save_figure -from os.path import splitext -figname = splitext(__file__)[0]+'_' -ifig = 0 - -################################################################################ -################################### MODULE 4 ################################### -##################### Multiple variables linear regression ##################### -################################################################################ +# Module 4: Multiple and polynomial regression +# Previous imports import pandas as pd import numpy as np import matplotlib.pyplot as plt from sklearn.linear_model import LinearRegression -from mpl_toolkits.mplot3d.axes3d import Axes3D -from sklearn.preprocessing import PolynomialFeatures - -from functions import PolynomialRegression, model_plot_3d +# New imports +from functions import PolynomialRegression +from mpl_toolkits.mplot3d.axes3d import Axes3D +from functions import model_plot_3d +from functions import polynomial_residual +# Load dataset bikes_df = pd.read_csv('./data/bikes_subsampled.csv') -# Learning activity 1: Fit a model of 2 variables and plot the model - -features = ['temperature','humidity'] -X = bikes_df[features].values -y = bikes_df['count'].values +# Code after this diff --git a/module5.py b/module5.py index ea09f9c..17aea76 100644 --- a/module5.py +++ b/module5.py @@ -1,23 +1,19 @@ -from functions import save_figure -from os.path import splitext -figname = splitext(__file__)[0]+'_' -ifig = 0 - -################################################################################ -################################### MODULE 5 ################################### -############################# Model evaluation ################################# -################################################################################ -# To be separated in a unique file # +# Module 5: Evaluating model performance +# Previous imports import pandas as pd import numpy as np import matplotlib.pyplot as plt from sklearn.linear_model import LinearRegression from functions import PolynomialRegression -from sklearn.preprocessing import PolynomialFeatures + +# New imports from sklearn.cross_validation import train_test_split, cross_val_score from sklearn.metrics import mean_absolute_error +# Load dataset bikes_df = pd.read_csv('./data/bikes_subsampled.csv') temperature = bikes_df[['temperature']].values bikes_count = bikes_df['count'].values + +# Code after this diff --git a/module6.py b/module6.py index 382ed66..8ab6477 100644 --- a/module6.py +++ b/module6.py @@ -1,26 +1,18 @@ -from functions import save_figure -from os.path import splitext -figname = splitext(__file__)[0]+'_' -ifig = 0 +# Module 6: Avoid overfitting with regularisation - -################################################################################ -################################### MODULE 6 ################################### -############################# Regularisation ################################### -################################################################################ - - -# Learning activity 2: Ridge and Lasso regularisations +# Previous imports import pandas as pd import numpy as np import matplotlib.pyplot as plt -from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import StandardScaler, PolynomialFeatures -from sklearn.linear_model import Ridge, Lasso +from sklearn.linear_model import LinearRegression +from functions import PolynomialRegression +from sklearn.cross_validation import cross_val_score -from functions import PolynomialRidge, PolynomialLasso, PolynomialRegression +# New imports +from functions import PolynomialRidge, PolynomialLasso +from sklearn.grid_search import GridSearchCV +# Load dataset bikes_df = pd.read_csv('./data/bikes.csv') -features = ['temperature','humidity','windspeed'] -X = bikes_df[features].values -y = bikes_df['count'].values + +# Code after this diff --git a/module7.py b/module7.py index a71cb45..9411cc9 100644 --- a/module7.py +++ b/module7.py @@ -1,30 +1,17 @@ -from functions import save_figure -from os.path import splitext -figname = splitext(__file__)[0]+'_' -ifig = 0 +# Module 7: Predict the future with autoregression -################################################################################ -################################### MODULE 7 ################################### -############################# Advanced fitting methods ######################### -################################################################################ - -#Learning activity 1: use any sklearn model - -from sklearn.tree import DecisionTreeRegressor -from sklearn.cross_validation import cross_val_score -from sklearn.linear_model import LinearRegression -from sklearn import metrics -from sklearn.ensemble import RandomForestRegressor -from sklearn.neighbors import KNeighborsRegressor -from sklearn.svm import SVR -import numpy as np +# Previous imports import pandas as pd +import numpy as np import matplotlib.pyplot as plt +from sklearn.linear_model import LinearRegression +from sklearn.metrics import mean_absolute_error -from functions import NonLinearRegression, organize_data - +# New imports +from functions import organize_data -#Learning activity 2: Custom nonlinear regression +# Load dataset bikes_df = pd.read_csv('./data/bikes.csv') -temperature = bikes_df[['temperature']].values bikes = bikes_df['count'].values + +# Code after this diff --git a/runme.py b/runme.py index 4c01d04..fba7918 100644 --- a/runme.py +++ b/runme.py @@ -1,11 +1,7 @@ -import pandas -import matplotlib.pyplot as plt -from sklearn import preprocessing -from sklearn.cross_validation import train_test_split -from sklearn import metrics -import functions -from sklearn import linear_model -from sklearn import cross_validation -from sklearn.grid_search import GridSearchCV +import module3 +import module4 +import module5 +import module6 +import module7 print "Good to go, all packages installed ok, ready to code."