Skip to content

Commit 36da747

Browse files
committed
test ignore_columns_polyfeatures
1 parent 7de2fca commit 36da747

5 files changed

Lines changed: 83 additions & 11 deletions
244 Bytes
Binary file not shown.
Binary file not shown.
Binary file not shown.
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import pandas as pd
2+
3+
from typing import List
4+
from sklearn.preprocessing import PolynomialFeatures
5+
6+
from toolkit.machine_learning import ignore_columns_polyfeatures
7+
8+
9+
def test_ignore_columns_polyfeatures_keeps_ignored_columns():
    '''
    Columns listed in variables_to_ignore must come back unchanged.

    The test needs a name that is unique within the module: a later
    function with the same name would replace this one, and pytest would
    then never collect it.
    '''
    df = pd.DataFrame({'a': [0, 0, 1, 0],
                       'b': [16, 7, 6, 16],
                       'c': [61, 57, 16, 36],
                       'd': ['12', '22', '13', '44'],
                       'e': ['Green', 'Red', 'Blue', 'Yellow'],
                       'f': [1, 11, 23, 66]})
    ignored = ['a', 'd', 'e']

    df_processed = ignore_columns_polyfeatures(df, variables_to_ignore=ignored, n=2)

    # The binary column and the two string columns must be passed through
    # with identical values and dtypes.
    pd.testing.assert_frame_equal(df[ignored], df_processed[ignored])
20+
21+
22+
23+
def test_ignore_columns_polyfeatures_column_count():
    '''
    The result must hold the ignored columns plus every degree-2 term.

    The test needs a name that is unique within the module: reusing the
    name of an earlier test function would replace that earlier test, so
    pytest would only ever run one of the two.
    '''
    df = pd.DataFrame({'a': [0, 0, 1, 0],
                       'b': [16, 7, 6, 16],
                       'c': [61, 57, 16, 36],
                       'd': ['12', '22', '13', '44'],
                       'e': ['Green', 'Red', 'Blue', 'Yellow'],
                       'f': [1, 11, 23, 66]})

    df_processed = ignore_columns_polyfeatures(df, variables_to_ignore=['a', 'd', 'e'], n=2)

    # 3 ignored columns (a, d, e) + 10 polynomial terms over (b, c, f):
    # the constant term, 3 linear terms and 6 quadratic terms.
    assert len(df_processed.columns) == 13
34+
35+
36+
37+
38+
39+
40+
41+
42+
43+
44+
45+
46+
47+
48+
49+
50+
51+
def ignore_columns_polyfeatures(X: pd.DataFrame, variables_to_ignore: List[str], n: int) -> pd.DataFrame:
    '''
    Build polynomial features of degree n from every column of X except the ignored ones.

    The ignored columns (binary flags, for example) are passed through
    untouched and placed in front of the generated polynomial columns.
    The function is intended to be usable as a step inside a Pipeline.

    Parameters
    ----------
    X : dataframe
        The dataset to expand selectively with polynomial features
    variables_to_ignore : List[str]
        Names of the columns to leave out of the polynomial feature creation
    n : int
        The degree used for the polynomial feature creation

    Return
    ----------
    df : Dataframe holding the ignored columns followed by the polynomial
        feature columns. The row index is rebuilt from 0, so the index of
        X is not kept.
    '''
    columns_to_expand = X.drop(columns=variables_to_ignore)

    # The ignored columns get a fresh 0..len-1 index so they line up row by
    # row with the polynomial frame below, which is built from a bare array.
    passthrough = X[variables_to_ignore].reset_index(drop=True)

    transformer = PolynomialFeatures(degree=n)
    expanded = pd.DataFrame(
        transformer.fit_transform(columns_to_expand),
        columns=transformer.get_feature_names_out(columns_to_expand.columns),
    )

    return pd.concat([passthrough, expanded], axis=1)

test/test_log_transform_data.py

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,8 @@
11
import pandas as pd
22
import numpy as np
3-
import re
4-
from datetime import datetime
5-
from typing import List
6-
from nltk.corpus import stopwords
7-
from nltk.stem.snowball import SnowballStemmer
8-
import cv2
9-
import os
10-
from skimage.io import imread
11-
import sys
123
import pytest
134

14-
sys.path.append('/home/sean/Documentos/the_bridge_bootcamp/My_Workspaces/MachineLearningToolKit/toolkit')
15-
from data_processing import log_transform_data
5+
from toolkit.data_processing import log_transform_data
166

177

188
def test_log_transform_data_ignore():

0 commit comments

Comments
 (0)