TheBridgeMachineLearningPythonLibrary
diff --git a/‎test/__pycache__/__init__.cpython-37.pyc‎
244 Bytes b/‎test/__pycache__/__init__.cpython-37.pyc‎
244 Bytes
diff --git a/‎test/__pycache__/test_ignore_columns_polyfeatures.cpython-37-pytest-7.2.1.pyc‎
2.93 KB b/‎test/__pycache__/test_ignore_columns_polyfeatures.cpython-37-pytest-7.2.1.pyc‎
2.93 KB
diff --git a/‎test/__pycache__/test_log_transform_data.cpython-37-pytest-7.2.1.pyc‎
1.43 KB b/‎test/__pycache__/test_log_transform_data.cpython-37-pytest-7.2.1.pyc‎
1.43 KB
diff --git a/‎test/test_ignore_columns_polyfeatures.py‎
Lines changed: 82 additions & 0 deletions b/‎test/test_ignore_columns_polyfeatures.py‎
Lines changed: 82 additions & 0 deletions
diff --git a/‎test/test_log_transform_data.py‎
Lines changed: 1 addition & 11 deletions b/‎test/test_log_transform_data.py‎
Lines changed: 1 addition & 11 deletions
@@ -0,0 +1,82 @@
+import pandas as pd
+
+from typing import List
+from sklearn.preprocessing import PolynomialFeatures
+
+from toolkit.machine_learning import ignore_columns_polyfeatures
+
+
+def test_ignore_columns_polyfeatures_keeps_ignored_columns():
+    '''
+    The columns listed in variables_to_ignore must come back unchanged.
+
+    The test name is specific to this check and unique within the module: pytest
+    only collects the last function bound to a given name, so a reused name
+    would silently drop this assertion from the run.
+    '''
+    df = pd.DataFrame({'a': [0, 0, 1, 0],
+                       'b': [16, 7, 6, 16],
+                       'c': [61, 57, 16, 36],
+                       'd': ['12', '22', '13', '44'],
+                       'e': ['Green', 'Red', 'Blue', 'Yellow'],
+                       'f': [1, 11, 23, 66]})
+    ignored = ['a', 'd', 'e']
+
+    df_processed = ignore_columns_polyfeatures(df, variables_to_ignore=ignored, n=2)
+
+    # Values, dtypes and column order of the ignored columns must all survive.
+    pd.testing.assert_frame_equal(df[ignored], df_processed[ignored])
+
+
+
+def test_ignore_columns_polyfeatures_column_count():
+    '''
+    The result must hold the ignored columns plus every generated polynomial term.
+
+    The test name is specific to this check and unique within the module: pytest
+    only collects the last function bound to a given name, so a reused name
+    would hide one of the tests from the run.
+    '''
+    df = pd.DataFrame({'a': [0, 0, 1, 0],
+                       'b': [16, 7, 6, 16],
+                       'c': [61, 57, 16, 36],
+                       'd': ['12', '22', '13', '44'],
+                       'e': ['Green', 'Red', 'Blue', 'Yellow'],
+                       'f': [1, 11, 23, 66]})
+
+    df_processed = ignore_columns_polyfeatures(df, variables_to_ignore=['a', 'd', 'e'], n=2)
+
+    # 3 ignored columns + 10 degree-2 terms over b, c, f
+    # (1 bias + 3 linear + 6 squares/pairwise products) = 13.
+    assert len(df_processed.columns) == 13
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+def ignore_columns_polyfeatures(X: pd.DataFrame, variables_to_ignore: List[str], n: int) -> pd.DataFrame:
+    '''
+    Create polynomial features of degree n for every column of X except the ignored ones.
+
+    The ignored columns (binary columns, for example) are passed through untouched and
+    placed first in the result, followed by the generated polynomial columns.
+    It is intended to be included in a Pipeline.
+
+    Parameters
+    ----------
+    X : dataframe
+        The dataset we want to selectively create polynomial features for
+    variables_to_ignore : List[str]
+        a list of column names to ignore in the polynomial feature creation
+    n : int
+        the degree for the polynomial feature creation
+
+
+    Return
+    ----------
+        df : new Dataframe with the ignored columns followed by the polynomial
+             features, carrying the same index as X. X itself is not modified.
+    '''
+    # NOTE(review): this module also imports ignore_columns_polyfeatures from
+    # toolkit.machine_learning; this later definition rebinds the name, so the tests
+    # in this module call this copy rather than the packaged one - confirm which is intended.
+    X_to_expand = X.drop(columns=variables_to_ignore)
+    X_ignore = X[variables_to_ignore]
+
+    poly = PolynomialFeatures(degree=n)
+    poly_array = poly.fit_transform(X_to_expand)
+    poly_features_names = poly.get_feature_names_out(X_to_expand.columns)
+
+    # Build the polynomial frame on X's own index instead of a fresh RangeIndex:
+    # the rows then line up with the ignored columns in the concat below, and the
+    # caller keeps the original row labels (e.g. after a shuffled train/test split).
+    X_poly_features = pd.DataFrame(poly_array, columns=poly_features_names, index=X.index)
+
+    return pd.concat([X_ignore, X_poly_features], axis=1)
@@ -1,18 +1,8 @@
 import pandas as pd
 import numpy as np
-import re
-from datetime import datetime
-from typing import List
-from nltk.corpus import stopwords
-from nltk.stem.snowball import SnowballStemmer
-import cv2 
-import os
-from skimage.io import imread
-import sys
 import pytest
 
-sys.path.append('/home/sean/Documentos/the_bridge_bootcamp/My_Workspaces/MachineLearningToolKit/toolkit')
-from data_processing import log_transform_data
+from toolkit.data_processing import log_transform_data
 
 
 def test_log_transform_data_ignore():