From 5aa92157a73e18b9469c1bc08365a6536b612a14 Mon Sep 17 00:00:00 2001 From: Kartik Mathpal Date: Fri, 28 Jul 2017 17:45:37 +0530 Subject: [PATCH 1/2] remove spaces around data --- datacleaner/datacleaner.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datacleaner/datacleaner.py b/datacleaner/datacleaner.py index 0fcb87b..f4bf18b 100644 --- a/datacleaner/datacleaner.py +++ b/datacleaner/datacleaner.py @@ -174,6 +174,9 @@ def autoclean_cv(training_dataframe, testing_dataframe, drop_nans=False, copy=Fa training_dataframe[column] = column_encoder.transform(training_dataframe[column].values) testing_dataframe[column] = column_encoder.transform(testing_dataframe[column].values) + training_dataframe.columns = [c.strip() for c in training_dataframe.columns] + testing_dataframe.columns = [c.strip() for c in testing_dataframe.columns] + return training_dataframe, testing_dataframe From 6b951dca29c036faa5c5b0aac1f1710787ebf5eb Mon Sep 17 00:00:00 2001 From: Kartik Mathpal Date: Fri, 28 Jul 2017 17:55:35 +0530 Subject: [PATCH 2/2] remove spaces from data frame --- datacleaner/datacleaner.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/datacleaner/datacleaner.py b/datacleaner/datacleaner.py index f4bf18b..3e89ba2 100644 --- a/datacleaner/datacleaner.py +++ b/datacleaner/datacleaner.py @@ -94,7 +94,7 @@ def autoclean(input_dataframe, drop_nans=False, copy=False, encoder=None, column_encoder = LabelEncoder().fit(input_dataframe[column].values) input_dataframe[column] = column_encoder.transform(input_dataframe[column].values) - + input_dataframe.columns = [c.strip() for c in input_dataframe.columns] return input_dataframe def autoclean_cv(training_dataframe, testing_dataframe, drop_nans=False, copy=False, @@ -174,8 +174,7 @@ def autoclean_cv(training_dataframe, testing_dataframe, drop_nans=False, copy=Fa training_dataframe[column] = column_encoder.transform(training_dataframe[column].values) testing_dataframe[column] = column_encoder.transform(testing_dataframe[column].values) - training_dataframe.columns = [c.strip() for c in training_dataframe.columns] - testing_dataframe.columns = [c.strip() for c in testing_dataframe.columns] + return training_dataframe, testing_dataframe