1515 DirectoryIterator
1616)
1717
def extract_date(df, date_column_name):
    '''
    Convert a text column that contains dates into a real datetime column and
    add ``Year`` and ``Month`` columns derived from it.

    The first ``YYYY?MM?DD`` pattern found in each value is used (``?`` being
    any single separator character, e.g. ``-``, ``/`` or ``.``). Values with no
    such pattern, or with an impossible date, become ``NaT``.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataset that holds the column to convert. It is modified in place.
    date_column_name : str
        Name of the column whose values contain the date as text.

    Return
    ----------
    df : pandas.DataFrame
        The same DataFrame object, with ``date_column_name`` converted to
        datetime and the new ``Year`` and ``Month`` columns added.

    Raises
    ----------
    KeyError
        If ``date_column_name`` is not a column of ``df``.
    '''
    # Local import keeps this function usable regardless of how the module
    # aliases pandas at file level.
    import pandas as pd

    # Search the column *values* (not the column name) for the date pattern.
    # astype(str) lets already-parsed datetimes and missing values go through
    # the same path: they either match the pattern or end up as NaT below.
    parts = df[date_column_name].astype(str).str.extract(
        r'(\d{4}).(\d{2}).(\d{2})'
    )

    # Normalise whatever separator was used to ISO format before parsing;
    # rows without a match stay missing because str.cat propagates NaN.
    iso_dates = parts[0].str.cat([parts[1], parts[2]], sep='-')
    df[date_column_name] = pd.to_datetime(
        iso_dates, format='%Y-%m-%d', errors='coerce'
    )

    df['Year'] = df[date_column_name].dt.year
    df['Month'] = df[date_column_name].dt.month

    return df
4318
4419def list_categorical_columns (df ):
4520 '''
@@ -844,4 +819,40 @@ class subdirectories (default: False).
844819 next (generator )
845820 images_generated += batch_size
846821
847- return generator
822+ return generator
823+
824+
def Nantreatment(data, replace=True, replace_value='None', replace_numeric_with_mean=False):
    '''
    Function:
    -----------
    Handle the NaN values of a DataFrame, either by filling them in or by
    dropping the rows that contain them. The input DataFrame is never
    modified.

    Parameters:
    -----------
    data: Pandas DataFrame
        Data that the function is going to analyze.
    replace: bool
        If True, NaN values are filled: numeric columns with 0 (or with the
        column mean, see ``replace_numeric_with_mean``) and object columns
        with ``replace_value``. If False, every row containing a NaN is
        dropped instead.
    replace_value: object
        Value used to fill NaN in object columns. Defaults to the string
        'None'.
    replace_numeric_with_mean: bool
        If True, NaN in numeric columns is replaced by the column mean
        instead of 0. Only used when ``replace`` is True.

    Returns:
    -----------
    Pandas DataFrame
        A copy of the input DataFrame with NaN values replaced or dropped,
        and with the index reset to 0..n-1.
    '''
    if not replace:
        # dropna already returns a new object, so no explicit copy is needed.
        return data.dropna().reset_index(drop=True)

    # Work on a copy: column assignment below would otherwise leak the
    # changes into the caller's DataFrame.
    result = data.copy()

    numeric_cols = result.select_dtypes(include='number').columns
    if len(numeric_cols):
        if replace_numeric_with_mean:
            # Mean over numeric columns only; averaging the whole frame fails
            # on recent pandas when object columns are present.
            fill = result[numeric_cols].mean()
        else:
            fill = 0
        result[numeric_cols] = result[numeric_cols].fillna(fill)

    object_cols = result.select_dtypes(include=[object]).columns
    if len(object_cols):
        result[object_cols] = result[object_cols].fillna(replace_value)

    return result.reset_index(drop=True)
0 commit comments