@@ -169,18 +169,13 @@ def buffdescribe(df, stats=['mean', 'median', 'std']):
169169 describe = concatenado .sort_values (by = ["MISSINGS (%)" ], ascending = True )
170170
171171 # Descriptive statistics
172- numeric_cols = df .select_dtypes (include = ['int' , 'float' ]).columns .tolist ()
173- numeric_stats = df [numeric_cols ].agg (stats )
174- numeric_stats .columns = [col + '_' + stat for col in numeric_stats .columns for stat in stats ]
172+ agg = df .agg (stats ).transpose ()
175173
176174 # Merge in a new dataframe
177- describe = pd .merge (concatenado , numeric_stats , left_index = True , right_index = True , how = 'left' )
178- describe .rename (columns = {'mean' : 'MEAN' ,
179- 'median' : 'MEDIAN' , 'std' : 'STD' }, inplace = True ),
175+ describe = pd .merge (concatenado , agg , left_index = True , right_index = True , how = 'left' )
180176
181177 return describe
182178
183-
184179def clean_text (df , column :str , language :str , target :str , filename :str = 'data_processed.csv' ):
185180
186181 '''
@@ -206,7 +201,7 @@ def clean_text(df, column:str, language:str, target:str, filename:str='data_proc
206201 df [column ] = df [column ].str .replace (r'\s*@\w+' , '' , regex = True )
207202
# Remove punctuation marks and digit runs, and convert to lowercase.
# A single character class replaces the former alternation of one-character
# capturing groups: it matches exactly the same characters, without creating
# thirteen capture groups that were never read.
signos = re.compile(r'[.;:!?¿@,"()\[\]]|\d+')

def signs_tweets(tweet):
    """Return ``tweet`` lowercased, with punctuation marks and digit runs removed.

    Surrounding whitespace is left untouched, so removing a token that sat
    between two spaces leaves both spaces in place.
    """
    return signos.sub('', tweet.lower())
@@ -220,8 +215,8 @@ def remove_links(df):
220215 df [column ] = df [column ].apply (remove_links )
221216
# Remove stopwords
# The set is bound to ``stop_words`` rather than ``stopwords`` on purpose:
# assigning to ``stopwords`` inside a function makes that name local to the
# whole function, so the ``stopwords.words(...)`` lookup on the right-hand
# side would raise UnboundLocalError. (Assumes ``stopwords`` is a corpus
# object imported at module level -- the import is not visible here.)
# Building the set once, outside the helper, avoids rebuilding it per row.
stop_words = set(stopwords.words(language))

def remove_stopwords(text):
    """Return ``text`` with whitespace-separated tokens found in ``stop_words`` dropped.

    The remaining tokens are re-joined with single spaces.
    """
    return " ".join(word for word in text.split() if word not in stop_words)

df[column] = df[column].apply(remove_stopwords)
227222
0 commit comments