Skip to content

Commit bb179d6

Browse files
committed
pedrofunction
1 parent 12d7b37 commit bb179d6

2 files changed

Lines changed: 37 additions & 0 deletions

File tree

toolkit/data_analysis.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,26 @@ def read_csv_zip(zip_file, csv_file, sep=';'):
5454

5555
return df
5656

57+
def chi_squared_test(df, feature, target):
    """
    Perform a chi-squared test of independence between two categorical columns.

    Params:
    - df: A DataFrame containing the variables of interest.
    - feature: Label of the column holding the independent variable.
    - target: Label of the column holding the dependent variable.

    Returns:
    - chi2: The chi-squared statistic obtained in the test.
    - p: The p-value obtained in the test.

    Note: scipy's chi2_contingency is called with its default arguments, so
    Yates' continuity correction is applied when the table has one degree of
    freedom (a 2x2 table).
    """
    # Local imports, kept from the original so the function stays
    # self-contained regardless of what the module imports at top level.
    import scipy.stats as stats
    import pandas as pd

    # Cross-tabulate the observed frequency of every (feature, target) pair.
    contingency_table = pd.crosstab(df[feature], df[target])

    # chi2_contingency also returns the degrees of freedom and the expected
    # frequencies; callers only need the statistic and the p-value.
    chi2, p, _dof, _expected = stats.chi2_contingency(contingency_table)

    return chi2, p

toolkit/data_processing.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,3 +314,17 @@ def convert_to_numeric(df,column:str):
314314
'''
315315
df[column] = df[column].apply(lambda x: pd.to_numeric(x, errors = 'coerce'))
316316
return df
317+
318+
def _exponential_smooth(data, alpha):
319+
"""
320+
Function that exponentially smooths dataset so values are less 'rigid'
321+
:param alpha: weight factor to weight recent values more
322+
"""
323+
324+
smoothed_data = data.ewm(alpha=alpha).mean()
325+
326+
# Check that the first and last values of the smoothed data are the same as the original data
327+
smoothed_data.iloc[0] = data.iloc[0]
328+
smoothed_data.iloc[-1] = data.iloc[-1]
329+
330+
return smoothed_data

0 commit comments

Comments
 (0)