Skip to content

Commit 0f522a4

Browse files
committed
2 parents 28d3992 + 0f8d8ab commit 0f522a4

3 files changed

Lines changed: 88 additions & 2 deletions

File tree

requeriments.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,4 +55,6 @@ missingno==0.5.1
5555

5656
zipfile36==0.1.3
5757

58-
imblearn==0.0
58+
imblearn==0.0
59+
60+
wordcloud==1.7.0

toolkit/machine_learning.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,4 +140,32 @@ def create_multiclass_prediction_df(model, class_names: List[str], X_test: Union
140140
if only_wrong:
141141
model_predictions_df = model_predictions_df[model_predictions_df['Top Prediction'] != model_predictions_df['Label']]
142142
# return the dataframe
143-
return model_predictions_df
143+
return model_predictions_df
144+
145+
def quickregression(name):
    """
    Train a regression model and print its error metrics on the test split.

    The model is created by calling ``name()`` with no arguments, fitted on
    ``X_train`` / ``y_train`` and used to predict ``X_test``. MAE, MAPE, MSE
    and RMSE are then printed, all computed against ``y_test``.

    NOTE: ``X_train``, ``y_train``, ``X_test``, ``y_test`` and ``np`` are not
    parameters. They are resolved as module-level names, so the train/test
    split must already exist in this module's namespace before the call.

    Parameters
    ----------
    name = Model class (or any zero-argument callable) whose instances expose
           ``fit``, ``predict`` and ``score``.
    Input Example = LinearRegression

    Returns
    ----------
    The value of ``model.score(X_train, y_train)``, i.e. the score measured on
    the TRAINING split (R2 for scikit-learn regressors), not on the test split
    used for the printed metrics.
    """
    # Imported lazily so importing this module does not require scikit-learn.
    from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

    # Fit the model on the previously split X_train, y_train.
    model = name()
    model.fit(X_train, y_train)

    # Predict with X_test.
    modpred = model.predict(X_test)

    # MSE is computed once and reused for the RMSE line.
    mse = mean_squared_error(y_test, modpred)

    # Scores of the model with y_test and the predicted values.
    print("MAE test:", mean_absolute_error(y_test, modpred))
    print("MAPE test:", mean_absolute_percentage_error(y_test, modpred))
    print("MSE test:", mse)
    print("RMSE test:", np.sqrt(mse))
    return model.score(X_train, y_train)

toolkit/plot.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from collections import defaultdict
88
import plotly.offline as py
99
from wordcloud import STOPWORDS
10+
import plotly.express as px
11+
1012

1113
def plot_multiclass_prediction_image(df, row_index: int, X_test: Union[pd.DataFrame, np.ndarray], prediction_col: str = 'Top Prediction', label_col: str = 'Label'):
1214
'''
@@ -111,5 +113,59 @@ def horizontal_bar_chart(df, color):
111113
fig['layout'].update(height=1200, width=900, paper_bgcolor='rgb(233,233,233)', title="Word Count Plots")
112114
py.iplot(fig, filename='word-plots')
113115

116+
<<<<<<< HEAD
114117
return fig
115118

119+
=======
120+
def sunburst(df, interior:str, exterior:str, col_num:str, title:str):
    '''
    Draw a two-level Plotly sunburst chart (similar to a pie chart with an
    inner and an outer ring), display it and return the figure.

    Parameters
    ----------
    df -> dataframe we are working with
    interior -> name of the column drawn on the inner ring; recommended for columns which have two or so unique values. Must be 'str'
    exterior -> name of the column drawn on the outer ring; recommended for columns which have more values, because the outer ring has more space. Must be 'str'
    col_num -> name of the column to be measured; it sizes each sector for both levels (interior and exterior). Must be 'str'
    title -> the title shown on the chart, must be 'str'

    Return
    ----------

    The figure created by ``px.sunburst`` (800x600, 'plotly_dark' template),
    after it has been displayed with ``fig.show()``.

    '''

    # px.sunburst builds the whole figure; no empty go.Figure() is needed first.
    fig = px.sunburst(df, path=[interior, exterior], values=col_num, template='plotly_dark')
    fig.update_layout(width=800, height=600, title=title)
    fig.show()

    # Returned so callers can further customise or save the chart.
    return fig
144+
145+
def wordcloudviz(column):
    """
    Show a quick wordcloud visualization of the text held in a column.

    Parameters
    ----------
    column = iterable of text values, typically a dataframe column.
    Input example: df['column_name']

    Entries that are not strings (for example missing values) are skipped,
    because they cannot be joined into the text.

    Return
    ---------
    Nothing is returned; an 800x800 wordcloud with a white background is
    displayed through matplotlib.
    """
    # Imported lazily so importing this module does not require matplotlib.
    import matplotlib.pyplot as plt
    from wordcloud import WordCloud

    # First, concatenate every text entry into a single space-separated text.
    text = " ".join(comment for comment in column if isinstance(comment, str))

    # Create the wordcloud from the combined text.
    wordcloud = WordCloud(width=800, height=800, background_color='white').generate(text)

    # Render it on a square figure without axes or padding.
    plt.figure(figsize=(8, 8), facecolor=None)
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.tight_layout(pad=0)
    plt.show()
171+
>>>>>>> 0f8d8abe7a26aac02e768b21b22a1d2e58bd6d30

0 commit comments

Comments
 (0)