Merge branch 'dev' of https://github.com/TheBridgeMachineLearningPythonLibrary/MachineLearningToolKit into dev

celiacnavarro · celiacnavarro · commit 0f522a4a2424 · 2023-02-22T18:50:38.000+01:00
diff --git a/requeriments.txt b/requeriments.txt
@@ -55,4 +55,6 @@ missingno==0.5.1
 
 zipfile36==0.1.3
 
-imblearn==0.0
+imblearn==0.0
+
+wordcloud==1.7.0
diff --git a/toolkit/machine_learning.py b/toolkit/machine_learning.py
@@ -140,4 +140,32 @@ def create_multiclass_prediction_df(model, class_names: List[str], X_test: Union
     if only_wrong:
         model_predictions_df = model_predictions_df[model_predictions_df['Top Prediction'] != model_predictions_df['Label']]
     # return the dataframe
-    return model_predictions_df
+    return model_predictions_df
+
+def quickregression(name):
+    """
+    Fit a regression model on the training split and print its test errors.
+    X_train, y_train, X_test, y_test and np are read from module scope.
+
+    Parameters
+    ----------
+    name = Class of the ML model (not an instance); called with no arguments.
+           Input Example = LinearRegression
+
+    Returns
+    ----------
+    model.score(X_train, y_train) -- R2 for scikit-learn regressors, computed
+    on the TRAINING split. MAE, MAPE, MSE, RMSE (test split) are only printed.
+    """
+    # Imported after the docstring so the string above is the real __doc__.
+    from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
+    # Fit the model on the previously split X_train, y_train
+    model = name()
+    model.fit(X_train, y_train)
+    # Predict with X_test, then compare the predictions against y_test
+    modpred = model.predict(X_test)
+    print("MAE test:", mean_absolute_error(y_test, modpred))
+    print("MAPE test:", mean_absolute_percentage_error(y_test, modpred))
+    print("MSE test:", mean_squared_error(y_test, modpred))
+    print("RMSE test:", np.sqrt(mean_squared_error(y_test, modpred)))
+    return model.score(X_train, y_train)
diff --git a/toolkit/plot.py b/toolkit/plot.py
@@ -7,6 +7,8 @@
 from collections import defaultdict
 import plotly.offline as py
 from wordcloud import STOPWORDS
+import plotly.express as px
+
 
 def plot_multiclass_prediction_image(df, row_index: int, X_test: Union[pd.DataFrame, np.ndarray], prediction_col: str = 'Top Prediction', label_col: str = 'Label'):
     '''
@@ -111,5 +113,59 @@ def horizontal_bar_chart(df, color):
     fig['layout'].update(height=1200, width=900, paper_bgcolor='rgb(233,233,233)', title="Word Count Plots")
     py.iplot(fig, filename='word-plots')
 
+
     return fig
 
+
+def sunburst(df, interior:str, exterior:str, col_num:str, title:str):
+    '''
+    Show a Plotly sunburst chart: similar to a pie chart but with two levels. The inner ring suits columns with
+    one or two unique values; the outer ring suits columns with more values.
+
+    Parameters
+    ----------
+    df -> dataframe we are working with
+    interior -> column for the inner ring; recommended for columns with about two unique values. Must be 'str'
+    exterior -> column for the outer ring; recommended for columns with more values, since it has more space. Must be 'str'
+    col_num -> column whose values size the sectors of both rings (interior and exterior). Must be 'str'
+    title -> the title we want to show on the figure. Must be 'str'
+
+    Return
+    ----------
+    None. The figure (800x600, 'plotly_dark' template) is displayed with fig.show()
+    and is not returned.
+
+    '''
+
+    # px.sunburst builds the complete figure, so no empty go.Figure() is created first.
+    fig = px.sunburst(df, path=[interior, exterior], values=col_num, template = 'plotly_dark')
+    fig.update_layout(width=800, height=600, title = title)
+    fig.show()
+
+def wordcloudviz(column):
+    """
+    Show a quick word cloud built from the text entries of a column.
+
+    Parameters
+    ----------
+    column = iterable of strings, for example a column of a dataframe.
+             Input example: df['column_name']
+
+    Return
+    ---------
+        None. The word cloud is drawn in an 8x8 matplotlib figure and shown.
+    """
+    # Imported after the docstring so the string above is the real __doc__.
+    import matplotlib.pyplot as plt
+    from wordcloud import WordCloud
+
+    # Concatenate every entry into one space-separated text for WordCloud.
+    text = " ".join(column)
+    wordcloud = WordCloud(width=800, height=800, background_color='white').generate(text)
+
+    plt.figure(figsize=(8, 8), facecolor=None)
+    plt.imshow(wordcloud)
+    plt.axis("off")
+    plt.tight_layout(pad=0)
+    plt.show()
+