test Leandro subidos

Lean788 · Lean788 · commit 9599da805aa8 · 2023-02-22T19:40:28.000+01:00
diff --git a/test/test_load_model_zip.py b/test/test_load_model_zip.py
@@ -0,0 +1,28 @@
+import pickle
+import zipfile
+import tempfile
+from toolkit.machine_learning import load_model_zip
+import os
+import tempfile
+
+def test_load_model_zip():
+    """Pickle a dict, zip it, and check load_model_zip restores it."""
+    expected = {'name': 'example_model', 'accuracy': 0.95}
+    model_file = 'model.pkl'
+
+    # The temporary directory and its contents are removed on exit.
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        pickle_path = os.path.join(tmp_dir, model_file)
+        zip_file = os.path.join(tmp_dir, 'model.zip')
+
+        # Serialise the fixture, then store it in the archive.
+        with open(pickle_path, 'wb') as handle:
+            pickle.dump(expected, handle)
+        with zipfile.ZipFile(zip_file, 'w') as archive:
+            archive.write(pickle_path, model_file)
+
+        loaded_model = load_model_zip(zip_file, model_file)
+
+        assert isinstance(loaded_model, dict)
+        assert loaded_model['name'] == 'example_model'
+        assert loaded_model['accuracy'] == 0.95
diff --git a/test/test_read_csv_zip.py b/test/test_read_csv_zip.py
@@ -0,0 +1,34 @@
+import pandas as pd
+import os
+from io import BytesIO
+from toolkit.data_analysis import read_csv_zip
+import zipfile
+
+
+def test_read_csv_zip():
+    """Check read_csv_zip with its default separator and an explicit one."""
+    csv_content = "name;age;city\nJohn;30;Madrid\nSara;28;London\n"
+    csv_file = "test.csv"
+    zip_file = 'test.zip'
+
+    # Build the fixture archive; writestr accepts the text directly.
+    with zipfile.ZipFile(zip_file, 'w') as myzip:
+        myzip.writestr(csv_file, csv_content)
+
+    try:
+        # Test case 1: rely on the default separator (';')
+        df = read_csv_zip(zip_file, csv_file)
+        assert isinstance(df, pd.DataFrame)
+        assert df.shape == (2, 3)
+        assert df.iloc[0, 0] == 'John'
+        assert df.iloc[1, 2] == 'London'
+
+        # Test case 2: pass the separator explicitly
+        df = read_csv_zip(zip_file, csv_file, sep=';')
+        assert isinstance(df, pd.DataFrame)
+        assert df.shape == (2, 3)
+        assert df.iloc[0, 0] == 'John'
+        assert df.iloc[1, 2] == 'London'
+    finally:
+        # Remove the archive even when an assertion above fails.
+        os.remove(zip_file)
diff --git a/test/test_read_url.py b/test/test_read_url.py
@@ -0,0 +1,20 @@
+import pytest
+import pandas as pd
+from toolkit.data_analysis import read_url
+
+def test_read_url():
+    """Check read_url on a CSV URL, a TSV URL and an invalid URL."""
+    csv_url = 'https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/02_Filtering_%26_Sorting/Euro12/Euro_2012_stats_TEAM.csv'
+    tsv_url = 'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv'
+    invalid_url = 'https://thisurlisnotvalid.com'
+
+    # Test case 1: valid URL and correct data format
+    loaded = read_url(csv_url)
+    assert isinstance(loaded, pd.DataFrame)
+
+    # Test case 2: valid URL but incorrect data format
+    # Test case 3: invalid URL
+    # Both calls are expected to raise.
+    for failing_url in (tsv_url, invalid_url):
+        with pytest.raises(Exception):
+            read_url(failing_url)
diff --git a/toolkit/data_analysis.py b/toolkit/data_analysis.py
@@ -0,0 +1,56 @@
+import pandas as pd
+
+def read_url(url):
+    """
+    Read a delimited text file from ``url`` into a pandas DataFrame.
+
+    Every separator/encoding pair is tried in turn; the first parse that
+    does not collapse to one column is returned. Other errors propagate.
+
+    Parameters
+    ----------
+    url : location of the file, passed unchanged to ``pd.read_csv``.
+
+    Returns
+    -------
+    df : the first such DataFrame, or None when every pair gives one column.
+    """
+    # Raw string: '\s+' is a regex, and '\s' is an invalid str escape.
+    separators = (',', ';', '|', ':', '\t', r'\s+')
+    # One name per codec; the original list repeated aliases of each.
+    encodings = ('utf-8', 'latin-1', 'ascii', 'utf-16', 'utf-32')
+
+    for s in separators:
+        for e in encodings:
+            try:
+                df = pd.read_csv(url, sep=s, encoding=e)
+            except UnicodeError:
+                continue  # wrong encoding guess: try the next candidate
+            if df.shape[1] != 1:
+                return df
+    return None
+
+def read_csv_zip(zip_file, csv_file, sep=';'):
+    """
+    Load a CSV file stored inside a zip archive into a pandas DataFrame.
+
+    Parameters
+    ----------
+         zip_file: The name of the zip file that contains the CSV file.
+         csv_file: The name of the CSV file to load.
+         sep: The separator to use when reading the CSV file. The default
+             value is ';'.
+
+    Returns
+    -------
+        A pandas DataFrame object that contains the data from the CSV file.
+    """
+    # The docstring must precede the import to be exposed as __doc__.
+    import zipfile
+
+    with zipfile.ZipFile(zip_file, 'r') as archive:
+        with archive.open(csv_file, 'r') as member:
+            # Parse straight from the open archive member.
+            df = pd.read_csv(member, sep=sep)
+    return df
+
diff --git a/toolkit/machine_learning.py b/toolkit/machine_learning.py
@@ -168,4 +168,28 @@ def quickregression(name):
     print("MAPE test:", mean_absolute_percentage_error(y_test, modpred))
     print("MSE test:", mean_squared_error(y_test, modpred))
     print("RMSE test:", np.sqrt(mean_squared_error(y_test, modpred)))
-    return(model.score(X_train, y_train))
+    return(model.score(X_train, y_train))
+
+
+def load_model_zip(zip_file, model_file):
+    """
+    Load a pickled model stored inside a zip archive.
+
+    Parameters
+    ----------
+         zip_file: The name of the zip file where the model file is located.
+         model_file: The name of the model file to load.
+
+    Returns
+    -------
+         The model loaded from the file.
+    """
+    # The docstring must come first to be exposed as __doc__.
+    import pickle
+    import zipfile
+
+    with zipfile.ZipFile(zip_file, "r") as archive:
+        with archive.open(model_file, "r") as member:
+            # SECURITY: unpickling can run arbitrary code; trusted files only.
+            model = pickle.load(member)
+    return model