Skip to content

Commit 9599da8

Browse files
committed
test Leandro subidos
1 parent 56d8926 commit 9599da8

5 files changed

Lines changed: 163 additions & 1 deletion

File tree

test/test_load_model_zip.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import pickle
2+
import zipfile
3+
import tempfile
4+
from toolkit.machine_learning import load_model_zip
5+
import os
6+
import tempfile
7+
8+
def test_load_model_zip():
    """Round-trip a pickled dict through a zip archive and load it back.

    A small dict stands in for a model: it is pickled to ``model.pkl``,
    packed into ``model.zip`` inside a temporary directory, and then read
    back with ``load_model_zip``.
    """
    with tempfile.TemporaryDirectory() as work_dir:
        pickle_path = os.path.join(work_dir, 'model.pkl')
        archive_path = os.path.join(work_dir, 'model.zip')

        # Serialize the stand-in model to disk
        expected = {'name': 'example_model', 'accuracy': 0.95}
        with open(pickle_path, 'wb') as handle:
            pickle.dump(expected, handle)

        # Pack the pickle into the archive under its bare file name
        with zipfile.ZipFile(archive_path, 'w') as archive:
            archive.write(pickle_path, 'model.pkl')

        # Load the model back out of the archive
        loaded_model = load_model_zip(archive_path, 'model.pkl')

        # The loaded object must match what was pickled
        assert isinstance(loaded_model, dict)
        assert loaded_model['name'] == 'example_model'
        assert loaded_model['accuracy'] == 0.95

test/test_read_csv_zip.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import pandas as pd
2+
import os
3+
from io import BytesIO
4+
from toolkit.data_analysis import read_csv_zip
5+
import zipfile
6+
7+
8+
def test_read_csv_zip():
    """Check that ``read_csv_zip`` reads a ';'-separated CSV out of a zip file.

    The archive is written to ``test.zip`` in the current working directory
    and is always removed afterwards, even when an assertion fails.
    """
    csv_file = "test.csv"
    zip_file = 'test.zip'
    csv_content = "name;age;city\nJohn;30;Madrid\nSara;28;London\n"

    # Create a zip file containing the CSV (writestr encodes str as UTF-8)
    with zipfile.ZipFile(zip_file, 'w') as myzip:
        myzip.writestr(csv_file, csv_content)

    try:
        # Test case 1: read CSV file with default separator
        df = read_csv_zip(zip_file, csv_file)
        assert isinstance(df, pd.DataFrame)
        assert df.shape == (2, 3)
        assert df.iloc[0, 0] == 'John'
        assert df.iloc[1, 2] == 'London'

        # Test case 2: read CSV file with custom separator
        df = read_csv_zip(zip_file, csv_file, sep=';')
        assert isinstance(df, pd.DataFrame)
        assert df.shape == (2, 3)
        assert df.iloc[0, 0] == 'John'
        assert df.iloc[1, 2] == 'London'
    finally:
        # Clean up, so a failing assertion does not leave test.zip behind
        os.remove(zip_file)

test/test_read_url.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import pytest
2+
import pandas as pd
3+
from toolkit.data_analysis import read_url
4+
5+
def test_read_url():
    """Exercise ``read_url`` against one good URL and two expected failures.

    Requires network access: every case fetches a remote resource.
    """
    csv_url = 'https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/02_Filtering_%26_Sorting/Euro12/Euro_2012_stats_TEAM.csv'
    tsv_url = 'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv'
    invalid_url = 'https://thisurlisnotvalid.com'

    # Test case 1: valid URL and correct data format
    assert isinstance(read_url(csv_url), pd.DataFrame)

    # Test case 2 (valid URL, TSV data the test expects to be rejected) and
    # test case 3 (invalid URL): both must raise.
    for failing_url in (tsv_url, invalid_url):
        with pytest.raises(Exception):
            read_url(failing_url)

toolkit/data_analysis.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import pandas as pd
2+
3+
def read_url(url):
    """
    Read a delimited text file from a URL into a pandas DataFrame.

    The file is parsed with several separator / encoding combinations, in
    order, and the first result that has more than one column is returned.

    Parameters
    ----------
    url : a valid URL (anything ``pandas.read_csv`` accepts as a source).

    Return
    ----------
    df : the DataFrame with the data already loaded into memory, or None
         when no combination yields more than one column.

    Raises
    ----------
    Any non-decoding error raised by ``pandas.read_csv`` (for example a
    parser error or an unreachable URL) is propagated unchanged.
    """

    # Raw string: '\s' is not a valid escape sequence in a normal literal.
    separators = [',', ';', '|', ':', '\t', r'\s+']
    # One entry per distinct codec (aliases would only repeat the download).
    # Strict codecs come first; latin-1 is last because it can decode any
    # byte sequence and would otherwise mask the real encoding.
    encodings = ['utf-8', 'utf-16', 'utf-32', 'latin-1']

    for s in separators:
        for e in encodings:
            try:
                df = pd.read_csv(url, sep=s, encoding=e)
            except UnicodeError:
                # Wrong encoding for this file: try the next one instead of
                # aborting the whole search.
                continue

            # A single column means the separator did not split the rows.
            if df.shape[1] > 1:
                return df

    return None
31+
32+
33+
def read_csv_zip(zip_file, csv_file, sep=';'):
    """
    Load a CSV file stored inside a zip file, using a custom separator.

    Parameters
    ----------
    zip_file: The name of the zip file that contains the CSV file.
    csv_file: The name of the CSV file to load from the zip file.
    sep: The separator to use when reading the CSV file. The default value is ';'

    Returns:
    ----------
    A pandas DataFrame object that contains the data from the CSV file.

    Raises:
    ----------
    KeyError if csv_file is not a member of the zip file.
    """
    import zipfile

    with zipfile.ZipFile(zip_file, 'r') as archive:
        with archive.open(csv_file, 'r') as file:
            # Read CSV file with custom separator
            df = pd.read_csv(file, sep=sep)

    return df
56+

toolkit/machine_learning.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,4 +168,28 @@ def quickregression(name):
168168
print("MAPE test:", mean_absolute_percentage_error(y_test, modpred))
169169
print("MSE test:", mean_squared_error(y_test, modpred))
170170
print("RMSE test:", np.sqrt(mean_squared_error(y_test, modpred)))
171-
return(model.score(X_train, y_train))
171+
return(model.score(X_train, y_train))
172+
173+
174+
def load_model_zip(zip_file, model_file):
    """
    Load a pickled model file stored inside a zip file.

    Parameters
    ----------
    zip_file: The name of the zip file where the model file is located.
    model_file: The name of the model file to load.

    Returns:
    ----------
    The model loaded from the file.

    Raises:
    ----------
    KeyError if model_file is not a member of the zip file.
    """
    import pickle
    import zipfile

    # Open the zip file in read mode
    with zipfile.ZipFile(zip_file, "r") as archive:
        # Read the model file from the zip and load it into memory
        with archive.open(model_file, "r") as file:
            # NOTE(security): unpickling can execute arbitrary code, so only
            # load archives that come from a trusted source.
            model = pickle.load(file)

    return model

0 commit comments

Comments
 (0)