Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66,528 changes: 66,528 additions & 0 deletions Data/wine_food_data.csv

Large diffs are not rendered by default.

Binary file added Data/winemag-data-130k-v2.csv.zip
Binary file not shown.
1,173 changes: 1,173 additions & 0 deletions Wineset_analysis.ipynb

Large diffs are not rendered by default.

87 changes: 87 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
from flask import Flask, render_template, request, url_for, redirect
from filter import f_country, f_price, filters, price_cat, wine_cat, origin
import pandas as pd

# Application object that the route decorators below register their views on.
app = Flask(import_name=__name__)

@app.route('/')
def home():
    """Render the ``home.js`` template for the site root."""
    template_name = 'home.js'
    return render_template(template_name)

@app.route('/wine')
def wine():
    """Render the ``wine.js`` template for ``/wine``."""
    template_name = 'wine.js'
    return render_template(template_name)

@app.route('/food')
def food():
    """Render the ``Food.js`` template for ``/food``."""
    template_name = 'Food.js'
    return render_template(template_name)

@app.route('/information')
def information():
    """Render the ``information.js`` template for ``/information``."""
    template_name = 'information.js'
    return render_template(template_name)

@app.route("/button", methods=["POST", "GET"])
def button():
    """Show the country form, or forward a submitted country to its result page.

    Any non-POST request renders ``button.html``.  A POST reads the ``nm``
    form field and redirects to the ``get_country`` endpoint, passing the
    value as ``country``.
    """
    if request.method != "POST":
        return render_template('button.html')

    submitted_country = request.form["nm"]
    target_url = url_for("get_country", country=submitted_country)
    return redirect(target_url)

# The rule must begin with "/": Werkzeug rejects rules without a leading
# slash when the route is registered, which would stop the app from starting.
@app.route('/country/<country>')
def get_country(country):
    """Return the wines of one country.

    Args:
        country: Country name taken from the URL path.

    Returns:
        Whatever ``f_country`` returns for that country.
    """
    return f_country(country)

@app.route("/filter", methods=["POST", "GET"])
def filter():
    """Show the combined wine/origin form, or redirect a submission to its result.

    Any non-POST request renders ``winefilter.html``.  A POST reads the
    ``wine`` and ``origin`` form fields and redirects to the ``get_filter``
    endpoint with both values.
    """
    if request.method != "POST":
        return render_template("winefilter.html")

    # Keep the two fields in separate variables; reusing one name would send
    # the origin value for both URL parts.
    # A missing or empty field falls back to "all": ``filters`` applies no
    # restriction for values it does not recognise, and an empty path segment
    # would produce a URL that does not match the ``get_filter`` rule.
    wine_choice = request.form.get("wine") or "all"
    origin_choice = request.form.get("origin") or "all"
    return redirect(url_for("get_filter", wine=wine_choice, origin=origin_choice))

@app.route('/tro/<wine>/<origin>')
def get_filter(wine, origin):
    """Return the result of ``filters`` for the wine and origin given in the URL."""
    matches = filters(wine, origin)
    return matches

@app.route("/category", methods=["POST", "GET"])
def category():
    """Show the wine-category form, or forward a submitted category.

    Any non-POST request renders ``wine_cat.html``.  A POST reads the ``nm``
    form field and redirects to the ``get_winecat`` endpoint, passing the
    value as ``wine``.
    """
    if request.method != "POST":
        return render_template('wine_cat.html')

    submitted_category = request.form["nm"]
    target_url = url_for("get_winecat", wine=submitted_category)
    return redirect(target_url)

@app.route("/where", methods=["POST", "GET"])
def where():
    """Show the origin form, or forward a submitted origin.

    Any non-POST request renders ``wine_cat.html``.  A POST reads the ``nm``
    form field and redirects to the ``get_origin`` endpoint, passing the
    value as ``name``.
    """
    if request.method != "POST":
        # NOTE(review): this is the same template the /category view renders;
        # confirm that an origin-specific template was not intended here.
        return render_template('wine_cat.html')

    submitted_origin = request.form["nm"]
    target_url = url_for("get_origin", name=submitted_origin)
    return redirect(target_url)

@app.route("/price", methods=["POST", "GET"])
def price():
    """Show the price-category form, or forward a submitted category.

    Any non-POST request renders ``price_cat.html``.  A POST reads the ``nm``
    form field and redirects to the ``get_pricecat`` endpoint, passing the
    value as ``price``.
    """
    if request.method != "POST":
        return render_template('price_cat.html')

    submitted_price = request.form["nm"]
    target_url = url_for("get_pricecat", price=submitted_price)
    return redirect(target_url)

@app.route('/pricecat/<price>')
def get_pricecat(price):
    """Return the result of ``price_cat`` for the price category in the URL.

    ``price`` arrives as the raw path segment (a string).
    """
    matches = price_cat(price)
    return matches

@app.route('/origin/<name>')
def get_origin(name):
    """Return the result of ``origin`` for the origin group named in the URL."""
    matches = origin(name)
    return matches

@app.route('/winecat/<wine>')
def get_winecat(wine):
    """Return the result of ``wine_cat`` for the wine category in the URL."""
    matches = wine_cat(wine)
    return matches


# Start the server only when this file is executed directly.
# NOTE(review): debug mode is switched on here; confirm this entry point is
# used for local development only.
if __name__ == '__main__':
    run_options = {'debug': True}
    app.run(**run_options)

129 changes: 129 additions & 0 deletions data_preaparation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the review export directly from the zip archive.
wine_data = pd.read_csv(
    'Data/winemag-data-130k-v2.csv.zip',
    compression='zip',
    low_memory=False,
)

print(wine_data.head())

# Remove the columns that are not needed: the exported index column, the
# taster columns and the secondary region.
unused_columns = ['Unnamed: 0', 'taster_name', 'taster_twitter_handle', 'region_2']
wine_data = wine_data.drop(columns=unused_columns)

# data overview
def resumetable(df):
    """Print the shape of *df* and return a per-column overview table.

    The returned frame has one row per column of *df* with these columns:
    ``Name`` (column name), ``dtypes``, ``counts`` (non-null values),
    ``Missing`` (null values), ``missing_ration`` (null values as a
    percentage of all rows) and ``Uniques`` (distinct non-null values).
    """
    print(f"Dataset Shape: {df.shape}")

    missing_per_column = df.isnull().sum().values

    summary = pd.DataFrame(df.dtypes, columns=['dtypes']).reset_index()
    # Move the former index (the column names) to the front as "Name".
    summary.insert(0, 'Name', summary.pop('index'))

    summary['counts'] = df.count().values
    summary['Missing'] = missing_per_column
    summary['missing_ration'] = (missing_per_column / df.shape[0]) * 100
    summary['Uniques'] = df.nunique().values
    return summary

print(resumetable(wine_data))

# Cleaning steps, in order:
#   1. drop rows without price, country or variety
#   2. drop the "designation" column
#   3. drop repeated reviews (same description and title)
#   4. renumber the rows
wine_data = (
    wine_data
    .dropna(subset=['price', 'country', 'variety'])
    .drop(columns=['designation'])
    .drop_duplicates(subset=['description', 'title'])
    .reset_index(drop=True)
)

print(wine_data.describe())

# transform point system
def cat_points(points):
    """Map a review score to a rating band from 0 to 5.

    Bands (upper bound exclusive): 80-82 -> 0, 83-86 -> 1, 87-89 -> 2,
    90-93 -> 3, 94-97 -> 4.  Every other value, including scores below 80,
    yields 5.
    """
    bands = ((80, 83), (83, 87), (87, 90), (90, 94), (94, 98))
    for rating, (low, high) in enumerate(bands):
        if points in range(low, high):
            return rating
    return 5

# Replace the raw "points" score with its rating band.
wine_data = (
    wine_data
    .assign(rating=wine_data["points"].apply(cat_points))
    .drop(columns=['points'])
)

# assign different varieties to categories of wine
def _variety_contains(*names):
    """Return a boolean mask of rows whose variety contains any of *names*.

    The names are joined into one regular-expression alternation, so each
    name is interpreted as a pattern exactly as a separate
    ``str.contains(name)`` call would interpret it.
    """
    return wine_data['variety'].str.contains('|'.join(names))


# One condition per entry of ``values_wine`` (same order).  ``np.select``
# takes the first condition that is true, so order matters for varieties
# that match more than one list.
# Corrected spellings: 'Pinot Gris', 'Aglianico' and 'Cabernet Franc'; the
# misspelled names could never match a variety.
conditions_wine = [
    _variety_contains('Sauvignon Blanc', 'Pinot Grigio', 'Albariño', 'Pinot Gris'),
    _variety_contains('Gewürztraminer', 'Malvasia', 'Moscato', 'Riesling'),
    _variety_contains('Chardonnay', 'Viognier', 'Roussanne', 'Marsanne'),
    _variety_contains('Champagne', 'Prosecco', 'Sparkling', 'Cava'),
    _variety_contains('St. Laurent', 'Gamay', 'Pinot Noir', 'Zweigelt'),
    _variety_contains('Red Table Wine', 'Zinfandel', 'Merlot'),
    _variety_contains('Cabernet Sauvignon', 'Malbec', 'Aglianico', 'Syrah',
                      'Sangiovese', 'Cabernet Franc'),
    # "Port" must match the whole variety name, not a substring.
    _variety_contains('Late Harvest', 'Ice Wine', 'Sherry') | (wine_data['variety'] == 'Port'),
    _variety_contains('Rosé'),
]

# categories of wine
values_wine = ['dry white wine', 'sweet white wine', 'rich white wine', 'sparkling wine', 'light red wine', 'medium red wine', 'bold red wine', 'dessert wine', 'rosé']

# Rows matching no condition get the string '0'.  The default is passed
# explicitly as a string because the choices are strings; relying on the
# integer default needs an int-to-str promotion that newer NumPy rejects.
wine_data['wine_categories'] = np.select(conditions_wine, values_wine, default='0')

# Not every row can be assigned to a category.
# Count the rows per variety that are still uncategorised ('0'), most frequent
# first, so the largest varieties can be assigned manually.
variety_u_null = wine_data.loc[wine_data['wine_categories'].eq('0')]
variety_u_null_g = variety_u_null.groupby('variety')['variety'].count()
v_sort = variety_u_null_g.sort_values(ascending=False)

# price categories
# Sort prices from highest to lowest and compute each row's share of the
# total price sum plus the running (cumulative) share.
price = wine_data['price'].sort_values(ascending=False).to_frame()
summe = price['price'].sum()
price['perci'] = price['price'] / summe
price['perci_c'] = price['perci'].cumsum()

# Split by cumulative share: first 15 %, 15 % to 87 %, and the rest.
cumulative_share = price['perci_c']
FFF = price[cumulative_share <= 0.15]
FF = price[(cumulative_share > 0.15) & (cumulative_share <= 0.87)]
F = price[cumulative_share > 0.87]

# Price bands: 101 and above, 21 up to (not including) 101, below 21.
unit_price = wine_data['price']
conditions_price = [
    unit_price.ge(101),
    unit_price.ge(21) & unit_price.lt(101),
    unit_price.lt(21),
]

# price categories: 3 is most expensive, 1 is cheapest
values_price = [3, 2, 1]

wine_data['price_cat'] = np.select(conditions_price, values_price)

# food link
# Food pairing for every wine category.  The keys must be spelled exactly like
# the entries of ``values_wine``; the former 'desert wine' spelling never
# matched 'dessert wine', so dessert wines received no pairing.
food_by_category = {
    'dry white wine': 'vegetables, roasted vegetables, starches, fish',
    'sweet white wine': 'soft cheese, hard cheese, cured meat, sweets',
    'rich white wine': 'soft cheese, starches, fish, rich fish, white meat',
    'sparkling wine': 'vegetables, soft cheese, hard cheese, starches, fish',
    'light red wine': 'roasted vegetables, starches, rich fish, white meat, cured meat',
    'medium red wine': 'roasted vegetables, hard cheese, starches, white meat, red meat, cured meat',
    'bold red wine': 'hard cheese, starches, red meat, cured meat',
    'dessert wine': 'soft cheese, starches, cured meat, sweets',
    'rosé': 'everything',
}

conditions_food = [
    wine_data['wine_categories'] == category for category in food_by_category
]
values_food = list(food_by_category.values())

# Rows without a known category get the string '0'.  The default is passed
# explicitly as a string because the choices are strings; relying on the
# integer default needs an int-to-str promotion that newer NumPy rejects.
wine_data['food'] = np.select(conditions_food, values_food, default='0')

# Keep only the wines that received a food pairing.
wine_data_food = wine_data[wine_data['food'] != '0']
wine_data_food = wine_data_food.reset_index(drop = True)

# Variant without the free-text description column.
wine_data_food_f = wine_data_food.drop(['description'], axis = 1)
133 changes: 133 additions & 0 deletions filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import pandas as pd
import numpy as np

# Data set shared by wine_cat, price_cat and filters; read once at import time.
wine_data = pd.read_csv(
    'Data/wine_food_data_f.csv',
    low_memory=False,
)


def f_country(country: str):
    """Return country, title, price and rating of all wines from *country*.

    The data is read from ``Data/wine_food_data.csv`` on every call.  The
    directory name is capitalised to match the repository layout; a lowercase
    ``data/`` path cannot be opened on case-sensitive filesystems.

    Args:
        country: Country name, compared for exact equality with the
            ``country`` column.

    Returns:
        A column-oriented dict ``{column: {row_index: value}}`` with the keys
        ``country``, ``title``, ``price`` and ``rating``.  The inner dicts are
        empty when no wine matches.
    """
    wine_data = pd.read_csv('Data/wine_food_data.csv', low_memory=False)
    columns = ['country', 'title', 'price', 'rating']
    result_c = wine_data.loc[wine_data['country'] == country, columns]
    return result_c.to_dict()

# filter wine category
def wine_cat(wine = 'red'):
    """Return all wines whose category contains *wine*.

    The match is a ``str.contains`` test on the ``wine_categories`` column of
    the module-level ``wine_data``.

    Returns:
        A dict ``{row_index: {column: value}}`` of the matching rows.
    """
    matches_category = wine_data['wine_categories'].str.contains(wine)
    return wine_data[matches_category].T.to_dict()

# filter origin
_EUROPEAN_COUNTRIES = (
    'Spain', 'Portugal', 'France', 'Germany', 'Italy', 'Romania', 'Austria',
    'Slovenia', 'Croatia', 'England', 'Czech Republic', 'Bulgaria', 'Greece',
    'Turkey', 'Moldova', 'Hungary', 'Switzerland', 'Ukraine', 'Slovakia',
    'Serbia', 'Luxembourg', 'Macedonia',
)
_INTERNATIONAL_COUNTRIES = (
    'Mexico', 'China', 'US', 'Argentina', 'Chile', 'Australia',
    'South Africa', 'New Zealand', 'Israel', 'Canada', 'Lebanon', 'Brazil',
    'Morocco', 'Uruguay', 'Peru', 'India', 'Georgia', 'Armenia',
)
# Origin group name -> countries that belong to it.
_COUNTRIES_BY_ORIGIN = {
    'europe': _EUROPEAN_COUNTRIES,
    'national': ('Germany',),
    'international': _INTERNATIONAL_COUNTRIES,
}


def origin(name):
    """Return all wines from the origin group *name*.

    Args:
        name: ``'europe'``, ``'national'`` (Germany only) or
            ``'international'``.

    Returns:
        A dict ``{row_index: {column: value}}`` of the matching rows.  An
        unknown group name yields an empty dict, so the Flask view that
        returns this value always has a valid response instead of ``None``.
    """
    countries = _COUNTRIES_BY_ORIGIN.get(name)
    if countries is None:
        return {}

    # Directory name capitalised to match the repository layout; a lowercase
    # ``data/`` path cannot be opened on case-sensitive filesystems.
    wine_data = pd.read_csv('Data/wine_food_data.csv', low_memory=False)
    from_group = wine_data['country'].isin(countries)
    return wine_data[from_group].transpose().to_dict()


# filter price category
def price_cat(price = 1):
    """Return all wines of one price category, cheapest first.

    Args:
        price: Price category.  It is converted with ``int`` before the
            comparison, so a numeric string such as ``'2'`` (which is what a
            URL path segment delivers) selects the same rows as ``2``.

    Returns:
        A dict ``{row_index: {column: value}}`` of the matching rows of the
        module-level ``wine_data``, ordered by ``price``.  A value that cannot
        be converted to an integer yields an empty dict.
    """
    try:
        wanted_category = int(price)
    except (TypeError, ValueError):
        return {}

    selection = wine_data[wine_data['price_cat'] == wanted_category]
    return selection.sort_values('price').transpose().to_dict()



# filter together
def filters(wine, origin):
    """Return wines filtered by category and by origin group.

    Args:
        wine: ``'red'``, ``'white'`` or ``'rosé'`` keeps the rows whose
            ``wine_categories`` value contains that word; any other value
            applies no category filter.
        origin: ``'europe'``, ``'national'`` (Germany only) or
            ``'international'`` keeps the rows from the matching countries;
            any other value applies no origin filter.

    Returns:
        A dict ``{row_index: {column: value}}`` of the remaining rows of the
        module-level ``wine_data``.
    """
    countries_by_origin = {
        'europe': (
            'Spain', 'Portugal', 'France', 'Germany', 'Italy', 'Romania',
            'Austria', 'Slovenia', 'Croatia', 'England', 'Czech Republic',
            'Bulgaria', 'Greece', 'Turkey', 'Moldova', 'Hungary',
            'Switzerland', 'Ukraine', 'Slovakia', 'Serbia', 'Luxembourg',
            'Macedonia',
        ),
        'national': ('Germany',),
        'international': (
            'Mexico', 'China', 'US', 'Argentina', 'Chile', 'Australia',
            'South Africa', 'New Zealand', 'Israel', 'Canada', 'Lebanon',
            'Brazil', 'Morocco', 'Uruguay', 'Peru', 'India', 'Georgia',
            'Armenia',
        ),
    }

    # Step 1: category filter.
    if wine in ('red', 'white', 'rosé'):
        by_category = wine_data[wine_data['wine_categories'].str.contains(wine)]
    else:
        by_category = wine_data

    # Step 2: origin filter on top of the category result.
    countries = countries_by_origin.get(origin)
    if countries is None:
        by_origin = by_category
    else:
        by_origin = by_category[by_category['country'].isin(countries)]

    return by_origin.transpose().to_dict()
9 changes: 9 additions & 0 deletions templates/Food.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import React from 'react';

const Food = () => {
return <div>
<h1>what matches your food?</h1>
</div>;
};

export default Food;
11 changes: 11 additions & 0 deletions templates/base.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<!doctype html>
{# Base layout. Child templates fill the "title" block (page title) and the
   "body" block (page content). #}
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>{% block title %}{% endblock %}</title>
</head>
<body>
  {% block body %}{% endblock %}
</body>
</html>
9 changes: 9 additions & 0 deletions templates/button.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{# Country form. Extends the base layout so the page gets the doctype, head
   and body wrapper; the form goes into the layout's "body" block. #}
{% extends "base.html" %}

{% block title %}Button{% endblock %}

{% block body %}
<form action="#" method="post">
  <p>Infos:</p>
  <p><input type="text" name="nm" /></p>
  <p><input type="submit" value="submit"/></p>
</form>
{% endblock %}
Loading