-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp1.py
More file actions
107 lines (83 loc) · 3.71 KB
/
Copy pathapp1.py
File metadata and controls
107 lines (83 loc) · 3.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# -*- coding: utf-8 -*-
"""app1
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/14rFtwlcNqRZ4juDNmO9YbjOtpWiEaV6f
"""
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# --- PAGE SETUP ---
# Set the page title and a wide layout, then draw the main heading.
page_title = "Data Science Portfolio"
st.set_page_config(layout="wide", page_title=page_title)
st.title("📊 My Data Science Portfolio")

# --- SIDEBAR NAVIGATION ---
# The sidebar radio button decides which project page is rendered below;
# its current label is kept in `project`.
sidebar = st.sidebar
sidebar.title("Select Project")
project_choices = [
    "Covid-19 Analysis",
    "Austin Weather Prediction",
    "Tumor Detection Data",
]
project = sidebar.radio("Go to:", project_choices)
# --- DATA HELPERS ---
# Pure functions with no Streamlit calls, so they can be exercised without
# running the app.


def aggregate_by_country(confirmed_df):
    """Return per-country totals of a confirmed-cases table.

    The 'Lat', 'Long' and 'Province/State' columns are dropped when present,
    then the remaining numeric columns are summed for each 'Country/Region'.

    Raises:
        KeyError: if *confirmed_df* has no 'Country/Region' column.
    """
    cols_to_drop = ['Lat', 'Long', 'Province/State']
    trimmed = confirmed_df.drop(columns=cols_to_drop, errors='ignore')
    # numeric_only keeps any leftover text column from being concatenated
    # into the totals.
    return trimmed.groupby("Country/Region").sum(numeric_only=True)


def present_options(wanted, options):
    """Return the items of *wanted* that occur in *options*, in *wanted* order."""
    available = set(options)
    return [item for item in wanted if item in available]


def with_datetime_index(frame):
    """Return *frame* with its text index parsed as dates.

    The index is replaced only when it holds strings and every label parses
    as a date; otherwise *frame* is returned unchanged. The input is never
    modified.
    """
    if not pd.api.types.is_string_dtype(frame.index):
        return frame
    parsed = pd.to_datetime(frame.index, errors="coerce")
    if parsed.isna().any():
        # At least one label is not a date: keep the original labels.
        return frame
    dated = frame.copy()
    dated.index = parsed
    return dated


def coerce_numeric(frame, columns):
    """Return *frame[columns]* with every value converted to a number.

    Values that cannot be converted become NaN.

    Raises:
        KeyError: if any of *columns* is missing from *frame*.
    """
    return frame[list(columns)].apply(pd.to_numeric, errors="coerce")


def find_target_column(columns, candidates=('diagnosis', 'Class', 'Label', 'target')):
    """Return the column that looks like a class label, or None.

    Candidates are tried in order with an exact match first; only when no
    candidate matches exactly is a case-insensitive match attempted.
    """
    columns = list(columns)
    for candidate in candidates:
        if candidate in columns:
            return candidate
    folded = {}
    for name in columns:
        # setdefault keeps the first column for each case-folded spelling.
        folded.setdefault(str(name).casefold(), name)
    for candidate in candidates:
        match = folded.get(candidate.casefold())
        if match is not None:
            return match
    return None


# --- PROJECT 1: COVID-19 ANALYSIS ---
def render_covid():
    """Draw the Covid-19 page: country picker, confirmed-cases chart, raw preview."""
    st.header("🦠 Covid-19 Global Analysis")
    st.write("Analysis of confirmed cases and deaths using John Hopkins data.")
    try:
        # Load Data
        confirmed_df = pd.read_csv("covid19_Confirmed_dataset.csv")
        confirmed_clean = aggregate_by_country(confirmed_df)

        # User Selection. Only preselect countries that exist in the data,
        # since a default missing from the options is rejected by the widget.
        defaults = present_options(["China", "Italy", "US"], confirmed_clean.index)
        countries = st.multiselect(
            "Select Countries to Compare:",
            confirmed_clean.index,
            default=defaults,
        )
        if countries:
            st.subheader("Confirmed Cases Over Time")
            # Transpose for plotting (dates as index) and parse the labels so
            # the x axis is ordered by date rather than by text.
            plot_df = with_datetime_index(confirmed_clean.loc[countries].T)
            st.line_chart(plot_df)

        st.write("### Raw Data Preview")
        st.dataframe(confirmed_df.head())
    except Exception as e:
        st.error(f"Error loading files: {e}. Make sure 'covid19_Confirmed_dataset.csv' is in the repo.")


# --- PROJECT 2: AUSTIN WEATHER ---
def render_weather():
    """Draw the Austin weather page: temperature trends and a correlation heatmap."""
    st.header("🌦️ Austin Weather Analysis")
    try:
        df_weather = pd.read_csv("austin_weather.csv")
        # Use 'Date' as a datetime index when the column exists.
        if 'Date' in df_weather.columns:
            df_weather['Date'] = pd.to_datetime(df_weather['Date'])
            df_weather.set_index('Date', inplace=True)

        st.subheader("Temperature Trends (High/Avg/Low)")
        # Clean non-numeric data if present: unparseable entries become NaN.
        cols = ['TempHighF', 'TempAvgF', 'TempLowF']
        df_weather[cols] = coerce_numeric(df_weather, cols)
        st.line_chart(df_weather[cols])

        st.subheader("Correlation Matrix")
        # Select only numeric columns for correlation
        numeric_df = df_weather.select_dtypes(include="number")
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            sns.heatmap(numeric_df.corr(), annot=False, cmap='coolwarm', ax=ax)
            st.pyplot(fig)
        finally:
            # Close the figure so repeated script runs do not pile up open figures.
            plt.close(fig)
    except Exception as e:
        st.error(f"Error loading weather data: {e}")


# --- PROJECT 3: TUMOR DETECTION ---
def render_tumor():
    """Draw the tumor dataset page: shape, first rows and class distribution."""
    st.header("🧠 Tumor Detection Dataset Explorer")
    try:
        df_tumor = pd.read_csv("Tumor_Detection.csv")
        st.write(f"Dataset Shape: {df_tumor.shape}")
        st.write("### First 5 Rows")
        st.dataframe(df_tumor.head())

        # If there is a diagnosis/label column, plot it. The name is guessed
        # from common datasets (diagnosis, label, class).
        target_col = find_target_column(df_tumor.columns)
        if target_col is None:
            st.info("Could not automatically find a target column to plot distribution.")
            return

        st.subheader(f"Class Distribution ({target_col})")
        fig, ax = plt.subplots()
        try:
            sns.countplot(x=df_tumor[target_col], ax=ax)
            st.pyplot(fig)
        finally:
            # Close the figure so repeated script runs do not pile up open figures.
            plt.close(fig)
    except Exception as e:
        st.error(f"Error loading tumor data: {e}")


# --- PAGE DISPATCH ---
# Streamlit executes the script under the name "__main__", so the selected
# page is still rendered on every run, while importing this module (for
# example from a test) only defines the functions above.
if __name__ == "__main__":
    if project == "Covid-19 Analysis":
        render_covid()
    elif project == "Austin Weather Prediction":
        render_weather()
    elif project == "Tumor Detection Data":
        render_tumor()