# Source: DataScienceProject/app1.py at main · devc2255/DataScienceProject · GitHub
# -*- coding: utf-8 -*-
"""app1

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/14rFtwlcNqRZ4juDNmO9YbjOtpWiEaV6f
"""

import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# --- PAGE SETUP ---
# Page-level configuration comes first, followed by the main heading.
st.set_page_config(page_title="Data Science Portfolio", layout="wide")
st.title("📊 My Data Science Portfolio")

# --- SIDEBAR NAVIGATION ---
# The selected label is stored in `project`; the sections below compare
# against these exact strings to decide which view to render.
project_choices = ["Covid-19 Analysis", "Austin Weather Prediction", "Tumor Detection Data"]
st.sidebar.title("Select Project")
project = st.sidebar.radio("Go to:", project_choices)

# --- PROJECT 1: COVID-19 ANALYSIS ---
# Renders a per-country comparison chart of confirmed cases plus a preview of
# the raw confirmed-cases table read from 'covid19_Confirmed_dataset.csv'.
if project == "Covid-19 Analysis":
    st.header("🦠 Covid-19 Global Analysis")
    st.write("Analysis of confirmed cases and deaths using John Hopkins data.")

    try:
        # Load Data
        # The deaths dataset is intentionally not read: nothing in this view
        # uses it, so requiring the file would only add a failure mode.
        confirmed_df = pd.read_csv("covid19_Confirmed_dataset.csv")

        # Drop meaningless columns for the plot, then total the remaining
        # columns per country (a country may span several rows).
        cols_to_drop = ['Lat', 'Long', 'Province/State']
        confirmed_clean = (
            confirmed_df.drop(columns=cols_to_drop, errors='ignore')
            .groupby("Country/Region")
            .sum()
        )

        # User Selection
        # Pre-select only the countries that actually exist in the data:
        # st.multiselect rejects a default that is not among the options.
        preferred_defaults = ["China", "Italy", "US"]
        available_defaults = [c for c in preferred_defaults if c in confirmed_clean.index]
        countries = st.multiselect(
            "Select Countries to Compare:",
            confirmed_clean.index,
            default=available_defaults,
        )

        if countries:
            st.subheader("Confirmed Cases Over Time")
            # Transpose for plotting (Dates as Index)
            plot_df = confirmed_clean.loc[countries].T

            # Use real timestamps for the x-axis when every column label
            # parses as an m/d/yy date; otherwise keep the labels untouched.
            # NOTE(review): the m/d/yy label format is an assumption about the
            # CSV header - confirm against the dataset.
            parsed_dates = pd.to_datetime(plot_df.index, format="%m/%d/%y", errors="coerce")
            if not parsed_dates.isna().any():
                plot_df.index = parsed_dates

            st.line_chart(plot_df)

        st.write("### Raw Data Preview")
        st.dataframe(confirmed_df.head())

    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError) as e:
        # Genuine loading problems: missing/unreadable file or malformed CSV.
        st.error(f"Error loading files: {e}. Make sure 'covid19_Confirmed_dataset.csv' is in the repo.")
    except Exception as e:
        # Anything else (e.g. an unexpected column layout) is a processing
        # problem; report it without blaming a missing file.
        st.error(f"Error processing Covid-19 data: {e}")

# --- PROJECT 2: AUSTIN WEATHER ---
elif project == "Austin Weather Prediction":
    st.header("🌦️ Austin Weather Analysis")

    try:
        df_weather = pd.read_csv("austin_weather.csv")

        # Cleanup 'Date' column if needed
        if 'Date' in df_weather.columns:
            df_weather['Date'] = pd.to_datetime(df_weather['Date'])
            df_weather.set_index('Date', inplace=True)

        st.subheader("Temperature Trends (High/Avg/Low)")
        # Clean non-numeric data if present
        cols = ['TempHighF', 'TempAvgF', 'TempLowF']
        st.line_chart(df_weather[cols])

        st.subheader("Correlation Matrix")
        fig, ax = plt.subplots(figsize=(10, 6))
        # Select only numeric columns for correlation
        numeric_df = df_weather.select_dtypes(include=['float64', 'int64'])
        sns.heatmap(numeric_df.corr(), annot=False, cmap='coolwarm', ax=ax)
        st.pyplot(fig)

    except Exception as e:
        st.error(f"Error loading weather data: {e}")

# --- PROJECT 3: TUMOR DETECTION ---
elif project == "Tumor Detection Data":
    st.header("🧠 Tumor Detection Dataset Explorer")

    try:
        df_tumor = pd.read_csv("Tumor_Detection.csv")

        st.write(f"Dataset Shape: {df_tumor.shape}")
        st.write("### First 5 Rows")
        st.dataframe(df_tumor.head())

        # If there is a diagnosis/label column, plot it
        # Guessing column name based on common datasets (diagnosis, label, class)
        target_col = None
        for col in ['diagnosis', 'Class', 'Label', 'target']:
            if col in df_tumor.columns:
                target_col = col
                break

        if target_col:
            st.subheader(f"Class Distribution ({target_col})")
            fig, ax = plt.subplots()
            sns.countplot(x=df_tumor[target_col], ax=ax)
            st.pyplot(fig)
        else:
            st.info("Could not automatically find a target column to plot distribution.")

    except Exception as e:
        st.error(f"Error loading tumor data: {e}")