-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata.py
More file actions
58 lines (42 loc) · 2.27 KB
/
data.py
File metadata and controls
58 lines (42 loc) · 2.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import numpy as np
import pandas as pd
def transform_datasets(_winter_df: pd.DataFrame, _summer_df: pd.DataFrame) -> pd.DataFrame:
    """Merge the winter and summer records into one frame with durations.

    Each input is tagged with a ``Semester`` column (``'W'`` for winter,
    ``'S'`` for summer), the two are stacked row-wise, the ``Start`` and
    ``End`` columns are combined with ``Date`` and parsed into datetimes,
    and a ``Duration [hrs]`` column is inserted at position 3.

    The input frames are not modified; tagging is done on copies.

    Args:
        _winter_df: Winter records; must provide ``Date``, ``Start`` and
            ``End`` string columns.
        _summer_df: Summer records with the same columns.

    Returns:
        A new frame holding the rows of both inputs (original row labels
        are kept, so the index may contain duplicates).
    """
    # ``assign`` returns a tagged copy, so the callers' frames do not grow a
    # ``Semester`` column as a hidden side effect of this call.
    tagged_winter = _winter_df.assign(Semester='W')
    tagged_summer = _summer_df.assign(Semester='S')

    _df = pd.concat([tagged_winter, tagged_summer], axis=0)

    # Date and time-of-day live in separate string columns; join them before
    # parsing so Start/End become full timestamps.
    _df['Start'] = pd.to_datetime(_df['Date'] + " " + _df['Start'])
    _df['End'] = pd.to_datetime(_df['Date'] + " " + _df['End'])

    # Dividing a timedelta by one hour yields the duration as float hours.
    _df.insert(3, 'Duration [hrs]', (_df['End'] - _df['Start']) / pd.Timedelta(hours=1))
    return _df
def compute_expected_duration_per_subject(_df: pd.DataFrame, _ects: pd.DataFrame) -> pd.DataFrame:
    """Compare the hours spent per subject with the ECTS-derived range.

    Durations are summed per (``Subject``, ``Semester``) pair, inner-joined
    with ``_ects`` on ``Subject``, and sorted by ``ECTS`` then
    ``Duration [hrs]``, both descending. The expected range is computed as
    25 to 30 hours per ECTS point.

    Args:
        _df: Records with ``Subject``, ``Semester`` and ``Duration [hrs]``.
        _ects: Table with ``Subject`` and ``ECTS`` columns.

    Returns:
        The joined frame with ``Min exp duration [hrs]``,
        ``Max exp duration [hrs]`` and ``MinMaxDiff`` columns appended.
    """
    group_keys = ['Subject', 'Semester']
    realised = _df[group_keys + ['Duration [hrs]']]
    totals = realised.groupby(group_keys, as_index=False).sum()

    # Inner join: subjects missing from either side are dropped.
    joined = pd.merge(totals, _ects, on=['Subject'], how='inner')
    result = joined.sort_values(by=['ECTS', 'Duration [hrs]'], ascending=False)

    lower_bound = result['ECTS'] * 25
    upper_bound = result['ECTS'] * 30
    result['Min exp duration [hrs]'] = lower_bound
    result['Max exp duration [hrs]'] = upper_bound
    result['MinMaxDiff'] = upper_bound - lower_bound
    return result
def compute_total_duration_per_subject(_df: pd.DataFrame) -> pd.DataFrame:
    """Sum the recorded hours per subject and semester, largest first.

    Args:
        _df: Records with ``Subject``, ``Semester`` and ``Duration [hrs]``.

    Returns:
        One row per (``Subject``, ``Semester``) pair with the summed
        ``Duration [hrs]``, sorted by that sum in descending order.
    """
    group_keys = ['Subject', 'Semester']
    return (
        _df[['Subject', 'Semester', 'Duration [hrs]']]
        .groupby(group_keys, as_index=False)
        .sum()
        .sort_values(by='Duration [hrs]', ascending=False)
    )
def compute_activity(_df: pd.DataFrame) -> pd.DataFrame:
    """Sum the recorded hours per subject and activity.

    Args:
        _df: Records with ``Subject``, ``Activity`` and ``Duration [hrs]``.

    Returns:
        One row per (``Subject``, ``Activity``) pair with the summed
        ``Duration [hrs]``.
    """
    group_keys = ['Subject', 'Activity']
    relevant = _df[group_keys + ['Duration [hrs]']]
    per_activity = relevant.groupby(group_keys, as_index=False)
    return per_activity.sum()
def filter_by_semester(_df: pd.DataFrame, key: str) -> pd.DataFrame:
    """Restrict the records to one semester.

    Args:
        _df: Records with a ``Semester`` column holding ``'W'`` or ``'S'``.
        key: ``'winter'`` keeps the ``'W'`` rows, ``'summer'`` keeps the
            ``'S'`` rows; any other value applies no filter.

    Returns:
        The matching rows, or ``_df`` itself when ``key`` is neither
        ``'winter'`` nor ``'summer'``.
    """
    if key == 'winter':
        semester_code = 'W'
    elif key == 'summer':
        semester_code = 'S'
    else:
        # Unrecognised key: hand back the frame unfiltered.
        return _df

    keep_rows = _df['Semester'] == semester_code
    return _df[keep_rows]
# Load the raw datasets. This runs at import time, and the relative paths are
# resolved against the current working directory, so the module must be
# imported from a directory that contains ``data/``.
winter_df = pd.read_csv("data/winter-semester.csv")
summer_df = pd.read_csv("data/summer-semester.csv")
ects_df = pd.read_csv("data/ects.csv")
# NOTE(review): allow_pickle=True lets numpy unpickle object arrays, which can
# execute arbitrary code; this is only safe if the .npy file is trusted.
calendar_heatmap_data = np.load("data/calendar_heatmap.npy", allow_pickle=True)
# Derive the combined frame and the per-subject / per-activity aggregates
# from the loaded data.
df = transform_datasets(winter_df, summer_df)
expected_and_realised_dur_per_subj_df = compute_expected_duration_per_subject(df, ects_df)
total_dur_per_subj_df = compute_total_duration_per_subject(df)
activity_df = compute_activity(df)