-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfuture_forecast.R
More file actions
122 lines (107 loc) · 3.98 KB
/
Copy pathfuture_forecast.R
File metadata and controls
122 lines (107 loc) · 3.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
suppressPackageStartupMessages({
library(dplyr)
library(readr)
})
table_dir <- "outputs/tables"
output_path <- file.path(table_dir, "future_forecast_summary.txt")
read_latest_actual_date <- function() {
bus_path <- "data/processed/silver/ridership_bus_daily.csv"
rail_path <- "data/processed/silver/ridership_rail_station_daily.csv"
actual_dates <- c(
read_csv(bus_path, show_col_types = FALSE) %>% summarise(latest_date = max(as.Date(date))) %>% pull(latest_date),
read_csv(rail_path, show_col_types = FALSE) %>% summarise(latest_date = max(as.Date(date))) %>% pull(latest_date)
)
min(actual_dates, na.rm = TRUE)
}
read_forecast <- function(path, mode_name) {
if (!file.exists(path)) {
stop("Missing forecast file: ", path, call. = FALSE)
}
read_csv(path, show_col_types = FALSE) %>%
mutate(
date = as.Date(date),
mode = mode_name
) %>%
arrange(date) %>%
slice_head(n = 30)
}
summarise_forecast <- function(data) {
data %>%
summarise(
start_date = min(date),
end_date = max(date),
days = n(),
total_ridership = sum(prediction, na.rm = TRUE),
average_daily_ridership = mean(prediction, na.rm = TRUE),
low_day = date[which.min(prediction)],
low_day_ridership = min(prediction, na.rm = TRUE),
high_day = date[which.max(prediction)],
high_day_ridership = max(prediction, na.rm = TRUE),
average_lower_80 = mean(lower_80, na.rm = TRUE),
average_upper_80 = mean(upper_80, na.rm = TRUE),
.groups = "drop"
)
}
format_number <- function(x) {
format(round(x), big.mark = ",", scientific = FALSE)
}
format_daily_lines <- function(data) {
data %>%
mutate(
line = paste0(
date,
": ",
format_number(prediction),
" [",
format_number(lower_80),
" to ",
format_number(upper_80),
"]"
)
) %>%
pull(line)
}
format_mode_block <- function(mode_name, summary_row, forecast_data) {
c(
paste0(mode_name, " forecast"),
paste0("- Date range: ", summary_row$start_date, " to ", summary_row$end_date),
paste0("- Forecast days: ", summary_row$days),
paste0("- Total predicted ridership: ", format_number(summary_row$total_ridership)),
paste0("- Average daily ridership: ", format_number(summary_row$average_daily_ridership)),
paste0("- Lowest predicted day: ", summary_row$low_day, " (", format_number(summary_row$low_day_ridership), ")"),
paste0("- Highest predicted day: ", summary_row$high_day, " (", format_number(summary_row$high_day_ridership), ")"),
paste0(
"- Average 80% range: ",
format_number(summary_row$average_lower_80),
" to ",
format_number(summary_row$average_upper_80)
),
"",
format_daily_lines(forecast_data)
)
}
last_true_data_date <- read_latest_actual_date()
bus_forecast <- read_forecast(file.path(table_dir, "bus_forecast_output.csv"), "Bus")
rail_forecast <- read_forecast(file.path(table_dir, "rail_system_forecast_output.csv"), "Rail")
bus_summary <- summarise_forecast(bus_forecast)
rail_summary <- summarise_forecast(rail_forecast)
combined_summary <- bind_rows(bus_forecast, rail_forecast) %>%
summarise_forecast()
output_lines <- c(
"WMATA 30-Day Future Forecast Summary",
paste0("Generated: ", Sys.Date()),
paste0("Last True Data Date: ", last_true_data_date),
"",
format_mode_block("Bus", bus_summary, bus_forecast),
"",
format_mode_block("Rail", rail_summary, rail_forecast),
"",
"Combined Bus + Rail forecast",
paste0("- Date range: ", combined_summary$start_date, " to ", combined_summary$end_date),
paste0("- Forecast days by mode: ", combined_summary$days / 2),
paste0("- Total predicted ridership: ", format_number(combined_summary$total_ridership)),
paste0("- Average daily ridership across both modes: ", format_number(combined_summary$total_ridership / (combined_summary$days / 2)))
)
dir.create(table_dir, recursive = TRUE, showWarnings = FALSE)
writeLines(output_lines, output_path)
message("Wrote ", output_path)