From c9bbe11cdfb84230582962a64d7fc388ede5559e Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Tue, 31 Mar 2026 11:18:13 +0100 Subject: [PATCH 01/53] Add functionality to get all schedules per server --- cicada/cli.py | 14 +++ cicada/commands/smart_schedule.py | 163 ++++++++++++++++++++++++++++++ cicada/lib/scheduler.py | 36 +++++++ 3 files changed, 213 insertions(+) create mode 100644 cicada/commands/smart_schedule.py diff --git a/cicada/cli.py b/cicada/cli.py index 93b5c6b..896b0d2 100644 --- a/cicada/cli.py +++ b/cicada/cli.py @@ -18,6 +18,7 @@ from cicada.commands import ping_slack from cicada.commands import list_schedules from cicada.commands import delete_schedule +from cicada.commands import smart_schedule @utils.named_exception_handler("Cicada") @@ -273,6 +274,19 @@ def delete_schedule(): args = parser.parse_args(sys.argv[2:]) delete_schedule.main(args.schedule_id) + @staticmethod + def smart_schedule(): + """Generate smart schedules for a server using genetic algorithm""" + parser = argparse.ArgumentParser( + allow_abbrev=False, + add_help=True, + prog=inspect.stack()[0][3], + description="Generate smart schedules for a server using genetic algorithm", + ) + parser.add_argument("--server_id", type=str, required=False, help="ID of the server") + args = parser.parse_args(sys.argv[2:]) + smart_schedule.main(args.server_id) + @staticmethod def version(): """Return version of cicada package""" diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py new file mode 100644 index 0000000..497e58d --- /dev/null +++ b/cicada/commands/smart_schedule.py @@ -0,0 +1,163 @@ +"""Shifts the schedules on a node to distribute the load""" + +import datetime +import sys +# from typing import Optional +# from croniter import croniter +from cicada.lib import postgres, utils +from cicada.lib import scheduler +# from cicada.lib.SmartScheduling import pygad +# from cicada.lib.SmartScheduling.domain import Tap + + +def get_schedules_per_server(server_id, 
dbname=None): + """Get all schedules for a given server_id.""" + db_conn = postgres.db_cicada(dbname) + db_cur = db_conn.cursor() + # Don't get the schedules that aren't taps -> schedule_description LIKE '%==%' ? !? + schedule_ids = [row[0] for row in scheduler.get_all_schedule_ids_per_server(db_cur, server_id)] + db_cur.close() + db_conn.close() + + if not schedule_ids: + print(f"No schedules found for server_id {server_id}") + return [] + return schedule_ids + + +def find_all_server_ids(dbname=None): + """Find all server_ids in the system.""" + db_conn = postgres.db_cicada(dbname) + db_cur = db_conn.cursor() + server_ids = scheduler.get_all_server_ids(db_cur) + db_cur.close() + db_conn.close() + return server_ids + + +# def create_tap_objects(schedule_ids, dbname=None): + """Create Tap objects from schedule_ids.""" + + db_conn = postgres.db_cicada(dbname) + db_cur = db_conn.cursor() + taps : list[pygad.Tap] = [] + + # Fetch details for each schedule and convert to Tap objects + for schedule_id in schedule_ids: + details = scheduler.get_schedule_details(db_cur, schedule_id) + try: + tap = pygad.Tap( + schedule_id=schedule_id, + interval_mask=details.get("interval_mask") + ) + + # Ignore the few taps that have irregular cron expressions for now. There are few enough that this shouldn't impact the optimisation and is not worth the effort to try and support these irregular schedules in the GA + if not tap.is_regular_schedule(tap.interval_mask): + raise ValueError(f"Skipping irregular cron expression: {tap.interval_mask}") + else: + tap.determine_attributes(db_cur) + taps.append(tap) + + except Exception as e: + print(f"Skipping schedule {schedule_id} due to error: {e}") + + db_cur.close() + db_conn.close() + return taps + +# def update_schedule_cron(tap : Tap) -> str: + """ + Uses the start_blocks to shift the cron expression accordingly. Gene space is already limited from 0 to the frequency of the tap + + Ex. 
form of cron expression: "8-59/15 * * * *" (every 15 minutes starting at minute 8 of each hour) + """ + frequency = tap.frequency_minutes + shift = tap.shift + + if not shift or frequency >= 60: + return tap # No shift needed + + if shift == 0: + return tap # No shift needed + if shift < 60: + if frequency >= 60: + minute = shift % 60 + hour = shift // 60 + croniter(tap.interval_mask).get_next(datetime).hour # Get the hour of the first scheduled run and add the shift in hours + tap.interval_mask = f"{minute} {hour} * * *" + # Check that the new cron expression is valid + if not croniter.is_valid(tap.interval_mask): + raise ValueError(f"Invalid cron expression generated: {tap.interval_mask}") + return tap + else: + tap.interval_mask = f"{shift}-59/{frequency} * * * *" + # Check that the new cron expression is valid + if not croniter.is_valid(tap.interval_mask): + raise ValueError(f"Invalid cron expression generated: {tap.interval_mask}") + return tap + # Do we only want to support shifts of less than an hour? --> most schedules are on a 30 minute basis and it would simplify this function ! 
+ if shift >= 60: + raise NotImplementedError("Design decision needed on how to implement schedule shifting for different frequencies and shift amounts.") + + + +# def assign_new_schedules(optimised_taps: list[pygad.Tap], dbname=None): + """Assign new schedules based on the optimal schedule found.""" + db_conn = postgres.db_cicada(dbname) + db_cur = db_conn.cursor() + + # For each tap, update the schedule in the DB with the new interval_mask based on the shift calculated by the GA optimizer + for tap in optimised_taps: + tap = update_schedule_cron(tap) + schedule_details = { + "schedule_id": tap.schedule_id, + "interval_mask": tap.interval_mask + } + scheduler.update_schedule_details(db_cur=db_cur, schedule_details=schedule_details) + db_conn.commit() + db_cur.close() + db_conn.close() + +# def rollback(tap : Optional[Tap], dbname=None): + """ + Rollback to original schedules in case of any issues during assignment. + Args: tap: Optional[Tap] : the tap to rollback, if None rollback all taps to their original schedules + + """ + # This would require storing the original schedules before making changes, which is not currently implemented. + # We could potentially store the original interval_masks in a separate table or in memory before updating, and then use that for rollback if needed. 
+ raise NotImplementedError("Rollback functionality not yet implemented") + + +@utils.named_exception_handler("smart_schedule") +def main(server_id=None, dbname=None): + + if not server_id: + # Recursively call main for each server_id if no specific server_id is provided + server_ids = find_all_server_ids(dbname) + for id in server_ids: + main(server_id=id[0], dbname=dbname) + return + + # Get schedules for the server_id + schedule_ids = get_schedules_per_server(server_id=server_id, dbname=dbname) + print(f"Found schedules for server_id {server_id}: {schedule_ids}") + + # # Build Tap objects + # taps = create_tap_objects(schedule_ids, dbname=dbname) + # if not taps: + # print("No valid schedules found to optimize.") + # sys.exit(1) + + + # # Run GA optimizer ---> add in way to change GAConfig parameters ! + # try: + # ga = pygad.GAPyGADScheduler() + # optimised_taps = ga.solve(taps) + + # # Add logic to actually assign the new schedules ! + # assign_new_schedules(optimised_taps, dbname=dbname) + + # except Exception as e: + # print(f"Error during optimization for server_id {server_id}: {e}") + # sys.exit(1) + diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index f958b4e..27f331c 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -370,3 +370,39 @@ def get_all_schedule_ids(db_cur): def delete_schedule(db_cur, schedule_id): sqlquery = f"DELETE from schedules WHERE schedule_id = '{schedule_id}'" db_cur.execute(sqlquery) + + +def get_all_server_ids(db_cur): + """Get all possible server_ids from the servers table""" + sqlquery = "SELECT DISTINCT server_id FROM schedules ORDER BY server_id" + db_cur.execute(sqlquery) + server_ids = db_cur.fetchall() + + return server_ids + +def get_all_schedule_ids_per_server(db_cur, server_id): + """Get all possible schedule_ids for each server from the schedules table""" + sqlquery = f"SELECT DISTINCT schedule_id FROM schedules WHERE server_id = '{server_id}' ORDER BY schedule_id" + 
db_cur.execute(sqlquery) + schedule_ids = db_cur.fetchall() + + return schedule_ids + + +def get_median_run_time(db_cur, schedule_id): + sqlquery = f""" + SELECT percentile_cont(0.5) + WITHIN GROUP (ORDER BY EXTRACT(EPOCH FROM (end_time - start_time)) / 60) + AS median_minutes_taken + FROM schedule_log + WHERE schedule_id = '{schedule_id}' + """ + db_cur.execute(sqlquery) + row = db_cur.fetchone() + + try: + average_runtime_minutes = float(row[0]) + return average_runtime_minutes + except Exception: + print(f"ERROR : No runs associated with the schedule_id {schedule_id}") + sys.exit(1) \ No newline at end of file From ddf33337009dce4ea901dc35136e2d8eb6b3f3fb Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Tue, 31 Mar 2026 11:47:14 +0100 Subject: [PATCH 02/53] Create taps objects for each schedule --- cicada/commands/smart_schedule.py | 133 ++++++++++++++------------- cicada/lib/SmartScheduling/config.py | 16 ++++ cicada/lib/SmartScheduling/domain.py | 73 +++++++++++++++ 3 files changed, 156 insertions(+), 66 deletions(-) create mode 100644 cicada/lib/SmartScheduling/config.py create mode 100644 cicada/lib/SmartScheduling/domain.py diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index 497e58d..6ba968e 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -7,7 +7,7 @@ from cicada.lib import postgres, utils from cicada.lib import scheduler # from cicada.lib.SmartScheduling import pygad -# from cicada.lib.SmartScheduling.domain import Tap +from cicada.lib.SmartScheduling.domain import Tap def get_schedules_per_server(server_id, dbname=None): @@ -35,24 +35,26 @@ def find_all_server_ids(dbname=None): return server_ids -# def create_tap_objects(schedule_ids, dbname=None): +def create_tap_objects(schedule_ids, dbname=None): """Create Tap objects from schedule_ids.""" db_conn = postgres.db_cicada(dbname) db_cur = db_conn.cursor() - taps : list[pygad.Tap] = [] + taps : list[Tap] = [] # Fetch details for 
each schedule and convert to Tap objects for schedule_id in schedule_ids: details = scheduler.get_schedule_details(db_cur, schedule_id) try: - tap = pygad.Tap( - schedule_id=schedule_id, - interval_mask=details.get("interval_mask") + tap = Tap( + schedule_id=details.get("schedule_id"), + server_id=details.get("server_id"), + interval_mask=details.get("interval_mask"), + db_cur=db_cur ) # Ignore the few taps that have irregular cron expressions for now. There are few enough that this shouldn't impact the optimisation and is not worth the effort to try and support these irregular schedules in the GA - if not tap.is_regular_schedule(tap.interval_mask): + if not tap.is_regular_schedule(): raise ValueError(f"Skipping irregular cron expression: {tap.interval_mask}") else: tap.determine_attributes(db_cur) @@ -66,66 +68,66 @@ def find_all_server_ids(dbname=None): return taps # def update_schedule_cron(tap : Tap) -> str: - """ - Uses the start_blocks to shift the cron expression accordingly. Gene space is already limited from 0 to the frequency of the tap +# """ +# Uses the start_blocks to shift the cron expression accordingly. Gene space is already limited from 0 to the frequency of the tap - Ex. form of cron expression: "8-59/15 * * * *" (every 15 minutes starting at minute 8 of each hour) - """ - frequency = tap.frequency_minutes - shift = tap.shift +# Ex. 
form of cron expression: "8-59/15 * * * *" (every 15 minutes starting at minute 8 of each hour) +# """ +# frequency = tap.frequency_minutes +# shift = tap.shift - if not shift or frequency >= 60: - return tap # No shift needed +# if not shift or frequency >= 60: +# return tap # No shift needed - if shift == 0: - return tap # No shift needed - if shift < 60: - if frequency >= 60: - minute = shift % 60 - hour = shift // 60 + croniter(tap.interval_mask).get_next(datetime).hour # Get the hour of the first scheduled run and add the shift in hours - tap.interval_mask = f"{minute} {hour} * * *" - # Check that the new cron expression is valid - if not croniter.is_valid(tap.interval_mask): - raise ValueError(f"Invalid cron expression generated: {tap.interval_mask}") - return tap - else: - tap.interval_mask = f"{shift}-59/{frequency} * * * *" - # Check that the new cron expression is valid - if not croniter.is_valid(tap.interval_mask): - raise ValueError(f"Invalid cron expression generated: {tap.interval_mask}") - return tap - # Do we only want to support shifts of less than an hour? --> most schedules are on a 30 minute basis and it would simplify this function ! 
- if shift >= 60: - raise NotImplementedError("Design decision needed on how to implement schedule shifting for different frequencies and shift amounts.") +# if shift == 0: +# return tap # No shift needed +# if shift < 60: +# if frequency >= 60: +# minute = shift % 60 +# hour = shift // 60 + croniter(tap.interval_mask).get_next(datetime).hour # Get the hour of the first scheduled run and add the shift in hours +# tap.interval_mask = f"{minute} {hour} * * *" +# # Check that the new cron expression is valid +# if not croniter.is_valid(tap.interval_mask): +# raise ValueError(f"Invalid cron expression generated: {tap.interval_mask}") +# return tap +# else: +# tap.interval_mask = f"{shift}-59/{frequency} * * * *" +# # Check that the new cron expression is valid +# if not croniter.is_valid(tap.interval_mask): +# raise ValueError(f"Invalid cron expression generated: {tap.interval_mask}") +# return tap +# # Do we only want to support shifts of less than an hour? --> most schedules are on a 30 minute basis and it would simplify this function ! +# if shift >= 60: +# raise NotImplementedError("Design decision needed on how to implement schedule shifting for different frequencies and shift amounts.") # def assign_new_schedules(optimised_taps: list[pygad.Tap], dbname=None): - """Assign new schedules based on the optimal schedule found.""" - db_conn = postgres.db_cicada(dbname) - db_cur = db_conn.cursor() - - # For each tap, update the schedule in the DB with the new interval_mask based on the shift calculated by the GA optimizer - for tap in optimised_taps: - tap = update_schedule_cron(tap) - schedule_details = { - "schedule_id": tap.schedule_id, - "interval_mask": tap.interval_mask - } - scheduler.update_schedule_details(db_cur=db_cur, schedule_details=schedule_details) - db_conn.commit() - db_cur.close() - db_conn.close() - -# def rollback(tap : Optional[Tap], dbname=None): - """ - Rollback to original schedules in case of any issues during assignment. 
- Args: tap: Optional[Tap] : the tap to rollback, if None rollback all taps to their original schedules - - """ - # This would require storing the original schedules before making changes, which is not currently implemented. - # We could potentially store the original interval_masks in a separate table or in memory before updating, and then use that for rollback if needed. - raise NotImplementedError("Rollback functionality not yet implemented") +# """Assign new schedules based on the optimal schedule found.""" +# db_conn = postgres.db_cicada(dbname) +# db_cur = db_conn.cursor() + +# # For each tap, update the schedule in the DB with the new interval_mask based on the shift calculated by the GA optimizer +# for tap in optimised_taps: +# tap = update_schedule_cron(tap) +# schedule_details = { +# "schedule_id": tap.schedule_id, +# "interval_mask": tap.interval_mask +# } +# scheduler.update_schedule_details(db_cur=db_cur, schedule_details=schedule_details) +# db_conn.commit() +# db_cur.close() +# db_conn.close() + +# # def rollback(tap : Optional[Tap], dbname=None): +# """ +# Rollback to original schedules in case of any issues during assignment. +# Args: tap: Optional[Tap] : the tap to rollback, if None rollback all taps to their original schedules + +# """ +# # This would require storing the original schedules before making changes, which is not currently implemented. +# # We could potentially store the original interval_masks in a separate table or in memory before updating, and then use that for rollback if needed. 
+# raise NotImplementedError("Rollback functionality not yet implemented") @utils.named_exception_handler("smart_schedule") @@ -142,12 +144,11 @@ def main(server_id=None, dbname=None): schedule_ids = get_schedules_per_server(server_id=server_id, dbname=dbname) print(f"Found schedules for server_id {server_id}: {schedule_ids}") - # # Build Tap objects - # taps = create_tap_objects(schedule_ids, dbname=dbname) - # if not taps: - # print("No valid schedules found to optimize.") - # sys.exit(1) - + # Build Tap objects + taps = create_tap_objects(schedule_ids, dbname=dbname) + if not taps: + print("No valid schedules found to optimize.") + sys.exit(1) # # Run GA optimizer ---> add in way to change GAConfig parameters ! # try: diff --git a/cicada/lib/SmartScheduling/config.py b/cicada/lib/SmartScheduling/config.py new file mode 100644 index 0000000..477a2ed --- /dev/null +++ b/cicada/lib/SmartScheduling/config.py @@ -0,0 +1,16 @@ +from dataclasses import dataclass +from typing import Optional + +@dataclass +class GAConfig: + minutes_per_block: int = 5 + num_generations: int = 200 + sol_per_pop: int = 40 + num_parents_mating: int = 10 + mutation_percent_genes: int = 20 + parent_selection_type: str = "rank" + crossover_type: str = "uniform" + mutation_type: str = "random" + keep_elitism: int = 1 + random_seed: Optional[int] = None + blacklist_schedule_ids: Optional[list[str]] = None \ No newline at end of file diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/SmartScheduling/domain.py new file mode 100644 index 0000000..1c78490 --- /dev/null +++ b/cicada/lib/SmartScheduling/domain.py @@ -0,0 +1,73 @@ +from __future__ import annotations +from dataclasses import dataclass +from typing import Optional, List +import numpy as np +from croniter import croniter +import datetime +from ..scheduler import get_median_run_time + + +@dataclass(frozen=False) +class Tap: + schedule_id: int + server_id: int + original_interval_mask: str + interval_mask: str + 
frequency_minutes: int + # cpu_max: float + median_runtime_minutes: int + shift: Optional[int] = 0 + # start_time: Optional[int] = None + + def __init__(self, schedule_id, server_id, interval_mask, db_cur): + self.schedule_id = schedule_id + self.server_id = server_id + self.interval_mask = interval_mask + self.original_interval_mask = interval_mask + self.determine_attributes(db_cur) + + def determine_attributes(self, db_cur): + """Determine frequency and average runtime from interval_mask and scheduler module""" + self._determine_frequency() + self._get_average_runtime(db_cur) + + def _determine_frequency(self): + """Determine frequency in minutes from interval_mask using crontier""" + schedule = croniter(self.interval_mask) + first_iter = schedule.get_next(datetime.datetime) + second_iter = schedule.get_next(datetime.datetime) + frequency = (second_iter - first_iter).total_seconds() / 60 + + self.frequency_minutes = int(frequency) + + + def _get_average_runtime(self, db_cur): + """Get average runtime from scheduler module""" + # for local testing set everything to 5 mins + self.median_runtime_minutes = 5 + # self.median_runtime_minutes = get_median_run_time(db_cur, self.schedule_id) + + def is_regular_schedule(self): + """Check if the cron expression is a regular schedule that can be optimized by the GA """ + try: + schedule = croniter(self.interval_mask) + iter1 = schedule.get_next(datetime.datetime) + iter2 = schedule.get_next(datetime.datetime) + iter3 = schedule.get_next(datetime.datetime) + iter4 = schedule.get_next(datetime.datetime) + iter5 = schedule.get_next(datetime.datetime) + freq1 = (iter2 - iter1) + freq2 = (iter3 - iter2) + freq3 = (iter4 - iter3) + freq4 = (iter5 - iter4) + return freq1 == freq2 == freq3 == freq4 + except (ValueError, KeyError): + return False + + +@dataclass +class Schedule: + start_blocks: List[int] + usage: np.ndarray + peak_cpu: float + taps: List[Tap] \ No newline at end of file From d547bb31289818365f0022cc239136a84656c869 
Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Tue, 31 Mar 2026 15:00:01 +0100 Subject: [PATCH 03/53] Get pygad to work with dummy taps --- cicada/commands/smart_schedule.py | 18 ++-- cicada/lib/SmartScheduling/config.py | 7 +- cicada/lib/SmartScheduling/domain.py | 18 ++-- cicada/lib/SmartScheduling/evaluation.py | 68 +++++++++++++++ cicada/lib/SmartScheduling/pygad.py | 102 +++++++++++++++++++++++ setup.py | 2 + 6 files changed, 194 insertions(+), 21 deletions(-) create mode 100644 cicada/lib/SmartScheduling/evaluation.py create mode 100644 cicada/lib/SmartScheduling/pygad.py diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index 6ba968e..9460499 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -6,7 +6,7 @@ # from croniter import croniter from cicada.lib import postgres, utils from cicada.lib import scheduler -# from cicada.lib.SmartScheduling import pygad +from cicada.lib.SmartScheduling import pygad from cicada.lib.SmartScheduling.domain import Tap @@ -150,15 +150,15 @@ def main(server_id=None, dbname=None): print("No valid schedules found to optimize.") sys.exit(1) - # # Run GA optimizer ---> add in way to change GAConfig parameters ! - # try: - # ga = pygad.GAPyGADScheduler() - # optimised_taps = ga.solve(taps) - + # Run GA optimizer ---> add in way to change GAConfig parameters ! + try: + ga = pygad.GAPyGADScheduler() + optimised_taps, start_blocks, peak_cpu, usage = ga.solve(taps) + print(f"Optimized schedule for server_id {server_id}: {[tap.schedule_id for tap in optimised_taps]} with start blocks {start_blocks}, peak CPU {peak_cpu}, and usage {usage}") # # Add logic to actually assign the new schedules ! 
# assign_new_schedules(optimised_taps, dbname=dbname) - # except Exception as e: - # print(f"Error during optimization for server_id {server_id}: {e}") - # sys.exit(1) + except Exception as e: + print(f"Error during optimization for server_id {server_id}: {e}") + sys.exit(1) diff --git a/cicada/lib/SmartScheduling/config.py b/cicada/lib/SmartScheduling/config.py index 477a2ed..f0355c9 100644 --- a/cicada/lib/SmartScheduling/config.py +++ b/cicada/lib/SmartScheduling/config.py @@ -1,5 +1,6 @@ -from dataclasses import dataclass -from typing import Optional +from __future__ import annotations +from dataclasses import dataclass, field +from typing import Optional, List @dataclass class GAConfig: @@ -13,4 +14,4 @@ class GAConfig: mutation_type: str = "random" keep_elitism: int = 1 random_seed: Optional[int] = None - blacklist_schedule_ids: Optional[list[str]] = None \ No newline at end of file + blacklist_schedule_ids: Optional[List[str]] = field(default_factory=list) \ No newline at end of file diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/SmartScheduling/domain.py index 1c78490..66fd0d0 100644 --- a/cicada/lib/SmartScheduling/domain.py +++ b/cicada/lib/SmartScheduling/domain.py @@ -14,10 +14,10 @@ class Tap: original_interval_mask: str interval_mask: str frequency_minutes: int - # cpu_max: float - median_runtime_minutes: int + cpu_max: float = 1 + median_runtime_minutes: int = 5 shift: Optional[int] = 0 - # start_time: Optional[int] = None + start_time_blocks: Optional[int] = None def __init__(self, schedule_id, server_id, interval_mask, db_cur): self.schedule_id = schedule_id @@ -65,9 +65,9 @@ def is_regular_schedule(self): return False -@dataclass -class Schedule: - start_blocks: List[int] - usage: np.ndarray - peak_cpu: float - taps: List[Tap] \ No newline at end of file +# @dataclass +# class Schedule: +# start_blocks: List[int] = None +# usage: np.ndarray +# peak_cpu: float +# taps: List[Tap] \ No newline at end of file diff --git 
a/cicada/lib/SmartScheduling/evaluation.py b/cicada/lib/SmartScheduling/evaluation.py new file mode 100644 index 0000000..05ac8c8 --- /dev/null +++ b/cicada/lib/SmartScheduling/evaluation.py @@ -0,0 +1,68 @@ +import math +import numpy as np +from typing import Sequence, Tuple, List +from .domain import Tap + + +def calculate_blocks_per_day(minutes_per_block: int) -> int: + """ + Calculate the number of time blocks in a day given the minutes per block. + Raises error if the minutes_per_block does not give a whole number of blocks per day + Args: + minutes_per_block: int : number of minutes per time block + Returns: + int : number of time blocks in a day + """ + if (24 * 60) % minutes_per_block != 0: + raise ValueError("minutes_per_block must divide evenly into 1440 (the number of minutes in a day)") + return (24 * 60) // minutes_per_block + + +def discretize_taps(taps: Sequence[Tap], minutes_per_block: int) -> Tuple[List[int], List[int]]: + """ + Discretize taps into frequency and runtime blocks based on minutes per block. 
+ Args: + taps: Sequence[Tap] : list of Tap objects + minutes_per_block: int : number of minutes per time block + Returns: + Tuple[List[int], List[int]] : frequency blocks and runtime blocks for each tap + freq_blocks: List[int] : amount of time blocks between each run of the tap (based on frequency_minutes) + run_blocks: List[int] : amount of time blocks the tap runs for (based on median_runtime_minutes) + """ + freq_blocks, run_blocks = [], [] + for t in taps: + fb = max(1, t.frequency_minutes // minutes_per_block) + rb = max(1, math.ceil(t.median_runtime_minutes / minutes_per_block)) + freq_blocks.append(fb) + run_blocks.append(rb) + return freq_blocks, run_blocks + +def evaluate_cpu_usage_and_peak(start_blocks: Sequence[int], taps: Sequence[Tap], minutes_per_block: int): + """ + Returns the CPU usage time series and peak CPU usage for a given schedule solution + Args: + start_blocks: Sequence[int] : start time blocks for each tap + taps: Sequence[Tap] : list of Tap objects + minutes_per_block: int : number of minutes per time block + Returns: + usage: np.ndarray : CPU usage time series + peak: float : peak CPU usage + """ + + blocks_per_day = calculate_blocks_per_day(minutes_per_block) + freq_blocks, run_blocks = discretize_taps(taps, minutes_per_block) + diff = np.zeros(blocks_per_day + 1, dtype=float) + for i, tap in enumerate(taps): + s0 = int(start_blocks[i]) + freq = freq_blocks[i] + run_len = run_blocks[i] + cpu = float(tap.cpu_max) + block = s0 + while block < blocks_per_day: + end = min(block + run_len, blocks_per_day) + diff[block] += cpu + diff[end] -= cpu + block += freq + usage = np.cumsum(diff[:-1]) + peak = float(np.max(usage)) if usage.size else 0.0 + return usage, peak \ No newline at end of file diff --git a/cicada/lib/SmartScheduling/pygad.py b/cicada/lib/SmartScheduling/pygad.py new file mode 100644 index 0000000..4f9c63f --- /dev/null +++ b/cicada/lib/SmartScheduling/pygad.py @@ -0,0 +1,102 @@ +from __future__ import annotations +from typing 
import List, Optional, Sequence +import numpy as np +from .config import GAConfig +from .domain import Tap +from .evaluation import evaluate_cpu_usage_and_peak, discretize_taps, calculate_blocks_per_day +import pygad + + +# """Blacklist functionality - to be added later""" + +class GAPyGADScheduler: + """ + Genetic Algorithm Scheduler using PyGAD + Args: + config: Optional[GAConfig] : configuration for the genetic algorithm + Returns: + Schedule : optimized schedule for all taps + + + Implementation Note: We only consider the regular taps during fitness evaluation to aid simplicity as there are few irregular + taps. All regular taps are fed into the scheduler however those on the blacklist will remain unchanged + and are kept purely to ensure the fitness evaluation is accurate to the actual schedule. + + We cap the max shift of a tap to within the hour to prevent large shifts for taps that run daily. + """ + + def __init__(self, config: Optional[GAConfig] = None): + self.cfg = config or GAConfig() + + + def _gene_space(self, taps: Sequence[Tap]) -> List[List[int]]: + # Build gene_space per tap: each gene space is limited by it's frequency (e.g. a 15min freq tap can only traverse the first 15min worth of time blocks) + # Computed in blocks to make it time-block-interval agnostic. we don't want to have to rewrite all the start_times if we e.g. 
decide to change the scheduling interval + freq_blocks, _ = discretize_taps(taps, self.cfg.minutes_per_block) + + + for i, tap in enumerate(taps): + # Ignore any blacklist taps -> fix the gene space to be 0 so they're still included in the fitness eval + if tap.schedule_id in self.cfg.blacklist_schedule_ids: + freq_blocks[i] = 1 + + # Limit gene space to only shift within the hour for the taps which run less frequently + if tap.frequency_minutes >= 60: + freq_blocks[i] = 59 // self.cfg.minutes_per_block + return [list(range(fb)) for fb in freq_blocks] + + def _initial_population(self, taps: Sequence[Tap], gene_space: List[List[int]]) -> np.ndarray: + rng = np.random.default_rng(self.cfg.random_seed) + seed = [] + + # Add current start blocks as first solution to bias solution space towaards current solution + for i, tap in enumerate(taps): + gs = gene_space[i] + s = 0 if tap.start_time_blocks is None else int(tap.start_time_blocks) + seed.append(max(min(s, gs[-1]), gs[0])) + pop = [seed] + + # Populate the rest of the initial population randomly within the gene space limits for each tap + for _ in range(self.cfg.sol_per_pop - 1): + pop.append([int(rng.integers(0, len(gene_space[i]))) for i in range(len(taps))]) + return np.asarray(pop, dtype=int) + + def _blacklist(self): + self.cfg.blacklist_schedule_ids = set(self.cfg.blacklist_schedule_ids) + raise NotImplementedError("Blacklist functionality not yet implemented") + + def fitness_fn(self, ga, solution, solution_idx): + _, peak = evaluate_cpu_usage_and_peak(solution, self.taps, self.cfg.minutes_per_block) + return -float(peak) + + def solve(self, taps: Sequence[Tap]) -> tuple[Sequence[Tap], List[int], float, np.ndarray]: + gene_space = self._gene_space(taps) + self.taps = taps + + initial_population = self._initial_population(taps, gene_space) + print("Initial population fitness (max_cpu load):", -self.fitness_fn(None, initial_population[0], 0)) + + ga = pygad.GA( + num_generations=self.cfg.num_generations, + 
sol_per_pop=self.cfg.sol_per_pop, + num_parents_mating=self.cfg.num_parents_mating, + num_genes=len(taps), + gene_type=int, + gene_space=gene_space, + mutation_percent_genes=self.cfg.mutation_percent_genes, + fitness_func=self.fitness_fn, + parent_selection_type=self.cfg.parent_selection_type, + keep_elitism=self.cfg.keep_elitism, + crossover_type=self.cfg.crossover_type, + mutation_type=self.cfg.mutation_type, + allow_duplicate_genes=True, + initial_population=initial_population, + random_seed=self.cfg.random_seed, + ) + ga.run() + + best_solution, best_fitness, _ = ga.best_solution() + start_blocks = [int(v) for v in best_solution] + peak_cpu = -float(best_fitness) + usage, _ = evaluate_cpu_usage_and_peak(start_blocks, taps, self.cfg.minutes_per_block) + return taps, start_blocks, peak_cpu, usage \ No newline at end of file diff --git a/setup.py b/setup.py index bfa429a..944ca84 100644 --- a/setup.py +++ b/setup.py @@ -24,6 +24,8 @@ "tabulate==0.9.*", "slack-sdk==3.37.*", "backoff==2.2.*", + "numpy==1.24.*", + "pygad==3.5.*" ], extras_require={ "dev": [ From 9433d71424b6eb12b501184fa538914c57982874 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Tue, 31 Mar 2026 18:32:59 +0100 Subject: [PATCH 04/53] Create script to mimic current PPW taps locally and remove schedules that don't correspond to taps --- cicada/commands/smart_schedule.py | 1 - cicada/lib/scheduler.py | 2 +- local-dev/cicada_db/Dockerfile | 7 +- local-dev/docker-compose.yml | 4 +- local-dev/entrypoint.sh | 1 + setup/create_test_tap_setup.sql | 522 ++++++++++++++++++++++++++++++ 6 files changed, 531 insertions(+), 6 deletions(-) create mode 100644 setup/create_test_tap_setup.sql diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index 9460499..30c10c1 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -14,7 +14,6 @@ def get_schedules_per_server(server_id, dbname=None): """Get all schedules for a given server_id.""" db_conn = 
postgres.db_cicada(dbname) db_cur = db_conn.cursor() - # Don't get the schedules that aren't taps -> schedule_description LIKE '%==%' ? !? schedule_ids = [row[0] for row in scheduler.get_all_schedule_ids_per_server(db_cur, server_id)] db_cur.close() db_conn.close() diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index 27f331c..86fff05 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -382,7 +382,7 @@ def get_all_server_ids(db_cur): def get_all_schedule_ids_per_server(db_cur, server_id): """Get all possible schedule_ids for each server from the schedules table""" - sqlquery = f"SELECT DISTINCT schedule_id FROM schedules WHERE server_id = '{server_id}' ORDER BY schedule_id" + sqlquery = f"SELECT DISTINCT schedule_id FROM schedules WHERE server_id = '{server_id}' and schedule_description not like '%==%' ORDER BY schedule_id" db_cur.execute(sqlquery) schedule_ids = db_cur.fetchall() diff --git a/local-dev/cicada_db/Dockerfile b/local-dev/cicada_db/Dockerfile index a00ca90..404a796 100644 --- a/local-dev/cicada_db/Dockerfile +++ b/local-dev/cicada_db/Dockerfile @@ -1,7 +1,7 @@ FROM debezium/postgres:15-alpine -COPY --chown=postgres:postgres server.crt /var/lib/postgresql/server.crt -COPY --chown=postgres:postgres --chmod=600 server.key /var/lib/postgresql/server.key +COPY --chown=postgres:postgres local-dev/cicada_db/server.crt /var/lib/postgresql/server.crt +COPY --chown=postgres:postgres --chmod=600 local-dev/cicada_db/server.key /var/lib/postgresql/server.key RUN apk add --no-cache --virtual .debezium-build-deps gcc clang15 llvm15 git make musl-dev pkgconf \ && git clone --depth 1 --branch wal2json_2_6 https://github.com/eulerto/wal2json.git \ @@ -10,3 +10,6 @@ RUN apk add --no-cache --virtual .debezium-build-deps gcc clang15 llvm15 git mak && cd / \ && rm -rf wal2json \ && apk del .debezium-build-deps + +COPY --chown=postgres:postgres setup/schema.sql /docker-entrypoint-initdb.d/01_schema.sql +COPY --chown=postgres:postgres 
setup/create_test_tap_setup.sql /docker-entrypoint-initdb.d/02_create_test_tap_setup.sql \ No newline at end of file diff --git a/local-dev/docker-compose.yml b/local-dev/docker-compose.yml index 10064d9..45ad9fa 100644 --- a/local-dev/docker-compose.yml +++ b/local-dev/docker-compose.yml @@ -26,8 +26,8 @@ services: # PostgreSQL database container used as backend cicada_db: build: - context: ./cicada_db - dockerfile: Dockerfile + context: ../ + dockerfile: local-dev/cicada_db/Dockerfile container_name: cicada_db volumes: - ./cicada_db/pg_hba.conf:/var/lib/postgresql/pg_hba.conf diff --git a/local-dev/entrypoint.sh b/local-dev/entrypoint.sh index fb99d1f..b151bf7 100755 --- a/local-dev/entrypoint.sh +++ b/local-dev/entrypoint.sh @@ -45,6 +45,7 @@ pwd # Build backend database export PGPASSWORD=${DB_POSTGRES_PASS} psql -v ON_ERROR_STOP=1 "sslmode=prefer user=${DB_POSTGRES_USER} host=${DB_POSTGRES_HOST} port=${DB_POSTGRES_PORT} dbname=${DB_POSTGRES_DB}" --file=setup/schema.sql --quiet +psql -v ON_ERROR_STOP=1 "sslmode=prefer user=${DB_POSTGRES_USER} host=${DB_POSTGRES_HOST} port=${DB_POSTGRES_PORT} dbname=${DB_POSTGRES_DB}" --file=setup/create_test_tap_setup.sql --quiet # If not exists, create definitions file for cli test -f ${CICADA_HOME}/config/definitions.yml || cat > ${CICADA_HOME}/config/definitions.yml < Date: Tue, 31 Mar 2026 18:35:27 +0100 Subject: [PATCH 05/53] Add functionality for updating schedules produced by smart_schedule --- cicada/commands/smart_schedule.py | 165 ++++++++++++++++----------- cicada/lib/SmartScheduling/domain.py | 11 +- cicada/lib/SmartScheduling/pygad.py | 5 + local-dev/cicada_db/Dockerfile | 3 - 4 files changed, 111 insertions(+), 73 deletions(-) diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index 30c10c1..b0c9d26 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -1,9 +1,10 @@ """Shifts the schedules on a node to distribute the load""" +from __future__ import 
annotations import datetime import sys -# from typing import Optional -# from croniter import croniter +from croniter import croniter +from typing import Optional from cicada.lib import postgres, utils from cicada.lib import scheduler from cicada.lib.SmartScheduling import pygad @@ -45,12 +46,7 @@ def create_tap_objects(schedule_ids, dbname=None): for schedule_id in schedule_ids: details = scheduler.get_schedule_details(db_cur, schedule_id) try: - tap = Tap( - schedule_id=details.get("schedule_id"), - server_id=details.get("server_id"), - interval_mask=details.get("interval_mask"), - db_cur=db_cur - ) + tap = Tap(details, db_cur=db_cur) # Ignore the few taps that have irregular cron expressions for now. There are few enough that this shouldn't impact the optimisation and is not worth the effort to try and support these irregular schedules in the GA if not tap.is_regular_schedule(): @@ -66,67 +62,106 @@ def create_tap_objects(schedule_ids, dbname=None): db_conn.close() return taps -# def update_schedule_cron(tap : Tap) -> str: -# """ -# Uses the start_blocks to shift the cron expression accordingly. Gene space is already limited from 0 to the frequency of the tap +def update_schedule_cron(tap : Tap) -> str: + """ + Uses the start_blocks to shift the cron expression accordingly. Gene space is already limited from 0 to the frequency of the tap -# Ex. form of cron expression: "8-59/15 * * * *" (every 15 minutes starting at minute 8 of each hour) -# """ -# frequency = tap.frequency_minutes -# shift = tap.shift + Ex. 
form of cron expression: "8-59/15 * * * *" (every 15 minutes starting at minute 8 of each hour) + """ + frequency = tap.frequency_minutes + shift = tap.shift -# if not shift or frequency >= 60: -# return tap # No shift needed + if not shift or shift == 0: + return tap # No shift needed -# if shift == 0: -# return tap # No shift needed -# if shift < 60: -# if frequency >= 60: -# minute = shift % 60 -# hour = shift // 60 + croniter(tap.interval_mask).get_next(datetime).hour # Get the hour of the first scheduled run and add the shift in hours -# tap.interval_mask = f"{minute} {hour} * * *" -# # Check that the new cron expression is valid -# if not croniter.is_valid(tap.interval_mask): -# raise ValueError(f"Invalid cron expression generated: {tap.interval_mask}") -# return tap -# else: -# tap.interval_mask = f"{shift}-59/{frequency} * * * *" -# # Check that the new cron expression is valid -# if not croniter.is_valid(tap.interval_mask): -# raise ValueError(f"Invalid cron expression generated: {tap.interval_mask}") -# return tap -# # Do we only want to support shifts of less than an hour? --> most schedules are on a 30 minute basis and it would simplify this function ! 
-# if shift >= 60: -# raise NotImplementedError("Design decision needed on how to implement schedule shifting for different frequencies and shift amounts.") + if frequency >= 60: + minute = shift % 60 + hour = shift // 60 + croniter(tap.original_interval_mask).get_next(datetime.datetime).hour # Get the hour of the first scheduled run and add the shift in hours + tap.interval_mask = f"{minute} {hour} * * *" + # Check that the new cron expression is valid + if not croniter.is_valid(tap.interval_mask): + raise ValueError(f"Invalid cron expression generated: {tap.interval_mask}") + return tap + else: + tap.interval_mask = f"{shift}-59/{frequency} * * * *" + # Check that the new cron expression is valid + if not croniter.is_valid(tap.interval_mask): + raise ValueError(f"Invalid cron expression generated: {tap.interval_mask}") + return tap -# def assign_new_schedules(optimised_taps: list[pygad.Tap], dbname=None): -# """Assign new schedules based on the optimal schedule found.""" -# db_conn = postgres.db_cicada(dbname) -# db_cur = db_conn.cursor() - -# # For each tap, update the schedule in the DB with the new interval_mask based on the shift calculated by the GA optimizer -# for tap in optimised_taps: -# tap = update_schedule_cron(tap) -# schedule_details = { -# "schedule_id": tap.schedule_id, -# "interval_mask": tap.interval_mask -# } -# scheduler.update_schedule_details(db_cur=db_cur, schedule_details=schedule_details) -# db_conn.commit() -# db_cur.close() -# db_conn.close() - -# # def rollback(tap : Optional[Tap], dbname=None): -# """ -# Rollback to original schedules in case of any issues during assignment. -# Args: tap: Optional[Tap] : the tap to rollback, if None rollback all taps to their original schedules - -# """ -# # This would require storing the original schedules before making changes, which is not currently implemented. 
-# # We could potentially store the original interval_masks in a separate table or in memory before updating, and then use that for rollback if needed. -# raise NotImplementedError("Rollback functionality not yet implemented") +def assign_new_schedules(optimised_taps: list[pygad.Tap], dbname=None): + """Assign new schedules based on the optimal schedule found.""" + db_conn = postgres.db_cicada(dbname) + db_cur = db_conn.cursor() + + # For each tap, update the schedule in the DB with the new interval_mask based on the shift calculated by the GA optimizer + for tap in optimised_taps: + tap = update_schedule_cron(tap) + + schedule_details = { + "adhoc_parameters": None, + "adhoc_execute": None, + "schedule_group_id": None, + "parameters": None, + "server_id": None, + "last_run_date": None, + "is_enabled": None, + "interval_mask": tap.interval_mask, + "schedule_description": None, + "auto_update_time": None, + "schedule_order": None, + "schedule_id": tap.schedule_id, + "is_async": None, + "abort_running": None, + "exec_command": None, + "first_run_date": None, + "is_running": None + } + scheduler.update_schedule_details(db_cur=db_cur, schedule_details=schedule_details) + db_conn.commit() + db_cur.close() + db_conn.close() + +def rollback(server_id : Optional[int], db_cur, dbname=None): + """ + Rollback to original schedules in case of any issues during assignment. 
+ Args: server_id: Optional[int] : the server_id to rollback, if None rollback all servers + """ + + if not server_id: + # Recursively call rollback for each server_id if no specific server_id is provided + server_ids = find_all_server_ids(dbname) + for id in server_ids: + rollback(server_id=id[0], db_cur=db_cur, dbname=dbname) + return + + taps = get_schedules_per_server(server_id=server_id, dbname=dbname) + + for tap in taps: + schedule_details = { + "adhoc_parameters": None, + "adhoc_execute": None, + "schedule_group_id": None, + "parameters": None, + "server_id": None, + "last_run_date": None, + "is_enabled": None, + "interval_mask": tap.original_interval_mask, + "schedule_description": None, + "auto_update_time": None, + "schedule_order": None, + "schedule_id": tap.schedule_id, + "is_async": None, + "abort_running": None, + "exec_command": None, + "first_run_date": None, + "is_running": None + } + scheduler.update_schedule_details(db_cur=db_cur, schedule_details=schedule_details) + + raise NotImplementedError("Rollback functionality not yet implemented") @utils.named_exception_handler("smart_schedule") @@ -154,8 +189,8 @@ def main(server_id=None, dbname=None): ga = pygad.GAPyGADScheduler() optimised_taps, start_blocks, peak_cpu, usage = ga.solve(taps) print(f"Optimized schedule for server_id {server_id}: {[tap.schedule_id for tap in optimised_taps]} with start blocks {start_blocks}, peak CPU {peak_cpu}, and usage {usage}") - # # Add logic to actually assign the new schedules ! 
- # assign_new_schedules(optimised_taps, dbname=dbname) + + assign_new_schedules(optimised_taps, dbname=dbname) except Exception as e: print(f"Error during optimization for server_id {server_id}: {e}") diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/SmartScheduling/domain.py index 66fd0d0..22a5a7e 100644 --- a/cicada/lib/SmartScheduling/domain.py +++ b/cicada/lib/SmartScheduling/domain.py @@ -18,12 +18,13 @@ class Tap: median_runtime_minutes: int = 5 shift: Optional[int] = 0 start_time_blocks: Optional[int] = None + - def __init__(self, schedule_id, server_id, interval_mask, db_cur): - self.schedule_id = schedule_id - self.server_id = server_id - self.interval_mask = interval_mask - self.original_interval_mask = interval_mask + def __init__(self, details, db_cur): + self.schedule_id = details['schedule_id'] + self.server_id = details['server_id'] + self.interval_mask = details['interval_mask'] + self.original_interval_mask = details['interval_mask'] self.determine_attributes(db_cur) def determine_attributes(self, db_cur): diff --git a/cicada/lib/SmartScheduling/pygad.py b/cicada/lib/SmartScheduling/pygad.py index 4f9c63f..d0e232a 100644 --- a/cicada/lib/SmartScheduling/pygad.py +++ b/cicada/lib/SmartScheduling/pygad.py @@ -99,4 +99,9 @@ def solve(self, taps: Sequence[Tap]) -> tuple[Sequence[Tap], List[int], float, n start_blocks = [int(v) for v in best_solution] peak_cpu = -float(best_fitness) usage, _ = evaluate_cpu_usage_and_peak(start_blocks, taps, self.cfg.minutes_per_block) + + # Update tap objects shift attribute based on GA solution + for i, tap in enumerate(taps): + tap.shift = start_blocks[i] * self.cfg.minutes_per_block + return taps, start_blocks, peak_cpu, usage \ No newline at end of file diff --git a/local-dev/cicada_db/Dockerfile b/local-dev/cicada_db/Dockerfile index 404a796..ed77d1f 100644 --- a/local-dev/cicada_db/Dockerfile +++ b/local-dev/cicada_db/Dockerfile @@ -10,6 +10,3 @@ RUN apk add --no-cache --virtual .debezium-build-deps 
gcc clang15 llvm15 git mak && cd / \ && rm -rf wal2json \ && apk del .debezium-build-deps - -COPY --chown=postgres:postgres setup/schema.sql /docker-entrypoint-initdb.d/01_schema.sql -COPY --chown=postgres:postgres setup/create_test_tap_setup.sql /docker-entrypoint-initdb.d/02_create_test_tap_setup.sql \ No newline at end of file From dcd49fa955eec6e5b06b139e44a0380eb87db419 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Thu, 2 Apr 2026 17:26:35 +0100 Subject: [PATCH 06/53] Add rollback functionality and table --- cicada/cli.py | 28 ++++++++ cicada/commands/rollback.py | 41 +++++++++++ cicada/commands/smart_schedule.py | 93 ++++++------------------- cicada/commands/upsert_schedule.py | 3 + cicada/lib/scheduler.py | 106 ++++++++++++++++++++++++++++- local-dev/entrypoint.sh | 2 +- setup/schema.sql | 49 +++++++++++++ 7 files changed, 248 insertions(+), 74 deletions(-) create mode 100644 cicada/commands/rollback.py diff --git a/cicada/cli.py b/cicada/cli.py index 896b0d2..e36d531 100644 --- a/cicada/cli.py +++ b/cicada/cli.py @@ -19,6 +19,7 @@ from cicada.commands import list_schedules from cicada.commands import delete_schedule from cicada.commands import smart_schedule +from cicada.commands import rollback @utils.named_exception_handler("Cicada") @@ -287,6 +288,33 @@ def smart_schedule(): args = parser.parse_args(sys.argv[2:]) smart_schedule.main(args.server_id) + @staticmethod + def rollback(): + """Rollback to original schedules in case of any issues during assignment.""" + parser = argparse.ArgumentParser( + allow_abbrev=False, + add_help=True, + prog=inspect.stack()[0][3], + description="Rollback to original schedules in case of any issues during assignment", + ) + parser.add_argument( + "--full", + default=False, + action="store_true", + help="If specified, will roll back to the original schedule instead of the previous schedule", + ) + group = parser.add_mutually_exclusive_group() + group.add_argument( + "--server_id", + type=str, + required=False, + help="ID 
of the server to rollback, if not specified will rollback all servers", + ) + group.add_argument("--schedule_id", type=str, required=False, help="ID of the schedule to rollback") + args = parser.parse_args(sys.argv[2:]) + rollback.main(args.server_id, args.schedule_id, full = args.full) + + @staticmethod def version(): """Return version of cicada package""" diff --git a/cicada/commands/rollback.py b/cicada/commands/rollback.py new file mode 100644 index 0000000..641b3e2 --- /dev/null +++ b/cicada/commands/rollback.py @@ -0,0 +1,41 @@ + +from typing import Optional +from cicada.lib import postgres, utils +from cicada.lib import scheduler + + +@utils.named_exception_handler("rollback") +def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbname=None, full=False): + """ + Roll back schedules in case of issues during assignment. + If neither server_id and schedule_id are provided, rollback applies to all servers. + + Args: + server_id: Optional[int] [Mutually exclusive with schedule_id] + Target server to roll back. + schedule_id: Optional[int] [Mutually exclusive with server_id] + Target schedule to roll back. + db_cur: Database cursor to use for the rollback operations. + dbname: Optional[str] + Database name to connect to if db_cur is not provided. If db_cur is provided, dbname is ignored. + prev: bool + If True, roll back to the previous schedule in the schedule_backups table. 
If False, roll back to the original schedule + """ + db_conn = postgres.db_cicada(dbname) + db_cur = db_conn.cursor() + + if not server_id and not schedule_id: + # Recursively call rollback for each server_id if no specific server_id is provided + server_ids = scheduler.get_all_server_ids(db_cur) + for id in server_ids: + main(server_id=id[0], dbname=dbname, full=full) + return + + if server_id: + scheduler.restore_previous_schedules(db_cur=db_cur, server_id=server_id, full=full) + else: + scheduler.restore_previous_schedules(db_cur=db_cur, schedule_id=schedule_id, full=full) + + + db_cur.close() + db_conn.close() \ No newline at end of file diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index b0c9d26..26b84cb 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -11,35 +11,20 @@ from cicada.lib.SmartScheduling.domain import Tap -def get_schedules_per_server(server_id, dbname=None): +def get_schedules_per_server(server_id, db_cur=None): """Get all schedules for a given server_id.""" - db_conn = postgres.db_cicada(dbname) - db_cur = db_conn.cursor() schedule_ids = [row[0] for row in scheduler.get_all_schedule_ids_per_server(db_cur, server_id)] - db_cur.close() - db_conn.close() if not schedule_ids: print(f"No schedules found for server_id {server_id}") - return [] - return schedule_ids + return schedule_ids -def find_all_server_ids(dbname=None): - """Find all server_ids in the system.""" - db_conn = postgres.db_cicada(dbname) - db_cur = db_conn.cursor() - server_ids = scheduler.get_all_server_ids(db_cur) - db_cur.close() - db_conn.close() - return server_ids -def create_tap_objects(schedule_ids, dbname=None): +def create_tap_objects(schedule_ids, db_cur): """Create Tap objects from schedule_ids.""" - db_conn = postgres.db_cicada(dbname) - db_cur = db_conn.cursor() taps : list[Tap] = [] # Fetch details for each schedule and convert to Tap objects @@ -58,8 +43,6 @@ def 
create_tap_objects(schedule_ids, dbname=None): except Exception as e: print(f"Skipping schedule {schedule_id} due to error: {e}") - db_cur.close() - db_conn.close() return taps def update_schedule_cron(tap : Tap) -> str: @@ -76,7 +59,7 @@ def update_schedule_cron(tap : Tap) -> str: if frequency >= 60: minute = shift % 60 - hour = shift // 60 + croniter(tap.original_interval_mask).get_next(datetime.datetime).hour # Get the hour of the first scheduled run and add the shift in hours + hour = shift // 60 + croniter(tap.interval_mask).get_next(datetime.datetime).hour # Get the hour of the first scheduled run and add the shift in hours tap.interval_mask = f"{minute} {hour} * * *" # Check that the new cron expression is valid if not croniter.is_valid(tap.interval_mask): @@ -88,13 +71,10 @@ def update_schedule_cron(tap : Tap) -> str: if not croniter.is_valid(tap.interval_mask): raise ValueError(f"Invalid cron expression generated: {tap.interval_mask}") return tap - - -def assign_new_schedules(optimised_taps: list[pygad.Tap], dbname=None): + +def assign_new_schedules(optimised_taps: list[pygad.Tap], db_cur): """Assign new schedules based on the optimal schedule found.""" - db_conn = postgres.db_cicada(dbname) - db_cur = db_conn.cursor() # For each tap, update the schedule in the DB with the new interval_mask based on the shift calculated by the GA optimizer for tap in optimised_taps: @@ -120,66 +100,35 @@ def assign_new_schedules(optimised_taps: list[pygad.Tap], dbname=None): "is_running": None } scheduler.update_schedule_details(db_cur=db_cur, schedule_details=schedule_details) - db_conn.commit() - db_cur.close() - db_conn.close() - -def rollback(server_id : Optional[int], db_cur, dbname=None): - """ - Rollback to original schedules in case of any issues during assignment. 
- Args: server_id: Optional[int] : the server_id to rollback, if None rollback all servers - """ - - if not server_id: - # Recursively call rollback for each server_id if no specific server_id is provided - server_ids = find_all_server_ids(dbname) - for id in server_ids: - rollback(server_id=id[0], db_cur=db_cur, dbname=dbname) - return - taps = get_schedules_per_server(server_id=server_id, dbname=dbname) - - for tap in taps: - schedule_details = { - "adhoc_parameters": None, - "adhoc_execute": None, - "schedule_group_id": None, - "parameters": None, - "server_id": None, - "last_run_date": None, - "is_enabled": None, - "interval_mask": tap.original_interval_mask, - "schedule_description": None, - "auto_update_time": None, - "schedule_order": None, + previous_schedule_details = { "schedule_id": tap.schedule_id, - "is_async": None, - "abort_running": None, - "exec_command": None, - "first_run_date": None, - "is_running": None - } - scheduler.update_schedule_details(db_cur=db_cur, schedule_details=schedule_details) - - raise NotImplementedError("Rollback functionality not yet implemented") + "server_id": tap.server_id, + "previous_interval_mask": tap.original_interval_mask, + "interval_mask": tap.interval_mask, + "start_time_shift_mins": tap.shift or 0 + } + scheduler.update_schedule_backups(db_cur, previous_schedule_details) @utils.named_exception_handler("smart_schedule") def main(server_id=None, dbname=None): + db_conn = postgres.db_cicada(dbname) + db_cur = db_conn.cursor() if not server_id: # Recursively call main for each server_id if no specific server_id is provided - server_ids = find_all_server_ids(dbname) + server_ids = scheduler.get_all_server_ids(db_cur) for id in server_ids: main(server_id=id[0], dbname=dbname) return # Get schedules for the server_id - schedule_ids = get_schedules_per_server(server_id=server_id, dbname=dbname) + schedule_ids = get_schedules_per_server(server_id=server_id, db_cur=db_cur) print(f"Found schedules for server_id {server_id}: 
{schedule_ids}") # Build Tap objects - taps = create_tap_objects(schedule_ids, dbname=dbname) + taps = create_tap_objects(schedule_ids, db_cur=db_cur) if not taps: print("No valid schedules found to optimize.") sys.exit(1) @@ -188,11 +137,13 @@ def main(server_id=None, dbname=None): try: ga = pygad.GAPyGADScheduler() optimised_taps, start_blocks, peak_cpu, usage = ga.solve(taps) - print(f"Optimized schedule for server_id {server_id}: {[tap.schedule_id for tap in optimised_taps]} with start blocks {start_blocks}, peak CPU {peak_cpu}, and usage {usage}") + print(f"Optimized schedule for server_id {server_id}: new peak CPU {peak_cpu}") - assign_new_schedules(optimised_taps, dbname=dbname) + assign_new_schedules(optimised_taps, db_cur=db_cur) except Exception as e: print(f"Error during optimization for server_id {server_id}: {e}") sys.exit(1) + db_cur.close() + db_conn.close() \ No newline at end of file diff --git a/cicada/commands/upsert_schedule.py b/cicada/commands/upsert_schedule.py index 919644c..ff08fa0 100644 --- a/cicada/commands/upsert_schedule.py +++ b/cicada/commands/upsert_schedule.py @@ -79,6 +79,9 @@ def main(schedule_details, dbname=None): new_schedule_details["schedule_group_id"] = schedule_details["schedule_group_id"] scheduler.update_schedule_details(db_cur, new_schedule_details) + + scheduler.reset_schedule_backup_mask(db_cur, new_schedule_details) + print("Updated schedule details in the schedule_backups table for potential rollback.") print(tabulate(new_schedule_details.items(), ["Detail", "Value"], tablefmt="psql")) db_cur.close() diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index 86fff05..a37ca0e 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -382,7 +382,13 @@ def get_all_server_ids(db_cur): def get_all_schedule_ids_per_server(db_cur, server_id): """Get all possible schedule_ids for each server from the schedules table""" - sqlquery = f"SELECT DISTINCT schedule_id FROM schedules WHERE server_id = 
'{server_id}' and schedule_description not like '%==%' ORDER BY schedule_id" + sqlquery = f""" + SELECT DISTINCT schedule_id + FROM schedules + WHERE server_id = '{server_id}' + AND (schedule_description IS NULL OR schedule_description NOT LIKE '%==%') + ORDER BY schedule_id + """ db_cur.execute(sqlquery) schedule_ids = db_cur.fetchall() @@ -405,4 +411,100 @@ def get_median_run_time(db_cur, schedule_id): return average_runtime_minutes except Exception: print(f"ERROR : No runs associated with the schedule_id {schedule_id}") - sys.exit(1) \ No newline at end of file + sys.exit(1) + +def reset_schedule_backup_mask(db_cur, schedule_details): + """ + Update the interval_mask of a schedule in the schedule_backups table. + Called when schedule frequency is changed or a new schedule is added. + """ + sqlquery = f""" + MERGE INTO public.schedule_backups + USING (SELECT '{schedule_details["schedule_id"]}' AS schedule_id) AS src + ON schedule_backups.schedule_id = src.schedule_id + WHEN MATCHED THEN + UPDATE SET + interval_mask = '{schedule_details["interval_mask"]}', + original_interval_mask = '{schedule_details["interval_mask"]}', + previous_interval_mask = '{schedule_details["interval_mask"]}' + """ + (f", server_id = {schedule_details['server_id']}" if schedule_details["server_id"] is not None else "") + f""" + WHEN NOT MATCHED THEN + INSERT (schedule_id, interval_mask, original_interval_mask, previous_interval_mask) + VALUES ('{schedule_details["schedule_id"]}', '{schedule_details["interval_mask"]}', '{schedule_details["interval_mask"]}', '{schedule_details["interval_mask"]}') + """ + db_cur.execute(sqlquery) + + +def update_schedule_backups(db_cur, previous_schedule_details): + """Insert a schedule configuration into the schedule_backups table for rollback.""" + + sqlquery = f""" + INSERT INTO schedule_backups (schedule_id, server_id, interval_mask, previous_interval_mask, start_time_shift_mins, original_interval_mask) + VALUES ( + 
'{previous_schedule_details["schedule_id"]}', + '{previous_schedule_details["server_id"]}', + '{previous_schedule_details["interval_mask"]}', + '{previous_schedule_details["previous_interval_mask"]}', + '{previous_schedule_details["start_time_shift_mins"]}', + '{previous_schedule_details["previous_interval_mask"]}') -- Assuming original_interval_mask is the same as previous_interval_mask on the first insert + ON CONFLICT (schedule_id) DO UPDATE SET + interval_mask = EXCLUDED.interval_mask, + previous_interval_mask = EXCLUDED.previous_interval_mask, + start_time_shift_mins = EXCLUDED.start_time_shift_mins; + """ + db_cur.execute(sqlquery) + + +def rollback_schedule_backup_mask(db_cur, schedule_id=None, server_id=None): + """ + Sets the interval_masks back to the schedule_backups table to the original_interval_mask. + """ + sqlquery = f""" + UPDATE public.schedule_backups + SET interval_mask = original_interval_mask, + previous_interval_mask = original_interval_mask, + server_id = server_id + WHERE schedule_id = '{schedule_id}' or server_id = '{server_id}' + """ + db_cur.execute(sqlquery) + + +def restore_previous_schedules(db_cur, server_id=None, schedule_id=None, full=False): + """ + Restore schedules from the last-known rollback snapshot or the original schedule. + Args: + db_cur: Database cursor. + server_id: Optional[int] Target server to roll back. + schedule_id: Optional[str] Target schedule to roll back. + prev: bool If True, restore from previous_interval_mask, else restore from original_interval_mask. + """ + if server_id is None and not schedule_id: + raise ValueError("Either server_id or schedule_id must be provided") + + if server_id and schedule_id: + raise ValueError("server_id and schedule_id cannot both be provided") + + sqlquery = """ + UPDATE schedules AS s + SET + interval_mask = ps. 
+ """ + ("previous_interval_mask" if not full else "original_interval_mask") + """ + FROM schedule_backups AS ps + WHERE s.schedule_id = ps.schedule_id + """ + + params = [] + if server_id is not None: + sqlquery = sqlquery + " AND ps.server_id = %s" + params.append(server_id) + + if schedule_id is not None: + sqlquery = sqlquery + " AND ps.schedule_id = %s" + params.append(schedule_id) + + db_cur.execute(sqlquery, tuple(params)) + + # Rewrite the schedule_backups table to remove the rolled back schedules + rollback_schedule_backup_mask(db_cur, schedule_id, server_id) + + diff --git a/local-dev/entrypoint.sh b/local-dev/entrypoint.sh index b151bf7..abe3c79 100755 --- a/local-dev/entrypoint.sh +++ b/local-dev/entrypoint.sh @@ -45,7 +45,6 @@ pwd # Build backend database export PGPASSWORD=${DB_POSTGRES_PASS} psql -v ON_ERROR_STOP=1 "sslmode=prefer user=${DB_POSTGRES_USER} host=${DB_POSTGRES_HOST} port=${DB_POSTGRES_PORT} dbname=${DB_POSTGRES_DB}" --file=setup/schema.sql --quiet -psql -v ON_ERROR_STOP=1 "sslmode=prefer user=${DB_POSTGRES_USER} host=${DB_POSTGRES_HOST} port=${DB_POSTGRES_PORT} dbname=${DB_POSTGRES_DB}" --file=setup/create_test_tap_setup.sql --quiet # If not exists, create definitions file for cli test -f ${CICADA_HOME}/config/definitions.yml || cat > ${CICADA_HOME}/config/definitions.yml < Date: Fri, 3 Apr 2026 11:04:13 +0100 Subject: [PATCH 07/53] Start GA from last checkpoint instead of restarting each time --- cicada/commands/smart_schedule.py | 14 ++++++++++---- cicada/lib/SmartScheduling/domain.py | 8 -------- cicada/lib/SmartScheduling/pygad.py | 5 +++-- cicada/lib/scheduler.py | 13 +++++++++++-- 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index 26b84cb..eb7cbd1 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -27,17 +27,20 @@ def create_tap_objects(schedule_ids, db_cur): taps : list[Tap] = [] + # Get information from 
schedule_backups table + schedule_backups = scheduler.get_all_schedule_backups(db_cur) + # Fetch details for each schedule and convert to Tap objects for schedule_id in schedule_ids: details = scheduler.get_schedule_details(db_cur, schedule_id) try: tap = Tap(details, db_cur=db_cur) - # Ignore the few taps that have irregular cron expressions for now. There are few enough that this shouldn't impact the optimisation and is not worth the effort to try and support these irregular schedules in the GA if not tap.is_regular_schedule(): raise ValueError(f"Skipping irregular cron expression: {tap.interval_mask}") else: - tap.determine_attributes(db_cur) + tap.start_time_blocks = [backup[1] for backup in schedule_backups if backup[0] == schedule_id][0] + tap.original_interval_mask = [backup[2] for backup in schedule_backups if backup[0] == schedule_id][0] taps.append(tap) except Exception as e: @@ -136,10 +139,13 @@ def main(server_id=None, dbname=None): # Run GA optimizer ---> add in way to change GAConfig parameters ! try: ga = pygad.GAPyGADScheduler() - optimised_taps, start_blocks, peak_cpu, usage = ga.solve(taps) + optimised_taps, start_blocks, peak_cpu, usage, initial_fitness = ga.solve(taps) print(f"Optimized schedule for server_id {server_id}: new peak CPU {peak_cpu}") - assign_new_schedules(optimised_taps, db_cur=db_cur) + if peak_cpu < initial_fitness: # Only update schedules if we have found an improvement + assign_new_schedules(optimised_taps, db_cur=db_cur) + else: + print(f"No improvement found for server_id {server_id}. Current peak CPU: {initial_fitness}, Optimized peak CPU: {peak_cpu}. 
No schedule updates will be made.") except Exception as e: print(f"Error during optimization for server_id {server_id}: {e}") diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/SmartScheduling/domain.py index 22a5a7e..4e1b225 100644 --- a/cicada/lib/SmartScheduling/domain.py +++ b/cicada/lib/SmartScheduling/domain.py @@ -64,11 +64,3 @@ def is_regular_schedule(self): return freq1 == freq2 == freq3 == freq4 except (ValueError, KeyError): return False - - -# @dataclass -# class Schedule: -# start_blocks: List[int] = None -# usage: np.ndarray -# peak_cpu: float -# taps: List[Tap] \ No newline at end of file diff --git a/cicada/lib/SmartScheduling/pygad.py b/cicada/lib/SmartScheduling/pygad.py index d0e232a..b42365f 100644 --- a/cicada/lib/SmartScheduling/pygad.py +++ b/cicada/lib/SmartScheduling/pygad.py @@ -74,7 +74,8 @@ def solve(self, taps: Sequence[Tap]) -> tuple[Sequence[Tap], List[int], float, n self.taps = taps initial_population = self._initial_population(taps, gene_space) - print("Initial population fitness (max_cpu load):", -self.fitness_fn(None, initial_population[0], 0)) + initial_fitness = self.fitness_fn(None, initial_population[0], 0) + print("Initial population fitness (max_cpu load):", -initial_fitness) ga = pygad.GA( num_generations=self.cfg.num_generations, @@ -104,4 +105,4 @@ def solve(self, taps: Sequence[Tap]) -> tuple[Sequence[Tap], List[int], float, n for i, tap in enumerate(taps): tap.shift = start_blocks[i] * self.cfg.minutes_per_block - return taps, start_blocks, peak_cpu, usage \ No newline at end of file + return taps, start_blocks, peak_cpu, usage, -initial_fitness \ No newline at end of file diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index a37ca0e..202a8de 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -394,6 +394,14 @@ def get_all_schedule_ids_per_server(db_cur, server_id): return schedule_ids +def get_all_schedule_backups(db_cur): + """Get all entries from the schedule_backups 
table""" + sqlquery = "SELECT schedule_id, start_time_shift_mins, original_interval_mask FROM schedule_backups" + db_cur.execute(sqlquery) + schedule_backups = db_cur.fetchall() + + return schedule_backups + def get_median_run_time(db_cur, schedule_id): sqlquery = f""" @@ -415,8 +423,8 @@ def get_median_run_time(db_cur, schedule_id): def reset_schedule_backup_mask(db_cur, schedule_details): """ - Update the interval_mask of a schedule in the schedule_backups table. - Called when schedule frequency is changed or a new schedule is added. + Resets the interval_mask of a schedule in the schedule_backups table. Called when schedule frequency is changed or a new schedule is added. + Sets all interval_mask fiedls to the new interval_mask to ensure that the rollback won't revert to an outdated frequency. """ sqlquery = f""" MERGE INTO public.schedule_backups @@ -463,6 +471,7 @@ def rollback_schedule_backup_mask(db_cur, schedule_id=None, server_id=None): UPDATE public.schedule_backups SET interval_mask = original_interval_mask, previous_interval_mask = original_interval_mask, + start_time_shift_mins = 0, server_id = server_id WHERE schedule_id = '{schedule_id}' or server_id = '{server_id}' """ From 03d5e1202f20b64302857497f163b725583c7769 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Fri, 3 Apr 2026 16:34:28 +0100 Subject: [PATCH 08/53] Allow GAConfig parameters to be passed in and change logic to how start_blocks are handled --- cicada/cli.py | 27 ++++++- cicada/commands/smart_schedule.py | 94 ++++++++++++++---------- cicada/lib/SmartScheduling/domain.py | 30 +++++++- cicada/lib/SmartScheduling/evaluation.py | 15 +++- cicada/lib/SmartScheduling/pygad.py | 47 ++++++++---- 5 files changed, 149 insertions(+), 64 deletions(-) diff --git a/cicada/cli.py b/cicada/cli.py index e36d531..cd91817 100644 --- a/cicada/cli.py +++ b/cicada/cli.py @@ -285,8 +285,33 @@ def smart_schedule(): description="Generate smart schedules for a server using genetic algorithm", ) 
parser.add_argument("--server_id", type=str, required=False, help="ID of the server") + + # Optional GA Configurations + ga_config = parser.add_argument_group("ga_config", "Optional configurations for the genetic algorithm optimizer") + ga_config.add_argument("--minutes_per_block", type=int, required=False, help="Minutes per block for the genetic algorithm") + ga_config.add_argument("--num_generations",type=int,required=False, help="Number of generations for the genetic algorithm") + ga_config.add_argument("--sol_per_pop",type=int,required=False, help="Number of solutions per population for the genetic algorithm") + ga_config.add_argument("--num_parents_mating",type=int,required=False, help="Number of parents mating for the genetic algorithm") + ga_config.add_argument("--mutation_percent_genes",type=int,required=False, help="Mutation percentage of genes for the genetic algorithm") + ga_config.add_argument("--parent_selection_type",type=str,required=False, help="Parent selection type for the genetic algorithm. Allowed values: ['sss', 'rws', 'sus', 'tournament', 'rank', 'random']") + ga_config.add_argument("--crossover_type",type=str,required=False, help="Crossover type for the genetic algorithm. Allowed values: ['single_point', 'two_point', 'uniform']") + ga_config.add_argument("--mutation_type",type=str,required=False, help="Mutation type for the genetic algorithm. 
Allowed values: ['random', 'swap', 'inversion', 'scramble']") + ga_config.add_argument("--keep_elitism",type=int,required=False, help="Number of elite solutions to keep for the next generation") args = parser.parse_args(sys.argv[2:]) - smart_schedule.main(args.server_id) + smart_schedule.main( + args.server_id, + ga_config={ + "minutes_per_block": args.minutes_per_block, + "num_generations": args.num_generations, + "sol_per_pop": args.sol_per_pop, + "num_parents_mating": args.num_parents_mating, + "mutation_percent_genes": args.mutation_percent_genes, + "parent_selection_type": args.parent_selection_type, + "crossover_type": args.crossover_type, + "mutation_type": args.mutation_type, + "keep_elitism": args.keep_elitism, + }, + ) @staticmethod def rollback(): diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index eb7cbd1..937313e 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -17,6 +17,7 @@ def get_schedules_per_server(server_id, db_cur=None): if not schedule_ids: print(f"No schedules found for server_id {server_id}") + sys.exit(1) return schedule_ids @@ -27,20 +28,22 @@ def create_tap_objects(schedule_ids, db_cur): taps : list[Tap] = [] - # Get information from schedule_backups table - schedule_backups = scheduler.get_all_schedule_backups(db_cur) - # Fetch details for each schedule and convert to Tap objects for schedule_id in schedule_ids: details = scheduler.get_schedule_details(db_cur, schedule_id) try: tap = Tap(details, db_cur=db_cur) # Ignore the few taps that have irregular cron expressions for now. 
There are few enough that this shouldn't impact the optimisation and is not worth the effort to try and support these irregular schedules in the GA - if not tap.is_regular_schedule(): - raise ValueError(f"Skipping irregular cron expression: {tap.interval_mask}") + if tap.is_unsupported(): + if tap.is_blacklisted(): + print(f"Skipping blacklisted schedule {tap.schedule_id} with cron expression {tap.interval_mask}") + elif not tap.is_regular_schedule(): + print(f"Skipping irregular schedule {tap.schedule_id} with cron expression {tap.interval_mask}") + else: + print(f"Skipping schedule {tap.schedule_id} with frequency {tap.frequency_minutes} minutes as shifting for these taps is unsupported currently") + else: - tap.start_time_blocks = [backup[1] for backup in schedule_backups if backup[0] == schedule_id][0] - tap.original_interval_mask = [backup[2] for backup in schedule_backups if backup[0] == schedule_id][0] + tap._determine_start_time_mins() taps.append(tap) except Exception as e: @@ -57,23 +60,32 @@ def update_schedule_cron(tap : Tap) -> str: frequency = tap.frequency_minutes shift = tap.shift - if not shift or shift == 0: + if not shift: return tap # No shift needed - if frequency >= 60: - minute = shift % 60 - hour = shift // 60 + croniter(tap.interval_mask).get_next(datetime.datetime).hour # Get the hour of the first scheduled run and add the shift in hours + if frequency == 1440: # For daily taps, we can shift within the hour + hour = shift // 60 + minute = (shift - hour * 60) % 60 tap.interval_mask = f"{minute} {hour} * * *" # Check that the new cron expression is valid if not croniter.is_valid(tap.interval_mask): - raise ValueError(f"Invalid cron expression generated: {tap.interval_mask}") + raise ValueError(f"Invalid cron expression generated: {tap.interval_mask} for tap {tap.schedule_id}") return tap - else: + elif frequency == 60: # For hourly taps, we can shift within the hour + minute = shift % 60 + tap.interval_mask = f"{minute} * * * *" + # Check 
that the new cron expression is valid + if not croniter.is_valid(tap.interval_mask): + raise ValueError(f"Invalid cron expression generated: {tap.interval_mask} for tap {tap.schedule_id}") + return tap + elif frequency < 60: + assert shift < frequency, f"Shift {shift} cannot be greater than or equal to frequency {frequency} for tap {tap.schedule_id}" tap.interval_mask = f"{shift}-59/{frequency} * * * *" # Check that the new cron expression is valid if not croniter.is_valid(tap.interval_mask): - raise ValueError(f"Invalid cron expression generated: {tap.interval_mask}") + raise ValueError(f"Invalid cron expression generated: {tap.interval_mask} for tap {tap.schedule_id}") return tap + def assign_new_schedules(optimised_taps: list[pygad.Tap], db_cur): @@ -81,7 +93,10 @@ def assign_new_schedules(optimised_taps: list[pygad.Tap], db_cur): # For each tap, update the schedule in the DB with the new interval_mask based on the shift calculated by the GA optimizer for tap in optimised_taps: + previous_schedule_mask = tap.interval_mask tap = update_schedule_cron(tap) + print(f"Updating schedule {tap.schedule_id} with new interval mask: {tap.interval_mask} and shift of {tap.shift} minutes") + tap._determine_start_time_mins() schedule_details = { "adhoc_parameters": None, @@ -107,15 +122,15 @@ def assign_new_schedules(optimised_taps: list[pygad.Tap], db_cur): previous_schedule_details = { "schedule_id": tap.schedule_id, "server_id": tap.server_id, - "previous_interval_mask": tap.original_interval_mask, + "previous_interval_mask": previous_schedule_mask, "interval_mask": tap.interval_mask, - "start_time_shift_mins": tap.shift or 0 + "start_time_shift_mins": tap.start_time_mins } scheduler.update_schedule_backups(db_cur, previous_schedule_details) @utils.named_exception_handler("smart_schedule") -def main(server_id=None, dbname=None): +def main(server_id=None, dbname=None, ga_config=None): db_conn = postgres.db_cicada(dbname) db_cur = db_conn.cursor() @@ -124,32 +139,31 @@ def 
main(server_id=None, dbname=None): server_ids = scheduler.get_all_server_ids(db_cur) for id in server_ids: main(server_id=id[0], dbname=dbname) - return - - # Get schedules for the server_id - schedule_ids = get_schedules_per_server(server_id=server_id, db_cur=db_cur) - print(f"Found schedules for server_id {server_id}: {schedule_ids}") - - # Build Tap objects - taps = create_tap_objects(schedule_ids, db_cur=db_cur) - if not taps: - print("No valid schedules found to optimize.") - sys.exit(1) + + else: + # Get schedules for the server_id + schedule_ids = get_schedules_per_server(server_id=server_id, db_cur=db_cur) + print(f"Found {len(schedule_ids)} schedules for server_id {server_id}") + + # Build Tap objects + taps = create_tap_objects(schedule_ids, db_cur=db_cur) + if not taps: + print("No valid schedules found to optimize.") + sys.exit(1) - # Run GA optimizer ---> add in way to change GAConfig parameters ! - try: - ga = pygad.GAPyGADScheduler() - optimised_taps, start_blocks, peak_cpu, usage, initial_fitness = ga.solve(taps) - print(f"Optimized schedule for server_id {server_id}: new peak CPU {peak_cpu}") + try: + ga = pygad.GAPyGADScheduler(config=ga_config) + optimised_taps, start_blocks, peak_cpu, usage, initial_fitness = ga.solve(taps) + print(f"Optimized schedule for server_id {server_id}: new peak CPU {peak_cpu}") - if peak_cpu < initial_fitness: # Only update schedules if we have found an improvement - assign_new_schedules(optimised_taps, db_cur=db_cur) - else: - print(f"No improvement found for server_id {server_id}. Current peak CPU: {initial_fitness}, Optimized peak CPU: {peak_cpu}. No schedule updates will be made.") + if peak_cpu < initial_fitness: # Only update schedules if we have found an improvement + assign_new_schedules(optimised_taps, db_cur=db_cur) + else: + print(f"No improvement found for server_id {server_id}. Current peak CPU: {initial_fitness}, Optimized peak CPU: {peak_cpu}. 
No schedule updates will be made.") - except Exception as e: - print(f"Error during optimization for server_id {server_id}: {e}") - sys.exit(1) + except Exception as e: + print(f"Error during optimization for server_id {server_id}: {e}") + sys.exit(1) db_cur.close() db_conn.close() \ No newline at end of file diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/SmartScheduling/domain.py index 4e1b225..819265f 100644 --- a/cicada/lib/SmartScheduling/domain.py +++ b/cicada/lib/SmartScheduling/domain.py @@ -11,20 +11,18 @@ class Tap: schedule_id: int server_id: int - original_interval_mask: str interval_mask: str frequency_minutes: int cpu_max: float = 1 median_runtime_minutes: int = 5 shift: Optional[int] = 0 - start_time_blocks: Optional[int] = None + start_time_mins: Optional[int] = None def __init__(self, details, db_cur): self.schedule_id = details['schedule_id'] self.server_id = details['server_id'] self.interval_mask = details['interval_mask'] - self.original_interval_mask = details['interval_mask'] self.determine_attributes(db_cur) def determine_attributes(self, db_cur): @@ -38,7 +36,6 @@ def _determine_frequency(self): first_iter = schedule.get_next(datetime.datetime) second_iter = schedule.get_next(datetime.datetime) frequency = (second_iter - first_iter).total_seconds() / 60 - self.frequency_minutes = int(frequency) @@ -48,6 +45,31 @@ def _get_average_runtime(self, db_cur): self.median_runtime_minutes = 5 # self.median_runtime_minutes = get_median_run_time(db_cur, self.schedule_id) + def _determine_start_time_mins(self): + """Determine the start time in minutes from midnight from the interval_mask""" + + today = datetime.datetime.now().date() + midnight = datetime.datetime.combine(today, datetime.time.min) + + it = croniter(self.interval_mask, midnight) + if croniter.match(self.interval_mask, midnight): + first_iter = midnight + self.start_time_mins = 0 + else: + first_iter = it.get_next(datetime.datetime) + self.start_time_mins = first_iter.hour 
* 60 + first_iter.minute + + def is_blacklisted(self): + """Determine if the tap is blacklisted based on schedule_id""" + return False + # Change implementation to check against blacklist in DB once blacklist functionality is implemented + # Blacklist shouldn't be stored in GA and instead be in db + return self.schedule_id in self.cfg.blacklist_schedule_ids + + def is_unsupported(self): + """Determine if the tap is unsupported for smart scheduling based on frequency or if it's blacklisted""" + return ((self.frequency_minutes != 1440 and self.frequency_minutes > 60) or self.is_blacklisted() or not self.is_regular_schedule()) + def is_regular_schedule(self): """Check if the cron expression is a regular schedule that can be optimized by the GA """ try: diff --git a/cicada/lib/SmartScheduling/evaluation.py b/cicada/lib/SmartScheduling/evaluation.py index 05ac8c8..c07be33 100644 --- a/cicada/lib/SmartScheduling/evaluation.py +++ b/cicada/lib/SmartScheduling/evaluation.py @@ -52,17 +52,24 @@ def evaluate_cpu_usage_and_peak(start_blocks: Sequence[int], taps: Sequence[Tap] blocks_per_day = calculate_blocks_per_day(minutes_per_block) freq_blocks, run_blocks = discretize_taps(taps, minutes_per_block) diff = np.zeros(blocks_per_day + 1, dtype=float) + for i, tap in enumerate(taps): - s0 = int(start_blocks[i]) freq = freq_blocks[i] run_len = run_blocks[i] cpu = float(tap.cpu_max) - block = s0 + block = int(start_blocks[i]) + + # Iterate through the day in increments of the tap's frequency, adding the tap's CPU usage to the diff array for the duration + # of its runtime. We use a diff array to efficiently calculate the cumulative CPU usage at each time block. Instead of + # appending the CPU usage for each block the tap runs in, we add the CPU usage at the start block and subtract it at the end block. 
while block < blocks_per_day: end = min(block + run_len, blocks_per_day) diff[block] += cpu diff[end] -= cpu block += freq - usage = np.cumsum(diff[:-1]) - peak = float(np.max(usage)) if usage.size else 0.0 + + # Sums up everything in the diff array to get the total CPU usage at each time block, and finds the peak usage. + # Ignore the last element of the diff array since it's just a placeholder to handle the end block subtraction for taps that run until the end of the day. + usage = np.cumsum(diff[:-1]) + peak = float(np.max(usage)) if usage.size else 0.0 return usage, peak \ No newline at end of file diff --git a/cicada/lib/SmartScheduling/pygad.py b/cicada/lib/SmartScheduling/pygad.py index b42365f..8183976 100644 --- a/cicada/lib/SmartScheduling/pygad.py +++ b/cicada/lib/SmartScheduling/pygad.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import List, Optional, Sequence +from typing import List, Mapping, Optional, Sequence import numpy as np from .config import GAConfig from .domain import Tap @@ -7,8 +7,6 @@ import pygad -# """Blacklist functionality - to be added later""" - class GAPyGADScheduler: """ Genetic Algorithm Scheduler using PyGAD @@ -25,34 +23,53 @@ class GAPyGADScheduler: We cap the max shift of a tap to within the hour to prevent large shifts for taps that run daily. """ - def __init__(self, config: Optional[GAConfig] = None): - self.cfg = config or GAConfig() + def __init__(self, config: Optional[Mapping[str, object]] = None): + if config is None: + self.cfg = GAConfig() + else: + filtered_config = {key: value for key, value in config.items() if value is not None} + self.cfg = GAConfig(**filtered_config) def _gene_space(self, taps: Sequence[Tap]) -> List[List[int]]: # Build gene_space per tap: each gene space is limited by it's frequency (e.g. a 15min freq tap can only traverse the first 15min worth of time blocks) - # Computed in blocks to make it time-block-interval agnostic. 
we don't want to have to rewrite all the start_times if we e.g. decide to change the scheduling interval - freq_blocks, _ = discretize_taps(taps, self.cfg.minutes_per_block) - + # Unless the tap is unsupported (either blacklisted, irregular or has frequency greater than 60 mins) in which case we set the gene space to be just 0 + # so they remain unchanged in the GA but are still included in the fitness evaluation. Also constrain taps with frequency > 60 mins to an hour to prevent + # large shifts and huge gene spaces. + # Computed in blocks to make it time-block-interval agnostic + interval_blocks, _ = discretize_taps(taps, self.cfg.minutes_per_block) + start_blocks = [0] * len(taps) + end_blocks = [1] * len(taps) + blocks_per_day = calculate_blocks_per_day(self.cfg.minutes_per_block) for i, tap in enumerate(taps): # Ignore any blacklist taps -> fix the gene space to be 0 so they're still included in the fitness eval - if tap.schedule_id in self.cfg.blacklist_schedule_ids: - freq_blocks[i] = 1 + if tap.is_unsupported(): + pass # Limit gene space to only shift within the hour for the taps which run less frequently - if tap.frequency_minutes >= 60: - freq_blocks[i] = 59 // self.cfg.minutes_per_block - return [list(range(fb)) for fb in freq_blocks] + elif tap.frequency_minutes >= 60: + interval_blocks[i] = 59 // self.cfg.minutes_per_block + # Prevent any end blocks from going beyond the day limit + end_blocks[i] = min(tap.start_time_mins // self.cfg.minutes_per_block + interval_blocks[i], blocks_per_day) + start_blocks[i] = end_blocks[i] - interval_blocks[i] + + # Gene space for the rest is just the frequency + else: + start_blocks[i] = 0 + end_blocks[i] = interval_blocks[i] + + return [list(range(start_block, end_block)) for start_block, end_block in zip(start_blocks, end_blocks)] + def _initial_population(self, taps: Sequence[Tap], gene_space: List[List[int]]) -> np.ndarray: rng = np.random.default_rng(self.cfg.random_seed) seed = [] - # Add current start blocks as 
first solution to bias solution space towaards current solution + # Add current start minutes as first solution to bias solution space towards current solution for i, tap in enumerate(taps): gs = gene_space[i] - s = 0 if tap.start_time_blocks is None else int(tap.start_time_blocks) + s = 0 if tap.start_time_mins is None else int(tap.start_time_mins // self.cfg.minutes_per_block) seed.append(max(min(s, gs[-1]), gs[0])) pop = [seed] From aaea7174d2386d7133b615f68474c17be05f3957 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Tue, 7 Apr 2026 11:10:31 +0100 Subject: [PATCH 09/53] Add tests --- cicada/commands/smart_schedule.py | 18 +- cicada/lib/SmartScheduling/domain.py | 9 +- cicada/lib/SmartScheduling/evaluation.py | 2 + cicada/lib/SmartScheduling/pygad.py | 2 +- cicada/lib/scheduler.py | 8 +- tests/test_smart_scheduling.py | 689 +++++++++++++++++++++++ 6 files changed, 718 insertions(+), 10 deletions(-) create mode 100644 tests/test_smart_scheduling.py diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index 937313e..ae08090 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -43,7 +43,6 @@ def create_tap_objects(schedule_ids, db_cur): print(f"Skipping schedule {tap.schedule_id} with frequency {tap.frequency_minutes} minutes as shifting for these taps is unsupported currently") else: - tap._determine_start_time_mins() taps.append(tap) except Exception as e: @@ -51,17 +50,22 @@ def create_tap_objects(schedule_ids, db_cur): return taps -def update_schedule_cron(tap : Tap) -> str: +def update_schedule_cron(tap : Tap): """ Uses the start_blocks to shift the cron expression accordingly. Gene space is already limited from 0 to the frequency of the tap Ex. 
form of cron expression: "8-59/15 * * * *" (every 15 minutes starting at minute 8 of each hour) + + Args: + tap (Tap): Tap object with updated shift attribute based on GA solution + Returns: + Updated tap object with new interval_mask based on the shift calculated by the GA optimizer """ frequency = tap.frequency_minutes shift = tap.shift if not shift: - return tap # No shift needed + return # No shift needed if frequency == 1440: # For daily taps, we can shift within the hour hour = shift // 60 @@ -70,21 +74,21 @@ def update_schedule_cron(tap : Tap) -> str: # Check that the new cron expression is valid if not croniter.is_valid(tap.interval_mask): raise ValueError(f"Invalid cron expression generated: {tap.interval_mask} for tap {tap.schedule_id}") - return tap + return elif frequency == 60: # For hourly taps, we can shift within the hour minute = shift % 60 tap.interval_mask = f"{minute} * * * *" # Check that the new cron expression is valid if not croniter.is_valid(tap.interval_mask): raise ValueError(f"Invalid cron expression generated: {tap.interval_mask} for tap {tap.schedule_id}") - return tap + return elif frequency < 60: assert shift < frequency, f"Shift {shift} cannot be greater than or equal to frequency {frequency} for tap {tap.schedule_id}" tap.interval_mask = f"{shift}-59/{frequency} * * * *" # Check that the new cron expression is valid if not croniter.is_valid(tap.interval_mask): raise ValueError(f"Invalid cron expression generated: {tap.interval_mask} for tap {tap.schedule_id}") - return tap + return @@ -94,7 +98,7 @@ def assign_new_schedules(optimised_taps: list[pygad.Tap], db_cur): # For each tap, update the schedule in the DB with the new interval_mask based on the shift calculated by the GA optimizer for tap in optimised_taps: previous_schedule_mask = tap.interval_mask - tap = update_schedule_cron(tap) + update_schedule_cron(tap) print(f"Updating schedule {tap.schedule_id} with new interval mask: {tap.interval_mask} and shift of {tap.shift} 
minutes") tap._determine_start_time_mins() diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/SmartScheduling/domain.py index 819265f..b05c18d 100644 --- a/cicada/lib/SmartScheduling/domain.py +++ b/cicada/lib/SmartScheduling/domain.py @@ -28,6 +28,7 @@ def __init__(self, details, db_cur): def determine_attributes(self, db_cur): """Determine frequency and average runtime from interval_mask and scheduler module""" self._determine_frequency() + self._determine_start_time_mins() self._get_average_runtime(db_cur) def _determine_frequency(self): @@ -66,9 +67,15 @@ def is_blacklisted(self): # Blacklist shouldn't be stored in GA and instead be in db return self.schedule_id in self.cfg.blacklist_schedule_ids + def frequency_is_supported(self): + """Determine if the tap frequency is supported for smart scheduling""" + if (self.frequency_minutes != 1440 and self.frequency_minutes > 60): return False + if (self.frequency_minutes <= 1): return False + return True + def is_unsupported(self): """Determine if the tap is unsupported for smart scheduling based on frequency or if it's blacklisted""" - return ((self.frequency_minutes != 1440 and self.frequency_minutes > 60) or self.is_blacklisted() or not self.is_regular_schedule()) + return (not self.frequency_is_supported() or self.is_blacklisted() or not self.is_regular_schedule()) def is_regular_schedule(self): """Check if the cron expression is a regular schedule that can be optimized by the GA """ diff --git a/cicada/lib/SmartScheduling/evaluation.py b/cicada/lib/SmartScheduling/evaluation.py index c07be33..3b28d32 100644 --- a/cicada/lib/SmartScheduling/evaluation.py +++ b/cicada/lib/SmartScheduling/evaluation.py @@ -52,6 +52,8 @@ def evaluate_cpu_usage_and_peak(start_blocks: Sequence[int], taps: Sequence[Tap] blocks_per_day = calculate_blocks_per_day(minutes_per_block) freq_blocks, run_blocks = discretize_taps(taps, minutes_per_block) diff = np.zeros(blocks_per_day + 1, dtype=float) + assert len(start_blocks) == 
len(taps) == len(freq_blocks) == len(run_blocks), "Length of start_blocks, taps, freq_blocks, and run_blocks must all be the same" + assert all(start_blocks[i] < freq_blocks[i] for i in range(len(start_blocks))), "Start block should be the earliest it can be" for i, tap in enumerate(taps): freq = freq_blocks[i] diff --git a/cicada/lib/SmartScheduling/pygad.py b/cicada/lib/SmartScheduling/pygad.py index 8183976..9a070f6 100644 --- a/cicada/lib/SmartScheduling/pygad.py +++ b/cicada/lib/SmartScheduling/pygad.py @@ -49,7 +49,7 @@ def _gene_space(self, taps: Sequence[Tap]) -> List[List[int]]: # Limit gene space to only shift within the hour for the taps which run less frequently elif tap.frequency_minutes >= 60: - interval_blocks[i] = 59 // self.cfg.minutes_per_block + interval_blocks[i] = 60 // self.cfg.minutes_per_block # Prevent any end blocks from going beyond the day limit end_blocks[i] = min(tap.start_time_mins // self.cfg.minutes_per_block + interval_blocks[i], blocks_per_day) start_blocks[i] = end_blocks[i] - interval_blocks[i] diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index 202a8de..80a839f 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -444,7 +444,13 @@ def reset_schedule_backup_mask(db_cur, schedule_details): def update_schedule_backups(db_cur, previous_schedule_details): - """Insert a schedule configuration into the schedule_backups table for rollback.""" + """ + Insert a schedule configuration into the schedule_backups table for rollback. + Args: db_cur: Database cursor. 
+ previous_schedule_details: dict with keys schedule_id, server_id, interval_mask, previous_interval_mask, start_time_shift_mins, original_interval_mask + or + dict with keys schedule_id, interval_mask, previous_interval_mask, start_time_shift_mins + """ sqlquery = f""" INSERT INTO schedule_backups (schedule_id, server_id, interval_mask, previous_interval_mask, start_time_shift_mins, original_interval_mask) diff --git a/tests/test_smart_scheduling.py b/tests/test_smart_scheduling.py new file mode 100644 index 0000000..22070f0 --- /dev/null +++ b/tests/test_smart_scheduling.py @@ -0,0 +1,689 @@ +"""Tests for smart scheduling and rollback functionality""" + +import croniter +import pytest +from unittest.mock import Mock, MagicMock, patch, call +import numpy as np +from datetime import datetime, timedelta + +from cicada.lib.SmartScheduling.domain import Tap +from cicada.lib.SmartScheduling.config import GAConfig +from cicada.lib.SmartScheduling.evaluation import ( + calculate_blocks_per_day, + discretize_taps, + evaluate_cpu_usage_and_peak, +) +import cicada.commands.smart_schedule as smart_schedule +from cicada.lib.SmartScheduling.pygad import GAPyGADScheduler +from cicada.lib import scheduler + + +class TestCalculateBlocksPerDay: + """Tests for calculate_blocks_per_day function""" + + def test_calculate_blocks_per_day_1_minute_blocks(self): + """Test calculating blocks per day with 1-minute blocks""" + blocks = calculate_blocks_per_day(1) + assert blocks == 1440 + + def test_calculate_blocks_per_day_5_minute_blocks(self): + """Test calculating blocks per day with 5-minute blocks""" + blocks = calculate_blocks_per_day(5) + assert blocks == 288 + + def test_calculate_blocks_per_day_60_minute_blocks(self): + """Test calculating blocks per day with 60-minute blocks""" + blocks = calculate_blocks_per_day(60) + assert blocks == 24 + + def test_calculate_blocks_per_day_invalid_divisor(self): + """Test that invalid divisors raise ValueError""" + with 
pytest.raises(ValueError): + calculate_blocks_per_day(7) + + def test_calculate_blocks_per_day_1440(self): + """Test that 1440 minutes divides evenly""" + blocks = calculate_blocks_per_day(1440) + assert blocks == 1 + + def test_calculate_blocks_per_day_greater_than_1440(self): + """Test that minutes_per_block greater than 1440 raises ValueError""" + with pytest.raises(ValueError): + calculate_blocks_per_day(1500) + + def test_calculate_blocks_per_day_zero(self): + """Test that zero minutes per block raises ValueError""" + with pytest.raises(ZeroDivisionError): + calculate_blocks_per_day(0) + + def test_calculate_blocks_per_day_non_divisible(self): + """Test that non-divisible minutes per block raises ValueError""" + with pytest.raises(ValueError): + calculate_blocks_per_day(7) + + +class TestDiscretizeTaps: + """Tests for discretize_taps function""" + + def test_discretize_single_tap(self): + """Test discretizing a single tap""" + db_cur = Mock() + tap_details = { + "schedule_id": 1, + "server_id": 1, + "interval_mask": "0 * * * *", # Every hour + } + tap = Tap(tap_details, db_cur) + tap.frequency_minutes = 60 + tap.median_runtime_minutes = 5 + + freq_blocks, run_blocks = discretize_taps([tap], minutes_per_block=1) + + assert freq_blocks == [60] + assert run_blocks == [5] + + freq_blocks, run_blocks = discretize_taps([tap], minutes_per_block=2) + + assert freq_blocks == [30] + assert run_blocks == [3] + + + def test_discretize_multiple_taps(self): + """Test discretizing multiple taps with different frequencies""" + db_cur = Mock() + + tap1_details = { + "schedule_id": 1, + "server_id": 1, + "interval_mask": "0 * * * *", + } + tap1 = Tap(tap1_details, db_cur) + tap1.frequency_minutes = 60 + tap1.median_runtime_minutes = 5 + + tap2_details = { + "schedule_id": 2, + "server_id": 1, + "interval_mask": "*/15 * * * *", + } + tap2 = Tap(tap2_details, db_cur) + tap2.frequency_minutes = 15 + tap2.median_runtime_minutes = 3 + + freq_blocks, run_blocks = 
discretize_taps([tap1, tap2], minutes_per_block=1) + + assert freq_blocks == [60, 15] + assert run_blocks == [5, 3] + + + def test_discretize_minimum_blocks(self): + """Test that minimum block size is 1""" + db_cur = Mock() + tap_details = { + "schedule_id": 1, + "server_id": 1, + "interval_mask": "0 * * * *", + } + tap = Tap(tap_details, db_cur) + tap.frequency_minutes = 2 + tap.median_runtime_minutes = 1 + + freq_blocks, run_blocks = discretize_taps([tap], minutes_per_block=5) + + assert freq_blocks == [1] + assert run_blocks == [1] + + +class TestEvaluateCpuUsageAndPeak: + """Tests for evaluate_cpu_usage_and_peak function""" + + def test_evaluate_single_tap_no_overlap(self): + """Test CPU evaluation with a single tap that doesn't overlap""" + db_cur = Mock() + tap_details = { + "schedule_id": 1, + "server_id": 1, + "interval_mask": "0 * * * *", + } + tap = Tap(tap_details, db_cur) + tap.frequency_minutes = 60 + tap.median_runtime_minutes = 5 + tap.cpu_max = 1 + + start_blocks = [0] + usage, peak = evaluate_cpu_usage_and_peak(start_blocks, [tap], minutes_per_block=1) + + assert usage.shape == (1440,) + assert peak == 1 + for i in range(24): + mins = i * 60 + assert (usage[mins:mins+5] == 1).all() + assert (usage[mins+5:(i+1)*60] == 0).all() + + def test_evaluate_multiple_taps_no_overlap(self): + """Test CPU evaluation with multiple taps that don't overlap""" + db_cur = Mock() + + tap1_details = { + "schedule_id": 1, + "server_id": 1, + "interval_mask": "0 * * * *", + } + tap1 = Tap(tap1_details, db_cur) + tap1.frequency_minutes = 60 + tap1.median_runtime_minutes = 5 + tap1.cpu_max = 0.5 + + tap2_details = { + "schedule_id": 2, + "server_id": 1, + "interval_mask": "30 * * * *", + } + tap2 = Tap(tap2_details, db_cur) + tap2.frequency_minutes = 60 + tap2.median_runtime_minutes = 5 + tap2.cpu_max = 0.3 + + start_blocks = [0, 30] + usage, peak = evaluate_cpu_usage_and_peak(start_blocks, [tap1, tap2], minutes_per_block=1) + + assert peak == 0.5 + assert (usage[0:5] == 
0.5).all() + assert (usage[6:30] == 0.0).all() + assert (usage[30:35] == 0.3).all() + assert (usage[35:60] == 0.0).all() + + def test_evaluate_overlapping_taps(self): + """Test CPU evaluation with overlapping taps""" + db_cur = Mock() + + tap1_details = { + "schedule_id": 1, + "server_id": 1, + "interval_mask": "0 * * * *", + } + tap1 = Tap(tap1_details, db_cur) + tap1.frequency_minutes = 60 + tap1.median_runtime_minutes = 10 + tap1.cpu_max = 0.5 + + tap2_details = { + "schedule_id": 2, + "server_id": 1, + "interval_mask": "0 * * * *", + } + tap2 = Tap(tap2_details, db_cur) + tap2.frequency_minutes = 60 + tap2.median_runtime_minutes = 5 + tap2.cpu_max = 0.3 + + start_blocks = [0, 0] + usage, peak = evaluate_cpu_usage_and_peak(start_blocks, [tap1, tap2], minutes_per_block=1) + + assert peak == 0.8 + assert usage[0] == 0.8 + assert usage[5] == 0.5 + + + def test_evaluate_wrapping_around_day(self): + """Test that taps wrapping around midnight work correctly""" + db_cur = Mock() + tap_details = { + "schedule_id": 1, + "server_id": 1, + "interval_mask": "0 * * * *", + } + tap = Tap(tap_details, db_cur) + tap.frequency_minutes = 60 + tap.median_runtime_minutes = 5 + tap.cpu_max = 1.0 + start_blocks = [1430] # (1430 mins = 23:50) + + # Should throw an assertion error that the start block is too late for the frequency of the tap + with pytest.raises(AssertionError): + evaluate_cpu_usage_and_peak(start_blocks, [tap], minutes_per_block=1) + + +class TestTapDomain: + """Tests for Tap domain object""" + + def test_tap_initialization(self): + """Test Tap object initialization""" + db_cur = Mock() + tap_details = { + "schedule_id": "test-id-1", + "server_id": 5, + "interval_mask": "0 * * * *", + } + tap = Tap(tap_details, db_cur) + + assert tap.schedule_id == "test-id-1" + assert tap.server_id == 5 + assert tap.interval_mask == "0 * * * *" + + def test_tap_frequency_hourly(self): + """Test frequency determination for hourly cron""" + db_cur = Mock() + tap_details = { + 
"schedule_id": "test-id-1", + "server_id": 1, + "interval_mask": "0 * * * *", # Every hour + } + tap = Tap(tap_details, db_cur) + + assert tap.frequency_minutes == 60 + + def test_tap_frequency_daily(self): + """Test frequency determination for daily cron""" + db_cur = Mock() + tap_details = { + "schedule_id": "test-id-1", + "server_id": 1, + "interval_mask": "0 0 * * *", + } + tap = Tap(tap_details, db_cur) + + assert tap.frequency_minutes == 1440 + + def test_tap_is_unsupported_irregular_cron(self): + """Test that taps with irregular cron expressions are marked as unsupported""" + db_cur = Mock() + tap_details = { + "schedule_id": "test-id-1", + "server_id": 1, + "interval_mask": "0-15 */9 * * *", + } + tap = Tap(tap_details, db_cur) + + assert tap.is_unsupported() + assert not tap.frequency_is_supported() + assert not tap.is_regular_schedule() + + def test_tap_is_unsupported_low_frequency(self): + """Test that taps with unsupported low frequencies are marked as unsupported""" + db_cur = Mock() + tap_details = { + "schedule_id": "test-id-1", + "server_id": 1, + "interval_mask": "0 0 * * 0", # Weekly + } + tap = Tap(tap_details, db_cur) + + assert tap.is_unsupported() + + def test_tap_is_regular_schedule_hourly(self): + """Test that hourly schedules are recognized as regular""" + db_cur = Mock() + tap_details = { + "schedule_id": "test-id-1", + "server_id": 1, + "interval_mask": "0 * * * *", + } + tap = Tap(tap_details, db_cur) + + assert tap.is_regular_schedule() + + def test_tap_is_regular_schedule_every_15_mins(self): + """Test that every-15-minute schedules are recognized as regular""" + db_cur = Mock() + tap_details = { + "schedule_id": "test-id-1", + "server_id": 1, + "interval_mask": "*/15 * * * *", + } + tap = Tap(tap_details, db_cur) + + assert tap.is_regular_schedule() + + def test_tap_is_regular_schedule_daily(self): + """Test that daily schedules are recognized as regular""" + db_cur = Mock() + tap_details = { + "schedule_id": "test-id-1", + 
"server_id": 1, + "interval_mask": "0 0 * * *", + } + tap = Tap(tap_details, db_cur) + + assert tap.is_regular_schedule() + + def test_tap_45_min_schedule_is_supported(self): + """Test that 45-minute frequency schedules are recognized as supported""" + db_cur = Mock() + tap_details = { + "schedule_id": "test-id-1", + "server_id": 1, + "interval_mask": "*/45 * * * *", + } + tap = Tap(tap_details, db_cur) + + assert not tap.is_unsupported() + # Fails due to cronitor issue -> means any */45 gets missed out of the smart scheduling + + def test_tap_is_irregular_schedule_weekdays(self): + """Test that weekday-only schedules are marked as irregular""" + db_cur = Mock() + tap_details = { + "schedule_id": "test-id-1", + "server_id": 1, + "interval_mask": "0 9 * * 1-5", # Weekdays only + } + tap = Tap(tap_details, db_cur) + + assert not tap.is_regular_schedule() + + +class TestGAConfig: + """Tests for GAConfig configuration class""" + + def test_custom_config(self): + """Test GAConfig with custom values""" + config = GAConfig( + minutes_per_block=5, + num_generations=50, + sol_per_pop=100, + random_seed=42, + ) + assert config.minutes_per_block == 5 + assert config.num_generations == 50 + assert config.sol_per_pop == 100 + assert config.random_seed == 42 + assert config.num_parents_mating == 10 + assert config.mutation_percent_genes == 20 + assert config.parent_selection_type == "rank" + assert config.crossover_type == "uniform" + assert config.mutation_type == "random" + assert config.keep_elitism == 1 + assert config.blacklist_schedule_ids == [] + + +class TestGAPyGADScheduler: + """Tests for GAPyGADScheduler""" + + def test_scheduler_uses_default_config_when_optional_config_is_missing(self): + scheduler = GAPyGADScheduler() + + assert scheduler.cfg == GAConfig() + assert scheduler.cfg.minutes_per_block == 1 + assert scheduler.cfg.num_generations == 20 + + def test_scheduler_initialization_custom_config(self): + """Test scheduler initialization with custom config""" + 
config = {"minutes_per_block": 5, "num_generations": 30} + scheduler = GAPyGADScheduler(config) + + assert scheduler.cfg.minutes_per_block == 5 + assert scheduler.cfg.num_generations == 30 + + def test_scheduler_initialization_filters_none_values(self): + """Test that None values are filtered out when initializing config""" + config = {"minutes_per_block": 5, "num_generations": None} + scheduler = GAPyGADScheduler(config) + + assert scheduler.cfg.minutes_per_block == 5 + assert scheduler.cfg.num_generations == 20 + + +class TestSchedulerDatabaseFunctions: + """Tests for scheduler database functions (rollback/backup)""" + + def test_get_all_schedule_backups(self): + """Test retrieving all schedule backups""" + db_cur = Mock() + db_cur.fetchall.return_value = [ + ("schedule-1", 10, "0 * * * *"), + ("schedule-2", 20, "0 0 * * *"), + ] + + result = scheduler.get_all_schedule_backups(db_cur) + + assert len(result) == 2 + assert result[0] == ("schedule-1", 10, "0 * * * *") + assert result[1] == ("schedule-2", 20, "0 0 * * *") + db_cur.execute.assert_called_once() + + def test_restore_previous_schedules_requires_scope(self): + """Test that restore_previous_schedules requires either schedule_id or server_id""" + db_cur = Mock() + + with pytest.raises(ValueError): + scheduler.restore_previous_schedules(db_cur) + + +class TestEndToEndSmartScheduling: + """Integration tests for end-to-end smart scheduling workflow""" + + def test_create_taps_from_details(self): + """Test creating multiple Tap objects from details""" + db_cur = Mock() + + taps_data = [ + { + "schedule_id": "sched-1", + "server_id": 1, + "interval_mask": "0 * * * *", + }, + { + "schedule_id": "sched-2", + "server_id": 1, + "interval_mask": "*/30 * * * *", + }, + ] + + taps = [Tap(data, db_cur) for data in taps_data] + + assert len(taps) == 2 + assert taps[0].schedule_id == "sched-1" + assert taps[1].schedule_id == "sched-2" + + def test_discretize_and_evaluate_flow(self): + """Test the flow of discretizing taps 
and evaluating CPU""" + db_cur = Mock() + + tap_data = { + "schedule_id": "sched-1", + "server_id": 1, + "interval_mask": "0 * * * *", + } + tap = Tap(tap_data, db_cur) + tap.frequency_minutes = 60 + tap.median_runtime_minutes = 5 + tap.cpu_max = 0.5 + + # Discretize + freq_blocks, run_blocks = discretize_taps([tap], minutes_per_block=1) + assert len(freq_blocks) == 1 + assert len(run_blocks) == 1 + + # Evaluate + start_blocks = [0] + usage, peak = evaluate_cpu_usage_and_peak(start_blocks, [tap], minutes_per_block=1) + assert peak == 0.5 + assert usage.shape == (1440,) + + def test_backup_restore_workflow(self): + """Test the workflow of backing up and restoring schedules""" + db_cur = Mock() + + # Step 1: Update backups + schedule_details = { + "schedule_id": "sched-1", + "server_id": 1, + "interval_mask": "0 * * * *", + "previous_interval_mask": "30 * * * *", + "start_time_shift_mins": 30 + } + scheduler.update_schedule_backups(db_cur, schedule_details) + assert db_cur.execute.call_count == 1 + + # Step 2: Restore schedules + db_cur.reset_mock() + scheduler.restore_previous_schedules(db_cur, schedule_id="sched-1") + assert db_cur.execute.call_count >= 1 + + def test_multiple_overlapping_taps_evaluation(self): + """Test evaluating CPU usage for multiple overlapping taps""" + db_cur = Mock() + + # Create 3 taps with different schedules + taps = [] + for i in range(3): + tap_data = { + "schedule_id": f"sched-{i}", + "server_id": 1, + "interval_mask": "0 * * * *" if i == 0 else f"*/{15 * (i + 1)} * * * *", + } + tap = Tap(tap_data, db_cur) + tap.frequency_minutes = 60 + tap.median_runtime_minutes = 5 + tap.cpu_max = 0.3 + (i * 0.2) + taps.append(tap) + + # Stagger start times to create overlaps + start_blocks = [0, 10, 20] + usage, peak = evaluate_cpu_usage_and_peak(start_blocks, taps, minutes_per_block=1) + + assert peak > 0.3 # Should have some overlapping usage + assert usage.shape == (1440,) + +class TestSmartSchedulingCommand: + """Tests for the smart scheduling 
command""" + + def test_smart_scheduling_frequency_unchanged_hourly_tap(self): + """Test that the frequency of the schedule remains unchanged after smart scheduling""" + db_cur = Mock() + + hourly_tap_details = { + "schedule_id": "test-schedule-1", + "server_id": 1, + "interval_mask": "0 * * * *", + } + hourly_tap = Tap(hourly_tap_details, db_cur) + hourly_tap.shift = 15 + + smart_schedule.update_schedule_cron(hourly_tap) + assert hourly_tap.interval_mask == "15 * * * *" + assert hourly_tap.frequency_minutes == 60 + + hourly_tap.determine_attributes(db_cur) + assert hourly_tap.is_regular_schedule() + assert hourly_tap.frequency_minutes == 60 + + hourly_tap.interval_mask = "*/60 * * * *" + smart_schedule.update_schedule_cron(hourly_tap) + assert hourly_tap.interval_mask == "15 * * * *" + assert hourly_tap.frequency_minutes == 60 + + + def test_smart_scheduling_frequency_unchanged_fifteen_min_tap(self): + """Test that the frequency of the schedule remains unchanged after smart scheduling""" + db_cur = Mock() + + fifteen_min_tap_details = { + "schedule_id": "test-schedule-2", + "server_id": 1, + "interval_mask": "*/15 * * * *", + } + fifteen_min_tap = Tap(fifteen_min_tap_details, db_cur) + fifteen_min_tap.shift = 3 + + smart_schedule.update_schedule_cron(fifteen_min_tap) + assert fifteen_min_tap.interval_mask == "3-59/15 * * * *" + assert fifteen_min_tap.frequency_minutes == 15 + + fifteen_min_tap.determine_attributes(db_cur) + assert fifteen_min_tap.is_regular_schedule() + assert fifteen_min_tap.frequency_minutes == 15 + + + def test_gene_space_constraints(self): + """Test that the gene space constraints are respected when updating schedule crons""" + db_cur = Mock() + + tap_details = { + "schedule_id": "test-schedule-3", + "server_id": 1, + "interval_mask": "*/45 * * * *", + } + tap = Tap(tap_details, db_cur) + tap.frequency_minutes = 45 + tap.shift = 50 # Shift greater than frequency + + with pytest.raises(AssertionError): + smart_schedule.update_schedule_cron(tap) 
+ + def test_smart_scheduling_gene_space_constraints_30_min(self): + """Test that the gene space constraints don't create invalid cron expressions""" + db_cur = Mock() + smartScheduler = GAPyGADScheduler() + + tap_details = { + "schedule_id": "test-schedule-4", + "server_id": 1, + "interval_mask": "*/30 * * * *", + } + tap = Tap(tap_details, db_cur) + gene_space = (smartScheduler._gene_space([tap])) + + tap.shift = gene_space[0][-1] + smart_schedule.update_schedule_cron(tap) + assert tap.interval_mask == "29-59/30 * * * *" + assert tap.is_regular_schedule() + assert croniter.croniter.is_valid(tap.interval_mask) + tap.determine_attributes(db_cur) + assert tap.frequency_minutes == 30 + + tap.interval_mask = "*/30 * * * *" # Reset to original + tap.shift = gene_space[0][0] + smart_schedule.update_schedule_cron(tap) + print(tap.shift, tap.interval_mask) + assert tap.interval_mask == "*/30 * * * *" + assert croniter.croniter.is_valid(tap.interval_mask) + tap.determine_attributes(db_cur) + assert tap.frequency_minutes == 30 + + tap.shift = gene_space[0][1] + smart_schedule.update_schedule_cron(tap) + print(tap.shift, tap.interval_mask) + assert tap.interval_mask == "1-59/30 * * * *" + assert croniter.croniter.is_valid(tap.interval_mask) + tap.determine_attributes(db_cur) + assert tap.frequency_minutes == 30 + + def test_smart_scheduling_gene_space_constraints_daily(self): + """Test that the gene space constraints don't create invalid cron expressions""" + db_cur = Mock() + smartScheduler = GAPyGADScheduler() + + tap_details = { + "schedule_id": "test-schedule-4", + "server_id": 1, + "interval_mask": "30 8 * * *", + } + tap = Tap(tap_details, db_cur) + gene_space = (smartScheduler._gene_space([tap])) + + tap.shift = gene_space[0][-1] + smart_schedule.update_schedule_cron(tap) + assert tap.interval_mask == "29 9 * * *" + assert tap.is_regular_schedule() + assert croniter.croniter.is_valid(tap.interval_mask) + tap.determine_attributes(db_cur) + assert tap.frequency_minutes 
== 1440 + + tap.interval_mask = "30 8 * * *" # Reset to original + tap.shift = gene_space[0][0] + smart_schedule.update_schedule_cron(tap) + print(tap.shift, tap.interval_mask) + assert tap.interval_mask == "30 8 * * *" + assert croniter.croniter.is_valid(tap.interval_mask) + tap.determine_attributes(db_cur) + assert tap.frequency_minutes == 1440 + + tap.shift = gene_space[0][1] + smart_schedule.update_schedule_cron(tap) + print(tap.shift, tap.interval_mask) + assert tap.interval_mask == "31 8 * * *" + assert croniter.croniter.is_valid(tap.interval_mask) + tap.determine_attributes(db_cur) + assert tap.frequency_minutes == 1440 From a37674de5bb4b37e133200eae5bef90750d215ff Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Thu, 9 Apr 2026 09:29:15 +0100 Subject: [PATCH 10/53] Change default config to testing values --- cicada/lib/SmartScheduling/config.py | 4 +- cicada/lib/SmartScheduling/pygad.py | 2 +- tests/test_smart_scheduling.py | 689 --------------------------- 3 files changed, 3 insertions(+), 692 deletions(-) delete mode 100644 tests/test_smart_scheduling.py diff --git a/cicada/lib/SmartScheduling/config.py b/cicada/lib/SmartScheduling/config.py index f0355c9..a2b3a2b 100644 --- a/cicada/lib/SmartScheduling/config.py +++ b/cicada/lib/SmartScheduling/config.py @@ -4,8 +4,8 @@ @dataclass class GAConfig: - minutes_per_block: int = 5 - num_generations: int = 200 + minutes_per_block: int = 1 + num_generations: int = 20 sol_per_pop: int = 40 num_parents_mating: int = 10 mutation_percent_genes: int = 20 diff --git a/cicada/lib/SmartScheduling/pygad.py b/cicada/lib/SmartScheduling/pygad.py index 9a070f6..c114c3c 100644 --- a/cicada/lib/SmartScheduling/pygad.py +++ b/cicada/lib/SmartScheduling/pygad.py @@ -48,7 +48,7 @@ def _gene_space(self, taps: Sequence[Tap]) -> List[List[int]]: pass # Limit gene space to only shift within the hour for the taps which run less frequently - elif tap.frequency_minutes >= 60: + elif tap.frequency_minutes > 60: interval_blocks[i] = 60 // 
self.cfg.minutes_per_block # Prevent any end blocks from going beyond the day limit end_blocks[i] = min(tap.start_time_mins // self.cfg.minutes_per_block + interval_blocks[i], blocks_per_day) diff --git a/tests/test_smart_scheduling.py b/tests/test_smart_scheduling.py deleted file mode 100644 index 22070f0..0000000 --- a/tests/test_smart_scheduling.py +++ /dev/null @@ -1,689 +0,0 @@ -"""Tests for smart scheduling and rollback functionality""" - -import croniter -import pytest -from unittest.mock import Mock, MagicMock, patch, call -import numpy as np -from datetime import datetime, timedelta - -from cicada.lib.SmartScheduling.domain import Tap -from cicada.lib.SmartScheduling.config import GAConfig -from cicada.lib.SmartScheduling.evaluation import ( - calculate_blocks_per_day, - discretize_taps, - evaluate_cpu_usage_and_peak, -) -import cicada.commands.smart_schedule as smart_schedule -from cicada.lib.SmartScheduling.pygad import GAPyGADScheduler -from cicada.lib import scheduler - - -class TestCalculateBlocksPerDay: - """Tests for calculate_blocks_per_day function""" - - def test_calculate_blocks_per_day_1_minute_blocks(self): - """Test calculating blocks per day with 1-minute blocks""" - blocks = calculate_blocks_per_day(1) - assert blocks == 1440 - - def test_calculate_blocks_per_day_5_minute_blocks(self): - """Test calculating blocks per day with 5-minute blocks""" - blocks = calculate_blocks_per_day(5) - assert blocks == 288 - - def test_calculate_blocks_per_day_60_minute_blocks(self): - """Test calculating blocks per day with 60-minute blocks""" - blocks = calculate_blocks_per_day(60) - assert blocks == 24 - - def test_calculate_blocks_per_day_invalid_divisor(self): - """Test that invalid divisors raise ValueError""" - with pytest.raises(ValueError): - calculate_blocks_per_day(7) - - def test_calculate_blocks_per_day_1440(self): - """Test that 1440 minutes divides evenly""" - blocks = calculate_blocks_per_day(1440) - assert blocks == 1 - - def 
test_calculate_blocks_per_day_greater_than_1440(self): - """Test that minutes_per_block greater than 1440 raises ValueError""" - with pytest.raises(ValueError): - calculate_blocks_per_day(1500) - - def test_calculate_blocks_per_day_zero(self): - """Test that zero minutes per block raises ValueError""" - with pytest.raises(ZeroDivisionError): - calculate_blocks_per_day(0) - - def test_calculate_blocks_per_day_non_divisible(self): - """Test that non-divisible minutes per block raises ValueError""" - with pytest.raises(ValueError): - calculate_blocks_per_day(7) - - -class TestDiscretizeTaps: - """Tests for discretize_taps function""" - - def test_discretize_single_tap(self): - """Test discretizing a single tap""" - db_cur = Mock() - tap_details = { - "schedule_id": 1, - "server_id": 1, - "interval_mask": "0 * * * *", # Every hour - } - tap = Tap(tap_details, db_cur) - tap.frequency_minutes = 60 - tap.median_runtime_minutes = 5 - - freq_blocks, run_blocks = discretize_taps([tap], minutes_per_block=1) - - assert freq_blocks == [60] - assert run_blocks == [5] - - freq_blocks, run_blocks = discretize_taps([tap], minutes_per_block=2) - - assert freq_blocks == [30] - assert run_blocks == [3] - - - def test_discretize_multiple_taps(self): - """Test discretizing multiple taps with different frequencies""" - db_cur = Mock() - - tap1_details = { - "schedule_id": 1, - "server_id": 1, - "interval_mask": "0 * * * *", - } - tap1 = Tap(tap1_details, db_cur) - tap1.frequency_minutes = 60 - tap1.median_runtime_minutes = 5 - - tap2_details = { - "schedule_id": 2, - "server_id": 1, - "interval_mask": "*/15 * * * *", - } - tap2 = Tap(tap2_details, db_cur) - tap2.frequency_minutes = 15 - tap2.median_runtime_minutes = 3 - - freq_blocks, run_blocks = discretize_taps([tap1, tap2], minutes_per_block=1) - - assert freq_blocks == [60, 15] - assert run_blocks == [5, 3] - - - def test_discretize_minimum_blocks(self): - """Test that minimum block size is 1""" - db_cur = Mock() - tap_details = { - 
"schedule_id": 1, - "server_id": 1, - "interval_mask": "0 * * * *", - } - tap = Tap(tap_details, db_cur) - tap.frequency_minutes = 2 - tap.median_runtime_minutes = 1 - - freq_blocks, run_blocks = discretize_taps([tap], minutes_per_block=5) - - assert freq_blocks == [1] - assert run_blocks == [1] - - -class TestEvaluateCpuUsageAndPeak: - """Tests for evaluate_cpu_usage_and_peak function""" - - def test_evaluate_single_tap_no_overlap(self): - """Test CPU evaluation with a single tap that doesn't overlap""" - db_cur = Mock() - tap_details = { - "schedule_id": 1, - "server_id": 1, - "interval_mask": "0 * * * *", - } - tap = Tap(tap_details, db_cur) - tap.frequency_minutes = 60 - tap.median_runtime_minutes = 5 - tap.cpu_max = 1 - - start_blocks = [0] - usage, peak = evaluate_cpu_usage_and_peak(start_blocks, [tap], minutes_per_block=1) - - assert usage.shape == (1440,) - assert peak == 1 - for i in range(24): - mins = i * 60 - assert (usage[mins:mins+5] == 1).all() - assert (usage[mins+5:(i+1)*60] == 0).all() - - def test_evaluate_multiple_taps_no_overlap(self): - """Test CPU evaluation with multiple taps that don't overlap""" - db_cur = Mock() - - tap1_details = { - "schedule_id": 1, - "server_id": 1, - "interval_mask": "0 * * * *", - } - tap1 = Tap(tap1_details, db_cur) - tap1.frequency_minutes = 60 - tap1.median_runtime_minutes = 5 - tap1.cpu_max = 0.5 - - tap2_details = { - "schedule_id": 2, - "server_id": 1, - "interval_mask": "30 * * * *", - } - tap2 = Tap(tap2_details, db_cur) - tap2.frequency_minutes = 60 - tap2.median_runtime_minutes = 5 - tap2.cpu_max = 0.3 - - start_blocks = [0, 30] - usage, peak = evaluate_cpu_usage_and_peak(start_blocks, [tap1, tap2], minutes_per_block=1) - - assert peak == 0.5 - assert (usage[0:5] == 0.5).all() - assert (usage[6:30] == 0.0).all() - assert (usage[30:35] == 0.3).all() - assert (usage[35:60] == 0.0).all() - - def test_evaluate_overlapping_taps(self): - """Test CPU evaluation with overlapping taps""" - db_cur = Mock() - - 
tap1_details = { - "schedule_id": 1, - "server_id": 1, - "interval_mask": "0 * * * *", - } - tap1 = Tap(tap1_details, db_cur) - tap1.frequency_minutes = 60 - tap1.median_runtime_minutes = 10 - tap1.cpu_max = 0.5 - - tap2_details = { - "schedule_id": 2, - "server_id": 1, - "interval_mask": "0 * * * *", - } - tap2 = Tap(tap2_details, db_cur) - tap2.frequency_minutes = 60 - tap2.median_runtime_minutes = 5 - tap2.cpu_max = 0.3 - - start_blocks = [0, 0] - usage, peak = evaluate_cpu_usage_and_peak(start_blocks, [tap1, tap2], minutes_per_block=1) - - assert peak == 0.8 - assert usage[0] == 0.8 - assert usage[5] == 0.5 - - - def test_evaluate_wrapping_around_day(self): - """Test that taps wrapping around midnight work correctly""" - db_cur = Mock() - tap_details = { - "schedule_id": 1, - "server_id": 1, - "interval_mask": "0 * * * *", - } - tap = Tap(tap_details, db_cur) - tap.frequency_minutes = 60 - tap.median_runtime_minutes = 5 - tap.cpu_max = 1.0 - start_blocks = [1430] # (1430 mins = 23:50) - - # Should throw an assertion error that the start block is too late for the frequency of the tap - with pytest.raises(AssertionError): - evaluate_cpu_usage_and_peak(start_blocks, [tap], minutes_per_block=1) - - -class TestTapDomain: - """Tests for Tap domain object""" - - def test_tap_initialization(self): - """Test Tap object initialization""" - db_cur = Mock() - tap_details = { - "schedule_id": "test-id-1", - "server_id": 5, - "interval_mask": "0 * * * *", - } - tap = Tap(tap_details, db_cur) - - assert tap.schedule_id == "test-id-1" - assert tap.server_id == 5 - assert tap.interval_mask == "0 * * * *" - - def test_tap_frequency_hourly(self): - """Test frequency determination for hourly cron""" - db_cur = Mock() - tap_details = { - "schedule_id": "test-id-1", - "server_id": 1, - "interval_mask": "0 * * * *", # Every hour - } - tap = Tap(tap_details, db_cur) - - assert tap.frequency_minutes == 60 - - def test_tap_frequency_daily(self): - """Test frequency determination for 
daily cron""" - db_cur = Mock() - tap_details = { - "schedule_id": "test-id-1", - "server_id": 1, - "interval_mask": "0 0 * * *", - } - tap = Tap(tap_details, db_cur) - - assert tap.frequency_minutes == 1440 - - def test_tap_is_unsupported_irregular_cron(self): - """Test that taps with irregular cron expressions are marked as unsupported""" - db_cur = Mock() - tap_details = { - "schedule_id": "test-id-1", - "server_id": 1, - "interval_mask": "0-15 */9 * * *", - } - tap = Tap(tap_details, db_cur) - - assert tap.is_unsupported() - assert not tap.frequency_is_supported() - assert not tap.is_regular_schedule() - - def test_tap_is_unsupported_low_frequency(self): - """Test that taps with unsupported low frequencies are marked as unsupported""" - db_cur = Mock() - tap_details = { - "schedule_id": "test-id-1", - "server_id": 1, - "interval_mask": "0 0 * * 0", # Weekly - } - tap = Tap(tap_details, db_cur) - - assert tap.is_unsupported() - - def test_tap_is_regular_schedule_hourly(self): - """Test that hourly schedules are recognized as regular""" - db_cur = Mock() - tap_details = { - "schedule_id": "test-id-1", - "server_id": 1, - "interval_mask": "0 * * * *", - } - tap = Tap(tap_details, db_cur) - - assert tap.is_regular_schedule() - - def test_tap_is_regular_schedule_every_15_mins(self): - """Test that every-15-minute schedules are recognized as regular""" - db_cur = Mock() - tap_details = { - "schedule_id": "test-id-1", - "server_id": 1, - "interval_mask": "*/15 * * * *", - } - tap = Tap(tap_details, db_cur) - - assert tap.is_regular_schedule() - - def test_tap_is_regular_schedule_daily(self): - """Test that daily schedules are recognized as regular""" - db_cur = Mock() - tap_details = { - "schedule_id": "test-id-1", - "server_id": 1, - "interval_mask": "0 0 * * *", - } - tap = Tap(tap_details, db_cur) - - assert tap.is_regular_schedule() - - def test_tap_45_min_schedule_is_supported(self): - """Test that 45-minute frequency schedules are recognized as supported""" - 
db_cur = Mock() - tap_details = { - "schedule_id": "test-id-1", - "server_id": 1, - "interval_mask": "*/45 * * * *", - } - tap = Tap(tap_details, db_cur) - - assert not tap.is_unsupported() - # Fails due to cronitor issue -> means any */45 gets missed out of the smart scheduling - - def test_tap_is_irregular_schedule_weekdays(self): - """Test that weekday-only schedules are marked as irregular""" - db_cur = Mock() - tap_details = { - "schedule_id": "test-id-1", - "server_id": 1, - "interval_mask": "0 9 * * 1-5", # Weekdays only - } - tap = Tap(tap_details, db_cur) - - assert not tap.is_regular_schedule() - - -class TestGAConfig: - """Tests for GAConfig configuration class""" - - def test_custom_config(self): - """Test GAConfig with custom values""" - config = GAConfig( - minutes_per_block=5, - num_generations=50, - sol_per_pop=100, - random_seed=42, - ) - assert config.minutes_per_block == 5 - assert config.num_generations == 50 - assert config.sol_per_pop == 100 - assert config.random_seed == 42 - assert config.num_parents_mating == 10 - assert config.mutation_percent_genes == 20 - assert config.parent_selection_type == "rank" - assert config.crossover_type == "uniform" - assert config.mutation_type == "random" - assert config.keep_elitism == 1 - assert config.blacklist_schedule_ids == [] - - -class TestGAPyGADScheduler: - """Tests for GAPyGADScheduler""" - - def test_scheduler_uses_default_config_when_optional_config_is_missing(self): - scheduler = GAPyGADScheduler() - - assert scheduler.cfg == GAConfig() - assert scheduler.cfg.minutes_per_block == 1 - assert scheduler.cfg.num_generations == 20 - - def test_scheduler_initialization_custom_config(self): - """Test scheduler initialization with custom config""" - config = {"minutes_per_block": 5, "num_generations": 30} - scheduler = GAPyGADScheduler(config) - - assert scheduler.cfg.minutes_per_block == 5 - assert scheduler.cfg.num_generations == 30 - - def test_scheduler_initialization_filters_none_values(self): - 
"""Test that None values are filtered out when initializing config""" - config = {"minutes_per_block": 5, "num_generations": None} - scheduler = GAPyGADScheduler(config) - - assert scheduler.cfg.minutes_per_block == 5 - assert scheduler.cfg.num_generations == 20 - - -class TestSchedulerDatabaseFunctions: - """Tests for scheduler database functions (rollback/backup)""" - - def test_get_all_schedule_backups(self): - """Test retrieving all schedule backups""" - db_cur = Mock() - db_cur.fetchall.return_value = [ - ("schedule-1", 10, "0 * * * *"), - ("schedule-2", 20, "0 0 * * *"), - ] - - result = scheduler.get_all_schedule_backups(db_cur) - - assert len(result) == 2 - assert result[0] == ("schedule-1", 10, "0 * * * *") - assert result[1] == ("schedule-2", 20, "0 0 * * *") - db_cur.execute.assert_called_once() - - def test_restore_previous_schedules_requires_scope(self): - """Test that restore_previous_schedules requires either schedule_id or server_id""" - db_cur = Mock() - - with pytest.raises(ValueError): - scheduler.restore_previous_schedules(db_cur) - - -class TestEndToEndSmartScheduling: - """Integration tests for end-to-end smart scheduling workflow""" - - def test_create_taps_from_details(self): - """Test creating multiple Tap objects from details""" - db_cur = Mock() - - taps_data = [ - { - "schedule_id": "sched-1", - "server_id": 1, - "interval_mask": "0 * * * *", - }, - { - "schedule_id": "sched-2", - "server_id": 1, - "interval_mask": "*/30 * * * *", - }, - ] - - taps = [Tap(data, db_cur) for data in taps_data] - - assert len(taps) == 2 - assert taps[0].schedule_id == "sched-1" - assert taps[1].schedule_id == "sched-2" - - def test_discretize_and_evaluate_flow(self): - """Test the flow of discretizing taps and evaluating CPU""" - db_cur = Mock() - - tap_data = { - "schedule_id": "sched-1", - "server_id": 1, - "interval_mask": "0 * * * *", - } - tap = Tap(tap_data, db_cur) - tap.frequency_minutes = 60 - tap.median_runtime_minutes = 5 - tap.cpu_max = 0.5 - - 
# Discretize - freq_blocks, run_blocks = discretize_taps([tap], minutes_per_block=1) - assert len(freq_blocks) == 1 - assert len(run_blocks) == 1 - - # Evaluate - start_blocks = [0] - usage, peak = evaluate_cpu_usage_and_peak(start_blocks, [tap], minutes_per_block=1) - assert peak == 0.5 - assert usage.shape == (1440,) - - def test_backup_restore_workflow(self): - """Test the workflow of backing up and restoring schedules""" - db_cur = Mock() - - # Step 1: Update backups - schedule_details = { - "schedule_id": "sched-1", - "server_id": 1, - "interval_mask": "0 * * * *", - "previous_interval_mask": "30 * * * *", - "start_time_shift_mins": 30 - } - scheduler.update_schedule_backups(db_cur, schedule_details) - assert db_cur.execute.call_count == 1 - - # Step 2: Restore schedules - db_cur.reset_mock() - scheduler.restore_previous_schedules(db_cur, schedule_id="sched-1") - assert db_cur.execute.call_count >= 1 - - def test_multiple_overlapping_taps_evaluation(self): - """Test evaluating CPU usage for multiple overlapping taps""" - db_cur = Mock() - - # Create 3 taps with different schedules - taps = [] - for i in range(3): - tap_data = { - "schedule_id": f"sched-{i}", - "server_id": 1, - "interval_mask": "0 * * * *" if i == 0 else f"*/{15 * (i + 1)} * * * *", - } - tap = Tap(tap_data, db_cur) - tap.frequency_minutes = 60 - tap.median_runtime_minutes = 5 - tap.cpu_max = 0.3 + (i * 0.2) - taps.append(tap) - - # Stagger start times to create overlaps - start_blocks = [0, 10, 20] - usage, peak = evaluate_cpu_usage_and_peak(start_blocks, taps, minutes_per_block=1) - - assert peak > 0.3 # Should have some overlapping usage - assert usage.shape == (1440,) - -class TestSmartSchedulingCommand: - """Tests for the smart scheduling command""" - - def test_smart_scheduling_frequency_unchanged_hourly_tap(self): - """Test that the frequency of the schedule remains unchanged after smart scheduling""" - db_cur = Mock() - - hourly_tap_details = { - "schedule_id": "test-schedule-1", - 
"server_id": 1, - "interval_mask": "0 * * * *", - } - hourly_tap = Tap(hourly_tap_details, db_cur) - hourly_tap.shift = 15 - - smart_schedule.update_schedule_cron(hourly_tap) - assert hourly_tap.interval_mask == "15 * * * *" - assert hourly_tap.frequency_minutes == 60 - - hourly_tap.determine_attributes(db_cur) - assert hourly_tap.is_regular_schedule() - assert hourly_tap.frequency_minutes == 60 - - hourly_tap.interval_mask = "*/60 * * * *" - smart_schedule.update_schedule_cron(hourly_tap) - assert hourly_tap.interval_mask == "15 * * * *" - assert hourly_tap.frequency_minutes == 60 - - - def test_smart_scheduling_frequency_unchanged_fifteen_min_tap(self): - """Test that the frequency of the schedule remains unchanged after smart scheduling""" - db_cur = Mock() - - fifteen_min_tap_details = { - "schedule_id": "test-schedule-2", - "server_id": 1, - "interval_mask": "*/15 * * * *", - } - fifteen_min_tap = Tap(fifteen_min_tap_details, db_cur) - fifteen_min_tap.shift = 3 - - smart_schedule.update_schedule_cron(fifteen_min_tap) - assert fifteen_min_tap.interval_mask == "3-59/15 * * * *" - assert fifteen_min_tap.frequency_minutes == 15 - - fifteen_min_tap.determine_attributes(db_cur) - assert fifteen_min_tap.is_regular_schedule() - assert fifteen_min_tap.frequency_minutes == 15 - - - def test_gene_space_constraints(self): - """Test that the gene space constraints are respected when updating schedule crons""" - db_cur = Mock() - - tap_details = { - "schedule_id": "test-schedule-3", - "server_id": 1, - "interval_mask": "*/45 * * * *", - } - tap = Tap(tap_details, db_cur) - tap.frequency_minutes = 45 - tap.shift = 50 # Shift greater than frequency - - with pytest.raises(AssertionError): - smart_schedule.update_schedule_cron(tap) - - def test_smart_scheduling_gene_space_constraints_30_min(self): - """Test that the gene space constraints don't create invalid cron expressions""" - db_cur = Mock() - smartScheduler = GAPyGADScheduler() - - tap_details = { - "schedule_id": 
"test-schedule-4", - "server_id": 1, - "interval_mask": "*/30 * * * *", - } - tap = Tap(tap_details, db_cur) - gene_space = (smartScheduler._gene_space([tap])) - - tap.shift = gene_space[0][-1] - smart_schedule.update_schedule_cron(tap) - assert tap.interval_mask == "29-59/30 * * * *" - assert tap.is_regular_schedule() - assert croniter.croniter.is_valid(tap.interval_mask) - tap.determine_attributes(db_cur) - assert tap.frequency_minutes == 30 - - tap.interval_mask = "*/30 * * * *" # Reset to original - tap.shift = gene_space[0][0] - smart_schedule.update_schedule_cron(tap) - print(tap.shift, tap.interval_mask) - assert tap.interval_mask == "*/30 * * * *" - assert croniter.croniter.is_valid(tap.interval_mask) - tap.determine_attributes(db_cur) - assert tap.frequency_minutes == 30 - - tap.shift = gene_space[0][1] - smart_schedule.update_schedule_cron(tap) - print(tap.shift, tap.interval_mask) - assert tap.interval_mask == "1-59/30 * * * *" - assert croniter.croniter.is_valid(tap.interval_mask) - tap.determine_attributes(db_cur) - assert tap.frequency_minutes == 30 - - def test_smart_scheduling_gene_space_constraints_daily(self): - """Test that the gene space constraints don't create invalid cron expressions""" - db_cur = Mock() - smartScheduler = GAPyGADScheduler() - - tap_details = { - "schedule_id": "test-schedule-4", - "server_id": 1, - "interval_mask": "30 8 * * *", - } - tap = Tap(tap_details, db_cur) - gene_space = (smartScheduler._gene_space([tap])) - - tap.shift = gene_space[0][-1] - smart_schedule.update_schedule_cron(tap) - assert tap.interval_mask == "29 9 * * *" - assert tap.is_regular_schedule() - assert croniter.croniter.is_valid(tap.interval_mask) - tap.determine_attributes(db_cur) - assert tap.frequency_minutes == 1440 - - tap.interval_mask = "30 8 * * *" # Reset to original - tap.shift = gene_space[0][0] - smart_schedule.update_schedule_cron(tap) - print(tap.shift, tap.interval_mask) - assert tap.interval_mask == "30 8 * * *" - assert 
croniter.croniter.is_valid(tap.interval_mask) - tap.determine_attributes(db_cur) - assert tap.frequency_minutes == 1440 - - tap.shift = gene_space[0][1] - smart_schedule.update_schedule_cron(tap) - print(tap.shift, tap.interval_mask) - assert tap.interval_mask == "31 8 * * *" - assert croniter.croniter.is_valid(tap.interval_mask) - tap.determine_attributes(db_cur) - assert tap.frequency_minutes == 1440 From 559cdafff57eca51e1ae37f1b20bf7a6709f0cc0 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Thu, 9 Apr 2026 16:54:57 +0100 Subject: [PATCH 11/53] Add blacklist and enhance rollback functionality --- cicada/commands/rollback.py | 24 ++++++++++++++++++++---- cicada/commands/smart_schedule.py | 20 ++++++++++++++++++-- cicada/lib/SmartScheduling/domain.py | 10 +++++----- cicada/lib/SmartScheduling/pygad.py | 3 ++- cicada/lib/scheduler.py | 27 +++++++++++++++++++-------- setup/create_test_tap_setup.sql | 2 ++ setup/schema.sql | 14 ++++++++++++++ 7 files changed, 80 insertions(+), 20 deletions(-) diff --git a/cicada/commands/rollback.py b/cicada/commands/rollback.py index 641b3e2..c7cf60f 100644 --- a/cicada/commands/rollback.py +++ b/cicada/commands/rollback.py @@ -31,11 +31,27 @@ def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbn main(server_id=id[0], dbname=dbname, full=full) return - if server_id: - scheduler.restore_previous_schedules(db_cur=db_cur, server_id=server_id, full=full) - else: - scheduler.restore_previous_schedules(db_cur=db_cur, schedule_id=schedule_id, full=full) + if full: print("\n------------Starting RollbackTo Orig Schedules-----------------") + else: print("\n------------Starting Rollback To Previous Schedules-----------------") + try: + if server_id: + scheduler.restore_previous_schedules(db_cur=db_cur, server_id=server_id, full=full) + schedule_ids = [row[0] for row in scheduler.get_all_schedule_ids_per_server(db_cur, server_id)] + schedule_masks = [scheduler.get_schedule_details(db_cur, 
schedule_id)["interval_mask"] for schedule_id in schedule_ids] + print("New Schedules after rollback:\n") + for schedule_id, schedule_mask in zip(schedule_ids, schedule_masks): + print(f"{schedule_id} : {schedule_mask}") + else: + scheduler.restore_previous_schedules(db_cur=db_cur, schedule_id=schedule_id, full=full) + schedule = scheduler.get_schedule_details(db_cur, schedule_id) + if len(schedule) == 0: + raise Exception(f"Schedule with schedule_id {schedule_id} not found for rollback.") + print(f"Schedule {schedule_id} rolled back successfully to {schedule['interval_mask']}.") + print("Rollback successful") + + except Exception as e: + print(f"Error during rollback for server_id {server_id} and schedule_id {schedule_id}: {e}") db_cur.close() db_conn.close() \ No newline at end of file diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index ae08090..b040128 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -27,16 +27,22 @@ def create_tap_objects(schedule_ids, db_cur): """Create Tap objects from schedule_ids.""" taps : list[Tap] = [] + blacklisted_taps = scheduler.get_blacklisted_schedule_ids(db_cur) # Fetch details for each schedule and convert to Tap objects for schedule_id in schedule_ids: details = scheduler.get_schedule_details(db_cur, schedule_id) + if schedule_id in blacklisted_taps: + details['blacklisted'] = True + else: + details['blacklisted'] = False + try: tap = Tap(details, db_cur=db_cur) # Ignore the few taps that have irregular cron expressions for now. 
There are few enough that this shouldn't impact the optimisation and is not worth the effort to try and support these irregular schedules in the GA if tap.is_unsupported(): if tap.is_blacklisted(): - print(f"Skipping blacklisted schedule {tap.schedule_id} with cron expression {tap.interval_mask}") + print(f"Skipping blacklisted schedule {tap.schedule_id}") elif not tap.is_regular_schedule(): print(f"Skipping irregular schedule {tap.schedule_id} with cron expression {tap.interval_mask}") else: @@ -146,6 +152,7 @@ def main(server_id=None, dbname=None, ga_config=None): else: # Get schedules for the server_id + print("\n-----------------Tap Setup----------------------") schedule_ids = get_schedules_per_server(server_id=server_id, db_cur=db_cur) print(f"Found {len(schedule_ids)} schedules for server_id {server_id}") @@ -154,16 +161,25 @@ def main(server_id=None, dbname=None, ga_config=None): if not taps: print("No valid schedules found to optimize.") sys.exit(1) + print("-------------------------------------------------\n") + try: - ga = pygad.GAPyGADScheduler(config=ga_config) + print("\n------------Starting Optimisation-----------------") + blacklist_schedule_ids = scheduler.get_blacklisted_schedule_ids(db_cur) + print(f"Blacklisted schedule IDs that will be excluded from optimization: {blacklist_schedule_ids}") + ga = pygad.GAPyGADScheduler(config=ga_config, blacklist_schedule_ids=blacklist_schedule_ids) optimised_taps, start_blocks, peak_cpu, usage, initial_fitness = ga.solve(taps) print(f"Optimized schedule for server_id {server_id}: new peak CPU {peak_cpu}") + print("--------------------------------------------------\n") + + print("\n-------------Updating Schedules------------------") if peak_cpu < initial_fitness: # Only update schedules if we have found an improvement assign_new_schedules(optimised_taps, db_cur=db_cur) else: print(f"No improvement found for server_id {server_id}. Current peak CPU: {initial_fitness}, Optimized peak CPU: {peak_cpu}. 
No schedule updates will be made.") + print("--------------------------------------------------\n") except Exception as e: print(f"Error during optimization for server_id {server_id}: {e}") diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/SmartScheduling/domain.py index b05c18d..41fe75d 100644 --- a/cicada/lib/SmartScheduling/domain.py +++ b/cicada/lib/SmartScheduling/domain.py @@ -17,6 +17,7 @@ class Tap: median_runtime_minutes: int = 5 shift: Optional[int] = 0 start_time_mins: Optional[int] = None + blacklisted: bool = False def __init__(self, details, db_cur): @@ -24,6 +25,8 @@ def __init__(self, details, db_cur): self.server_id = details['server_id'] self.interval_mask = details['interval_mask'] self.determine_attributes(db_cur) + if details['blacklisted'] is not None: + self.blacklisted = details['blacklisted'] def determine_attributes(self, db_cur): """Determine frequency and average runtime from interval_mask and scheduler module""" @@ -38,7 +41,7 @@ def _determine_frequency(self): second_iter = schedule.get_next(datetime.datetime) frequency = (second_iter - first_iter).total_seconds() / 60 self.frequency_minutes = int(frequency) - + def _get_average_runtime(self, db_cur): """Get average runtime from scheduler module""" @@ -62,10 +65,7 @@ def _determine_start_time_mins(self): def is_blacklisted(self): """Determine if the tap is blacklisted based on schedule_id""" - return False - # Change implementation to check against blacklist in DB once blacklist functionality is implemented - # Blacklist shouldn't be stored in GA and instead be in db - return self.schedule_id in self.cfg.blacklist_schedule_ids + return self.blacklisted def frequency_is_supported(self): """Determine if the tap frequency is supported for smart scheduling""" diff --git a/cicada/lib/SmartScheduling/pygad.py b/cicada/lib/SmartScheduling/pygad.py index c114c3c..2de7a4f 100644 --- a/cicada/lib/SmartScheduling/pygad.py +++ b/cicada/lib/SmartScheduling/pygad.py @@ -23,12 +23,13 @@ 
class GAPyGADScheduler: We cap the max shift of a tap to within the hour to prevent large shifts for taps that run daily. """ - def __init__(self, config: Optional[Mapping[str, object]] = None): + def __init__(self, config: Optional[Mapping[str, object]] = None, blacklist_schedule_ids: Optional[List[str]] = None): if config is None: self.cfg = GAConfig() else: filtered_config = {key: value for key, value in config.items() if value is not None} self.cfg = GAConfig(**filtered_config) + self.blacklist_schedule_ids = blacklist_schedule_ids if blacklist_schedule_ids is not None else [] def _gene_space(self, taps: Sequence[Tap]) -> List[List[int]]: diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index 80a839f..bb67332 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -477,10 +477,15 @@ def rollback_schedule_backup_mask(db_cur, schedule_id=None, server_id=None): UPDATE public.schedule_backups SET interval_mask = original_interval_mask, previous_interval_mask = original_interval_mask, - start_time_shift_mins = 0, - server_id = server_id - WHERE schedule_id = '{schedule_id}' or server_id = '{server_id}' + start_time_shift_mins = 0 """ + if schedule_id is not None: + sqlquery = sqlquery + f" WHERE schedule_id = '{schedule_id}'" + elif server_id is not None: + sqlquery = sqlquery + f",\n server_id = server_id\n " + sqlquery = sqlquery + f" WHERE server_id = '{server_id}'" + else: + raise ValueError("Either schedule_id or server_id must be provided for rollback_schedule_backup_mask") db_cur.execute(sqlquery) @@ -491,12 +496,12 @@ def restore_previous_schedules(db_cur, server_id=None, schedule_id=None, full=Fa db_cur: Database cursor. server_id: Optional[int] Target server to roll back. schedule_id: Optional[str] Target schedule to roll back. - prev: bool If True, restore from previous_interval_mask, else restore from original_interval_mask. + full: bool If True, roll back to the original schedule in the schedule_backups table. 
If False, roll back to the previous schedule in the schedule_backups table. """ - if server_id is None and not schedule_id: + if server_id is None and schedule_id is None: raise ValueError("Either server_id or schedule_id must be provided") - if server_id and schedule_id: + if server_id is not None and schedule_id is not None: raise ValueError("server_id and schedule_id cannot both be provided") sqlquery = """ @@ -519,7 +524,13 @@ def restore_previous_schedules(db_cur, server_id=None, schedule_id=None, full=Fa db_cur.execute(sqlquery, tuple(params)) - # Rewrite the schedule_backups table to remove the rolled back schedules - rollback_schedule_backup_mask(db_cur, schedule_id, server_id) + print("Resetting schedule_backups table to reflect rolled back schedules...") + rollback_schedule_backup_mask(db_cur, schedule_id=schedule_id, server_id=server_id) +def get_blacklisted_schedule_ids(db_cur): + """Get a list of schedule_ids that are blacklisted from optimization""" + sqlquery = "SELECT schedule_id FROM schedule_blacklist" + db_cur.execute(sqlquery) + blacklist_schedule_ids = [row[0] for row in db_cur.fetchall()] + return blacklist_schedule_ids diff --git a/setup/create_test_tap_setup.sql b/setup/create_test_tap_setup.sql index 508037b..6d99d9d 100644 --- a/setup/create_test_tap_setup.sql +++ b/setup/create_test_tap_setup.sql @@ -519,4 +519,6 @@ INSERT INTO public.schedules (SERVER_ID, SCHEDULE_ID, INTERVAL_MASK, EXEC_COMMAN INSERT INTO public.schedules (SERVER_ID, SCHEDULE_ID, INTERVAL_MASK, EXEC_COMMAND, IS_ENABLED, IS_RUNNING) VALUES (6, 'cumulative_limit', '*/30 * * * *', 'dummy_command', 1, 1) ON CONFLICT DO NOTHING; +INSERT INTO public.schedule_blacklist (SCHEDULE_ID, SERVER_ID, REASON) VALUES ('plastic_full_table', 3, 'test for blacklist functionality') ON CONFLICT DO NOTHING; + COMMIT TRANSACTION; diff --git a/setup/schema.sql b/setup/schema.sql index c519f74..ed6fe9b 100644 --- a/setup/schema.sql +++ b/setup/schema.sql @@ -237,4 +237,18 @@ CREATE INDEX IF NOT 
EXISTS schedule_backups_server_id_idx USING btree (server_id); + +CREATE TABLE IF NOT EXISTS public.schedule_blacklist +( + schedule_id character varying(255) NOT NULL, + server_id integer, + timestamp timestamp without time zone NOT NULL DEFAULT (now())::timestamp without time zone, + reason character varying(255), + CONSTRAINT schedule_blacklist_pkey PRIMARY KEY (schedule_id) +) +WITH ( + OIDS=FALSE +); + + COMMIT TRANSACTION; From 905e06dbf80f0e278d2e9404c3252135cc7a7e91 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Fri, 17 Apr 2026 17:27:45 +0100 Subject: [PATCH 12/53] Add smart scheduling docs and claude config --- CLAUDE.md | 162 ++++++++++++++ docs/Smart Scheduler Technical Overview.md | 245 +++++++++++++++++++++ docs/genetic-algorithm-process-cycle.png | Bin 0 -> 24492 bytes docs/offspring-ga.png | Bin 0 -> 18288 bytes 4 files changed, 407 insertions(+) create mode 100644 CLAUDE.md create mode 100644 docs/Smart Scheduler Technical Overview.md create mode 100644 docs/genetic-algorithm-process-cycle.png create mode 100644 docs/offspring-ga.png diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..793a282 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,162 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +**Cicada** is a centralized, distributed job scheduler for Pipelinewise taps. It acts as a lightweight management layer between Linux CRON and executables, allowing jobs to be scheduled across multiple nodes via a central database rather than local cron. + +Key architectural concepts: +- **Nodes/Servers**: Machines that register with Cicada and pull scheduling information from the central database. They execute `cicada exec_server_schedules` via cron. +- **Schedules**: Jobs defined in the database with cron expressions, parameters, and target servers. 
+- **SmartScheduling**: A Genetic Algorithm (GA) optimization module that shifts job start times to distribute load across a 24-hour period, avoiding resource conflicts. + +## Development Setup + +### Install and Build +```bash +make dev # Create venv with dev dependencies (black, flake8, pytest) +make # Create venv with only production dependencies +``` + +The project uses a standard Python venv setup. The `Makefile` is the single source of truth for build commands. + +### Run Tests +```bash +make pytest # Run all tests with coverage (must be ≥80%) +``` + +To run a single test file or specific test: +```bash +. venv/bin/activate +pytest tests/test_lib_scheduler.py -v +pytest tests/test_lib_scheduler.py::test_function_name -v +``` + +### Code Quality +```bash +make flake8 # Lint (checks E9, F63, F7, F82 only, max line length 120) +make black # Format check (line length 120) +``` + +Black is used for code style; run it with `black --line-length 120 cicada/ tests/ --diff` to preview changes before committing. 
+ +## Codebase Structure + +### Core Modules + +**`cicada/lib/scheduler.py`** +- Central scheduling logic: retrieving schedules, managing execution state, cron parsing +- Functions like `get_schedule_details()`, `get_all_schedule_ids_per_server()`, `get_server_id()` +- Uses `croniter` for cron expression parsing +- Contains SQL queries for the main `schedules` and `servers` tables + +**`cicada/lib/postgres.py`** +- Database connection management and helpers +- Connection pooling and statement execution + +**`cicada/lib/utils.py`** +- Utility functions and decorators for exception handling and logging + +**`cicada/cli.py`** +- Command dispatcher using argparse +- Routes subcommands to handlers in `cicada/commands/` + +### Commands +Commands are located in `cicada/commands/` and implement specific operations: +- `exec_server_schedules.py` – Main loop executed by cron on each node; fetches and runs scheduled jobs +- `upsert_schedule.py`, `show_schedule.py`, `delete_schedule.py` – CRUD operations on schedules +- `smart_schedule.py` – Invokes GA optimization (see SmartScheduling below) +- `spread_schedules.py` – Distributes schedules across servers +- `rollback.py` – Reverts SmartScheduling changes using checkpoint history +- `register_server.py`, `archive_schedule_log.py`, `ping_slack.py` – Administrative operations + +### SmartScheduling Module +Located in `cicada/lib/SmartScheduling/` + +**`domain.py`** +- `Tap` dataclass: represents a schedule as a "tap" (job) with properties: + - `schedule_id`, `server_id`, `interval_mask` (cron expression) + - `frequency_minutes`, `median_runtime_minutes`, `cpu_max` + - `shift`: offset in minutes applied to shift job start time + - `start_time_mins`: job start time from midnight (calculated from cron) + - `blacklisted`: flag to exclude from GA optimization + +**`config.py`** +- `GAConfig` dataclass: hyperparameters for the genetic algorithm + - `num_generations`, `sol_per_pop`, `mutation_percent_genes`, etc. 
+ - `blacklist_schedule_ids`: list of schedule IDs to exclude from optimization + +**`pygad.py`** +- Wraps the external `pygad` library (genetic algorithm) +- Fitness function: evaluates how well a shift assignment distributes load +- Implements crossover and mutation operations on shifts + +**`evaluation.py`** +- Scoring logic: calculates resource contention, overlap penalties, and fitness metrics + +### Database Schema +Key tables: +- `servers` – Registered nodes with hostname, FQDN, IP address +- `schedules` – Job definitions with cron expressions, parameters, execution state +- `schedule_logs` – Historical execution records with runtime, status, output +- `schedule_backups` – GA optimization snapshots for rollback + +Database setup SQL is in `setup/db_and_user.sql` and `setup/schema.sql`. Example tap setup for smart scheduling is in `setup/create_test_tap_setup.sql`. + +## Key Architectural Patterns + +### Cron Expression Handling +- All scheduling uses standard cron format (5 fields: minute hour dom month dow) +- `croniter` library parses expressions and calculates next/previous execution times + +### Command Execution +- Jobs are executed as shell commands by `exec_server_schedules` +- Commands can include parameters via template substitution +- Outputs and exit codes are logged to `schedule_logs` table + +### Configuration +- Database connection details from `config/definitions.yml` (user must create from `config/example.yml`) +- Each command may accept CLI flags (e.g., `--schedule_id`, `--adhoc_execute`) + +### SmartScheduling Workflow +1. **Load schedules**: Fetch all schedules for a server via `get_schedules_per_server()` +2. **Create Tap objects**: Convert schedule details to Tap instances; filter unsupported schedules (irregular cron, too frequent, blacklisted) +3. **Run GA optimization**: PyGAD evolves shifts over N generations to minimize resource conflicts +4. 
**Apply and checkpoint**: Save optimized shifts back to DB; record checkpoint for potential rollback + +## Testing + +Tests are in `tests/` and use `pytest` with fixtures: +- `test_functional_main.py` – Integration tests for the main execution loop +- `test_functional_cli_entrypoint.py` – CLI command tests +- `test_functional_spread_schedules.py` – SmartScheduling and load distribution tests +- `test_lib_scheduler.py` – Unit tests for scheduler utility functions +- `test_lib_postgres.py` – Database connection tests + +Mock fixtures often include a test PostgreSQL database or in-memory alternatives. Freezegun is used for time-based testing. + +## Common Development Tasks + +### Adding a New CLI Command +- Create a new file in `cicada/commands/` with a `main()` function +- Import and add an entry point in `cicada/cli.py` +- Add tests in `tests/test_functional_cli_entrypoint.py` + +### Modifying Schedule Logic +- Edit `cicada/lib/scheduler.py` for core logic changes (e.g., new state transitions) +- Update `cicada/lib/SmartScheduling/domain.py` if Tap validation rules change +- Update tests in `test_lib_scheduler.py` to cover new behavior + +### Database Schema Changes +- Modify SQL in `setup/schema.sql` (note: existing deployments require migration scripts) +- Update query strings in `scheduler.py` and corresponding test fixtures + +## Important Notes + +- **PostgreSQL only**: Only PostgreSQL is supported (versions 12.9–15.14 verified) +- **No external APIs**: Uses only core Python and database; runs offline +- **Cron safety**: Jobs execute only when registered server node is running; they respect cron expressions and database state +- **Rollback support**: SmartScheduling changes can be rolled back via checkpoints stored in the database +- **Line length**: Maximum 120 characters (enforced by Black and Flake8) +- **Code coverage**: Must maintain ≥80% test coverage for commits diff --git a/docs/Smart Scheduler Technical Overview.md b/docs/Smart Scheduler Technical 
Overview.md new file mode 100644 index 0000000..1eb7cd5 --- /dev/null +++ b/docs/Smart Scheduler Technical Overview.md @@ -0,0 +1,245 @@ +# Technical Overview: SmartScheduling Feature (AP-2566) + +## Problem Statement + +Cicada schedules jobs across multiple servers. Without optimization, jobs naturally cluster at the same start times (e.g., many cron jobs at :00 or :30 every hour), causing resource spikes and conflicts. Jobs compete for CPU, I/O, and network bandwidth, reducing overall system efficiency and reliability. + +## Solution Overview + +**SmartScheduling** uses a Genetic Algorithm (GA) to automatically shift job start times, distributing them across the 24-hour period while respecting original cron frequencies and maintaining schedule validity. This reduces peak resource contention and improves system throughput. + +The GA evolves shift offsets for each schedule over multiple generations to find near-optimal start time distributions that minimize peak CPU load. + +## Architecture + +### Core Components + +1. **Domain Layer** (`domain.py`) — Represents schedules as "Taps" + +2. **GA Configuration** (`config.py`) — Hyperparameters for optimization + +3. **Genetic Algorithm Engine** (`pygad.py`) — PyGAD-based optimizer + +4. **Fitness Evaluation** (`evaluation.py`) — Scoring logic + +5. **Command Handler** (`smart_schedule.py`) — Orchestrates optimization + +6. 
**Rollback System** (`rollback.py`) — Recovery mechanism + +### Database Schema Changes + +**New Tables:** + +- **`schedule_backups`** — Audit trail of schedule modifications + - `schedule_id` (PK): unique identifier + - `original_interval_mask`: pristine cron expression (before any optimization) + - `previous_interval_mask`: cron before this optimization run + - `interval_mask`: current cron after optimization + - `start_time_shift_mins`: shift offset applied (in minutes) + - `snapshot_at`: timestamp of last update (auto-set on INSERT/UPDATE) + - Indexes on `schedule_id` and `server_id` for fast lookups + +- **`schedule_blacklist`** — Excludes schedules from optimization + - `schedule_id` (PK): schedule to exclude + - `reason`: free-text explanation (e.g., "manual request", "irregular cron") + - Allows selective opt-out without deleting schedule + + +## Data Flow + +### Optimization Workflow + +``` +1. Load Schedules + └─> Query database for all schedules on a server + +2. Create Tap Objects per schedule_id + └─> Calculating properties based on cron schedule + └─> Check whether it's supported (blacklist, irregular etc.) + +3. Run GA Optimization + └─> GAPyGADScheduler.solve(taps): + ├─> Build gene_space (permissible shifts per tap) + │ ├─> Unsupported taps: gene_space = [0] (no shift) + │ ├─> Frequent taps (< 60 min): gene_space = [0..frequency) + │ └─> Infrequent taps (> 60 min): gene_space = [0..60) + ├─> Initialize population with current solution as seed + ├─> PyGAD evolves population over N generations + │ └─> Each generation: mutation, crossover, fitness evaluation + ├─> Calculate fitness (-peak_cpu) for each candidate + └─> Returns best solution + +4. Evaluate Improvement + └─> Compare initial_peak_cpu vs optimized_peak_cpu. If improved: proceed to assignment + +5. Update Schedules + └─> both schedule table and schedule_backups in case of rollback +``` + +### Rollback Workflow + +``` +Rollback command triggered with server_id or schedule_id + +1. 
Query schedule_backups for affected schedules +2. Restore to previous interval_mask (or original if full=True) +3. Update schedules table +4. Update schedule_backups with new checkpoint +``` + +## Genetic Algorithm Details + +

+ +

+ + +### Gene Representation + +Each gene is a minute representing where a tap should start within a day. +- Gene value can take any value between the min and max start time +- The gene space is limited to the smallest range it could be and then extrapolated out when it comes to evaluating the max cpu +- Defining unique gene spaces where each tap has its own gene space allows us to reduce the search space considerably. +- By tailoring our gene spaces we can allow the tap to only traverse a couple of discrete positions; this makes our algorithm run as efficiently as possible and have a more comprehensive search of the solution space. + + +### Fitness Function + +Inverse of the peak_cpu since it's a minimisation problem. Peak_cpu is calculated over a single day since that covers 99% of all taps. + - For each tap, add its cpu_max to the usage array from `start_time` to `start_time + runtime` + - Repeat for minute + - Use a difference array for efficient cumulative calculation + - Uses only the maximum CPU usage across entire day + + +### Crossover & Mutation + +

+ +

+ + + +- **Crossover Type**: Uniform (each gene inherited from random parent per individual) +- **Mutation Type**: Random (randomly select genes and replace with random value from gene space) +- **Elitism**: Keep the best solution across generations (default: 1) + +The creation of the offspring uses different methods to change the solutions; however, they must remain within the gene limits. For more information check out the official [PyGAD documentation](https://pypi.org/project/pygad/2.1.0/) as it will be infinitely better than anything I can produce. + +### Population Seeding + +We seed the initial population with the current solution as we want to **prioritise stability over minor gains**. +The system is already quite imprecise: + - assumes uniform CPU usage across all taps + - rounds runtime to nearest minute + - assumes consistent runtimes from one tap run to another + +Because of this imprecision and a natural desire not to overfit the system (e.g. we don't want a solution that minimises the peak cpu unless a heavy tap runs a minute longer than usual and then it clashes with another heavy tap) we want to only change the tap runs when it offers an actual advantage. Shifting schedules occasionally is needed to minimise the cpu usage; however, shifting can also cause missed tap runs (if we e.g. change schedule 13-59/15 * * * * to 9-59/15 * * * * at 11 minutes past the hour). + +## Configuration + +The `GAConfig` dataclass controls GA behavior. 
+ +Tuning Considerations: +- ↑ `num_generations`: Better solutions but slower +- ↑ `sol_per_pop`: Larger search space but slower +- ↑ `mutation_percent_genes`: More exploration, less exploitation +- ↓ `minutes_per_block`: Finer granularity but larger gene space and slower evaluation + +## Schedule Validation & Filtering + +Not all schedules are suitable for GA optimization: + +### Supported Schedules +- **Frequency**: `x > 1` & `x <= 1440` minutes (more than 1 minute, up to 1 day) +- **Regularity**: Cron expression must be perfectly regular (same interval between every consecutive run) +- **Blacklist**: Schedule is not in `schedule_blacklist` table + +### Unsupported Schedules (Skipped) +- **Irregular cron**: e.g., "0 0,12 * * *" (runs at two different times) — frequency varies +- **Too frequent**: <= 1 minute +- **Too rare**: > 1440 minutes (more than a day) +- **Blacklisted**: Explicitly marked in `schedule_blacklist` table +- **Parsing errors**: Invalid cron expressions + +**Unsupported taps remain in the fitness evaluation** but don't participate in the optimization (shift = 0 fixed), ensuring the fitness score reflects realistic daily load.
+ + +## Checkpointing & Rollback + +Every optimization run creates a checkpoint in `schedule_backups`: + +- **Original**: pristine pre-optimization cron (set once, never changes unless the schedule gets upserted) +- **Previous**: cron before this optimization +- **Current**: new cron after optimization + +**Rollback Options:** +- `--full`: Revert to original (wipe all optimization history) +- (default): Revert to previous (undo last optimization only) + +**Selective Rollback:** +- `--server_id`: Roll back all schedules on a server +- `--schedule_id`: Roll back a single schedule + +## Integration Points + +### CLI Entry Points + +Added to `cicada/cli.py`: +- `cicada smart_schedule [--server_id <server_id>]` — Run optimization +- `cicada rollback [--server_id <server_id> | --schedule_id <schedule_id>] [--full]` — Revert changes + +### Command-Line Parameters for `smart_schedule` + +- `--server_id`: Optimize schedules on specific server (optional; all servers if omitted) +- `--dbname`: Database name override +- GA hyperparameters can be passed via `ga_config` parameter (advanced use) + +### Database Dependencies + +- **Read**: `schedules`, `servers`, `schedule_logs`, `schedule_blacklist` +- **Write**: `schedules` (interval_mask), `schedule_backups` (checkpoints) +- **Functions**: `set_snapshot_at()` trigger + +## Design Decisions + +### Why a Genetic Algorithm? + +1. **NP-hard problem**: Optimal schedule assignment is combinatorially hard; GA provides good approximations in reasonable time +2. **Flexible constraints**: GA naturally handles irregular constraints (blacklists, unsupported frequencies). It gives us an easy mechanism to include these in the calculations without changing them +3. **No gradient**: Fitness landscape is non-smooth; gradient-free methods suit this +4. **Mature libraries**: PyGAD is well-maintained and configurable +5. **Discrete Runs**: Works well with discrete times + + +### Why Seed with Current Solution?
+ +- Biases the GA towards an existing solution space (there are many solutions that are equally good and we don't need to explore every single one, this isn't a problem with a clear global minimum) +- Biases search toward improvements on the current baseline which avoids "thrashing" between very different schedules + +### Why Unsupported Taps Stay in Fitness? + +- Ensures fitness reflects real daily load (including irregular jobs, blacklisted, etc.) +- Allows GA to account for load from non-optimizable schedules +- Prevents GA from misoptimizing around missing jobs + +### Why No Frequency Constraints in Fitness? + +- GA gene space enforces frequency constraints (each tap's shifts are within its frequency) +- Fitness function only evaluates peak, not constraint satisfaction +- Simpler, faster fitness evaluation without redundant checking + +## Performance Considerations + +- **Time Complexity**: O(pop_size × num_generations × num_taps × blocks_per_day) +- **Space Complexity**: O(num_taps × blocks_per_day) +- **Typical Runtime**: ~5-30 seconds for 100-500 schedules, 20 generations, 40 population size +- **Bottleneck**: Fitness evaluation (diff array accumulation); PyGAD overhead minimal + +**Optimization Tips:** +- Blacklist irregular/infrequent taps to reduce optimization scope +- Reduce `num_generations` or `sol_per_pop` for faster, lower-quality results (specifically for when testing) + +## Future Enhancements + + **CPU**: Add CPU to the model. Needs a separate mechanism to capture CPU levels so this is likely not worthwhile implementing diff --git a/docs/genetic-algorithm-process-cycle.png b/docs/genetic-algorithm-process-cycle.png new file mode 100644 index 0000000000000000000000000000000000000000..04dfa4b46898cd1a6c6b489ab138108a6d4ff01a GIT binary patch literal 24492 zcma%ibx>Ph)Gbh)Qrz94Sb^Z~THGnLKyfIgxCIKdxCPfB!QERdIHh=Smry9~ZZG|P zZ|2Q>f4!N^&CI#EM{@Srd#|J+@MnXcuRaH^YMM6ShL_$L5z(hf`Fx;*( zAtAkYS5=VJ_dz~fK})8YdF(npJ{}uMx{Pfj_=<$}Nj6G62ogscW%hMlw_QZhl)Zl? 
zx~=UyDkvfB$Nk-rzpL%hG5D=qvrJyrN9(q0_wCQc4c@C+TN-yO;;vq#p45YG*(vD% zzNphE5SKxl0mQ}ao$dc_0Y;AV>h%wp>J=H-gw?$sjlo3CC2j&e$o0|pWOnKFn~qL9 z7Qb?GuvO^0Rv}YKkU=HLd5Ab!z<}2=HTlJDTP1HRC4{k|9cn;(VK17xa!+-eU(&TC{m$FJqTB}dkQppm{ImoJ7=5r8={>h& zEp})l)5(cGp@X2z+8RHf3#^oEYzI0V|W(8BCt}X0%lgeCNKdq&oX_ z)m)*|@38vL_TXV*gKFH`e$qMJe?!(Py1}t@TM9TFN2MBF{B!3xo-MA&VeFl){vpK8 zYg<9zVMzaQeq-`Rr?mb|lydN=@+T!O?sxdg;P%+pnhk&WLkHGgF^p`ePu)(WDgoPUlO$Fjxrts{92eAIU{loAJqF@HwNkwkNHXhxXBL^<^s zqfX!J_0qj|Q&fxz4a_JwU|)RHc!rt{CEw%=_22}fNE}>*M*wk&8;(I-;6FzGZ%ZL5 zRmdh+e$+tTf43@;SSlwRwD!`WL_42TF`YafT&6W&p!NT=G`od93dfSa-g67rUX!jh z6TWnXr2Qr%+aAj_SD}Fy6-1?!^7^>HO^4kA3~>i_qDS`*gflKPQ}I|Tk#O>&&Y2wf z%H5@>eZgv{NQ=&#G}jCZulW8O^&z1kwKTJy_t1MzPrPYzE`(Z9p8FXTRpYVQ0eg&x z-Oe$aGSFhi!TI4Lc^`Ln8`<{l-pp9bn!JiLEx@Mn&e9B2TXg0%e^pQ`ZDs$H`g$P- zUP@^AW|!f911R<+G?!kSm{TQzbWIMd?`)aP6C@5sE*yl5wc5Hpj^i<<8Gx^SZ%kmtIHI6uZ_ut# z@VE9(GcM zgT*8R+hWZ)GuI27nOdLyPl*y|A!s`v_ilzm1LISKXiO-ls+Cu9wbb_G=pS4+FDcin z8VE4ySLb$2%TDIRNagbAoQI zO*VXFK-^)UtDuqk78+E1q;iUmx64O=bcO;hOVF*355WYHAK#1kM$dX4d6-E%Gr;HY z&XXdL$n~HWP!?nqe2i|{pajrWbo2brsdQ=FqFnM2W?(&ob8bAj#4~hlF4ED_aWHmD z#3HdvTU-Y!Vhk148it|{zh>HWX%AdoJL+}3Sv)`On;Z%Z+IMYy@mnrrXO1^Iy{plM zSN*936BtbXly!Q7f{z*mmwhT0r$s;eUxk0bRxQmrixl_u!8O-S!(F(=8pAB zR@Yr>JGMzO#ySZ2%-c%FTR8%|4|j2%n?8IBV|EDMY5{qf%W9N-DkI{3oAcTCE@nS8 z@Hxz{yBewvz8**O|C8fh0fozMG5g*O6_)~TOB!_9%sH!CRV^`0y_M(_!4g{+?dPsq z;2<+AaBsIf%V|~;5mZVlZ~zzQm7wReuda5;3)YNEN3OsD&dDPhk4LQc zw*;nAgjU*yECfO8hNs?44c_J?xc-s>r=5F+UXfD1XaqyNY7cimF5Xvbfggl}rgHdb zFHw;RrpoNjO5JsXelqx8pcU`8Mq_W_EeZ$zHrVYJW_kWxKj)Iya{KCOdxvBVSEfu~CSS>x`4N_X&Srtz@V?}&1Ir1AZO zNjO-(UuAkzWOSVrNfq+o;*GNwP<^!5{iFm{1G~9BEM$oVrM7GopUpN*Qt|TLfR^F$ zVdIhH6jeU*eDul*85R1uW9|Dn!~9Q3q8rLl>=$lS634B{woHjTYNCnQO$cC1w%p% z@SaQk+ZEBER1oY!@wt)2W^!rVkII>6_v7}Wds8{$cpXP1Ta^5n2#e5F6V&bOyeIfubLQb8LLxh!LBWc$pZg_ln0v%KSY%&w* zW|Xhkx@F`tC*}Eb~ zh-V(CWpHqVa#OeLWI*nr@V&oV|T%T65&erdDx< z^M$1I>Elt+U{a8T*gSmwAk?U^)ekskJL@F2&uy|}ybC0^pbTruf*%1Ys<7X;rqwdi 
zo)aiR8mm%Ai*w6e4N@dHX_)pna2ymfJl5Wv4oYWT9S(6OY-&cFFooDUb^xh-&757? zVt%iBqUFZ21yVUT7}f57Ee;fBep7S^_xNe5UY4SPibAgf1lD%jE+`+(?q!$?3f$ey z^c_K?SA9sEekwoP*u{0LNqkBs-fr+KGmN7#le9!(tVGtO!p#;A=!+T5{p)|3vKvQp zYa_U}8|Lq^?PMP}qvCS24c9eJmw3<+_)$2PLxTGZxSlo_b5E7FuNj$1%ed;mw=Q{m zDKtMnmX4lxll3W9b%~>){b;`*96)%`-m=wn)v~~HzFu|7)-Zpwe~mxs8;S|?8nG*N z1o|Vf%%8`5{r<5$<&AIG@XW3S_D^zHBZ-g-&EQTtQatBg$TOAn0bym_`SSDL ze+}+?AmI7;z2I+K@Q-lbwd>gxOoCL~$w5(5Q7YtG_rV(~*S9bva?)s~m|DL`;>)9s z50Giq^KBR>~G}RWFA3iUkc8ls|2MH3RgS1$DGTB^6kc zI^arE)$V7!JC4+$9~2Bs6!`XCKek?|b!aT6;b~J+HFtQGYwK;jm4(?`)L$I4TEG1zy%|?ZB;fMk>Y6RjM)+CA%AntzD72 zfxG6|2VXwV_g3KhTo}!&J_^*)osz z|KxT~jin>JoI!1WoTpKaM$Z7aBQfaSIwOmB;x~5C&SRl4cy{llqimtp+FM#a5#Pmc z8v=N07Ky_4JInIIis1Gn%7ECa>d6fF!{If~vpYCPoW#^v8Kqot`(}PiBz3*c|B>kpj={(`&afyAyFDF`aiZ`1E({{fn)ygWxU^^`1L) z#qPz7TNXYVY=IbD7GOi@kUa6z+UL&>Z+N55)vU=lC zm2KcZRCj809AZn?+CP>{^57WM9+o3DHkKW7>dq@!-ejPn?|rMI zb&lx_5cl8j?`B;}oaf60=sN8T%1g)kccwmEPEL~XD)oNuqpR7BR#G)u{ILl8lGWtKdL^}Kr}ti{|Xvi!N7Ly*PF6_#gp}xl@>$!mz-ZSs%3hH9GD5QW5wW%ZO^b zGD0@YZ}D?#`QxcYdFkV+PlYB}Pb zWRc#+lEXgnq0tvMvGt8ac?2aHJb^VuadTi%JN-$YLy4y3A7eoGW)CTCKN|UZZcNS$ zIC*N18*!}XG>O+p)5o0t<>RL6$L&|MM>J@F)p7T1LVXDElx^UV?|fEx)gX__DdVtB zt>`Xx`iUlx<>&2xxU1nyP_pZ^f!H>sGdgj}w_4GhtH#GYpZW$_6OMm?KXTt=>%He5 z+&*jyR$6psChq*kVC^dm@{b45>A`+Hh!=yi)HZ6z>GF*LJ^51#yI_F-?x`Q@Kl;_q zYRFid0+ffwYI`+5j#0h!;TMV18vesRV||dOOq5#FZicPUMU5K^!+)Ct&*KZ zU=Zk@X!72nh11Fy7zei%(44Use|SilCa#mz*nrQ20M^8(VC88jIK`$AJ(Sq-VoKobyUp z6&nPoOlG0rOt=8jLuJ@q&o$Y`IL zZGMf)h9USCpN<#$$fqpi>1VB|8*HAYE;^YpqlNa>w6HE0KZ%!B%$Vf8ZZl< z&|qH>jcJdmrA}tepa1D50@(4y*<<6wvb>!at1&Z@LmKpWr_A}C9uW0GO+vzwAe4i@ zn{@ZH4-X|7;p?|7hcf6OFZaUrej}E8Qbpt1JB9u? 
z+NoeT|BGUGyqaUyhzU?6R`c5U7TJV^f}q%$T>0RnLZfD{YklVAhonXbs^z!&hy_=+ zr_Wp#GQ+IE+?G!mRk?lG{60C4ABOdBY*=#%CiQ#x#W!K!*g*Bmh<`zKhb>+#Y5qn2 zH^TOZ4^Vjmd4asTIOl}4?WXZJGYPM zj6I8E-^DgQ(*@!DyDH;RT%&U&L#o?Acd!{C6B(%0%ml<`y zNpdlb4#zG(OwJOf+K&RW6yK#F=oce4{JFXwz^wg8&9naFln*R~4jr-Ehv2}o{=fTp zo=?;gnF4618grf9)%Pr3e=F8jRBS2PIm2h?|7KOr30N-rSBj{rfc1)&7$Pi1MR^2* z%eVQ(>}(|*oFz8pbKX@mjK(l0Ru`?JFZr&_CC(l)C(4D{(hDm?Z<1y;&~5tdl^R@U zSesQcW@&rWY*iXlz1o(hKb@BHO@`Mv*ifhmY+)43vmtVl4liH+9nC6oxok8W8(6kd z;g{l+9nY&o)*DPcp?2NHgW888gxjSo<*~?QNmNvCxD&K_53V|wMjno;BN3v^!MS6D zbk**Whgfl}B=m5rCNt8G#*ZdsC@%P$@?j%@a;&kte0}lsjU-BdB^nxwZIU>UR1@)^@=4LhZPRl0kOaaSF?Se=K!PzzXA?zgB+mBOi_~6uT zw+y%BpLB~gT1;cLjE$Xqr#ElrnIm~cMl}JiOGGc{)K%y!@tMqn5UGSUAy+J%Jlj^G zLTPb>3?OT5t8f}NK{xMA1;2SDv@qj+cpY7DRjKI@B0VLxLwJ8IP6~ITjRz8!kpq!j z0yHW%77mfoln-Jz7HquNzB#rjB$iC#PlkKNhzK<*ED(nva-0@+@`g{zgOdH!BqXEQ zb{=rex+De&+IhaNNn%1Rxr=Kq;ovio z0m7TvRT0>Ej*ut_ii|DFut^rAtkn;=Bhr!Y;J~Th$1-l&P4Xkj;ggsMOESUP+TLxn z``7jz!Q7VS$f^Ii0q`kLrd*F-(m1lpA^k45)xn_;-mH9!DkF=87iPc}cW{C;vVF}c z#Tkke0?$R6crd9jATia8jGauGdxK^p7*S`k8L4}oZ}^f$oGRAjT-3r8Cds~{MwA6l zp;yH>6qH5kRb!M(NZ8a>Ehgx8Ysj!wuA_{lB<;M(%Xu@9I?jl#lSzdv&A{T);dzG+ z6`R}#D6_5Y9w`_%V4*!i_Q5@tQ@uM8ox-fZqv0v`R=d0=ko2=Z{r3LMwjL| z$LrDMGhyy56t!s3FLm0_x@WcUlNZ$9B?}1Kh{lWW44~67r7WBrteKU(iMgPRQLR^U zBdl^cs~zzqbMhT_dYMo$D(S9%bLv#OS=pcAR*Adj!%BHZ;pmXjr*teq0Tfin_WN4< z{c~TMl${-2o0gMZp;|O#q4gkV&e1@0%ru)q+2saSsG@ciVwyPS6u@($CvuG!uTdL*B6*0v2Z&3QI5UT0bgQ{&RQI+ddqM<-y$2}5LDCQ~@ zY#k9Es9G=VgrYq84*4T;w#c+I_n2u0h4Q2b>SxsLos%dNd68!Qs-oP&(tZ;s0XXLW zE=?df{{L01`2T4k|5perF|ipS=qB$AUKgoI$7H1E5@6VIm_-!6ej{9P%v{j<7MNEB z?B%{U?L>q@!{j0c7peFj(Xx?;Xjh~4v^>+=L4}G%=p)CnfgaA+FBaxLe481=dvJM` zVAvsJ+#o77_Ik^u(|UCwXk02V=qjg8Lno`x^;v^;tiyv0L6@Qt%e)MJU7ja>^Gcr^ z`|OdN*r3sJWRqLHR44$?>2Q;;eyfNWb>p5pkF>;>u;vRq6GSA!+K;)vTh+Ny0U|0W#sbS~hKLk==IRy1tAa zDu@NCum-vNTpfuQMh(rA8&C^pxj{-j@6vJV@g4=PCoZqrY6^JY*GOI`oP^pzJa@J> zc6=U%)oDC+HiHXK3A?W_AX;sLfdhw;9r0Z_G8*BlWre(NY5cACwwxZ{diPSg7SiYq 
zK6866$Cg?1^5x7SYYcuWv*sD63b5OYfMyz2>F~P!%CLZr;v++TxNOd`6)35Rv*L*| zdG^GX>A9X@93e3l<><0Kj4s)yP4kgq_*2L1zyU_(UeghOdzIbeS53V~Ggk8gYf%u) zu{w>v;@%G`P&nAmv?cV;S8Fra8WbcUEO`?*ok*sp&>CX0An*>0+@>$N#L1}Q$6&F|<-hy9 zhv_;`LjG)clHB)OGz)DtSXZJVr&-obsnfT@%5;GVKQ~5}<)5$6KXFuvL!Q4V9t|lA z2VTzL~|n&ng(^1!A8$3F=;9^ zZr@wFyIQHnhb7(HI2_fTbxSNrI&bY)X6*jtwh8!-eDhJ=osjfhC-_*-p9W@h5!+t7 zdMEKoV>@$7qvh#IsI8Fjz|**P$|3{$9`y6Frd#tKqm3qvBxvHO8weef`eJwWl%*G* zW4U-$&-8tvjL02NB3<;j{iMo>;PbMgfHI-4V^tLooDG2O#3K^(4p(KBA?01 z?TgkL(ZTKha_@D7>qzk1_5){e7pto{PK^>CpU#$ zGBl^nkr~!ZGefH);i7g_hN0qcvEaWKK6(G#-5d8b(zvU}m@@yI@V7$iQFbY>PoVgY z%9*6|L`c2riPS{7qpj|~)3+A) z^1bgH--`uPolka}42CtVSWQo8RBT%p3VA$`_L45TvM#>9?Rs$_-Od3*zD4|1wI zS-JaamYhf__RIEReo8w1m^u+?HVm?;|I#811MxrNRa|&pqU_fvqxgj%V6bu$O!Y96 z9O*%igIDNI9a8%a#Y+2*9tWk1L{|u;Lg-KXbG_UMu-Wi8RJbsHP3W+dH69A^y||s^ z)B>d?zjA2`DBhj;PG6Sdj@f(iIH#wK8#fsDdpYQZkOE<|v*y)ybdxP>FeNB=cOss& zM`y-F0L}-VGk5d2TV5i9it}7|y**9FDt5mf@wEHkq$7j*vV08#$#wY2>x?~Mm~n>H zbKSB@_(|~TL2*yQ`K7&Y%;_mk-MLOzLz43T@48M7K_`moxm8Osez$4{5=SRHbd;vG^iplbmb|*`&_R()t??9;~xLa zG!MM)LmtHNr^yxh`)fHme*dENfr|EAh$75js;DAIvUh)i*Yh-|SXo+7MaNRY(f(rh zKC?`f#y=4{1Kh=NZPptYSi?o5>bNdQPR_Il?3sHnr_Db(5r1~<$1{Ik`=RY|Ds9C< z>p2Hbo4@V)0Pb}kmu9CCFB$*2y5_g`;kgju`t-5Q7;csh$esOVYOg~C zuA)wI(z@$aeh&z+vUn}btp(Iv^b4!WVzJAbUtB7qJp)mTkr9%aRoDsaL6%y$Pi%$U#Tm~}S!J{NOMLEG5Uy5#$30?@RXNR zU)8sb?Wwb&R6+U z92uRN1^C{lj(MjwBNRuiN0-_;*GCuc;#U6a%|cT>1Z(>4UC_JWZ+C$}Ao3v+-VEY~19#XOq{XDto zi}@?$vZTq2wA1#eli987^eK;~$2->!cRQlIzA{^+y5k1&w}H%9Gh`<}b)GRV@52o; z9Cp$TDkQ|Og8!Uv8FJP14V zk;X=#^C|aF_prc^O6Pj!($Y$EEt!Kb|j+3kfy!RpuAZPw$6g>yOYP#?L@ULCMH-R=4jJ&gN2t+8B#1)k`|a zs9(r3#K9{sGu)`Z7F_7QdyP}7vL^5` zw4&=ol4G^c8HmSg~xyM{n6hGyL~1-{#*3%5vOPl3+Be#Fd8&s z7_V|~K2rP)s)m+)O$3%?eGO7}K&KwGyZk~8)m}VS*B>R+X&Unlr5GPpYO$B)Eg$^K zNV}h1AZQ&aaHCkiOJfy~^I3YKtZ~uM?;+!dQppoW*z@+EL`SwwP893}b|ndXTPWmP z2jo&nY>ywnS}Ei1a|>B4q)%8gM_*O5ZPoE1rcnPA7lttI-rDJ#Xq06YyRXguPSh1&y%1Xfx{En-7%NBJ@BgjP z=5+B+4Dr0kYVf#PLULbKwA%HL8}rL51u5xN<*+v;ajaChT(Wf 
zOj?#@2^EcAey2}BP!o35rd}Z{c5i^c>&uw2E9S~>{NH>H^;ZD*3maS8`CP0~!QaJ> z^>!P1cX_7Yrg-_|8|^;C6XfS%2*JwvxC;1Ei2f$i*9fGf0C43ei%q4n-%%7GWm>L^ z!%pX-xH3E~-MI+IlqC2#Z1Ok1!o2YN%~b12EHV|ahEO_71e1P^y}-#UKbJ~;M{(F} zeYFo+R4bA6ggxT_PtC5XqW~`vNmZ?i|!y2OikqlwpJePVOL-5X2 zm(Ud}A`b#8|ByxGuT=MWik<1^+#@%^e`zRM10xX% zI6_0!nnH&L?sF-V2e;`j*aY1s8lPu>kBEf$_=*HGN~%X9sB(p_U<}(+82*bUYgdQ| z&aS_h4V{B6DSP?I^Ghtqie@PWdeX;kKU;Lcq?^Q2+sPfV2xKP3eYK4NdM*&>AcT4H z)l0`4WSb*@(44H;0T!*#<$d##yj^E7J5-^IzTT%hY4Z{@>c)l=!9PNlEOjL&;#vul z|FAqs5a40pGv$Ui7+L__71~FeU#`V*d6;kPs?9!5? zPe$lMZ%bhCS{Z+CXy7@|3KLaq+mW(#>^^C<4#ydy%90K3E!d$#5mFg&2jd#y^gjua`u;S$eQHpsHm|_aD^bu_Q{fDsRrd*1%ubHHqwH zP;AvnP|J%*Y(^UdzVzH8EgQDV;521bqFFa2aor1FU9 zWVksMAj8MK1TF&J7LiJg8n3(Kp3`UpUdRSFG_RtpNBBIVLp0lz&Y~O=na#6l1HMh- z4UP7g@PX#!cLEBMXl1!M)eu$Kr-7zUG$k4D z0PdBsgRh2`0}By!9499}T9GdyJ_q}iAD#xSUl<>FuaRvcK`@c&2CaTfbVHfwi);Hw zJ5Bl##$eb>tl*pCK(?TU_bhm#22o~AAye7>aQor`%^dRhTUOeq#2hn4jl$CU=94#v z>g`Wve)|`EXFtb$GZs}pI8gTBu{((&+pvI&Z!6w7s0l%~WXyDkW2vp6_evE^$xWe{ zu}mD+ewFYl=(o9w8izSF>(7~Xw$uiD9I+5>NmWg3xuDf~B zEpgc*(h62y{i@frN80d5@Y;o9N18Oxc(atPJ`VhhyH~L50PK5w;;$HdI+s0m$7@v$ ztLQdO>fTAKL6!rYV%LJCL;Kt>p2o7{J(dkVIQS{GK3Lr47K0x(D+Z?ks9}CRrVJ*| z%|Ru;fZkIRb83T4u?UC!Fb^^cA0}X~W^4F@t3&lW*r7vHj>lpa=xzF?5H2dJ$F6ia z?8M;l_SwIwE8gqfVl-C)?)yt@h=(;d-Rv8F;po#j9bt@QbZPIGXNCLD>Pyjd2kgQ0 zC~`?U+Ov#0ETiA7jbo`}9qNdu?A{DDd;R4Q`@=UnCM#c+Q?5r>htLdgXw|2GAcWbf z@1C*kdLTv4BHZMhxmt^i1P2AZ`Ky#y{_92d^dAr@0qjy`w^kU~4WcQ^t0BIU4+t^T zZRNO?O!yYhweJz^A*m+zoEc*A!B{0qxC?d5)BMCjw^bBKASCZat0NkX#meH+QWaEq z<-cAcYs}1^3LC|j2YT;*iSUpJrU$dMH#RP6h|zJGlTvRSF)soR{u&oy{Y%hZb0@1< z*e$x$=+PF~N=OqL=!p{$)32;Je<`_;)Dk;lU%?YHlR!AyX`ZQV&e|pt3(|)deI1OYc(RP9p=7)n%yc<}4@mz5sHh?&(LY*rYQi|Af zNpFu6EtJ*LIv|6Vw&&qRJKgK~D)GIiXOKtD`lAKdwT7;_N+SrDr?~ftK6>Y4)(Yn% zZ%XyO3bfmHe+TkW4)wh;_@eGYCqDj zK5kewI{%>g(oXZG5*SEYg|6xrg#V_}Ki4qO2`XU$efI<2jtAM{^YNV~!oJgO%zq_q z1eI7t_?ubHW_iup_dGsn*84lUNk>VvY$`x+)W<9Ag4a1`A0Lgvb7c;VKMv)GfY~0$ 
zlIgX=A8j@WXlrJyZ92~1sP19;s4TvcEPBEW2dfu^UHLPi{EfLLqq*(*qfid5cW%}iN$xtV>57_pb?#&PA+_f{jUKO zWnQ~PDk2uSDvQlhP_cvkIkWwhZhPJ1!J9YCi%25@vA8_F!))cqrIcm8;|!IkMY-C7 z1whCeC9iBXVVH-wBddM227tAkRX7W*=JD=CN$d%1`h;toUTFO(Ve~jjDAeQ142g1Y zem~MZ*mg&~Iiy{4T5r#G(}a`I!jf+=>Q24L3zQyb@Hb53MhN9v4skx1ftp1=cyT|i8w5@{J=`B z7_9Ww-NVW+H{nQ(0Axf-jPE;Xh2u-cd6W^O`*m>Lct$nRAFa|mlIKUvs#>5j zTwM6u3I)4oTxX`}9n$4yu6E+x}Tx29T2;Mi?l)+6@iPvc_L zS$-Zpvsog0(VjcdWaEw;ygTl2=u;b^Eb!;aM&$bpLPHahI=q)oh3WJWo>kOq0r5{f zP-(?rd2t$`Pxk!uJF~8StS;GEtsJLpjM#?8Lme?^7Fmc;9M0ehG16)B>)spPh7QD{2c`Jede( z;w7n(Y0xuPPz2>nrxMKYEF^vu=lF}JcIi{KrX$N@Um+txhEU!?Rif?+MXSiMBGya` zMd!ILU+#K=*20(D?|PZ4gTDPp{ZYTMBXi!Cbfn?c9jt?k;R+*coc*p0j${@qx5XTm z=}tIezTHpA$TxsvV)t`SFM9w6IEkSO?gA4LsVa~>u=Aq`=D#X3qsLy*#Zq+X4rcW3 z*(>AIjxfJgY&_R8*S&~jXU43x7a?$-Owo!96pvC=xpN4o#E1TCm;um-o8t|(ob@eM zCeC0jrKp9}=A??62q4;q8!nK(M!eJ!U`ygbf`mB z5e&p11}RL;;X#P1hYTSXr!u1QuWR2pa`#C1GhTo_mL9Yx^G}CGofOV&&-^u+q2fU+ zPE}=YE#tBV@WK`GNx0-m)x6&zt9=JKEVi9$Q@uvDG!gOAgt%Kl?eatgS#llr7fc9Z z6YUlM>xh>w2{qr1nc?^Ix=L%|!kx)(AYs{gNt4SbXt{6PR4T>2k5HZRDd>X+SeL>luc--(?nWuZjs>P) zukRpoY_6w3E2+Lbz#DpeJuR6oZLRL7k@qdZ*S&mWx;Gt!n3QGo?3lpgJc2e7OG1RF zLeP}Fkz^D0j0O2;=hPer+*KI6e}_Ge!tF8n`V3tFO9{9Cd#FzHl1Yu$u$3#lZ*4oWNiX-VK*i+eXLfRp8_+aV9BD=|7mkw)&liImV&+{eKSg4hL6c$IJ?IKH-huh4}C zZ%c&J?`qn@gVZXjJYXGSSI3zd439MrL+|=k)*6bIbi{UnG{8vBAj7*H&K0~D8+Ib5 zmK_$zu{$J|AOPB+Ilw*afp)|nw+4gP<&|JT1^1g-#d?H!WOO&fVTzDg{G=@#gE>g&JW>DrDWYCz zFuQ*=Wlz}Hy)_9y7$92>Q!J|zKE5{adtnH>wED1A(mT;ZZ|kSQ8#I_DZ_cw#AVUZv zwx8n+`IiQrVL4NAiVcj#1FY|37^?S@RGm5}AXJAhBI{cAdQtxiBLs;(Rr<2_ zT+!cOcdm2A8omX2h0+JVRL@5UE=JN2EEQ!Vr~p^B(RLtc0y9Z2VAB+^Nd}4HQ&qio zATQPyiV_KL(8ney0AwcY1(RonAYOyRyrm(a9N#}!+D)7jl=%$Qn8?6WEm?s~9f@6X zXai+rkaIks9*ahcpVW7^O{MfXm5}De0EAGP{^ugtN6w+qIwlM~;TOp)Vc{T za;-F@5!S{X)JY%@JA6v{H#84E0e$0czm=VCjp~nXbz50jkx@ca5jm+JO{fo&PR5v@ zi65BN#oOvZnpBykGS5-s+oOuN@=Hus7RCMio}jFI${{b9ty4P^fx?L{M0}IwR#X%9 z`+zP(4c*%zL_z*ce9|8D^v6Gwro!44Rz6w3kz)JE2^pt=9cu`zqc{9vy7`ueQa|fuPKSb!McQ^qr8mxm^vN7vCi}h;uVE$D5J%DeA1gN)m 
z1=Ytx=E{KYh1h$(q>{xR4NSE&E`VYbMBC-Bn@W(AY-Ff!397!a&Cjp6J4y6XDXfs;ND-gSZO%(~DF4 z*Z;J%AjcYu%+kyOzJ8_sX1v{}nmfG1VLx!d`RhASH#MjDUh*^$^$ko39^2Mf+ zRz;Xt#La39DfM-kf_2D+572k1E-ET;=*!!d^ybj?rC9lju<3{#bt?913~Pn^x; zFO@^o%Jk^@bqc?ZLwo>|h&+hnFM}*cEGtJL1&iUVMqJ_inK4KX&Ov;s^z!XEqTjy4 zk4&lG=7cSG%x3lDh=AFrt@oI*th%k-nE!Ks!jXzn>kUip?Cal@zuW)_ZGtKVr8?v@ z4Pg-i#*F=D(4C;+jaN#7%>Pao1i|Nbb!rtsHuQ~4X#Z4Dksul@(@=nvfN2r=tW2DB zMb#Wj-f5po+X^C*WT&vt80sFHkXm5yQl`n9G>F8Zp;=Nu+Cc2c|73oKq*(S1qOoi~ zlA-n*eUt-;(N9tHPnn^3K!Shal`vA}K2RpL3P2-a=3tT}qA9}VwCKzALf*|)BN%b4 zD>dX=qlquj^C#PBzjC)!@=+^SEpjAsm=BC3e;P0Ur$0aRLe8K3IB5+?5e$K|dwn5x zPPEE^h^tt-{*!wZ5hWUC!HXOKs@N1WDkcIVaaFYzY;^gj5cpF%(!cfHhP8iUUTWZK zUzHW%8qVx;<04OuRaL!T!M8N{w?YLf4r4nC@a%z3Ri=R<@1RbBLp}#T*9!(H+mIoB zCHBAlQxZGG`#wO_j!Woe8&*##8aF^0n0ncIhfr5DvQX75hChq*2x#ter(Ny<-sacY z@I|w;SLDK{gKnTEgiL^3?5Y+y2}2shbP|JK`8RMeGZ;6{7W7TYiSyc$xKl@0HCtQ< z+t4$s70n3ISBm{NLCUrZUEf?%Y_J!M?s4agMTJ>)|zyL z-t!FU(_^Y?0+H%DIxA}YOzEn16x9SU?l+y|r^;%x)8HXl8vN71-vWDxIp%;AVHY&lh_; zMIl`Y%M>?i7D_VK=C=%7IPw*vhI1qWh;9`4Bp%U$mk%ziS#`L@b4< zf*r)qMT7A0K1;$d*wcxK4Js@6(MG3PxonHT4u!X_tw#TIEjmFQtC(R;AUI~37YuISSIhcun$X}+;a@FX zwnP|VK)eSz{A|(zJMImYtSWl}m7UJPal#O}cH+&+2YZcdrg8;tCnh)%F%vQ)KmePP z(16J9Mii7VhQaV%Fb#2bBG@G3C!_MiQXYc;@hE1C&vZh()uqhnMJ5C%Cu&iNl-up4 z?yF2=!S$qXn9Wpoq;&yZhIUOt(M3NAm}$~CQOcG1A62U|P_flsFiGOELD?`zvzq3O z;p7IHbOfgoF|scic?3p5+VcoTzo@|i!-gO!VH4TZ&;DlR;3O^!&c{^CAoqWoIP0h; z-~VwV(uy!zP&%dF2!eEXH-bn>j1=c+7}AZTFdQh|B`_%wB$OB(A}|_>fplz)@8jq9 z&+o6DowKvu&;8tI&nvFiC0R+@YFc@nuUfmj{CV;s&7I3^bB&x}_ zSTX6k_IB*9x{)=t=jY)1UHiIIe#G_NT(JE!Es7xR^CBg;TC?UB8P;p)#lG%gTqTR{30HmfFmAL*hvAWa2o-&C&SmQ!r-O*G*5? 
zK6!hJ>o9gL;_DNceTMq#_^6KkMig-_DO;*Skbcz=2(B3YauEFzdqQ5ez3w4!LqV5x zB5&KqMxye$m>DJ|2f;d@wL8FT)ZI4eBEH$U+8;%AT>&us>f=dVd6<8OK^vpowx8P5 zO6X>aZrqMh2{Gtzx@LaboX{s)Z&j`tiTIW`?e+UJzgK+}lryu{i$B^f@mOkIj!fzE zPEUKDGAVP6q`RN=X+mMLFFb~97P^!bvlNc!?b|yKL;TC0iE~YyB8!8yjL~i(4x25B zZRP4RWUy;V`1sJy}G!tpCPoT(bv*UZDk74KW+89@6}(&gI|BH|H0 zR`IOepn0zbqX-4Ztq1y#xdE2mrY(>h%C4m?0_1U)O*j;{M(-uj{khJ6+tK1Ab zC&n+87ZiF%0_Y$BErwKXFl!oR?I$;RYIw=hLAj7BmqFtaVv}ij<8T&i98O_o>jFnd zZBmWT{qmb|GXuBM(W)0Dr_a~`u;$8$**E$k~>WSYi4?a)#*}G+%eK%Z6iVcmj zwiYtCk(56lmIn@2n$AT*FnejVDGOzwWgFWaR*$%k`VubLsJwdybg;`?l@hwfIl+G| zfL&4r8u-?x8;>U!+@Q`ljMjtcfC;ye-Io66*NimLemJ_`)$)GYfhV)WbC(dzRQ%Hf zgrfe?=9gg!sY9H!*4^byOXK%E{0<}R6}5*=+vS8Q*`wf=++1m89?}$duM!t`;EE7G zI4Pbo7gF$cGxvy==oo)9t|l&dR%u~&>%*L5&5+*bBKGRE2g+dl182-?K6n$rHpw_~ zQs(Q}tmbS7%4OiamNK}0cO||vT{^cA4yQJ zM&BX4$UA_uh9F|Xza+W zDHPkQP)1|1^`cGnC?QfF<{`P1kZ^y+^t|7+>$@&#a4rM-t{kE_|q$ zHfB|`ybZ8dJT*vssdw~kEv}|AMpW@78b(4icJ$0|i3p*j!qv_P34&M#$gUeaScM+_;aq?oK5=c|4|1`R=#GCE1&Xm zH4Xu~^+PVgtLp?Z_J@hZ}u}Dn-RLq>`719KcK;s(R#_rS?qCu^Een)2xM^=+tgHq zZtY7z0PulMseWw$Yjd#T)0{X9Go_K%E{O;YW84dPH)G!Gq`7Vz0;xZ)i?qF`hX~-2 zrrmpOR6pwQ;zqxtR(=ncJGHU5Cs0}pbv!_mVgUulKMBqU2F=~d?}mzF`~x(S2{BaT z@cZQ&b{$|>Jv~-{#hTFE-CWPd`#T<}UVEKS{FiZwZ5>B`r^T}7&utm{ZE_Q?`f^?C zw-_Q`tJmLve&vZq^0QvOIBS6M;Sb1sij?;YbAkJyTMQ|4zENOGC2;$c`-ai%t<;gi- z0c^$q_$^U~UB^0|5|<6!b3U zbnJFFn2uu$bd~u2L+ld?6Hd(OuXHnoq5klCt#t=tS171ED2jE5Bc>ojFHdB5mG9!& z(Rb+go%P3SRXs!qn6z#WgSoD)ll14MmC5!ez7Vq0V6YOqu(7W5{y~N|M<;#*icpia) zarKoDJi_P0PFj{IK|r*63UZonIt@BGlw>8+DrARV+3734Ej9_CQMqS-7{p+PTgnSU zTzL?~Z$*!3frACWhC&1uK+}xmHi@yol#;&s{yw&;@PSe{cS#dh!zibinQ|N*lHeMM;_G6j!td(Ml~z0Wn2~A zPQ=fi&QIBbj{VP2&^#%MO8zCEzGK>4#>5$S24$UTh1$E}qK{015~dI|OtHG$o<31~ z5B72==T48*i{Zs!)syZ#d`u+3PR9{Mx$&q?(JE1MaS}Ats|kf)gzXeV^aoJAgeqPd zk2w5ty4|gX*UO!cf4WE%XW2}wl_I+$m6SfT6--8U8g8w zLs;h5J+|V>pBaXk=_b9$$hp>K{P`XHsSe&tGzYc&_JYMT?`G_GMc2}^R|C7zR2>I3 zQUoO>pCwuGz9-vNr$erF+rt&xi9Y&x@PCd3JumSH(FD5>PxqR7+LuJWTyI2ZDv+OD 
z$oH=W!w-)R0$x}{Z9BrwbxW^hxJ?6R6|G)v*$^W;pTFALU20>|R@UCh)YW^>dgWr~K39DBLcqaLDH5I%sQ^XF-?%HAoYB8nn8M)>3wbRUsNJ z3ku{go&@UY+K1OMP|3` z|Gc}LrMI@!r_*^HAGbXw4&93eHwbInx42@wtN*R+*TYmIa`iXs;#jIlBs1Nh zA_JAO1fz5YDRZawGEq}&^MX|AT-h%9n(5)q`0$6cnjcL4Q4YT}eRPorCjQsYWc=X> zSxuT-`Dt&IuTBJuM3?TN!^Yq2}bOuBmjSeplN^p7t?Wv^*EN3M%h9t(Eb&z+qm=27j&#_CDarn8ol%3W)1N^=l;;}IE>1D(sk0>H|GmP7yz}HL8MeF2e)&J6d zdcQL?J!PPUM_8?oC3YfeGxRvQvXSIn%EJjSYncC9at(5!5!*1p-lOCEW63W! zCXyER)PM#YB6_~#)EZouk?sxG+RX3BU~i$-*z)pwk6eneVxXmVAP1R2XXpyY3~-XH z@`NuB{noIzkv|s#p>juD@5)V6d*ip%2H2v2y|o$V6HV)5_08V?a*tvaC}Geshv9Tq z&j+^hpWaEnytT3w2XLYvc5X3b{>h;Fp?#HLv+ARvcF^Tewpj4>$S+Lo0yO!_^=cIsEviW0 zd6h|rG{CJ`g=MF1nWmLcPtmw&GyFw68c}}&SMZvE&7R#ZPu_x^nzwn(_CRNyr?vW8 zv9&1q*OB1G@i)&Dp%a0J^`a8BGmW`K;WV+#G3l0t)AT}Ipwx6iSs%IZN|#-|sunfM zNlp5`e)eiPpb?JBH)%fSH6hD)0KI)~5fL)QuCU56mDuPOP8K)oU-Wlmwg*|D)O+w5 z6h@d?Z_z;@78hRs4u$ZMIgjE_&x&XvLxl*;&#!rp00Lv=|Tznuh( zl?*F{RK?H}IkuhDwujWa43--d4#H^v%1IEsrR!H!(`l;+YAxI600qh;mFdUyEabTQ zO^F;pBj)b<(NgyIa)&J1^tl!Y0K2{A>`W`LH<>rBZ~ncpkz5Mj`KDWLv;KQ8lbY2p zIXfC+cohc)ruwJfQgy(vLVAHu7s6ZSgp>ANk~SS6_&~@$GwBO4>>wBoPuSuju)c3R z`VnxlFmLZYt~+KJAKs#N()m5#z(p2i-s{}oAXfR&L`{CWFH!U2hu2S9R3BBbsz)8f zRn|;-?I4ElCZwfF1UT2d=&&r@vAuw>*kxboP0Srdj#`!>q{+8b=wmhPcA-XuZ>xw;ezSVD!dwU^Ujiy-!R;cws zX4}LwdAs55#n(#b{kt}%2;7>zY^djcmSzKk`p{OCsA9Sdw(a8kc`u*r4Su8| zfwK+n3#l8@ILtHL$PbN!KUOO`w;NAzH(i)SJz!47T5OGqVI-4WLfqLS3@q>-*&KoyxXdRLjQaA2ZW0{H*JLvJ33dcb>nN+ZRmgH;tKHe3RzlP9O_a!%%Ly z>q`sgAQX$vde*2NK;{TZ)?2G>c_=ZjcJ~$<_KI9VgG1823SY_dgV~Y(+Y*+aHl_i& zjLViDNnY!&JM6oySlF9UmZhufrjVFTcMpTB(E4AjlHyWdw=C*rWzsEHpD#*lUf%Yb z_LTkwAAb0Lw>rMpct1C|2#G$!`nbl1J)ne9jTj3-EkD#wmVMG`n0XP@h>#;JxdgZG zif~4VRqH^XG;cj**$erI#H7#^uTi0YEcl8^-k!1z^IW}2IYKT-UoAKPJ&irALCEu9 zQ}dcjKX`g=&WooHJm=^Au@q!J2fIVT?z}4^{n+=!c}c^jR~4NU)5%AhQJ$<2@YVY$ zZh5&oV#@Z(T(;r6@P#g)W>Zbh<&5?%KUs*JU~c&%4;DUV`Np&L&NMR1VP=Yg>kndl zCh{@UhQ*TyJNs}NN*L%<3GJa2TKJ$2xk4%-J5I_r_2QJT=dPeucSHoK%I)Yc4r{4> zQ^(8=zT3a^%-#)l?UVZ~CNFrp2(JKB$0_&OmybVpX!sDxozoEj8Bw;%H}*S>QRd1@ 
zZg)Z8$@`+Oa-Zn}ta)71cJ{?}0Zo_UBj2%=7ifKIs;Df@WW`sS;NK66#Udvo>oRaN zBH|u=K$6$pgrVJo$k7LK+wd8FkqGSlWd*E8W!Mp9T@)oze`+Ifd(bgq<`XnQGD(?3 z^E)l(b-LU(AuQ6XAtJ2H(xTbInAvIv85J(H{P;5ikoy7WY5{`WU~LtHOm1|uc9ksX zJ_yS0T4Rf}yXJs)cj*r(6x)=bI%@ysiXPVn8YRqgYS2`y8@n zgm$Rn<$dTB@V>9c1niZ_j?xG6?JiCjw^`USEW8#dM@Axml%9UFP0OQFJE6!_qcXU~yyJdibM zIsui7mAuvgAC(#@`*+}3vE#F2Ya6g;s_q`_3mE;qXo>hQZfT$z6Cd4(wR6-w_$^W&KcoX>eUYuhGcVJb4ii=W(Z>=--f|1H@Rx zA4p(_ZR6#`SQHM+yEf|FQ)J%-C|m&QhcCm`8$d?i`;q4kjmi07#^GN>C1WLJKzcYU z1`IIbLsI%d)Ku42fm%%eE~wBDoqDG zHgLo8mL9)^jC$>zs$IOLS#|?=z1x=4txx7{te@ovZ^ZwZd~cMio*$HXSmZFuXV`a*^3b*W3bqQmP*;>-v7~B?EI@Fw>mTxA6ewU)v+(H_dY+NO?G6) z09Zo;LO~?tuSkg7$CcyJ-U1i_1@*>b%Y0_4`>O}oU>Nq{nuF@z!uEKqPtdib!Gqwm z%$8dDE&G~WVA3jNb8~2fmxUPnFsU!)eyK5&!s!>`0%Dy9u{VJesQiezJ&fi2U@kt4 zG|!^%FcSoQUjPpBr^Fu^33#)G;zgL9ecDyTnfJc>%bi^Bq3cRp`bS)5O#bam7VUQ` zK1a8YlFV0}lx7g~6&tFkI>qpCQv0&hk%ia1VgdG6{3(2)40dPt#V*(hR^Dw{^=hnG ziq^-yY?LZMzRW?$KZamhZ#v(*Y`We+R|hK5no7@I z;15lW)QFa~98}V88Pn_1SJbq?O*1!mzg}FY$9EoKruZMgllx=f#$g72@HhM4@GDF6f{{|rDXwe7E0FWHB~`?Jc=q~eQ z5BWp|t)p8?v8w+F$#NWBw$6)BK=E`9i${@evbK_ca;EiJo{_DSHjwH zG@qXFl)+8ir%=gc^hP`6ckVN2@*C>Ki!Me-#c||6955~Hoe`KEblT2*^%ZF>ilcL& z6n@T)53d1%Yc1(;%8`XscD?d`Z4goLbnSh7y32sUeM?Ax9iV^V7tSU>66yFg^&-N> zsv;ARDkXLqn-H$Nk2{f*#?A~cWZYjJm#{&)~747AEm7ypH6Ho zGVj$hIs&7FBgxxi!gHtoR@VQ~dLrv)!ydt%N=yn5UYFJ|dgiAqU9g#zDW=akSxz=f zaRQ~?DKY3qXn?{;oJx?F=f^ zl+P}L38+_!^Ke#loGc4Q+TMh}C;1dj&7p<;y_Y3}n+*viavi&JI=?q>*0f>Y?cN$Da=u0GGgKUVKgPq=qU zfs2pK^c8*mxe)ReAwgmX?k7?y4vn2^cpoJ1p7V;f^|HF5*Qj2^mL#_BYgXu$ci~-< z%uT&(`+-g%zj2=T*N9?J^{DKTDnKnk5Hl74$L&cTeig5JlaA|B1#C=2uX4e+%o=

<`V=polY4VTM&HslBA;&PtqV($b4n6+z%-iKSdLp;mCAV%@;Pjn}| zznBk|UIA^%GVL!*f2wD5u=z7dW!@xKMsUSjs)brdS0?jzGWeK|lv)v>8@pC5A;~Rk zQ#BxdEr7Vb$IpM-bQ!u&sA6n4xlPr?tOXDfz;GVW1rRJ~)G{lAEIBNZTFa*moAnHC zQnQGvSAH&TA9v>hcQIC+3Gx0U}a=9Lr6j-DqM2g z?Q<2~Io~*3O|TbIfT=SERytrj<;dmE3;v~P7E%L+kh!K+v7;hHtY_k?9QwKVIo!B@ z`q7=1ytyN=lUk6kWVMu~XiDzrnkEHQH~(*PDwD^IgV(0k9Bm|<-wv{;97EgDcr@hq~EQ-69pCMW%G?|O=KF<#a@Ml2g~MJbOWg z1|r3O*q^%DGa2QD;$B~PgbeWbLFq*Pd-?bMKpgP@sX$PbM>nsWz27W?=6V1NKY_No Lfm*GKeZ>C(QpmAM literal 0 HcmV?d00001 diff --git a/docs/offspring-ga.png b/docs/offspring-ga.png new file mode 100644 index 0000000000000000000000000000000000000000..1566901fff64af7764afe0b4ec2b47c4bef3405e GIT binary patch literal 18288 zcmcJ%c|2A9yFR`v$~+e##7?T6p=2JSjf~+z$SgvpZOBYX#t<@O%r;b%F+;`-*-1ze z88g&YGS9Z*doR!P`JT_~oZtDL^Utr3=cD$v)?Vwq?)QCP*L7dF2yHDDS}Jxb6bePF zdKIgSLLH<>p$=G39EMk9TvAKm8`&)dO$8LHG?seXk{tehz+G2G5mnU1`3wHRV|`Uu z6NU2SN1^V9pio=z(%oqk%2ODH`fY(iNu{7rtgh)bIx_IaVJmeNENTz=-_!bM3GfP~ zi?XsioKY739)q%c!}$?;(eE9j{2D3lUW z6|118?#$xf zr#9YCeu?ggu~A3c1j%2%o09ZCtoY_)4~b~`VLgBw3xA%8GJ2ulyQo=;3tljFd?~ZB-OL+^RI(p<+-xZeU>G zZoa)M=MmH=JJIhob8$}8)YNh5N94uZ=)=x4DaYsX1zpZ0o=om&64mt1Ox_qh!pkn( zz5G;w%5mth>&CZ}6bIqcoQ6J$(uP_-?XmL+$XS=6FDPjC99fXuTe-FuqL%l$LX+j) z`!1!VAAIJi0^d?(9^UhiqBTmgWIOCQG?#kF(&(7uP^ds816lU`cDV7Y!#XUbuV05m zN7IJm2l^_0`Eb#+pjQf_*xdf^9gjXCY4FP65K1DZca@Wuy@3b!$maOW%#3LZ z&FmfNKXC$yY6GLAbbF(w{j>=oVf5UgvGXIxPOV8iJekGBm#DTGiC@_<&C^}{>&0W2 z)cko;gUe85R>zNRy|aC*#be9&XPe}7Upy|hPldONXMD#3s*C)lHXkTv&*1WOf4#?< z9iwK-!-M_gsz_7(b#|(kc9qkJNedLLh$vCHG8CkxQ!WMy*=jL}39t zKRQt@Z)US;Qcr(YO;Ku3*nI5(7TdyzCMTHY9*UXVc%k<}h=rlV%kH;|UlhN*m-at#r6)x!wlRwdkp9%kOsd*nfNP2ATXy*M() ze~_?Kx>Eazw7bZCxoPb?hjJtFNYM69%sZ|l_#UcV{%d?2+Y38Qeq6!CB(*#~b6qKU zZeMOznfzt-s|y2x<~O%a(LdX4o*)HooDz^QbKT}S%uk=w1QD&Z#jh>E663*o*rb1z zwq@a{lB9^wv{#)5|Kz#I&#OWkJQQ-5g9)BeXLKx=E{hry0?Ox4Z$)#6U^rQ~@J01-4(i<=-%53U?49aDSB#^{h&&(SEQ>Y^y!7c9=pGW1bVQ7KtjDusPgfB8Q< zu6z<-elEEKbyqF#VE6Kt96#IOAOCHm^XJcxkJkHnZ>`u@7e5uFaUH1b;ZRvnvi%~P 
zf)~j2_GhPst3Jz?|NiC$WHDKkC~Fsjoj)YMFeqdoJCJX0Wo32n;>C-|0)#ai&3u%O zxgQY`QDVo!NZbGUGt*6@P^Ten7V@;TG}A_x3S(`SQ-6$PZkJYWZu(#Z1@R-!ha@nD zto~gd{NzP5p%GNBhPzS%X5zm&NaW_hXA?ifP4-R;`>_2Lt<1+MO;hmkQd(!x+{fg0 z+j2-B%5O~Q!i4X(DFDRAxJPODJgHid||P*wS_2cRx9bW zUH(Xb#~EtLlicyZTsW9A@|~54e>z(Alc%h|hU-alb8~JVX|gQIP)iHAul^#_88x&W zcEW5<0gJtLD>dKzY+`d>MALrn-3xE~@GL7Z60-lq zyC~Muw#BJ-X8+|*al5La(zep_^84@y`679s&R4UI!26KozV~>RqeFRvjkYWx{Ahpx>4{L$$ z3o@gm(wZ9f&C>a?#5is*9*Kd$nKaSrv0IS|FDnO7s~gzo^xnDSHTI zQtx}IlFj%P)PH2VHICO7QS0kl8W~`&u)$6DzBU}R4hoXj;Pmwif*X=}$VhJ550GPS zq?Xm$-ITSaC}+g^G=BXaGk?VN=MpE5U>*xUKk2U$YwPPG($dr@@7--l_&Z#f70*Vfi}quvSAz*VEdmgNU$#pXuq zqmVE`)1myh!l#*@q@}61=DxkvooVUo%RXPXOn2wb9Tcj+*qqRrCN3f=dE7@Plh`Co z^SmI^?G2|o$BP20e$O5|(MRn!dF6L;@CmLzJ{+yv`dt{uDU?!2g@QyXOmhgu({TYx z2?78$58iD*#Hf=SY!+HyFKf~mD7(FP{|G%CmU#8F1^_0y(~s2RSvD6TiD%FF;ss;g zS{8pwHV=$Q6L(P#bkyvHa~1TRDvEc*=^jA3_3M z$3Xr5Y`lcY8#nT|<5eTi^Lp*4mVGei&u1HA6njOzGg;e9)$^Fh6fW1B#ctQd zmNz!W(#vh2Pz!5AE(E44^TV+ga%#j9<#YrzE7=7FamRAY%iWQ@sdsTpw)?(~k)=ZZ zT^Prt8p3dg_igL5YLK(N@}y0!Sd>Cm4) ze=Hydq@|_3SEhAxO{ac&u$N%~xkJF~umEP)=>6lTz?~TAeoj>LK3X;Iddl`f3w>!- zRaizwhU);Y>+;Xe_XZ|kW;|i9z(p!Dge-X4i?YX65>)pW<zy{fapPQN#UJ@E{lggNm3x2PPirU=hBNAE{2a(A1p08h@kX!xK z+^mR?e_)m_tO|+#5R!WShi~J$n!N1i#sbz)H0-Pm4cy)if*2YY8fuvmh5a%p@A ziTiQ*+V`2Ia&*1mSkT_ir}ea@fU(%skKxmw9V(396PFLk5f;=N^b43^VAwUqTXFTM#k5FF0SjHa!0YVjPYZaIm+I1!@C%P!ebCM+)> z3&{^~onUgy4SD9C4a@FBjL{DvMJB32qWIxR*j-ow{eI0b2oGAgu?f1^ z@UC#!xJu2-GIpt<$~gy6uAHam9)(XogomkvKATAO;fLGxYh(w2p=jg|h!H1!xb|5O z5IS6#dp53uoH2U%=TCl(UTlSNwDjXvy9(o!Cr=avlkejNAaeiu>#u>~;fK<%-}_{S zL0PJ(sBo7hR5}Sn9yINrl*^-zg^!PZO!R>=ZfI^!e_He4+uy$(BtmfCfcz=xe6JCN zwD7gRU#A#Jp+L*>(&(**#6&eYp0BAi60gRG*p^2?)>=Pf>djdRclueBwhUu!X*vFW zUMeA?1KaNj08lxnr%w=857IRN8GHLPbHW5Isf?eWu3-n3k<~Lg=3%M=yI%S+HV*w_ ze$7KFdSt}Z(a9;h&jEJr#MSG8uMAo~e$?o%yi*$;5kdLI;Swa{ckh%we*B0aBS3tn zvd5Eb9>l)ta~QWa6TA6|RQKY=8RTtucjio@rH{{M9zohU4KIMAWA2QIXzBRe(Umji zzIB3H!sJKqo2)MPS${28I#{ygqV-!Aa*RFUQ*M&xy|1fVu5$K!?f^x*b>TQd%7BE$ 
zcKmP)%+{7DI}gk-W-??*Pk$AN;odX(o%U{ES7#HKjw&r4RiX@{;{vvbP(0uuw=O&> zB23o=I)2RPh7&XZCF{=9Z3j|iI^>)6tl)R)(Ah*aw(^34hM5{6+1ODcrx!-E7bx7( zwqd`bCigAbHi+4=vk~tyfm28PO?%7An7jlc611K{&44_vr>~T)jlj&Lv&@?o}NdGW*Syxe6!p+p@>i%tgNc4 z?b7dkT||I_nR#9KR&(X-)lvFMB+O||L5#?^=PZ&?kjo;}`>ij)Cqbftn$H$`KuM>W zjJNvoVOGx;VV+3dn?`hn?#4;v1RQL}xK3NZBax7o43i#2M@KIl9JtY0ZXy`wt_4hL zAJQ+#%(qtGPq~{FW-bNA+Iz0@_>_#6m61V8_)2Esq=cpJongt6W9^HuvwA+gD2Q~Z zi1e4^hJ{Oplb4j-t@WCp_|^N&)l^*LSlKtn6g^Z}`+ZD@<{uRo^FYzCrZXDOvnjJS}ZNNX>f(xJr8F3a@zBhxos2n{itfd z+i}-Ww5loVD_AxHuB5c|00GCN3S8u{>j0l6oAO5I;Ia(BBn1Xwls%4O@O6O4zY7;U z9yZDEPS@KZ_yInlm!iwlFrv!_(n@$}KJd0)(U--=gcE93KbCzcf>EaZwKbjhT3UIl zpBP$LS{?}I39()&!-T0SmrPx(emYnQFcO$%X<1qDn>TMtSaa5co!FEmu#77f;dqIa zALTpF^}A)+VG3Q7=%%vHK;l`7gN53ehh~r!18`zh`o3aK2!afR9JeHyiHl#*00W3&)jodPOB-?)%n{uCEb)xV3HH#d_ui30^XV zwkTV{zpmgAs zlBK)Bpd(?TBbaOyoEmNx30K()mn+HuYn;r99QG9Uwy7I(dH3Ojc=d;;o<~e$;Nf|w zmclh=98G{19RdTnf_c6JTE&H3cr>;%=e4oYdu&4OIpg-oJ?eqZT%g`I!+V(IlJAl)TS(u_F&?L|U$0O`(4 z+;C$MxBTvgHSZwbnPT<$2{ti^hnky!2GFX_pFiskXEXGqSal!jIb~&ZOr!TEvN)`3 zaC#=>)?1pV?oG!7j}PW4VPj#;-~b*kc+a4aT(eigL;-uH3EpOca{t_*u+B>4#wqH=3@`;V5398OO;MdJhRh8rS6N%Nk3XW4iP$+>lPG`-l9>M_(X~}=oNiE zH2c26RL*aW2Iac?yKi7Hr_Z6O(&;he2w9M?3kpgw=aSZoY>Z6n6?J+&cdq{O?)L0_jW}xzf*3CrpnWae=XJ>xfm`C~cpsn{gTuNuoDR8jz04#&{fud{tr81(~0Bn;G%gtTOd73qr=$aM zXFVGl_+Pmt#US3@-m2sRKM>2E$mitC(6ovCgU`@<28j~5;MNeNhDxHd*{Z{M0$RzyK6$_miuj3WwZcBCD81D}K3Cn(mmERpYTF^LBMeu@cZC8%x!QB)m|4)y>eZo} zw{KHJ{i`Wz5!DRGhx^vYBqkKu7WAJY)QIqWQrEA*xUGzB*SyngJE6 z!dUhK1{zW{Emv;}Q8Lnox^soMeS(xm)AAM);c&J`=WRgULqkIW#EWEw3h`xH8R1i7|zO$IXhC(-kDp18;EYz?G2X@Ame zRcZgm-tWRNJ}Am?q)u|U%Kz^z};u&-5g&60Cl7kAdj=mQ$hjIHk&c0UZ) z>MX+maD~o5M{dj0d_=Z2C@lS0z4E}JF#I1Ai;5Pt(2#n(~3MYoY zl^9~&!h^$OKv)!L!k<+bkt{y>J1-e48VH!+u?0w9uBNQ;@c6z3xfVJORRS^fdDh_$ zg{9KzN_;N12unG^tQ04!<{0a4?c;zCVYi|D{WRAD7%^HEx(a8Q!`#c9I3n?HD`%^( zP@0bcCFWcFRm0`weMP>iTxK5sTEztN)u~Sc>e|JxUO^Z0!LoqC_3O-=noqjADrZX$ z+Fg!E11g0=P~F0@^(Nzo4{M(6L^EjHl%kPLUy^J9aG_AdOD2lHJW`8&j#LsPH?fZe 
z^6j^vfhMl$Eq{#*ZvY}|kw7c9%+CKp4 zDGGdu>mL=SY19_jfBT1n>4r9{8xf z{_4di51~cr-y-ba@7WTN;UP$Hn(=aOm?ss@Y~|^`m@$!BA0gEV3{CPvQhY4@K+#fw z)u-PxKwp0c+J@K99)%ub^|e_=X+a+fLB&XX2&LkJadpuQ(M?i5p?xC~?;*?objG(| zE|~g;YZFxrv&mDSy?Z-jd$JnX;C0=`sJwjpfXSJWbycFP?hTJNmTi->iQ9_VpDJFh z`B0GIei`o=8)awLa%^waeD49G{_QOZ?J?cH_Tg`6wgA9M zgsv@aTEfid$IbIYndns|A0%H_nn47 zB8UG6l^co@C%z0roo^4F9=^2B(Betpv@HQAJ8?C6sSaBmg3Tjy*@Jc9)3Yp-{;@!t?OrfUVKj7hw!QOFT^HrNn_?2_~OZ)itE_ zbaFcX@)79Iu`FI9|$#U9Nb%=-(#4el>Hkw%9_=AH%YLm;3ua~ z3y=qVrF;lFw!VIKQAb`fB`~&@y(m1z_XW>PxC^u@1KG(s*uMH7J3Ak+p6nW{8o$d(ckeLJ38Z(B~Oub*vioPq@fe7N3E+&^;hTz9klh7OxPV7%`4P+G7L&2HT^xT=_qLYeqVhAwT&R903ZP|3HRPKO1M z)b%Rg3gdT8O-*q@IUD&LCjMbQmh!Q^CXesubq4c1H7m~f=7|61c#y45h5y%V91F0M zwl8gMhp8SM1VJpP=PhUqhG$$pz8-P{y$GZOf4NwwK1G{?3U0t9WxA=WAdBG*3O$`T z^cn;FGLL}tK%t14&-;EO@N}=J`IgL3CBQ{^x&{UY!s6nr$AYAsVlOid@wFQgAwe4^ z9o70&U{GjiWE3HdmdbGyqc-htAthHAahSafmOKb3H8npQ`0o%+%Uw(dVNIBvb^Lp? z-*9ewS3pM@O|R&9&D_<3)J$c$EPCQyvSle>q1~OX^V3Hum|*2w#*TE)2beW-MdVhf z#FxS+YJlhk+)W^oDe1B8gzzrt|8Q|2ZWhfzf$6lps6hj%3UcBBPJY^^SD!he+e%3v ziOWbQuE0CJD`2LE%S>Q0-EPQEv^kptW$Ax9s)64|-p#!VSI% zy(c-db- za~GZ?q5>yy#M-ZG%1z5Mpxj;7VS%azqRVUJR|A7+K=4*?Na;m0mNz_hqxh0KJ3ALD z&ahB{;CcKT3DCJEP`gAmkn0(*Wy)AiT*gMkrA#5hpI*6x%Bkl};QuIXXozvBpd#R! 
zIVhEsbRL&Xml-$&7pt-@u${V+5M-cuCXqQSPR?P>50Rmq2gj zOPVz!Bkh$xEyps64_H4+SW_^CjnX>+>$y;x^VOjO+U1C3`%Pb8|H#=fDUGI&yVYc> z>ccJKHAPn#LW+2`0C*)5LsxeJc|yM$9@&~ZKd+S5pLR%4mgg7$p$S{95t&6Z%JDK& z%bR?-R-=^GWf_1$uI;*<;KoVTt1vzvj%QOpko(k)Z((^m7SyXd`Uf~eZOhwNr?8K` zOX+Vwez3fK`&=A%3YDYluFIhHLBg|SQ~AoK@(#MlGO_|R%z2%i zn)@`3@y3hE@6DXUB0?KM!XXT+CMcw@OH4rO^vT@x7G_ZDjx6`(KzU-q#jN0RfXtb)z?+ep4UHd)8fw1%O{3&lKdA`nV zK{AG|oR^nZvSp%Yx|*kZrmjgN{qrx;pFe-bpy@Kah*7#Ol0*C5qQi_mHgBi0uK;45 z*BUV55(7m9usvVJA$)^jYCa_a2Yohx^7lXzu8PRFhaP?SA1fadKQer(##Wgn->6e; zxF84JjqR*|1M_rN(bqo4uATA9}OhR}l_@b<>tq+x9E}lGz$67DlZ7~abdR4#C_b@!~V;x_= z8hA6Kdqsgs{9fJ4tM{{NTnnG%q6e%D1Zw^ojzY!SSXO(0p-h&WKpa3K1n^RAJrkoR01wf_ z&_*n&sCWRy1UlV7u-RYXSU`YgO@vyh2R99Co!BFoTHK1MD zc1PWdx29SbkW|Y+&kdLQ2eEQn0&~kJ**QQlK}4V!NBT%B*?_GTllANNRYTBpg9N~W zG`GLP$7iz{g9_NG zC?Jp@n3_m-3_VXJEaL_4LA?ao6dWnq*RR3qB740q`#N;4CMK-@xL*#ps{=x%_WZk6 zL25p2z?i$cyJbCv4vP+C5%^f-D*hTTA=e2uN&<=sWmop|P_w%k86#~lJhEm0N-6mf zV1!V1tFtf*u}_8Pp3bU(RdGeII(#s+u?7c?(FE$BAlYa_@0%B5T2d%DTydWYfKvce zA+W^6)UiG-H-m76^?%8#;L(my0p|aU<4ZXk-OWP45BKOWQHjA+4yoZGL?1{!(2wG- z_E;^Ul@j8et~WF_m0d3lA`OjA#0G8ifrZA(>3-wxYVpQ+827@A`JU8QiH6;++t7(7 z$O%2baYj#qlcU)sa5>Fzas%FvG~ZoH-q>F54yzf<9vKP_S3Z}a6;{E0(A7HPee6>#h}XU>-2xC1p_q`aYlu2+<~V(VmDrxmnZu4Tz7y)g-e zT$P%t2t3P$HnW=uz!M(r1YL4?S48`vo)j=7!6UmOWGaiOPUp^@gC-6o=&Qh2Ax4&# zE`WmqPz|(NpoKb|&D0}_@b4?I!x_jd(Rqboc~xduXe7aI+fPstza>xNn~s zi?0mM&1q|W@!_UbjelAI@u2WRPtBy8k}-qsI&j1nUD7Vf`Zc}`kZvoTK&BX~Jb;Rs zyv*{>)$fH~WWWR;BW(;^C|jXHA#lM8V<&-2ETQefG|&tBPP9pkM`xP|9ztPt9(tY+ ztQ`rRso|?1AHaeA1^j1zH%ZXo%FoZYx}Ct2Uv1^Ry&;17{=2WRv_jz%9SE4Eja+ZX zI~U0m!hZz#G9hUMlc8lD5O5+-ml7#7%@P^G#tf?ZFZ576Ntv-ARLZ1Ne^%9D8Lr0h zRf=sw*B!hd96mIl(Rr_rc!Brl5USek2Nie*qJfHBS`vdcdtm?Vi_+3jH>#YKGO>^= zLa+G0wJZx5wdrbU1v+T$?CdOP5}@#`ysi$g-Xz^2uesr0TkDV>+K#DfbDyN+_Fp&v z`V62f=aHJD&QUqEz!=!V@B$#fzEj~2V6o=}I;nS7As!7P$-+->)h&)&AYYJczcN7b zWNB&KvI1mL^L><4%iGpXt+Sc5B@TQTLUcc?DZ*FLzndO-Lrk-%=JNt} zuY?c@;=SpEs?U0%-hp9iP1@!2;@t3?Ua#X8<=;Xu9dRb_?~g~$^cPRA_M6Ans-igQ 
zEVmo?*7p{+H{Jc0+E2stk9JyZ-`$mi(iesVTvEhK@Gu)u=FUhV3b4B03eo}~FMs`- zOoxTpv}WrD^Ye_$Z>g+X&nO zi0&2NRaih>lLDoQJa1s%iPw(#%#6g2Jt4lu~ zf@Nh;T6|Axshk}sEW5M63>9T%@dKaxlE3{w%r-kenV?c_|GwTB>5-}MiOW22^AyBN z%^=@ZJNy!q5i1uLdhq6`MCy%zWL~D4zUqtQ3#e6q)MF;!ifW3oM_(P9L_?7hEV0<& zwa+UQsvW9wncUqWc`tlFz$NP&^6QuVaE<3V5oXqePh8?IqcRuf4m|EFjD}7Zs z_|fy5*{ooyGIfPy4VuR7+qaR%A}bld9(CySod5x~K=A})v=HpAWYm0}Dz*iB#?$do zflHjy@(LrShY>}dN#WOTwY=tWXdAL3KY7)asF_s6k6*s}PfJvZuq*@U5N^5@W4iP8 zhRb-AP{Uh*7hs`W`*v%i!+bXwdg!+~fo?U8?d^KC#l4jPE#qF*sPDP25^|(J7yTi3 zZY*c*$$Z%<>w=C|ucSYCDif+(j;9cPHg-w#mP63-^50qQhbsCOaZTRl6f&!OySF>P zcOizXZBc%2Sw6x$E9M|H69r#@Nf&H2pZl<)!pIft)s84@*d@*Qq5xYUlsfIG#)FP#>8uLoVdk+tGVN4&(R)>gOgw-wHS z(3>6wR+>JCL!JG<*3+XtbLm*a=~00V%lQQb@QnD-s!XY!{J;>AAKf2-D4NNfkp%q^ z-v-@U7SIxa7nWddW7Mzrb^vT{s5~*ib*ly39#o56X+&@`!O(_wVILsmhhT%z(A2a9 z4)E3zSM+^711dVaTT}A|Y<@Vvcp4fS7Phwh!z%MQKqM`DRmSJRGiC*CJ!WQRuwp}x zJ0OeI@Wu_qSp(_-o7o+?XHHSz6Z&`O%mb42ui==Xd;u!d0Hcd){^zez6^i2P?5p|)s-v};NF z2}b*!=LNr?>7E(GagMDAnD?(11wgpTgQEr}AWoc4Xb*|q+?b60m-9yFagp_U8%JG3 zYW`|;#k$04FwqR=Uu^qM)O|6H-T-w)V7?LAEx< zItS;oWf6j@hrT|o(9`WGgnVHl0=V9LoR(Bpa)o9i@lNLL`CY99nU&o?5;5@M zE@13P=b>b~*_h*;5ETslY7Z=NfjmRA)R1ORD3TZT7MH%@70mNimjp3nfOz%>3L~NQ#zYx0o!u-q zdSB*0g{%sX(@8|-laBqHm0>WLw`CYd#J+6-GX|@Hc!vZWG#J9SjE#+9ItXJbOIFXC z1f<%kl#QQ0)DoIrKqx$AyK$T;`NwyuGlPc7&t__MA_FM^uq}(yJy=1{!@*N;g&!8_3qjc*cg4y>qXK$ENjBi{ zwYISVo5yM^n{Vu-kJ$h-Q-8fyu*+tKnb-=5IVSx|FlY7Q0~MUEx%t}tmBZ)?&G=AH zxF0NFY5=ZqMnglxaOrgFynP!hG`alcpc@A&6L^@x=I?V5Ry65{E2iPPSlWXk1UiE* z2r>|By#;r@R4iY|1&v-{K)`omUzvh_E#bwNLhMOHkx?o?(j#lX{zw=ath<8V&hzdd z?h*3g^PzRLq6^QBy@|Zo+_MF!H@G=bVXF5n*#Ptdzdt=!-eVV{8u5=VxI7S&vn)|l z6vo;0>o!q{&IjzLQH9W6}S7rb>#zxQ$K{po!xT? 
zEj|5cX&PSklgMp-f*YK+LK68E0#p%J#86>34Z023B8^D*rh`Gj!19TRiywhTe`Vz{ zF9EFR&3!KOGVV=r&4$6?qWc2jUVXeT0~ z^b=&VW|A&CybG~)N1H%h*6f`Ck54G_ZzU^;*h(k&=fV5kNGuaY|J+{=0-SlbR2WE+AYv z;Mmt;p$KL`u1gu1JU%WJysKbX1*v(TI#M%J@v>ioD;8kNP$fg{6T3;eD7e-iFCqkv zAr5>|qz13ljqR>gZA^0Sg@ZH7aB>gf;5$1z*w)1sdin4Djk}(I7yf~tZBJ)ev$W@6ATF{rJpiw?Gub##tFVN>NSwiQHv=izF` z+nnq3V}$Zs-As%Gz#=K({in{%$Li3~yMf~ibl(s-!+-k?z{a$$10sgAZu-l8W#c5= z6NoJBS3%tcD=P{NnR-(M+L#f02ZysuhNuv)K-mu637uA63dNW6y@ANy%3~QGasR}t z-=bUCSEJ_9QTA|kb=5E-oL+vL0Vr3TP}Mf*7hn#Ib-d1U>a(Ft_Z6s5^K0PK1KNmT zodYEn=tmoGB$(uEdE+&Za4}T`Q5OUan96b6=D`65giZl~Sz#Katsej6saDIMKQK0O zMqzmpcfDq zRO`RJfg5w#)$sv<|2Hdt_vv5K_(lyx%>KAW?iDsf6DLDh-SE#EK@Zc|AN!^p-WpLb zIrC5;^HhS#QKv$oGQmKFbr4kZT-LCiodAUFfpg|vj!LYReF6kk31JeLM@nX9+tk#} zToFVxFOGuMe;u~p>PFEK-=Pqr91cL9;l!^n5E)GiGS0A49kkV(wEzwYgd8lnY~ABgKwRz|E_L91MO->E-V3OF z`!=Qory)2ubW|!iZrG?yF>%K*&4jDJo21=dKr_B+?HtR{TqKM}~A@IAHUU!o1p3i(MsT zA_H+qvfPL6Cd~SMWxD#TH;zj_h4Z+^M{1y4@HPC#{d9aEJ))-UZf};7XXA~FU`E$* zNKUsz9XvK+SB~p^4DkLYtZI@V;Rmg*Z~?1*pbnTw#(zV2sr7)wOZ!hGdK3`BY@msk zjsZ>F;ald&l=Tb!_3PKwNo|%N@9Tms9%$($@b~@_#p=j>=GtLOllc2&z~cZ{;WzRL6|0m@;LU%ufqUJ2=lAtE1J86|&wu`4kim(Jj1wByt|1P-ST5N}5Av)3W}10d z3lQqF^hJk%QKS%VH#$i z3L6s625=&-?LW9-W-NGRKy|-5DFG%N-kQf+XTON5?L;!3LSz=!p;8w}Sevq!wv0?o zqy24^~6pu%>q$=&*wSGCIc*~=ZfpxHcD;wIUO*+hzNwiZ!8kHVU#2Bx>*zKPTq-q zy>#XvdU9tG7^K@Y$p)q@khBWb7e)?wptk%^-w^+3atzTYpnps~KEy+6WW5wGz?;@d zx4O|XeEQu?S$#fIyVUnb9o(kO`4bdg8O)FjP5Z+Fw=L(~IiZjG_c%oL70K%wJDKj^ z%>Dvg;$h!6CFNC3qcJej4oAzJJ(`_gyFpZ@Rykpwm4P*%{EcPXY}YWJ*0z%`{f_7q4Y%K0*Q|wE7R~!v)O{*`uM-dS#{%g zd@l#Y2IcxVN#48_JliBca7-y?eB>~&&q9oMsOE}v|7MvvToZKR8Xou|W%7g!q+RD~%vAZw_*Fl7Lc#kFn-$BCjK zYM?Fy1;9W9?L;Qgb5`hYIsdJtcqMoyF6hEI^$v_Y^m~FYa$^_{g>eBLmX@*FqA2i& zFNh!sEWKzPXxf0MT>X%5_s&Za-P)u?m-f#?H01(UrPo8!b^f`>JE9g0iokez% z6q!3~3^E+zCjk77A$*yE=SdYGy{{>M-v0H(~JFG3tbUzaqV1J9G`kIUt<@AVh^;5BBEQxFhSZAka!@tC$G4z%CfRtd+QR1Igg=|&4_!)ghQ0L6?&!#M z&*0dbNEU_>T8RIOafcgzV}w0BR)?msa}8h>5MOA(ZTfRTo`n6Q!gY&H*;@yGZv?O_ zmUnzdF-}9!BLM3km@%4Q*2>WC`LZIb$y3c|DTM5syf 
zqzXo<#y49+zfwnlg{NaFf<)VJ?=Y(XxHhOw=HC?nj_wsT0asBxTA|+)dP@i?Bq?Sy zthqvi!zd{hef~asTf_Y*AJ@R9;b0wPKwJpLWF+sX2^-W5OSVU7O5~Ev!Y;$m8BAj5+e1*{1;7q-#}o^*Mc()51=)6L)?p;r zTck&n7KU-nioqbqHx_>#93rm_z$xTmNoc#zg%REhgA^=ahF!jU9n*Xk0o;AkTkneUdIXd>RjrCK3|p>Y4Vnm%69e7PPfO83MdRgVr$A=R|92Vxe^C_nxZ5{{QVd>a zAA#QkbIL=>(Br11hmDlAn+<$JT@V%#J1>0Uyok7-@I|Q$!crm^&j|}l2@7+D?ZGdk z`L_d{TyEOi`uz6?xD2XDzySitC%E3UmAY!LYvZoy;^}^LErS0K DGh!d4 literal 0 HcmV?d00001 From 5255bed756f85cc983aca6f75d82fc061092000d Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Fri, 17 Apr 2026 17:30:54 +0100 Subject: [PATCH 13/53] Remove superfluous tap discretization (chunking minutes into configurable time blocks) --- cicada/cli.py | 2 - cicada/lib/SmartScheduling/config.py | 1 - cicada/lib/SmartScheduling/evaluation.py | 80 +++++++----------------- cicada/lib/SmartScheduling/pygad.py | 45 ++++++------- 4 files changed, 47 insertions(+), 81 deletions(-) diff --git a/cicada/cli.py b/cicada/cli.py index cd91817..5d5970e 100644 --- a/cicada/cli.py +++ b/cicada/cli.py @@ -288,7 +288,6 @@ def smart_schedule(): # Optional GA Configurations ga_config = parser.add_argument_group("ga_config", "Optional configurations for the genetic algorithm optimizer") - ga_config.add_argument("--minutes_per_block", type=int, required=False, help="Minutes per block for the genetic algorithm") ga_config.add_argument("--num_generations",type=int,required=False, help="Number of generations for the genetic algorithm") ga_config.add_argument("--sol_per_pop",type=int,required=False, help="Number of solutions per population for the genetic algorithm") ga_config.add_argument("--num_parents_mating",type=int,required=False, help="Number of parents mating for the genetic algorithm") @@ -301,7 +300,6 @@ def smart_schedule(): smart_schedule.main( args.server_id, ga_config={ - "minutes_per_block": args.minutes_per_block, "num_generations": args.num_generations, "sol_per_pop": args.sol_per_pop, "num_parents_mating": args.num_parents_mating, diff --git 
a/cicada/lib/SmartScheduling/config.py b/cicada/lib/SmartScheduling/config.py index a2b3a2b..c8d999b 100644 --- a/cicada/lib/SmartScheduling/config.py +++ b/cicada/lib/SmartScheduling/config.py @@ -4,7 +4,6 @@ @dataclass class GAConfig: - minutes_per_block: int = 1 num_generations: int = 20 sol_per_pop: int = 40 num_parents_mating: int = 10 diff --git a/cicada/lib/SmartScheduling/evaluation.py b/cicada/lib/SmartScheduling/evaluation.py index 3b28d32..5fedde9 100644 --- a/cicada/lib/SmartScheduling/evaluation.py +++ b/cicada/lib/SmartScheduling/evaluation.py @@ -1,77 +1,45 @@ -import math import numpy as np -from typing import Sequence, Tuple, List +from typing import Sequence from .domain import Tap -def calculate_blocks_per_day(minutes_per_block: int) -> int: - """ - Calculate the number of time blocks in a day given the minutes per block. - Raises error if the minutes_per_block does not give a whole number of blocks per day - Args: - minutes_per_block: int : number of minutes per time block - Returns: - int : number of time blocks in a day - """ - if (24 * 60) % minutes_per_block != 0: - raise ValueError("minutes_per_block must divide evenly into 1440 (the number of minutes in a day)") - return (24 * 60) // minutes_per_block - - -def discretize_taps(taps: Sequence[Tap], minutes_per_block: int) -> Tuple[List[int], List[int]]: - """ - Discretize taps into frequency and runtime blocks based on minutes per block. 
- Args: - taps: Sequence[Tap] : list of Tap objects - minutes_per_block: int : number of minutes per time block - Returns: - Tuple[List[int], List[int]] : frequency blocks and runtime blocks for each tap - freq_blocks: List[int] : amount of time blocks between each run of the tap (based on frequency_minutes) - run_blocks: List[int] : amount of time blocks the tap runs for (based on median_runtime_minutes) - """ - freq_blocks, run_blocks = [], [] - for t in taps: - fb = max(1, t.frequency_minutes // minutes_per_block) - rb = max(1, math.ceil(t.median_runtime_minutes / minutes_per_block)) - freq_blocks.append(fb) - run_blocks.append(rb) - return freq_blocks, run_blocks - -def evaluate_cpu_usage_and_peak(start_blocks: Sequence[int], taps: Sequence[Tap], minutes_per_block: int): +def evaluate_cpu_usage_and_peak(start_times: Sequence[int], taps: Sequence[Tap]): """ Returns the CPU usage time series and peak CPU usage for a given schedule solution Args: - start_blocks: Sequence[int] : start time blocks for each tap + start_times: Sequence[int] : start time in minutes for each tap taps: Sequence[Tap] : list of Tap objects - minutes_per_block: int : number of minutes per time block Returns: usage: np.ndarray : CPU usage time series peak: float : peak CPU usage """ - blocks_per_day = calculate_blocks_per_day(minutes_per_block) - freq_blocks, run_blocks = discretize_taps(taps, minutes_per_block) - diff = np.zeros(blocks_per_day + 1, dtype=float) - assert len(start_blocks) == len(taps) == len(freq_blocks) == len(run_blocks), "Length of start_blocks, taps, freq_blocks, and run_blocks must all be the same" - assert all(start_blocks[i] < freq_blocks[i] for i in range(len(start_blocks))), "Start block should be the earliest it can be" + mins_per_day = 1440 + freqs = [tap.frequency_minutes for tap in taps] + run_times = [tap.median_runtime_minutes for tap in taps] + + + diff = np.zeros(mins_per_day + 1, dtype=float) + assert len(start_times) == len(taps) == len(freqs) == 
len(run_times), "Length of start_times, taps, freqs, and run_times must all be the same" + assert all(start_times[i] < freqs[i] for i in range(len(start_times))), "Start time should be the earliest it can be" for i, tap in enumerate(taps): - freq = freq_blocks[i] - run_len = run_blocks[i] + freq = freqs[i] + run_time = run_times[i] cpu = float(tap.cpu_max) - block = int(start_blocks[i]) - - # Iterate through the day in increments of the tap's frequency, adding the tap's CPU usage to the diff array for the duration - # of its runtime. We use a diff array to efficiently calculate the cumulative CPU usage at each time block. Instead of - # appending the CPU usage for each block the tap runs in, we add the CPU usage at the start block and subtract it at the end block. - while block < blocks_per_day: - end = min(block + run_len, blocks_per_day) - diff[block] += cpu + minute = int(start_times[i]) + + # Iterate through the day in increments of the tap's frequency, adding the tap's CPU usage to the diff array for the duration of its runtime. + # We use a diff array to efficiently calculate the cumulative CPU usage at each minute. Instead of appending the CPU usage for each minute the + # tap runs in, we add the CPU usage at the starting minute and subtract it at the end minute. + while minute < mins_per_day: + end = min(minute + run_time, mins_per_day) + diff[minute] += cpu diff[end] -= cpu - block += freq + minute += freq - # Sums up everything in the diff array to get the total CPU usage at each time block, and finds the peak usage. - # Ignore the last element of the diff array since it's just a placeholder to handle the end block subtraction for taps that run until the end of the day. + # Sums up everything in the diff array to get the total CPU usage at each minute, and finds the peak usage. + # Ignore the last element of the diff array since it's just a placeholder to handle the end minute subtraction for taps that run until the end of the day. 
usage = np.cumsum(diff[:-1]) peak = float(np.max(usage)) if usage.size else 0.0 return usage, peak \ No newline at end of file diff --git a/cicada/lib/SmartScheduling/pygad.py b/cicada/lib/SmartScheduling/pygad.py index 2de7a4f..38d2990 100644 --- a/cicada/lib/SmartScheduling/pygad.py +++ b/cicada/lib/SmartScheduling/pygad.py @@ -3,7 +3,7 @@ import numpy as np from .config import GAConfig from .domain import Tap -from .evaluation import evaluate_cpu_usage_and_peak, discretize_taps, calculate_blocks_per_day +from .evaluation import evaluate_cpu_usage_and_peak import pygad @@ -33,34 +33,32 @@ def __init__(self, config: Optional[Mapping[str, object]] = None, blacklist_sche def _gene_space(self, taps: Sequence[Tap]) -> List[List[int]]: - # Build gene_space per tap: each gene space is limited by it's frequency (e.g. a 15min freq tap can only traverse the first 15min worth of time blocks) + # Build gene_space per tap: each gene space is limited by it's frequency # Unless the tap is unsupported (either blacklisted, irregular or has frequency greater than 60 mins) in which case we set the gene space to be just 0 # so they remain unchanged in the GA but are still included in the fitness evaluation. Also constrain taps with frequency > 60 mins to an hour to prevent # large shifts and huge gene spaces. 
- # Computed in blocks to make it time-block-interval agnostic - interval_blocks, _ = discretize_taps(taps, self.cfg.minutes_per_block) - start_blocks = [0] * len(taps) - end_blocks = [1] * len(taps) - blocks_per_day = calculate_blocks_per_day(self.cfg.minutes_per_block) + + min_start_times = [0] * len(taps) + max_start_times = [1] * len(taps) + mins_per_day = 1440 for i, tap in enumerate(taps): - # Ignore any blacklist taps -> fix the gene space to be 0 so they're still included in the fitness eval + # Ignore any blacklist taps -> this fixes the gene space to be 0 so they're still included in the fitness eval but remain unshifted if tap.is_unsupported(): pass # Limit gene space to only shift within the hour for the taps which run less frequently elif tap.frequency_minutes > 60: - interval_blocks[i] = 60 // self.cfg.minutes_per_block - # Prevent any end blocks from going beyond the day limit - end_blocks[i] = min(tap.start_time_mins // self.cfg.minutes_per_block + interval_blocks[i], blocks_per_day) - start_blocks[i] = end_blocks[i] - interval_blocks[i] + # Prevent any max_start_time from going beyond the day limit + max_start_times[i] = min(tap.start_time_mins + 60, mins_per_day) + min_start_times[i] = max_start_times[i] - 60 # Gene space for the rest is just the frequency else: - start_blocks[i] = 0 - end_blocks[i] = interval_blocks[i] + min_start_times[i] = 0 + max_start_times[i] = tap.frequency_minutes - return [list(range(start_block, end_block)) for start_block, end_block in zip(start_blocks, end_blocks)] + return [list(range(min_start_time, max_start_time)) for min_start_time, max_start_time in zip(min_start_times, max_start_times)] def _initial_population(self, taps: Sequence[Tap], gene_space: List[List[int]]) -> np.ndarray: @@ -70,7 +68,7 @@ def _initial_population(self, taps: Sequence[Tap], gene_space: List[List[int]]) # Add current start minutes as first solution to bias solution space towards current solution for i, tap in enumerate(taps): gs = 
gene_space[i] - s = 0 if tap.start_time_mins is None else int(tap.start_time_mins // self.cfg.minutes_per_block) + s = 0 if tap.start_time_mins is None else int(tap.start_time_mins) seed.append(max(min(s, gs[-1]), gs[0])) pop = [seed] @@ -84,14 +82,17 @@ def _blacklist(self): raise NotImplementedError("Blacklist functionality not yet implemented") def fitness_fn(self, ga, solution, solution_idx): - _, peak = evaluate_cpu_usage_and_peak(solution, self.taps, self.cfg.minutes_per_block) + _, peak = evaluate_cpu_usage_and_peak(solution, self.taps) return -float(peak) def solve(self, taps: Sequence[Tap]) -> tuple[Sequence[Tap], List[int], float, np.ndarray]: - gene_space = self._gene_space(taps) self.taps = taps + gene_space = self._gene_space(taps) + print("Successfully initialised gene space") initial_population = self._initial_population(taps, gene_space) + print("Created initial population") + initial_fitness = self.fitness_fn(None, initial_population[0], 0) print("Initial population fitness (max_cpu load):", -initial_fitness) @@ -115,12 +116,12 @@ def solve(self, taps: Sequence[Tap]) -> tuple[Sequence[Tap], List[int], float, n ga.run() best_solution, best_fitness, _ = ga.best_solution() - start_blocks = [int(v) for v in best_solution] + start_times = [int(v) for v in best_solution] peak_cpu = -float(best_fitness) - usage, _ = evaluate_cpu_usage_and_peak(start_blocks, taps, self.cfg.minutes_per_block) + usage, _ = evaluate_cpu_usage_and_peak(start_times, taps) # Update tap objects shift attribute based on GA solution for i, tap in enumerate(taps): - tap.shift = start_blocks[i] * self.cfg.minutes_per_block + tap.shift = start_times[i] - return taps, start_blocks, peak_cpu, usage, -initial_fitness \ No newline at end of file + return taps, start_times, peak_cpu, usage, -initial_fitness \ No newline at end of file From 02ed34307dd3966693544388b4f8a791ffed5e01 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Fri, 17 Apr 2026 18:14:55 +0100 Subject: [PATCH 14/53] Add 
better error detection and allow delete schedule to remove from schedule_backups --- cicada/cli.py | 23 +++++++++++++---------- cicada/commands/delete_schedule.py | 1 + cicada/commands/rollback.py | 3 +++ cicada/commands/smart_schedule.py | 27 ++++++++++++++++----------- cicada/lib/SmartScheduling/config.py | 2 +- cicada/lib/scheduler.py | 7 ++++++- 6 files changed, 40 insertions(+), 23 deletions(-) diff --git a/cicada/cli.py b/cicada/cli.py index 5d5970e..c596ba2 100644 --- a/cicada/cli.py +++ b/cicada/cli.py @@ -31,6 +31,7 @@ def __init__(self): "register_server", "list_server_schedules", "exec_server_schedules", + "smart_schedule", "show_schedule", "upsert_schedule", "exec_schedule", @@ -284,18 +285,19 @@ def smart_schedule(): prog=inspect.stack()[0][3], description="Generate smart schedules for a server using genetic algorithm", ) - parser.add_argument("--server_id", type=str, required=False, help="ID of the server") + parser.add_argument("--server_id", type=int, required=False, help="ID of the server") # Optional GA Configurations ga_config = parser.add_argument_group("ga_config", "Optional configurations for the genetic algorithm optimizer") - ga_config.add_argument("--num_generations",type=int,required=False, help="Number of generations for the genetic algorithm") - ga_config.add_argument("--sol_per_pop",type=int,required=False, help="Number of solutions per population for the genetic algorithm") - ga_config.add_argument("--num_parents_mating",type=int,required=False, help="Number of parents mating for the genetic algorithm") - ga_config.add_argument("--mutation_percent_genes",type=int,required=False, help="Mutation percentage of genes for the genetic algorithm") - ga_config.add_argument("--parent_selection_type",type=str,required=False, help="Parent selection type for the genetic algorithm. 
Allowed values: ['sss', 'rws', 'sus', 'tournament', 'rank', 'random']") - ga_config.add_argument("--crossover_type",type=str,required=False, help="Crossover type for the genetic algorithm. Allowed values: ['single_point', 'two_point', 'uniform']") - ga_config.add_argument("--mutation_type",type=str,required=False, help="Mutation type for the genetic algorithm. Allowed values: ['random', 'swap', 'inversion', 'scramble']") - ga_config.add_argument("--keep_elitism",type=int,required=False, help="Number of elite solutions to keep for the next generation") + ga_config.add_argument("--num_generations",type=int,required=False, help="Number of generations for the genetic algorithm. Default: 20") + ga_config.add_argument("--sol_per_pop",type=int,required=False, help="Number of solutions per population for the genetic algorithm. Default: 40") + ga_config.add_argument("--num_parents_mating",type=int,required=False, help="Number of parents mating for the genetic algorithm. Default: 10") + ga_config.add_argument("--mutation_percent_genes",type=int,required=False, help="Mutation percentage of genes for the genetic algorithm. Default: 20") + ga_config.add_argument("--parent_selection_type",type=str,required=False, help="Parent selection type for the genetic algorithm. Allowed values: ['sss', 'rws', 'sus', 'tournament', 'rank', 'random']. Default: rank") + ga_config.add_argument("--crossover_type",type=str,required=False, help="Crossover type for the genetic algorithm. Allowed values: ['single_point', 'two_point', 'uniform']. Default: uniform") + ga_config.add_argument("--mutation_type",type=str,required=False, help="Mutation type for the genetic algorithm. Allowed values: ['random', 'swap', 'inversion', 'scramble']. Default: random") + ga_config.add_argument("--keep_elitism",type=int,required=False, help="Number of elite solutions to keep for the next generation. 
Default: 2") + ga_config.add_argument("--random-seed",type=int,required=False, help="Set a random seed to get repeatable results. Default: None") args = parser.parse_args(sys.argv[2:]) smart_schedule.main( args.server_id, @@ -308,6 +310,7 @@ def smart_schedule(): "crossover_type": args.crossover_type, "mutation_type": args.mutation_type, "keep_elitism": args.keep_elitism, + "random_seed": args.random_seed, }, ) @@ -329,7 +332,7 @@ def rollback(): group = parser.add_mutually_exclusive_group() group.add_argument( "--server_id", - type=str, + type=int, required=False, help="ID of the server to rollback, if not specified will rollback all servers", ) diff --git a/cicada/commands/delete_schedule.py b/cicada/commands/delete_schedule.py index 734b11a..b637f04 100644 --- a/cicada/commands/delete_schedule.py +++ b/cicada/commands/delete_schedule.py @@ -16,6 +16,7 @@ def main(schedule_id, dbname=None): db_conn = postgres.db_cicada(dbname) db_cur = db_conn.cursor() scheduler.delete_schedule(db_cur, str(schedule_id)) + scheduler.delete_schedule_backup(db_cur, str(schedule_id)) db_cur.close() db_conn.close() diff --git a/cicada/commands/rollback.py b/cicada/commands/rollback.py index c7cf60f..b5e89f8 100644 --- a/cicada/commands/rollback.py +++ b/cicada/commands/rollback.py @@ -21,6 +21,9 @@ def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbn prev: bool If True, roll back to the previous schedule in the schedule_backups table. If False, roll back to the original schedule """ + if type(server_id) != int and server_id is not None: raise TypeError(f"server_id needs to be of type int. 
{type(server_id)}") + if type(schedule_id) != str and schedule_id is not None: raise TypeError("schedule_id needs to be of type str") + db_conn = postgres.db_cicada(dbname) db_cur = db_conn.cursor() diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index b040128..dc22f59 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -11,8 +11,10 @@ from cicada.lib.SmartScheduling.domain import Tap -def get_schedules_per_server(server_id, db_cur=None): +def _get_schedules_per_server(server_id, db_cur=None): """Get all schedules for a given server_id.""" + existing_servers = [server[0] for server in scheduler.get_all_server_ids(db_cur)] + if server_id not in existing_servers: raise ValueError(f"server_id not in list of existing servers. Existing servers: {existing_servers}") schedule_ids = [row[0] for row in scheduler.get_all_schedule_ids_per_server(db_cur, server_id)] if not schedule_ids: @@ -23,7 +25,7 @@ def get_schedules_per_server(server_id, db_cur=None): -def create_tap_objects(schedule_ids, db_cur): +def _create_tap_objects(schedule_ids, db_cur): """Create Tap objects from schedule_ids.""" taps : list[Tap] = [] @@ -56,9 +58,9 @@ def create_tap_objects(schedule_ids, db_cur): return taps -def update_schedule_cron(tap : Tap): +def _update_schedule_cron(tap : Tap): """ - Uses the start_blocks to shift the cron expression accordingly. Gene space is already limited from 0 to the frequency of the tap + Uses the start_time to shift the cron expression accordingly. Gene space is already limited from 0 to the frequency of the tap Ex. 
form of cron expression: "8-59/15 * * * *" (every 15 minutes starting at minute 8 of each hour) @@ -98,14 +100,14 @@ def update_schedule_cron(tap : Tap): -def assign_new_schedules(optimised_taps: list[pygad.Tap], db_cur): +def _assign_new_schedules(optimised_taps: list[pygad.Tap], db_cur): """Assign new schedules based on the optimal schedule found.""" # For each tap, update the schedule in the DB with the new interval_mask based on the shift calculated by the GA optimizer for tap in optimised_taps: previous_schedule_mask = tap.interval_mask - update_schedule_cron(tap) - print(f"Updating schedule {tap.schedule_id} with new interval mask: {tap.interval_mask} and shift of {tap.shift} minutes") + _update_schedule_cron(tap) + print(f"- {tap.schedule_id} : {tap.interval_mask}") tap._determine_start_time_mins() schedule_details = { @@ -141,6 +143,8 @@ def assign_new_schedules(optimised_taps: list[pygad.Tap], db_cur): @utils.named_exception_handler("smart_schedule") def main(server_id=None, dbname=None, ga_config=None): + if type(server_id) != int: raise TypeError(f"server_id should be int not {type(server_id)}") + db_conn = postgres.db_cicada(dbname) db_cur = db_conn.cursor() @@ -153,11 +157,11 @@ def main(server_id=None, dbname=None, ga_config=None): else: # Get schedules for the server_id print("\n-----------------Tap Setup----------------------") - schedule_ids = get_schedules_per_server(server_id=server_id, db_cur=db_cur) + schedule_ids = _get_schedules_per_server(server_id=server_id, db_cur=db_cur) print(f"Found {len(schedule_ids)} schedules for server_id {server_id}") # Build Tap objects - taps = create_tap_objects(schedule_ids, db_cur=db_cur) + taps = _create_tap_objects(schedule_ids, db_cur=db_cur) if not taps: print("No valid schedules found to optimize.") sys.exit(1) @@ -169,14 +173,15 @@ def main(server_id=None, dbname=None, ga_config=None): blacklist_schedule_ids = scheduler.get_blacklisted_schedule_ids(db_cur) print(f"Blacklisted schedule IDs that will be 
excluded from optimization: {blacklist_schedule_ids}") ga = pygad.GAPyGADScheduler(config=ga_config, blacklist_schedule_ids=blacklist_schedule_ids) - optimised_taps, start_blocks, peak_cpu, usage, initial_fitness = ga.solve(taps) + print("Running PyGAD solver ...") + optimised_taps, __, peak_cpu, __, initial_fitness = ga.solve(taps) print(f"Optimized schedule for server_id {server_id}: new peak CPU {peak_cpu}") print("--------------------------------------------------\n") print("\n-------------Updating Schedules------------------") if peak_cpu < initial_fitness: # Only update schedules if we have found an improvement - assign_new_schedules(optimised_taps, db_cur=db_cur) + _assign_new_schedules(optimised_taps, db_cur=db_cur) else: print(f"No improvement found for server_id {server_id}. Current peak CPU: {initial_fitness}, Optimized peak CPU: {peak_cpu}. No schedule updates will be made.") print("--------------------------------------------------\n") diff --git a/cicada/lib/SmartScheduling/config.py b/cicada/lib/SmartScheduling/config.py index c8d999b..0112228 100644 --- a/cicada/lib/SmartScheduling/config.py +++ b/cicada/lib/SmartScheduling/config.py @@ -11,6 +11,6 @@ class GAConfig: parent_selection_type: str = "rank" crossover_type: str = "uniform" mutation_type: str = "random" - keep_elitism: int = 1 + keep_elitism: int = 2 random_seed: Optional[int] = None blacklist_schedule_ids: Optional[List[str]] = field(default_factory=list) \ No newline at end of file diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index bb67332..9d909b0 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -372,6 +372,11 @@ def delete_schedule(db_cur, schedule_id): db_cur.execute(sqlquery) +def delete_schedule_backup(db_cur, schedule_id): + sqlquery = f"DELETE from schedule_backups WHERE schedule_id = '{schedule_id}'" + db_cur.execute(sqlquery) + + def get_all_server_ids(db_cur): """Get all possible server_ids from the servers table""" sqlquery = "SELECT 
DISTINCT server_id FROM schedules ORDER BY server_id" @@ -533,4 +538,4 @@ def get_blacklisted_schedule_ids(db_cur): sqlquery = "SELECT schedule_id FROM schedule_blacklist" db_cur.execute(sqlquery) blacklist_schedule_ids = [row[0] for row in db_cur.fetchall()] - return blacklist_schedule_ids + return blacklist_schedule_ids \ No newline at end of file From c1d37ffdbab2fd11617fffe037e27da4012cad21 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Mon, 20 Apr 2026 12:23:59 +0100 Subject: [PATCH 15/53] Populate schedule_backups upon creation --- cicada/commands/smart_schedule.py | 2 +- cicada/lib/SmartScheduling/domain.py | 13 +++---------- docs/Smart Scheduler Technical Overview.md | 1 - setup/schema.sql | 12 ++++++++++++ 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index dc22f59..9e6ffb4 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -143,7 +143,7 @@ def _assign_new_schedules(optimised_taps: list[pygad.Tap], db_cur): @utils.named_exception_handler("smart_schedule") def main(server_id=None, dbname=None, ga_config=None): - if type(server_id) != int: raise TypeError(f"server_id should be int not {type(server_id)}") + if server_id and type(server_id) != int: raise TypeError(f"server_id should be int not {type(server_id)}") db_conn = postgres.db_cicada(dbname) db_cur = db_conn.cursor() diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/SmartScheduling/domain.py index 41fe75d..6e58ccb 100644 --- a/cicada/lib/SmartScheduling/domain.py +++ b/cicada/lib/SmartScheduling/domain.py @@ -81,15 +81,8 @@ def is_regular_schedule(self): """Check if the cron expression is a regular schedule that can be optimized by the GA """ try: schedule = croniter(self.interval_mask) - iter1 = schedule.get_next(datetime.datetime) - iter2 = schedule.get_next(datetime.datetime) - iter3 = schedule.get_next(datetime.datetime) - iter4 = 
schedule.get_next(datetime.datetime) - iter5 = schedule.get_next(datetime.datetime) - freq1 = (iter2 - iter1) - freq2 = (iter3 - iter2) - freq3 = (iter4 - iter3) - freq4 = (iter5 - iter4) - return freq1 == freq2 == freq3 == freq4 + iters = [schedule.get_next(datetime.datetime) for _ in range(20)] + freqs = [iters[i + 1] - iters[i] for i in range(len(iters) - 1)] + return all(f == freqs[0] for f in freqs) except (ValueError, KeyError): return False diff --git a/docs/Smart Scheduler Technical Overview.md b/docs/Smart Scheduler Technical Overview.md index 1eb7cd5..85ebda5 100644 --- a/docs/Smart Scheduler Technical Overview.md +++ b/docs/Smart Scheduler Technical Overview.md @@ -144,7 +144,6 @@ Tuning Considerations: - ↑ `num_generations`: Better solutions but slower - ↑ `sol_per_pop`: Larger search space but slower - ↑ `mutation_percent_genes`: More exploration, less exploitation -- ↓ `minutes_per_block`: Finer granularity but larger gene space and slower evaluation ## Schedule Validation & Filtering diff --git a/setup/schema.sql b/setup/schema.sql index ed6fe9b..cdec569 100644 --- a/setup/schema.sql +++ b/setup/schema.sql @@ -218,6 +218,18 @@ WITH ( OIDS=FALSE ); +INSERT INTO public.schedule_backups (schedule_id, server_id, original_interval_mask, previous_interval_mask, interval_mask, start_time_shift_mins, snapshot_at) + SELECT + schedule_id, + server_id, + interval_mask, + interval_mask, + interval_mask, + 0, + now() + FROM schedules +ON CONFLICT (schedule_id) DO NOTHING; + DROP TRIGGER IF EXISTS tr_schedule_backups ON public.schedule_backups; CREATE TRIGGER tr_schedule_backups BEFORE UPDATE From e5930fcfdc12e4606d30d9217e3ecb34978ed9e8 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Mon, 20 Apr 2026 13:56:29 +0100 Subject: [PATCH 16/53] Change locations in docker --- local-dev/cicada_db/Dockerfile | 4 ++-- local-dev/docker-compose.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/local-dev/cicada_db/Dockerfile 
b/local-dev/cicada_db/Dockerfile index ed77d1f..a00ca90 100644 --- a/local-dev/cicada_db/Dockerfile +++ b/local-dev/cicada_db/Dockerfile @@ -1,7 +1,7 @@ FROM debezium/postgres:15-alpine -COPY --chown=postgres:postgres local-dev/cicada_db/server.crt /var/lib/postgresql/server.crt -COPY --chown=postgres:postgres --chmod=600 local-dev/cicada_db/server.key /var/lib/postgresql/server.key +COPY --chown=postgres:postgres server.crt /var/lib/postgresql/server.crt +COPY --chown=postgres:postgres --chmod=600 server.key /var/lib/postgresql/server.key RUN apk add --no-cache --virtual .debezium-build-deps gcc clang15 llvm15 git make musl-dev pkgconf \ && git clone --depth 1 --branch wal2json_2_6 https://github.com/eulerto/wal2json.git \ diff --git a/local-dev/docker-compose.yml b/local-dev/docker-compose.yml index 45ad9fa..10064d9 100644 --- a/local-dev/docker-compose.yml +++ b/local-dev/docker-compose.yml @@ -26,8 +26,8 @@ services: # PostgreSQL database container used as backend cicada_db: build: - context: ../ - dockerfile: local-dev/cicada_db/Dockerfile + context: ./cicada_db + dockerfile: Dockerfile container_name: cicada_db volumes: - ./cicada_db/pg_hba.conf:/var/lib/postgresql/pg_hba.conf From 01e9968e7fd212cba335fbac8d4649bf17c94a9e Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Mon, 20 Apr 2026 15:51:40 +0100 Subject: [PATCH 17/53] Refactor shift and start_time_shift_mins --- CLAUDE.md | 1 - cicada/commands/smart_schedule.py | 20 ++++++++++---------- cicada/lib/SmartScheduling/domain.py | 4 ++-- cicada/lib/SmartScheduling/pygad.py | 15 +++++++++++---- cicada/lib/scheduler.py | 21 +++++++++------------ docs/Smart Scheduler Technical Overview.md | 1 - setup/schema.sql | 4 +--- 7 files changed, 33 insertions(+), 33 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 793a282..132e9e2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -79,7 +79,6 @@ Located in `cicada/lib/SmartScheduling/` - `schedule_id`, `server_id`, `interval_mask` (cron expression) - `frequency_minutes`, 
`median_runtime_minutes`, `cpu_max` - `shift`: offset in minutes applied to shift job start time - - `start_time_mins`: job start time from midnight (calculated from cron) - `blacklisted`: flag to exclude from GA optimization **`config.py`** diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index 9e6ffb4..3d636b6 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -69,30 +69,31 @@ def _update_schedule_cron(tap : Tap): Returns: Updated tap object with new interval_mask based on the shift calculated by the GA optimizer """ + frequency = tap.frequency_minutes - shift = tap.shift + start_time_mins = tap.start_time_mins - if not shift: + if tap.shifted == False or start_time_mins is None: return # No shift needed if frequency == 1440: # For daily taps, we can shift within the hour - hour = shift // 60 - minute = (shift - hour * 60) % 60 + hour = start_time_mins // 60 + minute = (start_time_mins - hour * 60) % 60 tap.interval_mask = f"{minute} {hour} * * *" # Check that the new cron expression is valid if not croniter.is_valid(tap.interval_mask): raise ValueError(f"Invalid cron expression generated: {tap.interval_mask} for tap {tap.schedule_id}") return elif frequency == 60: # For hourly taps, we can shift within the hour - minute = shift % 60 - tap.interval_mask = f"{minute} * * * *" + assert start_time_mins < frequency, f"Shift {start_time_mins} cannot be greater than or equal to frequency {frequency} for tap {tap.schedule_id}" + tap.interval_mask = f"{start_time_mins} * * * *" # Check that the new cron expression is valid if not croniter.is_valid(tap.interval_mask): raise ValueError(f"Invalid cron expression generated: {tap.interval_mask} for tap {tap.schedule_id}") return elif frequency < 60: - assert shift < frequency, f"Shift {shift} cannot be greater than or equal to frequency {frequency} for tap {tap.schedule_id}" - tap.interval_mask = f"{shift}-59/{frequency} * * * *" + assert start_time_mins 
< frequency, f"Shift {start_time_mins} cannot be greater than or equal to frequency {frequency} for tap {tap.schedule_id}" + tap.interval_mask = f"{start_time_mins}-59/{frequency} * * * *" # Check that the new cron expression is valid if not croniter.is_valid(tap.interval_mask): raise ValueError(f"Invalid cron expression generated: {tap.interval_mask} for tap {tap.schedule_id}") @@ -103,7 +104,7 @@ def _update_schedule_cron(tap : Tap): def _assign_new_schedules(optimised_taps: list[pygad.Tap], db_cur): """Assign new schedules based on the optimal schedule found.""" - # For each tap, update the schedule in the DB with the new interval_mask based on the shift calculated by the GA optimizer + # For each tap, update the schedule in the DB with the new interval_mask based on the start_time_mins calculated by the GA optimizer for tap in optimised_taps: previous_schedule_mask = tap.interval_mask _update_schedule_cron(tap) @@ -136,7 +137,6 @@ def _assign_new_schedules(optimised_taps: list[pygad.Tap], db_cur): "server_id": tap.server_id, "previous_interval_mask": previous_schedule_mask, "interval_mask": tap.interval_mask, - "start_time_shift_mins": tap.start_time_mins } scheduler.update_schedule_backups(db_cur, previous_schedule_details) diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/SmartScheduling/domain.py index 6e58ccb..efb12c9 100644 --- a/cicada/lib/SmartScheduling/domain.py +++ b/cicada/lib/SmartScheduling/domain.py @@ -15,8 +15,8 @@ class Tap: frequency_minutes: int cpu_max: float = 1 median_runtime_minutes: int = 5 - shift: Optional[int] = 0 - start_time_mins: Optional[int] = None + shifted: bool = False + start_time_mins: Optional[int] = 0 blacklisted: bool = False diff --git a/cicada/lib/SmartScheduling/pygad.py b/cicada/lib/SmartScheduling/pygad.py index 38d2990..011db2f 100644 --- a/cicada/lib/SmartScheduling/pygad.py +++ b/cicada/lib/SmartScheduling/pygad.py @@ -68,7 +68,7 @@ def _initial_population(self, taps: Sequence[Tap], gene_space: 
List[List[int]]) # Add current start minutes as first solution to bias solution space towards current solution for i, tap in enumerate(taps): gs = gene_space[i] - s = 0 if tap.start_time_mins is None else int(tap.start_time_mins) + s = int(tap.start_time_mins) seed.append(max(min(s, gs[-1]), gs[0])) pop = [seed] @@ -91,7 +91,8 @@ def solve(self, taps: Sequence[Tap]) -> tuple[Sequence[Tap], List[int], float, n print("Successfully initialised gene space") initial_population = self._initial_population(taps, gene_space) - print("Created initial population") + print("Created initial population. Current Solution Start Times:") + print(initial_population[0]) initial_fitness = self.fitness_fn(None, initial_population[0], 0) print("Initial population fitness (max_cpu load):", -initial_fitness) @@ -118,10 +119,16 @@ def solve(self, taps: Sequence[Tap]) -> tuple[Sequence[Tap], List[int], float, n best_solution, best_fitness, _ = ga.best_solution() start_times = [int(v) for v in best_solution] peak_cpu = -float(best_fitness) + + print(f"Optimised for {self.cfg.num_generations} generations. 
Best Solution Start Times:") + print(best_solution) + usage, _ = evaluate_cpu_usage_and_peak(start_times, taps) - # Update tap objects shift attribute based on GA solution + # Update tap objects start_time_mins attribute based on GA solution for i, tap in enumerate(taps): - tap.shift = start_times[i] + if tap.start_time_mins != start_times[i]: + tap.shifted = True + tap.start_time_mins = start_times[i] return taps, start_times, peak_cpu, usage, -initial_fitness \ No newline at end of file diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index 9d909b0..ad0e165 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -401,7 +401,7 @@ def get_all_schedule_ids_per_server(db_cur, server_id): def get_all_schedule_backups(db_cur): """Get all entries from the schedule_backups table""" - sqlquery = "SELECT schedule_id, start_time_shift_mins, original_interval_mask FROM schedule_backups" + sqlquery = "SELECT schedule_id, original_interval_mask FROM schedule_backups" db_cur.execute(sqlquery) schedule_backups = db_cur.fetchall() @@ -452,37 +452,34 @@ def update_schedule_backups(db_cur, previous_schedule_details): """ Insert a schedule configuration into the schedule_backups table for rollback. Args: db_cur: Database cursor. 
- previous_schedule_details: dict with keys schedule_id, server_id, interval_mask, previous_interval_mask, start_time_shift_mins, original_interval_mask + previous_schedule_details: dict with keys schedule_id, server_id, interval_mask, previous_interval_mask, original_interval_mask or - dict with keys schedule_id, interval_mask, previous_interval_mask, start_time_shift_mins + dict with keys schedule_id, interval_mask, previous_interval_mask """ sqlquery = f""" - INSERT INTO schedule_backups (schedule_id, server_id, interval_mask, previous_interval_mask, start_time_shift_mins, original_interval_mask) + INSERT INTO schedule_backups (schedule_id, server_id, interval_mask, previous_interval_mask, original_interval_mask) VALUES ( '{previous_schedule_details["schedule_id"]}', '{previous_schedule_details["server_id"]}', '{previous_schedule_details["interval_mask"]}', '{previous_schedule_details["previous_interval_mask"]}', - '{previous_schedule_details["start_time_shift_mins"]}', '{previous_schedule_details["previous_interval_mask"]}') -- Assuming original_interval_mask is the same as previous_interval_mask on the first insert ON CONFLICT (schedule_id) DO UPDATE SET interval_mask = EXCLUDED.interval_mask, - previous_interval_mask = EXCLUDED.previous_interval_mask, - start_time_shift_mins = EXCLUDED.start_time_shift_mins; + previous_interval_mask = EXCLUDED.previous_interval_mask; """ db_cur.execute(sqlquery) -def rollback_schedule_backup_mask(db_cur, schedule_id=None, server_id=None): +def rollback_schedule_backup_mask(db_cur, schedule_id=None, server_id=None, full=False): """ Sets the interval_masks back to the schedule_backups table to the original_interval_mask. 
""" sqlquery = f""" UPDATE public.schedule_backups - SET interval_mask = original_interval_mask, - previous_interval_mask = original_interval_mask, - start_time_shift_mins = 0 + SET interval_mask = {'original_interval_mask' if full else 'previous_interval_mask'}, + previous_interval_mask = original_interval_mask """ if schedule_id is not None: sqlquery = sqlquery + f" WHERE schedule_id = '{schedule_id}'" @@ -530,7 +527,7 @@ def restore_previous_schedules(db_cur, server_id=None, schedule_id=None, full=Fa db_cur.execute(sqlquery, tuple(params)) print("Resetting schedule_backups table to reflect rolled back schedules...") - rollback_schedule_backup_mask(db_cur, schedule_id=schedule_id, server_id=server_id) + rollback_schedule_backup_mask(db_cur, schedule_id=schedule_id, server_id=server_id, full=full) def get_blacklisted_schedule_ids(db_cur): diff --git a/docs/Smart Scheduler Technical Overview.md b/docs/Smart Scheduler Technical Overview.md index 85ebda5..f4c443d 100644 --- a/docs/Smart Scheduler Technical Overview.md +++ b/docs/Smart Scheduler Technical Overview.md @@ -35,7 +35,6 @@ The GA evolves shift offsets for each schedule over multiple generations to find - `original_interval_mask`: pristine cron expression (before any optimization) - `previous_interval_mask`: cron before this optimization run - `interval_mask`: current cron after optimization - - `start_time_shift_mins`: shift offset applied (in minutes) - `snapshot_at`: timestamp of last update (auto-set on INSERT/UPDATE) - Indexes on `schedule_id` and `server_id` for fast lookups diff --git a/setup/schema.sql b/setup/schema.sql index cdec569..93ccbb8 100644 --- a/setup/schema.sql +++ b/setup/schema.sql @@ -210,7 +210,6 @@ CREATE TABLE IF NOT EXISTS public.schedule_backups original_interval_mask character varying(32) NOT NULL, previous_interval_mask character varying(32) NOT NULL, interval_mask character varying(32) NOT NULL, - start_time_shift_mins INT NOT NULL DEFAULT 0, -- Number of minutes the schedule 
was shifted from its original schedule snapshot_at timestamp without time zone NOT NULL DEFAULT (now())::timestamp without time zone, CONSTRAINT schedule_backups_pkey PRIMARY KEY (schedule_id) ) @@ -218,14 +217,13 @@ WITH ( OIDS=FALSE ); -INSERT INTO public.schedule_backups (schedule_id, server_id, original_interval_mask, previous_interval_mask, interval_mask, start_time_shift_mins, snapshot_at) +INSERT INTO public.schedule_backups (schedule_id, server_id, original_interval_mask, previous_interval_mask, interval_mask, snapshot_at) SELECT schedule_id, server_id, interval_mask, interval_mask, interval_mask, - 0, now() FROM schedules ON CONFLICT (schedule_id) DO NOTHING; From 9b15b8156de35d274805a0b23a6920fc71fe2296 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Mon, 20 Apr 2026 16:39:43 +0100 Subject: [PATCH 18/53] Add unsupported taps into pygad eval --- cicada/cli.py | 1 + cicada/commands/smart_schedule.py | 11 ++--------- cicada/lib/SmartScheduling/pygad.py | 14 ++++++++------ setup/schema.sql | 2 +- 4 files changed, 12 insertions(+), 16 deletions(-) diff --git a/cicada/cli.py b/cicada/cli.py index c596ba2..b6be71d 100644 --- a/cicada/cli.py +++ b/cicada/cli.py @@ -32,6 +32,7 @@ def __init__(self): "list_server_schedules", "exec_server_schedules", "smart_schedule", + "rollback", "show_schedule", "upsert_schedule", "exec_schedule", diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index 3d636b6..b421e58 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -42,17 +42,10 @@ def _create_tap_objects(schedule_ids, db_cur): try: tap = Tap(details, db_cur=db_cur) # Ignore the few taps that have irregular cron expressions for now. 
There are few enough that this shouldn't impact the optimisation and is not worth the effort to try and support these irregular schedules in the GA - if tap.is_unsupported(): - if tap.is_blacklisted(): - print(f"Skipping blacklisted schedule {tap.schedule_id}") - elif not tap.is_regular_schedule(): + if not tap.is_regular_schedule(): print(f"Skipping irregular schedule {tap.schedule_id} with cron expression {tap.interval_mask}") - else: - print(f"Skipping schedule {tap.schedule_id} with frequency {tap.frequency_minutes} minutes as shifting for these taps is unsupported currently") - else: taps.append(tap) - except Exception as e: print(f"Skipping schedule {schedule_id} due to error: {e}") @@ -108,7 +101,7 @@ def _assign_new_schedules(optimised_taps: list[pygad.Tap], db_cur): for tap in optimised_taps: previous_schedule_mask = tap.interval_mask _update_schedule_cron(tap) - print(f"- {tap.schedule_id} : {tap.interval_mask}") + if tap.shifted: print(f"- {tap.schedule_id} : {tap.interval_mask}") tap._determine_start_time_mins() schedule_details = { diff --git a/cicada/lib/SmartScheduling/pygad.py b/cicada/lib/SmartScheduling/pygad.py index 011db2f..4759c91 100644 --- a/cicada/lib/SmartScheduling/pygad.py +++ b/cicada/lib/SmartScheduling/pygad.py @@ -43,9 +43,10 @@ def _gene_space(self, taps: Sequence[Tap]) -> List[List[int]]: mins_per_day = 1440 for i, tap in enumerate(taps): - # Ignore any blacklist taps -> this fixes the gene space to be 0 so they're still included in the fitness eval but remain unshifted + # Fix the gene space so they're still included in the fitness eval but remain unshifted if tap.is_unsupported(): - pass + min_start_times[i] = tap.start_time_mins + max_start_times[i] = tap.start_time_mins + 1 # Limit gene space to only shift within the hour for the taps which run less frequently elif tap.frequency_minutes > 60: @@ -55,7 +56,6 @@ def _gene_space(self, taps: Sequence[Tap]) -> List[List[int]]: # Gene space for the rest is just the frequency else: - 
min_start_times[i] = 0 max_start_times[i] = tap.frequency_minutes return [list(range(min_start_time, max_start_time)) for min_start_time, max_start_time in zip(min_start_times, max_start_times)] @@ -74,7 +74,7 @@ def _initial_population(self, taps: Sequence[Tap], gene_space: List[List[int]]) # Populate the rest of the initial population randomly within the gene space limits for each tap for _ in range(self.cfg.sol_per_pop - 1): - pop.append([int(rng.integers(0, len(gene_space[i]))) for i in range(len(taps))]) + pop.append([gene_space[i][int(rng.integers(0, len(gene_space[i])))] for i in range(len(taps))]) return np.asarray(pop, dtype=int) def _blacklist(self): @@ -122,12 +122,14 @@ def solve(self, taps: Sequence[Tap]) -> tuple[Sequence[Tap], List[int], float, n print(f"Optimised for {self.cfg.num_generations} generations. Best Solution Start Times:") print(best_solution) - + usage, _ = evaluate_cpu_usage_and_peak(start_times, taps) # Update tap objects start_time_mins attribute based on GA solution for i, tap in enumerate(taps): - if tap.start_time_mins != start_times[i]: + assert start_times[i] >= gene_space[i][0] and start_times[i] <= gene_space[i][-1], f"Start time for tap {tap.schedule_id} is out of gene space bounds. Start time: {start_times[i]}, Gene space: {gene_space[i]}" + if tap.is_unsupported(): assert start_times[i] == tap.start_time_mins, f"Unsupported tap {tap.schedule_id} should not have been shifted in the GA solution. 
{tap.start_time_mins} != {start_times[i]}" + elif tap.start_time_mins != start_times[i]: tap.shifted = True tap.start_time_mins = start_times[i] diff --git a/setup/schema.sql b/setup/schema.sql index 93ccbb8..6332825 100644 --- a/setup/schema.sql +++ b/setup/schema.sql @@ -202,7 +202,7 @@ WITH ( ; --- Table to record previous scheduleing for rollback functionality with smart scheduling +-- Table to record previous scheduling for rollback functionality with smart scheduling CREATE TABLE IF NOT EXISTS public.schedule_backups ( schedule_id character varying(255) NOT NULL, From 1679e61792e5069f96bf6d26cb7f02a5596c664b Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Mon, 20 Apr 2026 17:01:11 +0100 Subject: [PATCH 19/53] Changes how median runtime is set. Doesn't disregard new taps as an estimate is better than ignoring it --- cicada/lib/SmartScheduling/domain.py | 5 ++--- cicada/lib/scheduler.py | 11 +++++++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/SmartScheduling/domain.py index efb12c9..43beab4 100644 --- a/cicada/lib/SmartScheduling/domain.py +++ b/cicada/lib/SmartScheduling/domain.py @@ -1,5 +1,6 @@ from __future__ import annotations from dataclasses import dataclass +import math from typing import Optional, List import numpy as np from croniter import croniter @@ -45,9 +46,7 @@ def _determine_frequency(self): def _get_average_runtime(self, db_cur): """Get average runtime from scheduler module""" - # for local testing set everything to 5 mins - self.median_runtime_minutes = 5 - # self.median_runtime_minutes = get_median_run_time(db_cur, self.schedule_id) + self.median_runtime_minutes = math.ceil(get_median_run_time(db_cur, self.schedule_id)) def _determine_start_time_mins(self): """Determine the start time in minutes from midnight from the interval_mask""" diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index ad0e165..5479eb0 100644 --- a/cicada/lib/scheduler.py +++ 
b/cicada/lib/scheduler.py @@ -409,6 +409,13 @@ def get_all_schedule_backups(db_cur): def get_median_run_time(db_cur, schedule_id): + """ + Calculate the median runtime in minutes for a schedule_id from the schedule_log table. + + Zero runs => 5 mins (conservative estimate, allows local testing without data and for new taps to be + scheduled without having to wait for historical data to be collected. + """ + sqlquery = f""" SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY EXTRACT(EPOCH FROM (end_time - start_time)) / 60) @@ -423,8 +430,8 @@ def get_median_run_time(db_cur, schedule_id): average_runtime_minutes = float(row[0]) return average_runtime_minutes except Exception: - print(f"ERROR : No runs associated with the schedule_id {schedule_id}") - sys.exit(1) + # No runs -> assigns default runtime of 5 minutes + return 5 def reset_schedule_backup_mask(db_cur, schedule_details): """ From 124f797cc30756153d08a09d6142468e813df37f Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Mon, 20 Apr 2026 17:17:02 +0100 Subject: [PATCH 20/53] Address Copilot comments - SQL Injection, GA Config etc. 
--- cicada/commands/smart_schedule.py | 2 +- cicada/lib/scheduler.py | 39 +++++++++++++--------- docs/Smart Scheduler Technical Overview.md | 2 +- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index b421e58..aad2125 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -145,7 +145,7 @@ def main(server_id=None, dbname=None, ga_config=None): # Recursively call main for each server_id if no specific server_id is provided server_ids = scheduler.get_all_server_ids(db_cur) for id in server_ids: - main(server_id=id[0], dbname=dbname) + main(server_id=id[0], dbname=dbname, ga_config=ga_config) else: # Get schedules for the server_id diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index 5479eb0..fd93cea 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -387,14 +387,14 @@ def get_all_server_ids(db_cur): def get_all_schedule_ids_per_server(db_cur, server_id): """Get all possible schedule_ids for each server from the schedules table""" - sqlquery = f""" + sqlquery = """ SELECT DISTINCT schedule_id FROM schedules - WHERE server_id = '{server_id}' - AND (schedule_description IS NULL OR schedule_description NOT LIKE '%==%') + WHERE server_id = %s + AND (schedule_description IS NULL OR schedule_description NOT LIKE '%%==%%') ORDER BY schedule_id """ - db_cur.execute(sqlquery) + db_cur.execute(sqlquery, (server_id,)) schedule_ids = db_cur.fetchall() return schedule_ids @@ -438,19 +438,26 @@ def reset_schedule_backup_mask(db_cur, schedule_details): Resets the interval_mask of a schedule in the schedule_backups table. Called when schedule frequency is changed or a new schedule is added. Sets all interval_mask fiedls to the new interval_mask to ensure that the rollback won't revert to an outdated frequency. 
""" + insert_columns = "schedule_id, interval_mask, original_interval_mask, previous_interval_mask" + insert_values = ( + f"'{schedule_details['schedule_id']}', " + f"'{schedule_details['interval_mask']}', " + f"'{schedule_details['interval_mask']}', " + f"'{schedule_details['interval_mask']}'" + ) + update_server_id = "" + if schedule_details["server_id"] is not None: + insert_columns += ", server_id" + insert_values += f", '{schedule_details['server_id']}'" + update_server_id = f", server_id = '{schedule_details['server_id']}'" sqlquery = f""" - MERGE INTO public.schedule_backups - USING (SELECT '{schedule_details["schedule_id"]}' AS schedule_id) AS src - ON schedule_backups.schedule_id = src.schedule_id - WHEN MATCHED THEN - UPDATE SET - interval_mask = '{schedule_details["interval_mask"]}', - original_interval_mask = '{schedule_details["interval_mask"]}', - previous_interval_mask = '{schedule_details["interval_mask"]}' - """ + (f", server_id = {schedule_details['server_id']}" if schedule_details["server_id"] is not None else "") + f""" - WHEN NOT MATCHED THEN - INSERT (schedule_id, interval_mask, original_interval_mask, previous_interval_mask) - VALUES ('{schedule_details["schedule_id"]}', '{schedule_details["interval_mask"]}', '{schedule_details["interval_mask"]}', '{schedule_details["interval_mask"]}') + INSERT INTO public.schedule_backups ({insert_columns}) + VALUES ({insert_values}) + ON CONFLICT (schedule_id) DO UPDATE SET + interval_mask = EXCLUDED.interval_mask, + original_interval_mask = EXCLUDED.original_interval_mask, + previous_interval_mask = EXCLUDED.previous_interval_mask + {update_server_id} """ db_cur.execute(sqlquery) diff --git a/docs/Smart Scheduler Technical Overview.md b/docs/Smart Scheduler Technical Overview.md index f4c443d..a8ebad1 100644 --- a/docs/Smart Scheduler Technical Overview.md +++ b/docs/Smart Scheduler Technical Overview.md @@ -123,7 +123,7 @@ Inverse of the peak_cpu since it's a minimisation problem. 
Peak_cpu is calculate - **Mutation Type**: Random (randomly select genes and replace with random value from gene space) - **Elitism**: Keep the best solution across generations (default: 1) -The creation of the offsprings uses different methods to change the solutions, however they must remain within the gene limits. For more information checkout the official [PyGAD documentation](https://pypi.org/project/pygad/2.1.0/) as it will be infinitely better than anything I can produce +The creation of the offsprings uses different methods to change the solutions, however they must remain within the gene limits. For more information checkout the official [PyGAD documentation](https://pypi.org/project/pygad/5.3.0/) as it will be infinitely better than anything I can produce ### Population Seeding From 4a54c36bfc535a19f36e3825c7b1b6b7bc0dc25a Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Mon, 20 Apr 2026 17:23:53 +0100 Subject: [PATCH 21/53] Change IPs in dev setup --- setup/create_test_tap_setup.sql | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/setup/create_test_tap_setup.sql b/setup/create_test_tap_setup.sql index 6d99d9d..61f08b7 100644 --- a/setup/create_test_tap_setup.sql +++ b/setup/create_test_tap_setup.sql @@ -2,13 +2,13 @@ START TRANSACTION; -- Add all servers -INSERT INTO public.servers (server_id, hostname, fqdn, ip4_address, is_enabled) VALUES (1, '4e538ad035e1', '4e538ad035e1', ' 192.168.52.3', 1) ON CONFLICT (server_id) DO UPDATE SET is_enabled = 1; -INSERT INTO public.servers (server_id, hostname, fqdn, ip4_address, is_enabled) VALUES (2, '4e538ad035e2', '4e538ad035e2', ' 192.168.52.2', 0) ON CONFLICT DO NOTHING; -INSERT INTO public.servers (server_id, hostname, fqdn, ip4_address, is_enabled) VALUES (3, '4e538ad035e3', '4e538ad035e3', ' 192.168.52.1', 0) ON CONFLICT DO NOTHING; -INSERT INTO public.servers (server_id, hostname, fqdn, ip4_address, is_enabled) VALUES (4, '4e538ad035e4', '4e538ad035e4', ' 192.168.52.4', 0) ON 
CONFLICT DO NOTHING; -INSERT INTO public.servers (server_id, hostname, fqdn, ip4_address, is_enabled) VALUES (5, '4e538ad035e5', '4e538ad035e5', ' 192.168.52.5', 0) ON CONFLICT DO NOTHING; -INSERT INTO public.servers (server_id, hostname, fqdn, ip4_address, is_enabled) VALUES (6, '4e538ad035e6', '4e538ad035e6', ' 192.168.52.6', 0) ON CONFLICT DO NOTHING; -INSERT INTO public.servers (server_id, hostname, fqdn, ip4_address, is_enabled) VALUES (7, '4e538ad035e7', '4e538ad035e7', ' 192.168.52.7', 0) ON CONFLICT DO NOTHING; +INSERT INTO public.servers (server_id, hostname, fqdn, ip4_address, is_enabled) VALUES (1, '4e538ad035e1', '4e538ad035e1', 'ip4_address_1', 1) ON CONFLICT (server_id) DO UPDATE SET is_enabled = 1; +INSERT INTO public.servers (server_id, hostname, fqdn, ip4_address, is_enabled) VALUES (2, '4e538ad035e2', '4e538ad035e2', 'ip4_address_2', 0) ON CONFLICT DO NOTHING; +INSERT INTO public.servers (server_id, hostname, fqdn, ip4_address, is_enabled) VALUES (3, '4e538ad035e3', '4e538ad035e3', 'ip4_address_3', 0) ON CONFLICT DO NOTHING; +INSERT INTO public.servers (server_id, hostname, fqdn, ip4_address, is_enabled) VALUES (4, '4e538ad035e4', '4e538ad035e4', 'ip4_address_4', 0) ON CONFLICT DO NOTHING; +INSERT INTO public.servers (server_id, hostname, fqdn, ip4_address, is_enabled) VALUES (5, '4e538ad035e5', '4e538ad035e5', 'ip4_address_5', 0) ON CONFLICT DO NOTHING; +INSERT INTO public.servers (server_id, hostname, fqdn, ip4_address, is_enabled) VALUES (6, '4e538ad035e6', '4e538ad035e6', 'ip4_address_6', 0) ON CONFLICT DO NOTHING; +INSERT INTO public.servers (server_id, hostname, fqdn, ip4_address, is_enabled) VALUES (7, '4e538ad035e7', '4e538ad035e7', 'ip4_address_7', 0) ON CONFLICT DO NOTHING; COMMIT TRANSACTION; From c9d0d88a8cff08dc8444911d3796f124f3247a6e Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Fri, 1 May 2026 16:41:24 +0100 Subject: [PATCH 22/53] Change naming for blocklist --- CLAUDE.md | 6 +++--- cicada/commands/smart_schedule.py | 14 
+++++++------- cicada/lib/SmartScheduling/config.py | 2 +- cicada/lib/SmartScheduling/domain.py | 16 ++++++++-------- cicada/lib/SmartScheduling/pygad.py | 14 +++++++------- cicada/lib/scheduler.py | 10 +++++----- docs/Smart Scheduler Technical Overview.md | 16 ++++++++-------- setup/create_test_tap_setup.sql | 2 +- setup/schema.sql | 6 ++++-- 9 files changed, 44 insertions(+), 42 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 132e9e2..2721500 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -79,12 +79,12 @@ Located in `cicada/lib/SmartScheduling/` - `schedule_id`, `server_id`, `interval_mask` (cron expression) - `frequency_minutes`, `median_runtime_minutes`, `cpu_max` - `shift`: offset in minutes applied to shift job start time - - `blacklisted`: flag to exclude from GA optimization + - `blocklisted`: flag to exclude from GA optimization **`config.py`** - `GAConfig` dataclass: hyperparameters for the genetic algorithm - `num_generations`, `sol_per_pop`, `mutation_percent_genes`, etc. - - `blacklist_schedule_ids`: list of schedule IDs to exclude from optimization + - `blocklist_schedule_ids`: list of schedule IDs to exclude from optimization **`pygad.py`** - Wraps the external `pygad` library (genetic algorithm) @@ -120,7 +120,7 @@ Database setup SQL is in `setup/db_and_user.sql` and `setup/schema.sql`. Example ### SmartScheduling Workflow 1. **Load schedules**: Fetch all schedules for a server via `get_schedules_per_server()` -2. **Create Tap objects**: Convert schedule details to Tap instances; filter unsupported schedules (irregular cron, too frequent, blacklisted) +2. **Create Tap objects**: Convert schedule details to Tap instances; filter unsupported schedules (irregular cron, too frequent, blocklisted) 3. **Run GA optimization**: PyGAD evolves shifts over N generations to minimize resource conflicts 4. 
**Apply and checkpoint**: Save optimized shifts back to DB; record checkpoint for potential rollback diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index aad2125..fee8b28 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -29,15 +29,15 @@ def _create_tap_objects(schedule_ids, db_cur): """Create Tap objects from schedule_ids.""" taps : list[Tap] = [] - blacklisted_taps = scheduler.get_blacklisted_schedule_ids(db_cur) + blocklisted_taps = scheduler.get_blocklisted_schedule_ids(db_cur) # Fetch details for each schedule and convert to Tap objects for schedule_id in schedule_ids: details = scheduler.get_schedule_details(db_cur, schedule_id) - if schedule_id in blacklisted_taps: - details['blacklisted'] = True + if schedule_id in blocklisted_taps: + details['blocklisted'] = True else: - details['blacklisted'] = False + details['blocklisted'] = False try: tap = Tap(details, db_cur=db_cur) @@ -163,9 +163,9 @@ def main(server_id=None, dbname=None, ga_config=None): try: print("\n------------Starting Optimisation-----------------") - blacklist_schedule_ids = scheduler.get_blacklisted_schedule_ids(db_cur) - print(f"Blacklisted schedule IDs that will be excluded from optimization: {blacklist_schedule_ids}") - ga = pygad.GAPyGADScheduler(config=ga_config, blacklist_schedule_ids=blacklist_schedule_ids) + blocklist_schedule_ids = scheduler.get_blocklisted_schedule_ids(db_cur) + print(f"blocklisted schedule IDs that will be excluded from optimization: {blocklist_schedule_ids}") + ga = pygad.GAPyGADScheduler(config=ga_config, blocklist_schedule_ids=blocklist_schedule_ids) print("Running PyGAD solver ...") optimised_taps, __, peak_cpu, __, initial_fitness = ga.solve(taps) print(f"Optimized schedule for server_id {server_id}: new peak CPU {peak_cpu}") diff --git a/cicada/lib/SmartScheduling/config.py b/cicada/lib/SmartScheduling/config.py index 0112228..ea790da 100644 --- a/cicada/lib/SmartScheduling/config.py 
+++ b/cicada/lib/SmartScheduling/config.py @@ -13,4 +13,4 @@ class GAConfig: mutation_type: str = "random" keep_elitism: int = 2 random_seed: Optional[int] = None - blacklist_schedule_ids: Optional[List[str]] = field(default_factory=list) \ No newline at end of file + blocklist_schedule_ids: Optional[List[str]] = field(default_factory=list) \ No newline at end of file diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/SmartScheduling/domain.py index 43beab4..fb9835c 100644 --- a/cicada/lib/SmartScheduling/domain.py +++ b/cicada/lib/SmartScheduling/domain.py @@ -18,7 +18,7 @@ class Tap: median_runtime_minutes: int = 5 shifted: bool = False start_time_mins: Optional[int] = 0 - blacklisted: bool = False + blocklisted: bool = False def __init__(self, details, db_cur): @@ -26,8 +26,8 @@ def __init__(self, details, db_cur): self.server_id = details['server_id'] self.interval_mask = details['interval_mask'] self.determine_attributes(db_cur) - if details['blacklisted'] is not None: - self.blacklisted = details['blacklisted'] + if details['blocklisted'] is not None: + self.blocklisted = details['blocklisted'] def determine_attributes(self, db_cur): """Determine frequency and average runtime from interval_mask and scheduler module""" @@ -62,9 +62,9 @@ def _determine_start_time_mins(self): first_iter = it.get_next(datetime.datetime) self.start_time_mins = first_iter.hour * 60 + first_iter.minute - def is_blacklisted(self): - """Determine if the tap is blacklisted based on schedule_id""" - return self.blacklisted + def is_blocklisted(self): + """Determine if the tap is blocklisted based on schedule_id""" + return self.blocklisted def frequency_is_supported(self): """Determine if the tap frequency is supported for smart scheduling""" @@ -73,8 +73,8 @@ def frequency_is_supported(self): return True def is_unsupported(self): - """Determine if the tap is unsupported for smart scheduling based on frequency or if it's blacklisted""" - return (not 
self.frequency_is_supported() or self.is_blacklisted() or not self.is_regular_schedule()) + """Determine if the tap is unsupported for smart scheduling based on frequency or if it's blocklisted""" + return (not self.frequency_is_supported() or self.is_blocklisted() or not self.is_regular_schedule()) def is_regular_schedule(self): """Check if the cron expression is a regular schedule that can be optimized by the GA """ diff --git a/cicada/lib/SmartScheduling/pygad.py b/cicada/lib/SmartScheduling/pygad.py index 4759c91..c1a3ef5 100644 --- a/cicada/lib/SmartScheduling/pygad.py +++ b/cicada/lib/SmartScheduling/pygad.py @@ -17,24 +17,24 @@ class GAPyGADScheduler: Implementation Note: We only consider the regular taps during fitness evaluation to aid simplicity as there are few irregular - taps. All regular taps are fed into the scheduler however those on the blacklist will remain unchanged + taps. All regular taps are fed into the scheduler however those on the blocklist will remain unchanged and are kept purely to ensure the fitness evaluation is accurate to the actual schedule. We cap the max shift of a tap to within the hour to prevent large shifts for taps that run daily. 
""" - def __init__(self, config: Optional[Mapping[str, object]] = None, blacklist_schedule_ids: Optional[List[str]] = None): + def __init__(self, config: Optional[Mapping[str, object]] = None, blocklist_schedule_ids: Optional[List[str]] = None): if config is None: self.cfg = GAConfig() else: filtered_config = {key: value for key, value in config.items() if value is not None} self.cfg = GAConfig(**filtered_config) - self.blacklist_schedule_ids = blacklist_schedule_ids if blacklist_schedule_ids is not None else [] + self.blocklist_schedule_ids = blocklist_schedule_ids if blocklist_schedule_ids is not None else [] def _gene_space(self, taps: Sequence[Tap]) -> List[List[int]]: # Build gene_space per tap: each gene space is limited by it's frequency - # Unless the tap is unsupported (either blacklisted, irregular or has frequency greater than 60 mins) in which case we set the gene space to be just 0 + # Unless the tap is unsupported (either blocklisted, irregular or has frequency greater than 60 mins) in which case we set the gene space to be just 0 # so they remain unchanged in the GA but are still included in the fitness evaluation. Also constrain taps with frequency > 60 mins to an hour to prevent # large shifts and huge gene spaces. 
@@ -77,9 +77,9 @@ def _initial_population(self, taps: Sequence[Tap], gene_space: List[List[int]]) pop.append([gene_space[i][int(rng.integers(0, len(gene_space[i])))] for i in range(len(taps))]) return np.asarray(pop, dtype=int) - def _blacklist(self): - self.cfg.blacklist_schedule_ids = set(self.cfg.blacklist_schedule_ids) - raise NotImplementedError("Blacklist functionality not yet implemented") + def _blocklist(self): + self.cfg.blocklist_schedule_ids = set(self.cfg.blocklist_schedule_ids) + raise NotImplementedError("blocklist functionality not yet implemented") def fitness_fn(self, ga, solution, solution_idx): _, peak = evaluate_cpu_usage_and_peak(solution, self.taps) diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index fd93cea..898aed6 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -544,9 +544,9 @@ def restore_previous_schedules(db_cur, server_id=None, schedule_id=None, full=Fa rollback_schedule_backup_mask(db_cur, schedule_id=schedule_id, server_id=server_id, full=full) -def get_blacklisted_schedule_ids(db_cur): - """Get a list of schedule_ids that are blacklisted from optimization""" - sqlquery = "SELECT schedule_id FROM schedule_blacklist" +def get_blocklisted_schedule_ids(db_cur): + """Get a list of schedule_ids that are blocklisted from optimization""" + sqlquery = "SELECT schedule_id FROM schedule_blocklist" db_cur.execute(sqlquery) - blacklist_schedule_ids = [row[0] for row in db_cur.fetchall()] - return blacklist_schedule_ids \ No newline at end of file + blocklist_schedule_ids = [row[0] for row in db_cur.fetchall()] + return blocklist_schedule_ids \ No newline at end of file diff --git a/docs/Smart Scheduler Technical Overview.md b/docs/Smart Scheduler Technical Overview.md index a8ebad1..1e29597 100644 --- a/docs/Smart Scheduler Technical Overview.md +++ b/docs/Smart Scheduler Technical Overview.md @@ -38,7 +38,7 @@ The GA evolves shift offsets for each schedule over multiple generations to find - `snapshot_at`: 
timestamp of last update (auto-set on INSERT/UPDATE) - Indexes on `schedule_id` and `server_id` for fast lookups -- **`schedule_blacklist`** — Excludes schedules from optimization +- **`schedule_blocklist`** — Excludes schedules from optimization - `schedule_id` (PK): schedule to exclude - `reason`: free-text explanation (e.g., "manual request", "irregular cron") - Allows selective opt-out without deleting schedule @@ -54,7 +54,7 @@ The GA evolves shift offsets for each schedule over multiple generations to find 2. Create Tap Objects per schedule_id └─> Calculating properties based on cron schedule - └─> Check whether it's supported (blacklist, irregular etc.) + └─> Check whether it's supported (blocklist, irregular etc.) 3. Run GA Optimization └─> GAPyGADScheduler.solve(taps): @@ -151,13 +151,13 @@ Not all schedules are suitable for GA optimization: ### Supported Schedules - **Frequency**: `x > 1` & `x <=1440` minutes (1 minute to 1 day) - **Regularity**: Cron expression must be perfectly regular (same interval between every consecutive run) -- **Blacklist**: Schedule is not in `schedule_blacklist` table +- **blocklist**: Schedule is not in `schedule_blocklist` table ### Unsupported Schedules (Skipped) - **Irregular cron**: e.g., "0 0,12 * * *" (runs at two different times) — frequency varies - **Too frequent**: <= 1 minute - **Too rare**: > 1440 minutes (more than a day) -- **Blacklisted**: Explicitly marked in `schedule_blacklist` table +- **blocklisted**: Explicitly marked in `schedule_blocklist` table - **Parsing errors**: Invalid cron expressions **Unsupported taps remain in the fitness evaluation** but don't participate in the optimization (shift = 0 fixed), ensuring the fitness score reflects realistic daily load. 
@@ -195,7 +195,7 @@ Added to `cicada/cli.py`: ### Database Dependencies -- **Read**: `schedules`, `servers`, `schedule_logs`, `schedule_blacklist` +- **Read**: `schedules`, `servers`, `schedule_logs`, `schedule_blocklist` - **Write**: `schedules` (interval_mask), `schedule_backups` (checkpoints) - **Functions**: `set_snapshot_at()` trigger @@ -204,7 +204,7 @@ Added to `cicada/cli.py`: ### Why a Genetic Algorithm? 1. **NP-hard problem**: Optimal schedule assignment is combinatorially hard; GA provides good approximations in reasonable time -2. **Flexible constraints**: GA naturally handles irregular constraints (blacklists, unsupported frequencies). It allows us an easy mechanism to include these in the calculations while not changing them +2. **Flexible constraints**: GA naturally handles irregular constraints (blocklists, unsupported frequencies). It allows us an easy mechanism to include these in the calculations while not changing them 3. **No gradient**: Fitness landscape is non-smooth; gradient-free methods suit this 4. **Mature libraries**: PyGAD is well-maintained and configurable 5. **Discrete Runs**: Works well with discrete times @@ -217,7 +217,7 @@ Added to `cicada/cli.py`: ### Why Unsupported Taps Stay in Fitness? -- Ensures fitness reflects real daily load (including irregular jobs, blacklisted, etc.) +- Ensures fitness reflects real daily load (including irregular jobs, blocklisted, etc.) 
- Allows GA to account for load from non-optimizable schedules - Prevents GA from misoptimizing around missing jobs @@ -235,7 +235,7 @@ Added to `cicada/cli.py`: - **Bottleneck**: Fitness evaluation (diff array accumulation); PyGAD overhead minimal **Optimization Tips:** -- Blacklist irregular/infrequent taps to reduce optimization scope +- blocklist irregular/infrequent taps to reduce optimization scope - Reduce `num_generations` or `sol_per_pop` for faster, lower-quality results (specifically for when testing) ## Future Enhancements diff --git a/setup/create_test_tap_setup.sql b/setup/create_test_tap_setup.sql index 61f08b7..a179f15 100644 --- a/setup/create_test_tap_setup.sql +++ b/setup/create_test_tap_setup.sql @@ -519,6 +519,6 @@ INSERT INTO public.schedules (SERVER_ID, SCHEDULE_ID, INTERVAL_MASK, EXEC_COMMAN INSERT INTO public.schedules (SERVER_ID, SCHEDULE_ID, INTERVAL_MASK, EXEC_COMMAND, IS_ENABLED, IS_RUNNING) VALUES (6, 'cumulative_limit', '*/30 * * * *', 'dummy_command', 1, 1) ON CONFLICT DO NOTHING; -INSERT INTO public.schedule_blacklist (SCHEDULE_ID, SERVER_ID, REASON) VALUES ('plastic_full_table', 3, 'test for blacklist functionality') ON CONFLICT DO NOTHING; +INSERT INTO public.schedule_blocklist (SCHEDULE_ID, SERVER_ID, REASON) VALUES ('plastic_full_table', 3, 'test for blocklist functionality') ON CONFLICT DO NOTHING; COMMIT TRANSACTION; diff --git a/setup/schema.sql b/setup/schema.sql index 6332825..751651d 100644 --- a/setup/schema.sql +++ b/setup/schema.sql @@ -248,17 +248,19 @@ CREATE INDEX IF NOT EXISTS schedule_backups_server_id_idx (server_id); -CREATE TABLE IF NOT EXISTS public.schedule_blacklist +CREATE TABLE IF NOT EXISTS public.schedule_blocklist ( schedule_id character varying(255) NOT NULL, server_id integer, timestamp timestamp without time zone NOT NULL DEFAULT (now())::timestamp without time zone, reason character varying(255), - CONSTRAINT schedule_blacklist_pkey PRIMARY KEY (schedule_id) + CONSTRAINT schedule_blocklist_pkey 
PRIMARY KEY (schedule_id) ) WITH ( OIDS=FALSE ); +-- Add in schedules + COMMIT TRANSACTION; From 19ef0da4c40cc43b4d09ea0232ae82cfefb5220d Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Fri, 1 May 2026 16:55:39 +0100 Subject: [PATCH 23/53] Change Tap terminology to Schedule Co-authored-by: Copilot --- CLAUDE.md | 10 +- cicada/commands/smart_schedule.py | 102 ++++++++++----------- cicada/lib/SmartScheduling/domain.py | 8 +- cicada/lib/SmartScheduling/evaluation.py | 24 ++--- cicada/lib/SmartScheduling/pygad.py | 78 ++++++++-------- cicada/lib/scheduler.py | 2 +- docs/Smart Scheduler Technical Overview.md | 42 ++++----- setup/create_test_tap_setup.sql | 2 +- 8 files changed, 134 insertions(+), 134 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 2721500..5487b28 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -**Cicada** is a centralized, distributed job scheduler for Pipelinewise taps. It acts as a lightweight management layer between Linux CRON and executables, allowing jobs to be scheduled across multiple nodes via a central database rather than local cron. +**Cicada** is a centralized, distributed job scheduler for Pipelinewise schedules. It acts as a lightweight management layer between Linux CRON and executables, allowing jobs to be scheduled across multiple nodes via a central database rather than local cron. Key architectural concepts: - **Nodes/Servers**: Machines that register with Cicada and pull scheduling information from the central database. They execute `cicada exec_server_schedules` via cron. 
@@ -75,7 +75,7 @@ Commands are located in `cicada/commands/` and implement specific operations: Located in `cicada/lib/SmartScheduling/` **`domain.py`** -- `Tap` dataclass: represents a schedule as a "tap" (job) with properties: +- `Schedule` dataclass: represents a schedule as a "schedule" (job) with properties: - `schedule_id`, `server_id`, `interval_mask` (cron expression) - `frequency_minutes`, `median_runtime_minutes`, `cpu_max` - `shift`: offset in minutes applied to shift job start time @@ -101,7 +101,7 @@ Key tables: - `schedule_logs` – Historical execution records with runtime, status, output - `schedule_backups` – GA optimization snapshots for rollback -Database setup SQL is in `setup/db_and_user.sql` and `setup/schema.sql`. Example tap setup for smart scheduling in `setup/create_test_tap_sertup`. +Database setup SQL is in `setup/db_and_user.sql` and `setup/schema.sql`. Example schedule setup for smart scheduling in `setup/create_test_schedule_sertup`. ## Key Architectural Patterns @@ -120,7 +120,7 @@ Database setup SQL is in `setup/db_and_user.sql` and `setup/schema.sql`. Example ### SmartScheduling Workflow 1. **Load schedules**: Fetch all schedules for a server via `get_schedules_per_server()` -2. **Create Tap objects**: Convert schedule details to Tap instances; filter unsupported schedules (irregular cron, too frequent, blocklisted) +2. **Create Schedule objects**: Convert schedule details to Schedule instances; filter unsupported schedules (irregular cron, too frequent, blocklisted) 3. **Run GA optimization**: PyGAD evolves shifts over N generations to minimize resource conflicts 4. 
**Apply and checkpoint**: Save optimized shifts back to DB; record checkpoint for potential rollback @@ -144,7 +144,7 @@ Mock fixtures often include a test PostgreSQL database or in-memory alternatives ### Modifying Schedule Logic - Edit `cicada/lib/scheduler.py` for core logic changes (e.g., new state transitions) -- Update `cicada/lib/SmartScheduling/domain.py` if Tap validation rules change +- Update `cicada/lib/SmartScheduling/domain.py` if Schedule validation rules change - Update tests in `test_lib_scheduler.py` to cover new behavior ### Database Schema Changes diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index fee8b28..a250a39 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -8,7 +8,7 @@ from cicada.lib import postgres, utils from cicada.lib import scheduler from cicada.lib.SmartScheduling import pygad -from cicada.lib.SmartScheduling.domain import Tap +from cicada.lib.SmartScheduling.domain import Schedule def _get_schedules_per_server(server_id, db_cur=None): @@ -25,84 +25,84 @@ def _get_schedules_per_server(server_id, db_cur=None): -def _create_tap_objects(schedule_ids, db_cur): - """Create Tap objects from schedule_ids.""" +def _create_schedule_objects(schedule_ids, db_cur): + """Create Schedule objects from schedule_ids.""" - taps : list[Tap] = [] - blocklisted_taps = scheduler.get_blocklisted_schedule_ids(db_cur) + schedules : list[Schedule] = [] + blocklisted_schedules = scheduler.get_blocklisted_schedule_ids(db_cur) - # Fetch details for each schedule and convert to Tap objects + # Fetch details for each schedule and convert to Schedule objects for schedule_id in schedule_ids: details = scheduler.get_schedule_details(db_cur, schedule_id) - if schedule_id in blocklisted_taps: + if schedule_id in blocklisted_schedules: details['blocklisted'] = True else: details['blocklisted'] = False try: - tap = Tap(details, db_cur=db_cur) - # Ignore the few taps that have irregular cron 
expressions for now. There are few enough that this shouldn't impact the optimisation and is not worth the effort to try and support these irregular schedules in the GA - if not tap.is_regular_schedule(): - print(f"Skipping irregular schedule {tap.schedule_id} with cron expression {tap.interval_mask}") + schedule = Schedule(details, db_cur=db_cur) + # Ignore the few schedules that have irregular cron expressions for now. There are few enough that this shouldn't impact the optimisation and is not worth the effort to try and support these irregular schedules in the GA + if not schedule.is_regular_schedule(): + print(f"Skipping irregular schedule {schedule.schedule_id} with cron expression {schedule.interval_mask}") else: - taps.append(tap) + schedules.append(schedule) except Exception as e: print(f"Skipping schedule {schedule_id} due to error: {e}") - return taps + return schedules -def _update_schedule_cron(tap : Tap): +def _update_schedule_cron(schedule : Schedule): """ - Uses the start_time to shift the cron expression accordingly. Gene space is already limited from 0 to the frequency of the tap + Uses the start_time to shift the cron expression accordingly. Gene space is already limited from 0 to the frequency of the schedule Ex. 
form of cron expression: "8-59/15 * * * *" (every 15 minutes starting at minute 8 of each hour) Args: - tap (Tap): Tap object with updated shift attribute based on GA solution + schedule (Schedule): Schedule object with updated shift attribute based on GA solution Returns: - Updated tap object with new interval_mask based on the shift calculated by the GA optimizer + Updated Schedule object with new interval_mask based on the shift calculated by the GA optimizer """ - frequency = tap.frequency_minutes - start_time_mins = tap.start_time_mins + frequency = schedule.frequency_minutes + start_time_mins = schedule.start_time_mins - if tap.shifted == False or start_time_mins is None: + if schedule.shifted == False or start_time_mins is None: return # No shift needed - if frequency == 1440: # For daily taps, we can shift within the hour + if frequency == 1440: # For daily schedules, we can shift within the hour hour = start_time_mins // 60 minute = (start_time_mins - hour * 60) % 60 - tap.interval_mask = f"{minute} {hour} * * *" + schedule.interval_mask = f"{minute} {hour} * * *" # Check that the new cron expression is valid - if not croniter.is_valid(tap.interval_mask): - raise ValueError(f"Invalid cron expression generated: {tap.interval_mask} for tap {tap.schedule_id}") + if not croniter.is_valid(schedule.interval_mask): + raise ValueError(f"Invalid cron expression generated: {schedule.interval_mask} for schedule {schedule.schedule_id}") return - elif frequency == 60: # For hourly taps, we can shift within the hour - assert start_time_mins < frequency, f"Shift {start_time_mins} cannot be greater than or equal to frequency {frequency} for tap {tap.schedule_id}" - tap.interval_mask = f"{start_time_mins} * * * *" + elif frequency == 60: # For hourly schedules, we can shift within the hour + assert start_time_mins < frequency, f"Shift {start_time_mins} cannot be greater than or equal to frequency {frequency} for schedule {schedule.schedule_id}" + schedule.interval_mask = 
f"{start_time_mins} * * * *" # Check that the new cron expression is valid - if not croniter.is_valid(tap.interval_mask): - raise ValueError(f"Invalid cron expression generated: {tap.interval_mask} for tap {tap.schedule_id}") + if not croniter.is_valid(schedule.interval_mask): + raise ValueError(f"Invalid cron expression generated: {schedule.interval_mask} for schedule {schedule.schedule_id}") return elif frequency < 60: - assert start_time_mins < frequency, f"Shift {start_time_mins} cannot be greater than or equal to frequency {frequency} for tap {tap.schedule_id}" - tap.interval_mask = f"{start_time_mins}-59/{frequency} * * * *" + assert start_time_mins < frequency, f"Shift {start_time_mins} cannot be greater than or equal to frequency {frequency} for schedule {schedule.schedule_id}" + schedule.interval_mask = f"{start_time_mins}-59/{frequency} * * * *" # Check that the new cron expression is valid - if not croniter.is_valid(tap.interval_mask): - raise ValueError(f"Invalid cron expression generated: {tap.interval_mask} for tap {tap.schedule_id}") + if not croniter.is_valid(schedule.interval_mask): + raise ValueError(f"Invalid cron expression generated: {schedule.interval_mask} for schedule {schedule.schedule_id}") return -def _assign_new_schedules(optimised_taps: list[pygad.Tap], db_cur): +def _assign_new_schedules(optimised_schedules: list[pygad.Schedule], db_cur): """Assign new schedules based on the optimal schedule found.""" - # For each tap, update the schedule in the DB with the new interval_mask based on the start_time_mins calculated by the GA optimizer - for tap in optimised_taps: - previous_schedule_mask = tap.interval_mask - _update_schedule_cron(tap) - if tap.shifted: print(f"- {tap.schedule_id} : {tap.interval_mask}") - tap._determine_start_time_mins() + # For each schedule, update the schedule in the DB with the new interval_mask based on the start_time_mins calculated by the GA optimizer + for schedule in optimised_schedules: + 
previous_schedule_mask = schedule.interval_mask + _update_schedule_cron(schedule) + if schedule.shifted: print(f"- {schedule.schedule_id} : {schedule.interval_mask}") + schedule._determine_start_time_mins() schedule_details = { "adhoc_parameters": None, @@ -112,11 +112,11 @@ def _assign_new_schedules(optimised_taps: list[pygad.Tap], db_cur): "server_id": None, "last_run_date": None, "is_enabled": None, - "interval_mask": tap.interval_mask, + "interval_mask": schedule.interval_mask, "schedule_description": None, "auto_update_time": None, "schedule_order": None, - "schedule_id": tap.schedule_id, + "schedule_id": schedule.schedule_id, "is_async": None, "abort_running": None, "exec_command": None, @@ -126,10 +126,10 @@ def _assign_new_schedules(optimised_taps: list[pygad.Tap], db_cur): scheduler.update_schedule_details(db_cur=db_cur, schedule_details=schedule_details) previous_schedule_details = { - "schedule_id": tap.schedule_id, - "server_id": tap.server_id, + "schedule_id": schedule.schedule_id, + "server_id": schedule.server_id, "previous_interval_mask": previous_schedule_mask, - "interval_mask": tap.interval_mask, + "interval_mask": schedule.interval_mask, } scheduler.update_schedule_backups(db_cur, previous_schedule_details) @@ -149,13 +149,13 @@ def main(server_id=None, dbname=None, ga_config=None): else: # Get schedules for the server_id - print("\n-----------------Tap Setup----------------------") + print("\n-----------------Schedule Setup----------------------") schedule_ids = _get_schedules_per_server(server_id=server_id, db_cur=db_cur) print(f"Found {len(schedule_ids)} schedules for server_id {server_id}") - # Build Tap objects - taps = _create_tap_objects(schedule_ids, db_cur=db_cur) - if not taps: + # Build schedule objects + schedules = _create_schedule_objects(schedule_ids, db_cur=db_cur) + if not schedules: print("No valid schedules found to optimize.") sys.exit(1) print("-------------------------------------------------\n") @@ -167,14 +167,14 @@ def 
main(server_id=None, dbname=None, ga_config=None): print(f"blocklisted schedule IDs that will be excluded from optimization: {blocklist_schedule_ids}") ga = pygad.GAPyGADScheduler(config=ga_config, blocklist_schedule_ids=blocklist_schedule_ids) print("Running PyGAD solver ...") - optimised_taps, __, peak_cpu, __, initial_fitness = ga.solve(taps) + optimised_schedules, __, peak_cpu, __, initial_fitness = ga.solve(schedules) print(f"Optimized schedule for server_id {server_id}: new peak CPU {peak_cpu}") print("--------------------------------------------------\n") print("\n-------------Updating Schedules------------------") if peak_cpu < initial_fitness: # Only update schedules if we have found an improvement - _assign_new_schedules(optimised_taps, db_cur=db_cur) + _assign_new_schedules(optimised_schedules, db_cur=db_cur) else: print(f"No improvement found for server_id {server_id}. Current peak CPU: {initial_fitness}, Optimized peak CPU: {peak_cpu}. No schedule updates will be made.") print("--------------------------------------------------\n") diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/SmartScheduling/domain.py index fb9835c..fe56b91 100644 --- a/cicada/lib/SmartScheduling/domain.py +++ b/cicada/lib/SmartScheduling/domain.py @@ -9,7 +9,7 @@ @dataclass(frozen=False) -class Tap: +class Schedule: schedule_id: int server_id: int interval_mask: str @@ -63,17 +63,17 @@ def _determine_start_time_mins(self): self.start_time_mins = first_iter.hour * 60 + first_iter.minute def is_blocklisted(self): - """Determine if the tap is blocklisted based on schedule_id""" + """Determine if the Schedule is blocklisted based on schedule_id""" return self.blocklisted def frequency_is_supported(self): - """Determine if the tap frequency is supported for smart scheduling""" + """Determine if the Schedule frequency is supported for smart scheduling""" if (self.frequency_minutes != 1440 and self.frequency_minutes > 60): return False if (self.frequency_minutes <= 1): 
return False return True def is_unsupported(self): - """Determine if the tap is unsupported for smart scheduling based on frequency or if it's blocklisted""" + """Determine if the Schedule is unsupported for smart scheduling based on frequency or if it's blocklisted""" return (not self.frequency_is_supported() or self.is_blocklisted() or not self.is_regular_schedule()) def is_regular_schedule(self): diff --git a/cicada/lib/SmartScheduling/evaluation.py b/cicada/lib/SmartScheduling/evaluation.py index 5fedde9..97da82d 100644 --- a/cicada/lib/SmartScheduling/evaluation.py +++ b/cicada/lib/SmartScheduling/evaluation.py @@ -1,37 +1,37 @@ import numpy as np from typing import Sequence -from .domain import Tap +from .domain import Schedule -def evaluate_cpu_usage_and_peak(start_times: Sequence[int], taps: Sequence[Tap]): +def evaluate_cpu_usage_and_peak(start_times: Sequence[int], schedules: Sequence[Schedule]): """ Returns the CPU usage time series and peak CPU usage for a given schedule solution Args: - start_times: Sequence[int] : start time in minutes for each tap - taps: Sequence[Tap] : list of Tap objects + start_times: Sequence[int] : start time in minutes for each schedule + schedules: Sequence[Schedule] : list of Schedule objects Returns: usage: np.ndarray : CPU usage time series peak: float : peak CPU usage """ mins_per_day = 1440 - freqs = [tap.frequency_minutes for tap in taps] - run_times = [tap.median_runtime_minutes for tap in taps] + freqs = [schedule.frequency_minutes for schedule in schedules] + run_times = [schedule.median_runtime_minutes for schedule in schedules] diff = np.zeros(mins_per_day + 1, dtype=float) - assert len(start_times) == len(taps) == len(freqs) == len(run_times), "Length of start_times, taps, freqs, and run_times must all be the same" + assert len(start_times) == len(schedules) == len(freqs) == len(run_times), "Length of start_times, schedules, freqs, and run_times must all be the same" assert all(start_times[i] < freqs[i] for i in 
range(len(start_times))), "Start time should be the earliest it can be" - for i, tap in enumerate(taps): + for i, schedule in enumerate(schedules): freq = freqs[i] run_time = run_times[i] - cpu = float(tap.cpu_max) + cpu = float(schedule.cpu_max) minute = int(start_times[i]) - # Iterate through the day in increments of the tap's frequency, adding the tap's CPU usage to the diff array for the duration of its runtime. + # Iterate through the day in increments of the schedule's frequency, adding the schedule's CPU usage to the diff array for the duration of its runtime. # We use a diff array to efficiently calculate the cumulative CPU usage at each minute. Instead of appending the CPU usage for each minute the - # tap runs in, we add the CPU usage at the starting minute and subtract it at the end minute. + # schedule runs in, we add the CPU usage at the starting minute and subtract it at the end minute. while minute < mins_per_day: end = min(minute + run_time, mins_per_day) diff[minute] += cpu @@ -39,7 +39,7 @@ def evaluate_cpu_usage_and_peak(start_times: Sequence[int], taps: Sequence[Tap]) minute += freq # Sums up everything in the diff array to get the total CPU usage at each minute, and finds the peak usage. - # Ignore the last element of the diff array since it's just a placeholder to handle the end minute subtraction for taps that run until the end of the day. + # Ignore the last element of the diff array since it's just a placeholder to handle the end minute subtraction for schedules that run until the end of the day. 
usage = np.cumsum(diff[:-1]) peak = float(np.max(usage)) if usage.size else 0.0 return usage, peak \ No newline at end of file diff --git a/cicada/lib/SmartScheduling/pygad.py b/cicada/lib/SmartScheduling/pygad.py index c1a3ef5..f7470eb 100644 --- a/cicada/lib/SmartScheduling/pygad.py +++ b/cicada/lib/SmartScheduling/pygad.py @@ -2,7 +2,7 @@ from typing import List, Mapping, Optional, Sequence import numpy as np from .config import GAConfig -from .domain import Tap +from .domain import Schedule from .evaluation import evaluate_cpu_usage_and_peak import pygad @@ -13,14 +13,14 @@ class GAPyGADScheduler: Args: config: Optional[GAConfig] : configuration for the genetic algorithm Returns: - Schedule : optimized schedule for all taps + Schedule : optimized schedule for all schedules - Implementation Note: We only consider the regular taps during fitness evaluation to aid simplicity as there are few irregular - taps. All regular taps are fed into the scheduler however those on the blocklist will remain unchanged + Implementation Note: We only consider the regular schedules during fitness evaluation to aid simplicity as there are few irregular + schedules. All regular schedules are fed into the scheduler however those on the blocklist will remain unchanged and are kept purely to ensure the fitness evaluation is accurate to the actual schedule. - We cap the max shift of a tap to within the hour to prevent large shifts for taps that run daily. + We cap the max shift of a schedule to within the hour to prevent large shifts for schedules that run daily. 
""" def __init__(self, config: Optional[Mapping[str, object]] = None, blocklist_schedule_ids: Optional[List[str]] = None): @@ -32,49 +32,49 @@ def __init__(self, config: Optional[Mapping[str, object]] = None, blocklist_sche self.blocklist_schedule_ids = blocklist_schedule_ids if blocklist_schedule_ids is not None else [] - def _gene_space(self, taps: Sequence[Tap]) -> List[List[int]]: - # Build gene_space per tap: each gene space is limited by it's frequency - # Unless the tap is unsupported (either blocklisted, irregular or has frequency greater than 60 mins) in which case we set the gene space to be just 0 - # so they remain unchanged in the GA but are still included in the fitness evaluation. Also constrain taps with frequency > 60 mins to an hour to prevent + def _gene_space(self, schedules: Sequence[Schedule]) -> List[List[int]]: + # Build gene_space per schedule: each gene space is limited by it's frequency + # Unless the schedule is unsupported (either blocklisted, irregular or has frequency greater than 60 mins) in which case we set the gene space to be just 0 + # so they remain unchanged in the GA but are still included in the fitness evaluation. Also constrain schedules with frequency > 60 mins to an hour to prevent # large shifts and huge gene spaces. 
- min_start_times = [0] * len(taps) - max_start_times = [1] * len(taps) + min_start_times = [0] * len(schedules) + max_start_times = [1] * len(schedules) mins_per_day = 1440 - for i, tap in enumerate(taps): + for i, schedule in enumerate(schedules): # Fix the gene space so they're still included in the fitness eval but remain unshifted - if tap.is_unsupported(): - min_start_times[i] = tap.start_time_mins - max_start_times[i] = tap.start_time_mins + 1 + if schedule.is_unsupported(): + min_start_times[i] = schedule.start_time_mins + max_start_times[i] = schedule.start_time_mins + 1 - # Limit gene space to only shift within the hour for the taps which run less frequently - elif tap.frequency_minutes > 60: + # Limit gene space to only shift within the hour for the schedules which run less frequently + elif schedule.frequency_minutes > 60: # Prevent any max_start_time from going beyond the day limit - max_start_times[i] = min(tap.start_time_mins + 60, mins_per_day) + max_start_times[i] = min(schedule.start_time_mins + 60, mins_per_day) min_start_times[i] = max_start_times[i] - 60 # Gene space for the rest is just the frequency else: - max_start_times[i] = tap.frequency_minutes + max_start_times[i] = schedule.frequency_minutes return [list(range(min_start_time, max_start_time)) for min_start_time, max_start_time in zip(min_start_times, max_start_times)] - def _initial_population(self, taps: Sequence[Tap], gene_space: List[List[int]]) -> np.ndarray: + def _initial_population(self, schedules: Sequence[Schedule], gene_space: List[List[int]]) -> np.ndarray: rng = np.random.default_rng(self.cfg.random_seed) seed = [] # Add current start minutes as first solution to bias solution space towards current solution - for i, tap in enumerate(taps): + for i, schedule in enumerate(schedules): gs = gene_space[i] - s = int(tap.start_time_mins) + s = int(schedule.start_time_mins) seed.append(max(min(s, gs[-1]), gs[0])) pop = [seed] - # Populate the rest of the initial population randomly 
within the gene space limits for each tap + # Populate the rest of the initial population randomly within the gene space limits for each schedule for _ in range(self.cfg.sol_per_pop - 1): - pop.append([gene_space[i][int(rng.integers(0, len(gene_space[i])))] for i in range(len(taps))]) + pop.append([gene_space[i][int(rng.integers(0, len(gene_space[i])))] for i in range(len(schedules))]) return np.asarray(pop, dtype=int) def _blocklist(self): @@ -82,15 +82,15 @@ def _blocklist(self): raise NotImplementedError("blocklist functionality not yet implemented") def fitness_fn(self, ga, solution, solution_idx): - _, peak = evaluate_cpu_usage_and_peak(solution, self.taps) + _, peak = evaluate_cpu_usage_and_peak(solution, self.schedules) return -float(peak) - def solve(self, taps: Sequence[Tap]) -> tuple[Sequence[Tap], List[int], float, np.ndarray]: - self.taps = taps - gene_space = self._gene_space(taps) + def solve(self, schedules: Sequence[Schedule]) -> tuple[Sequence[Schedule], List[int], float, np.ndarray]: + self.schedules = schedules + gene_space = self._gene_space(schedules) print("Successfully initialised gene space") - initial_population = self._initial_population(taps, gene_space) + initial_population = self._initial_population(schedules, gene_space) print("Created initial population. Current Solution Start Times:") print(initial_population[0]) @@ -101,7 +101,7 @@ def solve(self, taps: Sequence[Tap]) -> tuple[Sequence[Tap], List[int], float, n num_generations=self.cfg.num_generations, sol_per_pop=self.cfg.sol_per_pop, num_parents_mating=self.cfg.num_parents_mating, - num_genes=len(taps), + num_genes=len(schedules), gene_type=int, gene_space=gene_space, mutation_percent_genes=self.cfg.mutation_percent_genes, @@ -123,14 +123,14 @@ def solve(self, taps: Sequence[Tap]) -> tuple[Sequence[Tap], List[int], float, n print(f"Optimised for {self.cfg.num_generations} generations. 
Best Solution Start Times:") print(best_solution) - usage, _ = evaluate_cpu_usage_and_peak(start_times, taps) + usage, _ = evaluate_cpu_usage_and_peak(start_times, schedules) - # Update tap objects start_time_mins attribute based on GA solution - for i, tap in enumerate(taps): - assert start_times[i] >= gene_space[i][0] and start_times[i] <= gene_space[i][-1], f"Start time for tap {tap.schedule_id} is out of gene space bounds. Start time: {start_times[i]}, Gene space: {gene_space[i]}" - if tap.is_unsupported(): assert start_times[i] == tap.start_time_mins, f"Unsupported tap {tap.schedule_id} should not have been shifted in the GA solution. {tap.start_time_mins} != {start_times[i]}" - elif tap.start_time_mins != start_times[i]: - tap.shifted = True - tap.start_time_mins = start_times[i] + # Update schedule objects start_time_mins attribute based on GA solution + for i, schedule in enumerate(schedules): + assert start_times[i] >= gene_space[i][0] and start_times[i] <= gene_space[i][-1], f"Start time for schedule {schedule.schedule_id} is out of gene space bounds. Start time: {start_times[i]}, Gene space: {gene_space[i]}" + if schedule.is_unsupported(): assert start_times[i] == schedule.start_time_mins, f"Unsupported schedule {schedule.schedule_id} should not have been shifted in the GA solution. {schedule.start_time_mins} != {start_times[i]}" + elif schedule.start_time_mins != start_times[i]: + schedule.shifted = True + schedule.start_time_mins = start_times[i] - return taps, start_times, peak_cpu, usage, -initial_fitness \ No newline at end of file + return schedules, start_times, peak_cpu, usage, -initial_fitness \ No newline at end of file diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index 898aed6..cfdd75e 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -412,7 +412,7 @@ def get_median_run_time(db_cur, schedule_id): """ Calculate the median runtime in minutes for a schedule_id from the schedule_log table. 
- Zero runs => 5 mins (conservative estimate, allows local testing without data and for new taps to be + Zero runs => 5 mins (conservative estimate, allows local testing without data and for new schedules to be scheduled without having to wait for historical data to be collected. """ diff --git a/docs/Smart Scheduler Technical Overview.md b/docs/Smart Scheduler Technical Overview.md index 1e29597..f7ea7dc 100644 --- a/docs/Smart Scheduler Technical Overview.md +++ b/docs/Smart Scheduler Technical Overview.md @@ -14,7 +14,7 @@ The GA evolves shift offsets for each schedule over multiple generations to find ### Core Components -1. **Domain Layer** (`domain.py`) — Represents schedules as "Taps" +1. **Domain Layer** (`domain.py`) — Represents schedules as Schedule objects 2. **GA Configuration** (`config.py`) — Hyperparameters for optimization @@ -52,16 +52,16 @@ The GA evolves shift offsets for each schedule over multiple generations to find 1. Load Schedules └─> Query database for all schedules on a server -2. Create Tap Objects per schedule_id +2. Create schedule Objects per schedule_id └─> Calculating properties based on cron schedule └─> Check whether it's supported (blocklist, irregular etc.) 3. 
Run GA Optimization - └─> GAPyGADScheduler.solve(taps): - ├─> Build gene_space (permissible shifts per tap) - │ ├─> Unsupported taps: gene_space = [0] (no shift) - │ ├─> Frequent taps (< 60 min): gene_space = [0..frequency) - │ └─> Infrequent taps (> 60 min): gene_space = [0..60) + └─> GAPyGADScheduler.solve(schedules): + ├─> Build gene_space (permissible shifts per schedule) + │ ├─> Unsupported schedules: gene_space = [0] (no shift) + │ ├─> Frequent schedules (< 60 min): gene_space = [0..frequency) + │ └─> Infrequent schedules (> 60 min): gene_space = [0..60) ├─> Initialize population with current solution as seed ├─> PyGAD evolves population over N generations │ └─> Each generation: mutation, crossover, fitness evaluation @@ -95,17 +95,17 @@ Rollback command triggered with server_id or schedule_id ### Gene Representation -Each gene is a minute representing where a tap should start within a day. +Each gene is a minute representing where a schedule should start within a day. - Gene value can take any value between the min and max start time - The gene space is limited to the smallest range it could be and then extrapolated out when it comes to evaluating the max cpu -- Defining unique gene spaces where each tap has it's own gene space allows us to reduce the search space considerably. -- By tailoring our gene spaces we can allow the tap to only traverse a couple of discrete positions, this makes our algorithm run as efficiently as possible and have a more comprehensive search of the solution space. +- Defining unique gene spaces where each schedule has it's own gene space allows us to reduce the search space considerably. +- By tailoring our gene spaces we can allow the schedule to only traverse a couple of discrete positions, this makes our algorithm run as efficiently as possible and have a more comprehensive search of the solution space. ### Fitness Function -Inverse of the peak_cpu since it's a minimisation problem. 
Peak_cpu is calculated over a single day since that covers 99% of all taps. - - For each tap, add its cpu_max to the usage array from `start_time` to `start_time + runtime` +Inverse of the peak_cpu since it's a minimisation problem. Peak_cpu is calculated over a single day since that covers 99% of all schedules. + - For each schedule, add its cpu_max to the usage array from `start_time` to `start_time + runtime` - Repeat for minute - Use a difference array for efficient cumulative calculation - Uses only the maximum CPU usage across entire day @@ -129,11 +129,11 @@ The creation of the offsprings uses different methods to change the solutions, h We seed the initial population with the current solution as we want to **prioritise stability over minor gains**. The system is already quite imprecise: - - assumes uniform CPU usage across all taps + - assumes uniform CPU usage across all schedules - rounds runtime to nearest minute - - assumes consistent runtimes from one tap run to another + - assumes consistent runtimes from one schedule run to another -Because of this imprecision and a natural desire not to overfit the system (e.g. we don't want a solution that minimises the peak cpu unless a heavy tap runs a minute longer than usual and then it clashes with another heavy tap) we want to only change the tap runs when it offers an actual advantage. Shifting schedules occassionally is needed to minimise the cpu usage, however shifting can also cause missed tap runs (if we e.g. change schedule 13-59/15 * * * * to 9-59/15 * * * * at 11 minutes past the hour). +Because of this imprecision and a natural desire not to overfit the system (e.g. we don't want a solution that minimises the peak cpu unless a heavy schedule runs a minute longer than usual and then it clashes with another heavy schedule) we want to only change the schedule runs when it offers an actual advantage. 
Shifting schedules occassionally is needed to minimise the cpu usage, however shifting can also cause missed schedule runs (if we e.g. change schedule 13-59/15 * * * * to 9-59/15 * * * * at 11 minutes past the hour). ## Configuration @@ -160,7 +160,7 @@ Not all schedules are suitable for GA optimization: - **blocklisted**: Explicitly marked in `schedule_blocklist` table - **Parsing errors**: Invalid cron expressions -**Unsupported taps remain in the fitness evaluation** but don't participate in the optimization (shift = 0 fixed), ensuring the fitness score reflects realistic daily load. +**Unsupported schedules remain in the fitness evaluation** but don't participate in the optimization (shift = 0 fixed), ensuring the fitness score reflects realistic daily load. ## Checkpointing & Rollback @@ -215,7 +215,7 @@ Added to `cicada/cli.py`: - Biases the GA towards an existing solution space (there are many solutions that are equally good and we don't need to explore every single one, this isn't a problem with a clear global minimum) - Biases search toward improvements on the current baseline which avoids "thrashing" between very different schedules -### Why Unsupported Taps Stay in Fitness? +### Why Unsupported schedules Stay in Fitness? - Ensures fitness reflects real daily load (including irregular jobs, blocklisted, etc.) - Allows GA to account for load from non-optimizable schedules @@ -223,19 +223,19 @@ Added to `cicada/cli.py`: ### Why No Frequency Constraints in Fitness? 
-- GA gene space enforces frequency constraints (each tap's shifts are within its frequency) +- GA gene space enforces frequency constraints (each schedule's shifts are within its frequency) - Fitness function only evaluates peak, not constraint satisfaction - Simpler, faster fitness evaluation without redundant checking ## Performance Considerations -- **Time Complexity**: O(pop_size × num_generations × num_taps × blocks_per_day) -- **Space Complexity**: O(num_taps × blocks_per_day) +- **Time Complexity**: O(pop_size × num_generations × num_schedules × blocks_per_day) +- **Space Complexity**: O(num_schedules × blocks_per_day) - **Typical Runtime**: ~5-30 seconds for 100-500 schedules, 20 generations, 40 population size - **Bottleneck**: Fitness evaluation (diff array accumulation); PyGAD overhead minimal **Optimization Tips:** -- blocklist irregular/infrequent taps to reduce optimization scope +- blocklist irregular/infrequent schedules to reduce optimization scope - Reduce `num_generations` or `sol_per_pop` for faster, lower-quality results (specifically for when testing) ## Future Enhancements diff --git a/setup/create_test_tap_setup.sql b/setup/create_test_tap_setup.sql index a179f15..c85dd53 100644 --- a/setup/create_test_tap_setup.sql +++ b/setup/create_test_tap_setup.sql @@ -1,4 +1,4 @@ --- Current state of taps as of 31st Mar 2026 +-- Current state of schedules as of 31st Mar 2026 START TRANSACTION; -- Add all servers From c75223d4dce80d9a93affd030dcbb202bbe3e8c4 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Tue, 5 May 2026 14:26:49 +0100 Subject: [PATCH 24/53] Rollback renaming and refactoring --- cicada/cli.py | 10 ++++------ cicada/commands/smart_schedule.py | 10 +++++++--- .../{rollback.py => smart_schedule_rollback.py} | 5 ++--- docs/Smart Scheduler Technical Overview.md | 4 ++-- 4 files changed, 15 insertions(+), 14 deletions(-) rename cicada/commands/{rollback.py => smart_schedule_rollback.py} (95%) diff --git a/cicada/cli.py b/cicada/cli.py index 
b6be71d..7145cf3 100644 --- a/cicada/cli.py +++ b/cicada/cli.py @@ -19,7 +19,6 @@ from cicada.commands import list_schedules from cicada.commands import delete_schedule from cicada.commands import smart_schedule -from cicada.commands import rollback @utils.named_exception_handler("Cicada") @@ -32,7 +31,6 @@ def __init__(self): "list_server_schedules", "exec_server_schedules", "smart_schedule", - "rollback", "show_schedule", "upsert_schedule", "exec_schedule", @@ -316,13 +314,13 @@ def smart_schedule(): ) @staticmethod - def rollback(): - """Rollback to original schedules in case of any issues during assignment.""" + def smart_schedule_rollback(): + """Rollback to original cron schedules.""" parser = argparse.ArgumentParser( allow_abbrev=False, add_help=True, prog=inspect.stack()[0][3], - description="Rollback to original schedules in case of any issues during assignment", + description="Rollback for smart scheduling, it resets the schedule to its original cron in case of any issues", ) parser.add_argument( "--full", @@ -339,7 +337,7 @@ def rollback(): ) group.add_argument("--schedule_id", type=str, required=False, help="ID of the schedule to rollback") args = parser.parse_args(sys.argv[2:]) - rollback.main(args.server_id, args.schedule_id, full = args.full) + smart_schedule_rollback.main(args.server_id, args.schedule_id, full = args.full) @staticmethod diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index a250a39..d729151 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -1,7 +1,6 @@ """Shifts the schedules on a node to distribute the load""" from __future__ import annotations -import datetime import sys from croniter import croniter from typing import Optional @@ -9,7 +8,7 @@ from cicada.lib import scheduler from cicada.lib.SmartScheduling import pygad from cicada.lib.SmartScheduling.domain import Schedule - +from cicada.commands import smart_schedule_rollback def 
_get_schedules_per_server(server_id, db_cur=None): """Get all schedules for a given server_id.""" @@ -135,7 +134,12 @@ def _assign_new_schedules(optimised_schedules: list[pygad.Schedule], db_cur): @utils.named_exception_handler("smart_schedule") -def main(server_id=None, dbname=None, ga_config=None): +def main(server_id=None, dbname=None, ga_config=None, rollback=False, schedule_id: Optional[str] = None, full=False): + if rollback: + print("Initiating rollback schedules.") + smart_schedule_rollback.main(server_id=server_id, schedule_id=schedule_id, dbname=dbname, full=full) + return + if server_id and type(server_id) != int: raise TypeError(f"server_id should be int not {type(server_id)}") db_conn = postgres.db_cicada(dbname) diff --git a/cicada/commands/rollback.py b/cicada/commands/smart_schedule_rollback.py similarity index 95% rename from cicada/commands/rollback.py rename to cicada/commands/smart_schedule_rollback.py index b5e89f8..a41f2eb 100644 --- a/cicada/commands/rollback.py +++ b/cicada/commands/smart_schedule_rollback.py @@ -1,10 +1,9 @@ - from typing import Optional from cicada.lib import postgres, utils from cicada.lib import scheduler -@utils.named_exception_handler("rollback") +@utils.named_exception_handler("smart_schedule_rollback") def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbname=None, full=False): """ Roll back schedules in case of issues during assignment. @@ -13,7 +12,7 @@ def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbn Args: server_id: Optional[int] [Mutually exclusive with schedule_id] Target server to roll back. - schedule_id: Optional[int] [Mutually exclusive with server_id] + schedule_id: Optional[str] [Mutually exclusive with server_id] Target schedule to roll back. db_cur: Database cursor to use for the rollback operations. 
dbname: Optional[str] diff --git a/docs/Smart Scheduler Technical Overview.md b/docs/Smart Scheduler Technical Overview.md index f7ea7dc..7c2ddf4 100644 --- a/docs/Smart Scheduler Technical Overview.md +++ b/docs/Smart Scheduler Technical Overview.md @@ -24,7 +24,7 @@ The GA evolves shift offsets for each schedule over multiple generations to find 5. **Command Handler** (`smart_schedule.py`) — Orchestrates optimization -6. **Rollback System** (`rollback.py`) — Recovery mechanism +6. **Rollback System** (`smart_schedule_rollback.py`) — Recovery mechanism ### Database Schema Changes @@ -185,7 +185,7 @@ Every optimization run creates a checkpoint in `schedule_backups`: Added to `cicada/cli.py`: - `cicada smart_schedule [--server_id ]` — Run optimization -- `cicada rollback [--server_id | --schedule_id ] [--full]` — Revert changes +- `cicada smart_schedule_rollback [--server_id | --schedule_id ] [--full]` — Revert changes ### Command-Line Parameters for `smart_schedule` From e326e0cc813076dbbda551998713cd9a4d83704b Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Tue, 5 May 2026 14:34:23 +0100 Subject: [PATCH 25/53] Remove CPU references --- CLAUDE.md | 2 +- cicada/commands/smart_schedule.py | 8 ++++---- cicada/lib/SmartScheduling/domain.py | 1 - cicada/lib/SmartScheduling/evaluation.py | 21 ++++++++++----------- cicada/lib/SmartScheduling/pygad.py | 12 ++++++------ docs/Smart Scheduler Technical Overview.md | 16 ++++++++-------- setup/schema.sql | 2 +- 7 files changed, 30 insertions(+), 32 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 5487b28..2fee441 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -77,7 +77,7 @@ Located in `cicada/lib/SmartScheduling/` **`domain.py`** - `Schedule` dataclass: represents a schedule as a "schedule" (job) with properties: - `schedule_id`, `server_id`, `interval_mask` (cron expression) - - `frequency_minutes`, `median_runtime_minutes`, `cpu_max` + - `frequency_minutes`, `median_runtime_minutes` - `shift`: offset in minutes applied to 
shift job start time - `blocklisted`: flag to exclude from GA optimization diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index d729151..68db90b 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -171,16 +171,16 @@ def main(server_id=None, dbname=None, ga_config=None, rollback=False, schedule_i print(f"blocklisted schedule IDs that will be excluded from optimization: {blocklist_schedule_ids}") ga = pygad.GAPyGADScheduler(config=ga_config, blocklist_schedule_ids=blocklist_schedule_ids) print("Running PyGAD solver ...") - optimised_schedules, __, peak_cpu, __, initial_fitness = ga.solve(schedules) - print(f"Optimized schedule for server_id {server_id}: new peak CPU {peak_cpu}") + optimised_schedules, __, peak_usage, __, initial_fitness = ga.solve(schedules) + print(f"Optimized schedule for server_id {server_id}: new peak usage {peak_usage}") print("--------------------------------------------------\n") print("\n-------------Updating Schedules------------------") - if peak_cpu < initial_fitness: # Only update schedules if we have found an improvement + if peak_usage < initial_fitness: # Only update schedules if we have found an improvement _assign_new_schedules(optimised_schedules, db_cur=db_cur) else: - print(f"No improvement found for server_id {server_id}. Current peak CPU: {initial_fitness}, Optimized peak CPU: {peak_cpu}. No schedule updates will be made.") + print(f"No improvement found for server_id {server_id}. Current peak usage: {initial_fitness}, Optimized peak usage: {peak_usage}. 
No schedule updates will be made.") print("--------------------------------------------------\n") except Exception as e: diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/SmartScheduling/domain.py index fe56b91..e426804 100644 --- a/cicada/lib/SmartScheduling/domain.py +++ b/cicada/lib/SmartScheduling/domain.py @@ -14,7 +14,6 @@ class Schedule: server_id: int interval_mask: str frequency_minutes: int - cpu_max: float = 1 median_runtime_minutes: int = 5 shifted: bool = False start_time_mins: Optional[int] = 0 diff --git a/cicada/lib/SmartScheduling/evaluation.py b/cicada/lib/SmartScheduling/evaluation.py index 97da82d..c17fd07 100644 --- a/cicada/lib/SmartScheduling/evaluation.py +++ b/cicada/lib/SmartScheduling/evaluation.py @@ -3,15 +3,15 @@ from .domain import Schedule -def evaluate_cpu_usage_and_peak(start_times: Sequence[int], schedules: Sequence[Schedule]): +def evaluate_usage_and_peak(start_times: Sequence[int], schedules: Sequence[Schedule]): """ - Returns the CPU usage time series and peak CPU usage for a given schedule solution + Returns the usage time series and peak usage for a given schedule solution Args: start_times: Sequence[int] : start time in minutes for each schedule schedules: Sequence[Schedule] : list of Schedule objects Returns: - usage: np.ndarray : CPU usage time series - peak: float : peak CPU usage + usage: np.ndarray : usage time series + peak: float : peak usage """ mins_per_day = 1440 @@ -26,19 +26,18 @@ def evaluate_cpu_usage_and_peak(start_times: Sequence[int], schedules: Sequence[ for i, schedule in enumerate(schedules): freq = freqs[i] run_time = run_times[i] - cpu = float(schedule.cpu_max) minute = int(start_times[i]) - # Iterate through the day in increments of the schedule's frequency, adding the schedule's CPU usage to the diff array for the duration of its runtime. - # We use a diff array to efficiently calculate the cumulative CPU usage at each minute. 
Instead of appending the CPU usage for each minute the - # schedule runs in, we add the CPU usage at the starting minute and subtract it at the end minute. + # Iterate through the day in increments of the schedule's frequency, adding the schedule's usage to the diff array for the duration of its runtime. + # We use a diff array to efficiently calculate the cumulative usage at each minute. Instead of appending the usage for each minute the + # schedule runs in, we add the usage at the starting minute and subtract it at the end minute. while minute < mins_per_day: end = min(minute + run_time, mins_per_day) - diff[minute] += cpu - diff[end] -= cpu + diff[minute] += 1 + diff[end] -= 1 minute += freq - # Sums up everything in the diff array to get the total CPU usage at each minute, and finds the peak usage. + # Sums up everything in the diff array to get the total usage at each minute, and finds the peak usage. # Ignore the last element of the diff array since it's just a placeholder to handle the end minute subtraction for schedules that run until the end of the day. 
usage = np.cumsum(diff[:-1]) peak = float(np.max(usage)) if usage.size else 0.0 diff --git a/cicada/lib/SmartScheduling/pygad.py b/cicada/lib/SmartScheduling/pygad.py index f7470eb..20df13a 100644 --- a/cicada/lib/SmartScheduling/pygad.py +++ b/cicada/lib/SmartScheduling/pygad.py @@ -3,7 +3,7 @@ import numpy as np from .config import GAConfig from .domain import Schedule -from .evaluation import evaluate_cpu_usage_and_peak +from .evaluation import evaluate_usage_and_peak import pygad @@ -82,7 +82,7 @@ def _blocklist(self): raise NotImplementedError("blocklist functionality not yet implemented") def fitness_fn(self, ga, solution, solution_idx): - _, peak = evaluate_cpu_usage_and_peak(solution, self.schedules) + _, peak = evaluate_usage_and_peak(solution, self.schedules) return -float(peak) def solve(self, schedules: Sequence[Schedule]) -> tuple[Sequence[Schedule], List[int], float, np.ndarray]: @@ -95,7 +95,7 @@ def solve(self, schedules: Sequence[Schedule]) -> tuple[Sequence[Schedule], List print(initial_population[0]) initial_fitness = self.fitness_fn(None, initial_population[0], 0) - print("Initial population fitness (max_cpu load):", -initial_fitness) + print("Initial population fitness (max usage):", -initial_fitness) ga = pygad.GA( num_generations=self.cfg.num_generations, @@ -118,12 +118,12 @@ def solve(self, schedules: Sequence[Schedule]) -> tuple[Sequence[Schedule], List best_solution, best_fitness, _ = ga.best_solution() start_times = [int(v) for v in best_solution] - peak_cpu = -float(best_fitness) + peak_usage = -float(best_fitness) print(f"Optimised for {self.cfg.num_generations} generations. 
Best Solution Start Times:") print(best_solution) - usage, _ = evaluate_cpu_usage_and_peak(start_times, schedules) + usage, _ = evaluate_usage_and_peak(start_times, schedules) # Update schedule objects start_time_mins attribute based on GA solution for i, schedule in enumerate(schedules): @@ -133,4 +133,4 @@ def solve(self, schedules: Sequence[Schedule]) -> tuple[Sequence[Schedule], List schedule.shifted = True schedule.start_time_mins = start_times[i] - return schedules, start_times, peak_cpu, usage, -initial_fitness \ No newline at end of file + return schedules, start_times, peak_usage, usage, -initial_fitness \ No newline at end of file diff --git a/docs/Smart Scheduler Technical Overview.md b/docs/Smart Scheduler Technical Overview.md index 7c2ddf4..ad22690 100644 --- a/docs/Smart Scheduler Technical Overview.md +++ b/docs/Smart Scheduler Technical Overview.md @@ -8,7 +8,7 @@ Cicada schedules jobs across multiple servers. Without optimization, jobs natura **SmartScheduling** uses a Genetic Algorithm (GA) to automatically shift job start times, distributing them across the 24-hour period while respecting original cron frequencies and maintaining schedule validity. This reduces peak resource contention and improves system throughput. -The GA evolves shift offsets for each schedule over multiple generations to find near-optimal start time distributions that minimize peak CPU load. +The GA evolves shift offsets for each schedule over multiple generations to find near-optimal start time distributions that minimize peak usage. ## Architecture @@ -65,11 +65,11 @@ The GA evolves shift offsets for each schedule over multiple generations to find ├─> Initialize population with current solution as seed ├─> PyGAD evolves population over N generations │ └─> Each generation: mutation, crossover, fitness evaluation - ├─> Calculate fitness (-peak_cpu) for each candidate + ├─> Calculate fitness (-peak_usage) for each candidate └─> Returns best solution 4. 
Evaluate Improvement - └─> Compare initial_peak_cpu vs optimized_peak_cpu. If improved: proceed to assignment + └─> Compare initial_peak_usage vs optimized_peak_usage. If improved: proceed to assignment 5. Update Schedules └─> both schedule table and schedule_backups in case of rollback @@ -97,18 +97,18 @@ Rollback command triggered with server_id or schedule_id Each gene is a minute representing where a schedule should start within a day. - Gene value can take any value between the min and max start time -- The gene space is limited to the smallest range it could be and then extrapolated out when it comes to evaluating the max cpu +- The gene space is limited to the smallest range it could be and then extrapolated out when it comes to evaluating the max usage over the day - Defining unique gene spaces where each schedule has it's own gene space allows us to reduce the search space considerably. - By tailoring our gene spaces we can allow the schedule to only traverse a couple of discrete positions, this makes our algorithm run as efficiently as possible and have a more comprehensive search of the solution space. ### Fitness Function -Inverse of the peak_cpu since it's a minimisation problem. Peak_cpu is calculated over a single day since that covers 99% of all schedules. - - For each schedule, add its cpu_max to the usage array from `start_time` to `start_time + runtime` +Inverse of the peak_usage since it's a minimisation problem. Peak_usage is calculated over a single day since that covers 99% of all schedules. 
+ - For each schedule, add it to the usage array from `start_time` to `start_time + runtime` - Repeat for minute - Use a difference array for efficient cumulative calculation - - Uses only the maximum CPU usage across entire day + - Uses only the maximum usage across entire day ### Crossover & Mutation @@ -133,7 +133,7 @@ The system is already quite imprecise: - rounds runtime to nearest minute - assumes consistent runtimes from one schedule run to another -Because of this imprecision and a natural desire not to overfit the system (e.g. we don't want a solution that minimises the peak cpu unless a heavy schedule runs a minute longer than usual and then it clashes with another heavy schedule) we want to only change the schedule runs when it offers an actual advantage. Shifting schedules occassionally is needed to minimise the cpu usage, however shifting can also cause missed schedule runs (if we e.g. change schedule 13-59/15 * * * * to 9-59/15 * * * * at 11 minutes past the hour). +Because of this imprecision and a natural desire not to overfit the system (e.g. we don't want a solution that minimises the peak usage unless a heavy schedule runs a minute longer than usual and then it clashes with another heavy schedule) we want to only change the schedule runs when it offers an actual advantage. Shifting schedules occassionally is needed to minimise the usage, however shifting can also cause missed schedule runs (if we e.g. change schedule 13-59/15 * * * * to 9-59/15 * * * * at 11 minutes past the hour). 
## Configuration diff --git a/setup/schema.sql b/setup/schema.sql index 751651d..fb28f10 100644 --- a/setup/schema.sql +++ b/setup/schema.sql @@ -202,7 +202,7 @@ WITH ( ; --- Table to record previous scheduling for rollback functionality with smart scheduling +-- Table to record previous scheduling for smart schedule rollback functionality with smart scheduling CREATE TABLE IF NOT EXISTS public.schedule_backups ( schedule_id character varying(255) NOT NULL, From d962cf1dee005e7f72accc0b88da17ffa03ebdc7 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Mon, 11 May 2026 10:32:54 +0100 Subject: [PATCH 26/53] Change server_id in the backups table for spread_schedules --- cicada/commands/spread_schedules.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cicada/commands/spread_schedules.py b/cicada/commands/spread_schedules.py index 8c5d837..bbe1a33 100644 --- a/cicada/commands/spread_schedules.py +++ b/cicada/commands/spread_schedules.py @@ -117,6 +117,16 @@ def main(spread_details, dbname=None): output_message += " | Forced abort_running and adhoc_execute" scheduler.update_schedule_details(db_cur, new_schedule_details) + + # Update schedule_backups for rollback functionality + previous_schedule_details = { + "schedule_id": current_schedule_details["schedule_id"], + "server_id": current_schedule_details["server_id"], + "previous_interval_mask": current_schedule_details["interval_mask"], + "interval_mask": new_schedule_details["interval_mask"], + } + scheduler.update_schedule_backups(db_cur, previous_schedule_details) + else: output_message = ( f"'{str(current_schedule_details['schedule_id'])}' will be reassigned : " From 122eee6104ebf4cbf94a2ec880b79f61f3b58d70 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Tue, 19 May 2026 09:47:15 +0100 Subject: [PATCH 27/53] Reroute rollback in CLI --- cicada/cli.py | 92 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 64 insertions(+), 28 deletions(-) diff --git a/cicada/cli.py b/cicada/cli.py index 
7145cf3..221e06b 100644 --- a/cicada/cli.py +++ b/cicada/cli.py @@ -277,17 +277,27 @@ def delete_schedule(): @staticmethod def smart_schedule(): - """Generate smart schedules for a server using genetic algorithm""" + """Generate smart schedules for a server using genetic algorithm, or rollback previous changes""" parser = argparse.ArgumentParser( allow_abbrev=False, add_help=True, prog=inspect.stack()[0][3], - description="Generate smart schedules for a server using genetic algorithm", + description="Generate smart schedules for a server using genetic algorithm, or rollback previous changes", ) - parser.add_argument("--server_id", type=int, required=False, help="ID of the server") + + # Two subcommands; optimise for generating smart schedules and rollback for rolling back to previous schedules + subparsers = parser.add_subparsers(dest="action", help="Action to perform. Either optimise schedules or rollback to previous/original schedules. Default = optimise") + + # (Default) optimise subcommand + optimise_parser = subparsers.add_parser( + "optimise", + help="optimise schedules using genetic algorithm", + add_help=True, + ) + optimise_parser.add_argument("--server_id", type=int, required=False, help="ID of the server") # Optional GA Configurations - ga_config = parser.add_argument_group("ga_config", "Optional configurations for the genetic algorithm optimizer") + ga_config = optimise_parser.add_argument_group("ga_config", "Optional configurations for the genetic algorithm optimiser") ga_config.add_argument("--num_generations",type=int,required=False, help="Number of generations for the genetic algorithm. Default: 20") ga_config.add_argument("--sol_per_pop",type=int,required=False, help="Number of solutions per population for the genetic algorithm. Default: 40") ga_config.add_argument("--num_parents_mating",type=int,required=False, help="Number of parents mating for the genetic algorithm. 
Default: 10") @@ -297,38 +307,33 @@ def smart_schedule(): ga_config.add_argument("--mutation_type",type=str,required=False, help="Mutation type for the genetic algorithm. Allowed values: ['random', 'swap', 'inversion', 'scramble']. Default: random") ga_config.add_argument("--keep_elitism",type=int,required=False, help="Number of elite solutions to keep for the next generation. Default: 2") ga_config.add_argument("--random-seed",type=int,required=False, help="Set a random seed to get repeatable results. Default: None") - args = parser.parse_args(sys.argv[2:]) - smart_schedule.main( - args.server_id, - ga_config={ - "num_generations": args.num_generations, - "sol_per_pop": args.sol_per_pop, - "num_parents_mating": args.num_parents_mating, - "mutation_percent_genes": args.mutation_percent_genes, - "parent_selection_type": args.parent_selection_type, - "crossover_type": args.crossover_type, - "mutation_type": args.mutation_type, - "keep_elitism": args.keep_elitism, - "random_seed": args.random_seed, - }, - ) - @staticmethod - def smart_schedule_rollback(): - """Rollback to original cron schedules.""" - parser = argparse.ArgumentParser( - allow_abbrev=False, + # Rollback subcommand + rollback_parser = subparsers.add_parser( + "rollback", + help="Rollback to original or previous cron schedules", add_help=True, prog=inspect.stack()[0][3], description="Rollback for smart scheduling, it resets the schedule to its original cron in case of any issues", ) - parser.add_argument( + + # Mutually exclusive flags for rollback mode + rollback_mode = rollback_parser.add_mutually_exclusive_group(required=True) + rollback_mode.add_argument( "--full", default=False, action="store_true", - help="If specified, will roll back to the original schedule instead of the previous schedule", + help="Rollback to original schedule (set smart_interval_mask to NULL)", ) - group = parser.add_mutually_exclusive_group() + rollback_mode.add_argument( + "--previous", + default=False, + 
action="store_true", + help="Rollback to most recent snapshot (step back one optimization)", + ) + + # Add mutually exclusive arguments for rollback subcommand to specify either server_id or schedule_id for targeted rollback + group = rollback_parser.add_mutually_exclusive_group() group.add_argument( "--server_id", type=int, @@ -336,9 +341,40 @@ def smart_schedule_rollback(): help="ID of the server to rollback, if not specified will rollback all servers", ) group.add_argument("--schedule_id", type=str, required=False, help="ID of the schedule to rollback") + rollback_parser.add_argument( + "--snapshot_id", + type=int, + required=False, + help="Specific snapshot ID to restore to (optional, used with --previous)", + ) + + # Parse arguments and call smart_schedule.main with appropriate arguments based on subcommand args = parser.parse_args(sys.argv[2:]) - smart_schedule_rollback.main(args.server_id, args.schedule_id, full = args.full) + if args.action == "optimize" or args.action is None: + smart_schedule.main( + server_id=getattr(args, 'server_id', None), + ga_config={ + "num_generations": getattr(args, 'num_generations', None), + "sol_per_pop": getattr(args, 'sol_per_pop', None), + "num_parents_mating": getattr(args, 'num_parents_mating', None), + "mutation_percent_genes": getattr(args, 'mutation_percent_genes', None), + "parent_selection_type": getattr(args, 'parent_selection_type', None), + "crossover_type": getattr(args, 'crossover_type', None), + "mutation_type": getattr(args, 'mutation_type', None), + "keep_elitism": getattr(args, 'keep_elitism', None), + "random_seed": getattr(args, 'random_seed', None), + }, + ) + elif args.action == "rollback": + smart_schedule.main( + server_id=getattr(args, 'server_id', None), + schedule_id=getattr(args, 'schedule_id', None), + rollback=True, + full=getattr(args, 'full', False), + previous=getattr(args, 'previous', False), + snapshot_id=getattr(args, 'snapshot_id', None), + ) @staticmethod def version(): From 
a6d7f624d60636a75a279d707aa9964a48cd14c7 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Tue, 19 May 2026 11:26:06 +0100 Subject: [PATCH 28/53] Refactor schedule backups -> adds a separate column for the smart_interval_mask allowing easy rollback by setting it to null. Rolling back to the previous fetches the latest from the new schedules_backups table. New table does the automatic snapshotting instead of the backing up done within the code --- cicada/cli.py | 4 +- cicada/commands/smart_schedule.py | 17 +-- cicada/commands/smart_schedule_rollback.py | 113 +++++++++----- cicada/commands/upsert_schedule.py | 4 - cicada/lib/scheduler.py | 166 +++++++-------------- setup/schema.sql | 84 ++++++----- 6 files changed, 177 insertions(+), 211 deletions(-) diff --git a/cicada/cli.py b/cicada/cli.py index 221e06b..44b55d4 100644 --- a/cicada/cli.py +++ b/cicada/cli.py @@ -351,7 +351,8 @@ def smart_schedule(): # Parse arguments and call smart_schedule.main with appropriate arguments based on subcommand args = parser.parse_args(sys.argv[2:]) - if args.action == "optimize" or args.action is None: + if args.action == "optimise" or args.action is None: + print("Initiating smart schedule optimization.") smart_schedule.main( server_id=getattr(args, 'server_id', None), ga_config={ @@ -367,6 +368,7 @@ def smart_schedule(): }, ) elif args.action == "rollback": + print("Initiating smart schedule rollback.") smart_schedule.main( server_id=getattr(args, 'server_id', None), schedule_id=getattr(args, 'schedule_id', None), diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index 68db90b..4852f0e 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -98,7 +98,6 @@ def _assign_new_schedules(optimised_schedules: list[pygad.Schedule], db_cur): # For each schedule, update the schedule in the DB with the new interval_mask based on the start_time_mins calculated by the GA optimizer for schedule in optimised_schedules: - 
previous_schedule_mask = schedule.interval_mask _update_schedule_cron(schedule) if schedule.shifted: print(f"- {schedule.schedule_id} : {schedule.interval_mask}") schedule._determine_start_time_mins() @@ -120,24 +119,18 @@ def _assign_new_schedules(optimised_schedules: list[pygad.Schedule], db_cur): "abort_running": None, "exec_command": None, "first_run_date": None, - "is_running": None - } + "is_running": None, + "smart_interval_mask": None + } scheduler.update_schedule_details(db_cur=db_cur, schedule_details=schedule_details) - previous_schedule_details = { - "schedule_id": schedule.schedule_id, - "server_id": schedule.server_id, - "previous_interval_mask": previous_schedule_mask, - "interval_mask": schedule.interval_mask, - } - scheduler.update_schedule_backups(db_cur, previous_schedule_details) @utils.named_exception_handler("smart_schedule") -def main(server_id=None, dbname=None, ga_config=None, rollback=False, schedule_id: Optional[str] = None, full=False): +def main(server_id=None, dbname=None, ga_config=None, rollback=False, schedule_id: Optional[str] = None, full=False, previous=False, snapshot_id: Optional[int] = None): if rollback: print("Initiating rollback schedules.") - smart_schedule_rollback.main(server_id=server_id, schedule_id=schedule_id, dbname=dbname, full=full) + smart_schedule_rollback.main(server_id=server_id, schedule_id=schedule_id, dbname=dbname, full=full, previous=previous, snapshot_id=snapshot_id) return if server_id and type(server_id) != int: raise TypeError(f"server_id should be int not {type(server_id)}") diff --git a/cicada/commands/smart_schedule_rollback.py b/cicada/commands/smart_schedule_rollback.py index a41f2eb..1f88cf4 100644 --- a/cicada/commands/smart_schedule_rollback.py +++ b/cicada/commands/smart_schedule_rollback.py @@ -4,56 +4,89 @@ @utils.named_exception_handler("smart_schedule_rollback") -def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbname=None, full=False): +def main(server_id: 
Optional[int] = None, schedule_id: Optional[str] = None, dbname=None, full=False, previous=False, snapshot_id: Optional[int] = None): """ - Roll back schedules in case of issues during assignment. - If neither server_id and schedule_id are provided, rollback applies to all servers. + Roll back schedules after smart_schedule optimization. Args: server_id: Optional[int] [Mutually exclusive with schedule_id] - Target server to roll back. + Target server to roll back. schedule_id: Optional[str] [Mutually exclusive with server_id] - Target schedule to roll back. - db_cur: Database cursor to use for the rollback operations. + Target schedule to roll back. dbname: Optional[str] - Database name to connect to if db_cur is not provided. If db_cur is provided, dbname is ignored. - prev: bool - If True, roll back to the previous schedule in the schedule_backups table. If False, roll back to the original schedule + Database name to connect to. + full: bool + If True, set smart_interval_mask to NULL (revert to original interval_mask). + previous: bool + If True, restore to the most recent snapshot (step back one optimization). + snapshot_id: Optional[int] + Specific snapshot_id to restore to (used with --previous). """ - if type(server_id) != int and server_id is not None: raise TypeError(f"server_id needs to be of type int. {type(server_id)}") - if type(schedule_id) != str and schedule_id is not None: raise TypeError("schedule_id needs to be of type str") + if type(server_id) != int and server_id is not None: + raise TypeError(f"server_id needs to be of type int. 
{type(server_id)}") + if type(schedule_id) != str and schedule_id is not None: + raise TypeError("schedule_id needs to be of type str") + + if not full and not previous: + raise ValueError("Either --full or --previous flag must be provided") + + if full and previous: + raise ValueError("Cannot use both --full and --previous flags") db_conn = postgres.db_cicada(dbname) db_cur = db_conn.cursor() - - if not server_id and not schedule_id: - # Recursively call rollback for each server_id if no specific server_id is provided - server_ids = scheduler.get_all_server_ids(db_cur) - for id in server_ids: - main(server_id=id[0], dbname=dbname, full=full) - return - - if full: print("\n------------Starting RollbackTo Orig Schedules-----------------") - else: print("\n------------Starting Rollback To Previous Schedules-----------------") try: - if server_id: - scheduler.restore_previous_schedules(db_cur=db_cur, server_id=server_id, full=full) - schedule_ids = [row[0] for row in scheduler.get_all_schedule_ids_per_server(db_cur, server_id)] - schedule_masks = [scheduler.get_schedule_details(db_cur, schedule_id)["interval_mask"] for schedule_id in schedule_ids] - print("New Schedules after rollback:\n") - for schedule_id, schedule_mask in zip(schedule_ids, schedule_masks): - print(f"{schedule_id} : {schedule_mask}") - else: - scheduler.restore_previous_schedules(db_cur=db_cur, schedule_id=schedule_id, full=full) - schedule = scheduler.get_schedule_details(db_cur, schedule_id) - if len(schedule) == 0: - raise Exception(f"Schedule with schedule_id {schedule_id} not found for rollback.") - print(f"Schedule {schedule_id} rolled back successfully to {schedule['interval_mask']}.") - print("Rollback successful") - + if full: + print("\n------------Starting Full Rollback (set smart_interval_mask to NULL)-----------------") + _rollback_full(db_cur, server_id, schedule_id) + print("Full rollback successful\n") + + elif previous: + print("\n------------Starting Rollback to Previous 
Snapshot-----------------") + _rollback_previous(db_cur, server_id, schedule_id, snapshot_id) + print("Rollback to previous snapshot successful\n") + except Exception as e: - print(f"Error during rollback for server_id {server_id} and schedule_id {schedule_id}: {e}") - - db_cur.close() - db_conn.close() \ No newline at end of file + print(f"Error during rollback: {e}") + raise + + finally: + db_cur.close() + db_conn.close() + + +def _rollback_full(db_cur, server_id: Optional[int], schedule_id: Optional[str]): + """ + Full rollback: Set smart_interval_mask to NULL for all affected schedules. + """ + if not server_id and not schedule_id: + raise ValueError("Either server_id or schedule_id must be provided") + + if server_id and schedule_id: + raise ValueError("Cannot specify both server_id and schedule_id") + + # Determine which schedules to rollback + if schedule_id: + schedule_ids = [schedule_id] + else: + query = "SELECT DISTINCT schedule_id FROM schedules WHERE server_id = %s" + db_cur.execute(query, (server_id,)) + schedule_ids = [row[0] for row in db_cur.fetchall()] + + update_all_schedules_query = """ + UPDATE schedules SET smart_interval_mask = NULL WHERE schedule_id = ANY(%s) + """ + db_cur.execute(update_all_schedules_query, (schedule_ids,)) + print(f"{len(schedule_ids)} schedules rolled back to original interval_mask") + + +def _rollback_previous(db_cur, server_id: Optional[int], snapshot_id: Optional[int]): + """ + Rollback to previous snapshot: Restore schedule state from a historical snapshot. 
+ """ + if not server_id: + raise ValueError("server_id must be provided") + + scheduler.restore_previous_schedules(db_cur, server_id=server_id, snapshot_id=snapshot_id) + print(f"Schedules for server_id {server_id} rolled back to snapshot_id {snapshot_id} from schedule_backups") \ No newline at end of file diff --git a/cicada/commands/upsert_schedule.py b/cicada/commands/upsert_schedule.py index ff08fa0..e430eb7 100644 --- a/cicada/commands/upsert_schedule.py +++ b/cicada/commands/upsert_schedule.py @@ -79,10 +79,6 @@ def main(schedule_details, dbname=None): new_schedule_details["schedule_group_id"] = schedule_details["schedule_group_id"] scheduler.update_schedule_details(db_cur, new_schedule_details) - - scheduler.reset_schedule_backup_mask(db_cur, new_schedule_details) - print("Updated schedule details in the schedule_backups table for potential rollback.") - print(tabulate(new_schedule_details.items(), ["Detail", "Value"], tablefmt="psql")) db_cur.close() db_conn.close() diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index cfdd75e..e184f20 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -81,6 +81,7 @@ def get_schedule_details(db_cur, schedule_id): ,is_running ,abort_running ,interval_mask + ,smart_interval_mask ,first_run_date ,last_run_date ,exec_command @@ -107,12 +108,13 @@ def get_schedule_details(db_cur, schedule_id): schedule_details["is_running"] = row[7] schedule_details["abort_running"] = row[8] schedule_details["interval_mask"] = row[9] - schedule_details["first_run_date"] = row[10] - schedule_details["last_run_date"] = row[11] - schedule_details["exec_command"] = row[12] - schedule_details["parameters"] = row[13] - schedule_details["adhoc_parameters"] = row[14] - schedule_details["schedule_group_id"] = row[15] + schedule_details["smart_interval_mask"] = row[10] + schedule_details["first_run_date"] = row[11] + schedule_details["last_run_date"] = row[12] + schedule_details["exec_command"] = row[13] + 
schedule_details["parameters"] = row[14] + schedule_details["adhoc_parameters"] = row[15] + schedule_details["schedule_group_id"] = row[16] return schedule_details @@ -288,7 +290,7 @@ def get_all_schedules(db_cur, server_id, is_async): ( /* foo */ (SELECT schedule_id, - interval_mask, + COALESCE(smart_interval_mask, interval_mask), exec_command, COALESCE(adhoc_parameters, parameters, '') AS parameters, adhoc_execute, @@ -372,10 +374,6 @@ def delete_schedule(db_cur, schedule_id): db_cur.execute(sqlquery) -def delete_schedule_backup(db_cur, schedule_id): - sqlquery = f"DELETE from schedule_backups WHERE schedule_id = '{schedule_id}'" - db_cur.execute(sqlquery) - def get_all_server_ids(db_cur): """Get all possible server_ids from the servers table""" @@ -391,7 +389,6 @@ def get_all_schedule_ids_per_server(db_cur, server_id): SELECT DISTINCT schedule_id FROM schedules WHERE server_id = %s - AND (schedule_description IS NULL OR schedule_description NOT LIKE '%%==%%') ORDER BY schedule_id """ db_cur.execute(sqlquery, (server_id,)) @@ -399,14 +396,6 @@ def get_all_schedule_ids_per_server(db_cur, server_id): return schedule_ids -def get_all_schedule_backups(db_cur): - """Get all entries from the schedule_backups table""" - sqlquery = "SELECT schedule_id, original_interval_mask FROM schedule_backups" - db_cur.execute(sqlquery) - schedule_backups = db_cur.fetchall() - - return schedule_backups - def get_median_run_time(db_cur, schedule_id): """ @@ -433,115 +422,66 @@ def get_median_run_time(db_cur, schedule_id): # No runs -> assigns default runtime of 5 minutes return 5 -def reset_schedule_backup_mask(db_cur, schedule_details): + +def retrieve_snapshots(db_cur, server_id): """ - Resets the interval_mask of a schedule in the schedule_backups table. Called when schedule frequency is changed or a new schedule is added. - Sets all interval_mask fiedls to the new interval_mask to ensure that the rollback won't revert to an outdated frequency. 
+ Retrieve all snapshots for a schedule in reverse chronological order. """ - insert_columns = "schedule_id, interval_mask, original_interval_mask, previous_interval_mask" - insert_values = ( - f"'{schedule_details['schedule_id']}', " - f"'{schedule_details['interval_mask']}', " - f"'{schedule_details['interval_mask']}', " - f"'{schedule_details['interval_mask']}'" - ) - update_server_id = "" - if schedule_details["server_id"] is not None: - insert_columns += ", server_id" - insert_values += f", '{schedule_details['server_id']}'" - update_server_id = f", server_id = '{schedule_details['server_id']}'" - sqlquery = f""" - INSERT INTO public.schedule_backups ({insert_columns}) - VALUES ({insert_values}) - ON CONFLICT (schedule_id) DO UPDATE SET - interval_mask = EXCLUDED.interval_mask, - original_interval_mask = EXCLUDED.original_interval_mask, - previous_interval_mask = EXCLUDED.previous_interval_mask - {update_server_id} + sqlquery = """ + SELECT DISTINCT(snapshot_id), snapshot_timestamp + FROM schedule_backups + WHERE server_id = %s + ORDER BY snapshot_timestamp DESC """ - db_cur.execute(sqlquery) + db_cur.execute(sqlquery, (server_id,)) + return db_cur.fetchall() -def update_schedule_backups(db_cur, previous_schedule_details): +def get_schedules_by_snapshot_id(db_cur, snapshot_id, server_id): """ - Insert a schedule configuration into the schedule_backups table for rollback. - Args: db_cur: Database cursor. - previous_schedule_details: dict with keys schedule_id, server_id, interval_mask, previous_interval_mask, original_interval_mask - or - dict with keys schedule_id, interval_mask, previous_interval_mask + Fetch a specific snapshot by ID. 
""" - - sqlquery = f""" - INSERT INTO schedule_backups (schedule_id, server_id, interval_mask, previous_interval_mask, original_interval_mask) - VALUES ( - '{previous_schedule_details["schedule_id"]}', - '{previous_schedule_details["server_id"]}', - '{previous_schedule_details["interval_mask"]}', - '{previous_schedule_details["previous_interval_mask"]}', - '{previous_schedule_details["previous_interval_mask"]}') -- Assuming original_interval_mask is the same as previous_interval_mask on the first insert - ON CONFLICT (schedule_id) DO UPDATE SET - interval_mask = EXCLUDED.interval_mask, - previous_interval_mask = EXCLUDED.previous_interval_mask; + sqlquery = """ + SELECT + schedule_id, + interval_mask, + smart_interval_mask + FROM schedule_backups + WHERE snapshot_id = %s AND server_id = %s """ - db_cur.execute(sqlquery) + db_cur.execute(sqlquery, (snapshot_id, server_id)) + row = db_cur.fetchone() + if not row: + return None -def rollback_schedule_backup_mask(db_cur, schedule_id=None, server_id=None, full=False): - """ - Sets the interval_masks back to the schedule_backups table to the original_interval_mask. 
- """ - sqlquery = f""" - UPDATE public.schedule_backups - SET interval_mask = {'original_interval_mask' if full else 'previous_interval_mask'}, - previous_interval_mask = original_interval_mask - """ - if schedule_id is not None: - sqlquery = sqlquery + f" WHERE schedule_id = '{schedule_id}'" - elif server_id is not None: - sqlquery = sqlquery + f",\n server_id = server_id\n " - sqlquery = sqlquery + f" WHERE server_id = '{server_id}'" - else: - raise ValueError("Either schedule_id or server_id must be provided for rollback_schedule_backup_mask") - db_cur.execute(sqlquery) + return { + 'schedule_id': row[0], + 'interval_mask': row[1], + 'smart_interval_mask': row[2], + } -def restore_previous_schedules(db_cur, server_id=None, schedule_id=None, full=False): +def restore_previous_schedules(db_cur, server_id, snapshot_id=None): """ - Restore schedules from the last-known rollback snapshot or the original schedule. - Args: - db_cur: Database cursor. - server_id: Optional[int] Target server to roll back. - schedule_id: Optional[str] Target schedule to roll back. - full: bool If True, roll back to the original schedule in the schedule_backups table. If False, roll back to the previous schedule in the schedule_backups table. + Restore schedules from snapshots. """ - if server_id is None and schedule_id is None: - raise ValueError("Either server_id or schedule_id must be provided") + if not snapshot_id: + snapshot_id = retrieve_snapshots(db_cur, server_id)[0][0] - if server_id is not None and schedule_id is not None: - raise ValueError("server_id and schedule_id cannot both be provided") - + print(f"Restoring schedules for server_id {server_id} from snapshot_id {snapshot_id}") + print("Skipping any schedules that aren't in the snapshot or have a different interval mask...") sqlquery = """ - UPDATE schedules AS s - SET - interval_mask = ps. 
- """ + ("previous_interval_mask" if not full else "original_interval_mask") + """ - FROM schedule_backups AS ps - WHERE s.schedule_id = ps.schedule_id - """ - - params = [] - if server_id is not None: - sqlquery = sqlquery + " AND ps.server_id = %s" - params.append(server_id) - - if schedule_id is not None: - sqlquery = sqlquery + " AND ps.schedule_id = %s" - params.append(schedule_id) - - db_cur.execute(sqlquery, tuple(params)) - - print("Resetting schedule_backups table to reflect rolled back schedules...") - rollback_schedule_backup_mask(db_cur, schedule_id=schedule_id, server_id=server_id, full=full) + UPDATE schedules + SET smart_interval_mask = schedule_backups.smart_interval_mask + FROM schedule_backups + WHERE schedules.schedule_id = schedule_backups.schedule_id + AND schedules.server_id = %s + AND schedule_backups.snapshot_id = %s + AND schedules.interval_mask = schedule_backups.interval_mask + """ + db_cur.execute(sqlquery, (server_id, snapshot_id)) + return def get_blocklisted_schedule_ids(db_cur): diff --git a/setup/schema.sql b/setup/schema.sql index fb28f10..132b7ce 100644 --- a/setup/schema.sql +++ b/setup/schema.sql @@ -73,6 +73,7 @@ CREATE TABLE IF NOT EXISTS public.schedules is_running smallint NOT NULL DEFAULT 0, abort_running smallint NOT NULL DEFAULT 0, interval_mask character varying(32) NOT NULL, + smart_interval_mask character varying(32), first_run_date timestamp(3) without time zone NOT NULL DEFAULT '1000-01-01 00:00:00.000'::timestamp without time zone, last_run_date timestamp(3) without time zone NOT NULL DEFAULT '9999-12-31 23:59:59.999'::timestamp without time zone, exec_command character varying NOT NULL, @@ -104,6 +105,10 @@ COMMENT ON COLUMN schedules.parameters IS 'Exact string of parameters for comman COMMENT ON COLUMN schedules.adhoc_parameters IS 'If specified, will overwrite parameters for next run'; COMMENT ON COLUMN schedules.schedule_group_id IS 'Optional field to help group schedules'; +-- Add smart_interval_mask column if 
not exists (for existing installations upgrading to a version with smart scheduling) +ALTER TABLE public.schedules +ADD COLUMN IF NOT EXISTS smart_interval_mask character varying(32); + -- Index: schedules_adhoc_execute_idx CREATE INDEX IF NOT EXISTS schedules_adhoc_execute_idx ON public.schedules @@ -202,50 +207,38 @@ WITH ( ; --- Table to record previous scheduling for smart schedule rollback functionality with smart scheduling -CREATE TABLE IF NOT EXISTS public.schedule_backups -( - schedule_id character varying(255) NOT NULL, - server_id integer, - original_interval_mask character varying(32) NOT NULL, - previous_interval_mask character varying(32) NOT NULL, - interval_mask character varying(32) NOT NULL, - snapshot_at timestamp without time zone NOT NULL DEFAULT (now())::timestamp without time zone, - CONSTRAINT schedule_backups_pkey PRIMARY KEY (schedule_id) -) -WITH ( - OIDS=FALSE -); +-- Table to snapshot full schedule state whenever smart_schedule command runs (optimize or rollback) +-- Keeps last 5 snapshots per schedule for rollback and audit trail +CREATE TABLE IF NOT EXISTS public.schedule_backups AS TABLE public.schedules WITH NO DATA; -INSERT INTO public.schedule_backups (schedule_id, server_id, original_interval_mask, previous_interval_mask, interval_mask, snapshot_at) - SELECT - schedule_id, - server_id, - interval_mask, - interval_mask, - interval_mask, - now() - FROM schedules -ON CONFLICT (schedule_id) DO NOTHING; - -DROP TRIGGER IF EXISTS tr_schedule_backups ON public.schedule_backups; -CREATE TRIGGER tr_schedule_backups - BEFORE UPDATE - ON public.schedule_backups - FOR EACH ROW - EXECUTE PROCEDURE set_snapshot_at() -; +ALTER TABLE public.schedule_backups ADD COLUMN IF NOT EXISTS snapshot_id serial NOT NULL; +ALTER TABLE public.schedule_backups ADD COLUMN IF NOT EXISTS snapshot_timestamp timestamp without time zone NOT NULL DEFAULT (now())::timestamp without time zone; +ALTER TABLE public.schedule_backups ADD COLUMN IF NOT EXISTS 
operation_type character varying(20); +ALTER TABLE public.schedule_backups ADD CONSTRAINT schedule_backups_pkey PRIMARY KEY (snapshot_id, schedule_id); -CREATE UNIQUE INDEX IF NOT EXISTS schedule_backups_schedule_id_idx - ON public.schedule_backups - USING btree - (schedule_id); +CREATE OR REPLACE FUNCTION snapshot_schedules_table +() + RETURNS TRIGGER AS $$ + BEGIN + -- Increment snapshot_id for existing snapshots + UPDATE schedule_backups SET snapshot_id = snapshot_id + 1; -CREATE INDEX IF NOT EXISTS schedule_backups_server_id_idx - ON public.schedule_backups - USING btree - (server_id); + -- Snapshot current schedules table + INSERT INTO schedule_backups SELECT NEW.*, 1, NOW(), 'UPDATE'; + + -- Keep only the most recent 10 snapshots + DELETE FROM schedule_backups + WHERE snapshot_id NOT IN ( + SELECT snapshot_id FROM schedule_backups ORDER BY snapshot_timestamp DESC LIMIT 10 + ); + RETURN NEW; + END; + $$ LANGUAGE plpgsql; + +CREATE TRIGGER schedule_changes + AFTER UPDATE ON schedules + FOR EACH ROW EXECUTE FUNCTION snapshot_schedules_table(); CREATE TABLE IF NOT EXISTS public.schedule_blocklist @@ -260,7 +253,16 @@ WITH ( OIDS=FALSE ); --- Add in schedules +-- Add in cicada schedules used for running (admin schedules) +INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('source_obf_views_force', 'Admin Schedules') ON CONFLICT DO NOTHING; +INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('source_obf_views', 'Admin Schedules') ON CONFLICT DO NOTHING; +INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('create_snowflake_static_objects_force', 'Admin Schedules') ON CONFLICT DO NOTHING; +INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('create_schema_roles', 'Admin Schedules') ON CONFLICT DO NOTHING; +INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('create_snowflake_service_accounts_force', 'Admin Schedules') ON CONFLICT DO NOTHING; +INSERT INTO public.schedule_blocklist 
(SCHEDULE_ID, REASON) VALUES ('create_roles', 'Admin Schedules') ON CONFLICT DO NOTHING; +INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('create_roles_force', 'Admin Schedules') ON CONFLICT DO NOTHING; +INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('import_pipelinewise_config_force', 'Admin Schedules') ON CONFLICT DO NOTHING; + COMMIT TRANSACTION; From 92c43f6694b7301782429403922fb32bd45cb6c2 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Tue, 19 May 2026 12:33:39 +0100 Subject: [PATCH 29/53] Add bulk update for schedules to reduce the amount of db roundtrips the smart_scheduling has to make --- cicada/commands/smart_schedule.py | 61 ++++++++-------- cicada/commands/smart_schedule_rollback.py | 44 ++---------- cicada/commands/spread_schedules.py | 2 +- cicada/lib/scheduler.py | 84 +++++++++++++++++++++- setup/schema.sql | 9 ++- 5 files changed, 126 insertions(+), 74 deletions(-) diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index 4852f0e..08c403a 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -24,11 +24,11 @@ def _get_schedules_per_server(server_id, db_cur=None): -def _create_schedule_objects(schedule_ids, db_cur): +def _create_schedule_objects(schedule_ids, db_cur, server_id): """Create Schedule objects from schedule_ids.""" schedules : list[Schedule] = [] - blocklisted_schedules = scheduler.get_blocklisted_schedule_ids(db_cur) + blocklisted_schedules = scheduler.get_blocklisted_schedule_ids(db_cur, server_id=server_id) # Fetch details for each schedule and convert to Schedule objects for schedule_id in schedule_ids: @@ -96,34 +96,37 @@ def _update_schedule_cron(schedule : Schedule): def _assign_new_schedules(optimised_schedules: list[pygad.Schedule], db_cur): """Assign new schedules based on the optimal schedule found.""" + schedule_details_list = [] # For each schedule, update the schedule in the DB with the new interval_mask based on the 
start_time_mins calculated by the GA optimizer for schedule in optimised_schedules: _update_schedule_cron(schedule) - if schedule.shifted: print(f"- {schedule.schedule_id} : {schedule.interval_mask}") - schedule._determine_start_time_mins() - - schedule_details = { - "adhoc_parameters": None, - "adhoc_execute": None, - "schedule_group_id": None, - "parameters": None, - "server_id": None, - "last_run_date": None, - "is_enabled": None, - "interval_mask": schedule.interval_mask, - "schedule_description": None, - "auto_update_time": None, - "schedule_order": None, - "schedule_id": schedule.schedule_id, - "is_async": None, - "abort_running": None, - "exec_command": None, - "first_run_date": None, - "is_running": None, - "smart_interval_mask": None - } - scheduler.update_schedule_details(db_cur=db_cur, schedule_details=schedule_details) - + if schedule.shifted: + print(f"- {schedule.schedule_id} : {schedule.interval_mask}") + schedule._determine_start_time_mins() + + schedule_details = { + "adhoc_parameters": None, + "adhoc_execute": None, + "schedule_group_id": None, + "parameters": None, + "server_id": None, + "last_run_date": None, + "is_enabled": None, + "interval_mask": None, + "schedule_description": None, + "auto_update_time": None, + "schedule_order": None, + "schedule_id": schedule.schedule_id, + "is_async": None, + "abort_running": None, + "exec_command": None, + "first_run_date": None, + "is_running": None, + "smart_interval_mask": schedule.interval_mask + } + schedule_details_list.append(schedule_details) + + scheduler.update_schedule_details_bulk(db_cur=db_cur, schedule_list=schedule_details_list) @utils.named_exception_handler("smart_schedule") @@ -151,7 +154,7 @@ def main(server_id=None, dbname=None, ga_config=None, rollback=False, schedule_i print(f"Found {len(schedule_ids)} schedules for server_id {server_id}") # Build schedule objects - schedules = _create_schedule_objects(schedule_ids, db_cur=db_cur) + schedules = _create_schedule_objects(schedule_ids, 
db_cur=db_cur, server_id=server_id) if not schedules: print("No valid schedules found to optimize.") sys.exit(1) @@ -160,7 +163,7 @@ def main(server_id=None, dbname=None, ga_config=None, rollback=False, schedule_i try: print("\n------------Starting Optimisation-----------------") - blocklist_schedule_ids = scheduler.get_blocklisted_schedule_ids(db_cur) + blocklist_schedule_ids = scheduler.get_blocklisted_schedule_ids(db_cur, server_id=server_id) print(f"blocklisted schedule IDs that will be excluded from optimization: {blocklist_schedule_ids}") ga = pygad.GAPyGADScheduler(config=ga_config, blocklist_schedule_ids=blocklist_schedule_ids) print("Running PyGAD solver ...") diff --git a/cicada/commands/smart_schedule_rollback.py b/cicada/commands/smart_schedule_rollback.py index 1f88cf4..e316c01 100644 --- a/cicada/commands/smart_schedule_rollback.py +++ b/cicada/commands/smart_schedule_rollback.py @@ -38,13 +38,15 @@ def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbn try: if full: - print("\n------------Starting Full Rollback (set smart_interval_mask to NULL)-----------------") - _rollback_full(db_cur, server_id, schedule_id) + print("\n------------Starting Full Rollback-----------------") + scheduler.full_rollback(db_cur, server_id, schedule_id) print("Full rollback successful\n") elif previous: print("\n------------Starting Rollback to Previous Snapshot-----------------") - _rollback_previous(db_cur, server_id, schedule_id, snapshot_id) + if not server_id: + raise ValueError("server_id must be provided for rollback to previous snapshot, rollback for individual schedules must be a full rollback") + scheduler.restore_previous_schedules(db_cur, server_id=server_id, snapshot_id=snapshot_id) print("Rollback to previous snapshot successful\n") except Exception as e: @@ -54,39 +56,3 @@ def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbn finally: db_cur.close() db_conn.close() - - -def _rollback_full(db_cur, 
server_id: Optional[int], schedule_id: Optional[str]): - """ - Full rollback: Set smart_interval_mask to NULL for all affected schedules. - """ - if not server_id and not schedule_id: - raise ValueError("Either server_id or schedule_id must be provided") - - if server_id and schedule_id: - raise ValueError("Cannot specify both server_id and schedule_id") - - # Determine which schedules to rollback - if schedule_id: - schedule_ids = [schedule_id] - else: - query = "SELECT DISTINCT schedule_id FROM schedules WHERE server_id = %s" - db_cur.execute(query, (server_id,)) - schedule_ids = [row[0] for row in db_cur.fetchall()] - - update_all_schedules_query = """ - UPDATE schedules SET smart_interval_mask = NULL WHERE schedule_id = ANY(%s) - """ - db_cur.execute(update_all_schedules_query, (schedule_ids,)) - print(f"{len(schedule_ids)} schedules rolled back to original interval_mask") - - -def _rollback_previous(db_cur, server_id: Optional[int], snapshot_id: Optional[int]): - """ - Rollback to previous snapshot: Restore schedule state from a historical snapshot. 
- """ - if not server_id: - raise ValueError("server_id must be provided") - - scheduler.restore_previous_schedules(db_cur, server_id=server_id, snapshot_id=snapshot_id) - print(f"Schedules for server_id {server_id} rolled back to snapshot_id {snapshot_id} from schedule_backups") \ No newline at end of file diff --git a/cicada/commands/spread_schedules.py b/cicada/commands/spread_schedules.py index bbe1a33..4a8591a 100644 --- a/cicada/commands/spread_schedules.py +++ b/cicada/commands/spread_schedules.py @@ -125,7 +125,7 @@ def main(spread_details, dbname=None): "previous_interval_mask": current_schedule_details["interval_mask"], "interval_mask": new_schedule_details["interval_mask"], } - scheduler.update_schedule_backups(db_cur, previous_schedule_details) + scheduler.reset_schedule_backups(db_cur) else: output_message = ( diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index e184f20..4dee763 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -235,6 +235,56 @@ def update_schedule_details(db_cur, schedule_details): db_cur.execute(sqlquery) +def update_schedule_details_bulk(db_cur, schedule_list): + """Update multiple schedules in a single bulk query using CASE statements. + + Args: + db_cur: Database cursor + schedule_list: List of dicts, each with schedule_id and any fields to update + + """ + if not schedule_list: + return + + schedule_ids = [str(s["schedule_id"]) for s in schedule_list] + columns_to_update = set() + + for schedule in schedule_list: + columns_to_update.update(k for k, v in schedule.items() if k != "schedule_id" and v is not None) + + if not columns_to_update: + print("No fields to update for any schedules. 
Bulk update skipped.") + return + + case_clauses = [] + + for col in sorted(columns_to_update): + case_whens = [] + for schedule in schedule_list: + if col in schedule and schedule[col] is not None: + val = schedule[col] + if col in ["schedule_description", "interval_mask", "smart_interval_mask", "first_run_date", + "last_run_date", "exec_command", "parameters", "adhoc_parameters"]: + if col in ["exec_command", "parameters", "adhoc_parameters"]: + escaped_val = postgres.escape_upsert_string(str(val)) + else: + escaped_val = str(val) + case_whens.append(f"WHEN schedule_id = '{str(schedule['schedule_id'])}' THEN '{escaped_val}'") + else: + case_whens.append(f"WHEN schedule_id = '{str(schedule['schedule_id'])}' THEN {val}") + + if case_whens: + case_clauses.append(f"{col} = CASE {' '.join(case_whens)} ELSE {col} END") + + if not case_clauses: + print("No fields to update for any schedules. Bulk update skipped.") + return + + sqlquery = f"UPDATE schedules SET {', '.join(case_clauses)} WHERE schedule_id = ANY(%s)" + db_cur.execute(sqlquery, (schedule_ids,)) + return + + def get_schedule_executable(db_cur, schedule_id): """Extract details of executable of a schedule""" sqlquery = ( @@ -461,6 +511,23 @@ def get_schedules_by_snapshot_id(db_cur, snapshot_id, server_id): 'smart_interval_mask': row[2], } +def full_rollback(db_cur, server_id=None, schedule_id=None): + + if server_id and schedule_id: + raise ValueError("Cannot specify both server_id and schedule_id for full rollback, please specify only one to rollback all schedules for a server or an individual schedule respectively") + if server_id: + schedule_ids = [row[0] for row in get_all_schedule_ids_per_server(db_cur, server_id)] + + print(f"Rolling back schedules for server_id {server_id} to original interval_mask by setting smart_interval_mask to NULL...") + print(f"Found {len(schedule_ids)} schedules to rollback for server_id {server_id}...") + # Set smart_schedule_mask to NULL for all affected schedules to rollback 
to original interval_mask + update_all_schedules_query = """ + UPDATE schedules SET smart_interval_mask = NULL WHERE schedule_id = ANY(%s) + """ + db_cur.execute(update_all_schedules_query, (schedule_ids,)) + print(f"Schedules Updated: {schedule_ids}") + return + def restore_previous_schedules(db_cur, server_id, snapshot_id=None): """ @@ -484,9 +551,20 @@ def restore_previous_schedules(db_cur, server_id, snapshot_id=None): return -def get_blocklisted_schedule_ids(db_cur): +def get_blocklisted_schedule_ids(db_cur, server_id=None): """Get a list of schedule_ids that are blocklisted from optimization""" sqlquery = "SELECT schedule_id FROM schedule_blocklist" - db_cur.execute(sqlquery) + if server_id: + sqlquery += " WHERE server_id = %s" + db_cur.execute(sqlquery, (server_id,)) + else: + db_cur.execute(sqlquery) blocklist_schedule_ids = [row[0] for row in db_cur.fetchall()] - return blocklist_schedule_ids \ No newline at end of file + return blocklist_schedule_ids + + +def reset_schedule_backups(db_cur): + """Reset schedule_backups table by deleting all entries, used before creating new backups""" + sqlquery = "DELETE FROM schedule_backups" + db_cur.execute(sqlquery) + return \ No newline at end of file diff --git a/setup/schema.sql b/setup/schema.sql index 132b7ce..9883b96 100644 --- a/setup/schema.sql +++ b/setup/schema.sql @@ -225,7 +225,7 @@ CREATE OR REPLACE FUNCTION snapshot_schedules_table UPDATE schedule_backups SET snapshot_id = snapshot_id + 1; -- Snapshot current schedules table - INSERT INTO schedule_backups SELECT NEW.*, 1, NOW(), 'UPDATE'; + INSERT INTO schedule_backups SELECT *, 1, NOW(), 'UPDATE' FROM schedules; -- Keep only the most recent 10 snapshots DELETE FROM schedule_backups @@ -238,7 +238,7 @@ CREATE OR REPLACE FUNCTION snapshot_schedules_table CREATE TRIGGER schedule_changes AFTER UPDATE ON schedules - FOR EACH ROW EXECUTE FUNCTION snapshot_schedules_table(); + EXECUTE FUNCTION snapshot_schedules_table(); CREATE TABLE IF NOT EXISTS 
public.schedule_blocklist @@ -262,6 +262,11 @@ INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('create_snow INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('create_roles', 'Admin Schedules') ON CONFLICT DO NOTHING; INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('create_roles_force', 'Admin Schedules') ON CONFLICT DO NOTHING; INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('import_pipelinewise_config_force', 'Admin Schedules') ON CONFLICT DO NOTHING; +INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('archive_ap_tools_logs', 'Admin Schedules') ON CONFLICT DO NOTHING; +INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('archive_cicada_schedule_log', 'Admin Schedules') ON CONFLICT DO NOTHING; +INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('archive_pipelinewise_logs', 'Admin Schedules') ON CONFLICT DO NOTHING; +INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('expose_iceberg_tables', 'Admin Schedules') ON CONFLICT DO NOTHING; + From 615fb846ee023578ea2ddc6cc15175af08ea0faf Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Wed, 20 May 2026 09:48:51 +0100 Subject: [PATCH 30/53] Change snapshotting mechanism and change rollback functionality accordingly --- cicada/commands/smart_schedule.py | 9 ++- cicada/lib/SmartScheduling/domain.py | 5 +- cicada/lib/scheduler.py | 88 ++++++++++++++++++++++++---- setup/schema.sql | 67 ++++++++++++--------- 4 files changed, 122 insertions(+), 47 deletions(-) diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index 08c403a..64d6546 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -93,10 +93,11 @@ def _update_schedule_cron(schedule : Schedule): -def _assign_new_schedules(optimised_schedules: list[pygad.Schedule], db_cur): +def _assign_new_schedules(optimised_schedules: list[pygad.Schedule], server_id, db_cur): 
"""Assign new schedules based on the optimal schedule found.""" schedule_details_list = [] + schedule_ids = [] # For each schedule, update the schedule in the DB with the new interval_mask based on the start_time_mins calculated by the GA optimizer for schedule in optimised_schedules: _update_schedule_cron(schedule) @@ -125,8 +126,10 @@ def _assign_new_schedules(optimised_schedules: list[pygad.Schedule], db_cur): "smart_interval_mask": schedule.interval_mask } schedule_details_list.append(schedule_details) + schedule_ids.append(schedule.schedule_id) - scheduler.update_schedule_details_bulk(db_cur=db_cur, schedule_list=schedule_details_list) + scheduler.update_schedule_details_bulk(db_cur=db_cur, schedule_list=schedule_details_list, reason='Smart Schedule Optimization') + scheduler.snapshot_schedules(db_cur, schedule_ids, operation_type='BULK UPDATE', server_id=server_id, reason='Smart Schedule Optimization') @utils.named_exception_handler("smart_schedule") @@ -174,7 +177,7 @@ def main(server_id=None, dbname=None, ga_config=None, rollback=False, schedule_i print("\n-------------Updating Schedules------------------") if peak_usage < initial_fitness: # Only update schedules if we have found an improvement - _assign_new_schedules(optimised_schedules, db_cur=db_cur) + _assign_new_schedules(optimised_schedules, server_id=server_id, db_cur=db_cur) else: print(f"No improvement found for server_id {server_id}. Current peak usage: {initial_fitness}, Optimized peak usage: {peak_usage}. 
No schedule updates will be made.") print("--------------------------------------------------\n") diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/SmartScheduling/domain.py index e426804..8fba567 100644 --- a/cicada/lib/SmartScheduling/domain.py +++ b/cicada/lib/SmartScheduling/domain.py @@ -24,6 +24,7 @@ def __init__(self, details, db_cur): self.schedule_id = details['schedule_id'] self.server_id = details['server_id'] self.interval_mask = details['interval_mask'] + self.current_interval_mask = details['smart_interval_mask'] if details['smart_interval_mask'] is not None else self.interval_mask self.determine_attributes(db_cur) if details['blocklisted'] is not None: self.blocklisted = details['blocklisted'] @@ -53,8 +54,8 @@ def _determine_start_time_mins(self): today = datetime.datetime.now().date() midnight = datetime.datetime.combine(today, datetime.time.min) - it = croniter(self.interval_mask, midnight) - if croniter.match(self.interval_mask, midnight): + it = croniter(self.current_interval_mask, midnight) + if croniter.match(self.current_interval_mask, midnight): first_iter = midnight self.start_time_mins = 0 else: diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index 4dee763..72bee95 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -235,7 +235,7 @@ def update_schedule_details(db_cur, schedule_details): db_cur.execute(sqlquery) -def update_schedule_details_bulk(db_cur, schedule_list): +def update_schedule_details_bulk(db_cur, schedule_list, reason=None): """Update multiple schedules in a single bulk query using CASE statements. 
Args: @@ -282,9 +282,51 @@ def update_schedule_details_bulk(db_cur, schedule_list): sqlquery = f"UPDATE schedules SET {', '.join(case_clauses)} WHERE schedule_id = ANY(%s)" db_cur.execute(sqlquery, (schedule_ids,)) + return +def snapshot_schedules(db_cur, schedule_ids, operation_type=None, server_id=None, reason=None): + """Create a snapshot of specific schedules with the same snapshot_id. + + Args: + db_cur: Database cursor + schedule_ids: List of schedule_ids to snapshot + operation_type: Type of operation (e.g., 'UPDATE', 'OPTIMIZE', 'SPREAD') + server_id: server_id for the snapshot + reason: Optional reason/context for the snapshot + """ + if not schedule_ids: + return + + # Insert into snapshots table to get a new snapshot_id + sqlquery = "INSERT INTO snapshots (operation_type, reason, server_id) VALUES (%s, %s, %s) RETURNING snapshot_id" + db_cur.execute(sqlquery, (operation_type, reason, server_id)) + snapshot_id = db_cur.fetchone()[0] + + # Snapshot the specified schedules with the same snapshot_id + sqlquery = """ + INSERT INTO schedule_backups (schedule_id, server_id, interval_mask, smart_interval_mask, snapshot_id) + SELECT schedule_id, server_id, interval_mask, smart_interval_mask, %s + FROM schedules WHERE schedule_id = ANY(%s) + """ + db_cur.execute(sqlquery, (snapshot_id, schedule_ids)) + + # Clean up old snapshots (keep last 3 per schedule_id) + cleanup_query = """ + DELETE FROM schedule_backups sb + WHERE sb.schedule_id = ANY(%s) + AND sb.snapshot_id NOT IN ( + SELECT snapshot_id FROM schedule_backups + WHERE schedule_id = sb.schedule_id + ORDER BY snapshot_id DESC + LIMIT 3 + ) + """ + print("Updated snapshots table") + db_cur.execute(cleanup_query, (schedule_ids,)) + + def get_schedule_executable(db_cur, schedule_id): """Extract details of executable of a schedule""" sqlquery = ( @@ -475,11 +517,11 @@ def get_median_run_time(db_cur, schedule_id): def retrieve_snapshots(db_cur, server_id): """ - Retrieve all snapshots for a schedule in reverse 
chronological order. + Retrieve all snapshots in reverse chronological order. """ sqlquery = """ - SELECT DISTINCT(snapshot_id), snapshot_timestamp - FROM schedule_backups + SELECT snapshot_id, snapshot_timestamp + FROM snapshots WHERE server_id = %s ORDER BY snapshot_timestamp DESC """ @@ -516,16 +558,20 @@ def full_rollback(db_cur, server_id=None, schedule_id=None): if server_id and schedule_id: raise ValueError("Cannot specify both server_id and schedule_id for full rollback, please specify only one to rollback all schedules for a server or an individual schedule respectively") if server_id: + print(f"Rolling back schedules for server_id {server_id} to original interval_mask by setting smart_interval_mask to NULL...") schedule_ids = [row[0] for row in get_all_schedule_ids_per_server(db_cur, server_id)] - - print(f"Rolling back schedules for server_id {server_id} to original interval_mask by setting smart_interval_mask to NULL...") - print(f"Found {len(schedule_ids)} schedules to rollback for server_id {server_id}...") + else: + print(f"Rolling back schedules for all servers to original interval_mask by setting smart_interval_mask to NULL...") + schedule_ids =[row[1] for row in get_all_schedule_ids(db_cur)] + + print(f"Found {len(schedule_ids)} schedules to rollback for server_id ...") # Set smart_schedule_mask to NULL for all affected schedules to rollback to original interval_mask update_all_schedules_query = """ - UPDATE schedules SET smart_interval_mask = NULL WHERE schedule_id = ANY(%s) + UPDATE schedules SET smart_interval_mask = NULL WHERE schedule_id = ANY(%s::text[]) """ db_cur.execute(update_all_schedules_query, (schedule_ids,)) - print(f"Schedules Updated: {schedule_ids}") + print(f"Schedules Updated:{chr(10).join(schedule_ids)}") + return @@ -536,6 +582,8 @@ def restore_previous_schedules(db_cur, server_id, snapshot_id=None): if not snapshot_id: snapshot_id = retrieve_snapshots(db_cur, server_id)[0][0] + schedule_ids = 
get_all_schedule_ids_per_server(db_cur, server_id) + print(f"Restoring schedules for server_id {server_id} from snapshot_id {snapshot_id}") print("Skipping any schedules that aren't in the snapshot or have a different interval mask...") sqlquery = """ @@ -548,6 +596,8 @@ def restore_previous_schedules(db_cur, server_id, snapshot_id=None): AND schedules.interval_mask = schedule_backups.interval_mask """ db_cur.execute(sqlquery, (server_id, snapshot_id)) + print(f"Schedules Restored: {[schedule_id for schedule_id in schedule_ids]}") + reset_schedule_backups(db_cur, snapshot_id=snapshot_id) return @@ -563,8 +613,20 @@ def get_blocklisted_schedule_ids(db_cur, server_id=None): return blocklist_schedule_ids -def reset_schedule_backups(db_cur): - """Reset schedule_backups table by deleting all entries, used before creating new backups""" - sqlquery = "DELETE FROM schedule_backups" - db_cur.execute(sqlquery) +def reset_schedule_backups(db_cur, snapshot_id=None): + """Reset schedule_backups table by deleting all entries""" + if snapshot_id: + sqlquery = "DELETE FROM schedule_backups WHERE snapshot_id = %s" + db_cur.execute(sqlquery, (snapshot_id,)) + + sqlquery_snapshots = "DELETE FROM snapshots WHERE snapshot_id = %s " + db_cur.execute(sqlquery_snapshots, (snapshot_id,)) + + else: + sqlquery_backups = "DELETE FROM schedule_backups" + db_cur.execute(sqlquery_backups) + + sqlquery_snapshots = "DELETE FROM snapshots" + db_cur.execute(sqlquery_snapshots) + return \ No newline at end of file diff --git a/setup/schema.sql b/setup/schema.sql index 9883b96..b213ffb 100644 --- a/setup/schema.sql +++ b/setup/schema.sql @@ -207,38 +207,39 @@ WITH ( ; --- Table to snapshot full schedule state whenever smart_schedule command runs (optimize or rollback) --- Keeps last 5 snapshots per schedule for rollback and audit trail -CREATE TABLE IF NOT EXISTS public.schedule_backups AS TABLE public.schedules WITH NO DATA; - -ALTER TABLE public.schedule_backups ADD COLUMN IF NOT EXISTS snapshot_id 
serial NOT NULL; -ALTER TABLE public.schedule_backups ADD COLUMN IF NOT EXISTS snapshot_timestamp timestamp without time zone NOT NULL DEFAULT (now())::timestamp without time zone; -ALTER TABLE public.schedule_backups ADD COLUMN IF NOT EXISTS operation_type character varying(20); -ALTER TABLE public.schedule_backups ADD CONSTRAINT schedule_backups_pkey PRIMARY KEY (snapshot_id, schedule_id); - - -CREATE OR REPLACE FUNCTION snapshot_schedules_table -() - RETURNS TRIGGER AS $$ - BEGIN - -- Increment snapshot_id for existing snapshots - UPDATE schedule_backups SET snapshot_id = snapshot_id + 1; +-- Snapshots table stores metadata about each snapshot (timestamp, operation type) +CREATE TABLE IF NOT EXISTS public.snapshots +( + snapshot_id serial NOT NULL, + snapshot_timestamp timestamp without time zone NOT NULL DEFAULT (now())::timestamp without time zone, + server_id integer, + operation_type character varying(20), + reason character varying(255), + CONSTRAINT snapshots_pkey PRIMARY KEY (snapshot_id) +) +WITH ( + OIDS=FALSE +); - -- Snapshot current schedules table - INSERT INTO schedule_backups SELECT *, 1, NOW(), 'UPDATE' FROM schedules; +-- Table to store schedule snapshots for rollback +-- Keeps last 5 snapshots per schedule_id for rollback and audit trail +CREATE TABLE IF NOT EXISTS public.schedule_backups +( + schedule_id character varying(255) NOT NULL, + server_id integer NOT NULL, + interval_mask character varying(32) NOT NULL, + smart_interval_mask character varying(32), + snapshot_id integer NOT NULL, + CONSTRAINT schedule_backups_pkey PRIMARY KEY (schedule_id, snapshot_id), + CONSTRAINT schedule_backups_snapshot_fkey FOREIGN KEY (snapshot_id) + REFERENCES snapshots (snapshot_id) MATCH SIMPLE + ON UPDATE NO ACTION ON DELETE CASCADE +) +WITH ( + OIDS=FALSE +); - -- Keep only the most recent 10 snapshots - DELETE FROM schedule_backups - WHERE snapshot_id NOT IN ( - SELECT snapshot_id FROM schedule_backups ORDER BY snapshot_timestamp DESC LIMIT 10 - ); - 
RETURN NEW; - END; - $$ LANGUAGE plpgsql; -CREATE TRIGGER schedule_changes - AFTER UPDATE ON schedules - EXECUTE FUNCTION snapshot_schedules_table(); CREATE TABLE IF NOT EXISTS public.schedule_blocklist @@ -257,15 +258,23 @@ WITH ( INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('source_obf_views_force', 'Admin Schedules') ON CONFLICT DO NOTHING; INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('source_obf_views', 'Admin Schedules') ON CONFLICT DO NOTHING; INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('create_snowflake_static_objects_force', 'Admin Schedules') ON CONFLICT DO NOTHING; +INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('create_snowflake_static_objects', 'Admin Schedules') ON CONFLICT DO NOTHING; INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('create_schema_roles', 'Admin Schedules') ON CONFLICT DO NOTHING; INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('create_snowflake_service_accounts_force', 'Admin Schedules') ON CONFLICT DO NOTHING; +INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('create_snowflake_service_accounts', 'Admin Schedules') ON CONFLICT DO NOTHING; INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('create_roles', 'Admin Schedules') ON CONFLICT DO NOTHING; INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('create_roles_force', 'Admin Schedules') ON CONFLICT DO NOTHING; INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('import_pipelinewise_config_force', 'Admin Schedules') ON CONFLICT DO NOTHING; +INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('import_pipelinewise_config', 'Admin Schedules') ON CONFLICT DO NOTHING; INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('archive_ap_tools_logs', 'Admin Schedules') ON CONFLICT DO NOTHING; INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES 
('archive_cicada_schedule_log', 'Admin Schedules') ON CONFLICT DO NOTHING; INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('archive_pipelinewise_logs', 'Admin Schedules') ON CONFLICT DO NOTHING; INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('expose_iceberg_tables', 'Admin Schedules') ON CONFLICT DO NOTHING; +INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('terminate_mixpanel_taps', 'Admin Schedules') ON CONFLICT DO NOTHING; +INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('backup_states', 'Admin Schedules') ON CONFLICT DO NOTHING; +INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('purge_elt_cluster_tmp', 'Admin Schedules') ON CONFLICT DO NOTHING; + + From 106519328dc0962a11ceb94d964244140de170b4 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Wed, 20 May 2026 14:16:53 +0100 Subject: [PATCH 31/53] Addressing Copilot comments --- CLAUDE.md | 28 ++++++- cicada/commands/delete_schedule.py | 2 +- cicada/commands/smart_schedule.py | 28 +++---- cicada/commands/spread_schedules.py | 8 -- cicada/lib/SmartScheduling/domain.py | 3 +- cicada/lib/SmartScheduling/evaluation.py | 13 +++- cicada/lib/SmartScheduling/pygad.py | 10 ++- cicada/lib/scheduler.py | 88 +++++++++++----------- docs/Smart Scheduler Technical Overview.md | 1 - setup/schema.sql | 14 +--- 10 files changed, 105 insertions(+), 90 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 2fee441..4362721 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -99,9 +99,11 @@ Key tables: - `servers` – Registered nodes with hostname, FQDN, IP address - `schedules` – Job definitions with cron expressions, parameters, execution state - `schedule_logs` – Historical execution records with runtime, status, output -- `schedule_backups` – GA optimization snapshots for rollback +- `snapshots` – Metadata about optimization/rollback events (operation_type, reason, timestamp, server_id) +- `schedule_backups` – Schedule state snapshots: stores 
`interval_mask` and `smart_interval_mask` at each snapshot for potential rollback +- `schedule_changes` – Linked-list audit trail of all changes to schedules (replaces older snapshot model); each entry has `previous_change_id` for chain traversal, `changes_delta` (JSON) for what changed, and `operation_type` (SMART_SCHEDULE, ROLLBACK_FULL, ROLLBACK_TO_CHANGE, SPREAD_SCHEDULES, etc.) -Database setup SQL is in `setup/db_and_user.sql` and `setup/schema.sql`. Example schedule setup for smart scheduling in `setup/create_test_schedule_sertup`. +Database setup SQL is in `setup/db_and_user.sql` and `setup/schema.sql`. Migration script: `setup/migrate_snapshots_to_changes.sql`. Example schedule setup for smart scheduling in `setup/create_test_tap_setup`. ## Key Architectural Patterns @@ -122,7 +124,27 @@ Database setup SQL is in `setup/db_and_user.sql` and `setup/schema.sql`. Example 1. **Load schedules**: Fetch all schedules for a server via `get_schedules_per_server()` 2. **Create Schedule objects**: Convert schedule details to Schedule instances; filter unsupported schedules (irregular cron, too frequent, blocklisted) 3. **Run GA optimization**: PyGAD evolves shifts over N generations to minimize resource conflicts -4. **Apply and checkpoint**: Save optimized shifts back to DB; record checkpoint for potential rollback +4. 
**Apply and checkpoint**: Save optimized shifts back to DB; record change entry via `record_schedule_change()` for audit trail and rollback + +### Rollback System +Cicada supports two rollback mechanisms: + +**Full Rollback** (`--full` flag): +- Sets `smart_interval_mask = NULL` for affected schedules, reverting to original `interval_mask` +- Works per-schedule or per-server +- Records a `ROLLBACK_FULL` change entry in `schedule_changes` + +**Rollback to Specific Change** (`--change-id` flag): +- Uses linked-list traversal via `compute_schedule_state_at_change()` to reconstruct schedule state at any historical change +- Requires `schedule_id` and `change_id` +- Records a `ROLLBACK_TO_CHANGE` entry documenting what was restored +- Marks the target change as reverted + +**Change History** (`--history` flag): +- Displays complete audit trail for a schedule via `get_schedule_history()` +- Each entry shows operation_type, reason, timestamp, and delta (what changed) + +**Migration Note**: Old snapshot/schedule_backups model supported only last 3 snapshots. New `schedule_changes` model retains unlimited history via linked-list structure. 
## Testing diff --git a/cicada/commands/delete_schedule.py b/cicada/commands/delete_schedule.py index b637f04..7de6e94 100644 --- a/cicada/commands/delete_schedule.py +++ b/cicada/commands/delete_schedule.py @@ -16,7 +16,7 @@ def main(schedule_id, dbname=None): db_conn = postgres.db_cicada(dbname) db_cur = db_conn.cursor() scheduler.delete_schedule(db_cur, str(schedule_id)) - scheduler.delete_schedule_backup(db_cur, str(schedule_id)) + scheduler.reset_schedule_backups(db_cur, str(schedule_id)) db_cur.close() db_conn.close() diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index 64d6546..eeaeaa1 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -71,24 +71,26 @@ def _update_schedule_cron(schedule : Schedule): if frequency == 1440: # For daily schedules, we can shift within the hour hour = start_time_mins // 60 minute = (start_time_mins - hour * 60) % 60 - schedule.interval_mask = f"{minute} {hour} * * *" + schedule.smart_interval_mask = f"{minute} {hour} * * *" # Check that the new cron expression is valid - if not croniter.is_valid(schedule.interval_mask): - raise ValueError(f"Invalid cron expression generated: {schedule.interval_mask} for schedule {schedule.schedule_id}") + if not croniter.is_valid(schedule.smart_interval_mask): + raise ValueError(f"Invalid cron expression generated: {schedule.smart_interval_mask} for schedule {schedule.schedule_id}") return elif frequency == 60: # For hourly schedules, we can shift within the hour - assert start_time_mins < frequency, f"Shift {start_time_mins} cannot be greater than or equal to frequency {frequency} for schedule {schedule.schedule_id}" - schedule.interval_mask = f"{start_time_mins} * * * *" + if start_time_mins >= frequency: + raise ValueError(f"Shift {start_time_mins} cannot be greater than or equal to frequency {frequency} for schedule {schedule.schedule_id}") + schedule.smart_interval_mask = f"{start_time_mins} * * * *" # Check that the 
new cron expression is valid - if not croniter.is_valid(schedule.interval_mask): - raise ValueError(f"Invalid cron expression generated: {schedule.interval_mask} for schedule {schedule.schedule_id}") + if not croniter.is_valid(schedule.smart_interval_mask): + raise ValueError(f"Invalid cron expression generated: {schedule.smart_interval_mask} for schedule {schedule.schedule_id}") return elif frequency < 60: - assert start_time_mins < frequency, f"Shift {start_time_mins} cannot be greater than or equal to frequency {frequency} for schedule {schedule.schedule_id}" - schedule.interval_mask = f"{start_time_mins}-59/{frequency} * * * *" + if start_time_mins >= frequency: + raise ValueError(f"Shift {start_time_mins} cannot be greater than or equal to frequency {frequency} for schedule {schedule.schedule_id}") + schedule.smart_interval_mask = f"{start_time_mins}-59/{frequency} * * * *" # Check that the new cron expression is valid - if not croniter.is_valid(schedule.interval_mask): - raise ValueError(f"Invalid cron expression generated: {schedule.interval_mask} for schedule {schedule.schedule_id}") + if not croniter.is_valid(schedule.smart_interval_mask): + raise ValueError(f"Invalid cron expression generated: {schedule.smart_interval_mask} for schedule {schedule.schedule_id}") return @@ -102,7 +104,7 @@ def _assign_new_schedules(optimised_schedules: list[pygad.Schedule], server_id, for schedule in optimised_schedules: _update_schedule_cron(schedule) if schedule.shifted: - print(f"- {schedule.schedule_id} : {schedule.interval_mask}") + print(f"- {schedule.schedule_id} : {schedule.smart_interval_mask}") schedule._determine_start_time_mins() schedule_details = { @@ -123,7 +125,7 @@ def _assign_new_schedules(optimised_schedules: list[pygad.Schedule], server_id, "exec_command": None, "first_run_date": None, "is_running": None, - "smart_interval_mask": schedule.interval_mask + "smart_interval_mask": schedule.smart_interval_mask } schedule_details_list.append(schedule_details) 
schedule_ids.append(schedule.schedule_id) diff --git a/cicada/commands/spread_schedules.py b/cicada/commands/spread_schedules.py index 4a8591a..eb531d4 100644 --- a/cicada/commands/spread_schedules.py +++ b/cicada/commands/spread_schedules.py @@ -117,14 +117,6 @@ def main(spread_details, dbname=None): output_message += " | Forced abort_running and adhoc_execute" scheduler.update_schedule_details(db_cur, new_schedule_details) - - # Update schedule_backups for rollback functionality - previous_schedule_details = { - "schedule_id": current_schedule_details["schedule_id"], - "server_id": current_schedule_details["server_id"], - "previous_interval_mask": current_schedule_details["interval_mask"], - "interval_mask": new_schedule_details["interval_mask"], - } scheduler.reset_schedule_backups(db_cur) else: diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/SmartScheduling/domain.py index 8fba567..0ecc04e 100644 --- a/cicada/lib/SmartScheduling/domain.py +++ b/cicada/lib/SmartScheduling/domain.py @@ -10,7 +10,7 @@ @dataclass(frozen=False) class Schedule: - schedule_id: int + schedule_id: str server_id: int interval_mask: str frequency_minutes: int @@ -82,6 +82,7 @@ def is_regular_schedule(self): schedule = croniter(self.interval_mask) iters = [schedule.get_next(datetime.datetime) for _ in range(20)] freqs = [iters[i + 1] - iters[i] for i in range(len(iters) - 1)] + if any(freq <= datetime.timedelta(minutes=1) for freq in freqs): return False return all(f == freqs[0] for f in freqs) except (ValueError, KeyError): return False diff --git a/cicada/lib/SmartScheduling/evaluation.py b/cicada/lib/SmartScheduling/evaluation.py index c17fd07..f483342 100644 --- a/cicada/lib/SmartScheduling/evaluation.py +++ b/cicada/lib/SmartScheduling/evaluation.py @@ -20,10 +20,17 @@ def evaluate_usage_and_peak(start_times: Sequence[int], schedules: Sequence[Sche diff = np.zeros(mins_per_day + 1, dtype=float) - assert len(start_times) == len(schedules) == len(freqs) == len(run_times), 
"Length of start_times, schedules, freqs, and run_times must all be the same" - assert all(start_times[i] < freqs[i] for i in range(len(start_times))), "Start time should be the earliest it can be" + if not (len(start_times) == len(schedules) == len(freqs) == len(run_times)): + raise ValueError("Length of start_times, schedules, freqs, and run_times must all be the same") + + for i in range(len(start_times)): + if schedules[i].frequency_is_supported() and start_times[i] >= freqs[i]: + raise ValueError(f"Start time should be the earliest it can be for unsupported schedule at index {i}") for i, schedule in enumerate(schedules): + if not schedule.frequency_is_supported(): + continue + freq = freqs[i] run_time = run_times[i] minute = int(start_times[i]) @@ -36,7 +43,7 @@ def evaluate_usage_and_peak(start_times: Sequence[int], schedules: Sequence[Sche diff[minute] += 1 diff[end] -= 1 minute += freq - + # Sums up everything in the diff array to get the total usage at each minute, and finds the peak usage. # Ignore the last element of the diff array since it's just a placeholder to handle the end minute subtraction for schedules that run until the end of the day. 
usage = np.cumsum(diff[:-1]) diff --git a/cicada/lib/SmartScheduling/pygad.py b/cicada/lib/SmartScheduling/pygad.py index 20df13a..219ccda 100644 --- a/cicada/lib/SmartScheduling/pygad.py +++ b/cicada/lib/SmartScheduling/pygad.py @@ -85,7 +85,7 @@ def fitness_fn(self, ga, solution, solution_idx): _, peak = evaluate_usage_and_peak(solution, self.schedules) return -float(peak) - def solve(self, schedules: Sequence[Schedule]) -> tuple[Sequence[Schedule], List[int], float, np.ndarray]: + def solve(self, schedules: Sequence[Schedule]) -> tuple[Sequence[Schedule], List[int], float, np.ndarray, float]: self.schedules = schedules gene_space = self._gene_space(schedules) print("Successfully initialised gene space") @@ -127,9 +127,11 @@ def solve(self, schedules: Sequence[Schedule]) -> tuple[Sequence[Schedule], List # Update schedule objects start_time_mins attribute based on GA solution for i, schedule in enumerate(schedules): - assert start_times[i] >= gene_space[i][0] and start_times[i] <= gene_space[i][-1], f"Start time for schedule {schedule.schedule_id} is out of gene space bounds. Start time: {start_times[i]}, Gene space: {gene_space[i]}" - if schedule.is_unsupported(): assert start_times[i] == schedule.start_time_mins, f"Unsupported schedule {schedule.schedule_id} should not have been shifted in the GA solution. {schedule.start_time_mins} != {start_times[i]}" - elif schedule.start_time_mins != start_times[i]: + if not (start_times[i] >= gene_space[i][0] and start_times[i] <= gene_space[i][-1]): + raise RuntimeError(f"Start time for schedule {schedule.schedule_id} is out of gene space bounds. Start time: {start_times[i]}, Gene space: {gene_space[i]}") + if schedule.is_unsupported() and start_times[i] != schedule.start_time_mins: + raise RuntimeError(f"Unsupported schedule {schedule.schedule_id} should not have been shifted in the GA solution. 
{schedule.start_time_mins} != {start_times[i]}") + elif schedule.start_time_mins != start_times[i]: schedule.shifted = True schedule.start_time_mins = start_times[i] diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index 72bee95..395e361 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -236,19 +236,16 @@ def update_schedule_details(db_cur, schedule_details): def update_schedule_details_bulk(db_cur, schedule_list, reason=None): - """Update multiple schedules in a single bulk query using CASE statements. + """Update multiple schedules in a single bulk query. Args: db_cur: Database cursor schedule_list: List of dicts, each with schedule_id and any fields to update - """ if not schedule_list: return - schedule_ids = [str(s["schedule_id"]) for s in schedule_list] columns_to_update = set() - for schedule in schedule_list: columns_to_update.update(k for k, v in schedule.items() if k != "schedule_id" and v is not None) @@ -257,33 +254,31 @@ def update_schedule_details_bulk(db_cur, schedule_list, reason=None): return case_clauses = [] + params = [] + # Construct CASE statements for each column to update for col in sorted(columns_to_update): - case_whens = [] + case_parts = [] for schedule in schedule_list: if col in schedule and schedule[col] is not None: - val = schedule[col] - if col in ["schedule_description", "interval_mask", "smart_interval_mask", "first_run_date", - "last_run_date", "exec_command", "parameters", "adhoc_parameters"]: - if col in ["exec_command", "parameters", "adhoc_parameters"]: - escaped_val = postgres.escape_upsert_string(str(val)) - else: - escaped_val = str(val) - case_whens.append(f"WHEN schedule_id = '{str(schedule['schedule_id'])}' THEN '{escaped_val}'") - else: - case_whens.append(f"WHEN schedule_id = '{str(schedule['schedule_id'])}' THEN {val}") - - if case_whens: - case_clauses.append(f"{col} = CASE {' '.join(case_whens)} ELSE {col} END") + params.append(schedule['schedule_id']) + 
params.append(schedule[col]) + case_parts.append("WHEN schedule_id = %s THEN %s") + + if case_parts: + case_clauses.append(f"{col} = CASE {' '.join(case_parts)} ELSE {col} END") if not case_clauses: - print("No fields to update for any schedules. Bulk update skipped.") return + # Add schedule_ids to params + schedule_ids = [s['schedule_id'] for s in schedule_list] + params.append(schedule_ids) + sqlquery = f"UPDATE schedules SET {', '.join(case_clauses)} WHERE schedule_id = ANY(%s)" - db_cur.execute(sqlquery, (schedule_ids,)) + db_cur.execute(sqlquery, tuple(params)) - return + return def snapshot_schedules(db_cur, schedule_ids, operation_type=None, server_id=None, reason=None): @@ -313,8 +308,9 @@ def snapshot_schedules(db_cur, schedule_ids, operation_type=None, server_id=None db_cur.execute(sqlquery, (snapshot_id, schedule_ids)) # Clean up old snapshots (keep last 3 per schedule_id) - cleanup_query = """ + cleanup_backups_query = """ DELETE FROM schedule_backups sb + DELETE FROM snapshots s WHERE sb.schedule_id = ANY(%s) AND sb.snapshot_id NOT IN ( SELECT snapshot_id FROM schedule_backups @@ -322,9 +318,9 @@ def snapshot_schedules(db_cur, schedule_ids, operation_type=None, server_id=None ORDER BY snapshot_id DESC LIMIT 3 ) + AND s.snapshot_id = sb.snapshot_id """ - print("Updated snapshots table") - db_cur.execute(cleanup_query, (schedule_ids,)) + print("Updated schedule_backups table") def get_schedule_executable(db_cur, schedule_id): @@ -382,7 +378,7 @@ def get_all_schedules(db_cur, server_id, is_async): ( /* foo */ (SELECT schedule_id, - COALESCE(smart_interval_mask, interval_mask), + COALESCE(smart_interval_mask, interval_mask) as interval_mask, exec_command, COALESCE(adhoc_parameters, parameters, '') AS parameters, adhoc_execute, @@ -542,7 +538,7 @@ def get_schedules_by_snapshot_id(db_cur, snapshot_id, server_id): WHERE snapshot_id = %s AND server_id = %s """ db_cur.execute(sqlquery, (snapshot_id, server_id)) - row = db_cur.fetchone() + row = 
db_cur.fetchall() if not row: return None @@ -554,18 +550,27 @@ def get_schedules_by_snapshot_id(db_cur, snapshot_id, server_id): } def full_rollback(db_cur, server_id=None, schedule_id=None): + """ + Roll back schedules to original interval_mask by setting smart_interval_mask to NULL for either a server_id or an individual schedule_id. + Args: + server_id | schedule_id: Optional[int | str] [Mutually exclusive] + Target server/schedule to roll back all schedules for. If not provided, will roll back all schedules for all servers. + """ if server_id and schedule_id: raise ValueError("Cannot specify both server_id and schedule_id for full rollback, please specify only one to rollback all schedules for a server or an individual schedule respectively") if server_id: - print(f"Rolling back schedules for server_id {server_id} to original interval_mask by setting smart_interval_mask to NULL...") + print(f"Rolling back schedules for server_id {server_id} to original interval_mask...") schedule_ids = [row[0] for row in get_all_schedule_ids_per_server(db_cur, server_id)] + elif schedule_id: + print(f"Rolling back schedule_id {schedule_id} to original interval_mask...") + schedule_ids = [schedule_id] else: - print(f"Rolling back schedules for all servers to original interval_mask by setting smart_interval_mask to NULL...") + print(f"Rolling back schedules for all servers to original interval_mask...") schedule_ids =[row[1] for row in get_all_schedule_ids(db_cur)] print(f"Found {len(schedule_ids)} schedules to rollback for server_id ...") - # Set smart_schedule_mask to NULL for all affected schedules to rollback to original interval_mask + print("Removing smart_interval_mask for selected schedules...") update_all_schedules_query = """ UPDATE schedules SET smart_interval_mask = NULL WHERE schedule_id = ANY(%s::text[]) """ @@ -613,20 +618,17 @@ def get_blocklisted_schedule_ids(db_cur, server_id=None): return blocklist_schedule_ids -def reset_schedule_backups(db_cur, 
snapshot_id=None): +def reset_schedule_backups(db_cur, snapshot_id=None, schedule_id=None): """Reset schedule_backups table by deleting all entries""" + sqlquery_backups = "DELETE FROM schedule_backups WHERE 1=1" + sqlquery_snapshots = "DELETE FROM snapshots WHERE 1=1" if snapshot_id: - sqlquery = "DELETE FROM schedule_backups WHERE snapshot_id = %s" - db_cur.execute(sqlquery, (snapshot_id,)) - - sqlquery_snapshots = "DELETE FROM snapshots WHERE snapshot_id = %s " - db_cur.execute(sqlquery_snapshots, (snapshot_id,)) - - else: - sqlquery_backups = "DELETE FROM schedule_backups" - db_cur.execute(sqlquery_backups) - - sqlquery_snapshots = "DELETE FROM snapshots" - db_cur.execute(sqlquery_snapshots) - + sqlquery_backups += " AND snapshot_id = %s" + sqlquery_snapshots += " AND snapshot_id = %s" + if schedule_id: + sqlquery_backups += " AND schedule_id = %s" + sqlquery_snapshots += " AND schedule_id = %s" + + db_cur.execute(sqlquery_backups, (snapshot_id, schedule_id) if snapshot_id and schedule_id else (schedule_id,) if schedule_id else (snapshot_id,) if snapshot_id else None) + db_cur.execute(sqlquery_snapshots, (snapshot_id, schedule_id) if snapshot_id and schedule_id else (schedule_id,) if schedule_id else (snapshot_id,) if snapshot_id else None) return \ No newline at end of file diff --git a/docs/Smart Scheduler Technical Overview.md b/docs/Smart Scheduler Technical Overview.md index ad22690..b84db87 100644 --- a/docs/Smart Scheduler Technical Overview.md +++ b/docs/Smart Scheduler Technical Overview.md @@ -197,7 +197,6 @@ Added to `cicada/cli.py`: - **Read**: `schedules`, `servers`, `schedule_logs`, `schedule_blocklist` - **Write**: `schedules` (interval_mask), `schedule_backups` (checkpoints) -- **Functions**: `set_snapshot_at()` trigger ## Design Decisions diff --git a/setup/schema.sql b/setup/schema.sql index b213ffb..f995bdc 100644 --- a/setup/schema.sql +++ b/setup/schema.sql @@ -19,18 +19,6 @@ BEGIN END; $BODY$; --- FUNCTION: set_snapshot_at() -CREATE OR 
REPLACE FUNCTION public.set_snapshot_at() - RETURNS trigger - LANGUAGE 'plpgsql' - COST 100 - VOLATILE NOT LEAKPROOF -AS $BODY$ -BEGIN - NEW.snapshot_at = now()::timestamp without time zone; - RETURN NEW; -END; -$BODY$; -- Table: servers CREATE TABLE IF NOT EXISTS public.servers @@ -222,7 +210,7 @@ WITH ( ); -- Table to store schedule snapshots for rollback --- Keeps last 5 snapshots per schedule_id for rollback and audit trail +-- Keeps last 3 snapshots per schedule_id for rollback and audit trail CREATE TABLE IF NOT EXISTS public.schedule_backups ( schedule_id character varying(255) NOT NULL, From ee4e704bc933d1ccaae1721b2ca707f4d2428cfb Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Thu, 21 May 2026 10:45:42 +0100 Subject: [PATCH 32/53] Add blocklist command and rollback improvements --- CLAUDE.md | 6 +- cicada/cli.py | 46 ++++++++--- cicada/commands/blocklist_schedule.py | 53 +++++++++++++ cicada/commands/smart_schedule.py | 27 ++++--- cicada/commands/smart_schedule_rollback.py | 41 ++++++++-- cicada/lib/SmartScheduling/domain.py | 2 +- cicada/lib/scheduler.py | 92 ++++++++++------------ setup/create_test_tap_setup.sql | 2 +- setup/schema.sql | 3 +- 9 files changed, 187 insertions(+), 85 deletions(-) create mode 100644 cicada/commands/blocklist_schedule.py diff --git a/CLAUDE.md b/CLAUDE.md index 4362721..c43d282 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -99,9 +99,9 @@ Key tables: - `servers` – Registered nodes with hostname, FQDN, IP address - `schedules` – Job definitions with cron expressions, parameters, execution state - `schedule_logs` – Historical execution records with runtime, status, output -- `snapshots` – Metadata about optimization/rollback events (operation_type, reason, timestamp, server_id) +- `snapshots` – Metadata about optimization/rollback events (reason, timestamp, server_id) - `schedule_backups` – Schedule state snapshots: stores `interval_mask` and `smart_interval_mask` at each snapshot for potential rollback -- `schedule_changes` – 
Linked-list audit trail of all changes to schedules (replaces older snapshot model); each entry has `previous_change_id` for chain traversal, `changes_delta` (JSON) for what changed, and `operation_type` (SMART_SCHEDULE, ROLLBACK_FULL, ROLLBACK_TO_CHANGE, SPREAD_SCHEDULES, etc.) +- `schedule_changes` – Linked-list audit trail of all changes to schedules (replaces older snapshot model); each entry has `previous_change_id` for chain traversal, `changes_delta` (JSON) for what changed Database setup SQL is in `setup/db_and_user.sql` and `setup/schema.sql`. Migration script: `setup/migrate_snapshots_to_changes.sql`. Example schedule setup for smart scheduling in `setup/create_test_tap_setup`. @@ -142,7 +142,7 @@ Cicada supports two rollback mechanisms: **Change History** (`--history` flag): - Displays complete audit trail for a schedule via `get_schedule_history()` -- Each entry shows operation_type, reason, timestamp, and delta (what changed) +- Each entry shows reason, timestamp, and delta (what changed) **Migration Note**: Old snapshot/schedule_backups model supported only last 3 snapshots. New `schedule_changes` model retains unlimited history via linked-list structure. 
diff --git a/cicada/cli.py b/cicada/cli.py index 44b55d4..2445092 100644 --- a/cicada/cli.py +++ b/cicada/cli.py @@ -19,6 +19,7 @@ from cicada.commands import list_schedules from cicada.commands import delete_schedule from cicada.commands import smart_schedule +from cicada.commands import blocklist_schedule as blocklist_schedule_cmd @utils.named_exception_handler("Cicada") @@ -277,16 +278,16 @@ def delete_schedule(): @staticmethod def smart_schedule(): - """Generate smart schedules for a server using genetic algorithm, or rollback previous changes""" + """Generate smart schedules for a server using genetic algorithm, or rollback/manage blocklist""" parser = argparse.ArgumentParser( allow_abbrev=False, add_help=True, prog=inspect.stack()[0][3], - description="Generate smart schedules for a server using genetic algorithm, or rollback previous changes", + description="Generate smart schedules for a server using genetic algorithm, or rollback previous changes, or manage schedule blocklist", ) - # Two subcommands; optimise for generating smart schedules and rollback for rolling back to previous schedules - subparsers = parser.add_subparsers(dest="action", help="Action to perform. Either optimise schedules or rollback to previous/original schedules. Default = optimise") + # Subcommands: optimise, rollback, blocklist + subparsers = parser.add_subparsers(dest="action", help="Action to perform. 
Options: optimise (default), rollback, or blocklist") # (Default) optimise subcommand optimise_parser = subparsers.add_parser( @@ -341,18 +342,37 @@ def smart_schedule(): help="ID of the server to rollback, if not specified will rollback all servers", ) group.add_argument("--schedule_id", type=str, required=False, help="ID of the schedule to rollback") - rollback_parser.add_argument( - "--snapshot_id", - type=int, + + + # Blocklist subcommand + blocklist_parser = subparsers.add_parser( + "blocklist", + help="Add or remove a schedule from the blocklist (excluded from smart scheduling optimization)", + add_help=True, + ) + blocklist_parser.add_argument( + "--schedule_id", + type=str, + required=True, + help="Id of the schedule to blocklist/unblocklist", + ) + blocklist_parser.add_argument( + "--remove", + default=False, + action="store_true", + help="Remove the schedule from the blocklist instead of adding it", + ) + blocklist_parser.add_argument( + "--reason", + type=str, required=False, - help="Specific snapshot ID to restore to (optional, used with --previous)", + help="Reason for blocklisting (optional, only used when adding)", ) # Parse arguments and call smart_schedule.main with appropriate arguments based on subcommand args = parser.parse_args(sys.argv[2:]) if args.action == "optimise" or args.action is None: - print("Initiating smart schedule optimization.") smart_schedule.main( server_id=getattr(args, 'server_id', None), ga_config={ @@ -368,14 +388,18 @@ def smart_schedule(): }, ) elif args.action == "rollback": - print("Initiating smart schedule rollback.") smart_schedule.main( server_id=getattr(args, 'server_id', None), schedule_id=getattr(args, 'schedule_id', None), rollback=True, full=getattr(args, 'full', False), previous=getattr(args, 'previous', False), - snapshot_id=getattr(args, 'snapshot_id', None), + ) + elif args.action == "blocklist": + blocklist_schedule_cmd.main( + schedule_id=args.schedule_id, + remove=getattr(args, 'remove', False), + 
reason=getattr(args, 'reason', None), ) @staticmethod diff --git a/cicada/commands/blocklist_schedule.py b/cicada/commands/blocklist_schedule.py new file mode 100644 index 0000000..3cbdfa3 --- /dev/null +++ b/cicada/commands/blocklist_schedule.py @@ -0,0 +1,53 @@ +"""Add or remove schedules from the blocklist (excluded from smart scheduling optimization).""" + +from typing import Optional +from cicada.lib import postgres, utils +from cicada.lib import scheduler + + +@utils.named_exception_handler("blocklist_schedule") +def main(schedule_id: str, remove: bool = False, reason: Optional[str] = None, dbname=None): + """ + Add or remove a schedule from the blocklist. + + Blocklisted schedules are excluded from smart scheduling optimizations. + + Args: + schedule_id: The schedule_id to blocklist or unblocklist. + remove: True to remove from blocklist, False to add to blocklist. + reason: Optional reason for blocklisting (used when remove=False). + dbname: Optional database name to connect to. 
+ """ + + if not schedule_id or not isinstance(schedule_id, str): + raise TypeError("schedule_id must be a non-empty string") + + db_conn = postgres.db_cicada(dbname) + db_cur = db_conn.cursor() + + try: + if remove: + scheduler.remove_blocklist_schedule(db_cur, schedule_id=schedule_id) + print(f"Schedule {schedule_id} has been removed from the blocklist successfully.") + scheduler.full_rollback(db_cur, schedule_id=schedule_id) + print(f"Schedule {schedule_id} has been rolled back to original settings successfully.") + scheduler.reset_schedule_backups(db_cur, schedule_id=schedule_id) + print(f"Backups for schedule {schedule_id} have been removed successfully.") + + else: + schedule_details = scheduler.get_schedule_details(db_cur, schedule_id) + if not schedule_details or not schedule_details.get('schedule_id'): + print(f"ERROR: Schedule {schedule_id} not found") + return + scheduler.blocklist_schedule(db_cur, schedule_id=schedule_id, reason=reason) + print(f"Schedule {schedule_id} has been blocklisted successfully.") + + + + except Exception as e: + print(f"Error during blocklist operation: {e}") + raise + + finally: + db_cur.close() + db_conn.close() diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index eeaeaa1..45391bb 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -3,7 +3,7 @@ from __future__ import annotations import sys from croniter import croniter -from typing import Optional +from typing import Optional, Sequence from cicada.lib import postgres, utils from cicada.lib import scheduler from cicada.lib.SmartScheduling import pygad @@ -24,11 +24,11 @@ def _get_schedules_per_server(server_id, db_cur=None): -def _create_schedule_objects(schedule_ids, db_cur, server_id): +def _create_schedule_objects(schedule_ids, db_cur): """Create Schedule objects from schedule_ids.""" schedules : list[Schedule] = [] - blocklisted_schedules = scheduler.get_blocklisted_schedule_ids(db_cur, 
server_id=server_id) + blocklisted_schedules = scheduler.get_blocklisted_schedule_ids(db_cur) # Fetch details for each schedule and convert to Schedule objects for schedule_id in schedule_ids: @@ -95,7 +95,7 @@ def _update_schedule_cron(schedule : Schedule): -def _assign_new_schedules(optimised_schedules: list[pygad.Schedule], server_id, db_cur): +def _assign_new_schedules(optimised_schedules: Schedule, db_cur): """Assign new schedules based on the optimal schedule found.""" schedule_details_list = [] @@ -131,14 +131,13 @@ def _assign_new_schedules(optimised_schedules: list[pygad.Schedule], server_id, schedule_ids.append(schedule.schedule_id) scheduler.update_schedule_details_bulk(db_cur=db_cur, schedule_list=schedule_details_list, reason='Smart Schedule Optimization') - scheduler.snapshot_schedules(db_cur, schedule_ids, operation_type='BULK UPDATE', server_id=server_id, reason='Smart Schedule Optimization') @utils.named_exception_handler("smart_schedule") -def main(server_id=None, dbname=None, ga_config=None, rollback=False, schedule_id: Optional[str] = None, full=False, previous=False, snapshot_id: Optional[int] = None): +def main(server_id=None, dbname=None, ga_config=None, rollback=False, schedule_id: Optional[str] = None, full=False, previous=False): if rollback: - print("Initiating rollback schedules.") - smart_schedule_rollback.main(server_id=server_id, schedule_id=schedule_id, dbname=dbname, full=full, previous=previous, snapshot_id=snapshot_id) + print("Initiating rollback of schedules...") + smart_schedule_rollback.main(server_id=server_id, schedule_id=schedule_id, dbname=dbname, full=full, previous=previous) return if server_id and type(server_id) != int: raise TypeError(f"server_id should be int not {type(server_id)}") @@ -153,13 +152,17 @@ def main(server_id=None, dbname=None, ga_config=None, rollback=False, schedule_i main(server_id=id[0], dbname=dbname, ga_config=ga_config) else: + if not scheduler.validate_server_id(db_cur, server_id=server_id): + 
print(f"No valid server with server_id={server_id} does not exist in the database") + sys.exit(1) + # Get schedules for the server_id print("\n-----------------Schedule Setup----------------------") schedule_ids = _get_schedules_per_server(server_id=server_id, db_cur=db_cur) print(f"Found {len(schedule_ids)} schedules for server_id {server_id}") # Build schedule objects - schedules = _create_schedule_objects(schedule_ids, db_cur=db_cur, server_id=server_id) + schedules = _create_schedule_objects(schedule_ids, db_cur=db_cur) if not schedules: print("No valid schedules found to optimize.") sys.exit(1) @@ -168,7 +171,7 @@ def main(server_id=None, dbname=None, ga_config=None, rollback=False, schedule_i try: print("\n------------Starting Optimisation-----------------") - blocklist_schedule_ids = scheduler.get_blocklisted_schedule_ids(db_cur, server_id=server_id) + blocklist_schedule_ids = scheduler.get_blocklisted_schedule_ids(db_cur) print(f"blocklisted schedule IDs that will be excluded from optimization: {blocklist_schedule_ids}") ga = pygad.GAPyGADScheduler(config=ga_config, blocklist_schedule_ids=blocklist_schedule_ids) print("Running PyGAD solver ...") @@ -179,7 +182,9 @@ def main(server_id=None, dbname=None, ga_config=None, rollback=False, schedule_i print("\n-------------Updating Schedules------------------") if peak_usage < initial_fitness: # Only update schedules if we have found an improvement - _assign_new_schedules(optimised_schedules, server_id=server_id, db_cur=db_cur) + _assign_new_schedules(optimised_schedules, db_cur=db_cur) + optimised_schedule_ids = [schedule.schedule_id for schedule in optimised_schedules if schedule.shifted] + scheduler.snapshot_schedules(db_cur, optimised_schedule_ids, server_id=server_id, computed_usage=peak_usage, reason='Smart Schedule Optimization') else: print(f"No improvement found for server_id {server_id}. Current peak usage: {initial_fitness}, Optimized peak usage: {peak_usage}. 
No schedule updates will be made.") print("--------------------------------------------------\n") diff --git a/cicada/commands/smart_schedule_rollback.py b/cicada/commands/smart_schedule_rollback.py index e316c01..e9a9a7e 100644 --- a/cicada/commands/smart_schedule_rollback.py +++ b/cicada/commands/smart_schedule_rollback.py @@ -3,8 +3,32 @@ from cicada.lib import scheduler + +def _rollback_to_previous_snapshot(db_cur, server_id): + """ + Roll back to the previous snapshot for a given server_id. If no previous snapshot exists, perform a full rollback. + """ + print(f"\n [Rolling back server {server_id}]") + snapshots = scheduler.retrieve_snapshots(db_cur, server_id) + current_snapshot = snapshots[0][0] if snapshots and len(snapshots) > 0 else None + previous_snapshot = snapshots[1][0] if snapshots and len(snapshots) > 1 else None + + if current_snapshot is not None: + scheduler.reset_schedule_backups(db_cur, snapshot_id=current_snapshot) + scheduler.remove_snapshot(db_cur, current_snapshot) + + if previous_snapshot is not None: + scheduler.restore_previous_schedules(db_cur, server_id=server_id, snapshot_id=previous_snapshot) + scheduler.reset_schedule_backups(db_cur, snapshot_id=previous_snapshot) + else: + print("No previous snapshot. Commencing full rollback instead...") + scheduler.full_rollback(db_cur, server_id) + schedule_ids = scheduler.get_all_schedule_ids_per_server(db_cur, server_id) + scheduler.snapshot_schedules(db_cur, schedule_ids=schedule_ids, server_id=server_id, reason='Full Rollback') + + @utils.named_exception_handler("smart_schedule_rollback") -def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbname=None, full=False, previous=False, snapshot_id: Optional[int] = None): +def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbname=None, full=False, previous=False): """ Roll back schedules after smart_schedule optimization. 
@@ -19,8 +43,6 @@ def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbn If True, set smart_interval_mask to NULL (revert to original interval_mask). previous: bool If True, restore to the most recent snapshot (step back one optimization). - snapshot_id: Optional[int] - Specific snapshot_id to restore to (used with --previous). """ if type(server_id) != int and server_id is not None: raise TypeError(f"server_id needs to be of type int. {type(server_id)}") @@ -41,13 +63,20 @@ def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbn print("\n------------Starting Full Rollback-----------------") scheduler.full_rollback(db_cur, server_id, schedule_id) print("Full rollback successful\n") + sever_ids = [server_id] if server_id else scheduler.get_all_server_ids(db_cur) + for server_id in sever_ids: + schedule_ids = scheduler.get_all_schedule_ids_per_server(db_cur, server_id) + scheduler.snapshot_schedules(db_cur, schedule_ids=schedule_ids, server_id=server_id, reason='Full Rollback') elif previous: print("\n------------Starting Rollback to Previous Snapshot-----------------") if not server_id: - raise ValueError("server_id must be provided for rollback to previous snapshot, rollback for individual schedules must be a full rollback") - scheduler.restore_previous_schedules(db_cur, server_id=server_id, snapshot_id=snapshot_id) - print("Rollback to previous snapshot successful\n") + print(f"Rolling back all servers...") + for server in scheduler.get_all_server_ids(db_cur): + server_id = server[0] + _rollback_to_previous_snapshot(db_cur, server_id) + else: + _rollback_to_previous_snapshot(db_cur, server_id) except Exception as e: print(f"Error during rollback: {e}") diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/SmartScheduling/domain.py index 0ecc04e..ec97af7 100644 --- a/cicada/lib/SmartScheduling/domain.py +++ b/cicada/lib/SmartScheduling/domain.py @@ -80,7 +80,7 @@ def is_regular_schedule(self): """Check if 
the cron expression is a regular schedule that can be optimized by the GA """ try: schedule = croniter(self.interval_mask) - iters = [schedule.get_next(datetime.datetime) for _ in range(20)] + iters = [schedule.get_next(datetime.datetime) for _ in range(100)] freqs = [iters[i + 1] - iters[i] for i in range(len(iters) - 1)] if any(freq <= datetime.timedelta(minutes=1) for freq in freqs): return False return all(f == freqs[0] for f in freqs) diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index 395e361..953ee09 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -281,22 +281,22 @@ def update_schedule_details_bulk(db_cur, schedule_list, reason=None): return -def snapshot_schedules(db_cur, schedule_ids, operation_type=None, server_id=None, reason=None): +def snapshot_schedules(db_cur, schedule_ids, server_id=None, computed_usage=None, reason=None): """Create a snapshot of specific schedules with the same snapshot_id. Args: db_cur: Database cursor schedule_ids: List of schedule_ids to snapshot - operation_type: Type of operation (e.g., 'UPDATE', 'OPTIMIZE', 'SPREAD') server_id: server_id for the snapshot + computed_usage: Computed usage for the snapshot reason: Optional reason/context for the snapshot """ if not schedule_ids: return # Insert into snapshots table to get a new snapshot_id - sqlquery = "INSERT INTO snapshots (operation_type, reason, server_id) VALUES (%s, %s, %s) RETURNING snapshot_id" - db_cur.execute(sqlquery, (operation_type, reason, server_id)) + sqlquery = "INSERT INTO snapshots (reason, server_id, computed_usage) VALUES (%s, %s, %s) RETURNING snapshot_id" + db_cur.execute(sqlquery, (reason, server_id, computed_usage)) snapshot_id = db_cur.fetchone()[0] # Snapshot the specified schedules with the same snapshot_id @@ -513,7 +513,7 @@ def get_median_run_time(db_cur, schedule_id): def retrieve_snapshots(db_cur, server_id): """ - Retrieve all snapshots in reverse chronological order. 
+ Retrieve all snapshots in reverse chronological order. Returns None if no snapshots exist. """ sqlquery = """ SELECT snapshot_id, snapshot_timestamp @@ -522,33 +522,10 @@ def retrieve_snapshots(db_cur, server_id): ORDER BY snapshot_timestamp DESC """ db_cur.execute(sqlquery, (server_id,)) - return db_cur.fetchall() + snapshots = db_cur.fetchall() + return snapshots if snapshots else None -def get_schedules_by_snapshot_id(db_cur, snapshot_id, server_id): - """ - Fetch a specific snapshot by ID. - """ - sqlquery = """ - SELECT - schedule_id, - interval_mask, - smart_interval_mask - FROM schedule_backups - WHERE snapshot_id = %s AND server_id = %s - """ - db_cur.execute(sqlquery, (snapshot_id, server_id)) - row = db_cur.fetchall() - - if not row: - return None - - return { - 'schedule_id': row[0], - 'interval_mask': row[1], - 'smart_interval_mask': row[2], - } - def full_rollback(db_cur, server_id=None, schedule_id=None): """ Roll back schedules to original interval_mask by setting smart_interval_mask to NULL for either a server_id or an individual schedule_id. @@ -556,11 +533,9 @@ def full_rollback(db_cur, server_id=None, schedule_id=None): server_id | schedule_id: Optional[int | str] [Mutually exclusive] Target server/schedule to roll back all schedules for. If not provided, will roll back all schedules for all servers. 
""" - if server_id and schedule_id: raise ValueError("Cannot specify both server_id and schedule_id for full rollback, please specify only one to rollback all schedules for a server or an individual schedule respectively") if server_id: - print(f"Rolling back schedules for server_id {server_id} to original interval_mask...") schedule_ids = [row[0] for row in get_all_schedule_ids_per_server(db_cur, server_id)] elif schedule_id: print(f"Rolling back schedule_id {schedule_id} to original interval_mask...") @@ -579,14 +554,13 @@ def full_rollback(db_cur, server_id=None, schedule_id=None): return - -def restore_previous_schedules(db_cur, server_id, snapshot_id=None): +def restore_previous_schedules(db_cur, server_id, snapshot_id): """ Restore schedules from snapshots. """ if not snapshot_id: - snapshot_id = retrieve_snapshots(db_cur, server_id)[0][0] - + raise ValueError("snapshot_id is required to restore previous schedules") + schedule_ids = get_all_schedule_ids_per_server(db_cur, server_id) print(f"Restoring schedules for server_id {server_id} from snapshot_id {snapshot_id}") @@ -601,19 +575,15 @@ def restore_previous_schedules(db_cur, server_id, snapshot_id=None): AND schedules.interval_mask = schedule_backups.interval_mask """ db_cur.execute(sqlquery, (server_id, snapshot_id)) - print(f"Schedules Restored: {[schedule_id for schedule_id in schedule_ids]}") + print(f"{len(schedule_ids)} Schedules restored") reset_schedule_backups(db_cur, snapshot_id=snapshot_id) return -def get_blocklisted_schedule_ids(db_cur, server_id=None): +def get_blocklisted_schedule_ids(db_cur): """Get a list of schedule_ids that are blocklisted from optimization""" sqlquery = "SELECT schedule_id FROM schedule_blocklist" - if server_id: - sqlquery += " WHERE server_id = %s" - db_cur.execute(sqlquery, (server_id,)) - else: - db_cur.execute(sqlquery) + db_cur.execute(sqlquery) blocklist_schedule_ids = [row[0] for row in db_cur.fetchall()] return blocklist_schedule_ids @@ -622,13 +592,35 @@ def 
reset_schedule_backups(db_cur, snapshot_id=None, schedule_id=None): """Reset schedule_backups table by deleting all entries""" sqlquery_backups = "DELETE FROM schedule_backups WHERE 1=1" sqlquery_snapshots = "DELETE FROM snapshots WHERE 1=1" - if snapshot_id: - sqlquery_backups += " AND snapshot_id = %s" - sqlquery_snapshots += " AND snapshot_id = %s" if schedule_id: sqlquery_backups += " AND schedule_id = %s" - sqlquery_snapshots += " AND schedule_id = %s" + db_cur.execute(sqlquery_backups, (schedule_id,)) + elif snapshot_id: + sqlquery_backups += " AND snapshot_id = %s" + sqlquery_snapshots += " AND snapshot_id = %s" + db_cur.execute(sqlquery_backups, (snapshot_id,)) + db_cur.execute(sqlquery_snapshots, (snapshot_id,)) + else: + db_cur.execute(sqlquery_backups) + db_cur.execute(sqlquery_snapshots) + return + +def blocklist_schedule(db_cur, schedule_id, reason=None): + """Add a schedule_id to the blocklist""" + sqlquery = "INSERT INTO schedule_blocklist (schedule_id, reason) VALUES (%s, %s) ON CONFLICT DO NOTHING" + db_cur.execute(sqlquery, (schedule_id, reason)) + return + +def remove_snapshot(db_cur, snapshot_id): + """Remove a snapshot_id from the snapshots table""" + sqlquery = "DELETE FROM snapshots WHERE snapshot_id = %s" + db_cur.execute(sqlquery, (snapshot_id,)) + return - db_cur.execute(sqlquery_backups, (snapshot_id, schedule_id) if snapshot_id and schedule_id else (schedule_id,) if schedule_id else (snapshot_id,) if snapshot_id else None) - db_cur.execute(sqlquery_snapshots, (snapshot_id, schedule_id) if snapshot_id and schedule_id else (schedule_id,) if schedule_id else (snapshot_id,) if snapshot_id else None) - return \ No newline at end of file + +def validate_server_id(db_cur, server_id): + """Validate that a server_id exists in the servers table""" + sqlquery = "SELECT COUNT(1) FROM servers WHERE server_id = %s" + db_cur.execute(sqlquery, (server_id,)) + row = db_cur.fetchone() + return (row[0] == 1) \ No newline at end of file diff --git 
a/setup/create_test_tap_setup.sql b/setup/create_test_tap_setup.sql index c85dd53..d42bb88 100644 --- a/setup/create_test_tap_setup.sql +++ b/setup/create_test_tap_setup.sql @@ -519,6 +519,6 @@ INSERT INTO public.schedules (SERVER_ID, SCHEDULE_ID, INTERVAL_MASK, EXEC_COMMAN INSERT INTO public.schedules (SERVER_ID, SCHEDULE_ID, INTERVAL_MASK, EXEC_COMMAND, IS_ENABLED, IS_RUNNING) VALUES (6, 'cumulative_limit', '*/30 * * * *', 'dummy_command', 1, 1) ON CONFLICT DO NOTHING; -INSERT INTO public.schedule_blocklist (SCHEDULE_ID, SERVER_ID, REASON) VALUES ('plastic_full_table', 3, 'test for blocklist functionality') ON CONFLICT DO NOTHING; +INSERT INTO public.schedule_blocklist (SCHEDULE_ID, REASON) VALUES ('plastic_full_table', 'test for blocklist functionality') ON CONFLICT DO NOTHING; COMMIT TRANSACTION; diff --git a/setup/schema.sql b/setup/schema.sql index f995bdc..67b5bc9 100644 --- a/setup/schema.sql +++ b/setup/schema.sql @@ -201,7 +201,7 @@ CREATE TABLE IF NOT EXISTS public.snapshots snapshot_id serial NOT NULL, snapshot_timestamp timestamp without time zone NOT NULL DEFAULT (now())::timestamp without time zone, server_id integer, - operation_type character varying(20), + computed_usage character varying(255), reason character varying(255), CONSTRAINT snapshots_pkey PRIMARY KEY (snapshot_id) ) @@ -233,7 +233,6 @@ WITH ( CREATE TABLE IF NOT EXISTS public.schedule_blocklist ( schedule_id character varying(255) NOT NULL, - server_id integer, timestamp timestamp without time zone NOT NULL DEFAULT (now())::timestamp without time zone, reason character varying(255), CONSTRAINT schedule_blocklist_pkey PRIMARY KEY (schedule_id) From a983e6d7fd890a0441ea9ff128c93675af70f4e5 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Thu, 21 May 2026 11:01:14 +0100 Subject: [PATCH 33/53] fix incorrect rollback routing --- cicada/cli.py | 13 ++++++------- cicada/commands/smart_schedule.py | 7 +------ 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/cicada/cli.py 
b/cicada/cli.py index 2445092..cf8f38e 100644 --- a/cicada/cli.py +++ b/cicada/cli.py @@ -8,6 +8,7 @@ from cicada.lib import utils from cicada.commands import register_server +from cicada.commands import smart_schedule_rollback from cicada.commands import list_server_schedules from cicada.commands import exec_server_schedules from cicada.commands import upsert_schedule @@ -388,13 +389,11 @@ def smart_schedule(): }, ) elif args.action == "rollback": - smart_schedule.main( - server_id=getattr(args, 'server_id', None), - schedule_id=getattr(args, 'schedule_id', None), - rollback=True, - full=getattr(args, 'full', False), - previous=getattr(args, 'previous', False), - ) + smart_schedule_rollback.main( + server_id=getattr(args, 'server_id', None), + schedule_id=getattr(args, 'schedule_id', None), + full=getattr(args, 'full', False), + previous=getattr(args, 'previous', False)) elif args.action == "blocklist": blocklist_schedule_cmd.main( schedule_id=args.schedule_id, diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index 45391bb..780bbd7 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -134,12 +134,7 @@ def _assign_new_schedules(optimised_schedules: Schedule, db_cur): @utils.named_exception_handler("smart_schedule") -def main(server_id=None, dbname=None, ga_config=None, rollback=False, schedule_id: Optional[str] = None, full=False, previous=False): - if rollback: - print("Initiating rollback of schedules...") - smart_schedule_rollback.main(server_id=server_id, schedule_id=schedule_id, dbname=dbname, full=full, previous=previous) - return - +def main(server_id=None, dbname=None, ga_config=None): if server_id and type(server_id) != int: raise TypeError(f"server_id should be int not {type(server_id)}") db_conn = postgres.db_cicada(dbname) From 18349efe2d2754380bedf9f6bb11b387eabcbbe3 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Thu, 21 May 2026 15:40:12 +0100 Subject: [PATCH 34/53] Add Smart 
Schedule Tests --- cicada/lib/SmartScheduling/domain.py | 6 +- cicada/lib/scheduler.py | 6 + tests/test_functional_cli_entrypoint.py | 12 +- tests/test_smart_scheduling.py | 934 ++++++++++++++++++++++++ 4 files changed, 949 insertions(+), 9 deletions(-) create mode 100644 tests/test_smart_scheduling.py diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/SmartScheduling/domain.py index ec97af7..2e134f4 100644 --- a/cicada/lib/SmartScheduling/domain.py +++ b/cicada/lib/SmartScheduling/domain.py @@ -24,10 +24,10 @@ def __init__(self, details, db_cur): self.schedule_id = details['schedule_id'] self.server_id = details['server_id'] self.interval_mask = details['interval_mask'] - self.current_interval_mask = details['smart_interval_mask'] if details['smart_interval_mask'] is not None else self.interval_mask + self.current_interval_mask = details.get('smart_interval_mask') if details.get('smart_interval_mask') is not None else self.interval_mask self.determine_attributes(db_cur) - if details['blocklisted'] is not None: - self.blocklisted = details['blocklisted'] + if details.get('blocklisted') is not None: + self.blocklisted = details.get('blocklisted') def determine_attributes(self, db_cur): """Determine frequency and average runtime from interval_mask and scheduler module""" diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index 953ee09..65ec361 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -611,6 +611,12 @@ def blocklist_schedule(db_cur, schedule_id, reason=None): db_cur.execute(sqlquery, (schedule_id, reason)) return +def remove_blocklist_schedule(db_cur, schedule_id): + """Remove a schedule_id from the blocklist""" + sqlquery = "DELETE FROM schedule_blocklist WHERE schedule_id = %s" + db_cur.execute(sqlquery, (schedule_id,)) + return + def remove_snapshot(db_cur, snapshot_id): """Remove a snapshot_id from the snapshots table""" sqlquery = "DELETE FROM snapshots WHERE snapshot_id = %s" diff --git 
a/tests/test_functional_cli_entrypoint.py b/tests/test_functional_cli_entrypoint.py index 82147b8..43cf138 100644 --- a/tests/test_functional_cli_entrypoint.py +++ b/tests/test_functional_cli_entrypoint.py @@ -21,9 +21,9 @@ def test_cicada_help(): positional arguments: command register_server , list_server_schedules , exec_server_schedules - , show_schedule , upsert_schedule , exec_schedule , - spread_schedules , archive_schedule_log , ping_slack , - list_schedule_ids , delete_schedule , version + , smart_schedule , show_schedule , upsert_schedule , + exec_schedule , spread_schedules , archive_schedule_log , + ping_slack , list_schedule_ids , delete_schedule , version optional arguments: -h, --help show this help message and exit @@ -41,9 +41,9 @@ def test_bad_command(): positional arguments: command register_server , list_server_schedules , exec_server_schedules - , show_schedule , upsert_schedule , exec_schedule , - spread_schedules , archive_schedule_log , ping_slack , - list_schedule_ids , delete_schedule , version + , smart_schedule , show_schedule , upsert_schedule , + exec_schedule , spread_schedules , archive_schedule_log , + ping_slack , list_schedule_ids , delete_schedule , version optional arguments: -h, --help show this help message and exit diff --git a/tests/test_smart_scheduling.py b/tests/test_smart_scheduling.py new file mode 100644 index 0000000..4d119d1 --- /dev/null +++ b/tests/test_smart_scheduling.py @@ -0,0 +1,934 @@ +"""Tests for smart scheduling and rollback functionality""" + +import croniter +import pytest +import os +import datetime +from unittest.mock import Mock, MagicMock, patch, call +import numpy as np +import psycopg2 + +from cicada.lib.SmartScheduling.domain import Schedule +from cicada.lib.SmartScheduling.config import GAConfig +from cicada.lib.SmartScheduling.evaluation import evaluate_usage_and_peak +import cicada.commands.smart_schedule as smart_schedule +from cicada.lib.SmartScheduling.pygad import GAPyGADScheduler +from 
cicada.lib import scheduler + + +@pytest.fixture(scope="session", autouse=True) +def get_env_vars(): + """get_env_vars""" + + pytest.cicada_home = os.environ.get("CICADA_HOME") + + pytest.db_host = os.environ.get("DB_POSTGRES_HOST") + pytest.db_port = os.environ.get("DB_POSTGRES_PORT") + pytest.db_user = os.environ.get("DB_POSTGRES_USER") + pytest.db_pass = os.environ.get("DB_POSTGRES_PASS") + + pytest.db_test = f"pytest_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S_%f')}" + + +@pytest.fixture() +def db_setup(get_env_vars): + """db_setup""" + + # Create the test_db + pg_conn = psycopg2.connect( + host=pytest.db_host, + port=pytest.db_port, + user=pytest.db_user, + password=pytest.db_pass, + database="postgres", + ) + pg_conn.autocommit = True + pg_cur = pg_conn.cursor() + pg_cur.execute(f"CREATE DATABASE {pytest.db_test}") + pg_cur.close() + pg_conn.close() + + # Create test_db structure + test_conn = psycopg2.connect( + host=pytest.db_host, + port=pytest.db_port, + user=pytest.db_user, + password=pytest.db_pass, + database=pytest.db_test, + ) + test_conn.autocommit = True + test_cur = test_conn.cursor() + test_cur.execute(open(f"{pytest.cicada_home}/setup/schema.sql", "r", encoding="utf-8").read()) + test_cur.close() + test_conn.close() + + yield + + # Cleanup: terminate all connections and drop test database + pg_conn = psycopg2.connect( + host=pytest.db_host, + port=pytest.db_port, + user=pytest.db_user, + password=pytest.db_pass, + database="postgres", + ) + pg_conn.autocommit = True + pg_cur = pg_conn.cursor() + # Terminate all connections to the test database + pg_cur.execute( + f""" + SELECT pg_terminate_backend(pg_stat_activity.pid) + FROM pg_stat_activity + WHERE pg_stat_activity.datname = '{pytest.db_test}' + AND pid <> pg_backend_pid() + """ + ) + pg_cur.close() + pg_conn.close() + + # Now drop the database + pg_conn = psycopg2.connect( + host=pytest.db_host, + port=pytest.db_port, + user=pytest.db_user, + password=pytest.db_pass, + 
database="postgres", + ) + pg_conn.autocommit = True + pg_cur = pg_conn.cursor() + pg_cur.execute(f"DROP DATABASE {pytest.db_test}") + pg_cur.close() + pg_conn.close() + + +def query_test_db(query): + """Run a SQL query in a postgres database""" + rows = [] + conn = None + try: + conn = psycopg2.connect( + host=pytest.db_host, + port=pytest.db_port, + user=pytest.db_user, + password=pytest.db_pass, + database=pytest.db_test, + ) + conn.set_session(readonly=False, autocommit=True) + + cur = conn.cursor() + cur.execute(query) + + if cur.rowcount > 0 and cur.description: + rows = cur.fetchall() + + cur.close() + finally: + if conn: + conn.close() + return rows + + +def get_db_cursor(): + """Get a cursor to the test database""" + conn = psycopg2.connect( + host=pytest.db_host, + port=pytest.db_port, + user=pytest.db_user, + password=pytest.db_pass, + database=pytest.db_test, + ) + conn.set_session(readonly=False, autocommit=True) + return conn, conn.cursor() + + +class TestEvaluateUsageAndPeak: + """Tests for evaluate_usage_and_peak function""" + + def test_evaluate_single_schedule_no_overlap(self, db_setup): + """Test usage evaluation with a single schedule that doesn't overlap""" + db_conn, db_cur = get_db_cursor() + try: + schedule_details = { + "schedule_id": 1, + "server_id": 1, + "interval_mask": "0 * * * *", + } + test_schedule = Schedule(schedule_details, db_cur) + + start_blocks = [0] + usage, peak = evaluate_usage_and_peak(start_blocks, [test_schedule]) + + assert usage.shape == (1440,) + assert peak == 1 + for i in range(24): + mins = i * 60 + assert (usage[mins : mins + 5] == 1).all() + assert (usage[mins + 5 : (i + 1) * 60] == 0).all() + finally: + db_cur.close() + db_conn.close() + + def test_evaluate_multiple_schedules_no_overlap(self, db_setup): + """Test evaluation with multiple schedules that don't overlap""" + db_conn, db_cur = get_db_cursor() + try: + schedule1_details = { + "schedule_id": 1, + "server_id": 1, + "interval_mask": "0 * * * *", + } + 
schedule1 = Schedule(schedule1_details, db_cur) + schedule1.frequency_minutes = 60 + schedule1.median_runtime_minutes = 5 + + schedule2_details = { + "schedule_id": 2, + "server_id": 1, + "interval_mask": "30 * * * *", + } + schedule2 = Schedule(schedule2_details, db_cur) + schedule2.frequency_minutes = 60 + schedule2.median_runtime_minutes = 5 + + start_blocks = [0, 30] + usage, peak = evaluate_usage_and_peak(start_blocks, [schedule1, schedule2]) + + assert (usage[0:5] == 1).all() + assert (usage[6:30] == 0.0).all() + assert (usage[30:35] == 1).all() + assert (usage[35:60] == 0.0).all() + assert peak == 1 + finally: + db_cur.close() + db_conn.close() + + def test_evaluate_overlapping_schedules(self, db_setup): + """Test evaluation with overlapping schedules""" + db_conn, db_cur = get_db_cursor() + try: + schedule1_details = { + "schedule_id": 1, + "server_id": 1, + "interval_mask": "0 * * * *", + } + schedule1 = Schedule(schedule1_details, db_cur) + schedule1.frequency_minutes = 60 + schedule1.median_runtime_minutes = 10 + + schedule2_details = { + "schedule_id": 2, + "server_id": 1, + "interval_mask": "0 * * * *", + } + schedule2 = Schedule(schedule2_details, db_cur) + schedule2.frequency_minutes = 60 + schedule2.median_runtime_minutes = 5 + + start_blocks = [0, 0] + usage, peak = evaluate_usage_and_peak(start_blocks, [schedule1, schedule2]) + + assert peak == 2 + assert usage[0] == 2 + assert usage[5] == 1 + finally: + db_cur.close() + db_conn.close() + + def test_evaluate_wrapping_around_day(self, db_setup): + """Test that schedules wrapping around midnight work correctly""" + db_conn, db_cur = get_db_cursor() + try: + schedule_details = { + "schedule_id": 1, + "server_id": 1, + "interval_mask": "0 0 * * *", + } + test_schedule = Schedule(schedule_details, db_cur) + test_schedule.frequency_minutes = 60 + test_schedule.median_runtime_minutes = 5 + start_blocks = [1430] # (1430 mins = 23:50) + + # Should throw an assertion error that the start block is too late 
for the frequency of the schedule + with pytest.raises(ValueError): + evaluate_usage_and_peak(start_blocks, [test_schedule]) + finally: + db_cur.close() + db_conn.close() + + + +class TestScheduleDomain: + """Tests for Schedule domain object""" + + def test_schedule_initialization(self, db_setup): + """Test Schedule object initialization""" + db_conn, db_cur = get_db_cursor() + try: + schedule_details = { + "schedule_id": "test-id-1", + "server_id": 5, + "interval_mask": "0 * * * *", + } + test_schedule = Schedule(schedule_details, db_cur) + + assert test_schedule.schedule_id == "test-id-1" + assert test_schedule.server_id == 5 + assert test_schedule.interval_mask == "0 * * * *" + finally: + db_cur.close() + db_conn.close() + + def test_schedule_frequency_hourly(self, db_setup): + """Test frequency determination for hourly cron""" + db_conn, db_cur = get_db_cursor() + try: + schedule_details = { + "schedule_id": "test-id-1", + "server_id": 1, + "interval_mask": "0 * * * *", # Every hour + } + test_schedule = Schedule(schedule_details, db_cur) + + assert test_schedule.frequency_minutes == 60 + finally: + db_cur.close() + db_conn.close() + + def test_schedule_frequency_daily(self, db_setup): + """Test frequency determination for daily cron""" + db_conn, db_cur = get_db_cursor() + try: + schedule_details = { + "schedule_id": "test-id-1", + "server_id": 1, + "interval_mask": "0 0 * * *", + } + test_schedule = Schedule(schedule_details, db_cur) + + assert test_schedule.frequency_minutes == 1440 + finally: + db_cur.close() + db_conn.close() + + def test_schedule_is_unsupported_irregular_cron(self, db_setup): + """Test that schedules with irregular cron expressions are marked as unsupported""" + db_conn, db_cur = get_db_cursor() + try: + schedule_details = { + "schedule_id": "test-id-1", + "server_id": 1, + "interval_mask": "0-15 */9 * * *", + } + test_schedule = Schedule(schedule_details, db_cur) + + assert test_schedule.is_unsupported() + assert not 
test_schedule.frequency_is_supported() + assert not test_schedule.is_regular_schedule() + finally: + db_cur.close() + db_conn.close() + + def test_schedule_is_unsupported_low_frequency(self, db_setup): + """Test that schedules with unsupported low frequencies are marked as unsupported""" + db_conn, db_cur = get_db_cursor() + try: + schedule_details = { + "schedule_id": "test-id-1", + "server_id": 1, + "interval_mask": "0 0 * * 0", # Weekly + } + test_schedule = Schedule(schedule_details, db_cur) + + assert test_schedule.is_unsupported() + finally: + db_cur.close() + db_conn.close() + + def test_schedule_is_regular_schedule_hourly(self, db_setup): + """Test that hourly schedules are recognized as regular""" + db_conn, db_cur = get_db_cursor() + try: + schedule_details = { + "schedule_id": "test-id-1", + "server_id": 1, + "interval_mask": "0 * * * *", + } + test_schedule = Schedule(schedule_details, db_cur) + + assert test_schedule.is_regular_schedule() + finally: + db_cur.close() + db_conn.close() + + def test_schedule_is_regular_schedule_every_15_mins(self, db_setup): + """Test that every-15-minute schedules are recognized as regular""" + db_conn, db_cur = get_db_cursor() + try: + schedule_details = { + "schedule_id": "test-id-1", + "server_id": 1, + "interval_mask": "*/15 * * * *", + } + test_schedule = Schedule(schedule_details, db_cur) + + assert test_schedule.is_regular_schedule() + finally: + db_cur.close() + db_conn.close() + + def test_schedule_is_regular_schedule_daily(self, db_setup): + """Test that daily schedules are recognized as regular""" + db_conn, db_cur = get_db_cursor() + try: + schedule_details = { + "schedule_id": "test-id-1", + "server_id": 1, + "interval_mask": "0 0 * * *", + } + test_schedule = Schedule(schedule_details, db_cur) + + assert test_schedule.is_regular_schedule() + finally: + db_cur.close() + db_conn.close() + + def test_schedule_45_min_schedule_is_supported(self, db_setup): + """Test that 45-minute frequency schedules are 
recognized as supported""" + db_conn, db_cur = get_db_cursor() + try: + schedule_details = { + "schedule_id": "test-id-1", + "server_id": 1, + "interval_mask": "*/45 * * * *", + } + test_schedule = Schedule(schedule_details, db_cur) + + assert not test_schedule.is_unsupported() + # Fails due to cronitor issue -> means any */45 gets missed out of the smart scheduling + finally: + db_cur.close() + db_conn.close() + + def test_schedule_is_irregular_schedule_weekdays(self, db_setup): + """Test that weekday-only schedules are marked as irregular""" + db_conn, db_cur = get_db_cursor() + try: + schedule_details = { + "schedule_id": "test-id-1", + "server_id": 1, + "interval_mask": "0 9 * * 1-5", # Weekdays only + } + test_schedule = Schedule(schedule_details, db_cur) + + assert not test_schedule.is_regular_schedule() + finally: + db_cur.close() + db_conn.close() + + +class TestGAPyGADScheduler: + """Tests for GAPyGADScheduler""" + + def test_custom_config(self): + """Test GAConfig with custom values""" + config = GAConfig( + num_generations=50, + sol_per_pop=100, + random_seed=42, + ) + assert config.num_generations == 50 + assert config.sol_per_pop == 100 + assert config.random_seed == 42 + assert config.num_parents_mating == 10 + assert config.mutation_percent_genes == 20 + assert config.parent_selection_type == "rank" + assert config.crossover_type == "uniform" + assert config.mutation_type == "random" + assert config.blocklist_schedule_ids == [] + + def test_scheduler_uses_default_config_when_optional_config_is_missing(self): + ga_scheduler = GAPyGADScheduler() + + assert ga_scheduler.cfg == GAConfig() + assert ga_scheduler.cfg.num_generations == 20 + + def test_scheduler_initialization_custom_config(self): + """Test scheduler initialization with custom config""" + config = {"num_generations": 30} + ga_scheduler = GAPyGADScheduler(config) + + assert ga_scheduler.cfg.num_generations == 30 + + def test_scheduler_initialization_filters_none_values(self): + """Test that 
None values are filtered out when initializing config""" + config = {"num_generations": None} + ga_scheduler = GAPyGADScheduler(config) + + assert ga_scheduler.cfg.num_generations == 20 + + +class TestSchedulerDatabaseFunctions: + """Tests for scheduler database functions (rollback/snapshot)""" + + def test_get_blocklisted_schedule_ids_empty(self, db_setup): + """Test retrieving blocklisted schedule IDs when none exist""" + db_conn, db_cur = get_db_cursor() + try: + # Initially should have the 10 admin schedules + result = scheduler.get_blocklisted_schedule_ids(db_cur) + assert len(result) == 18 + finally: + db_cur.close() + db_conn.close() + + def test_blocklist_schedule(self, db_setup): + """Test blocklisting a schedule""" + db_conn, db_cur = get_db_cursor() + try: + # First register a server and create a schedule + query_test_db( + """INSERT INTO servers (server_id, hostname, fqdn, ip4_address) + VALUES (1, 'test-server', 'test-server.local', 'G')""" + ) + query_test_db( + """INSERT INTO schedules (schedule_id, server_id, interval_mask, exec_command) + VALUES ('test-sched-1', 1, '0 * * * *', 'echo test')""" + ) + + # Blocklist the schedule + scheduler.blocklist_schedule(db_cur, "test-sched-1", reason="Testing") + + # Verify it's blocklisted + result = scheduler.get_blocklisted_schedule_ids(db_cur) + assert len(result) >= 1 + assert "test-sched-1" in result + finally: + db_cur.close() + db_conn.close() + + def test_snapshot_schedules_basic(self, db_setup): + """Test snapshotting schedules""" + db_conn, db_cur = get_db_cursor() + query_test_db("DELETE FROM schedules") + + try: + # Register a server and create schedules + query_test_db( + """INSERT INTO servers (server_id, hostname, fqdn, ip4_address) + VALUES (1, 'test-server', 'test-server.local', 'G')""" + ) + query_test_db( + """INSERT INTO schedules (schedule_id, server_id, interval_mask, smart_interval_mask, exec_command) + VALUES ('test-sched-1', 1, '0 * * * *', '30 * * * *', 'echo test')""" + ) + + # 
Snapshot the schedule + scheduler.snapshot_schedules(db_cur, ["test-sched-1"], reason="Test optimization") + + # Verify snapshot was created + snapshots = query_test_db("SELECT snapshot_id FROM snapshots WHERE reason = 'Test optimization'") + assert len(snapshots) > 0 + finally: + db_cur.close() + db_conn.close() + + def test_full_rollback_with_schedule_id(self, db_setup): + """Test full rollback for a specific schedule""" + db_conn, db_cur = get_db_cursor() + try: + # Register a server and create a schedule with smart_interval_mask set + query_test_db( + """INSERT INTO servers (server_id, hostname, fqdn, ip4_address) + VALUES (1, 'test-server', 'test-server.local', 'B')""" + ) + query_test_db( + """INSERT INTO schedules (schedule_id, server_id, interval_mask, smart_interval_mask, exec_command) + VALUES ('test-sched-1', 1, '0 * * * *', '30 * * * *', 'echo test')""" + ) + + # Perform full rollback + scheduler.full_rollback(db_cur, schedule_id="test-sched-1") + + # Verify smart_interval_mask is set to NULL + result = query_test_db( + "SELECT smart_interval_mask FROM schedules WHERE schedule_id = 'test-sched-1'" + ) + assert result[0][0] is None + finally: + db_cur.close() + db_conn.close() + + def test_restore_previous_schedules_requires_snapshot_id(self): + """Test that restore_previous_schedules requires snapshot_id""" + db_cur = Mock() + + with pytest.raises(TypeError): + scheduler.restore_previous_schedules(db_cur) + + +class TestEndToEndSmartScheduling: + """Integration tests for end-to-end smart scheduling workflow""" + + def test_create_schedules_from_details(self, db_setup): + """Test creating multiple Schedule objects from details""" + db_conn, db_cur = get_db_cursor() + try: + schedules_data = [ + { + "schedule_id": "sched-1", + "server_id": 1, + "interval_mask": "0 * * * *", + }, + { + "schedule_id": "sched-2", + "server_id": 1, + "interval_mask": "*/30 * * * *", + }, + ] + + schedules = [Schedule(data, db_cur) for data in schedules_data] + + assert 
len(schedules) == 2 + assert schedules[0].schedule_id == "sched-1" + assert schedules[1].schedule_id == "sched-2" + finally: + db_cur.close() + db_conn.close() + + def test_snapshot_schedules(self, db_setup): + """Test the snapshot_schedules function""" + db_conn, db_cur = get_db_cursor() + try: + query_test_db( + """INSERT INTO servers (server_id, hostname, fqdn, ip4_address) + VALUES (1, 'test-server', 'test-server.local', '192.168.1.1')""" + ) + query_test_db( + """INSERT INTO schedules (schedule_id, server_id, interval_mask, smart_interval_mask, exec_command) + VALUES ('sched-1', 1, '0 * * * *', '0 * * * *', 'echo test')""" + ) + query_test_db( + """INSERT INTO schedules (schedule_id, server_id, interval_mask, smart_interval_mask, exec_command) + VALUES ('sched-2', 1, '*/30 * * * *', '*/30 * * * *', 'echo test')""" + ) + + schedule_ids = ["sched-1", "sched-2"] + scheduler.snapshot_schedules(db_cur, schedule_ids, reason="Test optimization") + + # Verify that snapshots were created + snapshots = query_test_db("SELECT snapshot_id FROM snapshots WHERE reason = 'Test optimization'") + assert len(snapshots) > 0 + finally: + db_cur.close() + db_conn.close() + + def test_retrieve_snapshots(self, db_setup): + """Test retrieving schedule snapshots""" + db_conn, db_cur = get_db_cursor() + try: + query_test_db( + """INSERT INTO servers (server_id, hostname, fqdn, ip4_address) + VALUES (1, 'test-server', 'test-server.local', '192.168.1.1')""" + ) + query_test_db( + """INSERT INTO schedules (schedule_id, server_id, interval_mask, smart_interval_mask, exec_command) + VALUES ('test-sched', 1, '0 * * * *', '30 * * * *', 'echo test')""" + ) + query_test_db( + """INSERT INTO snapshots (snapshot_id, server_id, reason, snapshot_timestamp) + VALUES (1, 1, 'GA optimization', NOW())""" + ) + + snapshots = scheduler.retrieve_snapshots(db_cur, 1) + + assert len(snapshots) > 0 + finally: + db_cur.close() + db_conn.close() + + def test_multiple_overlapping_schedules_evaluation(self, 
db_setup): + """Test evaluating usage for multiple overlapping schedules""" + db_conn, db_cur = get_db_cursor() + try: + # Create 3 schedules with different patterns + schedules = [] + for i in range(3): + schedule_data = { + "schedule_id": f"sched-{i}", + "server_id": 1, + "interval_mask": "0 * * * *" if i == 0 else f"*/{15 * (i + 1)} * * * *", + } + test_schedule = Schedule(schedule_data, db_cur) + test_schedule.frequency_minutes = 60 + test_schedule.median_runtime_minutes = 5 + schedules.append(test_schedule) + + # Stagger start times to create overlaps + start_blocks = [0, 10, 20] + usage, peak = evaluate_usage_and_peak(start_blocks, schedules) + + assert peak > 0.3 # Should have some overlapping usage + assert usage.shape == (1440,) + finally: + db_cur.close() + db_conn.close() + + +class TestSmartSchedulingCommand: + """Tests for the smart scheduling command""" + + def test_smart_scheduling_frequency_unchanged_hourly_schedule(self, db_setup): + """Test that the frequency of the schedule remains unchanged after smart scheduling""" + db_conn, db_cur = get_db_cursor() + try: + hourly_schedule_details = { + "schedule_id": "test-schedule-1", + "server_id": 1, + "interval_mask": "0 * * * *", + } + hourly_schedule = Schedule(hourly_schedule_details, db_cur) + hourly_schedule.shifted = True + hourly_schedule.start_time_mins = 15 + + smart_schedule._update_schedule_cron(hourly_schedule) + assert hourly_schedule.smart_interval_mask == "15 * * * *" + assert hourly_schedule.interval_mask == "0 * * * *" + assert hourly_schedule.frequency_minutes == 60 + + hourly_schedule.determine_attributes(db_cur) + assert hourly_schedule.is_regular_schedule() + assert hourly_schedule.frequency_minutes == 60 + finally: + db_cur.close() + db_conn.close() + + def test_smart_scheduling_frequency_unchanged_fifteen_min_schedule(self, db_setup): + """Test that the frequency of the schedule remains unchanged after smart scheduling""" + db_conn, db_cur = get_db_cursor() + try: + 
fifteen_min_schedule_details = { + "schedule_id": "test-schedule-2", + "server_id": 1, + "interval_mask": "*/15 * * * *", + } + fifteen_min_schedule = Schedule(fifteen_min_schedule_details, db_cur) + fifteen_min_schedule.shifted = True + fifteen_min_schedule.start_time_mins = 3 + + smart_schedule._update_schedule_cron(fifteen_min_schedule) + assert fifteen_min_schedule.smart_interval_mask == "3-59/15 * * * *" + assert fifteen_min_schedule.frequency_minutes == 15 + + fifteen_min_schedule.determine_attributes(db_cur) + assert fifteen_min_schedule.is_regular_schedule() + assert fifteen_min_schedule.frequency_minutes == 15 + finally: + db_cur.close() + db_conn.close() + + def test_gene_space_constraints(self, db_setup): + """Test that the gene space constraints are respected when updating schedule crons""" + db_conn, db_cur = get_db_cursor() + try: + schedule_details = { + "schedule_id": "test-schedule-3", + "server_id": 1, + "interval_mask": "*/45 * * * *", + } + test_schedule = Schedule(schedule_details, db_cur) + test_schedule.frequency_minutes = 45 + test_schedule.shifted = True + test_schedule.start_time_mins = 50 # Shift greater than frequency + + with pytest.raises(ValueError): + smart_schedule._update_schedule_cron(test_schedule) + finally: + db_cur.close() + db_conn.close() + + def test_smart_scheduling_gene_space_constraints_30_min(self, db_setup): + """Test that the gene space constraints don't create invalid cron expressions""" + db_conn, db_cur = get_db_cursor() + try: + ga_scheduler = GAPyGADScheduler() + + schedule_details = { + "schedule_id": "test-schedule-4", + "server_id": 1, + "interval_mask": "*/30 * * * *", + } + test_schedule = Schedule(schedule_details, db_cur) + gene_space = ga_scheduler._gene_space([test_schedule]) + + test_schedule.shifted = True + test_schedule.start_time_mins = gene_space[0][-1] + smart_schedule._update_schedule_cron(test_schedule) + assert test_schedule.smart_interval_mask == "29-59/30 * * * *" + assert 
croniter.croniter.is_valid(test_schedule.smart_interval_mask) + assert test_schedule.frequency_minutes == 30 + + test_schedule.start_time_mins = gene_space[0][1] + smart_schedule._update_schedule_cron(test_schedule) + assert test_schedule.smart_interval_mask == "1-59/30 * * * *" + assert croniter.croniter.is_valid(test_schedule.smart_interval_mask) + assert test_schedule.frequency_minutes == 30 + finally: + db_cur.close() + db_conn.close() + + def test_smart_scheduling_gene_space_constraints_daily(self, db_setup): + """Test that the gene space constraints don't create invalid cron expressions""" + db_conn, db_cur = get_db_cursor() + try: + ga_scheduler = GAPyGADScheduler() + + schedule_details = { + "schedule_id": "test-schedule-4", + "server_id": 1, + "interval_mask": "30 8 * * *", + } + test_schedule = Schedule(schedule_details, db_cur) + gene_space = ga_scheduler._gene_space([test_schedule]) + + test_schedule.shifted = True + test_schedule.start_time_mins = gene_space[0][-1] + smart_schedule._update_schedule_cron(test_schedule) + assert test_schedule.smart_interval_mask == "29 9 * * *" + assert croniter.croniter.is_valid(test_schedule.smart_interval_mask) + assert test_schedule.frequency_minutes == 1440 + + test_schedule.start_time_mins = gene_space[0][0] + smart_schedule._update_schedule_cron(test_schedule) + assert test_schedule.smart_interval_mask == "30 8 * * *" + assert croniter.croniter.is_valid(test_schedule.smart_interval_mask) + assert test_schedule.frequency_minutes == 1440 + + test_schedule.start_time_mins = gene_space[0][1] + smart_schedule._update_schedule_cron(test_schedule) + assert test_schedule.smart_interval_mask == "31 8 * * *" + assert croniter.croniter.is_valid(test_schedule.smart_interval_mask) + assert test_schedule.frequency_minutes == 1440 + finally: + db_cur.close() + db_conn.close() + + +class TestScheduleSnapshots: + """Tests for schedule snapshots functionality""" + + def test_snapshot_schedules(self, db_setup): + """Test 
snapshotting schedules and automatic schedule backup creation""" + db_conn, db_cur = get_db_cursor() + try: + # Create server and schedules first + query_test_db( + """INSERT INTO servers (server_id, hostname, fqdn, ip4_address) + VALUES (1, 'test-server', 'test-server.local', 'C')""" + ) + schedule_ids = ["test-schedule-1", "test-schedule-2"] + for sched_id in schedule_ids: + query_test_db( + f"""INSERT INTO schedules (schedule_id, server_id, interval_mask, smart_interval_mask, exec_command) + VALUES ('{sched_id}', 1, '0 * * * *', '30 * * * *', 'echo test')""" + ) + + # Snapshot the schedules + scheduler.snapshot_schedules(db_cur, schedule_ids, server_id = 1, reason="Test optimization") + + # Verify snapshot was created + snapshot_result = query_test_db("SELECT snapshot_id, server_id FROM snapshots WHERE reason = 'Test optimization'") + assert len(snapshot_result) > 0 + snapshot_id = snapshot_result[0][0] + server_id = snapshot_result[0][1] + assert server_id == 1 + + # Verify schedule backups exist for this snapshot + schedule_backups_result = query_test_db("SELECT schedule_id FROM schedule_backups WHERE snapshot_id = %s" % snapshot_id) + assert len(schedule_backups_result) == len(schedule_ids) + backup_schedule_ids = [row[0] for row in schedule_backups_result] + for schedule_id in schedule_ids: + assert schedule_id in backup_schedule_ids + + finally: + db_cur.close() + db_conn.close() + + + def test_full_rollback_by_server_id(self, db_setup): + """Test full rollback for a server""" + db_conn, db_cur = get_db_cursor() + try: + # Register a server and create a schedule with smart_interval_mask set + query_test_db( + """INSERT INTO servers (server_id, hostname, fqdn, ip4_address) + VALUES (1, 'test-server', 'test-server.local', 'D')""" + ) + query_test_db( + """INSERT INTO schedules (schedule_id, server_id, interval_mask, smart_interval_mask, exec_command) + VALUES ('test-schedule-1', 1, '0 * * * *', '30 * * * *', 'echo test')""" + ) + 
scheduler.full_rollback(db_cur, server_id=1) + assert query_test_db("SELECT smart_interval_mask FROM schedules WHERE schedule_id = 'test-schedule-1'")[0][0] is None + assert query_test_db("SELECT interval_mask FROM schedules WHERE schedule_id = 'test-schedule-1'")[0][0] == "0 * * * *" + + + finally: + db_cur.close() + db_conn.close() + + + def test_full_rollback_by_schedule_id(self, db_setup): + """Test full rollback for a specific schedule""" + db_conn, db_cur = get_db_cursor() + try: + query_test_db( + """INSERT INTO servers (server_id, hostname, fqdn, ip4_address) + VALUES (1, 'test-server', 'test-server.local', 'E')""" + ) + query_test_db( + """INSERT INTO schedules (schedule_id, server_id, interval_mask, smart_interval_mask, exec_command) + VALUES ('test-schedule-1', 1, '0 * * * *', '30 * * * *', 'echo test')""" + ) + + # Perform full rollback + scheduler.full_rollback(db_cur, schedule_id="test-schedule-1") + + # Verify smart_interval_mask is set to NULL + assert query_test_db("SELECT smart_interval_mask FROM schedules WHERE schedule_id = 'test-schedule-1'")[0][0] is None + assert query_test_db("SELECT interval_mask FROM schedules WHERE schedule_id = 'test-schedule-1'")[0][0] == "0 * * * *" + + finally: + db_cur.close() + db_conn.close() + + def test_restore_previous_schedules(self, db_setup): + """Test rollback to previous for a specific schedule""" + db_conn, db_cur = get_db_cursor() + try: + query_test_db( + """INSERT INTO servers (server_id, hostname, fqdn, ip4_address) + VALUES (1, 'test-server', 'test-server.local', 'F')""" + ) + query_test_db( + """INSERT INTO schedules (schedule_id, server_id, interval_mask, smart_interval_mask, exec_command) + VALUES ('test-schedule-1', 1, '0 * * * *', '30 * * * *', 'echo test')""" + ) + scheduler.snapshot_schedules(db_cur, ["test-schedule-1"], reason="Test optimization", server_id=1) + assert query_test_db("SELECT smart_interval_mask FROM schedule_backups WHERE schedule_id = 'test-schedule-1'")[0][0] == "30 * * * *" 
+ assert query_test_db("SELECT COUNT(*) FROM schedule_backups WHERE schedule_id = 'test-schedule-1'")[0][0] == 1 + + query_test_db("UPDATE schedules SET smart_interval_mask = '45 * * * *' WHERE schedule_id = 'test-schedule-1'") + + scheduler.snapshot_schedules(db_cur, ["test-schedule-1"], reason="Test optimization", server_id=1) + assert query_test_db("SELECT server_id FROM snapshots ORDER BY snapshot_id DESC LIMIT 1")[0][0] == 1 + assert query_test_db("SELECT smart_interval_mask FROM schedule_backups WHERE schedule_id = 'test-schedule-1' ORDER BY snapshot_id DESC LIMIT 1")[0][0] == "45 * * * *" + assert query_test_db("SELECT COUNT(*) FROM schedule_backups WHERE schedule_id = 'test-schedule-1'")[0][0] == 2 + + # Perform rollback to previous snapshot + prev_snapshot_id = query_test_db("SELECT snapshot_id FROM snapshots ORDER BY snapshot_timestamp DESC LIMIT 1 OFFSET 1")[0][0] + scheduler.restore_previous_schedules(db_cur, snapshot_id=prev_snapshot_id, server_id=1) + assert query_test_db("SELECT smart_interval_mask FROM schedules WHERE schedule_id = 'test-schedule-1'")[0][0] == "30 * * * *" + + finally: + db_cur.close() + db_conn.close() \ No newline at end of file From 5b54cd0de3acc0b136241122f1eed8a807c521de Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Fri, 22 May 2026 10:28:19 +0100 Subject: [PATCH 35/53] Remove surperfluous blocklist schedule parameter in PyGAD and prevent creep for infrequent taps --- cicada/commands/smart_schedule.py | 13 ++++++------- cicada/lib/SmartScheduling/domain.py | 9 ++++++++- cicada/lib/SmartScheduling/pygad.py | 7 +------ 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index 780bbd7..bbff15b 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -40,9 +40,10 @@ def _create_schedule_objects(schedule_ids, db_cur): try: schedule = Schedule(details, db_cur=db_cur) - # Ignore the few schedules that have irregular 
cron expressions for now. There are few enough that this shouldn't impact the optimisation and is not worth the effort to try and support these irregular schedules in the GA + # Ignore the few schedules that have irregular cron expressions for now. + # There are few enough that this shouldn't impact the optimisation and is not worth the effort to try and support these in the GA if not schedule.is_regular_schedule(): - print(f"Skipping irregular schedule {schedule.schedule_id} with cron expression {schedule.interval_mask}") + print(f"Skipping irregular schedule {schedule.schedule_id} with cron expression {schedule.interval_mask}") else: schedules.append(schedule) except Exception as e: @@ -166,20 +167,18 @@ def main(server_id=None, dbname=None, ga_config=None): try: print("\n------------Starting Optimisation-----------------") - blocklist_schedule_ids = scheduler.get_blocklisted_schedule_ids(db_cur) - print(f"blocklisted schedule IDs that will be excluded from optimization: {blocklist_schedule_ids}") - ga = pygad.GAPyGADScheduler(config=ga_config, blocklist_schedule_ids=blocklist_schedule_ids) print("Running PyGAD solver ...") + ga = pygad.GAPyGADScheduler(config=ga_config) optimised_schedules, __, peak_usage, __, initial_fitness = ga.solve(schedules) print(f"Optimized schedule for server_id {server_id}: new peak usage {peak_usage}") - print("--------------------------------------------------\n") - print("\n-------------Updating Schedules------------------") if peak_usage < initial_fitness: # Only update schedules if we have found an improvement + print("\n-------------Updating Schedules------------------") _assign_new_schedules(optimised_schedules, db_cur=db_cur) optimised_schedule_ids = [schedule.schedule_id for schedule in optimised_schedules if schedule.shifted] scheduler.snapshot_schedules(db_cur, optimised_schedule_ids, server_id=server_id, computed_usage=peak_usage, reason='Smart Schedule Optimization') + 
print("--------------------------------------------------\n") else: print(f"No improvement found for server_id {server_id}. Current peak usage: {initial_fitness}, Optimized peak usage: {peak_usage}. No schedule updates will be made.") print("--------------------------------------------------\n") diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/SmartScheduling/domain.py index 2e134f4..46a1ba9 100644 --- a/cicada/lib/SmartScheduling/domain.py +++ b/cicada/lib/SmartScheduling/domain.py @@ -54,7 +54,14 @@ def _determine_start_time_mins(self): today = datetime.datetime.now().date() midnight = datetime.datetime.combine(today, datetime.time.min) - it = croniter(self.current_interval_mask, midnight) + # Infrequent taps aren't bounded by their frequency but instead shift within the hour + # Basing it on the original interval mask prevents creep over multiple optimizations + # and ensures the schedule doesn't shift more than an hour from the original schedule + if self.frequency_minutes > 60: + it = croniter(self.interval_mask, midnight) + else: + it = croniter(self.current_interval_mask, midnight) + if croniter.match(self.current_interval_mask, midnight): first_iter = midnight self.start_time_mins = 0 diff --git a/cicada/lib/SmartScheduling/pygad.py b/cicada/lib/SmartScheduling/pygad.py index 219ccda..dbbf7ec 100644 --- a/cicada/lib/SmartScheduling/pygad.py +++ b/cicada/lib/SmartScheduling/pygad.py @@ -23,13 +23,12 @@ class GAPyGADScheduler: We cap the max shift of a schedule to within the hour to prevent large shifts for schedules that run daily. 
""" - def __init__(self, config: Optional[Mapping[str, object]] = None, blocklist_schedule_ids: Optional[List[str]] = None): + def __init__(self, config: Optional[Mapping[str, object]] = None): if config is None: self.cfg = GAConfig() else: filtered_config = {key: value for key, value in config.items() if value is not None} self.cfg = GAConfig(**filtered_config) - self.blocklist_schedule_ids = blocklist_schedule_ids if blocklist_schedule_ids is not None else [] def _gene_space(self, schedules: Sequence[Schedule]) -> List[List[int]]: @@ -76,10 +75,6 @@ def _initial_population(self, schedules: Sequence[Schedule], gene_space: List[Li for _ in range(self.cfg.sol_per_pop - 1): pop.append([gene_space[i][int(rng.integers(0, len(gene_space[i])))] for i in range(len(schedules))]) return np.asarray(pop, dtype=int) - - def _blocklist(self): - self.cfg.blocklist_schedule_ids = set(self.cfg.blocklist_schedule_ids) - raise NotImplementedError("blocklist functionality not yet implemented") def fitness_fn(self, ga, solution, solution_idx): _, peak = evaluate_usage_and_peak(solution, self.schedules) From 466947fa2b016c81ce901150502efe6e5c6d5636 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Fri, 22 May 2026 11:06:29 +0100 Subject: [PATCH 36/53] Switch blocklist add/remove functionality and remove reset_schedule_backups from being called automatically with restore_previous_schedules --- cicada/commands/blocklist_schedule.py | 8 ++++---- cicada/commands/smart_schedule.py | 5 +++-- cicada/commands/smart_schedule_rollback.py | 9 ++++----- cicada/lib/scheduler.py | 19 ++++--------------- 4 files changed, 15 insertions(+), 26 deletions(-) diff --git a/cicada/commands/blocklist_schedule.py b/cicada/commands/blocklist_schedule.py index 3cbdfa3..859acad 100644 --- a/cicada/commands/blocklist_schedule.py +++ b/cicada/commands/blocklist_schedule.py @@ -29,10 +29,6 @@ def main(schedule_id: str, remove: bool = False, reason: Optional[str] = None, d if remove: 
scheduler.remove_blocklist_schedule(db_cur, schedule_id=schedule_id) print(f"Schedule {schedule_id} has been removed from the blocklist successfully.") - scheduler.full_rollback(db_cur, schedule_id=schedule_id) - print(f"Schedule {schedule_id} has been rolled back to original settings successfully.") - scheduler.reset_schedule_backups(db_cur, schedule_id=schedule_id) - print(f"Backups for schedule {schedule_id} have been removed successfully.") else: schedule_details = scheduler.get_schedule_details(db_cur, schedule_id) @@ -41,6 +37,10 @@ def main(schedule_id: str, remove: bool = False, reason: Optional[str] = None, d return scheduler.blocklist_schedule(db_cur, schedule_id=schedule_id, reason=reason) print(f"Schedule {schedule_id} has been blocklisted successfully.") + scheduler.full_rollback(db_cur, schedule_id=schedule_id) + print(f"Schedule {schedule_id} has been rolled back to original settings successfully.") + scheduler.reset_schedule_backups(db_cur, schedule_id=schedule_id) + print(f"Backups for schedule {schedule_id} have been removed successfully.") diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index bbff15b..18a2b27 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -101,6 +101,7 @@ def _assign_new_schedules(optimised_schedules: Schedule, db_cur): schedule_details_list = [] schedule_ids = [] + # For each schedule, update the schedule in the DB with the new interval_mask based on the start_time_mins calculated by the GA optimizer for schedule in optimised_schedules: _update_schedule_cron(schedule) @@ -131,7 +132,7 @@ def _assign_new_schedules(optimised_schedules: Schedule, db_cur): schedule_details_list.append(schedule_details) schedule_ids.append(schedule.schedule_id) - scheduler.update_schedule_details_bulk(db_cur=db_cur, schedule_list=schedule_details_list, reason='Smart Schedule Optimization') + scheduler.update_schedule_details_bulk(db_cur=db_cur, 
schedule_list=schedule_details_list) @utils.named_exception_handler("smart_schedule") @@ -173,7 +174,7 @@ def main(server_id=None, dbname=None, ga_config=None): print(f"Optimized schedule for server_id {server_id}: new peak usage {peak_usage}") - if peak_usage < initial_fitness: # Only update schedules if we have found an improvement + if peak_usage < initial_fitness: print("\n-------------Updating Schedules------------------") _assign_new_schedules(optimised_schedules, db_cur=db_cur) optimised_schedule_ids = [schedule.schedule_id for schedule in optimised_schedules if schedule.shifted] diff --git a/cicada/commands/smart_schedule_rollback.py b/cicada/commands/smart_schedule_rollback.py index e9a9a7e..70bcef8 100644 --- a/cicada/commands/smart_schedule_rollback.py +++ b/cicada/commands/smart_schedule_rollback.py @@ -8,23 +8,22 @@ def _rollback_to_previous_snapshot(db_cur, server_id): """ Roll back to the previous snapshot for a given server_id. If no previous snapshot exists, perform a full rollback. """ - print(f"\n [Rolling back server {server_id}]") + print(f"\n[Rolling back server {server_id}]") snapshots = scheduler.retrieve_snapshots(db_cur, server_id) current_snapshot = snapshots[0][0] if snapshots and len(snapshots) > 0 else None previous_snapshot = snapshots[1][0] if snapshots and len(snapshots) > 1 else None + # Remove the current snapshot (if it exists) to prevent it from being restored in future rollbacks if current_snapshot is not None: scheduler.reset_schedule_backups(db_cur, snapshot_id=current_snapshot) scheduler.remove_snapshot(db_cur, current_snapshot) + # Restore the previous snapshot if it exists. If no previous snapshot exists, perform a full rollback instead if previous_snapshot is not None: scheduler.restore_previous_schedules(db_cur, server_id=server_id, snapshot_id=previous_snapshot) - scheduler.reset_schedule_backups(db_cur, snapshot_id=previous_snapshot) else: - print("No previous snapshot. 
Commencing full rollback instead...") + print("No previous snapshot found. Commencing full rollback instead...\n") scheduler.full_rollback(db_cur, server_id) - schedule_ids = scheduler.get_all_schedule_ids_per_server(db_cur, server_id) - scheduler.snapshot_schedules(db_cur, schedule_ids=schedule_ids, server_id=server_id, reason='Full Rollback') @utils.named_exception_handler("smart_schedule_rollback") diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index 65ec361..1045347 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -235,13 +235,8 @@ def update_schedule_details(db_cur, schedule_details): db_cur.execute(sqlquery) -def update_schedule_details_bulk(db_cur, schedule_list, reason=None): - """Update multiple schedules in a single bulk query. - - Args: - db_cur: Database cursor - schedule_list: List of dicts, each with schedule_id and any fields to update - """ +def update_schedule_details_bulk(db_cur, schedule_list): + """Update multiple schedules in a single bulk query.""" if not schedule_list: return @@ -473,12 +468,7 @@ def get_all_server_ids(db_cur): def get_all_schedule_ids_per_server(db_cur, server_id): """Get all possible schedule_ids for each server from the schedules table""" - sqlquery = """ - SELECT DISTINCT schedule_id - FROM schedules - WHERE server_id = %s - ORDER BY schedule_id - """ + sqlquery = """ SELECT DISTINCT schedule_id FROM schedules WHERE server_id = %s """ db_cur.execute(sqlquery, (server_id,)) schedule_ids = db_cur.fetchall() @@ -550,7 +540,7 @@ def full_rollback(db_cur, server_id=None, schedule_id=None): UPDATE schedules SET smart_interval_mask = NULL WHERE schedule_id = ANY(%s::text[]) """ db_cur.execute(update_all_schedules_query, (schedule_ids,)) - print(f"Schedules Updated:{chr(10).join(schedule_ids)}") + print(f"Schedules Updated:'{chr(10).join([f'- {sid}' for sid in schedule_ids])}") return @@ -576,7 +566,6 @@ def restore_previous_schedules(db_cur, server_id, snapshot_id): """ 
db_cur.execute(sqlquery, (server_id, snapshot_id)) print(f"{len(schedule_ids)} Schedules restored") - reset_schedule_backups(db_cur, snapshot_id=snapshot_id) return From 65a969dc4bb4f6c00f08f7c9ab751961852264ad Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Fri, 22 May 2026 11:26:23 +0100 Subject: [PATCH 37/53] Fix spread_schedules and increase snapshots stored per server to 5 --- cicada/commands/smart_schedule_rollback.py | 11 +++++------ cicada/commands/spread_schedules.py | 2 +- cicada/lib/scheduler.py | 11 +++++++---- tests/test_smart_scheduling.py | 4 ++-- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/cicada/commands/smart_schedule_rollback.py b/cicada/commands/smart_schedule_rollback.py index 70bcef8..d0cbbe2 100644 --- a/cicada/commands/smart_schedule_rollback.py +++ b/cicada/commands/smart_schedule_rollback.py @@ -23,7 +23,7 @@ def _rollback_to_previous_snapshot(db_cur, server_id): scheduler.restore_previous_schedules(db_cur, server_id=server_id, snapshot_id=previous_snapshot) else: print("No previous snapshot found. 
Commencing full rollback instead...\n") - scheduler.full_rollback(db_cur, server_id) + scheduler.full_rollback(db_cur, server_id=server_id) @utils.named_exception_handler("smart_schedule_rollback") @@ -62,7 +62,7 @@ def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbn print("\n------------Starting Full Rollback-----------------") scheduler.full_rollback(db_cur, server_id, schedule_id) print("Full rollback successful\n") - sever_ids = [server_id] if server_id else scheduler.get_all_server_ids(db_cur) + sever_ids = [server_id] if server_id else [server[0] for server in scheduler.get_all_server_ids(db_cur)] for server_id in sever_ids: schedule_ids = scheduler.get_all_schedule_ids_per_server(db_cur, server_id) scheduler.snapshot_schedules(db_cur, schedule_ids=schedule_ids, server_id=server_id, reason='Full Rollback') @@ -71,11 +71,10 @@ def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbn print("\n------------Starting Rollback to Previous Snapshot-----------------") if not server_id: print(f"Rolling back all servers...") - for server in scheduler.get_all_server_ids(db_cur): - server_id = server[0] - _rollback_to_previous_snapshot(db_cur, server_id) + for server_id in scheduler.get_all_server_ids(db_cur): + _rollback_to_previous_snapshot(db_cur, server_id=server_id[0]) else: - _rollback_to_previous_snapshot(db_cur, server_id) + _rollback_to_previous_snapshot(db_cur, server_id=server_id) except Exception as e: print(f"Error during rollback: {e}") diff --git a/cicada/commands/spread_schedules.py b/cicada/commands/spread_schedules.py index eb531d4..23aca17 100644 --- a/cicada/commands/spread_schedules.py +++ b/cicada/commands/spread_schedules.py @@ -117,7 +117,7 @@ def main(spread_details, dbname=None): output_message += " | Forced abort_running and adhoc_execute" scheduler.update_schedule_details(db_cur, new_schedule_details) - scheduler.reset_schedule_backups(db_cur) + scheduler.reset_schedule_backups(db_cur, 
schedule_id=schedule_id) else: output_message = ( diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index 1045347..54c1644 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -287,7 +287,10 @@ def snapshot_schedules(db_cur, schedule_ids, server_id=None, computed_usage=None reason: Optional reason/context for the snapshot """ if not schedule_ids: - return + raise ValueError("schedule_ids list cannot be empty") + + if not server_id: + raise ValueError("server_id must be provided for snapshot") # Insert into snapshots table to get a new snapshot_id sqlquery = "INSERT INTO snapshots (reason, server_id, computed_usage) VALUES (%s, %s, %s) RETURNING snapshot_id" @@ -302,7 +305,7 @@ def snapshot_schedules(db_cur, schedule_ids, server_id=None, computed_usage=None """ db_cur.execute(sqlquery, (snapshot_id, schedule_ids)) - # Clean up old snapshots (keep last 3 per schedule_id) + # Clean up old snapshots (keep last 5 per schedule_id) cleanup_backups_query = """ DELETE FROM schedule_backups sb DELETE FROM snapshots s @@ -311,11 +314,11 @@ def snapshot_schedules(db_cur, schedule_ids, server_id=None, computed_usage=None SELECT snapshot_id FROM schedule_backups WHERE schedule_id = sb.schedule_id ORDER BY snapshot_id DESC - LIMIT 3 + LIMIT 5 ) AND s.snapshot_id = sb.snapshot_id """ - print("Updated schedule_backups table") + print(f"Updated schedule_backups table for server {server_id}") def get_schedule_executable(db_cur, schedule_id): diff --git a/tests/test_smart_scheduling.py b/tests/test_smart_scheduling.py index 4d119d1..cad4f5a 100644 --- a/tests/test_smart_scheduling.py +++ b/tests/test_smart_scheduling.py @@ -523,7 +523,7 @@ def test_snapshot_schedules_basic(self, db_setup): ) # Snapshot the schedule - scheduler.snapshot_schedules(db_cur, ["test-sched-1"], reason="Test optimization") + scheduler.snapshot_schedules(db_cur, ["test-sched-1"], reason="Test optimization", server_id = 1) # Verify snapshot was created snapshots = 
query_test_db("SELECT snapshot_id FROM snapshots WHERE reason = 'Test optimization'") @@ -613,7 +613,7 @@ def test_snapshot_schedules(self, db_setup): ) schedule_ids = ["sched-1", "sched-2"] - scheduler.snapshot_schedules(db_cur, schedule_ids, reason="Test optimization") + scheduler.snapshot_schedules(db_cur, schedule_ids, reason="Test optimization", server_id = 1) # Verify that snapshots were created snapshots = query_test_db("SELECT snapshot_id FROM snapshots WHERE reason = 'Test optimization'") From 502d7414f8492f520e52d9a869cdecfbf2bac8a8 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Fri, 22 May 2026 15:17:57 +0100 Subject: [PATCH 38/53] Add CLI tests for smart scheduling commands --- cicada/commands/smart_schedule_rollback.py | 15 ++--- cicada/lib/SmartScheduling/__init__.py | 0 tests/test_functional_cli_entrypoint.py | 67 ++++++++++++++++++++++ 3 files changed, 73 insertions(+), 9 deletions(-) create mode 100644 cicada/lib/SmartScheduling/__init__.py diff --git a/cicada/commands/smart_schedule_rollback.py b/cicada/commands/smart_schedule_rollback.py index d0cbbe2..1bc948d 100644 --- a/cicada/commands/smart_schedule_rollback.py +++ b/cicada/commands/smart_schedule_rollback.py @@ -35,24 +35,21 @@ def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbn server_id: Optional[int] [Mutually exclusive with schedule_id] Target server to roll back. schedule_id: Optional[str] [Mutually exclusive with server_id] - Target schedule to roll back. + Target schedule to roll back - can only be used with --full flag. dbname: Optional[str] Database name to connect to. full: bool - If True, set smart_interval_mask to NULL (revert to original interval_mask). + If used, sets smart_interval_mask to NULL (revert to original interval_mask). previous: bool - If True, restore to the most recent snapshot (step back one optimization). + If used, restores to the most recent snapshot (step back one optimization). 
""" if type(server_id) != int and server_id is not None: raise TypeError(f"server_id needs to be of type int. {type(server_id)}") if type(schedule_id) != str and schedule_id is not None: raise TypeError("schedule_id needs to be of type str") - - if not full and not previous: - raise ValueError("Either --full or --previous flag must be provided") - - if full and previous: - raise ValueError("Cannot use both --full and --previous flags") + + if not(full or previous) or (full and previous): + raise ValueError("Exactly one of --full or --previous flags must be provided") db_conn = postgres.db_cicada(dbname) db_cur = db_conn.cursor() diff --git a/cicada/lib/SmartScheduling/__init__.py b/cicada/lib/SmartScheduling/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_functional_cli_entrypoint.py b/tests/test_functional_cli_entrypoint.py index 43cf138..d8f5fae 100644 --- a/tests/test_functional_cli_entrypoint.py +++ b/tests/test_functional_cli_entrypoint.py @@ -331,3 +331,70 @@ def test_list_schedule_ids(): -h, --help show this help message and exit """ assert actual == expected + + + +def test_smart_schedule_help(): + """test_smart_schedule_help""" + actual = subprocess.run(["cicada", "smart_schedule", "-h"], check=True, stdout=subprocess.PIPE).stdout.decode("utf-8") + + assert "optimise" in actual.lower() + assert "rollback" in actual.lower() + assert "blocklist" in actual.lower() + + +def test_smart_schedule_optimise_help(): + """test_smart_schedule optimise subcommand help""" + actual = subprocess.run(["cicada", "smart_schedule", "optimise", "-h"], check=True, stdout=subprocess.PIPE).stdout.decode("utf-8") + expected_snippet = """usage: smart_schedule optimise [-h] [--server_id SERVER_ID]""" + + assert expected_snippet in actual + + +def test_smart_schedule_rollback_help(): + """test_smart_schedule rollback subcommand help""" + actual = subprocess.run(["cicada", "smart_schedule", "rollback", "-h"], check=True, 
stdout=subprocess.PIPE).stdout.decode( + "utf-8" + ) + + expected_snippet = """usage: smart_schedule [-h] (--full | --previous)""" + assert expected_snippet in actual + +def test_smart_schedule_rollback_missing_flags(): + """test_smart_schedule rollback requires either --full or --previous""" + actual = subprocess.run(["cicada", "smart_schedule", "rollback"], check=False, stderr=subprocess.PIPE).stderr.decode("utf-8") + expected_snippet = """error: one of the arguments --full --previous is required""" + + assert expected_snippet in actual + + +def test_smart_schedule_rollback_mutually_exclusive(): + """test_smart_schedule rollback --full and --previous are mutually exclusive""" + actual = subprocess.run( + ["cicada", "smart_schedule", "rollback", "--full", "--previous", "--server_id", "1"], + check=False, + stderr=subprocess.PIPE + ).stderr.decode("utf-8") + expected_snippet = "smart_schedule: error: argument --previous: not allowed with argument --full" + + assert expected_snippet in actual + + +def test_smart_schedule_blocklist_help(): + """test_smart_schedule blocklist subcommand help""" + actual = subprocess.run(["cicada", "smart_schedule", "blocklist", "-h"], check=True, stdout=subprocess.PIPE).stdout.decode( + "utf-8" + ) + + assert "--schedule_id SCHEDULE_ID" in actual + assert "--remove" in actual + + +def test_smart_schedule_blocklist_missing_schedule_id(): + """test_smart_schedule blocklist requires --schedule_id""" + actual = subprocess.run(["cicada", "smart_schedule", "blocklist"], check=False, stderr=subprocess.PIPE).stderr.decode("utf-8") + + expected_snippet = """error: the following arguments are required: --schedule_id""" + assert expected_snippet in actual + + From 1ace70e3b502f6e2c781dfc1cf3d7c054f29c21b Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Fri, 22 May 2026 17:09:18 +0100 Subject: [PATCH 39/53] Addressing PR comments --- cicada/commands/blocklist_schedule.py | 9 ++-- cicada/commands/smart_schedule.py | 50 ++++++++++++++-------- 
cicada/commands/smart_schedule_rollback.py | 50 ++++++++++++++-------- cicada/lib/SmartScheduling/domain.py | 5 +-- 4 files changed, 70 insertions(+), 44 deletions(-) diff --git a/cicada/commands/blocklist_schedule.py b/cicada/commands/blocklist_schedule.py index 859acad..4f988dd 100644 --- a/cicada/commands/blocklist_schedule.py +++ b/cicada/commands/blocklist_schedule.py @@ -24,6 +24,7 @@ def main(schedule_id: str, remove: bool = False, reason: Optional[str] = None, d db_conn = postgres.db_cicada(dbname) db_cur = db_conn.cursor() + db_cur.execute("BEGIN;") try: if remove: @@ -41,12 +42,12 @@ def main(schedule_id: str, remove: bool = False, reason: Optional[str] = None, d print(f"Schedule {schedule_id} has been rolled back to original settings successfully.") scheduler.reset_schedule_backups(db_cur, schedule_id=schedule_id) print(f"Backups for schedule {schedule_id} have been removed successfully.") - - + db_cur.execute("COMMIT;") except Exception as e: - print(f"Error during blocklist operation: {e}") - raise + db_cur.execute("ROLLBACK;") + print("Database changes have been rolled back due to the error.") + raise Exception(f"Error during blocklist operation for schedule_id {schedule_id}: {e}") finally: db_cur.close() diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index 18a2b27..02e9095 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -2,13 +2,12 @@ from __future__ import annotations import sys +from typing import List from croniter import croniter -from typing import Optional, Sequence from cicada.lib import postgres, utils from cicada.lib import scheduler from cicada.lib.SmartScheduling import pygad from cicada.lib.SmartScheduling.domain import Schedule -from cicada.commands import smart_schedule_rollback def _get_schedules_per_server(server_id, db_cur=None): """Get all schedules for a given server_id.""" @@ -17,8 +16,7 @@ def _get_schedules_per_server(server_id, db_cur=None): 
schedule_ids = [row[0] for row in scheduler.get_all_schedule_ids_per_server(db_cur, server_id)] if not schedule_ids: - print(f"No schedules found for server_id {server_id}") - sys.exit(1) + raise ValueError(f"No schedules found for server_id {server_id}") return schedule_ids @@ -39,7 +37,7 @@ def _create_schedule_objects(schedule_ids, db_cur): details['blocklisted'] = False try: - schedule = Schedule(details, db_cur=db_cur) + schedule = Schedule(details, db_cur) # Ignore the few schedules that have irregular cron expressions for now. # There are few enough that this shouldn't impact the optimisation and is not worth the effort to try and support these in the GA if not schedule.is_regular_schedule(): @@ -96,7 +94,7 @@ def _update_schedule_cron(schedule : Schedule): -def _assign_new_schedules(optimised_schedules: Schedule, db_cur): +def _assign_new_schedules(optimised_schedules: List[Schedule], db_cur): """Assign new schedules based on the optimal schedule found.""" schedule_details_list = [] @@ -141,28 +139,38 @@ def main(server_id=None, dbname=None, ga_config=None): db_conn = postgres.db_cicada(dbname) db_cur = db_conn.cursor() + optimise(db_cur=db_cur, server_id=server_id, ga_config=ga_config) + db_cur.close() + db_conn.close() + +def optimise(db_cur, server_id=None, ga_config=None): if not server_id: # Recursively call main for each server_id if no specific server_id is provided server_ids = scheduler.get_all_server_ids(db_cur) for id in server_ids: - main(server_id=id[0], dbname=dbname, ga_config=ga_config) + optimise(db_cur=db_cur, server_id=id[0], ga_config=ga_config) else: + if not scheduler.validate_server_id(db_cur, server_id=server_id): - print(f"No valid server with server_id={server_id} does not exist in the database") - sys.exit(1) + raise ValueError(f"Server with server_id={server_id} does not exist in the database") # Get schedules for the server_id print("\n-----------------Schedule Setup----------------------") - schedule_ids = 
_get_schedules_per_server(server_id=server_id, db_cur=db_cur) + # Prevents process from progressing if no schedules are found for the server_id + # however allows optimisation to still run for other server_ids if running for all servers (server_id=None) + try: + schedule_ids = _get_schedules_per_server(server_id=server_id, db_cur=db_cur) + except ValueError as e: + print(e) + return print(f"Found {len(schedule_ids)} schedules for server_id {server_id}") # Build schedule objects schedules = _create_schedule_objects(schedule_ids, db_cur=db_cur) if not schedules: - print("No valid schedules found to optimize.") - sys.exit(1) + raise ValueError(f"No valid schedules found to optimize for server_id {server_id}") print("-------------------------------------------------\n") @@ -175,10 +183,17 @@ def main(server_id=None, dbname=None, ga_config=None): if peak_usage < initial_fitness: - print("\n-------------Updating Schedules------------------") - _assign_new_schedules(optimised_schedules, db_cur=db_cur) - optimised_schedule_ids = [schedule.schedule_id for schedule in optimised_schedules if schedule.shifted] - scheduler.snapshot_schedules(db_cur, optimised_schedule_ids, server_id=server_id, computed_usage=peak_usage, reason='Smart Schedule Optimization') + try: + print("\n-------------Updating Schedules------------------") + db_cur.execute("BEGIN;") + _assign_new_schedules(optimised_schedules, db_cur=db_cur) + optimised_schedule_ids = [schedule.schedule_id for schedule in optimised_schedules if schedule.shifted] + scheduler.snapshot_schedules(db_cur, optimised_schedule_ids, server_id=server_id, computed_usage=peak_usage, reason='Smart Schedule Optimization') + db_cur.execute("COMMIT;") + except Exception as e: + db_cur.execute("ROLLBACK;") + print("Database changes have been rolled back due to the error.") + raise Exception(f"Error during schedule update for server_id {server_id}: {e}") print("--------------------------------------------------\n") else: print(f"No 
improvement found for server_id {server_id}. Current peak usage: {initial_fitness}, Optimized peak usage: {peak_usage}. No schedule updates will be made.") @@ -187,6 +202,3 @@ def main(server_id=None, dbname=None, ga_config=None): except Exception as e: print(f"Error during optimization for server_id {server_id}: {e}") sys.exit(1) - - db_cur.close() - db_conn.close() \ No newline at end of file diff --git a/cicada/commands/smart_schedule_rollback.py b/cicada/commands/smart_schedule_rollback.py index 1bc948d..4d802a2 100644 --- a/cicada/commands/smart_schedule_rollback.py +++ b/cicada/commands/smart_schedule_rollback.py @@ -13,17 +13,24 @@ def _rollback_to_previous_snapshot(db_cur, server_id): current_snapshot = snapshots[0][0] if snapshots and len(snapshots) > 0 else None previous_snapshot = snapshots[1][0] if snapshots and len(snapshots) > 1 else None - # Remove the current snapshot (if it exists) to prevent it from being restored in future rollbacks - if current_snapshot is not None: - scheduler.reset_schedule_backups(db_cur, snapshot_id=current_snapshot) - scheduler.remove_snapshot(db_cur, current_snapshot) - - # Restore the previous snapshot if it exists. If no previous snapshot exists, perform a full rollback instead - if previous_snapshot is not None: - scheduler.restore_previous_schedules(db_cur, server_id=server_id, snapshot_id=previous_snapshot) - else: - print("No previous snapshot found. Commencing full rollback instead...\n") - scheduler.full_rollback(db_cur, server_id=server_id) + db_cur.execute("BEGIN;") + try: + # Remove the current snapshot (if it exists) to prevent it from being restored in future rollbacks + if current_snapshot is not None: + scheduler.reset_schedule_backups(db_cur, snapshot_id=current_snapshot) + scheduler.remove_snapshot(db_cur, current_snapshot) + + # Restore the previous snapshot if it exists. 
If no previous snapshot exists, perform a full rollback instead + if previous_snapshot is not None: + scheduler.restore_previous_schedules(db_cur, server_id=server_id, snapshot_id=previous_snapshot) + else: + print("No previous snapshot found. Commencing full rollback instead...\n") + scheduler.full_rollback(db_cur, server_id=server_id) + db_cur.execute("COMMIT;") + except Exception as e: + db_cur.execute("ROLLBACK;") + print("Database changes have been rolled back due to the error.") + raise Exception(f"Error during rollback to previous snapshot for server_id {server_id}: {e}") @utils.named_exception_handler("smart_schedule_rollback") @@ -47,9 +54,10 @@ def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbn raise TypeError(f"server_id needs to be of type int. {type(server_id)}") if type(schedule_id) != str and schedule_id is not None: raise TypeError("schedule_id needs to be of type str") - if not(full or previous) or (full and previous): raise ValueError("Exactly one of --full or --previous flags must be provided") + if schedule_id and not full: + raise ValueError("schedule_id can only be used with --full flag") db_conn = postgres.db_cicada(dbname) db_cur = db_conn.cursor() @@ -57,12 +65,18 @@ def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbn try: if full: print("\n------------Starting Full Rollback-----------------") - scheduler.full_rollback(db_cur, server_id, schedule_id) - print("Full rollback successful\n") - sever_ids = [server_id] if server_id else [server[0] for server in scheduler.get_all_server_ids(db_cur)] - for server_id in sever_ids: - schedule_ids = scheduler.get_all_schedule_ids_per_server(db_cur, server_id) - scheduler.snapshot_schedules(db_cur, schedule_ids=schedule_ids, server_id=server_id, reason='Full Rollback') + db_cur.execute("BEGIN;") + try: + scheduler.full_rollback(db_cur, server_id, schedule_id) + print("Full rollback successful\n") + server_ids = [server_id] if server_id else 
[server[0] for server in scheduler.get_all_server_ids(db_cur)] + for server in server_ids: + schedule_ids = [row[0] for row in scheduler.get_all_schedule_ids_per_server(db_cur, server)] + scheduler.snapshot_schedules(db_cur, schedule_ids=schedule_ids, server_id=server, reason='Full Rollback') + db_cur.execute("COMMIT;") + except Exception as e: + db_cur.execute("ROLLBACK;") + raise Exception(f"Error during full rollback: {e}") elif previous: print("\n------------Starting Rollback to Previous Snapshot-----------------") diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/SmartScheduling/domain.py index 46a1ba9..0754f1c 100644 --- a/cicada/lib/SmartScheduling/domain.py +++ b/cicada/lib/SmartScheduling/domain.py @@ -1,11 +1,10 @@ from __future__ import annotations from dataclasses import dataclass import math -from typing import Optional, List -import numpy as np +from typing import Optional from croniter import croniter import datetime -from ..scheduler import get_median_run_time +from cicada.lib.scheduler import get_median_run_time @dataclass(frozen=False) From a1a44c812d48dace4811c4cb140815c5e0adc690 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Tue, 26 May 2026 17:23:08 +0100 Subject: [PATCH 40/53] Addressing PR comments --- cicada/cli.py | 20 +- cicada/commands/smart_schedule.py | 4 +- cicada/lib/SmartScheduling/domain.py | 4 +- cicada/lib/SmartScheduling/evaluation.py | 23 +- cicada/lib/SmartScheduling/pygad.py | 6 +- cicada/lib/scheduler.py | 1 - ...ng => genetic_algorithm_process_cycle.png} | Bin docs/{offspring-ga.png => offspring_ga.png} | Bin ... 
=> smart_scheduler_technical_overview.md} | 24 +- setup/MIGRATION_GUIDE.md | 368 ++++++++++++++++++ tests/test_smart_scheduling.py | 96 ++++- 11 files changed, 500 insertions(+), 46 deletions(-) rename docs/{genetic-algorithm-process-cycle.png => genetic_algorithm_process_cycle.png} (100%) rename docs/{offspring-ga.png => offspring_ga.png} (100%) rename docs/{Smart Scheduler Technical Overview.md => smart_scheduler_technical_overview.md} (92%) create mode 100644 setup/MIGRATION_GUIDE.md diff --git a/cicada/cli.py b/cicada/cli.py index cf8f38e..c1b16cd 100644 --- a/cicada/cli.py +++ b/cicada/cli.py @@ -308,7 +308,7 @@ def smart_schedule(): ga_config.add_argument("--crossover_type",type=str,required=False, help="Crossover type for the genetic algorithm. Allowed values: ['single_point', 'two_point', 'uniform']. Default: uniform") ga_config.add_argument("--mutation_type",type=str,required=False, help="Mutation type for the genetic algorithm. Allowed values: ['random', 'swap', 'inversion', 'scramble']. Default: random") ga_config.add_argument("--keep_elitism",type=int,required=False, help="Number of elite solutions to keep for the next generation. Default: 2") - ga_config.add_argument("--random-seed",type=int,required=False, help="Set a random seed to get repeatable results. Default: None") + ga_config.add_argument("--random_seed",type=int,required=False, help="Set a random seed to get repeatable results. 
Default: None") # Rollback subcommand rollback_parser = subparsers.add_parser( @@ -377,15 +377,15 @@ def smart_schedule(): smart_schedule.main( server_id=getattr(args, 'server_id', None), ga_config={ - "num_generations": getattr(args, 'num_generations', None), - "sol_per_pop": getattr(args, 'sol_per_pop', None), - "num_parents_mating": getattr(args, 'num_parents_mating', None), - "mutation_percent_genes": getattr(args, 'mutation_percent_genes', None), - "parent_selection_type": getattr(args, 'parent_selection_type', None), - "crossover_type": getattr(args, 'crossover_type', None), - "mutation_type": getattr(args, 'mutation_type', None), - "keep_elitism": getattr(args, 'keep_elitism', None), - "random_seed": getattr(args, 'random_seed', None), + "num_generations": args.num_generations, + "sol_per_pop": args.sol_per_pop, + "num_parents_mating": args.num_parents_mating, + "mutation_percent_genes": args.mutation_percent_genes, + "parent_selection_type": args.parent_selection_type, + "crossover_type": args.crossover_type, + "mutation_type": args.mutation_type, + "keep_elitism": args.keep_elitism, + "random_seed": args.random_seed, }, ) elif args.action == "rollback": diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index 02e9095..5f0a9a7 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -11,12 +11,10 @@ def _get_schedules_per_server(server_id, db_cur=None): """Get all schedules for a given server_id.""" - existing_servers = [server[0] for server in scheduler.get_all_server_ids(db_cur)] - if server_id not in existing_servers: raise ValueError(f"server_id not in list of existing servers. Existing servers: {existing_servers}") schedule_ids = [row[0] for row in scheduler.get_all_schedule_ids_per_server(db_cur, server_id)] if not schedule_ids: - raise ValueError(f"No schedules found for server_id {server_id}") + raise ValueError(f"No schedules found for server_id {server_id}. 
Check if server_id exists and has schedules assigned.") return schedule_ids diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/SmartScheduling/domain.py index 0754f1c..804a6fe 100644 --- a/cicada/lib/SmartScheduling/domain.py +++ b/cicada/lib/SmartScheduling/domain.py @@ -50,8 +50,8 @@ def _get_average_runtime(self, db_cur): def _determine_start_time_mins(self): """Determine the start time in minutes from midnight from the interval_mask""" - today = datetime.datetime.now().date() - midnight = datetime.datetime.combine(today, datetime.time.min) + today = datetime.datetime.now(datetime.timezone.utc).date() + midnight = datetime.datetime.combine(today, datetime.time.min, tzinfo=datetime.timezone.utc) # Infrequent taps aren't bounded by their frequency but instead shift within the hour # Basing it on the original interval mask prevents creep over multiple optimizations diff --git a/cicada/lib/SmartScheduling/evaluation.py b/cicada/lib/SmartScheduling/evaluation.py index f483342..c83ac83 100644 --- a/cicada/lib/SmartScheduling/evaluation.py +++ b/cicada/lib/SmartScheduling/evaluation.py @@ -1,6 +1,6 @@ import numpy as np from typing import Sequence -from .domain import Schedule +from cicada.lib.SmartScheduling.domain import Schedule def evaluate_usage_and_peak(start_times: Sequence[int], schedules: Sequence[Schedule]): @@ -15,24 +15,19 @@ def evaluate_usage_and_peak(start_times: Sequence[int], schedules: Sequence[Sche """ mins_per_day = 1440 - freqs = [schedule.frequency_minutes for schedule in schedules] - run_times = [schedule.median_runtime_minutes for schedule in schedules] - - diff = np.zeros(mins_per_day + 1, dtype=float) - if not (len(start_times) == len(schedules) == len(freqs) == len(run_times)): - raise ValueError("Length of start_times, schedules, freqs, and run_times must all be the same") - - for i in range(len(start_times)): - if schedules[i].frequency_is_supported() and start_times[i] >= freqs[i]: - raise ValueError(f"Start time should be the 
earliest it can be for unsupported schedule at index {i}") for i, schedule in enumerate(schedules): if not schedule.frequency_is_supported(): continue - - freq = freqs[i] - run_time = run_times[i] + + freq = schedule.frequency_minutes + if start_times[i] >= freq: + raise ValueError(f"Start time should be the earliest it can be for schedule: {schedule} with start time {start_times[i]} exceeds frequency {freq}") + if freq <= 0: + raise ValueError(f"Unsupported frequency {freq} for schedule {schedule} {schedule.interval_mask} should have been labelled unsupported and caught earlier.") + + run_time = schedule.median_runtime_minutes minute = int(start_times[i]) # Iterate through the day in increments of the schedule's frequency, adding the schedule's usage to the diff array for the duration of its runtime. diff --git a/cicada/lib/SmartScheduling/pygad.py b/cicada/lib/SmartScheduling/pygad.py index dbbf7ec..e91d30a 100644 --- a/cicada/lib/SmartScheduling/pygad.py +++ b/cicada/lib/SmartScheduling/pygad.py @@ -1,9 +1,9 @@ from __future__ import annotations from typing import List, Mapping, Optional, Sequence import numpy as np -from .config import GAConfig -from .domain import Schedule -from .evaluation import evaluate_usage_and_peak +from cicada.lib.SmartScheduling.config import GAConfig +from cicada.lib.SmartScheduling.domain import Schedule +from cicada.lib.SmartScheduling.evaluation import evaluate_usage_and_peak import pygad diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index 54c1644..3e2e79b 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -273,7 +273,6 @@ def update_schedule_details_bulk(db_cur, schedule_list): sqlquery = f"UPDATE schedules SET {', '.join(case_clauses)} WHERE schedule_id = ANY(%s)" db_cur.execute(sqlquery, tuple(params)) - return def snapshot_schedules(db_cur, schedule_ids, server_id=None, computed_usage=None, reason=None): diff --git a/docs/genetic-algorithm-process-cycle.png 
b/docs/genetic_algorithm_process_cycle.png similarity index 100% rename from docs/genetic-algorithm-process-cycle.png rename to docs/genetic_algorithm_process_cycle.png diff --git a/docs/offspring-ga.png b/docs/offspring_ga.png similarity index 100% rename from docs/offspring-ga.png rename to docs/offspring_ga.png diff --git a/docs/Smart Scheduler Technical Overview.md b/docs/smart_scheduler_technical_overview.md similarity index 92% rename from docs/Smart Scheduler Technical Overview.md rename to docs/smart_scheduler_technical_overview.md index b84db87..59c17c4 100644 --- a/docs/Smart Scheduler Technical Overview.md +++ b/docs/smart_scheduler_technical_overview.md @@ -26,17 +26,21 @@ The GA evolves shift offsets for each schedule over multiple generations to find 6. **Rollback System** (`smart_schedule_rollback.py`) — Recovery mechanism +7. **Blocklisting** (`blocklist.py`) - Adding/Removing taps from Blocklist + ### Database Schema Changes **New Tables:** -- **`schedule_backups`** — Audit trail of schedule modifications - - `schedule_id` (PK): unique identifier - - `original_interval_mask`: pristine cron expression (before any optimization) - - `previous_interval_mask`: cron before this optimization run - - `interval_mask`: current cron after optimization - - `snapshot_at`: timestamp of last update (auto-set on INSERT/UPDATE) - - Indexes on `schedule_id` and `server_id` for fast lookups +- **`schedule_backups`** — Interval mask of schedules at different points in time to allow for rolling back + - `schedule_id`: schedule snapshotted + - `snapshot_id`: snapshot identifier + - `server_id`: server_id of the snapshot at that point in time (used to prevent rollback to a previous version which was on a different server) + - `interval_mask`: original interval mask (used to prevent rollback to a previous version which had a different interval_mask) + - `smart_interval_mask`: smart interval mask at that point in time + +- **`snapshot_table`** - Snapshot metadata + 
- `snapshot_timestamp` - **`schedule_blocklist`** — Excludes schedules from optimization - `schedule_id` (PK): schedule to exclude @@ -89,7 +93,7 @@ Rollback command triggered with server_id or schedule_id ## Genetic Algorithm Details

- +

@@ -114,7 +118,7 @@ Inverse of the peak_usage since it's a minimisation problem. Peak_usage is calcu ### Crossover & Mutation

- +

@@ -123,7 +127,7 @@ Inverse of the peak_usage since it's a minimisation problem. Peak_usage is calcu - **Mutation Type**: Random (randomly select genes and replace with random value from gene space) - **Elitism**: Keep the best solution across generations (default: 1) -The creation of the offsprings uses different methods to change the solutions, however they must remain within the gene limits. For more information checkout the official [PyGAD documentation](https://pypi.org/project/pygad/5.3.0/) as it will be infinitely better than anything I can produce +The creation of offspring uses different methods to change the solutions; however, they must remain within the gene limits. For more information, check out the official [PyGAD documentation](https://pypi.org/project/pygad), as it will be infinitely better than anything I can produce ### Population Seeding diff --git a/setup/MIGRATION_GUIDE.md b/setup/MIGRATION_GUIDE.md new file mode 100644 index 0000000..a805884 --- /dev/null +++ b/setup/MIGRATION_GUIDE.md @@ -0,0 +1,368 @@ +# Schema Migration Guide + +This guide explains how to add schema changes that work gracefully for both fresh installations and existing deployments. + +## Philosophy + +Cicada's migration strategy ensures: +- **Fresh installs** get the complete schema from `schema.sql` +- **Existing installs** are updated via migration scripts +- **Idempotency** — migrations can be run multiple times safely +- **Rollback awareness** — changes integrate with the existing `schedule_changes` audit trail + +## Pattern: Adding a New Column to `schedules` + +This is the most common migration scenario. Here's the approach: + +### Step 1: Update `setup/schema.sql` + +Add the column definition **within the `CREATE TABLE IF NOT EXISTS public.schedules` statement**: + +```sql +-- In the CREATE TABLE IF NOT EXISTS public.schedules section: +CREATE TABLE IF NOT EXISTS public.schedules +( + -- ... existing columns ... 
+ my_new_column VARCHAR(255) DEFAULT 'default_value', -- Add here + CONSTRAINT schedules_pkey PRIMARY KEY (schedule_id), + -- ... rest of constraints ... +) +WITH (OIDS=FALSE); +``` + +Then, add a defensive `ALTER TABLE` with `IF NOT EXISTS` immediately after the `CREATE TABLE`: + +```sql +-- Add my_new_column if not exists (for existing installations upgrading) +ALTER TABLE public.schedules +ADD COLUMN IF NOT EXISTS my_new_column VARCHAR(255) DEFAULT 'default_value'; +``` + +**Why both?** +- The column in `CREATE TABLE` satisfies fresh installs (cleaner schema) +- The `ALTER TABLE ... IF NOT EXISTS` ensures existing installations get the column without errors + +### Step 2: Create a Versioned Migration Script (Optional but Recommended) + +If the migration is complex or you want a separate, self-contained migration file: + +Create `setup/migrate_YYYYMMDD_description.sql`: + +```sql +/** Migration: Add my_new_column to schedules + Run as cicada user on db_cicada database + Date: 2026-05-21 +**/ +START TRANSACTION; + +-- Add my_new_column to schedules if it doesn't exist +ALTER TABLE public.schedules +ADD COLUMN IF NOT EXISTS my_new_column VARCHAR(255) DEFAULT 'default_value'; + +-- If the migration adds a column with NOT NULL and no default, +-- backfill existing rows first: +-- UPDATE public.schedules SET my_new_column = 'backfill_value' +-- WHERE my_new_column IS NULL; + +COMMIT TRANSACTION; +``` + +**When to create a separate migration:** +- The change requires data backfilling +- The change is complex (e.g., adding constraints, creating indexes, altering types) +- You want an audit trail of when the migration was run +- The migration takes significant time (separate script = easier to monitor) + +**Naming convention:** `migrate_YYYYMMDD_short_description.sql` (e.g., `migrate_20260521_add_priority_column.sql`) + +### Step 3: Update Code to Handle Missing Columns + +In `cicada/lib/scheduler.py` and related modules, make code defensive: + +```python +# Instead of 
assuming the column exists: +# schedule = result['my_new_column'] # ❌ KeyError on old schema + +# Use dict.get() with a default: +schedule = result.get('my_new_column', 'default_value') # ✅ Safe for old and new schema + +# Or use SQL COALESCE for consistent defaults: +SELECT + *, + COALESCE(my_new_column, 'default_value') as my_new_column +FROM schedules; +``` + +### Step 4: Update Tests + +Add fixtures that test **both old and new schema** versions: + +```python +# tests/conftest.py or relevant test file + +@pytest.fixture +def db_with_old_schema(pg_conn): + """Database with old schema (before migration)""" + # Run schema.sql without the new column + # Or mock a result without the column + return pg_conn + +@pytest.fixture +def db_with_new_schema(pg_conn): + """Database with new schema (after migration)""" + # Run full schema.sql with the new column + return pg_conn + +def test_code_handles_missing_column(db_with_old_schema): + # Verify code doesn't crash when column is missing + result = get_schedule_details(schedule_id, db=db_with_old_schema) + assert result['schedule_id'] == schedule_id + # Should not raise KeyError even though my_new_column is missing +``` + +## Pattern: Creating a New Table + +### Example: Adding `schedule_notifications` table + +#### Step 1: Update `schema.sql` + +```sql +-- Table: schedule_notifications +-- New table to track notification settings for schedules +CREATE TABLE IF NOT EXISTS public.schedule_notifications +( + notification_id SERIAL NOT NULL, + schedule_id VARCHAR(255) NOT NULL, + notify_on_failure SMALLINT NOT NULL DEFAULT 1, + notify_email VARCHAR(255), + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + CONSTRAINT schedule_notifications_pkey PRIMARY KEY (notification_id), + CONSTRAINT schedule_notifications_schedule_fkey FOREIGN KEY (schedule_id) + REFERENCES public.schedules (schedule_id) MATCH SIMPLE + ON UPDATE NO ACTION ON DELETE CASCADE +) +WITH (OIDS=FALSE); + +CREATE INDEX IF NOT EXISTS schedule_notifications_schedule_id_idx 
+ ON public.schedule_notifications + USING btree (schedule_id); +``` + +#### Step 2: Create Migration Script (Recommended) + +```sql +/** Migration: Add schedule_notifications table + Run as cicada user on db_cicada database + Date: 2026-05-21 +**/ +START TRANSACTION; + +CREATE TABLE IF NOT EXISTS public.schedule_notifications +( + notification_id SERIAL NOT NULL, + schedule_id VARCHAR(255) NOT NULL, + notify_on_failure SMALLINT NOT NULL DEFAULT 1, + notify_email VARCHAR(255), + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + CONSTRAINT schedule_notifications_pkey PRIMARY KEY (notification_id), + CONSTRAINT schedule_notifications_schedule_fkey FOREIGN KEY (schedule_id) + REFERENCES public.schedules (schedule_id) MATCH SIMPLE + ON UPDATE NO ACTION ON DELETE CASCADE +) +WITH (OIDS=FALSE); + +CREATE INDEX IF NOT EXISTS schedule_notifications_schedule_id_idx + ON public.schedule_notifications + USING btree (schedule_id); + +COMMIT TRANSACTION; +``` + +## Pattern: Modifying an Existing Column + +### Example: Making `parameters` column larger + +#### Step 1: Update `schema.sql` + +Document the change in a comment: + +```sql +-- Modified 2026-05-21: Increased from VARCHAR(1024) to VARCHAR(4096) +parameters VARCHAR(4096), +``` + +#### Step 2: Create Migration Script + +```sql +/** Migration: Increase parameters column size + Run as cicada user on db_cicada database + Date: 2026-05-21 +**/ +START TRANSACTION; + +-- Alter the column type +ALTER TABLE public.schedules +ALTER COLUMN parameters TYPE VARCHAR(4096); + +COMMIT TRANSACTION; +``` + +**Note:** Expanding VARCHAR is always safe and fast in PostgreSQL. Contracting requires data validation first. 
+ +## Pattern: Adding an Index + +### Example: Optimize queries on `interval_mask` + +#### Step 1: Update `schema.sql` + +```sql +-- Index: schedules_interval_mask_idx +CREATE INDEX IF NOT EXISTS schedules_interval_mask_idx + ON public.schedules + USING btree (interval_mask); +``` + +#### Step 2: Create Migration Script + +```sql +/** Migration: Add index on schedules.interval_mask + Run as cicada user on db_cicada database + Date: 2026-05-21 +**/ +START TRANSACTION; + +CREATE INDEX IF NOT EXISTS schedules_interval_mask_idx + ON public.schedules + USING btree (interval_mask); + +COMMIT TRANSACTION; +``` + +**Note:** Use `IF NOT EXISTS` so the migration is idempotent. + +## Deployment Workflow + +### For Fresh Installations +1. User runs `setup/schema.sql` → gets complete schema with all columns, tables, indexes + +### For Existing Installations +1. User deploys new code version +2. User applies migration scripts **in order** by date: + ```bash + psql -U cicada -d db_cicada -f setup/migrate_20260501_first_change.sql + psql -U cicada -d db_cicada -f setup/migrate_20260521_second_change.sql + ``` +3. 
Code continues running (defensive handling of optional columns/tables) + +### Deployment Documentation + +Include in your release notes: + +```markdown +## v2.0.0 - 2026-05-21 + +### Database Migrations Required + +Run migrations **in order**: +```bash +psql -U cicada -d db_cicada -f setup/migrate_20260501_new_feature.sql +psql -U cicada -d db_cicada -f setup/migrate_20260521_add_priority.sql +``` + +### Changes +- Added `priority` column to `schedules` table +- Added new `schedule_notifications` table for notification settings +- Code is backward-compatible; migrations are optional but recommended for full feature support + +### Rollback +If you encounter issues: +- Migrations can be reversed by dropping the new columns/tables (see comments in migration scripts) +- Older code versions will continue to work with the new schema (via defensive defaults) +``` + +## Testing Migrations Locally + +### Setup +```bash +# Create a test database +psql -U postgres -c "CREATE DATABASE db_cicada_test;" +psql -U postgres -d db_cicada_test -c "CREATE USER cicada WITH PASSWORD 'password';" +psql -U postgres -d db_cicada_test -c "GRANT ALL PRIVILEGES ON DATABASE db_cicada_test TO cicada;" +``` + +### Test Fresh Install +```bash +psql -U cicada -d db_cicada_test -f setup/schema.sql +``` + +### Test Migration Path +```bash +# Apply old schema, then run migrations +psql -U cicada -d db_cicada_test -f setup/schema.sql +psql -U cicada -d db_cicada_test -f setup/migrate_20260521_add_priority_column.sql + +# Verify the migration +psql -U cicada -d db_cicada_test -c "SELECT column_name FROM information_schema.columns + WHERE table_name='schedules' AND column_name='priority';" +``` + +## Best Practices + +1. **Always use `IF NOT EXISTS`** for idempotency + - `CREATE TABLE IF NOT EXISTS` + - `CREATE INDEX IF NOT EXISTS` + - `ALTER TABLE ... ADD COLUMN IF NOT EXISTS` + +2. **Set sensible defaults** for new columns + - Avoid `NULL` unless necessary + - Use `DEFAULT` clause in schema + +3. 
**Wrap migrations in transactions** + ```sql + START TRANSACTION; + -- migration SQL + COMMIT TRANSACTION; + ``` + +4. **Make code defensive** + - Use `dict.get()` with defaults instead of direct key access + - Use SQL `COALESCE()` for optional columns in queries + - Catch exceptions gracefully if a table doesn't exist yet + +5. **Document changes** + - Add comments in `schema.sql` noting when columns were added/modified + - Include the date and reason in migration script headers + - Update `CLAUDE.md` if architectural changes occur + +6. **Test both paths** + - Verify fresh installs work + - Verify migrations work on existing schema + - Verify code handles missing columns gracefully + +## Rollback Guidance + +If a migration causes issues: + +### Reversing Column Additions +```sql +ALTER TABLE public.schedules +DROP COLUMN my_new_column; +``` + +### Reversing Table Creations +```sql +DROP TABLE IF EXISTS public.schedule_notifications CASCADE; +``` + +### Reversing Index Additions +```sql +DROP INDEX IF EXISTS public.schedule_notifications_schedule_id_idx; +``` + +Document these in comments within the migration file for quick reference. 
+ +## References + +- See `setup/schema.sql` for the current complete schema +- See existing patterns like the `smart_interval_mask` column (added in line 96-98) +- See the `schedule_blocklist` table (added with `IF NOT EXISTS` pattern) diff --git a/tests/test_smart_scheduling.py b/tests/test_smart_scheduling.py index cad4f5a..8e8c168 100644 --- a/tests/test_smart_scheduling.py +++ b/tests/test_smart_scheduling.py @@ -27,12 +27,11 @@ def get_env_vars(): pytest.db_user = os.environ.get("DB_POSTGRES_USER") pytest.db_pass = os.environ.get("DB_POSTGRES_PASS") - pytest.db_test = f"pytest_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S_%f')}" - @pytest.fixture() def db_setup(get_env_vars): """db_setup""" + pytest.db_test = f"pytest_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S_%f')}" # Create the test_db pg_conn = psycopg2.connect( @@ -278,6 +277,42 @@ def test_schedule_initialization(self, db_setup): db_cur.close() db_conn.close() + def test_schedule_dataclass_fields_initialized(self, db_setup): + """Test that all dataclass fields are properly initialized, including defaults""" + db_conn, db_cur = get_db_cursor() + try: + schedule_details = { + "schedule_id": "test-id-1", + "server_id": 5, + "interval_mask": "0 * * * *", + } + test_schedule = Schedule(schedule_details, db_cur) + + # Verify all fields exist and have correct default values + assert hasattr(test_schedule, 'shifted') + assert hasattr(test_schedule, 'median_runtime_minutes') + assert hasattr(test_schedule, 'start_time_mins') + assert hasattr(test_schedule, 'blocklisted') + assert hasattr(test_schedule, 'frequency_minutes') + + # Verify default values + assert test_schedule.shifted is False + assert test_schedule.median_runtime_minutes == 5 # Will be updated by _get_average_runtime + assert test_schedule.start_time_mins == 0 + assert test_schedule.blocklisted is False + + # Accessing any of these should NOT raise AttributeError + try: + _ = test_schedule.shifted + _ = test_schedule.median_runtime_minutes 
+ _ = test_schedule.start_time_mins + _ = test_schedule.blocklisted + except AttributeError as e: + pytest.fail(f"AttributeError raised when accessing dataclass field: {e}") + finally: + db_cur.close() + db_conn.close() + def test_schedule_frequency_hourly(self, db_setup): """Test frequency determination for hourly cron""" db_conn, db_cur = get_db_cursor() @@ -753,7 +788,7 @@ def test_smart_scheduling_gene_space_constraints_30_min(self, db_setup): ga_scheduler = GAPyGADScheduler() schedule_details = { - "schedule_id": "test-schedule-4", + "schedule_id": "test-schedule-1", "server_id": 1, "interval_mask": "*/30 * * * *", } @@ -812,6 +847,61 @@ def test_smart_scheduling_gene_space_constraints_daily(self, db_setup): db_cur.close() db_conn.close() + def test_get_schedules_per_server_no_schedules_single_server(self, db_setup): + """Test that _get_schedules_per_server raises ValueError when no schedules exist for a server""" + try: + # Create two servers (omit server_id to use auto-increment) + query_test_db( + """INSERT INTO servers (hostname, fqdn, ip4_address) + VALUES ('test-server-1', 'test-server-1.local', '127.0.0.1'), + ('test-server-2', 'test-server-2.local', '127.0.0.2')""" + ) + + # Add a schedule only to server 1 + query_test_db( + """INSERT INTO schedules + (schedule_id, server_id, interval_mask, exec_command) + VALUES ('schedule-1', 1, '0 * * * *', 'echo test')""" + ) + + # Get a fresh cursor after data insertion + db_conn, db_cur = get_db_cursor() + + # Attempt to get schedules for server 2 without any schedules + with pytest.raises(ValueError, match="No schedules found for server_id 2"): + smart_schedule._get_schedules_per_server(server_id=2, db_cur=db_cur) + + db_cur.close() + db_conn.close() + except Exception as e: + raise e + + def test_main_no_schedules_single_server(self, db_setup, capsys): + """Test that main() handles servers without schedules gracefully (single server)""" + try: + # Create two servers + query_test_db( + """INSERT INTO servers 
(server_id, hostname, fqdn, ip4_address) + VALUES (1, 'test-server-1', 'test-server-1.local', '127.0.0.1'), + (2, 'test-server-2', 'test-server-2.local', '127.0.0.2')""" + ) + + # Add a schedule only to server 1 + query_test_db( + """INSERT INTO schedules + (schedule_id, server_id, interval_mask, exec_command) + VALUES ('schedule-1', 1, '0 * * * *', 'echo test')""" + ) + + # Call main with server_id 2 (should return early without error) + smart_schedule.main(server_id=2, dbname=pytest.db_test) + + # Verify that ValueError message was printed + captured = capsys.readouterr() + assert "No schedules found for server_id 2" in captured.out + except Exception as e: + raise e + class TestScheduleSnapshots: """Tests for schedule snapshots functionality""" From fa60b1f6f3bf99c08f56c4155a6a5399a9c9196f Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Wed, 27 May 2026 10:55:34 +0100 Subject: [PATCH 41/53] Rename pygad -> GAPyGAD --- cicada/cli.py | 37 ++++++++++--------- cicada/commands/smart_schedule.py | 4 +- .../SmartScheduling/{pygad.py => GAPyGAD.py} | 5 ++- tests/test_smart_scheduling.py | 2 +- 4 files changed, 26 insertions(+), 22 deletions(-) rename cicada/lib/SmartScheduling/{pygad.py => GAPyGAD.py} (99%) diff --git a/cicada/cli.py b/cicada/cli.py index c1b16cd..1c10656 100644 --- a/cicada/cli.py +++ b/cicada/cli.py @@ -374,31 +374,34 @@ def smart_schedule(): args = parser.parse_args(sys.argv[2:]) if args.action == "optimise" or args.action is None: + optimise_args = optimise_parser.parse_args(sys.argv[3:]) smart_schedule.main( - server_id=getattr(args, 'server_id', None), + server_id=optimise_args.server_id, ga_config={ - "num_generations": args.num_generations, - "sol_per_pop": args.sol_per_pop, - "num_parents_mating": args.num_parents_mating, - "mutation_percent_genes": args.mutation_percent_genes, - "parent_selection_type": args.parent_selection_type, - "crossover_type": args.crossover_type, - "mutation_type": args.mutation_type, - "keep_elitism": 
args.keep_elitism, - "random_seed": args.random_seed, + "num_generations": optimise_args.num_generations, + "sol_per_pop": optimise_args.sol_per_pop, + "num_parents_mating": optimise_args.num_parents_mating, + "mutation_percent_genes": optimise_args.mutation_percent_genes, + "parent_selection_type": optimise_args.parent_selection_type, + "crossover_type": optimise_args.crossover_type, + "mutation_type": optimise_args.mutation_type, + "keep_elitism": optimise_args.keep_elitism, + "random_seed": optimise_args.random_seed, }, ) elif args.action == "rollback": + rollback_args = rollback_parser.parse_args(sys.argv[3:]) smart_schedule_rollback.main( - server_id=getattr(args, 'server_id', None), - schedule_id=getattr(args, 'schedule_id', None), - full=getattr(args, 'full', False), - previous=getattr(args, 'previous', False)) + server_id=rollback_args.server_id, + schedule_id=rollback_args.schedule_id, + full=rollback_args.full, + previous=rollback_args.previous) elif args.action == "blocklist": + blocklist_args = blocklist_parser.parse_args(sys.argv[3:]) blocklist_schedule_cmd.main( - schedule_id=args.schedule_id, - remove=getattr(args, 'remove', False), - reason=getattr(args, 'reason', None), + schedule_id=blocklist_args.schedule_id, + remove=blocklist_args.remove, + reason=blocklist_args.reason, ) @staticmethod diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index 5f0a9a7..11e6116 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -6,7 +6,7 @@ from croniter import croniter from cicada.lib import postgres, utils from cicada.lib import scheduler -from cicada.lib.SmartScheduling import pygad +from cicada.lib.SmartScheduling.GAPyGAD import GAPyGADScheduler from cicada.lib.SmartScheduling.domain import Schedule def _get_schedules_per_server(server_id, db_cur=None): @@ -175,7 +175,7 @@ def optimise(db_cur, server_id=None, ga_config=None): try: print("\n------------Starting 
Optimisation-----------------") print("Running PyGAD solver ...") - ga = pygad.GAPyGADScheduler(config=ga_config) + ga = GAPyGADScheduler(config=ga_config) optimised_schedules, __, peak_usage, __, initial_fitness = ga.solve(schedules) print(f"Optimized schedule for server_id {server_id}: new peak usage {peak_usage}") diff --git a/cicada/lib/SmartScheduling/pygad.py b/cicada/lib/SmartScheduling/GAPyGAD.py similarity index 99% rename from cicada/lib/SmartScheduling/pygad.py rename to cicada/lib/SmartScheduling/GAPyGAD.py index e91d30a..be2f75f 100644 --- a/cicada/lib/SmartScheduling/pygad.py +++ b/cicada/lib/SmartScheduling/GAPyGAD.py @@ -1,10 +1,11 @@ from __future__ import annotations from typing import List, Mapping, Optional, Sequence import numpy as np +import pygad + from cicada.lib.SmartScheduling.config import GAConfig from cicada.lib.SmartScheduling.domain import Schedule from cicada.lib.SmartScheduling.evaluation import evaluate_usage_and_peak -import pygad class GAPyGADScheduler: @@ -58,7 +59,7 @@ def _gene_space(self, schedules: Sequence[Schedule]) -> List[List[int]]: max_start_times[i] = schedule.frequency_minutes return [list(range(min_start_time, max_start_time)) for min_start_time, max_start_time in zip(min_start_times, max_start_times)] - + def _initial_population(self, schedules: Sequence[Schedule], gene_space: List[List[int]]) -> np.ndarray: rng = np.random.default_rng(self.cfg.random_seed) diff --git a/tests/test_smart_scheduling.py b/tests/test_smart_scheduling.py index 8e8c168..16ec185 100644 --- a/tests/test_smart_scheduling.py +++ b/tests/test_smart_scheduling.py @@ -12,7 +12,7 @@ from cicada.lib.SmartScheduling.config import GAConfig from cicada.lib.SmartScheduling.evaluation import evaluate_usage_and_peak import cicada.commands.smart_schedule as smart_schedule -from cicada.lib.SmartScheduling.pygad import GAPyGADScheduler +from cicada.lib.SmartScheduling.GAPyGAD import GAPyGADScheduler from cicada.lib import scheduler From 
b589fb2b6147ff59818c83731f811d6823527e97 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Wed, 27 May 2026 11:23:15 +0100 Subject: [PATCH 42/53] Replace genespace with dictionary --- cicada/lib/SmartScheduling/GAPyGAD.py | 45 ++++++++++++--------------- tests/test_smart_scheduling.py | 14 +++------ 2 files changed, 24 insertions(+), 35 deletions(-) diff --git a/cicada/lib/SmartScheduling/GAPyGAD.py b/cicada/lib/SmartScheduling/GAPyGAD.py index be2f75f..182e247 100644 --- a/cicada/lib/SmartScheduling/GAPyGAD.py +++ b/cicada/lib/SmartScheduling/GAPyGAD.py @@ -32,34 +32,29 @@ def __init__(self, config: Optional[Mapping[str, object]] = None): self.cfg = GAConfig(**filtered_config) - def _gene_space(self, schedules: Sequence[Schedule]) -> List[List[int]]: - # Build gene_space per schedule: each gene space is limited by it's frequency - # Unless the schedule is unsupported (either blocklisted, irregular or has frequency greater than 60 mins) in which case we set the gene space to be just 0 - # so they remain unchanged in the GA but are still included in the fitness evaluation. Also constrain schedules with frequency > 60 mins to an hour to prevent - # large shifts and huge gene spaces. - - min_start_times = [0] * len(schedules) - max_start_times = [1] * len(schedules) + def _gene_space(self, schedules: Sequence[Schedule]) -> List[dict]: + # Build gene_space per schedule: each gene space is limited by its frequency + # Unless the schedule is unsupported (either blocklisted, irregular or has frequency greater than 60 mins), + # in which case we fix the gene space so it remains unchanged in the GA but is still included in fitness evaluation. + # Constrain schedules with frequency > 60 mins to an hour to prevent large shifts. 
+ + gene_space = [] mins_per_day = 1440 - for i, schedule in enumerate(schedules): - # Fix the gene space so they're still included in the fitness eval but remain unshifted + for schedule in schedules: if schedule.is_unsupported(): - min_start_times[i] = schedule.start_time_mins - max_start_times[i] = schedule.start_time_mins + 1 - - # Limit gene space to only shift within the hour for the schedules which run less frequently + # Fix gene space to current start time (no shift allowed) + gene_space.append({"low": schedule.start_time_mins, "high": schedule.start_time_mins}) elif schedule.frequency_minutes > 60: - # Prevent any max_start_time from going beyond the day limit - max_start_times[i] = min(schedule.start_time_mins + 60, mins_per_day) - min_start_times[i] = max_start_times[i] - 60 - - # Gene space for the rest is just the frequency + # Shift within the hour, clamped to day limit + high = min(schedule.start_time_mins + 59, mins_per_day) + low = max(high - 59, 0) + gene_space.append({"low": low, "high": high}) else: - max_start_times[i] = schedule.frequency_minutes - - return [list(range(min_start_time, max_start_time)) for min_start_time, max_start_time in zip(min_start_times, max_start_times)] + # Shift within frequency range + gene_space.append({"low": 0, "high": schedule.frequency_minutes - 1}) + return gene_space def _initial_population(self, schedules: Sequence[Schedule], gene_space: List[List[int]]) -> np.ndarray: rng = np.random.default_rng(self.cfg.random_seed) @@ -69,12 +64,12 @@ def _initial_population(self, schedules: Sequence[Schedule], gene_space: List[Li for i, schedule in enumerate(schedules): gs = gene_space[i] s = int(schedule.start_time_mins) - seed.append(max(min(s, gs[-1]), gs[0])) + seed.append(max(min(s, gs["high"]), gs["low"])) pop = [seed] # Populate the rest of the initial population randomly within the gene space limits for each schedule for _ in range(self.cfg.sol_per_pop - 1): - pop.append([gene_space[i][int(rng.integers(0, 
len(gene_space[i])))] for i in range(len(schedules))]) + pop.append([int(rng.integers(gene_space[i]["low"], gene_space[i]["high"] + 1)) for i in range(len(schedules))]) return np.asarray(pop, dtype=int) def fitness_fn(self, ga, solution, solution_idx): @@ -123,7 +118,7 @@ def solve(self, schedules: Sequence[Schedule]) -> tuple[Sequence[Schedule], List # Update schedule objects start_time_mins attribute based on GA solution for i, schedule in enumerate(schedules): - if not (start_times[i] >= gene_space[i][0] and start_times[i] <= gene_space[i][-1]): + if not (start_times[i] >= gene_space[i]["low"] and start_times[i] <= gene_space[i]["high"]): raise RuntimeError(f"Start time for schedule {schedule.schedule_id} is out of gene space bounds. Start time: {start_times[i]}, Gene space: {gene_space[i]}") if schedule.is_unsupported() and start_times[i] != schedule.start_time_mins: raise RuntimeError(f"Unsupported schedule {schedule.schedule_id} should not have been shifted in the GA solution. {schedule.start_time_mins} != {start_times[i]}") diff --git a/tests/test_smart_scheduling.py b/tests/test_smart_scheduling.py index 16ec185..e51a67e 100644 --- a/tests/test_smart_scheduling.py +++ b/tests/test_smart_scheduling.py @@ -796,13 +796,13 @@ def test_smart_scheduling_gene_space_constraints_30_min(self, db_setup): gene_space = ga_scheduler._gene_space([test_schedule]) test_schedule.shifted = True - test_schedule.start_time_mins = gene_space[0][-1] + test_schedule.start_time_mins = gene_space[0]["high"] smart_schedule._update_schedule_cron(test_schedule) assert test_schedule.smart_interval_mask == "29-59/30 * * * *" assert croniter.croniter.is_valid(test_schedule.smart_interval_mask) assert test_schedule.frequency_minutes == 30 - test_schedule.start_time_mins = gene_space[0][1] + test_schedule.start_time_mins = gene_space[0]["low"] + 1 smart_schedule._update_schedule_cron(test_schedule) assert test_schedule.smart_interval_mask == "1-59/30 * * * *" assert 
croniter.croniter.is_valid(test_schedule.smart_interval_mask) @@ -826,23 +826,17 @@ def test_smart_scheduling_gene_space_constraints_daily(self, db_setup): gene_space = ga_scheduler._gene_space([test_schedule]) test_schedule.shifted = True - test_schedule.start_time_mins = gene_space[0][-1] + test_schedule.start_time_mins = gene_space[0]["high"] smart_schedule._update_schedule_cron(test_schedule) assert test_schedule.smart_interval_mask == "29 9 * * *" assert croniter.croniter.is_valid(test_schedule.smart_interval_mask) assert test_schedule.frequency_minutes == 1440 - test_schedule.start_time_mins = gene_space[0][0] + test_schedule.start_time_mins = gene_space[0]["low"] smart_schedule._update_schedule_cron(test_schedule) assert test_schedule.smart_interval_mask == "30 8 * * *" assert croniter.croniter.is_valid(test_schedule.smart_interval_mask) assert test_schedule.frequency_minutes == 1440 - - test_schedule.start_time_mins = gene_space[0][1] - smart_schedule._update_schedule_cron(test_schedule) - assert test_schedule.smart_interval_mask == "31 8 * * *" - assert croniter.croniter.is_valid(test_schedule.smart_interval_mask) - assert test_schedule.frequency_minutes == 1440 finally: db_cur.close() db_conn.close() From 3cb815c61bdb5cef20c892fe042cc6083c02590d Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Wed, 27 May 2026 11:32:54 +0100 Subject: [PATCH 43/53] Change bulk update to chain multiple insert statements togerther rather than conditional case-based query --- cicada/lib/scheduler.py | 42 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index 3e2e79b..34e5f5c 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -236,45 +236,35 @@ def update_schedule_details(db_cur, schedule_details): def update_schedule_details_bulk(db_cur, schedule_list): - """Update multiple schedules in a single bulk query.""" + """Update multiple schedules with 
individual UPDATE statements in a single execute call.""" if not schedule_list: return - columns_to_update = set() + statements = [] + params = [] + for schedule in schedule_list: - columns_to_update.update(k for k, v in schedule.items() if k != "schedule_id" and v is not None) + updates = {k: v for k, v in schedule.items() if k != "schedule_id" and v is not None} - if not columns_to_update: - print("No fields to update for any schedules. Bulk update skipped.") - return + if not updates: + continue - case_clauses = [] - params = [] + set_clause = ", ".join([f"{col} = %s" for col in sorted(updates.keys())]) + statement = f"UPDATE schedules SET {set_clause} WHERE schedule_id = %s" + statements.append(statement) - # Construct CASE statements for each column to update - for col in sorted(columns_to_update): - case_parts = [] - for schedule in schedule_list: - if col in schedule and schedule[col] is not None: - params.append(schedule['schedule_id']) - params.append(schedule[col]) - case_parts.append("WHEN schedule_id = %s THEN %s") + for col in sorted(updates.keys()): + params.append(updates[col]) + params.append(schedule['schedule_id']) - if case_parts: - case_clauses.append(f"{col} = CASE {' '.join(case_parts)} ELSE {col} END") - - if not case_clauses: + if not statements: + print("No fields to update for any schedules. Bulk update skipped.") return - # Add schedule_ids to params - schedule_ids = [s['schedule_id'] for s in schedule_list] - params.append(schedule_ids) - - sqlquery = f"UPDATE schedules SET {', '.join(case_clauses)} WHERE schedule_id = ANY(%s)" + sqlquery = "; ".join(statements) db_cur.execute(sqlquery, tuple(params)) - def snapshot_schedules(db_cur, schedule_ids, server_id=None, computed_usage=None, reason=None): """Create a snapshot of specific schedules with the same snapshot_id. 
From caa75992e840181fcc31fea692f8b705360b0b0f Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Wed, 27 May 2026 12:48:35 +0100 Subject: [PATCH 44/53] Fix cleanup queries and add tests for it --- cicada/commands/smart_schedule.py | 2 +- cicada/commands/smart_schedule_rollback.py | 1 + cicada/lib/scheduler.py | 37 ++++++++++++++-------- tests/test_smart_scheduling.py | 29 +++++++++++++++++ 4 files changed, 55 insertions(+), 14 deletions(-) diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index 11e6116..386c2c8 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -195,7 +195,7 @@ def optimise(db_cur, server_id=None, ga_config=None): print("--------------------------------------------------\n") else: print(f"No improvement found for server_id {server_id}. Current peak usage: {initial_fitness}, Optimized peak usage: {peak_usage}. No schedule updates will be made.") - print("--------------------------------------------------\n") + print("--------------------------------------------------\n") except Exception as e: print(f"Error during optimization for server_id {server_id}: {e}") diff --git a/cicada/commands/smart_schedule_rollback.py b/cicada/commands/smart_schedule_rollback.py index 4d802a2..34af6cc 100644 --- a/cicada/commands/smart_schedule_rollback.py +++ b/cicada/commands/smart_schedule_rollback.py @@ -86,6 +86,7 @@ def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbn _rollback_to_previous_snapshot(db_cur, server_id=server_id[0]) else: _rollback_to_previous_snapshot(db_cur, server_id=server_id) + print("--------------------------------------------------\n") except Exception as e: print(f"Error during rollback: {e}") diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index 34e5f5c..b05c553 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -293,21 +293,32 @@ def snapshot_schedules(db_cur, schedule_ids, server_id=None, computed_usage=None 
FROM schedules WHERE schedule_id = ANY(%s) """ db_cur.execute(sqlquery, (snapshot_id, schedule_ids)) + # Clean up old snapshots (keep last 5) + min_snapshot_query = """ + SELECT snapshot_id FROM snapshots + WHERE server_id = %s + ORDER BY snapshot_id DESC + LIMIT 1 OFFSET 4 + """ + + db_cur.execute(min_snapshot_query, (server_id,)) + result = db_cur.fetchone() + min_snapshot_to_keep = result[0] if result else 0 - # Clean up old snapshots (keep last 5 per schedule_id) cleanup_backups_query = """ - DELETE FROM schedule_backups sb - DELETE FROM snapshots s - WHERE sb.schedule_id = ANY(%s) - AND sb.snapshot_id NOT IN ( - SELECT snapshot_id FROM schedule_backups - WHERE schedule_id = sb.schedule_id - ORDER BY snapshot_id DESC - LIMIT 5 - ) - AND s.snapshot_id = sb.snapshot_id - """ - print(f"Updated schedule_backups table for server {server_id}") + DELETE FROM schedule_backups + WHERE snapshot_id < %s + AND server_id = %s + """ + db_cur.execute(cleanup_backups_query, (min_snapshot_to_keep, server_id)) + print(f"\nCleaned up old schedule_backups for server_id {server_id} in schedule_backups table") + cleanup_snapshots_query = """ + DELETE FROM snapshots + WHERE snapshot_id < %s + AND server_id = %s + """ + db_cur.execute(cleanup_snapshots_query, (min_snapshot_to_keep, server_id)) + print(f"Cleaned up old snapshots for server_id {server_id} in snapshots table") def get_schedule_executable(db_cur, schedule_id): diff --git a/tests/test_smart_scheduling.py b/tests/test_smart_scheduling.py index e51a67e..af666dc 100644 --- a/tests/test_smart_scheduling.py +++ b/tests/test_smart_scheduling.py @@ -1013,6 +1013,35 @@ def test_restore_previous_schedules(self, db_setup): scheduler.restore_previous_schedules(db_cur, snapshot_id=prev_snapshot_id, server_id=1) assert query_test_db("SELECT smart_interval_mask FROM schedules WHERE schedule_id = 'test-schedule-1'")[0][0] == "30 * * * *" + finally: + db_cur.close() + db_conn.close() + + + def test_snapshot_cleanup(self, db_setup): + 
"""Test that snapshot limits are enforced and old snapshots are deleted""" + db_conn, db_cur = get_db_cursor() + try: + query_test_db( + """INSERT INTO servers (server_id, hostname, fqdn, ip4_address) + VALUES (1, 'test-server', 'test-server.local', 'H')""" + ) + query_test_db( + """INSERT INTO schedules (schedule_id, server_id, interval_mask, smart_interval_mask, exec_command) + VALUES ('test-schedule-1', 1, '0 * * * *', '30 * * * *', 'echo test')""" + ) + # Create more than 5 snapshots to trigger deletion of old snapshots + for i in range(7): + scheduler.snapshot_schedules(db_cur, ["test-schedule-1"], server_id=1) + + # Verify that only the 5 most recent snapshots remain + snapshot_count = query_test_db("SELECT COUNT(*) FROM snapshots WHERE server_id = 1")[0][0] + assert snapshot_count == 5 + oldest_snapshot_id = query_test_db("SELECT snapshot_id FROM snapshots WHERE server_id = 1 ORDER BY snapshot_timestamp ASC LIMIT 1")[0][0] + assert oldest_snapshot_id == 3 + oldest_snapshot_id = query_test_db("SELECT snapshot_id FROM schedule_backups WHERE server_id = 1 ORDER BY snapshot_id ASC LIMIT 1")[0][0] + assert oldest_snapshot_id == 3 + finally: db_cur.close() db_conn.close() \ No newline at end of file From 1a36f3bea61bd2c9f7eda6ef2a0ba8aa2e60cda7 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Wed, 27 May 2026 15:11:18 +0100 Subject: [PATCH 45/53] Updates to reset_schedule_backsup and snapshot_schedules --- cicada/commands/smart_schedule.py | 3 +- cicada/commands/smart_schedule_rollback.py | 3 +- cicada/lib/scheduler.py | 41 +++++++++++----------- tests/test_smart_scheduling.py | 13 ++++--- 4 files changed, 28 insertions(+), 32 deletions(-) diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index 386c2c8..c149136 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -185,8 +185,7 @@ def optimise(db_cur, server_id=None, ga_config=None): print("\n-------------Updating Schedules------------------") 
db_cur.execute("BEGIN;") _assign_new_schedules(optimised_schedules, db_cur=db_cur) - optimised_schedule_ids = [schedule.schedule_id for schedule in optimised_schedules if schedule.shifted] - scheduler.snapshot_schedules(db_cur, optimised_schedule_ids, server_id=server_id, computed_usage=peak_usage, reason='Smart Schedule Optimization') + scheduler.snapshot_schedules(db_cur, server_id=server_id, computed_usage=peak_usage, reason='Smart Schedule Optimization') db_cur.execute("COMMIT;") except Exception as e: db_cur.execute("ROLLBACK;") diff --git a/cicada/commands/smart_schedule_rollback.py b/cicada/commands/smart_schedule_rollback.py index 34af6cc..9728dd4 100644 --- a/cicada/commands/smart_schedule_rollback.py +++ b/cicada/commands/smart_schedule_rollback.py @@ -71,8 +71,7 @@ def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbn print("Full rollback successful\n") server_ids = [server_id] if server_id else [server[0] for server in scheduler.get_all_server_ids(db_cur)] for server in server_ids: - schedule_ids = [row[0] for row in scheduler.get_all_schedule_ids_per_server(db_cur, server)] - scheduler.snapshot_schedules(db_cur, schedule_ids=schedule_ids, server_id=server, reason='Full Rollback') + scheduler.snapshot_schedules(db_cur, server_id=server, reason='Full Rollback') db_cur.execute("COMMIT;") except Exception as e: db_cur.execute("ROLLBACK;") diff --git a/cicada/lib/scheduler.py b/cicada/lib/scheduler.py index b05c553..da8599b 100644 --- a/cicada/lib/scheduler.py +++ b/cicada/lib/scheduler.py @@ -263,9 +263,10 @@ def update_schedule_details_bulk(db_cur, schedule_list): sqlquery = "; ".join(statements) db_cur.execute(sqlquery, tuple(params)) + print(f"\nBulk updated {len(statements)} schedules") -def snapshot_schedules(db_cur, schedule_ids, server_id=None, computed_usage=None, reason=None): +def snapshot_schedules(db_cur, server_id=None, computed_usage=None, reason=None): """Create a snapshot of specific schedules with the same 
snapshot_id. Args: @@ -275,8 +276,6 @@ def snapshot_schedules(db_cur, schedule_ids, server_id=None, computed_usage=None computed_usage: Computed usage for the snapshot reason: Optional reason/context for the snapshot """ - if not schedule_ids: - raise ValueError("schedule_ids list cannot be empty") if not server_id: raise ValueError("server_id must be provided for snapshot") @@ -290,9 +289,10 @@ def snapshot_schedules(db_cur, schedule_ids, server_id=None, computed_usage=None sqlquery = """ INSERT INTO schedule_backups (schedule_id, server_id, interval_mask, smart_interval_mask, snapshot_id) SELECT schedule_id, server_id, interval_mask, smart_interval_mask, %s - FROM schedules WHERE schedule_id = ANY(%s) + FROM schedules WHERE server_id = %s """ - db_cur.execute(sqlquery, (snapshot_id, schedule_ids)) + db_cur.execute(sqlquery, (snapshot_id, server_id)) + # Clean up old snapshots (keep last 5) min_snapshot_query = """ SELECT snapshot_id FROM snapshots @@ -555,9 +555,8 @@ def restore_previous_schedules(db_cur, server_id, snapshot_id): raise ValueError("snapshot_id is required to restore previous schedules") schedule_ids = get_all_schedule_ids_per_server(db_cur, server_id) - - print(f"Restoring schedules for server_id {server_id} from snapshot_id {snapshot_id}") - print("Skipping any schedules that aren't in the snapshot or have a different interval mask...") + print(f"{len(schedule_ids)} schedules found for server_id {server_id}") + print("Restoring schedules from snapshot...") sqlquery = """ UPDATE schedules SET smart_interval_mask = schedule_backups.smart_interval_mask @@ -567,9 +566,9 @@ def restore_previous_schedules(db_cur, server_id, snapshot_id): AND schedule_backups.snapshot_id = %s AND schedules.interval_mask = schedule_backups.interval_mask """ - db_cur.execute(sqlquery, (server_id, snapshot_id)) - print(f"{len(schedule_ids)} Schedules restored") - return + db_cur.execute(sqlquery, (server_id, snapshot_id)) + rows_updated = db_cur.rowcount + 
print(f"{rows_updated} Schedules restored") def get_blocklisted_schedule_ids(db_cur): @@ -582,20 +581,20 @@ def get_blocklisted_schedule_ids(db_cur): def reset_schedule_backups(db_cur, snapshot_id=None, schedule_id=None): """Reset schedule_backups table by deleting all entries""" - sqlquery_backups = "DELETE FROM schedule_backups WHERE 1=1" - sqlquery_snapshots = "DELETE FROM snapshots WHERE 1=1" + if not snapshot_id and not schedule_id: + raise ValueError("Either snapshot_id or schedule_id must be provided to reset schedule_backups") + if snapshot_id and schedule_id: + raise ValueError("Cannot specify both snapshot_id and schedule_id to reset schedule_backups") + if schedule_id: - sqlquery_backups += " AND schedule_id = %s" + sqlquery_backups = "DELETE FROM schedule_backups WHERE schedule_id = %s" db_cur.execute(sqlquery_backups, (schedule_id,)) - elif snapshot_id: - sqlquery_backups += " AND snapshot_id = %s" - sqlquery_snapshots += " AND snapshot_id = %s" + if snapshot_id: + sqlquery_backups = " DELETE FROM schedule_backups WHERE snapshot_id = %s" + sqlquery_snapshots = " DELETE FROM snapshots WHERE snapshot_id = %s" db_cur.execute(sqlquery_backups, (snapshot_id,)) db_cur.execute(sqlquery_snapshots, (snapshot_id,)) - else: - db_cur.execute(sqlquery_backups) - db_cur.execute(sqlquery_snapshots) - return + def blocklist_schedule(db_cur, schedule_id, reason=None): """Add a schedule_id to the blocklist""" diff --git a/tests/test_smart_scheduling.py b/tests/test_smart_scheduling.py index af666dc..e6c3811 100644 --- a/tests/test_smart_scheduling.py +++ b/tests/test_smart_scheduling.py @@ -558,7 +558,7 @@ def test_snapshot_schedules_basic(self, db_setup): ) # Snapshot the schedule - scheduler.snapshot_schedules(db_cur, ["test-sched-1"], reason="Test optimization", server_id = 1) + scheduler.snapshot_schedules(db_cur, server_id=1, reason="Test optimization") # Verify snapshot was created snapshots = query_test_db("SELECT snapshot_id FROM snapshots WHERE reason = 'Test 
optimization'") @@ -647,8 +647,7 @@ def test_snapshot_schedules(self, db_setup): VALUES ('sched-2', 1, '*/30 * * * *', '*/30 * * * *', 'echo test')""" ) - schedule_ids = ["sched-1", "sched-2"] - scheduler.snapshot_schedules(db_cur, schedule_ids, reason="Test optimization", server_id = 1) + scheduler.snapshot_schedules(db_cur, server_id=1, reason="Test optimization") # Verify that snapshots were created snapshots = query_test_db("SELECT snapshot_id FROM snapshots WHERE reason = 'Test optimization'") @@ -917,7 +916,7 @@ def test_snapshot_schedules(self, db_setup): ) # Snapshot the schedules - scheduler.snapshot_schedules(db_cur, schedule_ids, server_id = 1, reason="Test optimization") + scheduler.snapshot_schedules(db_cur, server_id = 1, reason="Test optimization") # Verify snapshot was created snapshot_result = query_test_db("SELECT snapshot_id, server_id FROM snapshots WHERE reason = 'Test optimization'") @@ -997,13 +996,13 @@ def test_restore_previous_schedules(self, db_setup): """INSERT INTO schedules (schedule_id, server_id, interval_mask, smart_interval_mask, exec_command) VALUES ('test-schedule-1', 1, '0 * * * *', '30 * * * *', 'echo test')""" ) - scheduler.snapshot_schedules(db_cur, ["test-schedule-1"], reason="Test optimization", server_id=1) + scheduler.snapshot_schedules(db_cur, server_id=1, reason="Test optimization") assert query_test_db("SELECT smart_interval_mask FROM schedule_backups WHERE schedule_id = 'test-schedule-1'")[0][0] == "30 * * * *" assert query_test_db("SELECT COUNT(*) FROM schedule_backups WHERE schedule_id = 'test-schedule-1'")[0][0] == 1 query_test_db("UPDATE schedules SET smart_interval_mask = '45 * * * *' WHERE schedule_id = 'test-schedule-1'") - scheduler.snapshot_schedules(db_cur, ["test-schedule-1"], reason="Test optimization", server_id=1) + scheduler.snapshot_schedules(db_cur, server_id=1, reason="Test optimization") assert query_test_db("SELECT server_id FROM snapshots ORDER BY snapshot_id DESC LIMIT 1")[0][0] == 1 assert 
query_test_db("SELECT smart_interval_mask FROM schedule_backups WHERE schedule_id = 'test-schedule-1' ORDER BY snapshot_id DESC LIMIT 1")[0][0] == "45 * * * *" assert query_test_db("SELECT COUNT(*) FROM schedule_backups WHERE schedule_id = 'test-schedule-1'")[0][0] == 2 @@ -1032,7 +1031,7 @@ def test_snapshot_cleanup(self, db_setup): ) # Create more than 5 snapshots to trigger deletion of old snapshots for i in range(7): - scheduler.snapshot_schedules(db_cur, ["test-schedule-1"], server_id=1) + scheduler.snapshot_schedules(db_cur, server_id=1) # Verify that only the 5 most recent snapshots remain snapshot_count = query_test_db("SELECT COUNT(*) FROM snapshots WHERE server_id = 1")[0][0] From 2380c86cf3d54267e5640e438e0da9075aae58c9 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Thu, 28 May 2026 13:33:26 +0100 Subject: [PATCH 46/53] Remove dataclass decorator from Schedule class --- CLAUDE.md | 1 - cicada/commands/smart_schedule.py | 9 +- cicada/commands/upsert_schedule.py | 1 + cicada/lib/SmartScheduling/config.py | 3 +- cicada/lib/SmartScheduling/domain.py | 32 ++- tests/test_smart_scheduling.py | 406 +++++++++++++++++++++++++-- 6 files changed, 409 insertions(+), 43 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index c43d282..6f26684 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -84,7 +84,6 @@ Located in `cicada/lib/SmartScheduling/` **`config.py`** - `GAConfig` dataclass: hyperparameters for the genetic algorithm - `num_generations`, `sol_per_pop`, `mutation_percent_genes`, etc. 
- - `blocklist_schedule_ids`: list of schedule IDs to exclude from optimization **`pygad.py`** - Wraps the external `pygad` library (genetic algorithm) diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index c149136..a3076f5 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -35,7 +35,14 @@ def _create_schedule_objects(schedule_ids, db_cur): details['blocklisted'] = False try: - schedule = Schedule(details, db_cur) + schedule = Schedule( + schedule_id = details['schedule_id'], + server_id = details['server_id'], + interval_mask = details['interval_mask'], + smart_interval_mask = details.get('smart_interval_mask'), + blocklisted = details.get('blocklisted', False), + db_cur = db_cur + ) # Ignore the few schedules that have irregular cron expressions for now. # There are few enough that this shouldn't impact the optimisation and is not worth the effort to try and support these in the GA if not schedule.is_regular_schedule(): diff --git a/cicada/commands/upsert_schedule.py b/cicada/commands/upsert_schedule.py index e430eb7..919644c 100644 --- a/cicada/commands/upsert_schedule.py +++ b/cicada/commands/upsert_schedule.py @@ -79,6 +79,7 @@ def main(schedule_details, dbname=None): new_schedule_details["schedule_group_id"] = schedule_details["schedule_group_id"] scheduler.update_schedule_details(db_cur, new_schedule_details) + print(tabulate(new_schedule_details.items(), ["Detail", "Value"], tablefmt="psql")) db_cur.close() db_conn.close() diff --git a/cicada/lib/SmartScheduling/config.py b/cicada/lib/SmartScheduling/config.py index ea790da..07593c1 100644 --- a/cicada/lib/SmartScheduling/config.py +++ b/cicada/lib/SmartScheduling/config.py @@ -12,5 +12,4 @@ class GAConfig: crossover_type: str = "uniform" mutation_type: str = "random" keep_elitism: int = 2 - random_seed: Optional[int] = None - blocklist_schedule_ids: Optional[List[str]] = field(default_factory=list) \ No newline at end of file + 
random_seed: Optional[int] = None \ No newline at end of file diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/SmartScheduling/domain.py index 804a6fe..4094f9c 100644 --- a/cicada/lib/SmartScheduling/domain.py +++ b/cicada/lib/SmartScheduling/domain.py @@ -1,5 +1,4 @@ from __future__ import annotations -from dataclasses import dataclass import math from typing import Optional from croniter import croniter @@ -7,29 +6,34 @@ from cicada.lib.scheduler import get_median_run_time -@dataclass(frozen=False) class Schedule: schedule_id: str server_id: int - interval_mask: str + interval_mask: str frequency_minutes: int - median_runtime_minutes: int = 5 - shifted: bool = False - start_time_mins: Optional[int] = 0 - blocklisted: bool = False + median_runtime_minutes: int + shifted: bool + start_time_mins: Optional[int] + blocklisted: bool - def __init__(self, details, db_cur): - self.schedule_id = details['schedule_id'] - self.server_id = details['server_id'] - self.interval_mask = details['interval_mask'] - self.current_interval_mask = details.get('smart_interval_mask') if details.get('smart_interval_mask') is not None else self.interval_mask + def __init__(self, schedule_id: str, server_id: int, interval_mask: str, smart_interval_mask: Optional[str] = None, blocklisted: bool = False, db_cur=None): + self.schedule_id = schedule_id + self.server_id = server_id + self.interval_mask = interval_mask + self.smart_interval_mask = smart_interval_mask + self.blocklisted = blocklisted self.determine_attributes(db_cur) - if details.get('blocklisted') is not None: - self.blocklisted = details.get('blocklisted') def determine_attributes(self, db_cur): """Determine frequency and average runtime from interval_mask and scheduler module""" + + self.shifted = False + self.current_interval_mask = ( + self.smart_interval_mask + if self.smart_interval_mask is not None + else self.interval_mask + ) self._determine_frequency() self._determine_start_time_mins() 
self._get_average_runtime(db_cur) diff --git a/tests/test_smart_scheduling.py b/tests/test_smart_scheduling.py index e6c3811..4d78c35 100644 --- a/tests/test_smart_scheduling.py +++ b/tests/test_smart_scheduling.py @@ -151,8 +151,17 @@ def test_evaluate_single_schedule_no_overlap(self, db_setup): "schedule_id": 1, "server_id": 1, "interval_mask": "0 * * * *", + "smart_interval_mask": None, + "blocklisted": False } - test_schedule = Schedule(schedule_details, db_cur) + test_schedule = Schedule( + schedule_id=schedule_details['schedule_id'], + server_id=schedule_details['server_id'], + interval_mask=schedule_details['interval_mask'], + smart_interval_mask=schedule_details.get('smart_interval_mask'), + blocklisted=schedule_details.get('blocklisted'), + db_cur=db_cur + ) start_blocks = [0] usage, peak = evaluate_usage_and_peak(start_blocks, [test_schedule]) @@ -175,8 +184,17 @@ def test_evaluate_multiple_schedules_no_overlap(self, db_setup): "schedule_id": 1, "server_id": 1, "interval_mask": "0 * * * *", + "smart_interval_mask": None, + "blocklisted": False } - schedule1 = Schedule(schedule1_details, db_cur) + schedule1 = Schedule( + schedule_id=schedule1_details['schedule_id'], + server_id=schedule1_details['server_id'], + interval_mask=schedule1_details['interval_mask'], + smart_interval_mask=schedule1_details.get('smart_interval_mask'), + blocklisted=schedule1_details.get('blocklisted'), + db_cur=db_cur + ) schedule1.frequency_minutes = 60 schedule1.median_runtime_minutes = 5 @@ -184,8 +202,17 @@ def test_evaluate_multiple_schedules_no_overlap(self, db_setup): "schedule_id": 2, "server_id": 1, "interval_mask": "30 * * * *", + "smart_interval_mask": None, + "blocklisted": False } - schedule2 = Schedule(schedule2_details, db_cur) + schedule2 = Schedule( + schedule_id=schedule2_details['schedule_id'], + server_id=schedule2_details['server_id'], + interval_mask=schedule2_details['interval_mask'], + smart_interval_mask=schedule2_details.get('smart_interval_mask'), + 
blocklisted=schedule2_details.get('blocklisted'), + db_cur=db_cur + ) schedule2.frequency_minutes = 60 schedule2.median_runtime_minutes = 5 @@ -209,8 +236,17 @@ def test_evaluate_overlapping_schedules(self, db_setup): "schedule_id": 1, "server_id": 1, "interval_mask": "0 * * * *", + "smart_interval_mask": None, + "blocklisted": False } - schedule1 = Schedule(schedule1_details, db_cur) + schedule1 = Schedule( + schedule_id=schedule1_details['schedule_id'], + server_id=schedule1_details['server_id'], + interval_mask=schedule1_details['interval_mask'], + smart_interval_mask=schedule1_details.get('smart_interval_mask'), + blocklisted=schedule1_details.get('blocklisted'), + db_cur=db_cur + ) schedule1.frequency_minutes = 60 schedule1.median_runtime_minutes = 10 @@ -218,8 +254,17 @@ def test_evaluate_overlapping_schedules(self, db_setup): "schedule_id": 2, "server_id": 1, "interval_mask": "0 * * * *", + "smart_interval_mask": None, + "blocklisted": False } - schedule2 = Schedule(schedule2_details, db_cur) + schedule2 = Schedule( + schedule_id=schedule2_details['schedule_id'], + server_id=schedule2_details['server_id'], + interval_mask=schedule2_details['interval_mask'], + smart_interval_mask=schedule2_details.get('smart_interval_mask'), + blocklisted=schedule2_details.get('blocklisted'), + db_cur=db_cur + ) schedule2.frequency_minutes = 60 schedule2.median_runtime_minutes = 5 @@ -241,8 +286,17 @@ def test_evaluate_wrapping_around_day(self, db_setup): "schedule_id": 1, "server_id": 1, "interval_mask": "0 0 * * *", + "smart_interval_mask": None, + "blocklisted": False } - test_schedule = Schedule(schedule_details, db_cur) + test_schedule = Schedule( + schedule_id=schedule_details['schedule_id'], + server_id=schedule_details['server_id'], + interval_mask=schedule_details['interval_mask'], + smart_interval_mask=schedule_details.get('smart_interval_mask'), + blocklisted=schedule_details.get('blocklisted'), + db_cur=db_cur + ) test_schedule.frequency_minutes = 60 
test_schedule.median_runtime_minutes = 5 start_blocks = [1430] # (1430 mins = 23:50) @@ -267,16 +321,28 @@ def test_schedule_initialization(self, db_setup): "schedule_id": "test-id-1", "server_id": 5, "interval_mask": "0 * * * *", + "smart_interval_mask": None, + "blocklisted": False } - test_schedule = Schedule(schedule_details, db_cur) + test_schedule = Schedule( + schedule_id=schedule_details['schedule_id'], + server_id=schedule_details['server_id'], + interval_mask=schedule_details['interval_mask'], + smart_interval_mask=schedule_details.get('smart_interval_mask'), + blocklisted=schedule_details.get('blocklisted'), + db_cur=db_cur + ) assert test_schedule.schedule_id == "test-id-1" assert test_schedule.server_id == 5 assert test_schedule.interval_mask == "0 * * * *" + assert test_schedule.shifted == False + assert test_schedule.start_time_mins == 0 finally: db_cur.close() db_conn.close() + def test_schedule_dataclass_fields_initialized(self, db_setup): """Test that all dataclass fields are properly initialized, including defaults""" db_conn, db_cur = get_db_cursor() @@ -285,8 +351,17 @@ def test_schedule_dataclass_fields_initialized(self, db_setup): "schedule_id": "test-id-1", "server_id": 5, "interval_mask": "0 * * * *", + "smart_interval_mask": None, + "blocklisted": False } - test_schedule = Schedule(schedule_details, db_cur) + test_schedule = Schedule( + schedule_id=schedule_details['schedule_id'], + server_id=schedule_details['server_id'], + interval_mask=schedule_details['interval_mask'], + smart_interval_mask=schedule_details.get('smart_interval_mask'), + blocklisted=schedule_details.get('blocklisted'), + db_cur=db_cur + ) # Verify all fields exist and have correct default values assert hasattr(test_schedule, 'shifted') @@ -321,8 +396,17 @@ def test_schedule_frequency_hourly(self, db_setup): "schedule_id": "test-id-1", "server_id": 1, "interval_mask": "0 * * * *", # Every hour + "smart_interval_mask": None, + "blocklisted": False } - test_schedule = 
Schedule(schedule_details, db_cur) + test_schedule = Schedule( + schedule_id=schedule_details['schedule_id'], + server_id=schedule_details['server_id'], + interval_mask=schedule_details['interval_mask'], + smart_interval_mask=schedule_details.get('smart_interval_mask'), + blocklisted=schedule_details.get('blocklisted'), + db_cur=db_cur + ) assert test_schedule.frequency_minutes == 60 finally: @@ -337,8 +421,17 @@ def test_schedule_frequency_daily(self, db_setup): "schedule_id": "test-id-1", "server_id": 1, "interval_mask": "0 0 * * *", + "smart_interval_mask": None, + "blocklisted": False } - test_schedule = Schedule(schedule_details, db_cur) + test_schedule = Schedule( + schedule_id=schedule_details['schedule_id'], + server_id=schedule_details['server_id'], + interval_mask=schedule_details['interval_mask'], + smart_interval_mask=schedule_details.get('smart_interval_mask'), + blocklisted=schedule_details.get('blocklisted'), + db_cur=db_cur + ) assert test_schedule.frequency_minutes == 1440 finally: @@ -353,8 +446,17 @@ def test_schedule_is_unsupported_irregular_cron(self, db_setup): "schedule_id": "test-id-1", "server_id": 1, "interval_mask": "0-15 */9 * * *", + "smart_interval_mask": None, + "blocklisted": False } - test_schedule = Schedule(schedule_details, db_cur) + test_schedule = Schedule( + schedule_id=schedule_details['schedule_id'], + server_id=schedule_details['server_id'], + interval_mask=schedule_details['interval_mask'], + smart_interval_mask=schedule_details.get('smart_interval_mask'), + blocklisted=schedule_details.get('blocklisted'), + db_cur=db_cur + ) assert test_schedule.is_unsupported() assert not test_schedule.frequency_is_supported() @@ -371,8 +473,17 @@ def test_schedule_is_unsupported_low_frequency(self, db_setup): "schedule_id": "test-id-1", "server_id": 1, "interval_mask": "0 0 * * 0", # Weekly + "smart_interval_mask": None, + "blocklisted": False } - test_schedule = Schedule(schedule_details, db_cur) + test_schedule = Schedule( + 
schedule_id=schedule_details['schedule_id'], + server_id=schedule_details['server_id'], + interval_mask=schedule_details['interval_mask'], + smart_interval_mask=schedule_details.get('smart_interval_mask'), + blocklisted=schedule_details.get('blocklisted'), + db_cur=db_cur + ) assert test_schedule.is_unsupported() finally: @@ -387,8 +498,17 @@ def test_schedule_is_regular_schedule_hourly(self, db_setup): "schedule_id": "test-id-1", "server_id": 1, "interval_mask": "0 * * * *", + "smart_interval_mask": None, + "blocklisted": False } - test_schedule = Schedule(schedule_details, db_cur) + test_schedule = Schedule( + schedule_id=schedule_details['schedule_id'], + server_id=schedule_details['server_id'], + interval_mask=schedule_details['interval_mask'], + smart_interval_mask=schedule_details.get('smart_interval_mask'), + blocklisted=schedule_details.get('blocklisted'), + db_cur=db_cur + ) assert test_schedule.is_regular_schedule() finally: @@ -403,8 +523,17 @@ def test_schedule_is_regular_schedule_every_15_mins(self, db_setup): "schedule_id": "test-id-1", "server_id": 1, "interval_mask": "*/15 * * * *", + "smart_interval_mask": None, + "blocklisted": False } - test_schedule = Schedule(schedule_details, db_cur) + test_schedule = Schedule( + schedule_id=schedule_details['schedule_id'], + server_id=schedule_details['server_id'], + interval_mask=schedule_details['interval_mask'], + smart_interval_mask=schedule_details.get('smart_interval_mask'), + blocklisted=schedule_details.get('blocklisted'), + db_cur=db_cur + ) assert test_schedule.is_regular_schedule() finally: @@ -419,8 +548,17 @@ def test_schedule_is_regular_schedule_daily(self, db_setup): "schedule_id": "test-id-1", "server_id": 1, "interval_mask": "0 0 * * *", + "smart_interval_mask": None, + "blocklisted": False } - test_schedule = Schedule(schedule_details, db_cur) + test_schedule = Schedule( + schedule_id=schedule_details['schedule_id'], + server_id=schedule_details['server_id'], + 
interval_mask=schedule_details['interval_mask'], + smart_interval_mask=schedule_details.get('smart_interval_mask'), + blocklisted=schedule_details.get('blocklisted'), + db_cur=db_cur + ) assert test_schedule.is_regular_schedule() finally: @@ -435,8 +573,17 @@ def test_schedule_45_min_schedule_is_supported(self, db_setup): "schedule_id": "test-id-1", "server_id": 1, "interval_mask": "*/45 * * * *", + "smart_interval_mask": None, + "blocklisted": False } - test_schedule = Schedule(schedule_details, db_cur) + test_schedule = Schedule( + schedule_id=schedule_details['schedule_id'], + server_id=schedule_details['server_id'], + interval_mask=schedule_details['interval_mask'], + smart_interval_mask=schedule_details.get('smart_interval_mask'), + blocklisted=schedule_details.get('blocklisted'), + db_cur=db_cur + ) assert not test_schedule.is_unsupported() # Fails due to cronitor issue -> means any */45 gets missed out of the smart scheduling @@ -452,8 +599,17 @@ def test_schedule_is_irregular_schedule_weekdays(self, db_setup): "schedule_id": "test-id-1", "server_id": 1, "interval_mask": "0 9 * * 1-5", # Weekdays only + "smart_interval_mask": None, + "blocklisted": False } - test_schedule = Schedule(schedule_details, db_cur) + test_schedule = Schedule( + schedule_id=schedule_details['schedule_id'], + server_id=schedule_details['server_id'], + interval_mask=schedule_details['interval_mask'], + smart_interval_mask=schedule_details.get('smart_interval_mask'), + blocklisted=schedule_details.get('blocklisted'), + db_cur=db_cur + ) assert not test_schedule.is_regular_schedule() finally: @@ -479,7 +635,6 @@ def test_custom_config(self): assert config.parent_selection_type == "rank" assert config.crossover_type == "uniform" assert config.mutation_type == "random" - assert config.blocklist_schedule_ids == [] def test_scheduler_uses_default_config_when_optional_config_is_missing(self): ga_scheduler = GAPyGADScheduler() @@ -613,15 +768,26 @@ def test_create_schedules_from_details(self, 
db_setup): "schedule_id": "sched-1", "server_id": 1, "interval_mask": "0 * * * *", + "smart_interval_mask": None, + "blocklisted": False }, { "schedule_id": "sched-2", "server_id": 1, "interval_mask": "*/30 * * * *", + "smart_interval_mask": None, + "blocklisted": False }, ] - schedules = [Schedule(data, db_cur) for data in schedules_data] + schedules = [Schedule( + schedule_id=data['schedule_id'], + server_id=data['server_id'], + interval_mask=data['interval_mask'], + smart_interval_mask=data.get('smart_interval_mask'), + blocklisted=data.get('blocklisted'), + db_cur=db_cur + ) for data in schedules_data] assert len(schedules) == 2 assert schedules[0].schedule_id == "sched-1" @@ -691,8 +857,17 @@ def test_multiple_overlapping_schedules_evaluation(self, db_setup): "schedule_id": f"sched-{i}", "server_id": 1, "interval_mask": "0 * * * *" if i == 0 else f"*/{15 * (i + 1)} * * * *", + "smart_interval_mask": None, + "blocklisted": False } - test_schedule = Schedule(schedule_data, db_cur) + test_schedule = Schedule( + schedule_id=schedule_data['schedule_id'], + server_id=schedule_data['server_id'], + interval_mask=schedule_data['interval_mask'], + smart_interval_mask=schedule_data.get('smart_interval_mask'), + blocklisted=schedule_data.get('blocklisted'), + db_cur=db_cur + ) test_schedule.frequency_minutes = 60 test_schedule.median_runtime_minutes = 5 schedules.append(test_schedule) @@ -719,8 +894,17 @@ def test_smart_scheduling_frequency_unchanged_hourly_schedule(self, db_setup): "schedule_id": "test-schedule-1", "server_id": 1, "interval_mask": "0 * * * *", + "smart_interval_mask": None, + "blocklisted": False } - hourly_schedule = Schedule(hourly_schedule_details, db_cur) + hourly_schedule = Schedule( + schedule_id=hourly_schedule_details['schedule_id'], + server_id=hourly_schedule_details['server_id'], + interval_mask=hourly_schedule_details['interval_mask'], + smart_interval_mask=hourly_schedule_details.get('smart_interval_mask'), + 
blocklisted=hourly_schedule_details.get('blocklisted'), + db_cur=db_cur + ) hourly_schedule.shifted = True hourly_schedule.start_time_mins = 15 @@ -744,8 +928,17 @@ def test_smart_scheduling_frequency_unchanged_fifteen_min_schedule(self, db_setu "schedule_id": "test-schedule-2", "server_id": 1, "interval_mask": "*/15 * * * *", + "smart_interval_mask": None, + "blocklisted": False } - fifteen_min_schedule = Schedule(fifteen_min_schedule_details, db_cur) + fifteen_min_schedule = Schedule( + schedule_id=fifteen_min_schedule_details['schedule_id'], + server_id=fifteen_min_schedule_details['server_id'], + interval_mask=fifteen_min_schedule_details['interval_mask'], + smart_interval_mask=fifteen_min_schedule_details.get('smart_interval_mask'), + blocklisted=fifteen_min_schedule_details.get('blocklisted'), + db_cur=db_cur + ) fifteen_min_schedule.shifted = True fifteen_min_schedule.start_time_mins = 3 @@ -768,8 +961,17 @@ def test_gene_space_constraints(self, db_setup): "schedule_id": "test-schedule-3", "server_id": 1, "interval_mask": "*/45 * * * *", + "smart_interval_mask": None, + "blocklisted": False } - test_schedule = Schedule(schedule_details, db_cur) + test_schedule = Schedule( + schedule_id=schedule_details['schedule_id'], + server_id=schedule_details['server_id'], + interval_mask=schedule_details['interval_mask'], + smart_interval_mask=schedule_details.get('smart_interval_mask'), + blocklisted=schedule_details.get('blocklisted'), + db_cur=db_cur + ) test_schedule.frequency_minutes = 45 test_schedule.shifted = True test_schedule.start_time_mins = 50 # Shift greater than frequency @@ -790,8 +992,17 @@ def test_smart_scheduling_gene_space_constraints_30_min(self, db_setup): "schedule_id": "test-schedule-1", "server_id": 1, "interval_mask": "*/30 * * * *", + "smart_interval_mask": None, + "blocklisted": False } - test_schedule = Schedule(schedule_details, db_cur) + test_schedule = Schedule( + schedule_id=schedule_details['schedule_id'], + 
server_id=schedule_details['server_id'], + interval_mask=schedule_details['interval_mask'], + smart_interval_mask=schedule_details.get('smart_interval_mask'), + blocklisted=schedule_details.get('blocklisted'), + db_cur=db_cur + ) gene_space = ga_scheduler._gene_space([test_schedule]) test_schedule.shifted = True @@ -820,8 +1031,17 @@ def test_smart_scheduling_gene_space_constraints_daily(self, db_setup): "schedule_id": "test-schedule-4", "server_id": 1, "interval_mask": "30 8 * * *", + "smart_interval_mask": None, + "blocklisted": False } - test_schedule = Schedule(schedule_details, db_cur) + test_schedule = Schedule( + schedule_id=schedule_details['schedule_id'], + server_id=schedule_details['server_id'], + interval_mask=schedule_details['interval_mask'], + smart_interval_mask=schedule_details.get('smart_interval_mask'), + blocklisted=schedule_details.get('blocklisted'), + db_cur=db_cur + ) gene_space = ga_scheduler._gene_space([test_schedule]) test_schedule.shifted = True @@ -1041,6 +1261,142 @@ def test_snapshot_cleanup(self, db_setup): oldest_snapshot_id = query_test_db("SELECT snapshot_id FROM schedule_backups WHERE server_id = 1 ORDER BY snapshot_id ASC LIMIT 1")[0][0] assert oldest_snapshot_id == 3 + finally: + db_cur.close() + db_conn.close() + + +class TestOptimiseWithCustomDbConnection: + """Tests for optimise() function with custom db connection""" + + def test_optimise_with_custom_db_connection_single_server(self, db_setup): + """Test optimise() function using a custom db connection for a single server""" + db_conn, db_cur = get_db_cursor() + try: + # Setup: Create server and schedules + query_test_db( + """INSERT INTO servers (server_id, hostname, fqdn, ip4_address) + VALUES (1, 'test-server', 'test-server.local', '192.168.1.1')""" + ) + query_test_db( + """INSERT INTO schedules (schedule_id, server_id, interval_mask, exec_command) + VALUES ('sched-1', 1, '*/10 * * * *', 'echo test1'), + ('sched-2', 1, '0 * * * *', 'echo test2'), + ('sched-3', 1, '0 * 
* * *', 'echo test3'), + ('sched-4', 1, '0 * * * *', 'echo test4'), + ('sched-5', 1, '0 * * * *', 'echo test5')""" + ) + + # Call optimise with custom db_cur + ga_config = {"random_seed": 1, "mutation_type": None, "num_generations": 2, "sol_per_pop": 5, "num_parents_mating": 2} + smart_schedule.optimise(db_cur=db_cur, server_id=1, ga_config=ga_config) + + # Verify that the schedules were processed (no errors should occur) + schedules = query_test_db("SELECT schedule_id FROM schedules WHERE server_id = 1") + smart_interval_masks = query_test_db("SELECT smart_interval_mask FROM schedules WHERE server_id = 1") + snapshots = query_test_db("SELECT snapshot_id FROM snapshots WHERE server_id = 1") + schedule_backups = query_test_db("SELECT schedule_id, interval_mask, smart_interval_mask FROM schedule_backups WHERE server_id = 1") + assert all(mask is not None for mask in smart_interval_masks) + assert query_test_db("""SELECT count(*) FROM schedules + LEFT JOIN schedule_backups ON schedules.schedule_id = schedule_backups.schedule_id + WHERE schedules.interval_mask != schedule_backups.interval_mask + OR schedules.smart_interval_mask != schedule_backups.smart_interval_mask""")[0][0] == 0 + + + assert len(schedules) == 5 + assert len(snapshots) == 1 + assert len(schedule_backups) == 5 + finally: + db_cur.close() + db_conn.close() + + + def test_optimise_with_custom_db_connection_multiple_servers(self, db_setup): + """Test optimise() function with custom db connection for multiple servers""" + db_conn, db_cur = get_db_cursor() + try: + # Setup: Create multiple servers and schedules + query_test_db( + """INSERT INTO servers (server_id, hostname, fqdn, ip4_address) + VALUES (1, 'server-1', 'server-1.local', '192.168.1.1'), + (2, 'server-2', 'server-2.local', '192.168.1.2')""" + ) + query_test_db( + """INSERT INTO schedules (schedule_id, server_id, interval_mask, exec_command) + VALUES ('sched-1a', 1, '*/10 * * * *', 'echo test1'), + ('sched-2a', 1, '0 * * * *', 'echo test2'), + 
('sched-3a', 1, '0 * * * *', 'echo test3'), + ('sched-4a', 1, '0 * * * *', 'echo test4'), + ('sched-5a', 1, '0 * * * *', 'echo test5'), + ('sched-1b', 2, '*/10 * * * *', 'echo test1'), + ('sched-2b', 2, '0 * * * *', 'echo test2'), + ('sched-3b', 2, '0 * * * *', 'echo test3'), + ('sched-4b', 2, '0 * * * *', 'echo test4'), + ('sched-5b', 2, '0 * * * *', 'echo test5') + """ + ) + + # Call optimise with custom db_cur + ga_config = {"random_seed": 1, "mutation_type": None, "num_generations": 2, "sol_per_pop": 5, "num_parents_mating": 2} + + results_1 = query_test_db("SELECT schedule_id, smart_interval_mask FROM schedules WHERE server_id = 1") + results_2 = query_test_db("SELECT schedule_id, smart_interval_mask FROM schedules WHERE server_id = 2") + if results_1: + schedules_1, smart_interval_masks_1 = zip(*results_1) + if results_2: + schedules_2, smart_interval_masks_2 = zip(*results_2) + smart_schedule.optimise(db_cur=db_cur, ga_config=ga_config) + + results_1 = query_test_db("SELECT schedule_id, smart_interval_mask FROM schedules WHERE server_id = 1") + results_2 = query_test_db("SELECT schedule_id, smart_interval_mask FROM schedules WHERE server_id = 2") + if results_1: + schedules_1, smart_interval_masks_1 = zip(*results_1) + if results_2: + schedules_2, smart_interval_masks_2 = zip(*results_2) + snapshots_1 = query_test_db("SELECT snapshot_id FROM snapshots WHERE server_id = 1") + snapshots_2 = query_test_db("SELECT snapshot_id FROM snapshots WHERE server_id = 2") + assert len(schedules_1) == 5 and len(schedules_2) == 5 + assert len(snapshots_1) == 1 and len(snapshots_2) == 1 + assert query_test_db("SELECT count(*) FROM schedule_backups WHERE server_id = 1")[0][0] == 5 + assert query_test_db("SELECT count(*) FROM schedule_backups WHERE server_id = 2")[0][0] == 5 + assert all(mask is not None for mask in smart_interval_masks_1) + assert all(mask is not None for mask in smart_interval_masks_2) + + assert query_test_db("""SELECT count(*) FROM schedules + LEFT JOIN 
schedule_backups ON schedules.schedule_id = schedule_backups.schedule_id + WHERE schedules.interval_mask != schedule_backups.interval_mask + OR schedules.smart_interval_mask != schedule_backups.smart_interval_mask""")[0][0] == 0 + finally: + db_cur.close() + db_conn.close() + + def test_optimise_invalid_server_id_with_custom_connection(self, db_setup): + """Test optimise() raises error for invalid server_id with custom db connection""" + db_conn, db_cur = get_db_cursor() + try: + # Attempt to optimize for non-existent server + with pytest.raises(ValueError, match="Server with server_id=999 does not exist"): + smart_schedule.optimise(db_cur=db_cur, server_id=999, ga_config=None) + finally: + db_cur.close() + db_conn.close() + + def test_optimise_no_schedules_with_custom_connection(self, db_setup, capsys): + """Test optimise() handles server with no schedules gracefully using custom db connection""" + db_conn, db_cur = get_db_cursor() + try: + # Setup: Create server with no schedules + query_test_db( + """INSERT INTO servers (server_id, hostname, fqdn, ip4_address) + VALUES (1, 'test-server', 'test-server.local', '192.168.1.1')""" + ) + + # Call optimise - should return early with message + smart_schedule.optimise(db_cur=db_cur, server_id=1, ga_config=None) + + # Verify error message was printed + captured = capsys.readouterr() + assert "No schedules found for server_id 1" in captured.out finally: db_cur.close() db_conn.close() \ No newline at end of file From cbe43aa1b31c6cfe69d5d4caa342ab17f6d63774 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Thu, 28 May 2026 13:47:47 +0100 Subject: [PATCH 47/53] Add test for bulk schedule update --- tests/test_smart_scheduling.py | 158 +++++++++++++++++++++++++++++++++ 1 file changed, 158 insertions(+) diff --git a/tests/test_smart_scheduling.py b/tests/test_smart_scheduling.py index 4d78c35..1a38d1c 100644 --- a/tests/test_smart_scheduling.py +++ b/tests/test_smart_scheduling.py @@ -338,6 +338,7 @@ def 
test_schedule_initialization(self, db_setup): assert test_schedule.interval_mask == "0 * * * *" assert test_schedule.shifted == False assert test_schedule.start_time_mins == 0 + assert test_schedule.median_runtime_minutes == 5 finally: db_cur.close() db_conn.close() @@ -1397,6 +1398,163 @@ def test_optimise_no_schedules_with_custom_connection(self, db_setup, capsys): # Verify error message was printed captured = capsys.readouterr() assert "No schedules found for server_id 1" in captured.out + finally: + db_cur.close() + db_conn.close() + + +class TestUpdateScheduleDetailsBulk: + """Tests for update_schedule_details_bulk function""" + + def test_update_schedule_details_bulk_single_schedule(self, db_setup): + """Test bulk update of a single schedule""" + db_conn, db_cur = get_db_cursor() + try: + # Setup: Create server and schedule + query_test_db( + """INSERT INTO servers (server_id, hostname, fqdn, ip4_address) + VALUES (1, 'test-server', 'test-server.local', '192.168.1.1')""" + ) + query_test_db( + """INSERT INTO schedules (schedule_id, server_id, interval_mask, smart_interval_mask, exec_command) + VALUES ('sched-1', 1, '0 * * * *', NULL, 'echo test')""" + ) + + # Bulk update: set smart_interval_mask + schedule_list = [ + { + "schedule_id": "sched-1", + "smart_interval_mask": "30 * * * *", + } + ] + scheduler.update_schedule_details_bulk(db_cur=db_cur, schedule_list=schedule_list) + + # Verify update + result = query_test_db("SELECT smart_interval_mask FROM schedules WHERE schedule_id = 'sched-1'") + assert result[0][0] == "30 * * * *" + finally: + db_cur.close() + db_conn.close() + + def test_update_schedule_details_bulk_multiple_schedules(self, db_setup): + """Test bulk update of multiple schedules""" + db_conn, db_cur = get_db_cursor() + try: + # Setup: Create server and schedules + query_test_db( + """INSERT INTO servers (server_id, hostname, fqdn, ip4_address) + VALUES (1, 'test-server', 'test-server.local', '192.168.1.1')""" + ) + query_test_db( + """INSERT 
INTO schedules (schedule_id, server_id, interval_mask, smart_interval_mask, exec_command) + VALUES ('sched-1', 1, '0 * * * *', NULL, 'echo test1'), + ('sched-2', 1, '15 * * * *', NULL, 'echo test2'), + ('sched-3', 1, '30 * * * *', NULL, 'echo test3')""" + ) + + # Bulk update: set smart_interval_mask for all schedules + schedule_list = [ + {"schedule_id": "sched-1", "smart_interval_mask": "10 * * * *"}, + {"schedule_id": "sched-2", "smart_interval_mask": "25 * * * *"}, + {"schedule_id": "sched-3", "smart_interval_mask": "40 * * * *"}, + ] + scheduler.update_schedule_details_bulk(db_cur=db_cur, schedule_list=schedule_list) + + # Verify updates + result = query_test_db( + "SELECT schedule_id, smart_interval_mask FROM schedules WHERE server_id = 1 ORDER BY schedule_id" + ) + assert len(result) == 3 + assert result[0] == ("sched-1", "10 * * * *") + assert result[1] == ("sched-2", "25 * * * *") + assert result[2] == ("sched-3", "40 * * * *") + finally: + db_cur.close() + db_conn.close() + + def test_update_schedule_details_bulk_with_null_values(self, db_setup): + """Test that NULL values in schedule_list are skipped""" + db_conn, db_cur = get_db_cursor() + try: + # Setup: Create server and schedule + query_test_db( + """INSERT INTO servers (server_id, hostname, fqdn, ip4_address) + VALUES (1, 'test-server', 'test-server.local', '192.168.1.1')""" + ) + query_test_db( + """INSERT INTO schedules (schedule_id, server_id, interval_mask, smart_interval_mask, exec_command) + VALUES ('sched-1', 1, '0 * * * *', '30 * * * *', 'echo test')""" + ) + + # Bulk update: set smart_interval_mask to something new, but include None values + schedule_list = [ + { + "schedule_id": "sched-1", + "smart_interval_mask": "45 * * * *", + "parameters": None, # This should be ignored + } + ] + scheduler.update_schedule_details_bulk(db_cur=db_cur, schedule_list=schedule_list) + + # Verify that only smart_interval_mask was updated + result = query_test_db("SELECT smart_interval_mask FROM schedules 
WHERE schedule_id = 'sched-1'") + assert result[0][0] == "45 * * * *" + finally: + db_cur.close() + db_conn.close() + + def test_update_schedule_details_bulk_empty_list(self, db_setup): + """Test bulk update with empty schedule list""" + db_conn, db_cur = get_db_cursor() + try: + # Should return early without error + scheduler.update_schedule_details_bulk(db_cur=db_cur, schedule_list=[]) + # No assertion needed - test passes if no exception is raised + finally: + db_cur.close() + db_conn.close() + + + def test_update_schedule_details_bulk_multiple_fields(self, db_setup): + """Test bulk update of multiple fields for each schedule""" + db_conn, db_cur = get_db_cursor() + try: + # Setup: Create server and schedule + query_test_db( + """INSERT INTO servers (server_id, hostname, fqdn, ip4_address) + VALUES (1, 'test-server', 'test-server.local', '192.168.1.1')""" + ) + query_test_db( + """INSERT INTO schedules (schedule_id, server_id, interval_mask, smart_interval_mask, exec_command) + VALUES ('sched-1', 1, '0 * * * *', NULL, 'echo test'), ('sched-2', 1, '15 * * * *', NULL, 'echo test')""" + ) + + # Bulk update: update multiple fields + schedule_list = [ + { + "schedule_id": "sched-1", + "smart_interval_mask": "15 * * * *", + "interval_mask": "15 * * * *", + }, + { + "schedule_id": "sched-2", + "smart_interval_mask": "5 * * * *", + "interval_mask": "5 * * * *" + } + ] + scheduler.update_schedule_details_bulk(db_cur=db_cur, schedule_list=schedule_list) + + # Verify updates + result = query_test_db( + "SELECT smart_interval_mask, interval_mask FROM schedules WHERE schedule_id = 'sched-1'" + ) + assert result[0][0] == "15 * * * *" + assert result[0][1] == "15 * * * *" + result = query_test_db( + "SELECT smart_interval_mask, interval_mask, is_enabled FROM schedules WHERE schedule_id = 'sched-2'" + ) + assert result[0][0] == "5 * * * *" + assert result[0][1] == "5 * * * *" finally: db_cur.close() db_conn.close() \ No newline at end of file From 
843eb6ccd84a9f138484fc6639012189d1afda67 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Thu, 28 May 2026 13:59:28 +0100 Subject: [PATCH 48/53] Remove snapshot cleanup for schedule-based rollback --- cicada/commands/smart_schedule_rollback.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cicada/commands/smart_schedule_rollback.py b/cicada/commands/smart_schedule_rollback.py index 9728dd4..c347932 100644 --- a/cicada/commands/smart_schedule_rollback.py +++ b/cicada/commands/smart_schedule_rollback.py @@ -69,9 +69,10 @@ def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbn try: scheduler.full_rollback(db_cur, server_id, schedule_id) print("Full rollback successful\n") - server_ids = [server_id] if server_id else [server[0] for server in scheduler.get_all_server_ids(db_cur)] - for server in server_ids: - scheduler.snapshot_schedules(db_cur, server_id=server, reason='Full Rollback') + if not schedule_id: + server_ids = [server_id] if server_id else [server[0] for server in scheduler.get_all_server_ids(db_cur)] + for server in server_ids: + scheduler.snapshot_schedules(db_cur, server_id=server, reason='Full Rollback') db_cur.execute("COMMIT;") except Exception as e: db_cur.execute("ROLLBACK;") From 499407265b47df1ec6fb382a9123d308ff10e323 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Thu, 28 May 2026 16:54:23 +0100 Subject: [PATCH 49/53] SmartScheduling -> smart_scheduling --- cicada/commands/smart_schedule.py | 4 ++-- .../lib/{SmartScheduling => smart_scheduling}/GAPyGAD.py | 6 +++--- .../lib/{SmartScheduling => smart_scheduling}/__init__.py | 0 .../lib/{SmartScheduling => smart_scheduling}/config.py | 0 .../lib/{SmartScheduling => smart_scheduling}/domain.py | 0 .../{SmartScheduling => smart_scheduling}/evaluation.py | 2 +- tests/test_smart_scheduling.py | 8 ++++---- 7 files changed, 10 insertions(+), 10 deletions(-) rename cicada/lib/{SmartScheduling => smart_scheduling}/GAPyGAD.py (97%) rename 
cicada/lib/{SmartScheduling => smart_scheduling}/__init__.py (100%) rename cicada/lib/{SmartScheduling => smart_scheduling}/config.py (100%) rename cicada/lib/{SmartScheduling => smart_scheduling}/domain.py (100%) rename cicada/lib/{SmartScheduling => smart_scheduling}/evaluation.py (97%) diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index a3076f5..110179a 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -6,8 +6,8 @@ from croniter import croniter from cicada.lib import postgres, utils from cicada.lib import scheduler -from cicada.lib.SmartScheduling.GAPyGAD import GAPyGADScheduler -from cicada.lib.SmartScheduling.domain import Schedule +from cicada.lib.smart_scheduling.GAPyGAD import GAPyGADScheduler +from cicada.lib.smart_scheduling.domain import Schedule def _get_schedules_per_server(server_id, db_cur=None): """Get all schedules for a given server_id.""" diff --git a/cicada/lib/SmartScheduling/GAPyGAD.py b/cicada/lib/smart_scheduling/GAPyGAD.py similarity index 97% rename from cicada/lib/SmartScheduling/GAPyGAD.py rename to cicada/lib/smart_scheduling/GAPyGAD.py index 182e247..17d8157 100644 --- a/cicada/lib/SmartScheduling/GAPyGAD.py +++ b/cicada/lib/smart_scheduling/GAPyGAD.py @@ -3,9 +3,9 @@ import numpy as np import pygad -from cicada.lib.SmartScheduling.config import GAConfig -from cicada.lib.SmartScheduling.domain import Schedule -from cicada.lib.SmartScheduling.evaluation import evaluate_usage_and_peak +from cicada.lib.smart_scheduling.config import GAConfig +from cicada.lib.smart_scheduling.domain import Schedule +from cicada.lib.smart_scheduling.evaluation import evaluate_usage_and_peak class GAPyGADScheduler: diff --git a/cicada/lib/SmartScheduling/__init__.py b/cicada/lib/smart_scheduling/__init__.py similarity index 100% rename from cicada/lib/SmartScheduling/__init__.py rename to cicada/lib/smart_scheduling/__init__.py diff --git a/cicada/lib/SmartScheduling/config.py 
b/cicada/lib/smart_scheduling/config.py similarity index 100% rename from cicada/lib/SmartScheduling/config.py rename to cicada/lib/smart_scheduling/config.py diff --git a/cicada/lib/SmartScheduling/domain.py b/cicada/lib/smart_scheduling/domain.py similarity index 100% rename from cicada/lib/SmartScheduling/domain.py rename to cicada/lib/smart_scheduling/domain.py diff --git a/cicada/lib/SmartScheduling/evaluation.py b/cicada/lib/smart_scheduling/evaluation.py similarity index 97% rename from cicada/lib/SmartScheduling/evaluation.py rename to cicada/lib/smart_scheduling/evaluation.py index c83ac83..88e553e 100644 --- a/cicada/lib/SmartScheduling/evaluation.py +++ b/cicada/lib/smart_scheduling/evaluation.py @@ -1,6 +1,6 @@ import numpy as np from typing import Sequence -from cicada.lib.SmartScheduling.domain import Schedule +from cicada.lib.smart_scheduling.domain import Schedule def evaluate_usage_and_peak(start_times: Sequence[int], schedules: Sequence[Schedule]): diff --git a/tests/test_smart_scheduling.py b/tests/test_smart_scheduling.py index 1a38d1c..b185136 100644 --- a/tests/test_smart_scheduling.py +++ b/tests/test_smart_scheduling.py @@ -8,11 +8,11 @@ import numpy as np import psycopg2 -from cicada.lib.SmartScheduling.domain import Schedule -from cicada.lib.SmartScheduling.config import GAConfig -from cicada.lib.SmartScheduling.evaluation import evaluate_usage_and_peak +from cicada.lib.smart_scheduling.domain import Schedule +from cicada.lib.smart_scheduling.config import GAConfig +from cicada.lib.smart_scheduling.evaluation import evaluate_usage_and_peak import cicada.commands.smart_schedule as smart_schedule -from cicada.lib.SmartScheduling.GAPyGAD import GAPyGADScheduler +from cicada.lib.smart_scheduling.GAPyGAD import GAPyGADScheduler from cicada.lib import scheduler From 962e0d4804b018ebc3174b45a44b01d11595d0d3 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Mon, 1 Jun 2026 10:06:23 +0100 Subject: [PATCH 50/53] Change type() -> isinstance() --- 
cicada/commands/smart_schedule_rollback.py | 8 ++++---- cicada/lib/smart_scheduling/config.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cicada/commands/smart_schedule_rollback.py b/cicada/commands/smart_schedule_rollback.py index c347932..4463e94 100644 --- a/cicada/commands/smart_schedule_rollback.py +++ b/cicada/commands/smart_schedule_rollback.py @@ -50,9 +50,9 @@ def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbn previous: bool If used, restores to the most recent snapshot (step back one optimization). """ - if type(server_id) != int and server_id is not None: + if server_id is not None and not isinstance(server_id, int): raise TypeError(f"server_id needs to be of type int. {type(server_id)}") - if type(schedule_id) != str and schedule_id is not None: + if schedule_id is not None and not isinstance(schedule_id, str): raise TypeError("schedule_id needs to be of type str") if not(full or previous) or (full and previous): raise ValueError("Exactly one of --full or --previous flags must be provided") @@ -82,8 +82,8 @@ def main(server_id: Optional[int] = None, schedule_id: Optional[str] = None, dbn print("\n------------Starting Rollback to Previous Snapshot-----------------") if not server_id: print(f"Rolling back all servers...") - for server_id in scheduler.get_all_server_ids(db_cur): - _rollback_to_previous_snapshot(db_cur, server_id=server_id[0]) + for server in scheduler.get_all_server_ids(db_cur): + _rollback_to_previous_snapshot(db_cur, server_id=server[0]) else: _rollback_to_previous_snapshot(db_cur, server_id=server_id) print("--------------------------------------------------\n") diff --git a/cicada/lib/smart_scheduling/config.py b/cicada/lib/smart_scheduling/config.py index 07593c1..5f39955 100644 --- a/cicada/lib/smart_scheduling/config.py +++ b/cicada/lib/smart_scheduling/config.py @@ -12,4 +12,4 @@ class GAConfig: crossover_type: str = "uniform" mutation_type: str = "random" keep_elitism: int = 2 
- random_seed: Optional[int] = None \ No newline at end of file + random_seed: Optional[int] = None From ef9412d704d7a142c0e283c290461a1b400b5199 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Mon, 1 Jun 2026 14:10:54 +0100 Subject: [PATCH 51/53] Change test to use and rename ga_pygad --- cicada/commands/smart_schedule.py | 2 +- cicada/lib/smart_scheduling/{GAPyGAD.py => ga_pygad.py} | 0 tests/test_smart_scheduling.py | 6 +++--- 3 files changed, 4 insertions(+), 4 deletions(-) rename cicada/lib/smart_scheduling/{GAPyGAD.py => ga_pygad.py} (100%) diff --git a/cicada/commands/smart_schedule.py b/cicada/commands/smart_schedule.py index 110179a..dfbe54f 100644 --- a/cicada/commands/smart_schedule.py +++ b/cicada/commands/smart_schedule.py @@ -6,7 +6,7 @@ from croniter import croniter from cicada.lib import postgres, utils from cicada.lib import scheduler -from cicada.lib.smart_scheduling.GAPyGAD import GAPyGADScheduler +from cicada.lib.smart_scheduling.ga_pygad import GAPyGADScheduler from cicada.lib.smart_scheduling.domain import Schedule def _get_schedules_per_server(server_id, db_cur=None): diff --git a/cicada/lib/smart_scheduling/GAPyGAD.py b/cicada/lib/smart_scheduling/ga_pygad.py similarity index 100% rename from cicada/lib/smart_scheduling/GAPyGAD.py rename to cicada/lib/smart_scheduling/ga_pygad.py diff --git a/tests/test_smart_scheduling.py b/tests/test_smart_scheduling.py index b185136..d365c0a 100644 --- a/tests/test_smart_scheduling.py +++ b/tests/test_smart_scheduling.py @@ -12,7 +12,7 @@ from cicada.lib.smart_scheduling.config import GAConfig from cicada.lib.smart_scheduling.evaluation import evaluate_usage_and_peak import cicada.commands.smart_schedule as smart_schedule -from cicada.lib.smart_scheduling.GAPyGAD import GAPyGADScheduler +from cicada.lib.smart_scheduling.ga_pygad import GAPyGADScheduler from cicada.lib import scheduler @@ -302,7 +302,7 @@ def test_evaluate_wrapping_around_day(self, db_setup): start_blocks = [1430] # (1430 mins = 23:50) # 
Should throw an assertion error that the start block is too late for the frequency of the schedule - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=r"Start time should be the earliest it can be for schedule: .* with start time 1430 exceeds frequency 60"): evaluate_usage_and_peak(start_blocks, [test_schedule]) finally: db_cur.close() @@ -665,7 +665,7 @@ def test_get_blocklisted_schedule_ids_empty(self, db_setup): """Test retrieving blocklisted schedule IDs when none exist""" db_conn, db_cur = get_db_cursor() try: - # Initially should have the 10 admin schedules + # Initially should have the 18 admin schedules result = scheduler.get_blocklisted_schedule_ids(db_cur) assert len(result) == 18 finally: From bc47dd5b3e2f001e1ac3a633386dd836eac7dc07 Mon Sep 17 00:00:00 2001 From: naomiwise Date: Mon, 15 Jun 2026 13:36:16 +0100 Subject: [PATCH 52/53] Increment version from 0.9.0 to 0.10.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 944ca84..ddcfaa5 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name="cicada", - version="0.9.0", + version="0.10.0", description="Lightweight, agent-based, distributed scheduler", long_description=long_description, long_description_content_type="text/markdown", From 5c8e6ff76ab02dec8dd327448995f9c4e902f126 Mon Sep 17 00:00:00 2001 From: Naomi Saad Date: Mon, 15 Jun 2026 13:44:53 +0100 Subject: [PATCH 53/53] update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cebf20a..04c8b70 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +0.10.0 +----- +- Add smart_schedule command with optimise and rollback options (as well as blocklist functionality) +- Add new column to existing table and new tables connected to smart_schedule command + 0.9.0 ----- - Verify compatibility with Ubuntu 22.04