Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pandas
numpy
pyfixest
pytest
tqdm
4 changes: 2 additions & 2 deletions run_simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@

# Parameters for round-based sampling
N_ROUNDS = 15
QUESTIONS_PER_ROUND = 25
QUESTIONS_PER_ROUND = 100
MODELS_PER_ROUND_MEAN = 40
DATASET_WEIGHT = 0.5
SIMULATION_METHOD = "round_based"
Expand Down Expand Up @@ -177,7 +177,7 @@ def validate_processed_data(df):

def main():
print("Loading data...")
df = process_raw_data(f"{INPUT_FOLDER}/leaderboard_human.pkl")
df = process_raw_data(f"{INPUT_FOLDER}/leaderboard_llm.pkl")
df.to_csv(f"{PROCESSED_FOLDER}/processed_dataset.csv", index=False)

# Load the processed dataset
Expand Down
44 changes: 39 additions & 5 deletions src/ranking_sim.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
import pandas as pd
import pyfixest as pf
from tqdm import tqdm

# ================
# Data preparation
Expand All @@ -15,9 +16,16 @@ def process_raw_data(input_name):
df_temp = pkl[ii]["df"]
df_temp["model"] = pkl[ii]["model"]
df_temp["organization"] = pkl[ii]["organization"]
# drop combo questions
df_temp = df_temp[df_temp["direction"] == ()]
df = pd.concat([df, df_temp])
df = df.reset_index(drop=True)

if "horizon" not in df.columns:
df["resolution_date"] = pd.to_datetime(df["resolution_date"])
df["forecast_due_date"] = pd.to_datetime(df["forecast_due_date"])
df["horizon"] = (df["resolution_date"] - df["forecast_due_date"]).dt.days

# Create a new column 'question_id' by concatenating 'source', 'id',
# and 'horizon' columns. This is done to create a unique identifier
# for each question/prediction
Expand Down Expand Up @@ -296,6 +304,35 @@ def simulate_random_sampling(df, n_questions_per_model, ref_model="Always 0.5"):
return df_results


def simple_sample(df, n):
    """Sample question ids for one round, mixing market and dataset questions.

    About half of ``n`` is allotted to dataset questions: ``n // 2 // n_horizons``
    base rows are drawn (with replacement) from the first dataset horizon group,
    and every drawn row is expanded to all question ids in ``df`` sharing its
    ``source``, ``id`` and ``forecast_due_date``. The remainder of ``n`` is
    filled with market question ids drawn uniformly, with replacement, from the
    unique market question ids.

    Args:
        df: DataFrame with the columns ``question_type``, ``horizon``,
            ``question_id``, ``source``, ``id`` and ``forecast_due_date``.
            It must contain at least one ``"dataset"`` and one ``"market"``
            question type.
        n: Target number of question ids for the round.

    Returns:
        A 1-D NumPy array of question ids, market questions first, followed by
        the dataset questions. Its length equals ``n`` only when every sampled
        dataset question is present at each dataset horizon.

    Raises:
        KeyError: If ``df`` has no ``"dataset"`` or no ``"market"`` questions.

    Both draws use NumPy's global random state, so ``np.random.seed`` makes the
    result reproducible.

    NOTE(review): the dataset draw samples *rows* of ``df``. If ``df`` holds
    several rows per question (e.g. one per model -- confirm), questions with
    more rows are drawn more often, whereas market questions are drawn
    uniformly over unique ids.
    """
    key_cols = ["source", "id", "forecast_due_date"]

    groups = df.groupby(["question_type", "horizon"])["question_id"].unique()

    dataset_groups = groups["dataset"]
    n_horizons = len(dataset_groups)

    n_dataset_horizon = n // 2 // n_horizons
    n_market = n - n_dataset_horizon * n_horizons

    # Market questions: choose randomly across all market questions
    all_market_questions = np.concatenate(list(groups["market"].values))
    market_questions = np.random.choice(all_market_questions, size=n_market, replace=True)

    # Dataset questions: choose randomly for one horizon (the first group;
    # groupby orders groups by key), then get the same questions at all horizons
    base_rows = df.loc[df["question_id"].isin(dataset_groups.values[0]), key_cols]
    sampled_rows = base_rows.sample(n=n_dataset_horizon, replace=True)

    # Build the (source, id, forecast_due_date) -> question ids lookup once,
    # instead of scanning the whole frame for every sampled row. Rows with a
    # missing key are dropped by groupby, which matches the fact that an
    # elementwise ``==`` never matches a missing value.
    questions_by_key = (
        df[key_cols + ["question_id"]]
        .drop_duplicates()
        .groupby(key_cols, sort=False, observed=True)["question_id"]
        .unique()
        .to_dict()
    )

    dataset_questions_list = []
    for key in sampled_rows.itertuples(index=False, name=None):
        dataset_questions_list.extend(questions_by_key.get(key, ()))
    dataset_questions = np.array(dataset_questions_list)

    return np.concatenate([market_questions, dataset_questions])


def simulate_round_based(
df,
n_rounds=15,
Expand All @@ -313,7 +350,6 @@ def simulate_round_based(
"""
# Get parameters
models = df["model"].unique()
questions = df["question_id"].unique()

# Check if ref_model exists
if ref_model is None or ref_model not in models:
Expand All @@ -327,9 +363,7 @@ def simulate_round_based(
rounds = []
for round_id in range(n_rounds):
# Sample questions with replacement for this round
round_questions = np.random.choice(
questions, size=questions_per_round, replace=True
)
round_questions = simple_sample(df=df, n=questions_per_round)

# Sample number of models for this round (Poisson, but
# at least 1 non-ref model, and less than total available
Expand Down Expand Up @@ -405,7 +439,7 @@ def evaluate_ranking_methods(

# Run simulations
results_list = []
for sim in range(n_simulations):
for sim in tqdm(range(n_simulations)):
# Generate simulated dataset using the provided simulation function
df_sim = simulation_func(df=df, ref_model=ref_model, **simulation_kwargs)

Expand Down