forecastingresearch · houtanb · May 30, 2025 · May 30, 2025 · May 30, 2025 · May 30, 2025
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,5 @@
+pandas
+numpy
+pyfixest
+pytest
+tqdm
diff --git a/run_simulation.py b/run_simulation.py
@@ -39,7 +39,7 @@
 
 # Parameters for round-based sampling
 N_ROUNDS = 15
-QUESTIONS_PER_ROUND = 25
+QUESTIONS_PER_ROUND = 100
 MODELS_PER_ROUND_MEAN = 40
 DATASET_WEIGHT = 0.5
 SIMULATION_METHOD = "round_based"
@@ -177,7 +177,7 @@ def validate_processed_data(df):
 
 def main():
     print("Loading data...")
-    df = process_raw_data(f"{INPUT_FOLDER}/leaderboard_human.pkl")
+    df = process_raw_data(f"{INPUT_FOLDER}/leaderboard_llm.pkl")
     df.to_csv(f"{PROCESSED_FOLDER}/processed_dataset.csv", index=False)
 
     # Load the processed dataset

diff --git a/src/ranking_sim.py b/src/ranking_sim.py
@@ -1,6 +1,7 @@
 import numpy as np
 import pandas as pd
 import pyfixest as pf
+from tqdm import tqdm
 
 # ================
 # Data preparation
@@ -15,9 +16,16 @@ def process_raw_data(input_name):
         df_temp = pkl[ii]["df"]
         df_temp["model"] = pkl[ii]["model"]
         df_temp["organization"] = pkl[ii]["organization"]
+        # drop combo questions
+        df_temp = df_temp[df_temp["direction"] == ()]
         df = pd.concat([df, df_temp])
     df = df.reset_index(drop=True)
 
+    if "horizon" not in df.columns:
+        df["resolution_date"] = pd.to_datetime(df["resolution_date"])
+        df["forecast_due_date"] = pd.to_datetime(df["forecast_due_date"])
+        df["horizon"] = (df["resolution_date"] - df["forecast_due_date"]).dt.days
+
     # Create a new column 'question_id' by concatenating 'source', 'id',
     # and 'horizon' columns. This is done to create a unique identifier
     # for each question/prediction
@@ -296,6 +304,35 @@ def simulate_random_sampling(df, n_questions_per_model, ref_model="Always 0.5"):
     return df_results
 
 
+def simple_sample(df, n):
+    """Sample about ``n`` question ids for one round: at most half dataset, the rest market.
+
+    Dataset questions are drawn from the first horizon group and expanded to every
+    question id sharing their (source, id, forecast_due_date); market ids are drawn uniformly.
+    Draws are with replacement, so ids can repeat. Returns a 1-D array of question ids.
+    """
+    groups = df.groupby(["question_type", "horizon"])["question_id"].unique()
+    dataset_groups = groups["dataset"]
+    n_horizons = len(dataset_groups)
+
+    n_dataset_horizon = n // 2 // n_horizons
+    n_market = n - n_dataset_horizon * n_horizons  # market absorbs the rounding remainder
+
+    # Market questions: choose randomly across all market questions
+    all_market_questions = np.concatenate(groups["market"].tolist())
+    market_questions = np.random.choice(all_market_questions, size=n_market, replace=True)
+
+    # Dataset questions: build the (key, question_id) table once instead of scanning df
+    # per draw, and draw from it so a question is not weighted by its number of model rows.
+    key_cols = ["source", "id", "forecast_due_date"]
+    pairs = df[key_cols + ["question_id"]].drop_duplicates()
+    df0 = pairs[pairs["question_id"].isin(dataset_groups.iloc[0])]
+    sampled_rows = df0.sample(n=n_dataset_horizon, replace=True)
+    # The inner merge keeps draw order and yields every question id sharing each drawn key.
+    dataset_questions = sampled_rows[key_cols].merge(pairs, on=key_cols)["question_id"].to_numpy()
+    return np.concatenate([market_questions, dataset_questions])
+
+
 def simulate_round_based(
     df,
     n_rounds=15,
@@ -313,7 +350,6 @@ def simulate_round_based(
     """
     # Get parameters
     models = df["model"].unique()
-    questions = df["question_id"].unique()
 
     # Check if ref_model exists
     if ref_model is None or ref_model not in models:
@@ -327,9 +363,7 @@ def simulate_round_based(
     rounds = []
     for round_id in range(n_rounds):
         # Sample questions with replacement for this round
-        round_questions = np.random.choice(
-            questions, size=questions_per_round, replace=True
-        )
+        round_questions = simple_sample(df=df, n=questions_per_round)
 
         # Sample number of models for this round (Poisson, but
         # at least 1 non-ref model, and less than total available
@@ -405,7 +439,7 @@ def evaluate_ranking_methods(
 
     # Run simulations
     results_list = []
-    for sim in range(n_simulations):
+    for sim in tqdm(range(n_simulations)):
         # Generate simulated dataset using the provided simulation function
         df_sim = simulation_func(df=df, ref_model=ref_model, **simulation_kwargs)