Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/sources/infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,14 +386,14 @@ def _build_resolution_file(

@staticmethod
def _finalize_resolution_df(df: pd.DataFrame) -> DataFrame[ResolutionFrame]:

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this return pd.DataFrame now that validation has moved to UpdateResult.__post_init__? Currently, the DataFrame[ResolutionFrame] annotation implies that this helper returns a frame that has already been validated against the schema.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same question for:

    def _build_resolution_file(
        self,
        question: dict,
        resolved: bool,
        existing_df: DataFrame[ResolutionFrame] | None = None,
    ) -> DataFrame[ResolutionFrame]:

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to keep the annotation for readability. I understand that it implicitly suggests a validated frame, but the function is not decorated with @pa.check_types, so no validation is actually applied here.

- """Apply date filtering and return as validated ResolutionFrame.
+ """Apply date filtering and select resolution columns.

Args:
df (pd.DataFrame): Raw resolution data with id, date, value columns.
"""
df["date"] = pd.to_datetime(df["date"])
df = df[df["date"].dt.date >= constants.BENCHMARK_START_DATE_DATETIME_DATE]
- return ResolutionFrame.validate(df[["id", "date", "value"]])
+ return df[["id", "date", "value"]].astype(dtype=constants.RESOLUTION_FILE_COLUMN_DTYPE)

# ------------------------------------------------------------------
# Private: question transformation
Expand Down
5 changes: 2 additions & 3 deletions src/sources/manifold.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,11 +407,10 @@ def _build_resolution_file(

@staticmethod
def _finalize_resolution_df(df: pd.DataFrame) -> DataFrame[ResolutionFrame]:
- """Filter to benchmark period and validate as ResolutionFrame."""
+ """Filter to benchmark period and select resolution columns."""
df["date"] = pd.to_datetime(df["date"])
df = df[df["date"].dt.date >= constants.BENCHMARK_START_DATE_DATETIME_DATE]
- df = df[["id", "date", "value"]].astype(dtype=constants.RESOLUTION_FILE_COLUMN_DTYPE)
- return ResolutionFrame.validate(df)
+ return df[["id", "date", "value"]].astype(dtype=constants.RESOLUTION_FILE_COLUMN_DTYPE)

@staticmethod
def _get_resolved_market_value(market: dict) -> float:
Expand Down
5 changes: 2 additions & 3 deletions src/sources/metaculus.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ def set_date(end_datetime):

@staticmethod
def _finalize_resolution_df(df: pd.DataFrame) -> DataFrame[ResolutionFrame]:
- """Cast types and return as a validated ResolutionFrame.
+ """Cast types and select resolution columns.

Unlike infer/manifold, Metaculus does not filter to the benchmark start date:
the aggregation history is already bounded by the question's open window, and
Expand All @@ -455,5 +455,4 @@ def _finalize_resolution_df(df: pd.DataFrame) -> DataFrame[ResolutionFrame]:
Args:
df (pd.DataFrame): Raw resolution data with id, date, value columns.
"""
- df = df[["id", "date", "value"]].astype(dtype=constants.RESOLUTION_FILE_COLUMN_DTYPE)
- return ResolutionFrame.validate(df)
+ return df[["id", "date", "value"]].astype(dtype=constants.RESOLUTION_FILE_COLUMN_DTYPE)
2 changes: 1 addition & 1 deletion src/sources/polymarket.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,4 +547,4 @@ def _build_resolution_file(self, question: dict) -> DataFrame[ResolutionFrame]:
df = pd.DataFrame(question["historical_prices"])
df["id"] = question["id"]
df = df[["id", "date", "value"]].astype(dtype=constants.RESOLUTION_FILE_COLUMN_DTYPE)
- return ResolutionFrame.validate(df)
+ return df
Loading