From 9e2028a76f29e768092b29cf62107f07e14d53bd Mon Sep 17 00:00:00 2001 From: Amaan Javed Date: Sat, 25 Apr 2026 15:55:40 -0400 Subject: [PATCH 1/3] Remove caching, remove configuration of rate limit, minor bug fixes --- docs/configuration.html | 44 +++- docs/index.html | 213 ++++++++++------ docs/reference.html | 7 +- src/rmp_client/__init__.py | 2 - src/rmp_client/client.py | 232 ++++-------------- src/rmp_client/config.py | 1 - src/rmp_client/errors.py | 4 - .../__pycache__/__init__.cpython-313.pyc | Bin 567 -> 567 bytes .../__pycache__/course_codes.cpython-313.pyc | Bin 2490 -> 2492 bytes src/rmp_client/extras/course_codes.py | 2 +- src/rmp_client/extras/helpers.py | 5 +- src/rmp_client/http.py | 9 +- src/rmp_client/models.py | 1 - src/rmp_client/queries.py | 1 + src/rmp_client/rate_limit.py | 12 +- tests/test_client.py | 11 +- tests/test_config.py | 5 - tests/test_errors.py | 14 -- tests/test_extras.py | 11 + tests/test_http.py | 27 +- tests/test_rate_limit.py | 38 +-- 21 files changed, 281 insertions(+), 358 deletions(-) diff --git a/docs/configuration.html b/docs/configuration.html index 3fd406e..c9f4ac9 100644 --- a/docs/configuration.html +++ b/docs/configuration.html @@ -71,7 +71,6 @@

Configuration

base_url="https://www.ratemyprofessors.com/graphql", timeout_seconds=10.0, max_retries=3, - rate_limit_per_minute=60, ) with RMPClient(config) as client: ... @@ -104,12 +103,6 @@

3 Number of retry attempts for failed requests - - rate_limit_per_minute - int - 60 - Max requests per minute (token bucket) - user_agent str @@ -128,19 +121,46 @@

Rate Limiting #

- The client uses a token-bucket algorithm. Tokens replenish continuously. - Each request consumes one token. If no tokens are available, the request - blocks until one becomes available. + The client uses a token-bucket algorithm fixed at 60 requests per + minute. Tokens replenish continuously at 1 per second. Each + request consumes one token; if none are available the request blocks + until one becomes available. This limit is not configurable.

-
config = RMPClientConfig(rate_limit_per_minute=30)

Retries #

On 5xx errors or network failures, the client retries up to max_retries times. 4xx errors are not retried. After exhausting retries, a - RetryError is raised. + RetryError is raised. Each retry attempt consumes a token + from the rate limiter before firing.

+ + + + + + + + + + + + + + + + + + + + + + + + + +
SituationRetried?
Network error (timeout, connection reset)Yes
5xx server errorYes
4xx client error (400, 401, 403, 404)No — raises HttpError immediately
429 Too Many RequestsYes — exponential back-off, up to max_retries times
GraphQL errors in a 200 responseNo — raises RMPAPIError immediately
config = RMPClientConfig(max_retries=5)

Timeouts #

diff --git a/docs/index.html b/docs/index.html index b154af2..f8fbaa6 100644 --- a/docs/index.html +++ b/docs/index.html @@ -1,75 +1,121 @@ - + - - - - RateMyProfessors API Client (Python) - - - - - - - -
-
- - - - -
-
+ + + + RateMyProfessors API Client (Python) + + + + + + + +
+
+ + + + + + +
+
- + -
-

RateMyProfessors API Client

-
- PyPI - downloads -
-

- An unofficial, typed Python client for RateMyProfessors. - All data is fetched via RMP's GraphQL API — no HTML scraping or browser automation required. -

+
+

RateMyProfessors API Client

+
+ PyPI + downloads +
+

+ An unofficial, typed Python client for + RateMyProfessors. All + data is fetched via RMP's GraphQL API — no HTML scraping or + browser automation required. +

-
- Disclaimer: This library is unofficial and may break if RMP changes their internal API. Use responsibly and respect rate limits. -
+
+ Disclaimer: This library is unofficial and may break if + RMP changes their internal API. Use responsibly and respect rate limits. +
-

Features #

-
    -
  • Strong typing via Pydantic models
  • -
  • Automatic retries with configurable max attempts
  • -
  • Token-bucket rate limiting (default 60 req/min)
  • -
  • In-memory caching for ratings pages
  • -
  • Cursor-based pagination for all list/search endpoints
  • -
  • Clear error hierarchy for precise exception handling
  • -
  • Built-in helpers for ingestion workflows (sentiment, comment validation, course codes)
  • -
+

Features #

+
    +
  • Strong typing via Pydantic models
  • +
  • Automatic retries with configurable max attempts
  • +
  • Token-bucket rate limiting (fixed at 60 req/min)
  • +
  • Ratings pages are always fetched fresh from the API (no in-memory caching)
  • +
  • Cursor-based pagination for all list/search endpoints
  • +
  • Clear error hierarchy for precise exception handling
  • +
  • + Built-in helpers for ingestion workflows (sentiment, comment + validation, course codes) +
  • +
-

Requirements #

-
    -
  • Python 3.10 or later
  • -
  • Works with type checkers (Pydantic models, fully typed API)
  • -
+

+ Requirements # +

+
    +
  • Python 3.10 or later
  • +
  • Works with type checkers (Pydantic models, fully typed API)
  • +
-

Installation #

-
pip install ratemyprofessors-client
+

+ Installation # +

+
pip install ratemyprofessors-client
-

Quick Start #

-
from rmp_client import RMPClient
+      

+ Quick Start # +

+
from rmp_client import RMPClient
 
 with RMPClient() as client:
     prof = client.get_professor("2823076")
@@ -78,16 +124,33 @@ 

Quick Start #

-

Documentation #

-
    -
  • Usage — Quickstart examples for every endpoint
  • -
  • Configuration — Tuning retries, rate limits, timeouts, and headers
  • -
  • API Reference — Full method and type reference
  • -
  • Extras — Ingestion helpers (sentiment, comment validation, course mapping)
  • -
-
+

+ Documentation # +

+
    +
  • + Usage — Quickstart examples for every + endpoint +
  • +
  • + Configuration — Tuning retries, + rate limits, timeouts, and headers +
  • +
  • + API Reference — Full method and + type reference +
  • +
  • + Extras — Ingestion helpers (sentiment, + comment validation, course mapping) +
  • +
+
- - - + + + diff --git a/docs/reference.html b/docs/reference.html index bf4ad34..9e626ae 100644 --- a/docs/reference.html +++ b/docs/reference.html @@ -47,7 +47,7 @@

School Methods search_schools(query, *, page_size=20, cursor=None)SchoolSearchResultSearch schools by name get_school(school_id)SchoolFetch a single school with category ratings get_compare_schools(school_id_1, school_id_2)CompareSchoolsResultFetch two schools side by side - get_school_ratings_page(school_id, *, cursor=None, page_size=20)SchoolRatingsPageGet one page of school ratings (cached) + get_school_ratings_page(school_id, *, cursor=None, page_size=20)SchoolRatingsPageGet one page of school ratings iter_school_ratings(school_id, *, page_size=20, since=None)Iterator[SchoolRating]Iterate all school ratings @@ -58,7 +58,7 @@

Professor Methods list_professors_for_school(school_id, *, query=None, page_size=20, cursor=None)ProfessorSearchResultList professors at a school iter_professors_for_school(school_id, *, query=None, page_size=20)Iterator[Professor]Iterate all professors at a school get_professor(professor_id)ProfessorFetch a single professor - get_professor_ratings_page(professor_id, *, cursor=None, page_size=20, course_filter=None)ProfessorRatingsPageGet one page of professor ratings (cached) + get_professor_ratings_page(professor_id, *, cursor=None, page_size=20, course_filter=None)ProfessorRatingsPageGet one page of professor ratings iter_professor_ratings(professor_id, *, page_size=20, since=None, course_filter=None)Iterator[Rating]Iterate all professor ratings @@ -78,7 +78,7 @@

School

id, name, location, overall_quality, num_ratings, reputation, safety, happiness, facilities, social, location_rating, clubs, opportunities, internet, food

Professor

-

id, name, department, school (School), url, overall_rating, num_ratings, percent_take_again, level_of_difficulty, tags, rating_distribution

+

id, name, department, school (School), overall_rating, num_ratings, percent_take_again, level_of_difficulty, tags, rating_distribution

Rating

date, comment, quality, difficulty, tags, course_raw, details, thumbs_up, thumbs_down

@@ -103,7 +103,6 @@

Errors #

ErrorDescription HttpErrorNon-2xx HTTP response. Has status_code, url, body. ParsingErrorCould not parse the GraphQL response. - RateLimitErrorLocal rate limiter blocked the request. RetryErrorAll retry attempts exhausted. RMPAPIErrorGraphQL API returned an errors array. ConfigurationErrorInvalid client configuration. diff --git a/src/rmp_client/__init__.py b/src/rmp_client/__init__.py index d1e638d..de117a2 100644 --- a/src/rmp_client/__init__.py +++ b/src/rmp_client/__init__.py @@ -6,7 +6,6 @@ ConfigurationError, HttpError, ParsingError, - RateLimitError, RetryError, RMPAPIError, RMPError, @@ -30,7 +29,6 @@ "ConfigurationError", "HttpError", "ParsingError", - "RateLimitError", "RetryError", "RMPAPIError", "TokenBucket", diff --git a/src/rmp_client/client.py b/src/rmp_client/client.py index 24fd561..37d2907 100644 --- a/src/rmp_client/client.py +++ b/src/rmp_client/client.py @@ -3,17 +3,18 @@ All data is fetched via POST to https://www.ratemyprofessors.com/graphql. Rate limiting, retries, and timeouts are handled by :class:`HttpClient`. -Call :meth:`RMPClient.close` when done to release resources and clear caches. +Call :meth:`RMPClient.close` when done to release resources. """ from __future__ import annotations import base64 +import warnings from datetime import date -from typing import Any, Dict, Iterator, List, Mapping, Optional, Tuple +from typing import Any, Dict, Iterator, List, Mapping, Optional from .config import RMPClientConfig -from .errors import HttpError, ParsingError, RetryError, RMPAPIError +from .errors import ParsingError from .http import HttpClient, HttpClientContext from .models import ( CompareSchoolsResult, @@ -70,10 +71,15 @@ def _safe_int(value: Any) -> Optional[int]: return None +def _coalesce(*values: Any) -> Any: + """Return the first non-None value, or None if all are None.""" + return next((v for v in values if v is not None), None) + + def _parse_date(date_str: Any) -> date: """Parse RMP date strings (e.g. 
'2026-03-03 21:20:35 +0000 UTC') to a date. - Uses only the date part; invalid input yields today's date. + Uses only the date part; invalid input warns and yields today's date. """ if isinstance(date_str, str): part = date_str.split(" ")[0] if " " in date_str else date_str @@ -81,6 +87,7 @@ def _parse_date(date_str: Any) -> date: return date.fromisoformat(part) except ValueError: pass + warnings.warn(f"Could not parse date {date_str!r}, using today", stacklevel=3) return date.today() @@ -94,12 +101,6 @@ def __init__(self, config: Optional[RMPClientConfig] = None) -> None: self._config = config or RMPClientConfig() self._http_ctx = HttpClientContext(self._config) self._http: Optional[HttpClient] = None - self._professor_ratings_cache: Dict[ - str, Tuple[Professor, List[Rating]] - ] = {} - self._school_ratings_cache: Dict[ - str, Tuple[School, List[SchoolRating]] - ] = {} def __enter__(self) -> "RMPClient": self._http = self._http_ctx.__enter__() @@ -108,8 +109,6 @@ def __enter__(self) -> "RMPClient": def __exit__(self, *args: Any) -> None: self._http_ctx.__exit__(*args) self._http = None - self._professor_ratings_cache.clear() - self._school_ratings_cache.clear() @property def _client(self) -> HttpClient: @@ -118,12 +117,10 @@ def _client(self) -> HttpClient: return self._http def close(self) -> None: - """Close the HTTP client and clear all rating caches. Safe to call multiple times.""" + """Close the HTTP client. Safe to call multiple times.""" if self._http is not None: self._http.close() self._http = None - self._professor_ratings_cache.clear() - self._school_ratings_cache.clear() # ---- Low-level --------------------------------------------------------------- @@ -283,11 +280,7 @@ def iter_professors_for_school( # ---- Professor details + ratings --------------------------------------------- def get_professor(self, professor_id: str) -> Professor: - """Fetch a single professor by legacy numeric ID. 
- - Uses the ratings list query with a minimal page size to retrieve - full teacher details in a single request. - """ + """Fetch a single professor by legacy numeric ID.""" page = self._fetch_professor_ratings_page(professor_id, first=1) return page.professor @@ -299,67 +292,12 @@ def get_professor_ratings_page( page_size: int = 20, course_filter: Optional[str] = None, ) -> ProfessorRatingsPage: - """Fetch one page of ratings for a professor. - - On the first call all ratings are pre-fetched via GraphQL and cached - in memory, so subsequent "Load More" calls with a cursor are served - instantly with no extra network requests. - """ - # Serve from cache when cursor is a numeric offset - if cursor is not None: - cached = self._professor_ratings_cache.get(professor_id) - if cached: - professor, all_ratings = cached - start = max(0, int(cursor)) - page_slice = all_ratings[start : start + page_size] - has_next = start + page_size < len(all_ratings) - return ProfessorRatingsPage( - professor=professor, - ratings=page_slice, - has_next_page=has_next, - next_cursor=str(start + page_size) if has_next else None, - ) - - # Repeated first-page call: serve from cache - existing = self._professor_ratings_cache.get(professor_id) - if existing is not None and cursor is None: - professor, all_ratings = existing - page_slice = all_ratings[:page_size] - has_next = len(all_ratings) > page_size - return ProfessorRatingsPage( - professor=professor, - ratings=page_slice, - has_next_page=has_next, - next_cursor=str(page_size) if has_next else None, - ) - - # First load: fetch ALL ratings via GraphQL and cache - first = self._fetch_professor_ratings_page( - professor_id, first=100, course_filter=course_filter - ) - all_ratings = list(first.ratings) - professor = first.professor - after = first.next_cursor if first.has_next_page else None - - while after is not None: - try: - nxt = self._fetch_professor_ratings_page( - professor_id, after=after, first=100, course_filter=course_filter - ) - 
except (RMPAPIError, HttpError, RetryError): - break - all_ratings.extend(nxt.ratings) - after = nxt.next_cursor if nxt.has_next_page else None - - self._professor_ratings_cache[professor_id] = (professor, all_ratings) - - page_slice = all_ratings[:page_size] - has_next = len(all_ratings) > page_size - return ProfessorRatingsPage( - professor=professor, - ratings=page_slice, - has_next_page=has_next, - next_cursor=str(page_size) if has_next else None, + """Fetch one page of ratings for a professor.""" + return self._fetch_professor_ratings_page( + professor_id, + after=cursor, + first=page_size, + course_filter=course_filter, ) def iter_professor_ratings( @@ -370,7 +308,11 @@ def iter_professor_ratings( since: Optional[date] = None, course_filter: Optional[str] = None, ) -> Iterator[Rating]: - """Iterate all ratings for a professor. Optional ``since`` stops early.""" + """Iterate all ratings for a professor. + + ``since`` stops iteration early; assumes the API returns ratings + newest-first, which is the observed behaviour. + """ cursor: Optional[str] = None while True: page = self.get_professor_ratings_page( @@ -390,11 +332,7 @@ def iter_professor_ratings( # ---- School details + ratings ------------------------------------------------ def get_school(self, school_id: str) -> School: - """Fetch a single school by legacy numeric ID. - - Uses the school ratings list query with a minimal page size to retrieve - full school details (including category summaries) in a single request. - """ + """Fetch a single school by legacy numeric ID.""" page = self._fetch_school_ratings_page(school_id, first=1) return page.school @@ -413,56 +351,8 @@ def get_school_ratings_page( cursor: Optional[str] = None, page_size: int = 20, ) -> SchoolRatingsPage: - """Fetch one page of school ratings. 
Same caching pattern as professor ratings.""" - if cursor is not None: - cached = self._school_ratings_cache.get(school_id) - if cached: - school, all_ratings = cached - start = max(0, int(cursor)) - page_slice = all_ratings[start : start + page_size] - has_next = start + page_size < len(all_ratings) - return SchoolRatingsPage( - school=school, - ratings=page_slice, - has_next_page=has_next, - next_cursor=str(start + page_size) if has_next else None, - ) - - existing = self._school_ratings_cache.get(school_id) - if existing is not None and cursor is None: - school, all_ratings = existing - page_slice = all_ratings[:page_size] - has_next = len(all_ratings) > page_size - return SchoolRatingsPage( - school=school, - ratings=page_slice, - has_next_page=has_next, - next_cursor=str(page_size) if has_next else None, - ) - - first = self._fetch_school_ratings_page(school_id, first=100) - all_ratings = list(first.ratings) - school = first.school - after = first.next_cursor if first.has_next_page else None - - while after is not None: - try: - nxt = self._fetch_school_ratings_page(school_id, after=after, first=100) - except (RMPAPIError, HttpError, RetryError): - break - all_ratings.extend(nxt.ratings) - after = nxt.next_cursor if nxt.has_next_page else None - - self._school_ratings_cache[school_id] = (school, all_ratings) - - page_slice = all_ratings[:page_size] - has_next = len(all_ratings) > page_size - return SchoolRatingsPage( - school=school, - ratings=page_slice, - has_next_page=has_next, - next_cursor=str(page_size) if has_next else None, - ) + """Fetch one page of school ratings.""" + return self._fetch_school_ratings_page(school_id, after=cursor, first=page_size) def iter_school_ratings( self, @@ -471,7 +361,11 @@ def iter_school_ratings( page_size: int = 20, since: Optional[date] = None, ) -> Iterator[SchoolRating]: - """Iterate all ratings for a school. Optional ``since`` stops early.""" + """Iterate all ratings for a school. 
+ + ``since`` stops iteration early; assumes the API returns ratings + newest-first, which is the observed behaviour. + """ cursor: Optional[str] = None while True: page = self.get_school_ratings_page( @@ -530,10 +424,10 @@ def _fetch_professor_ratings_page( name=name or "Unknown", department=node.get("department"), school=school, - overall_rating=_safe_float(node.get("avgRating")), + overall_rating=_safe_float(_coalesce(node.get("avgRating"), node.get("overallRating"))), num_ratings=_safe_int(node.get("numRatings")), - percent_take_again=_safe_float(node.get("wouldTakeAgainPercent")), - level_of_difficulty=_safe_float(node.get("avgDifficulty")), + percent_take_again=_safe_float(_coalesce(node.get("wouldTakeAgainPercent"), node.get("percentTakeAgain"))), + level_of_difficulty=_safe_float(_coalesce(node.get("avgDifficulty"), node.get("levelOfDifficulty"))), ) ratings_conn = node.get("ratings") or {} @@ -623,16 +517,15 @@ def _parse_professor_node(self, node: Mapping[str, Any]) -> Professor: name=name, department=node.get("department"), school=school, - url=node.get("url"), overall_rating=_safe_float( - node.get("avgRating") or node.get("overallRating") + _coalesce(node.get("avgRating"), node.get("overallRating")) ), num_ratings=_safe_int(node.get("numRatings")), percent_take_again=_safe_float( - node.get("wouldTakeAgainPercent") or node.get("percentTakeAgain") + _coalesce(node.get("wouldTakeAgainPercent"), node.get("percentTakeAgain")) ), level_of_difficulty=_safe_float( - node.get("avgDifficulty") or node.get("levelOfDifficulty") + _coalesce(node.get("avgDifficulty"), node.get("levelOfDifficulty")) ), tags=[], rating_distribution=None, @@ -661,7 +554,7 @@ def _parse_rating_node(self, record: Mapping[str, Any]) -> Rating: date=_parse_date(record.get("date")), comment=str(record.get("comment") or ""), quality=_safe_float( - record.get("clarityRating") or record.get("helpfulRating") + _coalesce(record.get("clarityRating"), record.get("helpfulRating")) ), 
difficulty=_safe_float(record.get("difficultyRating")), tags=tags, @@ -674,44 +567,21 @@ def _parse_rating_node(self, record: Mapping[str, Any]) -> Rating: def _parse_school_node(self, node: Mapping[str, Any]) -> School: summary = node.get("summary") if isinstance(node.get("summary"), dict) else None return School( - id=str(node.get("legacyId") or node.get("id") or ""), + id=str(_coalesce(node.get("legacyId"), node.get("id")) or ""), name=str(node.get("name") or ""), location=_format_location(node), - overall_quality=_safe_float( - node.get("avgRatingRounded") or node.get("avgRating") - ), + overall_quality=_safe_float(_coalesce(node.get("avgRatingRounded"), node.get("avgRating"))), num_ratings=_safe_int(node.get("numRatings")), - reputation=_safe_float( - (summary or {}).get("schoolReputation") or node.get("reputation") - ), - safety=_safe_float( - (summary or {}).get("schoolSafety") or node.get("safety") - ), - happiness=_safe_float( - (summary or {}).get("schoolSatisfaction") or node.get("happiness") - ), - facilities=_safe_float( - (summary or {}).get("campusCondition") or node.get("facilities") - ), - social=_safe_float( - (summary or {}).get("socialActivities") or node.get("social") - ), - location_rating=_safe_float( - (summary or {}).get("campusLocation") or node.get("location_rating") - ), - clubs=_safe_float( - (summary or {}).get("clubAndEventActivities") or node.get("clubs") - ), - opportunities=_safe_float( - (summary or {}).get("careerOpportunities") - or node.get("opportunities") - ), - internet=_safe_float( - (summary or {}).get("internetSpeed") or node.get("internet") - ), - food=_safe_float( - (summary or {}).get("foodQuality") or node.get("food") - ), + reputation=_safe_float(_coalesce((summary or {}).get("schoolReputation"), node.get("reputation"))), + safety=_safe_float(_coalesce((summary or {}).get("schoolSafety"), node.get("safety"))), + happiness=_safe_float(_coalesce((summary or {}).get("schoolSatisfaction"), node.get("happiness"))), + 
facilities=_safe_float(_coalesce((summary or {}).get("campusCondition"), node.get("facilities"))), + social=_safe_float(_coalesce((summary or {}).get("socialActivities"), node.get("social"))), + location_rating=_safe_float(_coalesce((summary or {}).get("campusLocation"), node.get("location_rating"))), + clubs=_safe_float(_coalesce((summary or {}).get("clubAndEventActivities"), node.get("clubs"))), + opportunities=_safe_float(_coalesce((summary or {}).get("careerOpportunities"), node.get("opportunities"))), + internet=_safe_float(_coalesce((summary or {}).get("internetSpeed"), node.get("internet"))), + food=_safe_float(_coalesce((summary or {}).get("foodQuality"), node.get("food"))), ) def _parse_school_rating_node(self, record: Mapping[str, Any]) -> SchoolRating: diff --git a/src/rmp_client/config.py b/src/rmp_client/config.py index 0eb1bec..16a6626 100644 --- a/src/rmp_client/config.py +++ b/src/rmp_client/config.py @@ -32,7 +32,6 @@ class RMPClientConfig: base_url: str = DEFAULT_BASE_URL timeout_seconds: float = 10.0 max_retries: int = 3 - rate_limit_per_minute: int = 60 user_agent: str = DEFAULT_USER_AGENT default_headers: Mapping[str, str] = field( default_factory=lambda: dict(DEFAULT_HEADERS) diff --git a/src/rmp_client/errors.py b/src/rmp_client/errors.py index db49b4e..7e01615 100644 --- a/src/rmp_client/errors.py +++ b/src/rmp_client/errors.py @@ -21,10 +21,6 @@ def __init__(self, status_code: int, url: str, body: Optional[str] = None) -> No super().__init__(f"HTTP {status_code} for {url}") -class RateLimitError(RMPError): - """Raised when a local rate limit is exceeded.""" - - class RetryError(RMPError): """Raised when a request ultimately fails after exhausting retries.""" diff --git a/src/rmp_client/extras/__pycache__/__init__.cpython-313.pyc b/src/rmp_client/extras/__pycache__/__init__.cpython-313.pyc index 7ba88e8a91c7e2d7fd97d7efb7fcf299da726cd9..9c2b0d5bfd2739bfeb6b424e2bfdcb76b9240d45 100644 GIT binary patch delta 22 
ccmdnavYmzZGcPX}0}$NF-ks^Wkynce07*RtIRF3v delta 22 ccmdnavYmzZGcPX}0}$Lv-<@f(kynce07%FNBme*a diff --git a/src/rmp_client/extras/__pycache__/course_codes.cpython-313.pyc b/src/rmp_client/extras/__pycache__/course_codes.cpython-313.pyc index 87c93552eca7b7c81fa4995859ef4b33199ec235..dcfd235659e5ed6b6940d533a9d39ae9043111de 100644 GIT binary patch delta 48 zcmdlbyhoV#GcPX}0}$AYzRlFz$eYE+!Ko3GQf;hbvbmORKO ANAT 215 prefix_match = re.match(r"^[A-Z]+", key) - num_match = re.search(r"(\d{3})", key) + num_match = re.search(r"(\d{3,4})", key) candidates: Set[str] = set() if prefix_match and num_match: diff --git a/src/rmp_client/extras/helpers.py b/src/rmp_client/extras/helpers.py index b27be42..9614c1a 100644 --- a/src/rmp_client/extras/helpers.py +++ b/src/rmp_client/extras/helpers.py @@ -2,14 +2,15 @@ from __future__ import annotations +import html import re from dataclasses import dataclass, field from typing import Literal def _strip_html(text: str) -> str: - """Strip HTML tags from text (RMP comments occasionally contain markup).""" - return re.sub(r"<[^>]*>", "", text) + """Strip HTML tags and decode HTML entities from text.""" + return html.unescape(re.sub(r"<[^>]*>", "", text)) def normalize_comment( diff --git a/src/rmp_client/http.py b/src/rmp_client/http.py index 51db2ef..95d5859 100644 --- a/src/rmp_client/http.py +++ b/src/rmp_client/http.py @@ -3,6 +3,7 @@ from __future__ import annotations import json +import time from typing import Any, Dict, Mapping, Optional import httpx @@ -18,10 +19,7 @@ class HttpClient: def __init__(self, config: RMPClientConfig) -> None: self._config = config self._client = httpx.Client(timeout=config.timeout_seconds) - self._bucket = TokenBucket( - capacity=config.rate_limit_per_minute, - refill_per_second=config.rate_limit_per_minute / 60.0, - ) + self._bucket = TokenBucket(capacity=60, refill_per_second=1.0) def close(self) -> None: self._client.close() @@ -81,6 +79,9 @@ def post_json( response.status_code, str(response.url), 
body=response.text ) last_exc = err + if response.status_code == 429 and attempt <= self._config.max_retries: + time.sleep(2 ** attempt) + continue if ( 500 <= response.status_code < 600 and attempt <= self._config.max_retries diff --git a/src/rmp_client/models.py b/src/rmp_client/models.py index 58ab316..139be73 100644 --- a/src/rmp_client/models.py +++ b/src/rmp_client/models.py @@ -47,7 +47,6 @@ class Professor(BaseModel): name: str department: Optional[str] = None school: Optional[School] = None - url: Optional[str] = None overall_rating: Optional[float] = None num_ratings: Optional[int] = None percent_take_again: Optional[float] = None diff --git a/src/rmp_client/queries.py b/src/rmp_client/queries.py index b7668fb..7ecd9f8 100644 --- a/src/rmp_client/queries.py +++ b/src/rmp_client/queries.py @@ -30,6 +30,7 @@ name city state + country avgRating numRatings } diff --git a/src/rmp_client/rate_limit.py b/src/rmp_client/rate_limit.py index b8827c9..ff57e70 100644 --- a/src/rmp_client/rate_limit.py +++ b/src/rmp_client/rate_limit.py @@ -4,8 +4,6 @@ import time from dataclasses import dataclass -from .errors import RateLimitError - @dataclass class TokenBucket: @@ -23,20 +21,14 @@ def _refill(self) -> None: self._last_refill = now self._tokens = min(self.capacity, self._tokens + elapsed * self.refill_per_second) - def consume(self, amount: float = 1.0, *, block: bool = True) -> None: - """Consume tokens from the bucket. - - If block=False and there are insufficient tokens, raises RateLimitError. 
- """ + def consume(self, amount: float = 1.0) -> None: + """Consume tokens from the bucket, blocking until available.""" with self._lock: while True: self._refill() if self._tokens >= amount: self._tokens -= amount return - if not block: - raise RateLimitError("Local rate limit exceeded") - # Sleep just enough to gain one token needed = amount - self._tokens sleep_for = max(needed / self.refill_per_second, 0.01) time.sleep(sleep_for) diff --git a/tests/test_client.py b/tests/test_client.py index fad8d57..45a5ec8 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -23,8 +23,7 @@ @pytest.fixture(scope="module") def client() -> RMPClient: - cfg = RMPClientConfig(rate_limit_per_minute=30) - c = RMPClient(config=cfg) + c = RMPClient() yield c c.close() @@ -182,7 +181,7 @@ def test_returns_both_schools(self, client: RMPClient) -> None: # --------------------------------------------------------------------------- -# get_professor_ratings_page (cached pagination) +# get_professor_ratings_page # --------------------------------------------------------------------------- @@ -197,7 +196,7 @@ def test_first_page(self, client: RMPClient) -> None: assert r.date is not None assert isinstance(r.comment, str) - def test_load_more_from_cache(self, client: RMPClient) -> None: + def test_load_more(self, client: RMPClient) -> None: p1 = client.get_professor_ratings_page(PROFESSOR_ID, page_size=3) assert p1.has_next_page is True assert p1.next_cursor is not None @@ -237,7 +236,7 @@ def test_multiple_show_mores(self, client: RMPClient) -> None: # --------------------------------------------------------------------------- -# get_school_ratings_page (cached pagination) +# get_school_ratings_page # --------------------------------------------------------------------------- @@ -257,7 +256,7 @@ def test_has_category_ratings(self, client: RMPClient) -> None: assert isinstance(r.category_ratings, dict) assert len(r.category_ratings) > 0 - def test_load_more_from_cache(self, client: 
RMPClient) -> None: + def test_load_more(self, client: RMPClient) -> None: p1 = client.get_school_ratings_page(SCHOOL_QUEENS, page_size=3) if not p1.has_next_page: pytest.skip("School does not have enough ratings for multi-page test") diff --git a/tests/test_config.py b/tests/test_config.py index 3cbd822..444ae8f 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -28,16 +28,11 @@ def test_default_timeout_and_retries(self) -> None: config = RMPClientConfig() assert config.timeout_seconds == 10.0 assert config.max_retries == 3 - assert config.rate_limit_per_minute == 60 def test_user_agent_default(self) -> None: config = RMPClientConfig() assert config.user_agent == DEFAULT_USER_AGENT - def test_override_rate_limit(self) -> None: - config = RMPClientConfig(rate_limit_per_minute=30) - assert config.rate_limit_per_minute == 30 - def test_override_base_url(self) -> None: config = RMPClientConfig(base_url="https://custom.example.com/graphql") assert config.base_url == "https://custom.example.com/graphql" diff --git a/tests/test_errors.py b/tests/test_errors.py index d886fd0..4fadbc7 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -10,7 +10,6 @@ ParsingError, RMPAPIError, RMPError, - RateLimitError, RetryError, ) @@ -38,11 +37,6 @@ def test_rmp_api_error_is_rmp_error(self) -> None: exc = RMPAPIError("api err", details=[]) assert isinstance(exc, RMPError) - def test_rate_limit_error_is_rmp_error(self) -> None: - assert issubclass(RateLimitError, RMPError) - exc = RateLimitError("limit exceeded") - assert isinstance(exc, RMPError) - def test_retry_error_is_rmp_error(self) -> None: assert issubclass(RetryError, RMPError) exc = RetryError(ValueError("inner")) @@ -97,11 +91,3 @@ class TestParsingError: def test_message(self) -> None: err = ParsingError("Unexpected payload shape") assert "Unexpected" in str(err) - - -class TestRateLimitError: - """RateLimitError for local rate limit.""" - - def test_message(self) -> None: - err = 
RateLimitError("Local rate limit exceeded") - assert "rate limit" in str(err).lower() diff --git a/tests/test_extras.py b/tests/test_extras.py index 24c5086..cabe067 100644 --- a/tests/test_extras.py +++ b/tests/test_extras.py @@ -31,6 +31,10 @@ def test_unicode_preserved(self) -> None: def test_strips_html_by_default(self) -> None: assert normalize_comment("Loved this class") == "loved this class" + def test_decodes_html_entities(self) -> None: + assert normalize_comment("great & easy") == "great & easy" + assert normalize_comment("bold & great") == "bold & great" + def test_strip_html_option(self) -> None: assert normalize_comment("Bold", strip_html=False) == "bold" @@ -115,3 +119,10 @@ def test_unknown_returns_none(self) -> None: def test_empty_valid(self) -> None: mapping = build_course_mapping(["MATH 101"], []) assert mapping["MATH 101"] is None + + def test_four_digit_course_number_match(self) -> None: + valid = ["MATH 1001", "CS 1102"] + scraped = ["MATH1001", "CS1102"] + mapping = build_course_mapping(scraped, valid) + assert mapping["MATH1001"] == {"MATH 1001"} + assert mapping["CS1102"] == {"CS 1102"} diff --git a/tests/test_http.py b/tests/test_http.py index 2b43b05..6f9c50d 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -16,7 +16,7 @@ class TestHttpClientPostJson: """post_json with pytest-httpx.""" def test_returns_json_on_200(self, httpx_mock: pytest_httpx.HTTPXMock) -> None: - config = RMPClientConfig(rate_limit_per_minute=1000) + config = RMPClientConfig() payload = {"data": {"x": 1}} httpx_mock.add_response( url=config.base_url, @@ -32,7 +32,7 @@ def test_returns_json_on_200(self, httpx_mock: pytest_httpx.HTTPXMock) -> None: def test_raises_rmp_api_error_when_errors_in_body( self, httpx_mock: pytest_httpx.HTTPXMock ) -> None: - config = RMPClientConfig(rate_limit_per_minute=1000) + config = RMPClientConfig() body = json.dumps({"errors": [{"message": "Unauthorized"}]}) httpx_mock.add_response(url=config.base_url, content=body.encode(), 
status_code=200) client = HttpClient(config) @@ -44,7 +44,7 @@ def test_raises_rmp_api_error_when_errors_in_body( client.close() def test_raises_http_error_on_4xx(self, httpx_mock: pytest_httpx.HTTPXMock) -> None: - config = RMPClientConfig(rate_limit_per_minute=1000) + config = RMPClientConfig() httpx_mock.add_response(url=config.base_url, status_code=403, text="Forbidden") client = HttpClient(config) try: @@ -55,7 +55,7 @@ def test_raises_http_error_on_4xx(self, httpx_mock: pytest_httpx.HTTPXMock) -> N client.close() def test_retries_on_5xx(self, httpx_mock: pytest_httpx.HTTPXMock) -> None: - config = RMPClientConfig(max_retries=2, rate_limit_per_minute=1000) + config = RMPClientConfig(max_retries=2) httpx_mock.add_response(url=config.base_url, status_code=502) httpx_mock.add_response(url=config.base_url, status_code=502) httpx_mock.add_response(url=config.base_url, status_code=502) @@ -69,7 +69,7 @@ def test_retries_on_5xx(self, httpx_mock: pytest_httpx.HTTPXMock) -> None: client.close() def test_succeeds_after_5xx_retry(self, httpx_mock: pytest_httpx.HTTPXMock) -> None: - config = RMPClientConfig(max_retries=3, rate_limit_per_minute=1000) + config = RMPClientConfig(max_retries=3) httpx_mock.add_response(url=config.base_url, status_code=503) httpx_mock.add_response(url=config.base_url, json={"data": "ok"}) client = HttpClient(config) @@ -79,8 +79,23 @@ def test_succeeds_after_5xx_retry(self, httpx_mock: pytest_httpx.HTTPXMock) -> N finally: client.close() + def test_retries_on_429( + self, httpx_mock: pytest_httpx.HTTPXMock, monkeypatch: pytest.MonkeyPatch + ) -> None: + monkeypatch.setattr("rmp_client.http.time.sleep", lambda _: None) + config = RMPClientConfig(max_retries=1) + httpx_mock.add_response(url=config.base_url, status_code=429) + httpx_mock.add_response(url=config.base_url, json={"data": "ok"}) + client = HttpClient(config) + try: + result = client.post_json("", {}) + assert result == {"data": "ok"} + assert len(httpx_mock.get_requests()) == 2 + 
finally: + client.close() + def test_sends_default_headers(self, httpx_mock: pytest_httpx.HTTPXMock) -> None: - config = RMPClientConfig(rate_limit_per_minute=1000) + config = RMPClientConfig() httpx_mock.add_response(url=config.base_url, json={"data": {}}) client = HttpClient(config) try: diff --git a/tests/test_rate_limit.py b/tests/test_rate_limit.py index bbbdec1..0461a2a 100644 --- a/tests/test_rate_limit.py +++ b/tests/test_rate_limit.py @@ -4,50 +4,28 @@ import time -import pytest - -from rmp_client.errors import RateLimitError from rmp_client.rate_limit import TokenBucket class TestTokenBucketConsume: - """consume() with block=True (default).""" - def test_consumes_without_error(self) -> None: bucket = TokenBucket(capacity=10, refill_per_second=10) for _ in range(5): bucket.consume() - def test_exhausts_capacity_then_block_false_raises(self) -> None: - bucket = TokenBucket(capacity=3, refill_per_second=1.0) - for _ in range(3): - bucket.consume() - with pytest.raises(RateLimitError, match="rate limit"): - bucket.consume(amount=1.0, block=False) - - def test_block_false_raises_when_insufficient_tokens(self) -> None: - bucket = TokenBucket(capacity=1, refill_per_second=0.01) - bucket.consume() # exhaust - with pytest.raises(RateLimitError): - bucket.consume(block=False) - - def test_block_false_succeeds_when_tokens_available(self) -> None: - bucket = TokenBucket(capacity=2, refill_per_second=10) - bucket.consume(block=False) - bucket.consume(block=False) - def test_refill_over_time(self) -> None: - bucket = TokenBucket(capacity=2, refill_per_second=10.0) # refill 10 per second + bucket = TokenBucket(capacity=2, refill_per_second=10.0) bucket.consume() bucket.consume() - # After 0.2s we have 2 tokens again (0 + 10*0.2 = 2) time.sleep(0.25) - bucket.consume(block=False) - bucket.consume(block=False) + start = time.monotonic() + bucket.consume() + bucket.consume() + assert time.monotonic() - start < 0.1 def test_consume_amount(self) -> None: - bucket = 
TokenBucket(capacity=10, refill_per_second=1.0) + bucket = TokenBucket(capacity=10, refill_per_second=100.0) bucket.consume(amount=5.0) bucket.consume(amount=5.0) - with pytest.raises(RateLimitError): - bucket.consume(amount=1.0, block=False) + time.sleep(0.15) + bucket.consume(amount=10.0) From d18a19fe6766a85383eeb7d8e3440a1a9d00695b Mon Sep 17 00:00:00 2001 From: Amaan Javed Date: Sat, 25 Apr 2026 15:59:15 -0400 Subject: [PATCH 2/3] Update versioning --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9d49568..308ef44 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "ratemyprofessors-client" -version = "2.1.2" +version = "3.0.0" description = "Typed, retrying, rate-limited unofficial Python client for the RateMyProfessors GraphQL API." readme = "README.md" requires-python = ">=3.10" From 711fa99e9fa50cdf30848eb5801e42c0c8d51b36 Mon Sep 17 00:00:00 2001 From: Amaan Javed Date: Sat, 25 Apr 2026 18:45:48 -0400 Subject: [PATCH 3/3] Update documentation --- docs/index.html | 5 ++--- docs/reference.html | 2 +- docs/usage.html | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/index.html b/docs/index.html index f8fbaa6..1c78be5 100644 --- a/docs/index.html +++ b/docs/index.html @@ -89,8 +89,7 @@

Features #

  • Strong typing via Pydantic models
  • Automatic retries with configurable max attempts
  • -
  • Token-bucket rate limiting (default 60 req/min)
  • -
  • In-memory caching for ratings pages
  • +
  • Token-bucket rate limiting (fixed at 60 req/min)
  • Cursor-based pagination for all list/search endpoints
  • Clear error hierarchy for precise exception handling
  • @@ -134,7 +133,7 @@

  • Configuration — Tuning retries, - rate limits, timeouts, and headers + timeouts, and headers
  • API Reference — Full method and diff --git a/docs/reference.html b/docs/reference.html index 9e626ae..0b99cf8 100644 --- a/docs/reference.html +++ b/docs/reference.html @@ -66,7 +66,7 @@

    Low-level #

    - +
    MethodReturnsDescription
    raw_query(payload)dictSend a raw GraphQL payload
    close()NoneClose the HTTP client and clear caches
    close()NoneClose the HTTP client

    diff --git a/docs/usage.html b/docs/usage.html index 3b5b816..7c26e9e 100644 --- a/docs/usage.html +++ b/docs/usage.html @@ -85,7 +85,7 @@

    Get a Professor by ID -

    Professor Ratings (Paginated, Cached) #

    +

    Professor Ratings (Paginated) #

    with RMPClient() as client:
         page = client.get_professor_ratings_page("2823076", page_size=10)
         print(f"Professor: {page.professor.name}")
    @@ -103,7 +103,7 @@ 

    Iterate All Professor Ratings

    -

    School Ratings (Paginated, Cached) #

    +

    School Ratings (Paginated) #

    with RMPClient() as client:
         page = client.get_school_ratings_page("1466", page_size=10)
         for rating in page.ratings: