From 2eb4b46adab792e7274e1c1c3ca7f7489e0c2c2b Mon Sep 17 00:00:00 2001 From: Simon Ingelsson Date: Mon, 15 Jun 2026 11:25:54 +0200 Subject: [PATCH 1/5] feat(scrapers): replace ISS scraper with Nordrest scraper for Gourmedia and Karavan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add NordrestMenuScraper parsing the Castit lunch widget from nordrest.se - Support weekly specials (veckans rĂ€tt) merged into each day's menu - Switch Gourmedia and Karavan to use NordrestMenuScraper - Remove unused iss_scraper.py - Add 'salladsbar' as a vegetarian keyword in DishClassifier - Display menu items as bullet points in CLI output Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- lunchscraper/cli.py | 31 ++-- lunchscraper/dish_classifier.py | 2 +- lunchscraper/iss_scraper.py | 306 ------------------------------- lunchscraper/mcp_server.py | 18 +- lunchscraper/nordrest_scraper.py | 222 ++++++++++++++++++++++ 5 files changed, 246 insertions(+), 333 deletions(-) delete mode 100644 lunchscraper/iss_scraper.py create mode 100644 lunchscraper/nordrest_scraper.py diff --git a/lunchscraper/cli.py b/lunchscraper/cli.py index b9067de..024699e 100644 --- a/lunchscraper/cli.py +++ b/lunchscraper/cli.py @@ -5,7 +5,7 @@ import logging from lunchscraper.wkb_scraper import WKBMenuScraper -from .iss_scraper import ISSMenuScraper +from .nordrest_scraper import NordrestMenuScraper from .kvartersmenyn_scraper import KvartersmenynsMenuScraper logging.basicConfig( @@ -17,9 +17,8 @@ RESTAURANTS = { 'gourmedia': { 'name': 'Gourmedia', - 'type': 'iss', - 'url': 'https://www.iss-menyer.se/restaurants/restaurang-gourmedia', - 'id': 'Restaurang Gourmedia' + 'type': 'nordrest', + 'url': 'https://www.nordrest.se/restaurang/gourmedia/' }, 'filmhuset': { 'name': 'Filmhuset', @@ -28,8 +27,8 @@ }, 'karavan': { 'name': 'Karavan', - 'type': 'kvartersmenyn', - 'url': 'https://karavan.kvartersmenyn.se/' + 'type': 'nordrest', + 'url': 
'https://www.nordrest.se/restaurang/karavan/' }, 'wkb': { 'name': "WKB", @@ -84,8 +83,8 @@ def main(restaurant_key, vegetarian_only, fish_only, meat_only, week, debug): for key, config in restaurants_to_fetch.items(): try: # Create appropriate scraper based on type - if config['type'] == 'iss': - scraper = ISSMenuScraper(config['url'], config['id'], config['name']) + if config['type'] == 'nordrest': + scraper = NordrestMenuScraper(config['url'], config['name']) elif config['type'] == 'kvartersmenyn': scraper = KvartersmenynsMenuScraper(config['url'], config['name']) elif config['type'] == 'wkbmeny': @@ -155,7 +154,7 @@ def display_all_daily_menus(all_menus, vegetarian_only, fish_only, meat_only): click.echo(click.style("đŸ„Ź Vegetarian".center(80), fg='green', bold=True)) click.echo() for item in menu['vegetarian']: - click.echo(f" {item}") + click.echo(f" ‱ {item}") # Show fish options if not vegetarian_only and not meat_only and menu.get('fish'): @@ -164,7 +163,7 @@ def display_all_daily_menus(all_menus, vegetarian_only, fish_only, meat_only): click.echo(click.style("🐟 Fish".center(80), fg='blue', bold=True)) click.echo() for item in menu['fish']: - click.echo(f" {item}") + click.echo(f" ‱ {item}") # Show meat options if not vegetarian_only and not fish_only and menu.get('meat'): @@ -173,7 +172,7 @@ def display_all_daily_menus(all_menus, vegetarian_only, fish_only, meat_only): click.echo(click.style("đŸ„© Meat".center(80), fg='red', bold=True)) click.echo() for item in menu['meat']: - click.echo(f" {item}") + click.echo(f" ‱ {item}") if not vegetarian_only and not fish_only and menu.get('dessert'): @@ -182,7 +181,7 @@ def display_all_daily_menus(all_menus, vegetarian_only, fish_only, meat_only): click.echo(click.style("🍰 Dessert".center(80), fg='red', bold=True)) click.echo() for item in menu['dessert']: - click.echo(f" {item}") + click.echo(f" ‱ {item}") # Handle case where no menu items found if not has_items: @@ -239,7 +238,7 @@ def 
display_all_weekly_menus(all_menus, vegetarian_only, fish_only, meat_only): click.echo(click.style("đŸ„Ź Vegetarian".center(80), fg='green', bold=True)) click.echo() for item in menu['vegetarian']: - click.echo(f" {item}") + click.echo(f" ‱ {item}") # Show fish options if not vegetarian_only and not meat_only and menu.get('fish'): @@ -248,7 +247,7 @@ def display_all_weekly_menus(all_menus, vegetarian_only, fish_only, meat_only): click.echo(click.style("🐟 Fish".center(80), fg='blue', bold=True)) click.echo() for item in menu['fish']: - click.echo(f" {item}") + click.echo(f" ‱ {item}") # Show meat options if not vegetarian_only and not fish_only and menu.get('meat'): @@ -257,7 +256,7 @@ def display_all_weekly_menus(all_menus, vegetarian_only, fish_only, meat_only): click.echo(click.style("đŸ„© Meat".center(80), fg='red', bold=True)) click.echo() for item in menu['meat']: - click.echo(f" {item}") + click.echo(f" ‱ {item}") if not vegetarian_only and not fish_only and menu.get('dessert'): has_items = True @@ -265,7 +264,7 @@ def display_all_weekly_menus(all_menus, vegetarian_only, fish_only, meat_only): click.echo(click.style("🍰 Dessert".center(80), fg='red', bold=True)) click.echo() for item in menu['dessert']: - click.echo(f" {item}") + click.echo(f" ‱ {item}") # Show message if no items found if not has_items: click.echo(click.style(" ❌ No menu available", fg='yellow')) diff --git a/lunchscraper/dish_classifier.py b/lunchscraper/dish_classifier.py index 8544e40..b7ffd29 100644 --- a/lunchscraper/dish_classifier.py +++ b/lunchscraper/dish_classifier.py @@ -44,7 +44,7 @@ class DishClassifier: VEGETARIAN_KEYWORDS = [ 'vego', 'vegan', 'vegetarisk', 'halloumi', 'falafel', 'tempeh', 'tofu', 'vegansk', 'vegetariskt', 'bönor', 'linser', 'quinoa', - 'seitan', 'svampgryta', 'svampsĂ„s', 'svampsoppa', 'rotselleri', 'selleri', + 'seitan', 'svampgryta', 'svampsĂ„s', 'svampsoppa', 'rotselleri', 'selleri', 'salladsbar', 'kikĂ€rtor', 'grönsaker', 'vegoburgare', 'vegoköttbullar', 
'chili med bönor', 'böff ala lindström', 'gnocchi', 'zucchini', 'aubergine', 'moussaka pĂ„ vegofĂ€rs', 'lĂ„ngbakad rotselleri', 'skogssvamp', 'tempura svamp', 'tortellini', 'ricotta', diff --git a/lunchscraper/iss_scraper.py b/lunchscraper/iss_scraper.py deleted file mode 100644 index 6302178..0000000 --- a/lunchscraper/iss_scraper.py +++ /dev/null @@ -1,306 +0,0 @@ -"""Web scraper for ISS restaurant menus.""" - -import requests -from bs4 import BeautifulSoup -from datetime import datetime, date -from typing import Dict, List, Optional -import logging -import base64 -import json -from .base_scraper import BaseMenuScraper -from .dish_classifier import DishClassifier - -logger = logging.getLogger(__name__) - - -class ISSMenuScraper(BaseMenuScraper): - """Scraper for ISS restaurant lunch menus.""" - - def __init__(self, restaurant_url: str, restaurant_id: str = "Restaurang Gourmedia", restaurant_name: str = "Gourmedia"): - super().__init__(restaurant_name) - self.restaurant_url = restaurant_url - self.restaurant_id = restaurant_id - self.session = requests.Session() - self.session.headers.update({ - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', - 'Accept-Language': 'en-US,en;q=0.9,sv;q=0.8', - }) - self.api_base_url = 'https://www.iss-menyer.se/_api/cloud-data/v2/items/query' - self.app_id = '16d45e35-d3d8-4d5e-b24d-2a680b7e5089' - self._session_established = False - self._meta_site_id = '5e5cfbed-93b8-4425-8938-b96c735bd6c1' # Meta Site ID for iss-menyer.se - self._auth_token = None - - def _establish_session(self): - """Visit the main page to establish a browser session.""" - if self._session_established: - return - - # First visit the home page to establish session - logger.debug("Establishing session by visiting home page") - try: - home_response = 
self.session.get('https://www.iss-menyer.se/', timeout=10) - home_response.raise_for_status() - logger.debug(f"Home page visited. Status: {home_response.status_code}") - logger.debug(f"Cookies after home: {self.session.cookies}") - except Exception as e: - logger.warning(f"Failed to visit home page: {e}") - - # Then visit the restaurant page - logger.debug(f"Visiting restaurant page: {self.restaurant_url}") - try: - response = self.session.get(self.restaurant_url, timeout=10) - response.raise_for_status() - logger.debug(f"Session established. Status: {response.status_code}") - - # Extract authorization token from HTML using BeautifulSoup - from bs4 import BeautifulSoup - soup = BeautifulSoup(response.text, 'html.parser') - viewer_script = soup.find('script', {'id': 'wix-viewer-model'}) - if viewer_script: - json_content = viewer_script.string - # Parse the JSON to find the authorization token - try: - import json - viewer_data = json.loads(json_content) - # The token is in the headers of the dynamic pages configuration - for prefix_data in viewer_data.get('siteFeaturesConfigs', {}).get('dynamicPages', {}).get('prefixToRouterFetchData', {}).values(): - if 'headers' in prefix_data.get('optionsData', {}): - headers = prefix_data['optionsData']['headers'] - if 'Authorization' in headers: - self._auth_token = headers['Authorization'] - logger.debug(f"Found auth token") - break - except Exception as e: - logger.warning(f"Could not parse viewer-model JSON: {e}") - else: - logger.warning("Could not find wix-viewer-model script tag") - - self._session_established = True - except Exception as e: - logger.warning(f"Failed to establish session: {e}") - # Don't raise, we'll try the API anyway - - def _get_week_number(self, target_date: date) -> int: - """Get ISO week number for a given date.""" - return target_date.isocalendar()[1] - - def _build_api_query(self, week_number: int) -> str: - """Build the API query parameter.""" - query_data = { - "dataCollectionId": "Meny", - 
"query": { - "filter": { - "restrauntId": self.restaurant_id, # Note: misspelled in API - "weekNumber": week_number - }, - "paging": { - "offset": 0, - "limit": 1 - }, - "fields": [] - }, - "referencedItemOptions": [], - "returnTotalCount": True, - "environment": "LIVE", - "appId": self.app_id - } - - # Encode to base64 - json_str = json.dumps(query_data, separators=(',', ':')) - encoded = base64.urlsafe_b64encode(json_str.encode('utf-8')).decode('utf-8') - - return encoded - - def _fetch_menu_from_api(self, week_number: int) -> dict: - """Fetch menu data from the ISS API.""" - # First establish a session by visiting the main page - self._establish_session() - - # Update headers for API call - api_headers = { - 'Accept': 'application/json, text/plain, */*', - 'Referer': self.restaurant_url, - 'Origin': 'https://www.iss-menyer.se' - } - - # Add Meta Site ID if we have it - if self._meta_site_id: - api_headers['X-Wix-Meta-Site-Id'] = self._meta_site_id - - # Add authorization token if we have it - if self._auth_token: - api_headers['Authorization'] = self._auth_token - - self.session.headers.update(api_headers) - - query_param = self._build_api_query(week_number) - url = f"{self.api_base_url}?.r={query_param}" - - logger.debug(f"Fetching menu from API for week {week_number}") - logger.debug(f"API URL: {url}") - - try: - response = self.session.get(url, timeout=10) - if response.status_code != 200: - logger.debug(f"API error response: {response.text}") - response.raise_for_status() - data = response.json() - logger.debug(f"API response received successfully") - return data - except Exception as e: - raise Exception(f"Failed to fetch menu from API: {e}") - - def _parse_api_response(self, api_data: dict) -> Dict[str, Dict[str, List[str]]]: - """Parse the API response into our menu format.""" - weekly_menu = {} - - logger.debug(f"Parsing API response") - - # The API response contains dataItems array - items = api_data.get('dataItems', []) - logger.debug(f"Found 
{len(items)} items in API response") - - if not items: - raise Exception("No menu items found in API response") - - # Get the first item (should be the weekly menu) - menu_item = items[0] - menu_data = menu_item.get('data', {}) - - # Extract menuSwedish array - menu_swedish = menu_data.get('menuSwedish', []) - logger.debug(f"Found {len(menu_swedish)} days in menuSwedish") - - if not menu_swedish: - raise Exception("No menuSwedish data found in API response") - - # Map Swedish day names to the menu array indices - day_names = ['mĂ„ndag', 'tisdag', 'onsdag', 'torsdag', 'fredag', 'lördag', 'söndag'] - - for idx, day_menu_obj in enumerate(menu_swedish): - if idx >= len(day_names): - break - - day_name = day_names[idx] - menu_text = day_menu_obj.get('menu', '').strip() - - if not menu_text: - logger.debug(f"No menu for {day_name}") - continue - - # Parse the menu text - menu_items = self._parse_day_menu_from_text(menu_text) - weekly_menu[day_name] = menu_items - logger.debug(f"Parsed {day_name}: {len(menu_items['vegetarian'])} veg, {len(menu_items['meat'])} meat") - - if not weekly_menu: - raise Exception("Could not parse any menu data from API response") - - logger.debug(f"Successfully parsed menu for days: {list(weekly_menu.keys())}") - return weekly_menu - - def _parse_day_menu_from_text(self, menu_text: str) -> Dict[str, List[str]]: - """Parse menu text for a single day.""" - if not menu_text: - return {'vegetarian': [], 'fish': [], 'meat': []} - - # Split by newlines and tabs to get all parts - dishes = [] - lines = menu_text.split('\n') - - for line in lines: - line = line.strip() - if not line: - continue - - # Split by tabs - parts = line.split('\t') - - for part in parts: - part = part.strip() - if part: - dishes.append(part) - - # Use classifier to categorize dishes - categorized = DishClassifier.classify_dishes(dishes) - - # Return all three categories - return categorized - - def get_menu_for_day(self, target_date: Optional[date] = None) -> Dict[str, 
List[str]]: - """ - Get the menu for a specific day. - - Args: - target_date: The date to get menu for. If None, uses today. - - Returns: - Dictionary with 'vegetarian' and 'meat' menu items for the day. - """ - if target_date is None: - target_date = date.today() - - logger.debug(f"Fetching menu for date: {target_date} ({target_date.strftime('%A, %B %d, %Y')})") - - # Get the week number for the target date - week_number = self._get_week_number(target_date) - logger.debug(f"Week number: {week_number}") - - try: - # Fetch menu from API - api_data = self._fetch_menu_from_api(week_number) - weekly_menu = self._parse_api_response(api_data) - except Exception as e: - raise Exception(f"Failed to fetch menu: {e}") - - # Get the day of week (0=Monday, 6=Sunday) - day_of_week = target_date.weekday() - day_names = ['mĂ„ndag', 'tisdag', 'onsdag', 'torsdag', 'fredag', 'lördag', 'söndag'] - - if day_of_week >= len(day_names): - logger.warning(f"Invalid day of week: {day_of_week}") - return {'vegetarian': [], 'meat': []} - - day_name = day_names[day_of_week] - logger.debug(f"Looking for menu for day: {day_name} (day of week: {day_of_week})") - logger.debug(f"Available days in menu: {list(weekly_menu.keys())}") - - if day_name not in weekly_menu: - raise Exception(f"No menu found for {day_name}. Available days: {', '.join(weekly_menu.keys())}") - - menu = weekly_menu[day_name] - logger.debug(f"Found {len(menu.get('vegetarian', []))} vegetarian items and {len(menu.get('meat', []))} meat items") - - if menu.get('vegetarian'): - logger.debug(f"Vegetarian items: {menu['vegetarian']}") - if menu.get('meat'): - logger.debug(f"Meat items: {menu['meat']}") - - # Check if menu is empty - if not menu.get('vegetarian') and not menu.get('meat'): - logger.warning(f"Found menu entry for {day_name} but it contains no items") - - return menu - - def get_weekly_menu(self) -> Dict[str, Dict[str, List[str]]]: - """ - Get the menu for the whole week. 
- - Returns: - Dictionary with days as keys and menu items for each day. - """ - # Get the current week number - today = date.today() - week_number = self._get_week_number(today) - logger.debug(f"Fetching weekly menu for week {week_number}") - - try: - # Fetch menu from API - api_data = self._fetch_menu_from_api(week_number) - weekly_menu = self._parse_api_response(api_data) - except Exception as e: - raise Exception(f"Failed to fetch menu: {e}") - - return weekly_menu diff --git a/lunchscraper/mcp_server.py b/lunchscraper/mcp_server.py index 05eb2fb..c67efdc 100644 --- a/lunchscraper/mcp_server.py +++ b/lunchscraper/mcp_server.py @@ -4,7 +4,7 @@ from typing import Optional, List, Dict, Any from mcp.server.fastmcp import FastMCP -from .iss_scraper import ISSMenuScraper +from .nordrest_scraper import NordrestMenuScraper from .kvartersmenyn_scraper import KvartersmenynsMenuScraper # Initialize FastMCP server @@ -14,9 +14,8 @@ RESTAURANTS = { 'gourmedia': { 'name': 'Gourmedia', - 'type': 'iss', - 'url': 'https://www.iss-menyer.se/restaurants/restaurang-gourmedia', - 'id': 'Restaurang Gourmedia' + 'type': 'nordrest', + 'url': 'https://www.nordrest.se/restaurang/gourmedia/' }, 'filmhuset': { 'name': 'Filmhuset', @@ -25,9 +24,9 @@ }, 'karavan': { 'name': 'Karavan', - 'type': 'kvartersmenyn', - 'url': 'https://karavan.kvartersmenyn.se/' - } + 'type': 'nordrest', + 'url': 'https://www.nordrest.se/restaurang/karavan/' + }, } @@ -37,10 +36,9 @@ def _create_scraper(restaurant_key: str): if not config: raise ValueError(f"Unknown restaurant: {restaurant_key}") - if config['type'] == 'iss': - return ISSMenuScraper( + if config['type'] == 'nordrest': + return NordrestMenuScraper( restaurant_url=config['url'], - restaurant_id=config['id'], restaurant_name=config['name'] ) elif config['type'] == 'kvartersmenyn': diff --git a/lunchscraper/nordrest_scraper.py b/lunchscraper/nordrest_scraper.py new file mode 100644 index 0000000..ae6a0b2 --- /dev/null +++ 
b/lunchscraper/nordrest_scraper.py @@ -0,0 +1,222 @@ +"""Web scraper for Nordrest restaurant menus (via Castit menu widget).""" + +import requests +from bs4 import BeautifulSoup +from datetime import date +from typing import Dict, List, Optional +import logging +from .base_scraper import BaseMenuScraper +from .dish_classifier import DishClassifier + +logger = logging.getLogger(__name__) + + +class NordrestMenuScraper(BaseMenuScraper): + """Scraper for Nordrest restaurant lunch menus rendered via the Castit widget.""" + + def __init__(self, restaurant_url: str, restaurant_name: str = "Gourmedia"): + super().__init__(restaurant_name) + self.restaurant_url = restaurant_url + self.session = requests.Session() + self.session.headers.update({ + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', + 'Accept-Language': 'sv-SE,sv;q=0.9,en;q=0.8', + }) + + def _fetch_page(self) -> BeautifulSoup: + """Fetch the restaurant page and return a BeautifulSoup object.""" + logger.debug(f"Fetching menu from {self.restaurant_url}") + try: + response = self.session.get(self.restaurant_url, timeout=10) + response.raise_for_status() + return BeautifulSoup(response.text, 'html.parser') + except Exception as e: + raise Exception(f"Failed to fetch menu page: {e}") + + def _parse_weekly_menu(self, soup: BeautifulSoup) -> Dict[str, Dict[str, List[str]]]: + """Parse the weekly menu from the Castit widget embedded in the page.""" + weekly_menu = {} + + lunch_div = soup.find('div', class_='castit-lunch') + if not lunch_div: + logger.warning("Could not find castit-lunch widget on page") + return weekly_menu + + # Find the active week panel + week_panel = lunch_div.find('div', class_='is-active', attrs={'data-week-panel': True}) + if not week_panel: + # Fall back to the first panel + week_panel = lunch_div.find('div', 
attrs={'data-week-panel': True}) + if not week_panel: + logger.warning("Could not find week panel in castit-lunch widget") + return weekly_menu + + # Extract weekly specials (veckans rätt) — applied to every day + weekly_specials = self._extract_weekly_specials(week_panel) + if weekly_specials: + logger.debug( + f"Found weekly specials: {len(weekly_specials['vegetarian'])} veg, " + f"{len(weekly_specials['fish'])} fish, {len(weekly_specials['meat'])} meat" + ) + + day_sections = week_panel.find_all('section', class_='castit-day') + logger.debug(f"Found {len(day_sections)} day sections") + + for section in day_sections: + # Skip the weekly specials column — handled separately + if 'castit-week-specials-column' in (section.get('class') or []): + continue + + title_span = section.find('h3', class_='castit-day__title') + if not title_span: + continue + i18n_span = title_span.find('span', class_='castit-i18n') + day_name = (i18n_span.get('data-sv') or i18n_span.get_text(strip=True)).lower() if i18n_span else '' + if not day_name: + continue + + dishes = self._extract_dishes(section) + + # Merge weekly specials into each day + if weekly_specials: + for category in ('vegetarian', 'fish', 'meat'): + dishes[category] = dishes[category] + weekly_specials[category] + + if any(dishes.values()): + weekly_menu[day_name] = dishes + logger.debug( + f"Parsed {day_name}: {len(dishes['vegetarian'])} veg, " + f"{len(dishes['fish'])} fish, {len(dishes['meat'])} meat" + ) + + return weekly_menu + + def _extract_weekly_specials(self, week_panel: BeautifulSoup) -> Dict[str, List[str]]: + """Extract dishes from the 'Veckans rätter' weekly specials column, if present.""" + specials_section = week_panel.find('section', class_='castit-week-specials-column') + if not specials_section: + return {} + return self._extract_dishes(specials_section) + + def _extract_dishes(self, day_section: BeautifulSoup) -> Dict[str, List[str]]: + """Extract and classify dishes from a single day section.""" + 
dish_strings = [] + allergen_tags: List[List[str]] = [] + + # Collect all castit-dish elements — either wrapped in castit-dish-wrap + # (regular days) or directly inside castit-weekgroup (weekly specials). + dish_elements = [] + for wrap in day_section.find_all('div', class_='castit-dish-wrap'): + dish_div = wrap.find('div', class_='castit-dish') + if dish_div: + dish_elements.append(dish_div) + for group in day_section.find_all('div', class_='castit-weekgroup'): + for dish_div in group.find_all('div', class_='castit-dish', recursive=False): + dish_elements.append(dish_div) + + for dish_div in dish_elements: + title_el = dish_div.find('div', class_='castit-dish__title') + if not title_el: + continue + title_span = title_el.find('span', class_='castit-i18n') + title = (title_span.get('data-sv') or title_span.get_text(strip=True)).strip() if title_span else '' + + desc_el = dish_div.find('div', class_='castit-dish__desc') + if desc_el: + desc_span = desc_el.find('span', class_='castit-i18n') + desc = (desc_span.get('data-sv') or desc_span.get_text(strip=True)).strip() if desc_span else '' + full_dish = f"{title}, {desc}" if title and desc else title or desc + else: + full_dish = title + + allergen_el = dish_div.find('div', class_='castit-dish__allergens') + allergens = [a.strip() for a in (allergen_el.get_text(separator='‱').split('‱') if allergen_el else [])] + + if full_dish: + dish_strings.append(full_dish) + allergen_tags.append(allergens) + + return self._classify_with_allergens(dish_strings, allergen_tags) + + def _classify_with_allergens( + self, + dishes: List[str], + allergen_tags: List[List[str]], + ) -> Dict[str, List[str]]: + """ + Classify dishes using allergen tags and DishClassifier. + + Allergen tags containing 'Vegan' or 'Vegetarisk' directly indicate + vegetarian dishes, overriding keyword-based classification. 
+ """ + result: Dict[str, List[str]] = {'vegetarian': [], 'fish': [], 'meat': []} + + for dish, allergens in zip(dishes, allergen_tags): + allergens_lower = [a.lower() for a in allergens] + if 'vegan' in allergens_lower or 'vegetarisk' in allergens_lower: + result['vegetarian'].append(dish) + else: + # Delegate to keyword-based classifier + classified = DishClassifier.classify_dishes([dish]) + for category in ('vegetarian', 'fish', 'meat'): + if classified.get(category): + result[category].extend(classified[category]) + break + else: + result['meat'].append(dish) + + return result + + def get_menu_for_day(self, target_date: Optional[date] = None) -> Dict[str, List[str]]: + """ + Get the menu for a specific day. + + Args: + target_date: The date to get the menu for. Defaults to today. + + Returns: + Dictionary with 'vegetarian', 'fish', and 'meat' menu items for the day. + """ + if target_date is None: + target_date = date.today() + + logger.debug(f"Fetching menu for {target_date}") + + try: + soup = self._fetch_page() + weekly_menu = self._parse_weekly_menu(soup) + except Exception as e: + raise Exception(f"Failed to fetch menu: {e}") + + day_names = ['mĂ„ndag', 'tisdag', 'onsdag', 'torsdag', 'fredag', 'lördag', 'söndag'] + day_of_week = target_date.weekday() + + if day_of_week >= len(day_names): + logger.warning(f"Invalid day of week: {day_of_week}") + return {'vegetarian': [], 'fish': [], 'meat': []} + + day_name = day_names[day_of_week] + logger.debug(f"Looking for menu for: {day_name}") + + if day_name not in weekly_menu: + raise Exception(f"No menu found for {day_name}. Available days: {', '.join(weekly_menu.keys())}") + + return weekly_menu[day_name] + + def get_weekly_menu(self) -> Dict[str, Dict[str, List[str]]]: + """ + Get the menu for the whole week. + + Returns: + Dictionary with Swedish day names as keys and categorised menu items per day. 
+ """ + logger.debug(f"Fetching weekly menu for {self.restaurant_name}") + + try: + soup = self._fetch_page() + weekly_menu = self._parse_weekly_menu(soup) + except Exception as e: + raise Exception(f"Failed to fetch menu: {e}") + + return weekly_menu From 90b11abe6331f6587cec53b0365fc420c7bdcf80 Mon Sep 17 00:00:00 2001 From: Simon Ingelsson Date: Mon, 15 Jun 2026 13:15:05 +0200 Subject: [PATCH 2/5] fix(scrapers): update Nordrest scraper to use Castit widget HTML structure Both Gourmedia and Karavan now render menus via the Castit widget using 'section.castit-day' elements instead of the old accordion structure ('div.accordion-item.weekday-item'). - Rewrite _parse_weekly_menu to find section.castit-day elements and extract dish titles/descriptions from castit-dish divs - Add missing 'import re' that caused NameError in _parse_dishes - Fix .mcp.json to point to simon-ingelsson-sr/rhlunch instead of hamiltoon/rhlunch (which still used the old ISS API scraper) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .mcp.json | 2 +- lunchscraper/nordrest_scraper.py | 91 +++++++++++--------------------- 2 files changed, 31 insertions(+), 62 deletions(-) diff --git a/.mcp.json b/.mcp.json index c32f1ae..ee8d0dc 100644 --- a/.mcp.json +++ b/.mcp.json @@ -4,7 +4,7 @@ "command": "uvx", "args": [ "--from", - "git+https://github.com/hamiltoon/rhlunch.git", + "git+https://github.com/simon-ingelsson-sr/rhlunch.git", "rhlunch-mcp" ] } diff --git a/lunchscraper/nordrest_scraper.py b/lunchscraper/nordrest_scraper.py index 11b8dec..1641144 100644 --- a/lunchscraper/nordrest_scraper.py +++ b/lunchscraper/nordrest_scraper.py @@ -1,5 +1,6 @@ """Web scraper for Nordrest restaurant menus (via Castit menu widget).""" +import re import requests from bs4 import BeautifulSoup from datetime import date @@ -35,77 +36,45 @@ def _fetch_page(self) -> BeautifulSoup: raise Exception(f"Failed to fetch menu page: {e}") def _parse_weekly_menu(self, soup: BeautifulSoup) -> Dict[str, 
Dict[str, List[str]]]: - """Parse the weekly menu from the page.""" + """Parse the weekly menu from the Castit widget on the page.""" weekly_menu = {} - # The menu is structured as accordion items with class 'accordion-item weekday-item' - # Each day is in an accordion-header div - day_names = ['MĂ„ndag', 'Tisdag', 'Onsdag', 'Torsdag', 'Fredag', 'Lördag', 'Söndag'] - - # Find all accordion items - accordion_items = soup.find_all('div', class_='accordion-item') - - for item in accordion_items: - # Check if this is a weekday item - if 'weekday-item' not in item.get('class', []): + swedish_weekdays = {'mĂ„ndag', 'tisdag', 'onsdag', 'torsdag', 'fredag', 'lördag', 'söndag'} + + for day_section in soup.find_all('section', class_='castit-day'): + classes = day_section.get('class', []) + # Skip week-special sections (e.g. "Veckans rĂ€tter") + if 'castit-week-specials-column' in classes: continue - - # Find the day header - header = item.find('div', class_='accordion-header') - if not header: + + # Get the Swedish day name from the title span's data-sv attribute + title_span = day_section.find('span', class_='castit-i18n', attrs={'data-sv': True}) + if not title_span: continue - - day_text = header.get_text(strip=True) - - # Check if this is a valid day name - day_found = None - for day in day_names: - if day.lower() == day_text.lower(): - day_found = day - break - - if not day_found: + day_sv = title_span.get('data-sv', '').strip().lower() + if day_sv not in swedish_weekdays: continue - - # Find the accordion body/content - body = item.find('div', class_='accordion-body') or item.find('div', class_='accordion-content') - if not body: - # If no accordion-body, get all text after the header - body = item - - # Extract dishes from the body - dishes_text = body.get_text(separator='\n') + + # Extract dishes: title + optional description combined into one string dishes = [] - - for line in dishes_text.split('\n'): - line = line.strip() - if not line: - continue - - # Skip the 
day name if it appears again - if line.lower() == day_found.lower(): + for dish_div in day_section.find_all('div', class_='castit-dish'): + title_el = dish_div.find('div', class_='castit-dish__title') + desc_el = dish_div.find('div', class_='castit-dish__desc') + if not title_el: continue - - # Skip lines that are just numbers (prices) - if re.match(r'^\d+\.?\d*$', line): - continue - - # Skip very short lines - if len(line) < 3: - continue - - dishes.append(line) - - # Parse the dishes + dish_text = title_el.get_text(strip=True) + if desc_el: + desc_text = desc_el.get_text(strip=True) + if desc_text: + dish_text = f"{dish_text}, {desc_text}" + if dish_text and len(dish_text) >= 5: + dishes.append(dish_text) + if dishes: menu_items = self._parse_dishes(dishes) - # Check for menu items (either in 'menu' key or in categorized keys) if menu_items.get('menu') or menu_items.get('vegetarian') or menu_items.get('fish') or menu_items.get('meat'): - weekly_menu[day_found.lower()] = menu_items - if menu_items.get('menu'): - logger.debug(f"Parsed {day_found}: {len(menu_items['menu'])} items") - else: - logger.debug(f"Parsed {day_found}: {len(menu_items.get('vegetarian', []))} veg, {len(menu_items.get('fish', []))} fish, {len(menu_items.get('meat', []))} meat") + weekly_menu[day_sv] = menu_items + logger.debug(f"Parsed {day_sv}: {len(menu_items.get('menu', []))} items") return weekly_menu From 169f6bbceadc2e9db425fe3de6a5d7e7807efe57 Mon Sep 17 00:00:00 2001 From: Simon Ingelsson Date: Mon, 15 Jun 2026 13:16:55 +0200 Subject: [PATCH 3/5] =?UTF-8?q?feat(scrapers):=20include=20weekly=20specia?= =?UTF-8?q?ls=20(Veckans=20r=C3=A4tt)=20in=20daily=20menus?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract dishes from the castit-week-specials-column section and append them to every day's menu, so recurring weekly dishes like 'Karavans salladsbar' are included alongside the daily offerings. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- lunchscraper/nordrest_scraper.py | 42 ++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/lunchscraper/nordrest_scraper.py b/lunchscraper/nordrest_scraper.py index 1641144..a8bcd49 100644 --- a/lunchscraper/nordrest_scraper.py +++ b/lunchscraper/nordrest_scraper.py @@ -35,15 +35,38 @@ def _fetch_page(self) -> BeautifulSoup: except Exception as e: raise Exception(f"Failed to fetch menu page: {e}") + def _extract_dishes_from_section(self, section) -> List[str]: + """Extract dish strings from a castit-day or week-specials section.""" + dishes = [] + for dish_div in section.find_all('div', class_='castit-dish'): + title_el = dish_div.find('div', class_='castit-dish__title') + desc_el = dish_div.find('div', class_='castit-dish__desc') + if not title_el: + continue + dish_text = title_el.get_text(strip=True) + if desc_el: + desc_text = desc_el.get_text(strip=True) + if desc_text: + dish_text = f"{dish_text}, {desc_text}" + if dish_text and len(dish_text) >= 5: + dishes.append(dish_text) + return dishes + def _parse_weekly_menu(self, soup: BeautifulSoup) -> Dict[str, Dict[str, List[str]]]: """Parse the weekly menu from the Castit widget on the page.""" weekly_menu = {} - swedish_weekdays = {'måndag', 'tisdag', 'onsdag', 'torsdag', 'fredag', 'lördag', 'söndag'} + # Collect week-special dishes (e.g. "Veckans rätt") to append to every day + weekly_special_dishes: List[str] = [] + for day_section in soup.find_all('section', class_='castit-day'): + if 'castit-week-specials-column' in day_section.get('class', []): + weekly_special_dishes = self._extract_dishes_from_section(day_section) + logger.debug(f"Found {len(weekly_special_dishes)} weekly special dish(es)") + break + for day_section in soup.find_all('section', class_='castit-day'): classes = day_section.get('class', []) - # Skip week-special sections (e.g. 
"Veckans rĂ€tter") if 'castit-week-specials-column' in classes: continue @@ -55,20 +78,7 @@ def _parse_weekly_menu(self, soup: BeautifulSoup) -> Dict[str, Dict[str, List[st if day_sv not in swedish_weekdays: continue - # Extract dishes: title + optional description combined into one string - dishes = [] - for dish_div in day_section.find_all('div', class_='castit-dish'): - title_el = dish_div.find('div', class_='castit-dish__title') - desc_el = dish_div.find('div', class_='castit-dish__desc') - if not title_el: - continue - dish_text = title_el.get_text(strip=True) - if desc_el: - desc_text = desc_el.get_text(strip=True) - if desc_text: - dish_text = f"{dish_text}, {desc_text}" - if dish_text and len(dish_text) >= 5: - dishes.append(dish_text) + dishes = self._extract_dishes_from_section(day_section) + weekly_special_dishes if dishes: menu_items = self._parse_dishes(dishes) From 8b32e8deced419ac9e7562e5928543e93f535624 Mon Sep 17 00:00:00 2001 From: Simon Ingelsson Date: Mon, 15 Jun 2026 13:18:00 +0200 Subject: [PATCH 4/5] style(cli): add bullet points to daily menu item output Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- lunchscraper/cli.py | 237 ++++++++++++++++++++++++-------------------- 1 file changed, 130 insertions(+), 107 deletions(-) diff --git a/lunchscraper/cli.py b/lunchscraper/cli.py index e144b71..3402e8a 100644 --- a/lunchscraper/cli.py +++ b/lunchscraper/cli.py @@ -1,59 +1,57 @@ """Command line interface for the lunch menu scraper.""" import click -from datetime import date, datetime +from datetime import date import logging from lunchscraper.wkb_scraper import WKBMenuScraper from .nordrest_scraper import NordrestMenuScraper from .kvartersmenyn_scraper import KvartersmenynsMenuScraper -from .nordrest_scraper import NordrestMenuScraper -logging.basicConfig( - level=logging.INFO, - format='%(levelname)s: %(message)s' -) +logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") # Restaurant configurations 
RESTAURANTS = { - 'gourmedia': { - 'name': 'Gourmedia', - 'type': 'nordrest', - 'url': 'https://www.nordrest.se/restaurang/gourmedia/' + "gourmedia": { + "name": "Gourmedia", + "type": "nordrest", + "url": "https://www.nordrest.se/restaurang/gourmedia/", }, - 'filmhuset': { - 'name': 'Filmhuset', - 'type': 'kvartersmenyn', - 'url': 'https://filmhuset.kvartersmenyn.se/' + "filmhuset": { + "name": "Filmhuset", + "type": "kvartersmenyn", + "url": "https://filmhuset.kvartersmenyn.se/", }, - 'karavan': { - 'name': 'Karavan', - 'type': 'nordrest', - 'url': 'https://www.nordrest.se/restaurang/karavan/' + "karavan": { + "name": "Karavan", + "type": "nordrest", + "url": "https://www.nordrest.se/restaurang/karavan/", }, - 'wkb': { - 'name': "WKB", - 'type': 'wkbmeny', - 'url': 'https://wkb.se/?page_id=79' - } + "wkb": {"name": "WKB", "type": "wkbmeny", "url": "https://wkb.se/?page_id=79"}, } @click.command() -@click.option('--restaurant', '-r', 'restaurant_key', - default=None, - type=click.Choice(list(RESTAURANTS.keys()), case_sensitive=False), - help='Specific restaurant to show. By default shows all restaurants.') -@click.option('--vegetarian-only', '-v', is_flag=True, - help='Show only vegetarian options.') -@click.option('--fish-only', '-f', is_flag=True, - help='Show only fish options.') -@click.option('--meat-only', '-m', is_flag=True, - help='Show only meat options.') -@click.option('--week', '-w', is_flag=True, - help='Show the whole week menu.') -@click.option('--debug', '-d', is_flag=True, - help='Enable debug logging to show which date is being fetched.') +@click.option( + "--restaurant", + "-r", + "restaurant_key", + default=None, + type=click.Choice(list(RESTAURANTS.keys()), case_sensitive=False), + help="Specific restaurant to show. By default shows all restaurants.", +) +@click.option( + "--vegetarian-only", "-v", is_flag=True, help="Show only vegetarian options." 
+) +@click.option("--fish-only", "-f", is_flag=True, help="Show only fish options.") +@click.option("--meat-only", "-m", is_flag=True, help="Show only meat options.") +@click.option("--week", "-w", is_flag=True, help="Show the whole week menu.") +@click.option( + "--debug", + "-d", + is_flag=True, + help="Enable debug logging to show which date is being fetched.", +) def main(restaurant_key, vegetarian_only, fish_only, meat_only, week, debug): """ Get lunch menu from multiple restaurants. @@ -84,14 +82,14 @@ def main(restaurant_key, vegetarian_only, fish_only, meat_only, week, debug): for key, config in restaurants_to_fetch.items(): try: # Create appropriate scraper based on type - if config['type'] == 'nordrest': - scraper = NordrestMenuScraper(config['url'], config['name']) - elif config['type'] == 'kvartersmenyn': - scraper = KvartersmenynsMenuScraper(config['url'], config['name']) - elif config['type'] == 'wkbmeny': - scraper = WKBMenuScraper(config['url'], config['name']) - elif config['type'] == 'nordrest': - scraper = NordrestMenuScraper(config['url'], config['name']) + if config["type"] == "nordrest": + scraper = NordrestMenuScraper(config["url"], config["name"]) + elif config["type"] == "kvartersmenyn": + scraper = KvartersmenynsMenuScraper(config["url"], config["name"]) + elif config["type"] == "wkbmeny": + scraper = WKBMenuScraper(config["url"], config["name"]) + elif config["type"] == "nordrest": + scraper = NordrestMenuScraper(config["url"], config["name"]) else: click.echo(f"⚠ Unknown scraper type for {config['name']}", err=True) continue @@ -102,13 +100,14 @@ def main(restaurant_key, vegetarian_only, fish_only, meat_only, week, debug): else: menu = scraper.get_menu_for_day() - all_menus[config['name']] = menu + all_menus[config["name"]] = menu except Exception as e: click.echo(f"\n❌ Error fetching menu from {config['name']}:", err=True) click.echo(f" {e}", err=True) if debug: import traceback + traceback.print_exc() # Display results @@ -127,78 
+126,87 @@ def display_all_daily_menus(all_menus, vegetarian_only, fish_only, meat_only): """Display daily menus from multiple restaurants.""" today = date.today() day_names = { - 0: 'Monday', - 1: 'Tuesday', - 2: 'Wednesday', - 3: 'Thursday', - 4: 'Friday', - 5: 'Saturday', - 6: 'Sunday' + 0: "Monday", + 1: "Tuesday", + 2: "Wednesday", + 3: "Thursday", + 4: "Friday", + 5: "Saturday", + 6: "Sunday", } day_name = day_names[today.weekday()] # Header click.echo() - click.echo(click.style(" đŸœïž LUNCH MENU", fg='bright_white', bold=True) + - click.style(f" ‱ {day_name}, {today.strftime('%B %d, %Y')}", fg='white', dim=True)) + click.echo( + click.style(" đŸœïž LUNCH MENU", fg="bright_white", bold=True) + + click.style( + f" ‱ {day_name}, {today.strftime('%B %d, %Y')}", fg="white", dim=True + ) + ) click.echo() for i, (restaurant_name, menu) in enumerate(all_menus.items()): # Restaurant header with emoji and bold name - click.echo(click.style(f" 📍 {restaurant_name.upper()}", fg='bright_cyan', bold=True)) - click.echo(click.style(" " + "─" * 74, fg='cyan', dim=True)) + click.echo( + click.style(f" 📍 {restaurant_name.upper()}", fg="bright_cyan", bold=True) + ) + click.echo(click.style(" " + "─" * 74, fg="cyan", dim=True)) has_items = False # Check for general "menu" key first (for menus without categorization) - if menu.get('menu'): + if menu.get("menu"): has_items = True click.echo() - click.echo(click.style("đŸœïž Menu".center(80), fg='bright_white', bold=True)) + click.echo(click.style("đŸœïž Menu".center(80), fg="bright_white", bold=True)) click.echo() - for item in menu['menu']: - click.echo(f" {item}") + for item in menu["menu"]: + click.echo(f" ‱ {item}") else: # Standard categorized menu # Show vegetarian options - if not meat_only and not fish_only and menu.get('vegetarian'): + if not meat_only and not fish_only and menu.get("vegetarian"): has_items = True click.echo() - click.echo(click.style("đŸ„Ź Vegetarian".center(80), fg='green', bold=True)) + 
click.echo( + click.style("đŸ„Ź Vegetarian".center(80), fg="green", bold=True) + ) click.echo() - for item in menu['vegetarian']: - click.echo(f" {item}") + for item in menu["vegetarian"]: + click.echo(f" ‱ {item}") # Show fish options - if not vegetarian_only and not meat_only and menu.get('fish'): + if not vegetarian_only and not meat_only and menu.get("fish"): has_items = True click.echo() - click.echo(click.style("🐟 Fish".center(80), fg='blue', bold=True)) + click.echo(click.style("🐟 Fish".center(80), fg="blue", bold=True)) click.echo() - for item in menu['fish']: - click.echo(f" {item}") + for item in menu["fish"]: + click.echo(f" ‱ {item}") # Show meat options - if not vegetarian_only and not fish_only and menu.get('meat'): + if not vegetarian_only and not fish_only and menu.get("meat"): has_items = True click.echo() - click.echo(click.style("đŸ„© Meat".center(80), fg='red', bold=True)) + click.echo(click.style("đŸ„© Meat".center(80), fg="red", bold=True)) click.echo() - for item in menu['meat']: - click.echo(f" {item}") - + for item in menu["meat"]: + click.echo(f" ‱ {item}") - if not vegetarian_only and not fish_only and menu.get('dessert'): + if not vegetarian_only and not fish_only and menu.get("dessert"): has_items = True click.echo() - click.echo(click.style("🍰 Dessert".center(80), fg='red', bold=True)) + click.echo(click.style("🍰 Dessert".center(80), fg="red", bold=True)) click.echo() - for item in menu['dessert']: + for item in menu["dessert"]: click.echo(f" ‱ {item}") # Handle case where no menu items found if not has_items: - click.echo(click.style(" ❌ No menu items found for today", fg='yellow')) + click.echo( + click.style(" ❌ No menu items found for today", fg="yellow") + ) # Add spacing between restaurants (except for the last one) if i < len(all_menus) - 1: @@ -211,85 +219,100 @@ def display_all_weekly_menus(all_menus, vegetarian_only, fish_only, meat_only): """Display weekly menus from multiple restaurants.""" # Header click.echo() - 
click.echo(click.style(" đŸœïž WEEKLY LUNCH MENU", fg='bright_white', bold=True)) + click.echo(click.style(" đŸœïž WEEKLY LUNCH MENU", fg="bright_white", bold=True)) click.echo() day_names = { - 'mĂ„ndag': 'Monday', - 'tisdag': 'Tuesday', - 'onsdag': 'Wednesday', - 'torsdag': 'Thursday', - 'fredag': 'Friday', - 'lördag': 'Saturday', - 'söndag': 'Sunday' + "mĂ„ndag": "Monday", + "tisdag": "Tuesday", + "onsdag": "Wednesday", + "torsdag": "Thursday", + "fredag": "Friday", + "lördag": "Saturday", + "söndag": "Sunday", } for rest_idx, (restaurant_name, weekly_menu) in enumerate(all_menus.items()): # Restaurant header - click.echo(click.style(f"📍 {restaurant_name.upper()}", fg='bright_cyan', bold=True)) - click.echo(click.style(" " + "─" * 74, fg='cyan', dim=True)) + click.echo( + click.style(f"📍 {restaurant_name.upper()}", fg="bright_cyan", bold=True) + ) + click.echo(click.style(" " + "─" * 74, fg="cyan", dim=True)) for day_key, day_name in day_names.items(): if day_key in weekly_menu: menu = weekly_menu[day_key] # Skip if no menu items and it's a weekend - if not menu.get('menu') and not menu.get('vegetarian') and not menu.get('fish') and not menu.get('meat'): - if day_key in ['lördag', 'söndag']: + if ( + not menu.get("menu") + and not menu.get("vegetarian") + and not menu.get("fish") + and not menu.get("meat") + ): + if day_key in ["lördag", "söndag"]: continue # Skip empty weekends # Day header click.echo() - click.echo(click.style(f" 📅 {day_name}", fg='bright_yellow', bold=True)) + click.echo( + click.style(f" 📅 {day_name}", fg="bright_yellow", bold=True) + ) has_items = False # Show vegetarian options - if not meat_only and not fish_only and menu.get('vegetarian'): + if not meat_only and not fish_only and menu.get("vegetarian"): has_items = True click.echo() - click.echo(click.style("đŸ„Ź Vegetarian".center(80), fg='green', bold=True)) + click.echo( + click.style("đŸ„Ź Vegetarian".center(80), fg="green", bold=True) + ) click.echo() - for item in 
menu['vegetarian']: + for item in menu["vegetarian"]: click.echo(f" ‱ {item}") # Show fish options - if not vegetarian_only and not meat_only and menu.get('fish'): + if not vegetarian_only and not meat_only and menu.get("fish"): has_items = True click.echo() - click.echo(click.style("🐟 Fish".center(80), fg='blue', bold=True)) + click.echo(click.style("🐟 Fish".center(80), fg="blue", bold=True)) click.echo() - for item in menu['fish']: + for item in menu["fish"]: click.echo(f" ‱ {item}") # Show meat options - if not vegetarian_only and not fish_only and menu.get('meat'): + if not vegetarian_only and not fish_only and menu.get("meat"): has_items = True click.echo() - click.echo(click.style("đŸœïž Menu".center(80), fg='bright_white', bold=True)) + click.echo( + click.style("đŸœïž Menu".center(80), fg="bright_white", bold=True) + ) click.echo() - for item in menu['meat']: + for item in menu["meat"]: click.echo(f" ‱ {item}") - if not vegetarian_only and not fish_only and menu.get('dessert'): + if not vegetarian_only and not fish_only and menu.get("dessert"): has_items = True click.echo() - click.echo(click.style("🍰 Dessert".center(80), fg='red', bold=True)) + click.echo( + click.style("🍰 Dessert".center(80), fg="red", bold=True) + ) click.echo() - for item in menu['dessert']: + for item in menu["dessert"]: click.echo(f" ‱ {item}") # Show message if no items found if not has_items: - click.echo(click.style(" ❌ No menu available", fg='yellow')) - + click.echo( + click.style(" ❌ No menu available", fg="yellow") + ) # Add spacing between restaurants (except for the last one) if rest_idx < len(all_menus) - 1: click.echo() - click.echo() -if __name__ == '__main__': +if __name__ == "__main__": main() From 4a4e78972e0f69a14d118aba590cf132b780d06b Mon Sep 17 00:00:00 2001 From: Simon Ingelsson Date: Mon, 15 Jun 2026 14:14:04 +0200 Subject: [PATCH 5/5] test(scrapers): add NordrestMenuScraper tests with real HTML fixtures 26 tests covering: - _fetch_page (success + HTTP 
failure) - _extract_dishes_from_section (title+desc, no desc, too-short skip) - _parse_dishes (menu key, dietary codes, price suffix, empty, short) - _parse_weekly_menu (all weekdays, weekly specials appended, no castit, unrecognised day names) - get_menu_for_day (Monday, Tuesday, day not found, fetch failure) - get_weekly_menu (all days, fetch failure) - Integration tests against live-fetched HTML fixtures for both Karavan and Gourmedia (2025-11-10) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../2025_11_10/nordrest_gourmedia.html | 1989 +++++++++++++++ .../fixtures/2025_11_10/nordrest_karavan.html | 2176 +++++++++++++++++ tests/test_dish_classifier.py | 187 +- tests/test_iss_scraper.py | 432 ---- tests/test_nordrest_scraper.py | 317 +++ 5 files changed, 4581 insertions(+), 520 deletions(-) create mode 100644 tests/fixtures/2025_11_10/nordrest_gourmedia.html create mode 100644 tests/fixtures/2025_11_10/nordrest_karavan.html delete mode 100644 tests/test_iss_scraper.py create mode 100644 tests/test_nordrest_scraper.py diff --git a/tests/fixtures/2025_11_10/nordrest_gourmedia.html b/tests/fixtures/2025_11_10/nordrest_gourmedia.html new file mode 100644 index 0000000..8eee8b6 --- /dev/null +++ b/tests/fixtures/2025_11_10/nordrest_gourmedia.html @@ -0,0 +1,1989 @@ + + + + + + + + + +Gourmedia - Nordrest + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
+
+
+ +
+
+
+
+
+
+
+ + +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ +
+
+

Gourmedia

+
+
+
+

Gourmedia driver restaurang- och caféverksamhet pÄ FörrÄdsbacken. Utbudet bestÄr av café, varm mat, lunchservering samt konferensbestÀllningar.

+
+
+ +
+ +
+
+
+
+
+

Menyer

+
+
+
+

HÀr finns det minst ett vegetariskt alternativ samt minst ett kött eller fisk alternativ. VÀrmande soppa, stor salladsbuffé, hembakat bröd, kaffe pÄ maten.

+
+
+
+
+
+

+ Lunch v. 25 +

+
+ +
+ + + + + + + +
+
+
+ +
+
+

+ + MÄndag + + Idag + +

+ +
+
+ +
+
+
+ + Tortellini med ricotta och spenat i tryffeldoftande champinjonsÄs + +
+ +
+ + grana padano och ruccola +
+ +
+ Gluten ‹ Laktos ‹ Ägg
+
+ +
+
+
+
+
+ +
+
+
+ + Indisk kycklinggryta med chutney och koriander + +
+ +
+ + kikÀrtor, blomkÄl, spenat, tomat och kokosgrÀdde +
+ +
+ Selleri
+
+ +
+
+
+
+
+
+
+

+ + Tisdag +

+ +
+
+ +
+
+
+ + Lins & bönfylld paprika med spenat + +
+ +
+ + chilidressing och grönsaksris +
+ +
+ Senap ‱ Vegan
+
+ +
+
+
+
+
+ +
+
+
+ + Citronpocherad sejrygg med sparris och hollandaise + +
+ +
+ + rostad potatis och dill +
+ +
+ Ägg
+
+ +
+
+
+
+
+ +
+
+
+ + Stuvade makaroner med stekt falukorv + +
+ +
+ + rostad broccoli, ketchup, senap och rostad lök +
+ +
+ Gluten ‱ Laktos ‱ Senap ‱ FlĂ€sk ‱ Nötkött
+
+ +
+
+
+
+
+
+
+

+ + Onsdag +

+ +
+
+ +
+
+
+ + KikÀrtsbiff med rostade grönsaker och potatis + +
+ +
+ + vitlöksdressing och koriander +
+ +
+ Mjölkprotein
+
+ +
+
+
+
+
+ +
+
+
+ + Kryddig lasagne med kyckling och salsicciafÀrs + +
+ +
+ + tomatsallad och ruccola +
+ +
+ Gluten ‱ Laktos ‱ Selleri ‱ FlĂ€sk ‱ FĂ„gel
+
+ +
+
+
+
+
+
+
+

+ + Torsdag +

+ +
+
+ +
+
+
+ + Vegansk Pasta Bolognese med riven "mozzarella" + +
+ +
+ + picklad lök och ruccola +
+ +
+ Gluten ‱ Soja ‱ Selleri ‱ Vegan
+
+ +
+
+
+
+
+ +
+
+
+ + FlÀskschnitzel med chilibearnaise + +
+ +
+ + rostad potatis och haricots verts +
+ +
+ Gluten ‹ Ägg
+
+ +
+
+
+
+
+
+
+

+ + Fredag +

+ +
+
+ +
+
+
+ + STÄNGT - GLAD MIDSOMMAR + +
+ + +
+ +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

BestÀll catering

+
+
+
+

Order 365 Ă€r er smidiga webshop för catering. HĂ€r bestĂ€ller ni enkelt allt frĂ„n fika, lunch och catering för olika tillfĂ€llen. NĂ€r ni önskar bestĂ€lla mat och dryck vĂ€ljer ni ert bolag via knapparna nedan och lĂ€gger er bestĂ€llning direkt – snabbt och enkelt.

+
+
+
+ +
+
+ +
+
+ +
+
+ +
+
+
+
+
+
+
+
+
+

“Vi sprider matglĂ€dje genom hela livet, frĂ„n allra första stund till Ă„lderns höst”

+
+
+
+

Michael BĂŒhring, VD

+
+
+
+
+
+ Nordrest - Michael BĂŒhring
+
+
+
+
+
+
+
+
+

Öppettider och kontakt för Gourmedia

+
+
+
+
Kontakt:
+
+
+
+

Maila oss: [email protected]

+

Ring oss: 072-227 9215

+
+
+
+
+
Öppettider:
+
+
+
+

SR Hangaren: Cafe och lunch
+Öppettider 07:00-16:00

+

SVT entré: Cafe och lunch
+Öppettider 07:00-16:00

+

Gunnars Café SVT: Lunch
+Öppettider 11:00-13:00

+
+
+ +
+
+
+
+
+
+
+
+
+ + + + + + + +
+
+ + +
+
+
+
+
+
+
+

Skicka feedback

+
+
+
+

HÀr kan ni skicka feedback kring er restaurangupplevelse, ris som ros! +VÄrt mÄl varje dag Àr att erbjuda dig en schysst mÄltidsupplevelse!

+
+
+
+ + +
+
+
+
+
+
+
+
+
+
+
+
+
+
+

AnmÀl dig till vÄrt nyhetsbrev

+
+
+
+ + +
+
+
+
+
+
+
+
+
+
+
+
+
+
+

Kontakta oss

+
+
+
+

Delaktighet, öppenhet och transparens Àr frÄgor som ligger oss varmt om hjÀrtat. SÀrskilt nÀr det gÀller hÄllbarhet. Hör gÀrna av er med synpunkter och Äsikter kring hur vi kan bli Ànnu bÀttre i vÄrt hÄllbarhetsarbete.

+
+ +
+
+
+ + + + + + +
+
+ + +
+
+ + +
+
+ + + +
+
+ +
+
+
+
+ +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

BestÀll cateringmeny

+
+
+
+

BestÀll catering till din verksamhet genom att fylla i uppgifterna i formulÀret nedan.

+
+
+
+
+ + + + + + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + + +
+
+ + +
+
+ + +
+
+ +
+
+ +
+
+ + +
+
+
+ + + +
+
+
+
+ + + +
+
+
+ +
+
+
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/fixtures/2025_11_10/nordrest_karavan.html b/tests/fixtures/2025_11_10/nordrest_karavan.html new file mode 100644 index 0000000..365d89c --- /dev/null +++ b/tests/fixtures/2025_11_10/nordrest_karavan.html @@ -0,0 +1,2176 @@ + + + + + + + + + +Restaurang Karavan - Nordrest + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
+
+
+ +
+
+
+
+
+
+
+ + +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ +
+
+

Restaurang Karavan

+
+
+ +
+ +
+
+
+
+
+

Menyer

+
+
+
+

Mat med kÀrlek och omtanke

+
+
+
+
+
+
+

+ Lunch v. 25 +

+
+ +
+ + + + + + + +
+
+
+
+
+ PRISER
+
+ + + Dagens Lunch : + 137 SEK + + + Take Away : + 124 SEK + + + Stammiskort : + 127 SEK + + + Student : + 130 SEK +
+
+ +
+
+
+

+ Veckans rÀtter

+ +
+ +
+

+ + Veckans rÀtt +

+ +
+
+
+ + Karavans salladsbar + +
+ +
+ + 25,90 kr/hg +
+ +
+ +
+ +
+
+
+
+
+ +
+
+

+ + MÄndag + + Idag + +

+ +
+
+ +
+
+
+ + Tortellini med ricotta och spenat i tryffeldoftande champinjonsÄs + +
+ +
+ + grana padano och ruccola +
+ +
+ +
+ +
+
+
+
+ +
+
+
+ + Ugnsbakad lax med romsÄs och fÀnkÄl-/Àppelcrudité + +
+ +
+ + kokt potatis och citron +
+ +
+ +
+ +
+
+
+
+ +
+
+
+ + Indisk kycklinggryta med chutney och koriander + +
+ +
+ + kikÀrtor, blomkÄl, spenat, tomat och kokosgrÀdde +
+ +
+ +
+ +
+
+
+
+
+
+

+ + Tisdag +

+ +
+
+ +
+
+
+ + Lins och bönfylld paprika med spenat (Vegansk) + +
+ +
+ + chilidressing och grönsaksris +
+ +
+ +
+ +
+
+
+
+ +
+
+
+ + Pankopanerad fiskfilé med tartarsÄs + +
+ +
+ + rostad potatis och citron +
+ +
+ +
+ +
+
+
+
+ +
+
+
+ + Raggmunk med stekt flÀsk + +
+ +
+ + lingonsylt och persilja +
+ +
+ +
+ +
+
+
+
+ +
+
+
+ + Stekt flÀsk med löksÄs + +
+ +
+ + serveras med kokt potatis +
+ +
+ +
+ +
+
+
+
+
+
+

+ + Onsdag +

+ +
+
+ +
+
+
+ + KikÀrtsbiff med rostade grönsaker och potatis + +
+ +
+ + vitlöksdressing och koriander +
+ +
+ +
+ +
+
+
+
+ +
+
+
+ + Laxpudding med Àrtsallad och brynt smör emulsion + +
+ +
+ + Àrtsallad och citron +
+ +
+ +
+ +
+
+
+
+ +
+
+
+ + Kryddig lasagne med kyckling och salsicciafÀrs + +
+ +
+ + tomatsallad och ruccola +
+ +
+ +
+ +
+
+
+
+
+
+

+ + Torsdag +

+ +
+
+ +
+
+
+ + Vegansk Pasta Bolognese med "mozzarella" + +
+ +
+ + picklad lök och ruccola +
+ +
+ +
+ +
+
+
+
+ +
+
+
+ + Ärtsoppa med flĂ€sk + +
+ +
+ + samt pannkaksbuffé med massa godsaker! +
+ +
+ +
+ +
+
+
+
+ +
+
+
+ + FlÀskschnitzel med chilibearnaise + +
+ +
+ + rostad potatis och haricots verts +
+ +
+ +
+ +
+
+
+
+
+
+

+ + Fredag +

+ +
+
+ +
+
+
+ + STÄNGT - GLAD MIDSOMMAR + +
+ + +
+ +
+ +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

Dagens lunch

+
+
+
+
+
+

Vardagar 10:30 – 13:30

+
+
+
+
+

Mellan 13.30-13.45

+
    +
  • Vi vĂ€rnar om miljön och erbjuder 80 kr för en matlĂ„da av överbliven dagens lunch
  • +
  • Egen medhavd matlĂ„da: 75 kr
  • +
+
+
+

Vid bestÀllning eller förfrÄgan, ring: 073-068 40 93

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

Catering

+
+
+
+

Är du i full fart med att ordna företagets kick off, födelsedagen, eller vill du kanske bara fira nĂ„got med goda vĂ€nner?

+

Vi öppnar gÀrna upp restaurangen för slutna sÀllskap utöver vÄra ordinarie öppettider och ser till att ni fÄr en fantastisk kvÀll.

+

Vi har samlat förslag pÄ menyer som ni kan utgÄ ifrÄn men sjÀlvklart gÄr det bra att komma med egna önskemÄl, sÄ sÀtter vi tillsammans en meny som passar er tillstÀllning.

+

Vi fixar sÄklart catering om ni hellre hÄller festen hos er!

+

Se vÄr cateringmeny nedan!

+
+
+
+ +
+
+
+
+
+
+
+
+

“Vi sprider matglĂ€dje genom hela livet, frĂ„n allra första stund till Ă„lderns höst”

+
+
+
+

Michael BĂŒhring, VD

+
+
+
+
+
+ Nordrest - Michael BĂŒhring
+
+
+
+
+
+
+
+
+

Öppettider och kontakt för Restaurang Karavan

+
+
+
+
Kontakt:
+
+
+
+

Restaurang Karavan

+

ValhallavÀgen 199, 115 53 Stockholm

+

Ring: 073-068 40 93

+
+
+
+
+
Öppettider:
+
+
+
+

MĂ„ndag – fredag 10:30-13.30

+
+
+ +
+
+
+
+
+
+
+
+
+ + + + + + + +
+
+ + +
+
+
+
+
+
+
+

Skicka feedback

+
+
+
+

HÀr kan ni skicka feedback kring er restaurangupplevelse, ris som ros! +VÄrt mÄl varje dag Àr att erbjuda dig en schysst mÄltidsupplevelse!

+
+
+
+ + +
+
+
+
+
+
+
+
+
+
+
+
+
+
+

AnmÀl dig till vÄrt nyhetsbrev

+
+
+
+ + +
+
+
+
+
+
+
+
+
+
+
+
+
+
+

Kontakta oss

+
+
+
+

Delaktighet, öppenhet och transparens Àr frÄgor som ligger oss varmt om hjÀrtat. SÀrskilt nÀr det gÀller hÄllbarhet. Hör gÀrna av er med synpunkter och Äsikter kring hur vi kan bli Ànnu bÀttre i vÄrt hÄllbarhetsarbete.

+
+ +
+
+
+ + + + + + +
+
+ + +
+
+ + +
+
+ + + +
+
+ +
+
+
+
+ +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

BestÀll cateringmeny

+
+
+
+

BestÀll catering till din verksamhet genom att fylla i uppgifterna i formulÀret nedan.

+
+
+
+
+ + + + + + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + + +
+
+ + +
+
+ + +
+
+ +
+
+ +
+
+ + +
+
+
+ + + +
+
+
+
+ + + +
+
+
+ +
+
+
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/test_dish_classifier.py b/tests/test_dish_classifier.py index 7b6aec6..6f23474 100644 --- a/tests/test_dish_classifier.py +++ b/tests/test_dish_classifier.py @@ -22,7 +22,9 @@ def test_classify_vegetarian_dishes(self): "Morots och zucchinibiff med bulgursallad och myntayoghurt", ] for dish in vegetarian_dishes: - assert DishClassifier.classify_dish(dish) == 'vegetarian', f"Failed for: {dish}" + assert DishClassifier.classify_dish(dish) == "vegetarian", ( + f"Failed for: {dish}" + ) def test_classify_meat_dishes(self): """Test classification of meat dishes.""" @@ -39,7 +41,7 @@ def test_classify_meat_dishes(self): "Kalvschnitzel med citronsĂ„s", ] for dish in meat_dishes: - assert DishClassifier.classify_dish(dish) == 'meat', f"Failed for: {dish}" + assert DishClassifier.classify_dish(dish) == "meat", f"Failed for: {dish}" def test_classify_fish_dishes(self): """Test classification of fish dishes.""" @@ -53,56 +55,71 @@ def test_classify_fish_dishes(self): "FiskgratĂ€ng med ost", ] for dish in fish_dishes: - assert DishClassifier.classify_dish(dish) == 'fish', f"Failed for: {dish}" + assert DishClassifier.classify_dish(dish) == "fish", f"Failed for: {dish}" def test_classify_ambiguous_fish_dishes(self): """Test that dishes with 'med' keyword but no clear fish keyword default to meat.""" # "RĂ€ksmörgĂ„s" contains "med" which triggers the default-to-meat logic # even though "rĂ€k" is similar to "rĂ€kor" - assert DishClassifier.classify_dish("RĂ€ksmörgĂ„s med majonnĂ€s") == 'meat' + assert DishClassifier.classify_dish("RĂ€ksmörgĂ„s med majonnĂ€s") == "meat" def test_classify_special_swedish_meat_dishes(self): """Test classification of special Swedish dishes.""" # Ärtsoppa and pannkaka should be classified as meat - assert DishClassifier.classify_dish("Ärtsoppa med flĂ€sk") == 'meat' - assert 
DishClassifier.classify_dish("Pannkaka med sylt") == 'meat' + assert DishClassifier.classify_dish("Ärtsoppa med flĂ€sk") == "meat" + assert DishClassifier.classify_dish("Pannkaka med sylt") == "meat" def test_flĂ€sk_vs_fisk_priority(self): """Test that flĂ€sk (meat) has priority over fisk (fish) substring match.""" # FlĂ€sk contains "fisk" as substring, but should be meat - assert DishClassifier.classify_dish("FlĂ€skfilĂ© med grĂ€ddsĂ„s") == 'meat' - assert DishClassifier.classify_dish("Grillad flĂ€sk") == 'meat' + assert DishClassifier.classify_dish("FlĂ€skfilĂ© med grĂ€ddsĂ„s") == "meat" + assert DishClassifier.classify_dish("Grillad flĂ€sk") == "meat" def test_classify_category_markers(self): """Test detection of category markers.""" # Vegetarian markers - assert DishClassifier.classify_dish("Vegetariskt:") == 'marker:vegetarian' - assert DishClassifier.classify_dish("Vego:") == 'marker:vegetarian' - assert DishClassifier.classify_dish("Veganskt alternativ:") == 'marker:vegetarian' + assert DishClassifier.classify_dish("Vegetariskt:") == "marker:vegetarian" + assert DishClassifier.classify_dish("Vego:") == "marker:vegetarian" + assert ( + DishClassifier.classify_dish("Veganskt alternativ:") == "marker:vegetarian" + ) # Fish markers - assert DishClassifier.classify_dish("Fisk:") == 'marker:fish' - assert DishClassifier.classify_dish("Dagens fisk:") == 'marker:fish' + assert DishClassifier.classify_dish("Fisk:") == "marker:fish" + assert DishClassifier.classify_dish("Dagens fisk:") == "marker:fish" # Meat markers - assert DishClassifier.classify_dish("Kött:") == 'marker:meat' - assert DishClassifier.classify_dish("Dagens kött:") == 'marker:meat' + assert DishClassifier.classify_dish("Kött:") == "marker:meat" + assert DishClassifier.classify_dish("Dagens kött:") == "marker:meat" def test_classify_marker_with_dish_on_same_line(self): """Test markers that appear at the start of a line with dish text.""" - assert DishClassifier.classify_dish("Fisk: Lax med dillsĂ„s") 
== 'marker:fish' - assert DishClassifier.classify_dish("Kött: Biff med lök") == 'marker:meat' - assert DishClassifier.classify_dish("Vegetariskt: Halloumi") == 'marker:vegetarian' + assert DishClassifier.classify_dish("Fisk: Lax med dillsĂ„s") == "marker:fish" + assert DishClassifier.classify_dish("Kött: Biff med lök") == "marker:meat" + assert ( + DishClassifier.classify_dish("Vegetariskt: Halloumi") == "marker:vegetarian" + ) def test_classify_with_previous_category(self): """Test that previous_category is used as fallback after keyword matching.""" # If previous line was a fish marker, classify as fish ONLY if no explicit keywords - assert DishClassifier.classify_dish("Stekt med potatis", previous_category='fish') == 'fish' - assert DishClassifier.classify_dish("Dagens rĂ€tt", previous_category='vegetarian') == 'vegetarian' + assert ( + DishClassifier.classify_dish("Stekt med potatis", previous_category="fish") + == "fish" + ) + assert ( + DishClassifier.classify_dish("Dagens rĂ€tt", previous_category="vegetarian") + == "vegetarian" + ) # Keywords override previous_category (this is intentional to fix Gourmedia Thursday bug) # "grönsaker" is a vegetarian keyword, so it overrides previous_category='meat' - assert DishClassifier.classify_dish("Grillad med grönsaker", previous_category='meat') == 'vegetarian' + assert ( + DishClassifier.classify_dish( + "Grillad med grönsaker", previous_category="meat" + ) + == "vegetarian" + ) def test_classify_empty_and_short_strings(self): """Test handling of edge cases.""" @@ -114,34 +131,36 @@ def test_classify_empty_and_short_strings(self): def test_classify_ambiguous_dishes_with_serveras(self): """Test default behavior for ambiguous dishes.""" # Dishes with "serveras", "med", etc. 
default to meat - assert DishClassifier.classify_dish("Dagens rĂ€tt serveras med potatis") == 'meat' - assert DishClassifier.classify_dish("RĂ€tt till dagens") == 'meat' + assert ( + DishClassifier.classify_dish("Dagens rĂ€tt serveras med potatis") == "meat" + ) + assert DishClassifier.classify_dish("RĂ€tt till dagens") == "meat" def test_classify_mixed_keywords(self): """Test dishes with multiple keywords.""" # When both fish and meat keywords appear, meat should win (checked first) - assert DishClassifier.classify_dish("FlĂ€skfilĂ© med fiskrĂ€k") == 'meat' + assert DishClassifier.classify_dish("FlĂ€skfilĂ© med fiskrĂ€k") == "meat" # Explicit vegan/vegetarian markers now have HIGHEST priority (fixes Ärtsoppa/Vegan bug) # "Vego" is an explicit vegetarian marker that overrides "köttbullar" meat keyword - assert DishClassifier.classify_dish("Vego köttbullar") == 'vegetarian' + assert DishClassifier.classify_dish("Vego köttbullar") == "vegetarian" # Pure vegetarian dishes without meat keywords work fine - assert DishClassifier.classify_dish("Vego burgare") == 'vegetarian' + assert DishClassifier.classify_dish("Vego burgare") == "vegetarian" # Without explicit veg marker, meat keywords win - assert DishClassifier.classify_dish("Köttbullar") == 'meat' + assert DishClassifier.classify_dish("Köttbullar") == "meat" def test_case_insensitivity(self): """Test that classification is case-insensitive.""" - assert DishClassifier.classify_dish("KYCKLING MED RIS") == 'meat' - assert DishClassifier.classify_dish("LaX mEd DiLlSĂ„S") == 'fish' - assert DishClassifier.classify_dish("HaLLoUmI sAlLaD") == 'vegetarian' + assert DishClassifier.classify_dish("KYCKLING MED RIS") == "meat" + assert DishClassifier.classify_dish("LaX mEd DiLlSĂ„S") == "fish" + assert DishClassifier.classify_dish("HaLLoUmI sAlLaD") == "vegetarian" def test_whitespace_handling(self): """Test that extra whitespace is handled correctly.""" - assert DishClassifier.classify_dish(" Kyckling med ris ") == 'meat' - 
assert DishClassifier.classify_dish("\tFalafel med hummus\n") == 'vegetarian' + assert DishClassifier.classify_dish(" Kyckling med ris ") == "meat" + assert DishClassifier.classify_dish("\tFalafel med hummus\n") == "vegetarian" class TestClassifyDishes: @@ -156,9 +175,9 @@ def test_classify_dishes_with_vegetarian_marker(self): ] result = DishClassifier.classify_dishes(dishes) - assert len(result['vegetarian']) == 2 - assert "Falafel med hummus" in result['vegetarian'] - assert "Halloumi med sallad" in result['vegetarian'] + assert len(result["vegetarian"]) == 2 + assert "Falafel med hummus" in result["vegetarian"] + assert "Halloumi med sallad" in result["vegetarian"] def test_classify_dishes_with_fish_marker(self): """Test classification when fish marker is present.""" @@ -169,9 +188,9 @@ def test_classify_dishes_with_fish_marker(self): ] result = DishClassifier.classify_dishes(dishes) - assert len(result['fish']) == 2 - assert "Kokt torsk med dillsĂ„s" in result['fish'] - assert "Stekt lax med potatis" in result['fish'] + assert len(result["fish"]) == 2 + assert "Kokt torsk med dillsĂ„s" in result["fish"] + assert "Stekt lax med potatis" in result["fish"] def test_classify_dishes_with_meat_marker(self): """Test classification when meat marker is present.""" @@ -182,9 +201,9 @@ def test_classify_dishes_with_meat_marker(self): ] result = DishClassifier.classify_dishes(dishes) - assert len(result['meat']) == 2 - assert "Biff med bearnaisesĂ„s" in result['meat'] - assert "Kycklinggryta med ris" in result['meat'] + assert len(result["meat"]) == 2 + assert "Biff med bearnaisesĂ„s" in result["meat"] + assert "Kycklinggryta med ris" in result["meat"] def test_classify_dishes_with_multiple_markers(self): """Test classification with multiple category markers.""" @@ -198,12 +217,12 @@ def test_classify_dishes_with_multiple_markers(self): ] result = DishClassifier.classify_dishes(dishes) - assert len(result['vegetarian']) == 1 - assert len(result['fish']) == 1 - assert 
len(result['meat']) == 1 - assert "Falafel med hummus" in result['vegetarian'] - assert "Lax med dillsĂ„s" in result['fish'] - assert "Biff med lök" in result['meat'] + assert len(result["vegetarian"]) == 1 + assert len(result["fish"]) == 1 + assert len(result["meat"]) == 1 + assert "Falafel med hummus" in result["vegetarian"] + assert "Lax med dillsĂ„s" in result["fish"] + assert "Biff med lök" in result["meat"] def test_classify_dishes_without_markers(self): """Test classification based on keywords alone.""" @@ -214,9 +233,9 @@ def test_classify_dishes_without_markers(self): ] result = DishClassifier.classify_dishes(dishes) - assert "Kyckling med ris" in result['meat'] - assert "Lax med dillsĂ„s" in result['fish'] - assert "Falafel med hummus" in result['vegetarian'] + assert "Kyckling med ris" in result["meat"] + assert "Lax med dillsĂ„s" in result["fish"] + assert "Falafel med hummus" in result["vegetarian"] def test_classify_dishes_skips_empty_and_short_items(self): """Test that empty and very short items are skipped.""" @@ -229,8 +248,8 @@ def test_classify_dishes_skips_empty_and_short_items(self): result = DishClassifier.classify_dishes(dishes) # Only the valid dish should be classified - assert len(result['meat']) == 1 - assert "Kyckling med ris" in result['meat'] + assert len(result["meat"]) == 1 + assert "Kyckling med ris" in result["meat"] def test_classify_dishes_default_to_meat(self): """Test that unclassified dishes default to meat.""" @@ -241,7 +260,7 @@ def test_classify_dishes_default_to_meat(self): result = DishClassifier.classify_dishes(dishes) # These should default to meat - assert len(result['meat']) == 2 + assert len(result["meat"]) == 2 def test_classify_dishes_marker_continues_until_new_marker(self): """Test that category marker continues until a new marker or clear different category.""" @@ -252,8 +271,8 @@ def test_classify_dishes_marker_continues_until_new_marker(self): ] result = DishClassifier.classify_dishes(dishes) - assert "Falafel 
med hummus" in result['vegetarian'] - assert "Halloumi med sallad" in result['vegetarian'] + assert "Falafel med hummus" in result["vegetarian"] + assert "Halloumi med sallad" in result["vegetarian"] def test_classify_dishes_marker_reset_with_new_marker(self): """Test that new markers properly reset the category.""" @@ -265,14 +284,14 @@ def test_classify_dishes_marker_reset_with_new_marker(self): ] result = DishClassifier.classify_dishes(dishes) - assert "Falafel med hummus" in result['vegetarian'] - assert "Kyckling med ris" in result['meat'] + assert "Falafel med hummus" in result["vegetarian"] + assert "Kyckling med ris" in result["meat"] def test_classify_dishes_empty_list(self): """Test classification with empty list.""" result = DishClassifier.classify_dishes([]) - assert result == {'vegetarian': [], 'meat': [], 'fish': []} + assert result == {"vegetarian": [], "meat": [], "fish": [], "dessert": []} def test_classify_dishes_real_world_example(self): """Test with a realistic menu.""" @@ -287,9 +306,9 @@ def test_classify_dishes_real_world_example(self): ] result = DishClassifier.classify_dishes(dishes) - assert len(result['vegetarian']) == 1 - assert len(result['fish']) == 1 - assert len(result['meat']) == 2 # Chicken and "Serveras med ris och sallad" + assert len(result["vegetarian"]) == 1 + assert len(result["fish"]) == 1 + assert len(result["meat"]) == 2 # Chicken and "Serveras med ris och sallad" class TestMergeCategoriesForDisplay: @@ -298,54 +317,46 @@ class TestMergeCategoriesForDisplay: def test_merge_fish_into_meat(self): """Test that fish dishes are merged into meat category.""" categorized = { - 'vegetarian': ['Falafel med hummus'], - 'meat': ['Kyckling med ris', 'Biff med lök'], - 'fish': ['Lax med dillsĂ„s', 'Torsk med Ă€gg'] + "vegetarian": ["Falafel med hummus"], + "meat": ["Kyckling med ris", "Biff med lök"], + "fish": ["Lax med dillsĂ„s", "Torsk med Ă€gg"], } result = DishClassifier.merge_categories_for_display(categorized) - assert 
len(result['vegetarian']) == 1 - assert len(result['meat']) == 4 # 2 meat + 2 fish - assert 'Falafel med hummus' in result['vegetarian'] - assert 'Kyckling med ris' in result['meat'] - assert 'Lax med dillsĂ„s' in result['meat'] - assert 'Torsk med Ă€gg' in result['meat'] + assert len(result["vegetarian"]) == 1 + assert len(result["meat"]) == 4 # 2 meat + 2 fish + assert "Falafel med hummus" in result["vegetarian"] + assert "Kyckling med ris" in result["meat"] + assert "Lax med dillsĂ„s" in result["meat"] + assert "Torsk med Ă€gg" in result["meat"] def test_merge_empty_fish_category(self): """Test merge when fish category is empty.""" - categorized = { - 'vegetarian': ['Falafel'], - 'meat': ['Kyckling'], - 'fish': [] - } + categorized = {"vegetarian": ["Falafel"], "meat": ["Kyckling"], "fish": []} result = DishClassifier.merge_categories_for_display(categorized) - assert len(result['vegetarian']) == 1 - assert len(result['meat']) == 1 + assert len(result["vegetarian"]) == 1 + assert len(result["meat"]) == 1 def test_merge_missing_categories(self): """Test merge when some categories are missing.""" categorized = { - 'vegetarian': ['Falafel'], + "vegetarian": ["Falafel"], } result = DishClassifier.merge_categories_for_display(categorized) - assert result['vegetarian'] == ['Falafel'] - assert result['meat'] == [] + assert result["vegetarian"] == ["Falafel"] + assert result["meat"] == [] def test_merge_only_fish(self): """Test merge when only fish dishes exist.""" - categorized = { - 'vegetarian': [], - 'meat': [], - 'fish': ['Lax', 'Torsk'] - } + categorized = {"vegetarian": [], "meat": [], "fish": ["Lax", "Torsk"]} result = DishClassifier.merge_categories_for_display(categorized) - assert result['vegetarian'] == [] - assert len(result['meat']) == 2 - assert 'Lax' in result['meat'] + assert result["vegetarian"] == [] + assert len(result["meat"]) == 2 + assert "Lax" in result["meat"] diff --git a/tests/test_iss_scraper.py b/tests/test_iss_scraper.py deleted file mode 
100644 index 383a0a6..0000000 --- a/tests/test_iss_scraper.py +++ /dev/null @@ -1,432 +0,0 @@ -"""Tests for ISSMenuScraper.""" - -import json -import base64 -from datetime import date -from unittest.mock import patch, MagicMock -import pytest -import responses -from lunchscraper.iss_scraper import ISSMenuScraper -from tests.conftest import get_fixture_dates_with_file, load_fixture_file, load_json_fixture - - -class TestISSMenuScraper: - """Tests for ISSMenuScraper class.""" - - @pytest.fixture - def scraper(self): - """Create a scraper instance for testing.""" - return ISSMenuScraper( - restaurant_url="https://www.iss-menyer.se/restaurang-gourmedia", - restaurant_id="Restaurang Gourmedia", - restaurant_name="Gourmedia" - ) - - - def test_init(self, scraper): - """Test scraper initialization.""" - assert scraper.restaurant_name == "Gourmedia" - assert scraper.restaurant_url == "https://www.iss-menyer.se/restaurang-gourmedia" - assert scraper.restaurant_id == "Restaurang Gourmedia" - assert scraper.session is not None - assert scraper._session_established is False - - def test_get_week_number(self, scraper): - """Test week number calculation.""" - # Week 1 of 2025 - test_date = date(2025, 1, 6) - assert scraper._get_week_number(test_date) == 2 - - # Week 52 of 2024 - test_date = date(2024, 12, 30) - assert scraper._get_week_number(test_date) == 1 - - def test_build_api_query(self, scraper): - """Test API query building.""" - query = scraper._build_api_query(week_number=45) - - # Decode the base64 query - decoded = base64.urlsafe_b64decode(query).decode('utf-8') - query_data = json.loads(decoded) - - assert query_data['dataCollectionId'] == 'Meny' - assert query_data['query']['filter']['weekNumber'] == 45 - assert query_data['query']['filter']['restrauntId'] == 'Restaurang Gourmedia' - assert query_data['appId'] == '16d45e35-d3d8-4d5e-b24d-2a680b7e5089' - - def test_parse_day_menu_from_text(self, scraper): - """Test parsing menu text for a single day.""" - menu_text = 
"Kött:\nBiff med bearnaisesĂ„s\nVegetariskt:\nFalafel med hummus" - result = scraper._parse_day_menu_from_text(menu_text) - - assert 'vegetarian' in result - assert 'meat' in result - assert 'fish' in result - assert 'Falafel med hummus' in result['vegetarian'] - assert 'Biff med bearnaisesĂ„s' in result['meat'] - - def test_parse_day_menu_from_text_with_tabs(self, scraper): - """Test parsing menu text with tab separators.""" - menu_text = "Kött:\tBiff med lök\tserveras med potatis" - result = scraper._parse_day_menu_from_text(menu_text) - - assert len(result['meat']) == 2 - assert 'Biff med lök' in result['meat'] - assert 'serveras med potatis' in result['meat'] - - def test_parse_day_menu_from_text_empty(self, scraper): - """Test parsing empty menu text.""" - result = scraper._parse_day_menu_from_text("") - - assert result == {'vegetarian': [], 'fish': [], 'meat': []} - - def test_parse_api_response(self, scraper, iss_api_response): - """Test parsing the API response.""" - weekly_menu = scraper._parse_api_response(iss_api_response) - - assert 'mĂ„ndag' in weekly_menu - assert 'tisdag' in weekly_menu - assert 'onsdag' in weekly_menu - assert 'torsdag' in weekly_menu - assert 'fredag' in weekly_menu - - # Check Monday's menu - monday_menu = weekly_menu['mĂ„ndag'] - assert 'KycklingfilĂ© med currysĂ„s och ris' in monday_menu['meat'] - assert 'Falafel med hummus och sallad' in monday_menu['vegetarian'] - - # Check Tuesday's menu - tuesday_menu = weekly_menu['tisdag'] - assert 'Lax med dillsĂ„s och potatis' in tuesday_menu['fish'] - assert 'Biff med bearnaisesĂ„s' in tuesday_menu['meat'] - - def test_parse_api_response_empty_items(self, scraper): - """Test parsing API response with no items.""" - api_data = {"dataItems": []} - - with pytest.raises(Exception, match="No menu items found"): - scraper._parse_api_response(api_data) - - def test_parse_api_response_no_menu_swedish(self, scraper): - """Test parsing API response without menuSwedish.""" - api_data = { - 
"dataItems": [ - {"data": {}} - ] - } - - with pytest.raises(Exception, match="No menuSwedish data found"): - scraper._parse_api_response(api_data) - - @responses.activate - def test_establish_session(self, scraper, iss_home_html, iss_gourmedia_html): - """Test session establishment.""" - # Mock home page request - responses.add( - responses.GET, - 'https://www.iss-menyer.se/', - body=iss_home_html, - status=200 - ) - - # Mock restaurant page request - responses.add( - responses.GET, - 'https://www.iss-menyer.se/restaurang-gourmedia', - body=iss_gourmedia_html, - status=200 - ) - - scraper._establish_session() - - assert scraper._session_established is True - # Real page should contain auth token - assert scraper._auth_token is not None or scraper._auth_token == "" - - @responses.activate - def test_establish_session_failure(self, scraper): - """Test session establishment with network failure.""" - # Mock failed home page request - responses.add( - responses.GET, - 'https://www.iss-menyer.se/', - status=500 - ) - - # Should not raise, just log warning - scraper._establish_session() - - assert scraper._session_established is False - - @responses.activate - def test_fetch_menu_from_api(self, scraper, iss_api_response, iss_home_html, iss_gourmedia_html): - """Test fetching menu from API.""" - # Mock session establishment - responses.add( - responses.GET, - 'https://www.iss-menyer.se/', - body=iss_home_html, - status=200 - ) - responses.add( - responses.GET, - 'https://www.iss-menyer.se/restaurang-gourmedia', - body=iss_gourmedia_html, - status=200 - ) - - # Mock API request - responses.add( - responses.GET, - 'https://www.iss-menyer.se/_api/cloud-data/v2/items/query', - json=iss_api_response, - status=200 - ) - - result = scraper._fetch_menu_from_api(week_number=45) - - assert result == iss_api_response - - @responses.activate - def test_fetch_menu_from_api_failure(self, scraper, iss_home_html, iss_gourmedia_html): - """Test API fetch failure.""" - # Mock session 
establishment - responses.add( - responses.GET, - 'https://www.iss-menyer.se/', - body=iss_home_html, - status=200 - ) - responses.add( - responses.GET, - 'https://www.iss-menyer.se/restaurang-gourmedia', - body=iss_gourmedia_html, - status=200 - ) - - # Mock API request failure - responses.add( - responses.GET, - 'https://www.iss-menyer.se/_api/cloud-data/v2/items/query', - status=500 - ) - - with pytest.raises(Exception, match="Failed to fetch menu from API"): - scraper._fetch_menu_from_api(week_number=45) - - @responses.activate - def test_get_menu_for_day(self, scraper, iss_api_response, iss_home_html, iss_gourmedia_html): - """Test getting menu for a specific day.""" - # Mock session and API - responses.add( - responses.GET, - 'https://www.iss-menyer.se/', - body=iss_home_html, - status=200 - ) - responses.add( - responses.GET, - 'https://www.iss-menyer.se/restaurang-gourmedia', - body=iss_gourmedia_html, - status=200 - ) - responses.add( - responses.GET, - 'https://www.iss-menyer.se/_api/cloud-data/v2/items/query', - json=iss_api_response, - status=200 - ) - - # Test Monday (weekday 0) - test_date = date(2025, 1, 6) # This is a Monday - menu = scraper.get_menu_for_day(test_date) - - assert 'vegetarian' in menu - assert 'meat' in menu - assert 'Falafel med hummus och sallad' in menu['vegetarian'] - assert 'KycklingfilĂ© med currysĂ„s och ris' in menu['meat'] - - @responses.activate - def test_get_weekly_menu(self, scraper, iss_api_response, iss_home_html, iss_gourmedia_html): - """Test getting the full weekly menu.""" - # Mock session and API - responses.add( - responses.GET, - 'https://www.iss-menyer.se/', - body=iss_home_html, - status=200 - ) - responses.add( - responses.GET, - 'https://www.iss-menyer.se/restaurang-gourmedia', - body=iss_gourmedia_html, - status=200 - ) - responses.add( - responses.GET, - 'https://www.iss-menyer.se/_api/cloud-data/v2/items/query', - json=iss_api_response, - status=200 - ) - - weekly_menu = scraper.get_weekly_menu() - - 
assert 'mĂ„ndag' in weekly_menu - assert 'tisdag' in weekly_menu - assert 'onsdag' in weekly_menu - assert 'torsdag' in weekly_menu - assert 'fredag' in weekly_menu - - # Verify some content - assert 'Falafel med hummus och sallad' in weekly_menu['mĂ„ndag']['vegetarian'] - assert 'Lax med dillsĂ„s och potatis' in weekly_menu['tisdag']['fish'] - - @responses.activate - def test_get_menu_for_day_not_found(self, scraper, iss_home_html, iss_gourmedia_html): - """Test getting menu when day is not in the response.""" - # Mock with empty menuSwedish - empty_response = { - "dataItems": [ - { - "data": { - "menuSwedish": [ - {"menu": ""}, - {"menu": ""}, - {"menu": ""}, - {"menu": ""}, - {"menu": ""}, - {"menu": ""}, - {"menu": ""} - ] - } - } - ] - } - - responses.add( - responses.GET, - 'https://www.iss-menyer.se/', - body=iss_home_html, - status=200 - ) - responses.add( - responses.GET, - 'https://www.iss-menyer.se/restaurang-gourmedia', - body=iss_gourmedia_html, - status=200 - ) - responses.add( - responses.GET, - 'https://www.iss-menyer.se/_api/cloud-data/v2/items/query', - json=empty_response, - status=200 - ) - - test_date = date(2025, 1, 6) # Monday - - with pytest.raises(Exception, match="Could not parse any menu data"): - scraper.get_menu_for_day(test_date) - - -# ============================================================================ -# Integration Tests with Real Fetched Fixtures -# ============================================================================ - - -class TestISSMenuScraperWithRealFixtures: - """ - Integration tests using real fetched HTML and API fixtures. - - These tests automatically discover and run against all available fixture dates, - ensuring that the scraper works with actual production HTML and API responses. 
- """ - - @pytest.mark.parametrize( - "fixture_date", - get_fixture_dates_with_file("iss_api_response.json"), - ids=lambda d: f"api_{d.strftime('%Y_%m_%d')}" - ) - def test_parse_real_api_response(self, fixture_date): - """ - Test parsing real ISS API responses from fetched fixtures. - - This test runs against all available fixture dates automatically. - When new fixtures are added, this test will automatically include them. - """ - api_data = load_json_fixture("iss_api_response.json", fixture_date) - scraper = ISSMenuScraper( - restaurant_url="https://www.iss-menyer.se/restaurants/restaurang-gourmedia", - restaurant_id="Restaurang Gourmedia", - restaurant_name="Gourmedia" - ) - - result = scraper._parse_api_response(api_data) - - # Basic validation that we got a menu - assert isinstance(result, dict), f"Failed to parse API response for {fixture_date}" - assert len(result) > 0, f"No menu items found for {fixture_date}" - - # Check that we have expected weekdays - valid_days = {'mĂ„ndag', 'tisdag', 'onsdag', 'torsdag', 'fredag', 'lördag', 'söndag'} - for day in result.keys(): - assert day in valid_days, f"Unexpected day '{day}' in menu for {fixture_date}" - - # Check that each day has the expected structure - for day, menu in result.items(): - assert 'vegetarian' in menu, f"Missing 'vegetarian' category for {day} on {fixture_date}" - assert 'fish' in menu, f"Missing 'fish' category for {day} on {fixture_date}" - assert 'meat' in menu, f"Missing 'meat' category for {day} on {fixture_date}" - - # Check that dishes are lists - assert isinstance(menu['vegetarian'], list), f"'vegetarian' should be a list for {day} on {fixture_date}" - assert isinstance(menu['fish'], list), f"'fish' should be a list for {day} on {fixture_date}" - assert isinstance(menu['meat'], list), f"'meat' should be a list for {day} on {fixture_date}" - - @pytest.mark.parametrize( - "fixture_date", - get_fixture_dates_with_file("iss_gourmedia.html"), - ids=lambda d: f"html_{d.strftime('%Y_%m_%d')}" - 
) - def test_real_html_fixtures_exist_and_loadable(self, fixture_date): - """ - Test that real Gourmedia HTML fixtures exist and can be loaded. - - This validates that the fetched HTML files are accessible and - contain valid HTML that can be used for testing. - """ - home_html = load_fixture_file("iss_home.html", fixture_date) - gourmedia_html = load_fixture_file("iss_gourmedia.html", fixture_date) - - # Basic validation - files loaded successfully - assert home_html is not None, f"Failed to load home HTML for {fixture_date}" - assert gourmedia_html is not None, f"Failed to load gourmedia HTML for {fixture_date}" - assert len(home_html) > 0, f"Home HTML is empty for {fixture_date}" - assert len(gourmedia_html) > 0, f"Gourmedia HTML is empty for {fixture_date}" - - # Check for expected HTML markers - assert '' in home_html or '' in gourmedia_html or ' 0 - assert len(iss_gourmedia_html) > 0 - - # Test API response parsing - result = scraper._parse_api_response(iss_api_response) - assert isinstance(result, dict) - assert len(result) > 0 diff --git a/tests/test_nordrest_scraper.py b/tests/test_nordrest_scraper.py new file mode 100644 index 0000000..37ea7a3 --- /dev/null +++ b/tests/test_nordrest_scraper.py @@ -0,0 +1,317 @@ +"""Tests for NordrestMenuScraper.""" + +from datetime import date +import pytest +import responses +from bs4 import BeautifulSoup +from lunchscraper.nordrest_scraper import NordrestMenuScraper +from tests.conftest import get_fixture_dates_with_file, load_fixture_file + + +def _make_dish(title: str, desc: str = "") -> str: + """Helper to build a castit-dish HTML snippet.""" + desc_html = f'
{desc}
' if desc else "" + return f""" +
+
+
{title}
+ {desc_html} +
+
+ """ + + +def _make_day_section(sv_name: str, en_name: str, dishes_html: str, extra_classes: str = "") -> str: + """Helper to build a castit-day section.""" + return f""" +
+

+ {sv_name} +

+
+ {dishes_html} +
+
+ """ + + +SAMPLE_HTML = """ + +{monday} +{tuesday} +{wednesday} +{thursday} +{friday} + +""".format( + monday=_make_day_section("MÄndag", "Monday", + _make_dish("Kycklingfilé med currysÄs", "serveras med ris") + + _make_dish("Falafel med hummus", "tzatziki och pitabröd") + ), + tuesday=_make_day_section("Tisdag", "Tuesday", + _make_dish("Ugnsbakad lax med dill", "kokt potatis") + + _make_dish("KöttfÀrssÄs med pasta", "riven ost") + ), + wednesday=_make_day_section("Onsdag", "Wednesday", + _make_dish("Vegansk Pasta Bolognese", "lök och tomat") + ), + thursday=_make_day_section("Torsdag", "Thursday", + _make_dish("Biff med bearnaisesÄs", "pommes och sallad") + ), + friday=_make_day_section("Fredag", "Friday", + _make_dish("FiskgratÀng med ost", "kokt potatis") + ), +) + +SAMPLE_HTML_WITH_WEEKLY_SPECIAL = """ + +
+

+ Veckans rÀtter +

+
+ {special} +
+
+{monday} + +""".format( + special=_make_dish("Salladsbar", "25 kr/hg"), + monday=_make_day_section("MÄndag", "Monday", + _make_dish("Kycklingfilé med currysÄs", "ris") + ), +) + + +class TestNordrestMenuScraper: + """Tests for NordrestMenuScraper class.""" + + @pytest.fixture + def scraper(self): + return NordrestMenuScraper( + restaurant_url="https://www.nordrest.se/restaurang/karavan/", + restaurant_name="Karavan" + ) + + def test_init(self, scraper): + assert scraper.restaurant_name == "Karavan" + assert scraper.restaurant_url == "https://www.nordrest.se/restaurang/karavan/" + assert scraper.session is not None + + # ------------------------------------------------------------------ + # _fetch_page + # ------------------------------------------------------------------ + + @responses.activate + def test_fetch_page_success(self, scraper): + responses.add(responses.GET, scraper.restaurant_url, body=SAMPLE_HTML, status=200) + soup = scraper._fetch_page() + assert isinstance(soup, BeautifulSoup) + + @responses.activate + def test_fetch_page_failure(self, scraper): + responses.add(responses.GET, scraper.restaurant_url, status=500) + with pytest.raises(Exception, match="Failed to fetch menu page"): + scraper._fetch_page() + + # ------------------------------------------------------------------ + # _extract_dishes_from_section + # ------------------------------------------------------------------ + + def test_extract_dishes_combines_title_and_desc(self, scraper): + html = _make_day_section("MÄndag", "Monday", + _make_dish("Kycklingfilé med currysÄs", "serveras med ris") + ) + soup = BeautifulSoup(html, 'html.parser') + section = soup.find('section') + dishes = scraper._extract_dishes_from_section(section) + assert len(dishes) == 1 + assert "Kycklingfilé med currysÄs" in dishes[0] + assert "serveras med ris" in dishes[0] + + def test_extract_dishes_no_desc(self, scraper): + html = _make_day_section("MÄndag", "Monday", _make_dish("Grönsakssoppa")) + soup = 
BeautifulSoup(html, 'html.parser') + section = soup.find('section') + dishes = scraper._extract_dishes_from_section(section) + assert dishes == ["Grönsakssoppa"] + + def test_extract_dishes_skips_too_short(self, scraper): + html = _make_day_section("MÄndag", "Monday", _make_dish("AB")) + soup = BeautifulSoup(html, 'html.parser') + section = soup.find('section') + dishes = scraper._extract_dishes_from_section(section) + assert dishes == [] + + # ------------------------------------------------------------------ + # _parse_dishes + # ------------------------------------------------------------------ + + def test_parse_dishes_returns_menu_key(self, scraper): + result = scraper._parse_dishes(["Kycklingfilé med currysÄs, serveras med ris"]) + assert 'menu' in result + assert len(result['menu']) == 1 + + def test_parse_dishes_strips_dietary_codes_and_appends_label(self, scraper): + result = scraper._parse_dishes(["Pasta med grönsakssÄs (Vegansk)"]) + assert result['menu'][0].endswith("(vegansk)") + + def test_parse_dishes_strips_price_suffix(self, scraper): + result = scraper._parse_dishes(["Salladsbar 25.90"]) + assert "25.90" not in result['menu'][0] + + def test_parse_dishes_empty_returns_empty_keys(self, scraper): + result = scraper._parse_dishes([]) + assert result == {'menu': [], 'vegetarian': [], 'fish': [], 'meat': []} + + def test_parse_dishes_skips_too_short(self, scraper): + result = scraper._parse_dishes(["ab", "ok"]) + assert result['menu'] == [] + + # ------------------------------------------------------------------ + # _parse_weekly_menu + # ------------------------------------------------------------------ + + def test_parse_weekly_menu_finds_all_weekdays(self, scraper): + soup = BeautifulSoup(SAMPLE_HTML, 'html.parser') + menu = scraper._parse_weekly_menu(soup) + assert set(menu.keys()) == {'mÄndag', 'tisdag', 'onsdag', 'torsdag', 'fredag'} + + def test_parse_weekly_menu_each_day_has_menu_items(self, scraper): + soup = BeautifulSoup(SAMPLE_HTML, 
'html.parser') + menu = scraper._parse_weekly_menu(soup) + for day, day_menu in menu.items(): + assert len(day_menu['menu']) > 0, f"No items for {day}" + + def test_parse_weekly_menu_monday_contains_expected_dishes(self, scraper): + soup = BeautifulSoup(SAMPLE_HTML, 'html.parser') + menu = scraper._parse_weekly_menu(soup) + items = menu['mÄndag']['menu'] + assert any("Kycklingfilé med currysÄs" in d for d in items) + assert any("Falafel med hummus" in d for d in items) + + def test_parse_weekly_menu_skips_week_specials_column(self, scraper): + """Week-specials column should not appear as a day in the weekly menu.""" + soup = BeautifulSoup(SAMPLE_HTML_WITH_WEEKLY_SPECIAL, 'html.parser') + menu = scraper._parse_weekly_menu(soup) + assert 'veckans rÀtter' not in menu + assert 'mÄndag' in menu + + def test_parse_weekly_menu_appends_weekly_specials_to_each_day(self, scraper): + """Dishes from Veckans rÀtt should appear in every day's menu.""" + soup = BeautifulSoup(SAMPLE_HTML_WITH_WEEKLY_SPECIAL, 'html.parser') + menu = scraper._parse_weekly_menu(soup) + items = menu['mÄndag']['menu'] + assert any("Salladsbar" in d for d in items) + + def test_parse_weekly_menu_no_castit_sections_returns_empty(self, scraper): + soup = BeautifulSoup("

No menu

", 'html.parser') + assert scraper._parse_weekly_menu(soup) == {} + + def test_parse_weekly_menu_ignores_unrecognised_day_names(self, scraper): + html = _make_day_section("Helgdag", "Holiday", _make_dish("Grönsakssoppa med bröd")) + soup = BeautifulSoup(f"{html}", 'html.parser') + assert scraper._parse_weekly_menu(soup) == {} + + # ------------------------------------------------------------------ + # get_menu_for_day + # ------------------------------------------------------------------ + + @responses.activate + def test_get_menu_for_day_monday(self, scraper): + responses.add(responses.GET, scraper.restaurant_url, body=SAMPLE_HTML, status=200) + menu = scraper.get_menu_for_day(date(2025, 11, 10)) # Monday + assert 'menu' in menu + assert any("Kycklingfilé med currysÄs" in d for d in menu['menu']) + + @responses.activate + def test_get_menu_for_day_tuesday(self, scraper): + responses.add(responses.GET, scraper.restaurant_url, body=SAMPLE_HTML, status=200) + menu = scraper.get_menu_for_day(date(2025, 11, 11)) # Tuesday + assert any("Ugnsbakad lax med dill" in d for d in menu['menu']) + + @responses.activate + def test_get_menu_for_day_not_found_raises(self, scraper): + html = "{}".format( + _make_day_section("MÄndag", "Monday", _make_dish("Kycklingfilé med currysÄs")) + ) + responses.add(responses.GET, scraper.restaurant_url, body=html, status=200) + with pytest.raises(Exception, match="No menu found for tisdag"): + scraper.get_menu_for_day(date(2025, 11, 11)) # Tuesday not in HTML + + @responses.activate + def test_get_menu_for_day_fetch_failure_raises(self, scraper): + responses.add(responses.GET, scraper.restaurant_url, status=503) + with pytest.raises(Exception, match="Failed to fetch menu"): + scraper.get_menu_for_day(date(2025, 11, 10)) + + # ------------------------------------------------------------------ + # get_weekly_menu + # ------------------------------------------------------------------ + + @responses.activate + def 
test_get_weekly_menu_returns_all_days(self, scraper): + responses.add(responses.GET, scraper.restaurant_url, body=SAMPLE_HTML, status=200) + weekly = scraper.get_weekly_menu() + assert set(weekly.keys()) == {'mÄndag', 'tisdag', 'onsdag', 'torsdag', 'fredag'} + + @responses.activate + def test_get_weekly_menu_fetch_failure_raises(self, scraper): + responses.add(responses.GET, scraper.restaurant_url, status=500) + with pytest.raises(Exception, match="Failed to fetch menu"): + scraper.get_weekly_menu() + + +# ============================================================================ +# Integration Tests with Real Fetched Fixtures +# ============================================================================ + + +class TestNordrestMenuScraperWithRealFixtures: + """Integration tests using real fetched HTML fixtures.""" + + @pytest.mark.parametrize( + "fixture_date", + get_fixture_dates_with_file("nordrest_karavan.html"), + ids=lambda d: f"karavan_{d.strftime('%Y_%m_%d')}" + ) + def test_parse_real_karavan_html(self, fixture_date): + html = load_fixture_file("nordrest_karavan.html", fixture_date) + scraper = NordrestMenuScraper( + restaurant_url="https://www.nordrest.se/restaurang/karavan/", + restaurant_name="Karavan" + ) + soup = BeautifulSoup(html, 'html.parser') + weekly_menu = scraper._parse_weekly_menu(soup) + + assert isinstance(weekly_menu, dict), f"Failed to parse menu for {fixture_date}" + assert len(weekly_menu) > 0, f"No menu items found for {fixture_date}" + + valid_days = {'mÄndag', 'tisdag', 'onsdag', 'torsdag', 'fredag', 'lördag', 'söndag'} + for day, day_menu in weekly_menu.items(): + assert day in valid_days, f"Unexpected day '{day}' for {fixture_date}" + assert 'menu' in day_menu + assert len(day_menu['menu']) > 0, f"No dishes for {day} on {fixture_date}" + + @pytest.mark.parametrize( + "fixture_date", + get_fixture_dates_with_file("nordrest_gourmedia.html"), + ids=lambda d: f"gourmedia_{d.strftime('%Y_%m_%d')}" + ) + def 
test_parse_real_gourmedia_html(self, fixture_date): + html = load_fixture_file("nordrest_gourmedia.html", fixture_date) + scraper = NordrestMenuScraper( + restaurant_url="https://www.nordrest.se/restaurang/gourmedia/", + restaurant_name="Gourmedia" + ) + soup = BeautifulSoup(html, 'html.parser') + weekly_menu = scraper._parse_weekly_menu(soup) + + assert isinstance(weekly_menu, dict), f"Failed to parse menu for {fixture_date}" + assert len(weekly_menu) > 0, f"No menu items found for {fixture_date}" + + valid_days = {'mÄndag', 'tisdag', 'onsdag', 'torsdag', 'fredag', 'lördag', 'söndag'} + for day, day_menu in weekly_menu.items(): + assert day in valid_days, f"Unexpected day '{day}' for {fixture_date}" + assert 'menu' in day_menu + assert len(day_menu['menu']) > 0, f"No dishes for {day} on {fixture_date}"