diff --git a/sig-security-tooling/cve-feed/README.md b/sig-security-tooling/cve-feed/README.md index 99a90f8..1e51170 100644 --- a/sig-security-tooling/cve-feed/README.md +++ b/sig-security-tooling/cve-feed/README.md @@ -53,6 +53,26 @@ A script in the [kubernetes/sig-security](https://github.com/kubernetes/sig-secu repository under the [sig-security-tooling/cve-feed/hack](https://github.com/kubernetes/sig-security/tree/main/sig-security-tooling/cve-feed/hack) folder is responsible for generating and updating the feed. +#### Local development + +To run the feed scripts locally you need Python 3 and pip3. Install dependencies from the `hack` directory: + +```bash +cd sig-security-tooling/cve-feed/hack +pip3 install -r requirements.txt +``` + +If your system restricts global package installs (e.g. externally managed environment), use a virtual environment: + +```bash +cd sig-security-tooling/cve-feed +python3 -m venv .venv +source .venv/bin/activate # On Windows: .venv\Scripts\activate +pip3 install -r hack/requirements.txt +``` + +Then run the Python script from the `hack` directory: `python3 fetch-official-cve-feed.py`. 
+ This bash script, named `fetch-cve-feed.sh`: - sets up the Python 3 environment; - generates the CVE feed file with `fetch-official-cve-feed.py`; diff --git a/sig-security-tooling/cve-feed/hack/.gitignore b/sig-security-tooling/cve-feed/hack/.gitignore index 13bf3fc..7b3bf67 100644 --- a/sig-security-tooling/cve-feed/hack/.gitignore +++ b/sig-security-tooling/cve-feed/hack/.gitignore @@ -1,3 +1,222 @@ #files generated by cve feed prow job cve-feed-hash official-cve-feed.json + +#python environment + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[codz] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +# Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +# poetry.lock +# poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. +# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +# pdm.lock +# pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +# pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# Redis +*.rdb +*.aof +*.pid + +# RabbitMQ +mnesia/ +rabbitmq/ +rabbitmq-data/ + +# ActiveMQ +activemq-data/ + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +# .idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. 
However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ + +# Streamlit +.streamlit/secrets.toml diff --git a/sig-security-tooling/cve-feed/hack/fetch-official-cve-feed.py b/sig-security-tooling/cve-feed/hack/fetch-official-cve-feed.py index d8faece..df0450c 100755 --- a/sig-security-tooling/cve-feed/hack/fetch-official-cve-feed.py +++ b/sig-security-tooling/cve-feed/hack/fetch-official-cve-feed.py @@ -18,9 +18,84 @@ import json import requests import sys +import re + from datetime import datetime, timezone from cve_title_parser import parse_cve_title +# Pattern for the embedded OSV ```json [osv] ... ``` block in CVE issue bodies (SRC format). +# Some issues use "```json osv" instead of "```json"; (?:\\s+osv)? allows both. +_OSV_JSON_BLOCK_RE = re.compile(r"```json(?:\s+osv)?\s*(.*?)\s*```", re.DOTALL) +# Pattern for the "generated by" HTML comment in issue body (e.g. from srctl). +_OSV_GENERATOR_COMMENT_RE = re.compile(r"<!--\s*(.*?)\s*-->", re.DOTALL) +# Pattern for the
<details><summary>OSV format</summary>
block and following generator comment to strip from content_text. +_OSV_DETAILS_BLOCK_RE = re.compile( + r"\s*
<details>\s*<summary>OSV format</summary>\s*.*?</details>(?:\s*<!--.*?-->)?
\s*", + re.DOTALL, +) +# Value for osv_generator when OSV is fetched from cve-feed-osv repo. +OSV_GENERATOR_FROM_FEED_REPO = "OSV from kubernetes-sigs/cve-feed-osv GitHub repository" + + +def _find_osv_json_block(body): + """Find the embedded OSV JSON code block in body. Returns the re.Match or None.""" + if not body: + return None + return _OSV_JSON_BLOCK_RE.search(body) + + +def extract_osv_from_body(body): + # Extract an embedded OSV JSON object from a CVE issue body. + # New SRC CVE announcements may include the OSV data inline + # as a fenced ```json code block. This helper parses and returns + # that JSON when present. + match = _find_osv_json_block(body) + if not match: + return None + try: + return json.loads(match.group(1).strip()) + except json.JSONDecodeError: + return None + + +def body_without_osv_json(body): + # Remove the embedded OSV JSON code block and the
<details><summary>OSV format</summary>
+ # + generator comment from body; return the remaining text for content_text. + if not body: + return None + text = body + match = _find_osv_json_block(text) + if match: + text = text[: match.start()] + text[match.end() :] + text = _OSV_DETAILS_BLOCK_RE.sub("", text) + return text.strip() or None + + +def extract_osv_generator_from_body(body): + """Extract the 'generated by' HTML comment from issue body (e.g. from srctl).""" + if not body: + return None + match = _OSV_GENERATOR_COMMENT_RE.search(body) + if not match: + return None + return match.group(1).strip() + + +def get_osv_for_cve(body, cve_id): + """Resolve OSV data: try embedded JSON in issue body first, then fetch from cve-feed-osv repo.""" + osv = extract_osv_from_body(body) + if osv is not None: + return osv + osv_url = f'https://raw.githubusercontent.com/kubernetes-sigs/cve-feed-osv/main/vulns/{cve_id}.json' + try: + res = requests.get(osv_url, timeout=5) + if res.status_code == 200: + return res.json() + except requests.RequestException as e: + print(f"Error fetching OSV for CVE {cve_id}: {e}", file=sys.stderr) + return None + + def getCVEStatus(state, state_reason): if state == "open": if state_reason == "reopened": @@ -76,12 +151,17 @@ def getCVEStatus(state, state_reason): cve = {'content_text': None, 'date_published': None, 'external_url': None, 'id': None,'summary': None, 'url': None, '_kubernetes_io': None} # This is a custom extension - item_kubernetes_io = {'google_group_url': None, 'issue_number': None} + item_kubernetes_io = { + 'google_group_url': None, + 'issue_number': None, + 'osv': None, + 'osv_generator': None, + } cve['_kubernetes_io'] = item_kubernetes_io cve['url'] = item['html_url'] cve['_kubernetes_io']['issue_number'] = item['number'] - cve['content_text'] = item['body'] + cve['content_text'] = body_without_osv_json(item.get('body')) cve['date_published'] = item['created_at'] cve['status'] = getCVEStatus(item['state'], item['state_reason']) @@ -91,15 +171,39 @@ def 
getCVEStatus(state, state_reason): first_cve_id = cve_ids[0] cve['id'] = first_cve_id + + # Prefer OSV from GitHub issue when present; fallback to cve-feed-osv repo. + cve['_kubernetes_io']['osv'] = get_osv_for_cve(item.get('body'), first_cve_id) + if cve['_kubernetes_io']['osv'] is not None: + if extract_osv_from_body(item.get('body')) is not None: + cve['_kubernetes_io']['osv_generator'] = extract_osv_generator_from_body(item.get('body')) + else: + cve['_kubernetes_io']['osv_generator'] = OSV_GENERATOR_FROM_FEED_REPO + cve['external_url'] = f'https://www.cve.org/cverecord?id={first_cve_id}' cve['_kubernetes_io']['google_group_url'] = f'https://groups.google.com/g/kubernetes-announce/search?q={first_cve_id}' # Add additional entries for any remaining CVE IDs for additional_cve_id in cve_ids[1:]: + # Make a deep copy of the main CVE to avoid overwriting its data additional_cve = copy.deepcopy(cve) + + # Update the CVE ID for this additional CVE additional_cve['id'] = additional_cve_id + + # Set the external URL for this CVE on CVE.org additional_cve['external_url'] = f'https://www.cve.org/cverecord?id={additional_cve_id}' + + # Set the Google Group URL specific to this CVE additional_cve['_kubernetes_io']['google_group_url'] = f'https://groups.google.com/g/kubernetes-announce/search?q={additional_cve_id}' + + additional_cve['_kubernetes_io']['osv'] = get_osv_for_cve(item.get('body'), additional_cve_id) + if additional_cve['_kubernetes_io']['osv'] is not None: + if extract_osv_from_body(item.get('body')) is not None: + additional_cve['_kubernetes_io']['osv_generator'] = extract_osv_generator_from_body(item.get('body')) + else: + additional_cve['_kubernetes_io']['osv_generator'] = OSV_GENERATOR_FROM_FEED_REPO + cve_list.append(additional_cve) cve_list.append(cve)