From 1314b35282cbe881a204838001f5d5e77b0d7d8b Mon Sep 17 00:00:00 2001 From: pnkcaht Date: Fri, 6 Feb 2026 18:47:26 -0500 Subject: [PATCH 1/3] tooling: include OSV JSON data in official CVE feed Signed-off-by: pnkcaht --- .../cve_title_parser.cpython-313.pyc | Bin 0 -> 1346 bytes .../cve-feed/hack/fetch-official-cve-feed.py | 54 +++++++++++++++++- 2 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 sig-security-tooling/cve-feed/hack/__pycache__/cve_title_parser.cpython-313.pyc diff --git a/sig-security-tooling/cve-feed/hack/__pycache__/cve_title_parser.cpython-313.pyc b/sig-security-tooling/cve-feed/hack/__pycache__/cve_title_parser.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..362ec240435a59deb0bf7b3ecbbc386dcdb0e385 GIT binary patch literal 1346 zcmZ8hQES^)6uz>hB$XSdaT_{^ZoPC(7q@W?OEQKwGThp0-Gr=I$tWbPQKcJOu`GG_ zUbkyT!B2bWOJPv9vcdYgw><8#KS9HV4Kc=GPkReEd*8V?a^mEG?z!jaJLi1*bfy0I zxQ@WSYJKaz8b# zqx%;i`x0%6oV03Uusilw(55w{; zRI3Q|?^%=)3>sspA?5)lUvp~NTqY|ehArR04q-NR+uRNOd0j8kIx}<(CW{Qb-Wcw| zICcHHXw??ob-hY>hx&t(PTL~}p0yv7n(MH+;odp?q(Lab@jdLqbnLl|<6t}HgAcni zIbKizJH=3xabRS7#PZ1r@|eTZ0U^s}B<8ak1kFynOld$3T-lbDhgvuRVg3gkCR&u^ zCoXRQo5z;tI+DvHFR+{X?##M4d{}_7ADlo7StZKAR2-@Z<<3M2N+PXgak~*^SIfmx zW$ktifVG>eU))-$-7BuIm#b@0YKT^()oIXaN2A-W?^vD}r3JoHIszwI=TW2yVcI#3 zibFufS&b$^00&-0P3S3!M5D2^QEKRwGV!%YC8K7c5wu95?Kf?!!3(9p?zD){S;2Jc zd6U>3>hh;6ADaaRF^l69L7oSJ2SW=mDZfpKQ)pOrQ$#f>YfT;sI^TX8O~|ceCvVa7 zz#|J6W3Y75!8975cyZ_Xowrl-ds^@8N8R#YsdIm(v*D%Uo0&Hk!|6&deK#Dt`}aHV z{+xQ1dR_c69ja3YV`yUX#m@7cpWX{|MlbzI_x8aEIzRROwePOI&3?Fd^H;5xS^hm! z`YluHWj^n&{E^Cpsmq~yxesFzNH_wHiVu>G!4f4wTFLDfL@$VJMr6l6Z3RxpBg^z6 zgyL7g3b5=a6-DV^L5jXVf-*DRG&p?q^;-D;!k!UcS`JZmU(G7`hlT$TIN9)j Date: Fri, 6 Feb 2026 19:14:08 -0500 Subject: [PATCH 2/3] tooling: include embedded OSV data from CVE issue body Signed-off-by: pnkcaht --- .../cve-feed/hack/fetch-official-cve-feed.py | 62 +++++++++++++------ 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/sig-security-tooling/cve-feed/hack/fetch-official-cve-feed.py b/sig-security-tooling/cve-feed/hack/fetch-official-cve-feed.py index abcffbe..7d7eb41 100755 --- a/sig-security-tooling/cve-feed/hack/fetch-official-cve-feed.py +++ b/sig-security-tooling/cve-feed/hack/fetch-official-cve-feed.py @@ -18,9 +18,29 @@ import json import requests import sys +import re + from datetime import datetime, timezone from cve_title_parser import parse_cve_title +def extract_osv_from_body(body): + # Extract an embedded OSV JSON object from a CVE issue body. + # New SRC CVE announcements may include the OSV data inline + # as a fenced ```json code block. This helper parses and returns + # that JSON when present. + if not body: + return None + + match = re.search(r"```json\s*(\{.*?\})\s*```", body, re.DOTALL) + if not match: + return None + + try: + return json.loads(match.group(1)) + except json.JSONDecodeError: + return None + + def getCVEStatus(state, state_reason): if state == "open": if state_reason == "reopened": @@ -96,23 +116,18 @@ def getCVEStatus(state, state_reason): first_cve_id = cve_ids[0] cve['id'] = first_cve_id - # Initialize the OSV field as None in case no OSV JSON is available - cve['_kubernetes_io']['osv'] = None + # Try extracting OSV from issue body first (SRC new format) + cve['_kubernetes_io']['osv'] = extract_osv_from_body(item.get('body')) - # Construct the URL to fetch the OSV JSON from the official repository - osv_url = f'https://raw.githubusercontent.com/kubernetes-sigs/cve-feed-osv/main/vulns/{first_cve_id}.json' - - try: - # Attempt to fetch the OSV JSON with a 5-second timeout - res_osv = requests.get(osv_url, timeout=5) - - # If the OSV JSON exists, parse it and store it in the 'osv' field - if res_osv.status_code == 200: - cve['_kubernetes_io']['osv'] = res_osv.json() - - except requests.RequestException: - # If any network error occurs (timeout, connection error, etc.), leave 'osv' as None - cve['_kubernetes_io']['osv'] = None + # Fallback: fetch OSV from cve-feed-osv repository + if cve['_kubernetes_io']['osv'] is None: + osv_url = f'https://raw.githubusercontent.com/kubernetes-sigs/cve-feed-osv/main/vulns/{first_cve_id}.json' + try: + res_osv = requests.get(osv_url, timeout=5) + if res_osv.status_code == 200: + cve['_kubernetes_io']['osv'] = res_osv.json() + except requests.RequestException: + pass cve['external_url'] = f'https://www.cve.org/cverecord?id={first_cve_id}' @@ -132,9 +147,18 @@ def getCVEStatus(state, state_reason): # Set the Google Group URL specific to this CVE additional_cve['_kubernetes_io']['google_group_url'] = f'https://groups.google.com/g/kubernetes-announce/search?q={additional_cve_id}' - # --- Add OSV data for the additional CVE --- - # Initialize the OSV field as None in case no JSON exists - additional_cve['_kubernetes_io']['osv'] = None + additional_cve['_kubernetes_io']['osv'] = extract_osv_from_body(item.get('body')) + + # Fallback: if no embedded OSV was found in the issue body, attempt to fetch + # the OSV JSON from the official cve-feed-osv repository for this CVE. + if additional_cve['_kubernetes_io']['osv'] is None: + additional_osv_url = f'https://raw.githubusercontent.com/kubernetes-sigs/cve-feed-osv/main/vulns/{additional_cve_id}.json' + try: + res_additional_osv = requests.get(additional_osv_url, timeout=5) + if res_additional_osv.status_code == 200: + additional_cve['_kubernetes_io']['osv'] = res_additional_osv.json() + except requests.RequestException: + pass # Construct the URL to fetch the OSV JSON from the official repository additional_osv_url = f'https://raw.githubusercontent.com/kubernetes-sigs/cve-feed-osv/main/vulns/{additional_cve_id}.json' From 2fce6c042df3d45c98c987c8329ad01e1b68ddc7 Mon Sep 17 00:00:00 2001 From: Pushkar Joglekar Date: Fri, 20 Feb 2026 16:10:32 -0800 Subject: [PATCH 3/3] tooling(cve-feed): osv_generator, strip OSV from content_text, support ```json osv fence - Add _kubernetes_io.osv_generator from issue 'generated by' comment or cve-feed-osv repo - Set content_text to body minus OSV block and
OSV format
+comment - Prefer OSV from GitHub issue when present; support '```json osv' fence in addition to '```json' - Refactor shared _find_osv_json_block; remove redundant additional-CVE OSV fetch --- sig-security-tooling/cve-feed/README.md | 20 ++ sig-security-tooling/cve-feed/hack/.gitignore | 219 ++++++++++++++++++ .../cve_title_parser.cpython-313.pyc | Bin 1346 -> 0 bytes .../cve-feed/hack/fetch-official-cve-feed.py | 120 ++++++---- 4 files changed, 313 insertions(+), 46 deletions(-) delete mode 100644 sig-security-tooling/cve-feed/hack/__pycache__/cve_title_parser.cpython-313.pyc diff --git a/sig-security-tooling/cve-feed/README.md b/sig-security-tooling/cve-feed/README.md index 99a90f8..1e51170 100644 --- a/sig-security-tooling/cve-feed/README.md +++ b/sig-security-tooling/cve-feed/README.md @@ -53,6 +53,26 @@ A script in the [kubernetes/sig-security](https://github.com/kubernetes/sig-secu repository under the [sig-security-tooling/cve-feed/hack](https://github.com/kubernetes/sig-security/tree/main/sig-security-tooling/cve-feed/hack) folder is responsible for generating and updating the feed. +#### Local development + +To run the feed scripts locally you need Python 3 and pip3. Install dependencies from the `hack` directory: + +```bash +cd sig-security-tooling/cve-feed/hack +pip3 install -r requirements.txt +``` + +If your system restricts global package installs (e.g. externally managed environment), use a virtual environment: + +```bash +cd sig-security-tooling/cve-feed +python3 -m venv .venv +source .venv/bin/activate # On Windows: .venv\Scripts\activate +pip3 install -r hack/requirements.txt +``` + +Then run the Python script from the `hack` directory: `python3 fetch-official-cve-feed.py`. + This bash script, named `fetch-cve-feed.sh`: - sets up the Python 3 environment; - generates the CVE feed file with `fetch-official-cve-feed.py`; diff --git a/sig-security-tooling/cve-feed/hack/.gitignore b/sig-security-tooling/cve-feed/hack/.gitignore index 13bf3fc..7b3bf67 100644 --- a/sig-security-tooling/cve-feed/hack/.gitignore +++ b/sig-security-tooling/cve-feed/hack/.gitignore @@ -1,3 +1,222 @@ #files generated by cve feed prow job cve-feed-hash official-cve-feed.json + +#python environment + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[codz] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +# Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +# poetry.lock +# poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. +# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +# pdm.lock +# pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +# pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# Redis +*.rdb +*.aof +*.pid + +# RabbitMQ +mnesia/ +rabbitmq/ +rabbitmq-data/ + +# ActiveMQ +activemq-data/ + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +# .idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ + +# Streamlit +.streamlit/secrets.toml diff --git a/sig-security-tooling/cve-feed/hack/__pycache__/cve_title_parser.cpython-313.pyc b/sig-security-tooling/cve-feed/hack/__pycache__/cve_title_parser.cpython-313.pyc deleted file mode 100644 index 362ec240435a59deb0bf7b3ecbbc386dcdb0e385..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1346 zcmZ8hQES^)6uz>hB$XSdaT_{^ZoPC(7q@W?OEQKwGThp0-Gr=I$tWbPQKcJOu`GG_ zUbkyT!B2bWOJPv9vcdYgw><8#KS9HV4Kc=GPkReEd*8V?a^mEG?z!jaJLi1*bfy0I zxQ@WSYJKaz8b# zqx%;i`x0%6oV03Uusilw(55w{; zRI3Q|?^%=)3>sspA?5)lUvp~NTqY|ehArR04q-NR+uRNOd0j8kIx}<(CW{Qb-Wcw| zICcHHXw??ob-hY>hx&t(PTL~}p0yv7n(MH+;odp?q(Lab@jdLqbnLl|<6t}HgAcni zIbKizJH=3xabRS7#PZ1r@|eTZ0U^s}B<8ak1kFynOld$3T-lbDhgvuRVg3gkCR&u^ zCoXRQo5z;tI+DvHFR+{X?##M4d{}_7ADlo7StZKAR2-@Z<<3M2N+PXgak~*^SIfmx zW$ktifVG>eU))-$-7BuIm#b@0YKT^()oIXaN2A-W?^vD}r3JoHIszwI=TW2yVcI#3 zibFufS&b$^00&-0P3S3!M5D2^QEKRwGV!%YC8K7c5wu95?Kf?!!3(9p?zD){S;2Jc zd6U>3>hh;6ADaaRF^l69L7oSJ2SW=mDZfpKQ)pOrQ$#f>YfT;sI^TX8O~|ceCvVa7 zz#|J6W3Y75!8975cyZ_Xowrl-ds^@8N8R#YsdIm(v*D%Uo0&Hk!|6&deK#Dt`}aHV z{+xQ1dR_c69ja3YV`yUX#m@7cpWX{|MlbzI_x8aEIzRROwePOI&3?Fd^H;5xS^hm! z`YluHWj^n&{E^Cpsmq~yxesFzNH_wHiVu>G!4f4wTFLDfL@$VJMr6l6Z3RxpBg^z6 zgyL7g3b5=a6-DV^L5jXVf-*DRG&p?q^;-D;!k!UcS`JZmU(G7`hlT$TIN9)j", re.DOTALL) +# Pattern for the
OSV format
block and following generator comment to strip from content_text. +_OSV_DETAILS_BLOCK_RE = re.compile( + r"\s*
\s*OSV format\s*.*?
\s*", + re.DOTALL, +) +# Value for osv_generator when OSV is fetched from cve-feed-osv repo. +OSV_GENERATOR_FROM_FEED_REPO = "OSV from kubernetes-sigs/cve-feed-osv GitHub repository" + + +def _find_osv_json_block(body): + """Find the embedded OSV JSON code block in body. Returns the re.Match or None.""" + if not body: + return None + return _OSV_JSON_BLOCK_RE.search(body) + + def extract_osv_from_body(body): # Extract an embedded OSV JSON object from a CVE issue body. # New SRC CVE announcements may include the OSV data inline # as a fenced ```json code block. This helper parses and returns # that JSON when present. + match = _find_osv_json_block(body) + if not match: + return None + try: + return json.loads(match.group(1).strip()) + except json.JSONDecodeError: + return None + + +def body_without_osv_json(body): + # Remove the embedded OSV JSON code block and the
OSV format
+ # + generator comment from body; return the remaining text for content_text. if not body: return None + text = body + match = _find_osv_json_block(text) + if match: + text = text[: match.start()] + text[match.end() :] + text = _OSV_DETAILS_BLOCK_RE.sub("", text) + return text.strip() or None + - match = re.search(r"```json\s*(\{.*?\})\s*```", body, re.DOTALL) +def extract_osv_generator_from_body(body): + """Extract the 'generated by' HTML comment from issue body (e.g. from srctl).""" + if not body: + return None + match = _OSV_GENERATOR_COMMENT_RE.search(body) if not match: return None + return match.group(1).strip() + +def get_osv_for_cve(body, cve_id): + """Resolve OSV data: try embedded JSON in issue body first, then fetch from cve-feed-osv repo.""" + osv = extract_osv_from_body(body) + if osv is not None: + return osv + osv_url = f'https://raw.githubusercontent.com/kubernetes-sigs/cve-feed-osv/main/vulns/{cve_id}.json' try: - return json.loads(match.group(1)) - except json.JSONDecodeError: - return None + res = requests.get(osv_url, timeout=5) + if res.status_code == 200: + return res.json() + except requests.RequestException as e: + print(f"Error fetching OSV for CVE {cve_id}: {e}", file=sys.stderr) + return None def getCVEStatus(state, state_reason): @@ -100,12 +155,13 @@ def getCVEStatus(state, state_reason): 'google_group_url': None, 'issue_number': None, 'osv': None, + 'osv_generator': None, } cve['_kubernetes_io'] = item_kubernetes_io cve['url'] = item['html_url'] cve['_kubernetes_io']['issue_number'] = item['number'] - cve['content_text'] = item['body'] + cve['content_text'] = body_without_osv_json(item.get('body')) cve['date_published'] = item['created_at'] cve['status'] = getCVEStatus(item['state'], item['state_reason']) @@ -116,19 +172,13 @@ def getCVEStatus(state, state_reason): first_cve_id = cve_ids[0] cve['id'] = first_cve_id - # Try extracting OSV from issue body first (SRC new format) - cve['_kubernetes_io']['osv'] = extract_osv_from_body(item.get('body')) - - # Fallback: fetch OSV from cve-feed-osv repository - if cve['_kubernetes_io']['osv'] is None: - osv_url = f'https://raw.githubusercontent.com/kubernetes-sigs/cve-feed-osv/main/vulns/{first_cve_id}.json' - try: - res_osv = requests.get(osv_url, timeout=5) - if res_osv.status_code == 200: - cve['_kubernetes_io']['osv'] = res_osv.json() - except requests.RequestException: - pass - + # Prefer OSV from GitHub issue when present; fallback to cve-feed-osv repo. + cve['_kubernetes_io']['osv'] = get_osv_for_cve(item.get('body'), first_cve_id) + if cve['_kubernetes_io']['osv'] is not None: + if extract_osv_from_body(item.get('body')) is not None: + cve['_kubernetes_io']['osv_generator'] = extract_osv_generator_from_body(item.get('body')) + else: + cve['_kubernetes_io']['osv_generator'] = OSV_GENERATOR_FROM_FEED_REPO cve['external_url'] = f'https://www.cve.org/cverecord?id={first_cve_id}' cve['_kubernetes_io']['google_group_url'] = f'https://groups.google.com/g/kubernetes-announce/search?q={first_cve_id}' @@ -147,34 +197,12 @@ def getCVEStatus(state, state_reason): # Set the Google Group URL specific to this CVE additional_cve['_kubernetes_io']['google_group_url'] = f'https://groups.google.com/g/kubernetes-announce/search?q={additional_cve_id}' - additional_cve['_kubernetes_io']['osv'] = extract_osv_from_body(item.get('body')) - - # Fallback: if no embedded OSV was found in the issue body, attempt to fetch - # the OSV JSON from the official cve-feed-osv repository for this CVE. - if additional_cve['_kubernetes_io']['osv'] is None: - additional_osv_url = f'https://raw.githubusercontent.com/kubernetes-sigs/cve-feed-osv/main/vulns/{additional_cve_id}.json' - try: - res_additional_osv = requests.get(additional_osv_url, timeout=5) - if res_additional_osv.status_code == 200: - additional_cve['_kubernetes_io']['osv'] = res_additional_osv.json() - except requests.RequestException: - pass - - # Construct the URL to fetch the OSV JSON from the official repository - additional_osv_url = f'https://raw.githubusercontent.com/kubernetes-sigs/cve-feed-osv/main/vulns/{additional_cve_id}.json' - - try: - # Attempt to fetch the OSV JSON with a 5-second timeout - res_additional_osv = requests.get(additional_osv_url, timeout=5) - - # If the file exists, parse it as JSON and store it in the 'osv' field - if res_additional_osv.status_code == 200: - additional_cve['_kubernetes_io']['osv'] = res_additional_osv.json() - - except requests.RequestException: - # If any network error occurs (timeout, connection issue, etc.), keep 'osv' as None - additional_cve['_kubernetes_io']['osv'] = None - + additional_cve['_kubernetes_io']['osv'] = get_osv_for_cve(item.get('body'), additional_cve_id) + if additional_cve['_kubernetes_io']['osv'] is not None: + if extract_osv_from_body(item.get('body')) is not None: + additional_cve['_kubernetes_io']['osv_generator'] = extract_osv_generator_from_body(item.get('body')) + else: + additional_cve['_kubernetes_io']['osv_generator'] = OSV_GENERATOR_FROM_FEED_REPO cve_list.append(additional_cve)