From 408a2b298a18516a506622efd628d94bf75cb0ec Mon Sep 17 00:00:00 2001 From: Tamar Weisskopf Date: Sun, 21 Jun 2026 03:59:29 +0300 Subject: [PATCH 01/21] Rename ExploitIQ/Agent morpheus to Exploit Intelligence --- .gitignore | 2 + .tekton/on-cm-runner.yaml | 2 +- .tekton/on-pull-request.yaml | 2 +- .tekton/on-push.yaml | 2 +- .tekton/on-tag.yaml | 2 +- README.md | 2 +- kustomize/README.md | 38 +++++++++---------- kustomize/base/argilla/argilla-service.yaml | 2 +- .../argilla/argilla-user-feedback-pvc.yaml | 2 +- kustomize/base/argilla/deployment.yaml | 8 ++-- kustomize/base/argilla/service.yaml | 6 +-- kustomize/base/exploit_iq_client.yaml | 10 ++--- kustomize/base/exploit_iq_service.yaml | 4 +- kustomize/base/kustomization.yaml | 4 +- kustomize/network-policy.yaml | 6 +-- .../batch-processing/kustomization.yaml | 4 +- kustomize/overlays/tests/kustomization.yaml | 2 +- .../utils/chain_of_calls_retriever.py | 2 +- .../utils/chain_of_calls_retriever_base.py | 2 +- src/exploit_iq_commons/utils/dep_tree.py | 2 +- .../java_functions_parsers.py | 2 +- .../utils/java_chain_of_calls_retriever.py | 2 +- .../utils/transitive_code_searcher_tool.py | 2 +- src/vuln_analysis/register.py | 12 +++--- .../utils/function_name_extractor.py | 2 +- .../utils/function_name_locator.py | 2 +- src/vuln_analysis/utils/llm_engine_utils.py | 4 +- .../vex/implementations/csaf_generator.py | 8 ++-- 28 files changed, 70 insertions(+), 68 deletions(-) diff --git a/.gitignore b/.gitignore index 3ef4a4930..92f47346b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ ###### Place new entries directly below this line! 
###### +CLAUDE.md + # Ignore anything in the ./.tmp directory .tmp/ diff --git a/.tekton/on-cm-runner.yaml b/.tekton/on-cm-runner.yaml index 17c42105d..fc4003022 100644 --- a/.tekton/on-cm-runner.yaml +++ b/.tekton/on-cm-runner.yaml @@ -26,7 +26,7 @@ spec: value: "{{ trigger_comment }}" # Point to the image ALREADY built by the PR pipeline - name: target-image - value: quay.io/ecosystem-appeng/agent-morpheus-rh:on-pr-{{revision}} + value: quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-service:on-pr-{{revision}} pipelineSpec: params: diff --git a/.tekton/on-pull-request.yaml b/.tekton/on-pull-request.yaml index 34c219807..8e1823663 100644 --- a/.tekton/on-pull-request.yaml +++ b/.tekton/on-pull-request.yaml @@ -31,7 +31,7 @@ spec: - name: image-expires-after value: 5d - name: output-image - value: quay.io/ecosystem-appeng/agent-morpheus-rh:on-pr-{{revision}} + value: quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-service:on-pr-{{revision}} - name: path-context value: . - name: dockerfile diff --git a/.tekton/on-push.yaml b/.tekton/on-push.yaml index 5da2cc106..93e96a5d3 100644 --- a/.tekton/on-push.yaml +++ b/.tekton/on-push.yaml @@ -26,7 +26,7 @@ spec: - name: revision value: "{{ revision }}" - name: output-image - value: quay.io/ecosystem-appeng/agent-morpheus-rh:latest + value: quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-service:latest - name: path-context value: . 
- name: dockerfile diff --git a/.tekton/on-tag.yaml b/.tekton/on-tag.yaml index 08718fd32..cb479c761 100644 --- a/.tekton/on-tag.yaml +++ b/.tekton/on-tag.yaml @@ -26,7 +26,7 @@ spec: - name: revision value: "{{ revision }}" - name: output-image - value: 'quay.io/ecosystem-appeng/agent-morpheus-rh' + value: 'quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-service' - name: tag-name value: "{{ target_branch }}" - name: path-context diff --git a/README.md b/README.md index f4fab8a9f..692d72f8f 100644 --- a/README.md +++ b/README.md @@ -149,7 +149,7 @@ The detailed architecture consists of the following components: - **Vector database**: Various vector databases can be used for the embedding. We currently utilize FAISS for the VDB because it does not require an external service and is simple to use. Any vector store can be used, such as NVIDIA cuVS, which would provide accelerated indexing and search. - **Lexical search**: As an alternative, a lexical search is available for use cases where creating an embedding is impractical due to a large number of source files in the target container. - **Software Bill of Materials (SBOM)**: A Software Bill of Materials (SBOM) is a machine-readable manifest of all the dependencies of a software package or container. The blueprint cross-references every entry in the SBOM for known vulnerabilities and looks at the code implementation to see whether the implementation puts users at risk—just as a security analyst would do. For this reason, starting with an accurate SBOM is an important first step. SBOMs can be generated for any container using the open-source tool [Syft](https://github.com/anchore/syft). For more information on generating SBOMs for your containers, see the [SBOM documentation](./src/vuln_analysis/data/sboms/README.md). 
- - **Web vulnerability intel**: The system collects detailed information about each CVE through web scraping and data retrieval from various public security databases, including GHSA, Redhat, Ubuntu, and NIST CVE records, as well as tailored threat intelligence feeds. + - **Web vulnerability intel**: The system collects detailed information about each CVE through web scraping and data retrieval from various public security databases, including GHSA, Redhat, Ubuntu, and NIST CVE records, as well as RHTPA exploit intelligence feeds. - **Core LLM engine**: The below actions comprise the core LLM engine and are each implemented as NeMo Agent toolkit functions within the workflow. - **Checklist generation**: Leveraging the gathered information about each vulnerability, the checklist generation node creates a tailored, context-sensitive task checklist designed to guide the impact analysis. (See [`src/vuln_analysis/functions/cve_checklist.py`](./src/vuln_analysis/functions/cve_checklist.py).) diff --git a/kustomize/README.md b/kustomize/README.md index 1f9be713b..490128731 100644 --- a/kustomize/README.md +++ b/kustomize/README.md @@ -19,7 +19,7 @@ limitations under the License. ## Install and Run Locally -One can run ExploitIQ on his local machine ( No GPU dependency is required!), for the purpose of testing, debugging and troubleshooting problems: +One can run the RHTPA exploit intelligence workflow on his local machine ( No GPU dependency is required!), for the purpose of testing, debugging and troubleshooting problems: 1. Install the lightweight [uv package manager](https://docs.astral.sh/uv/getting-started/installation). 2. Ensure Python 3.12 is installed for your operating system. @@ -98,7 +98,7 @@ export USE_CONTAINER_SOURCES=true ## Deploy And Run On OCP -1. Create a `base/secrets.env` file containing the API keys for external services `ExploitIQ` might use. Not all keys are mandatory. 
Refer to the main [README](../README.md#obtain-api-keys) for details on how to create the Red Hat credentials and other API keys. +1. Create a `base/secrets.env` file containing the API keys for external services the exploit intelligence workflow might use. Not all keys are mandatory. Refer to the main [README](../README.md#obtain-api-keys) for details on how to create the Red Hat credentials and other API keys. ```shell cat > base/secrets.env << EOF @@ -128,7 +128,7 @@ argilla_api_key=your_argilla_api_key EOF ``` -4. Create an image pull secret to authorize pulling the `ExploitIQ` and `Argilla` container images: +4. Create an image pull secret to authorize pulling the `exploit-intelligence` and `Argilla` container images: ```shell oc create secret generic exploit-iq-pull-secret --from-file=.dockerconfigjson= --type=kubernetes.io/dockerconfigjson @@ -152,7 +152,7 @@ EOF >[!IMPORTANT] >This secret is essential for product scanning to authenticate and pull component images. If you skip this step, kustomize will still deploy, but authenticated pulls will not work until you provide real credentials. -6. Create the `oauth-secret.env` file containing the `client-secret` and `openshift-domain` values required by the [ExploitIQ Client](./base/exploit_iq_client.yaml) configuration. +6. Create the `oauth-secret.env` file containing the `client-secret` and `openshift-domain` values required by the [exploit-intelligence-client](./base/exploit_iq_client.yaml) configuration. If openshift resource of kind `OAuthClient` named `exploit-iq-client` exists, just get the secret from there: ```shell @@ -187,7 +187,7 @@ exploit-iq-password=$(openssl rand -base64 24 | tr -d '/+=' | head -c 32) EOF ``` -8. Update `ExploitIQ` configuration file with the correct callback URL for the client service. +8. Update exploit intelligence configuration file with the correct callback URL for the client service. 
```shell export CALLBACK_URL="https://exploit-iq-client.$(oc project -q).svc:8443" @@ -196,7 +196,7 @@ find . -type f -name 'exploit-iq-config.yml' -exec sed -i "s|CALLBACK_URL_PLACEH ### Configuring Git SSL Certificate Authority for Custom CAs -If your Git server uses a certificate that is signed by a custom Certificate Authority (CA), you must provide the CA certificate bundle to enable ExploitIQ to verify the Git server identity. +If your Git server uses a certificate that is signed by a custom Certificate Authority (CA), you must provide the CA certificate bundle to enable the exploit intelligence workflow to verify the Git server identity. > [!IMPORTANT] > If you need to access Red Hat internal Git repositories such as `gitlab.cee.redhat.com`, you must complete this procedure. @@ -245,15 +245,15 @@ openssl crl2pkcs7 -nocrl -certfile kustomize/base/ca-certs/ca-bundle.crt | \ >[!IMPORTANT] You should only run one of the steps 9,10 or 11, depending on if you want to run the service with a self hosted LLM, self hosted LLM with MLOps or Nvidia remote NIM. -9. To deploy `ExploitIQ` with a self-hosted LLM , run: +9. To deploy the exploit intelligence service with a self-hosted LLM , run: ```shell -# Deploy ExploitIQ with self hosted llama3.1-70b-4bit LLM +# Deploy exploit intelligence with self hosted llama3.1-70b-4bit LLM oc kustomize overlays/self-hosted-llama3.1-70b-4bit | oc apply -f - -n $YOUR_NAMESPACE_NAME ``` -10. To deploy `ExploitIQ` with a self-hosted LLM and MLOps, run: +10. 
To deploy the exploit intelligence service with a self-hosted LLM and MLOps, run: ```shell # Patch overlay kustomization yaml with deployment namespace value (Grafana and Tempo) @@ -262,12 +262,12 @@ sed -i "s/REPLACE_NAMESPACE/$YOUR_NAMESPACE_NAME/" overlays/mlops/tempo/kustomiz ``` ```shell -# replace EXPLOIT_IQ_GRAFANA_SA_TOKEN with ExploitIQ Grafana SA Token from bitwarden vault (1 year expiration date) +# replace EXPLOIT_IQ_GRAFANA_SA_TOKEN with exploit intelligence Grafana SA Token from bitwarden vault (1 year expiration date) oc create secret generic grafana-bearer-token --from-literal=token='EXPLOIT_IQ_GRAFANA_SA_TOKEN' ``` ```shell -# Deploy ExploitIQ with self hosted llama3.1-70b-4bit LLM and MLOps +# Deploy exploit intelligence with self hosted llama3.1-70b-4bit LLM and MLOps oc kustomize overlays/mlops | oc apply -f - -n $YOUR_NAMESPACE_NAME ``` @@ -299,9 +299,9 @@ oc kustomize overlays/mlops \ ``` -10. Alternatively, to deploy `ExploitIQ` with a fully remote nim LLM, run: +10. Alternatively, to deploy the exploit intelligence service with a fully remote nim LLM, run: ```shell -# Deploy ExploitIQ with remote nim llama-3.1-70b-16bit LLM +# Deploy exploit intelligence with remote nim llama-3.1-70b-16bit LLM oc kustomize overlays/remote-nim-all | oc apply -f - -n $YOUR_NAMESPACE_NAME ``` >[!WARNING] @@ -335,7 +335,7 @@ openshift-domain=$(oc get dns cluster -o jsonpath='{.spec.baseDomain}') EOF ``` -12. **(Optional) Enable OAuth for the ExploitIQ MCP Server.** If you want MCP clients (Claude Code, Cursor, etc.) to authenticate via OpenShift OAuth, create an `OAuthClient` CR for the MCP server: +12. **(Optional) Enable OAuth for the exploit intelligence MCP Server.** If you want MCP clients (Claude Code, Cursor, etc.) 
to authenticate via OpenShift OAuth, create an `OAuthClient` CR for the MCP server: ```bash oc create -f - < str: return f"{function_file_name};{function_name_to_search}" diff --git a/src/exploit_iq_commons/utils/dep_tree.py b/src/exploit_iq_commons/utils/dep_tree.py index 09b91789e..acb65c133 100644 --- a/src/exploit_iq_commons/utils/dep_tree.py +++ b/src/exploit_iq_commons/utils/dep_tree.py @@ -58,7 +58,7 @@ logger = LoggingFactory.get_agent_logger(__name__) -ROOT_LEVEL_SENTINEL = 'root-top-level-agent-morpheus' +ROOT_LEVEL_SENTINEL = 'root-top-level-exploit-intelligence' TRANSITIVE_ENV_NAME = 'transitive_env' diff --git a/src/exploit_iq_commons/utils/functions_parsers/java_functions_parsers.py b/src/exploit_iq_commons/utils/functions_parsers/java_functions_parsers.py index 1a442c634..1323f49c1 100644 --- a/src/exploit_iq_commons/utils/functions_parsers/java_functions_parsers.py +++ b/src/exploit_iq_commons/utils/functions_parsers/java_functions_parsers.py @@ -28,7 +28,7 @@ strip_java_generics, JAVA_ANNOTATION_SYMBOL, extract_fqcn from exploit_iq_commons.logging.loggers_factory import LoggingFactory -logger = LoggingFactory.get_agent_logger(f"morpheus.{__name__}") +logger = LoggingFactory.get_agent_logger(f"exploit-intelligence.{__name__}") PARAMETER = "parameter" diff --git a/src/exploit_iq_commons/utils/java_chain_of_calls_retriever.py b/src/exploit_iq_commons/utils/java_chain_of_calls_retriever.py index e624fa99b..146c022c9 100644 --- a/src/exploit_iq_commons/utils/java_chain_of_calls_retriever.py +++ b/src/exploit_iq_commons/utils/java_chain_of_calls_retriever.py @@ -36,7 +36,7 @@ create_inheritance_map, get_target_class_names, dummy_package_name from exploit_iq_commons.data_models.input import SourceDocumentsInfo -logger = LoggingFactory.get_agent_logger(f"morpheus.{__name__}") +logger = LoggingFactory.get_agent_logger(f"exploit-intelligence.{__name__}") # Lowercase package segments; class segments start with uppercase; allow dots or $ for inners 
_FQCN_STRICT_RE = re.compile( diff --git a/src/exploit_iq_commons/utils/transitive_code_searcher_tool.py b/src/exploit_iq_commons/utils/transitive_code_searcher_tool.py index 12c3455a3..09956c637 100644 --- a/src/exploit_iq_commons/utils/transitive_code_searcher_tool.py +++ b/src/exploit_iq_commons/utils/transitive_code_searcher_tool.py @@ -25,7 +25,7 @@ from exploit_iq_commons.logging.loggers_factory import LoggingFactory, MULTI_LINE_MESSAGE_TRUE -logger = LoggingFactory.get_agent_logger(f"morpheus.{__name__}") +logger = LoggingFactory.get_agent_logger(f"exploit-intelligence.{__name__}") class TransitiveCodeSearcher: diff --git a/src/vuln_analysis/register.py b/src/vuln_analysis/register.py index 03ca69448..008279031 100644 --- a/src/vuln_analysis/register.py +++ b/src/vuln_analysis/register.py @@ -502,7 +502,7 @@ async def call_llm_engine_subgraph_node(message: AgentMorpheusEngineInput): graph = graph_builder.compile() #graph.get_graph().draw_mermaid_png(output_file_path="checker_flow.png") - def convert_str_to_agent_morpheus_input(input: str) -> AgentMorpheusInput: + def convert_str_to_exploit_intelligence_input(input: str) -> AgentMorpheusInput: logger.debug("Converting JSON string input to AgentMorpheusInput (length: %d)", len(input)) try: return AgentMorpheusInput.model_validate_json(input) @@ -510,7 +510,7 @@ def convert_str_to_agent_morpheus_input(input: str) -> AgentMorpheusInput: logger.error("Failed to convert input to AgentMorpheusInput: %s. Your input needs to be a json string.", e) raise e - def convert_textio_to_agent_morpheus_input(input: TextIOWrapper) -> AgentMorpheusInput: + def convert_textio_to_exploit_intelligence_input(input: TextIOWrapper) -> AgentMorpheusInput: logger.debug("Converting TextIOWrapper input to AgentMorpheusInput") try: data = input.read() @@ -520,7 +520,7 @@ def convert_textio_to_agent_morpheus_input(input: TextIOWrapper) -> AgentMorpheu "Failed to convert input to AgentMorpheusInput: %s. 
Your input needs to be a TextIOWrapper object.", e) raise e - def convert_agent_morpheus_output_to_str(output: AgentMorpheusOutput) -> str: + def convert_exploit_intelligence_output_to_str(output: AgentMorpheusOutput) -> str: logger.debug("Converting AgentMorpheusOutput to JSON string") try: return output.model_dump_json() @@ -539,9 +539,9 @@ async def _response_fn(input_message: AgentMorpheusInput) -> AgentMorpheusOutput description=config.description, input_schema=AgentMorpheusInput, converters=[ - convert_str_to_agent_morpheus_input, - convert_textio_to_agent_morpheus_input, - convert_agent_morpheus_output_to_str + convert_str_to_exploit_intelligence_input, + convert_textio_to_exploit_intelligence_input, + convert_exploit_intelligence_output_to_str ]) except GeneratorExit: logger.info("Workflow exited early!") diff --git a/src/vuln_analysis/utils/function_name_extractor.py b/src/vuln_analysis/utils/function_name_extractor.py index 6d789561a..45b4107be 100644 --- a/src/vuln_analysis/utils/function_name_extractor.py +++ b/src/vuln_analysis/utils/function_name_extractor.py @@ -20,7 +20,7 @@ from exploit_iq_commons.logging.loggers_factory import LoggingFactory -logger = LoggingFactory.get_agent_logger(f"morpheus.{__name__}") +logger = LoggingFactory.get_agent_logger(f"exploit-intelligence.{__name__}") def traverse_all_parameters(function_ending_index_end, function_prefix_index_end, function_string): diff --git a/src/vuln_analysis/utils/function_name_locator.py b/src/vuln_analysis/utils/function_name_locator.py index a7f43260e..60e776579 100644 --- a/src/vuln_analysis/utils/function_name_locator.py +++ b/src/vuln_analysis/utils/function_name_locator.py @@ -25,7 +25,7 @@ from exploit_iq_commons.utils.source_rpm_downloader import RPMDependencyManager from vuln_analysis.utils.prompt_factory import FL_EXAMPLES -logger = LoggingFactory.get_agent_logger(f"morpheus.{__name__}") +logger = LoggingFactory.get_agent_logger(f"exploit-intelligence.{__name__}") class 
FunctionNameLocator: diff --git a/src/vuln_analysis/utils/llm_engine_utils.py b/src/vuln_analysis/utils/llm_engine_utils.py index 727830212..8b2268cc7 100644 --- a/src/vuln_analysis/utils/llm_engine_utils.py +++ b/src/vuln_analysis/utils/llm_engine_utils.py @@ -93,7 +93,7 @@ def preprocess_engine_input(message: AgentMorpheusEngineInput) -> AgentMorpheusE original_input=message) -def parse_agent_morpheus_engine_output(vuln_id: str, +def parse_exploit_intelligence_engine_output(vuln_id: str, checklist_results: list[dict[str, typing.Any]], summary: str, justification: dict[str, str], @@ -244,7 +244,7 @@ def postprocess_engine_output(message: AgentMorpheusEngineInput, for vuln_id in input_vuln_ids: if vuln_id in output_vuln_ids: output.append( - parse_agent_morpheus_engine_output(vuln_id=vuln_id, + parse_exploit_intelligence_engine_output(vuln_id=vuln_id, checklist_results=result.checklist_results[vuln_id], summary=result.final_summaries[vuln_id], justification=result.justifications[vuln_id], diff --git a/src/vuln_analysis/utils/vex/implementations/csaf_generator.py b/src/vuln_analysis/utils/vex/implementations/csaf_generator.py index 605c37192..3d556e618 100644 --- a/src/vuln_analysis/utils/vex/implementations/csaf_generator.py +++ b/src/vuln_analysis/utils/vex/implementations/csaf_generator.py @@ -47,9 +47,9 @@ NOTE_TITLE_VULNERABILITY_DESCRIPTION = "Vulnerability description" NOTE_TITLE_VULNERABILITY_SUMMARY = "Vulnerability summary" NOTE_TITLE_RHSA_STATEMENT = "Red Hat Security Advisory Statement" -NOTE_TITLE_EXPLOITIQ_SUMMARY = "ExploitIQ Analysis Summary" -NOTE_TITLE_EXPLOITIQ_JUSTIFICATION_REASONING = "ExploitIQ Analysis Justification Reasoning" -NOTE_TITLE_EXPLOITIQ_JUSTIFICATION_LABEL = "ExploitIQ Analysis Justification Label" +NOTE_TITLE_EXPLOITIQ_SUMMARY = "RHTPA exploit intelligence Analysis Summary" +NOTE_TITLE_EXPLOITIQ_JUSTIFICATION_REASONING = "RHTPA exploit intelligence Analysis Justification Reasoning" +NOTE_TITLE_EXPLOITIQ_JUSTIFICATION_LABEL = 
"RHTPA exploit intelligence Analysis Justification Label" NOTE_TITLE_UNOFFICIAL_CONTENT = "Unofficial Content Notice" # Disclaimer text @@ -176,7 +176,7 @@ def generate(self, state: AgentMorpheusEngineState) -> Dict[str, Any]: product_name = message.input.image.name product_tag = message.input.image.tag - csaf_gen.set_header_title(f"ExploitIQ VEX Document - {product_name}{"@" if OCI_DIGEST_RE.fullmatch(product_tag) else ":"}{product_tag}") + csaf_gen.set_header_title(f"RHTPA exploit intelligence VEX Document - {product_name}{"@" if OCI_DIGEST_RE.fullmatch(product_tag) else ":"}{product_tag}") csaf_gen.set_value("notes",[ { From ee80d0093c407eae20f7576253d50fe8c4be346d Mon Sep 17 00:00:00 2001 From: Tamar Weisskopf Date: Sun, 21 Jun 2026 04:07:51 +0300 Subject: [PATCH 02/21] Rename ExploitIQ/Agent morpheus to Exploit Intelligence --- README.md | 2 +- kustomize/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 692d72f8f..f4fab8a9f 100644 --- a/README.md +++ b/README.md @@ -149,7 +149,7 @@ The detailed architecture consists of the following components: - **Vector database**: Various vector databases can be used for the embedding. We currently utilize FAISS for the VDB because it does not require an external service and is simple to use. Any vector store can be used, such as NVIDIA cuVS, which would provide accelerated indexing and search. - **Lexical search**: As an alternative, a lexical search is available for use cases where creating an embedding is impractical due to a large number of source files in the target container. - **Software Bill of Materials (SBOM)**: A Software Bill of Materials (SBOM) is a machine-readable manifest of all the dependencies of a software package or container. The blueprint cross-references every entry in the SBOM for known vulnerabilities and looks at the code implementation to see whether the implementation puts users at risk—just as a security analyst would do. 
For this reason, starting with an accurate SBOM is an important first step. SBOMs can be generated for any container using the open-source tool [Syft](https://github.com/anchore/syft). For more information on generating SBOMs for your containers, see the [SBOM documentation](./src/vuln_analysis/data/sboms/README.md). - - **Web vulnerability intel**: The system collects detailed information about each CVE through web scraping and data retrieval from various public security databases, including GHSA, Redhat, Ubuntu, and NIST CVE records, as well as RHTPA exploit intelligence feeds. + - **Web vulnerability intel**: The system collects detailed information about each CVE through web scraping and data retrieval from various public security databases, including GHSA, Redhat, Ubuntu, and NIST CVE records, as well as tailored threat intelligence feeds. - **Core LLM engine**: The below actions comprise the core LLM engine and are each implemented as NeMo Agent toolkit functions within the workflow. - **Checklist generation**: Leveraging the gathered information about each vulnerability, the checklist generation node creates a tailored, context-sensitive task checklist designed to guide the impact analysis. (See [`src/vuln_analysis/functions/cve_checklist.py`](./src/vuln_analysis/functions/cve_checklist.py).) diff --git a/kustomize/README.md b/kustomize/README.md index 490128731..b63230d94 100644 --- a/kustomize/README.md +++ b/kustomize/README.md @@ -98,7 +98,7 @@ export USE_CONTAINER_SOURCES=true ## Deploy And Run On OCP -1. Create a `base/secrets.env` file containing the API keys for external services the exploit intelligence workflow might use. Not all keys are mandatory. Refer to the main [README](../README.md#obtain-api-keys) for details on how to create the Red Hat credentials and other API keys. +1. Create a `base/secrets.env` file containing the API keys for external services `RHTPA exploit intelligence` might use. Not all keys are mandatory. 
Refer to the main [README](../README.md#obtain-api-keys) for details on how to create the Red Hat credentials and other API keys. ```shell cat > base/secrets.env << EOF From b62a4bdda980745902912cfbe95b862f1da45869 Mon Sep 17 00:00:00 2001 From: Tamar Weisskopf Date: Mon, 22 Jun 2026 09:49:16 +0300 Subject: [PATCH 03/21] Align tests according to the last changes --- .../utils/vex/implementations/csaf_generator.py | 2 +- .../utils/vex/tests/test_csaf_generator_integration.py | 4 ++-- tests/test_vex_csaf_helpers.py | 9 +++++---- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/vuln_analysis/utils/vex/implementations/csaf_generator.py b/src/vuln_analysis/utils/vex/implementations/csaf_generator.py index 3d556e618..cd2cf42b2 100644 --- a/src/vuln_analysis/utils/vex/implementations/csaf_generator.py +++ b/src/vuln_analysis/utils/vex/implementations/csaf_generator.py @@ -138,7 +138,7 @@ def _enrich_vulnerabilities_with_notes( "title": NOTE_TITLE_RHSA_STATEMENT }) - # Add ExploitIQ analysis summary + # Add RHTPA exploit intelligence Analysis Summary summary = final_summaries.get(vuln_id) notes.append({ "category": NOTE_CATEGORY_OTHER, diff --git a/src/vuln_analysis/utils/vex/tests/test_csaf_generator_integration.py b/src/vuln_analysis/utils/vex/tests/test_csaf_generator_integration.py index 5bca102d3..53f0dfe37 100644 --- a/src/vuln_analysis/utils/vex/tests/test_csaf_generator_integration.py +++ b/src/vuln_analysis/utils/vex/tests/test_csaf_generator_integration.py @@ -119,7 +119,7 @@ def test_document_has_correct_title(self, mock_state): result = generator.generate(mock_state) title = result["document"].get("title") - assert "ExploitIQ VEX Document - " + _DEFAULT_PRODUCT_NAME + ":" + _DEFAULT_PRODUCT_TAG in title + assert "RHTPA exploit intelligence VEX Document - " + _DEFAULT_PRODUCT_NAME + ":" + _DEFAULT_PRODUCT_TAG in title def test_oci_digest_tag_uses_at_separator(self): """Test that OCI digest tags use @ separator instead of : in title.""" @@ -132,7 
+132,7 @@ def test_oci_digest_tag_uses_at_separator(self): result = generator.generate(state) title = result["document"].get("title") - assert "ExploitIQ VEX Document - " + _DEFAULT_PRODUCT_NAME + "@" + oci_digest in title + assert "RHTPA exploit intelligence VEX Document - " + _DEFAULT_PRODUCT_NAME + "@" + oci_digest in title def test_document_has_disclaimer_note(self, mock_state): """Test that document includes the disclaimer note.""" diff --git a/tests/test_vex_csaf_helpers.py b/tests/test_vex_csaf_helpers.py index 687e52473..84a2ade9a 100644 --- a/tests/test_vex_csaf_helpers.py +++ b/tests/test_vex_csaf_helpers.py @@ -21,7 +21,8 @@ from exploit_iq_commons.data_models.cve_intel import CveIntel, CveIntelGhsa, CveIntelRhsa from vuln_analysis.utils.vex.implementations.csaf_generator import ( - _enrich_vulnerabilities_with_notes, + _enrich_vulnerabilities_with_notes, NOTE_TITLE_EXPLOITIQ_JUSTIFICATION_REASONING, NOTE_TITLE_EXPLOITIQ_SUMMARY, + NOTE_TITLE_EXPLOITIQ_JUSTIFICATION_LABEL, ) @@ -94,7 +95,7 @@ def test_adds_analysis_summary_note(self, base_csaf_json, base_intel_map, base_j _enrich_vulnerabilities_with_notes(base_csaf_json, base_intel_map, final_summaries, base_justifications) notes = base_csaf_json["vulnerabilities"][0]["notes"] - analysis_notes = [n for n in notes if n.get("title") == "ExploitIQ Analysis Summary"] + analysis_notes = [n for n in notes if n.get("title") == NOTE_TITLE_EXPLOITIQ_SUMMARY] assert len(analysis_notes) == 1 assert analysis_notes[0]["text"] == "This is the analysis summary" assert analysis_notes[0]["category"] == "other" @@ -112,11 +113,11 @@ def test_adds_justification_notes(self, base_csaf_json, base_intel_map, base_fin notes = base_csaf_json["vulnerabilities"][0]["notes"] - reasoning_notes = [n for n in notes if n.get("title") == "ExploitIQ Analysis Justification Reasoning"] + reasoning_notes = [n for n in notes if n.get("title") == NOTE_TITLE_EXPLOITIQ_JUSTIFICATION_REASONING] assert len(reasoning_notes) == 1 assert 
reasoning_notes[0]["text"] == "The vulnerable code path is reachable" - label_notes = [n for n in notes if n.get("title") == "ExploitIQ Analysis Justification Label"] + label_notes = [n for n in notes if n.get("title") == NOTE_TITLE_EXPLOITIQ_JUSTIFICATION_LABEL] assert len(label_notes) == 1 assert label_notes[0]["text"] == "vulnerable" From 49e0d184519792865c0c29115a681002992e8111 Mon Sep 17 00:00:00 2001 From: Tamar Weisskopf Date: Tue, 23 Jun 2026 21:27:39 +0300 Subject: [PATCH 04/21] Update agent image name --- .tekton/on-cm-runner.yaml | 2 +- .tekton/on-pull-request.yaml | 2 +- .tekton/on-push.yaml | 2 +- .tekton/on-tag.yaml | 2 +- kustomize/base/exploit_iq_service.yaml | 4 ++-- kustomize/base/kustomization.yaml | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.tekton/on-cm-runner.yaml b/.tekton/on-cm-runner.yaml index fc4003022..e4265c5b6 100644 --- a/.tekton/on-cm-runner.yaml +++ b/.tekton/on-cm-runner.yaml @@ -26,7 +26,7 @@ spec: value: "{{ trigger_comment }}" # Point to the image ALREADY built by the PR pipeline - name: target-image - value: quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-service:on-pr-{{revision}} + value: quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-agent:on-pr-{{revision}} pipelineSpec: params: diff --git a/.tekton/on-pull-request.yaml b/.tekton/on-pull-request.yaml index 8e1823663..2769cf6fc 100644 --- a/.tekton/on-pull-request.yaml +++ b/.tekton/on-pull-request.yaml @@ -31,7 +31,7 @@ spec: - name: image-expires-after value: 5d - name: output-image - value: quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-service:on-pr-{{revision}} + value: quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-agent:on-pr-{{revision}} - name: path-context value: . 
- name: dockerfile diff --git a/.tekton/on-push.yaml b/.tekton/on-push.yaml index 93e96a5d3..09887328c 100644 --- a/.tekton/on-push.yaml +++ b/.tekton/on-push.yaml @@ -26,7 +26,7 @@ spec: - name: revision value: "{{ revision }}" - name: output-image - value: quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-service:latest + value: quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-agent:latest - name: path-context value: . - name: dockerfile diff --git a/.tekton/on-tag.yaml b/.tekton/on-tag.yaml index cb479c761..af5bc73a4 100644 --- a/.tekton/on-tag.yaml +++ b/.tekton/on-tag.yaml @@ -26,7 +26,7 @@ spec: - name: revision value: "{{ revision }}" - name: output-image - value: 'quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-service' + value: 'quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-agent' - name: tag-name value: "{{ target_branch }}" - name: path-context diff --git a/kustomize/base/exploit_iq_service.yaml b/kustomize/base/exploit_iq_service.yaml index 3344ca69e..7e239e208 100644 --- a/kustomize/base/exploit_iq_service.yaml +++ b/kustomize/base/exploit_iq_service.yaml @@ -25,7 +25,7 @@ spec: serviceAccountName: exploit-iq-sa containers: - name: exploit-iq-phoenix-tracing - image: quay.io/ecosystem-appeng/agent-exploit-intelligence-rh:nat + image: quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-agent:nat imagePullPolicy: Always workingDir: /workspace/ args: @@ -45,7 +45,7 @@ spec: memory: "1Gi" cpu: "100m" - name: exploit-iq - image: quay.io/ecosystem-appeng/agent-exploit-intelligence-rh:nat + image: quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-agent:nat imagePullPolicy: Always workingDir: /workspace/ args: diff --git a/kustomize/base/kustomization.yaml b/kustomize/base/kustomization.yaml index f6fdf8efd..211fa85a1 100644 --- a/kustomize/base/kustomization.yaml +++ b/kustomize/base/kustomization.yaml @@ -94,10 +94,10 @@ patches: kind: Deployment images: - - name: quay.io/ecosystem-appeng/agent-exploit-intelligence-rh + - name: 
quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-agent newTag: latest - - name: quay.io/ecosystem-appeng/agent-exploit-intelligence-client + - name: quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-agent newTag: latest - name: quay.io/exploit-iq/exploitiq-mcp-server From 1abea07be6938064d29c3b2f040890188bfafe99 Mon Sep 17 00:00:00 2001 From: Tamar Weisskopf Date: Wed, 24 Jun 2026 00:38:43 +0300 Subject: [PATCH 05/21] Update agent image name --- kustomize/base/exploit_iq_client.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kustomize/base/exploit_iq_client.yaml b/kustomize/base/exploit_iq_client.yaml index 69f0d8539..b4cd58564 100644 --- a/kustomize/base/exploit_iq_client.yaml +++ b/kustomize/base/exploit_iq_client.yaml @@ -27,7 +27,7 @@ spec: - ./application - -Dquarkus.http.host=0.0.0.0 - -Dquarkus.log.category."com.redhat.ecosystemappeng.exploitintelligence".level=DEBUG - image: quay.io/ecosystem-appeng/agent-exploit-intelligence-client:latest + image: quay.io/ecosystem-appeng/exploit-intelligence-client:latest imagePullPolicy: Always ports: - name: http From 2e55b5fdd4ffe5cd0133220111af6325f9981066 Mon Sep 17 00:00:00 2001 From: Tamar Weisskopf Date: Wed, 24 Jun 2026 01:54:24 +0300 Subject: [PATCH 06/21] Revert agent image name change --- .tekton/on-pull-request.yaml | 2 +- .tekton/on-push.yaml | 2 +- .tekton/on-tag.yaml | 2 +- kustomize/base/exploit_iq_service.yaml | 4 ++-- kustomize/base/kustomization.yaml | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.tekton/on-pull-request.yaml b/.tekton/on-pull-request.yaml index 276466631..e72258311 100644 --- a/.tekton/on-pull-request.yaml +++ b/.tekton/on-pull-request.yaml @@ -33,7 +33,7 @@ spec: - name: image-expires-after value: 5d - name: output-image - value: quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-agent:on-pr-{{revision}} + value: quay.io/ecosystem-appeng/agent-morpheus-rh:on-pr-{{revision}} - name: path-context value: . 
- name: dockerfile diff --git a/.tekton/on-push.yaml b/.tekton/on-push.yaml index 09887328c..5da2cc106 100644 --- a/.tekton/on-push.yaml +++ b/.tekton/on-push.yaml @@ -26,7 +26,7 @@ spec: - name: revision value: "{{ revision }}" - name: output-image - value: quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-agent:latest + value: quay.io/ecosystem-appeng/agent-morpheus-rh:latest - name: path-context value: . - name: dockerfile diff --git a/.tekton/on-tag.yaml b/.tekton/on-tag.yaml index af5bc73a4..08718fd32 100644 --- a/.tekton/on-tag.yaml +++ b/.tekton/on-tag.yaml @@ -26,7 +26,7 @@ spec: - name: revision value: "{{ revision }}" - name: output-image - value: 'quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-agent' + value: 'quay.io/ecosystem-appeng/agent-morpheus-rh' - name: tag-name value: "{{ target_branch }}" - name: path-context diff --git a/kustomize/base/exploit_iq_service.yaml b/kustomize/base/exploit_iq_service.yaml index a7514e654..2f99c7411 100644 --- a/kustomize/base/exploit_iq_service.yaml +++ b/kustomize/base/exploit_iq_service.yaml @@ -25,7 +25,7 @@ spec: serviceAccountName: exploit-iq-sa containers: - name: exploit-iq-phoenix-tracing - image: quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-agent:nat + image: quay.io/ecosystem-appeng/agent-morpheus-rh:nat imagePullPolicy: Always workingDir: /workspace/ args: @@ -45,7 +45,7 @@ spec: memory: "1Gi" cpu: "100m" - name: exploit-iq - image: quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-agent:nat + image: quay.io/ecosystem-appeng/agent-morpheus-rh:nat imagePullPolicy: Always workingDir: /workspace/ args: diff --git a/kustomize/base/kustomization.yaml b/kustomize/base/kustomization.yaml index e971c34ee..ef7608241 100644 --- a/kustomize/base/kustomization.yaml +++ b/kustomize/base/kustomization.yaml @@ -94,10 +94,10 @@ patches: kind: Deployment images: - - name: quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-agent + - name: quay.io/ecosystem-appeng/agent-morpheus-rh newTag: latest - - name: 
quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-agent + - name: quay.io/ecosystem-appeng/agent-morpheus-rh newTag: latest - name: quay.io/ecosystem-appeng/exploitiq-mcp-server From 1fdc29c1cfff5901c75db84e54c1daaf51a05c37 Mon Sep 17 00:00:00 2001 From: Tamar Weisskopf Date: Wed, 24 Jun 2026 16:02:23 +0300 Subject: [PATCH 07/21] Remove unrelated changes --- .tekton/on-cm-runner.yaml | 2 +- kustomize/base/argilla/argilla-service.yaml | 2 +- .../argilla/argilla-user-feedback-pvc.yaml | 2 +- kustomize/base/argilla/deployment.yaml | 8 +- kustomize/base/argilla/service.yaml | 6 +- kustomize/base/exploit-iq-config.yml.bak | 314 ++++++++++++++++++ kustomize/base/exploit_iq_client.yaml | 4 +- 7 files changed, 326 insertions(+), 12 deletions(-) create mode 100644 kustomize/base/exploit-iq-config.yml.bak diff --git a/.tekton/on-cm-runner.yaml b/.tekton/on-cm-runner.yaml index 1b227d63c..439d6114e 100644 --- a/.tekton/on-cm-runner.yaml +++ b/.tekton/on-cm-runner.yaml @@ -26,7 +26,7 @@ spec: value: "{{ trigger_comment }}" # Point to the image ALREADY built by the PR pipeline - name: target-image - value: quay.io/ecosystem-appeng/rhtpa-exploit-intelligence-agent:on-pr-{{revision}} + value: quay.io/ecosystem-appeng/agent-morpheus-rh:on-pr-{{revision}} pipelineSpec: params: diff --git a/kustomize/base/argilla/argilla-service.yaml b/kustomize/base/argilla/argilla-service.yaml index 65a1ea6f3..cc9f2840b 100644 --- a/kustomize/base/argilla/argilla-service.yaml +++ b/kustomize/base/argilla/argilla-service.yaml @@ -6,7 +6,7 @@ metadata: app: argilla spec: selector: - app: exploit-intelligence-feedback-api + app: morpheus-feedback-api ports: - protocol: TCP port: 6900 diff --git a/kustomize/base/argilla/argilla-user-feedback-pvc.yaml b/kustomize/base/argilla/argilla-user-feedback-pvc.yaml index bdcbc7b52..8a730ef7c 100644 --- a/kustomize/base/argilla/argilla-user-feedback-pvc.yaml +++ b/kustomize/base/argilla/argilla-user-feedback-pvc.yaml @@ -4,7 +4,7 @@ kind: PersistentVolumeClaim 
metadata: name: argilla-user-feedback-pvc labels: - app: exploit-intelligence-feedback-api + app: morpheus-feedback-api spec: accessModes: - ReadWriteOnce diff --git a/kustomize/base/argilla/deployment.yaml b/kustomize/base/argilla/deployment.yaml index 7f898faa4..0e1c6601d 100644 --- a/kustomize/base/argilla/deployment.yaml +++ b/kustomize/base/argilla/deployment.yaml @@ -1,20 +1,20 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: exploit-intelligence-feedback-api + name: morpheus-feedback-api labels: - app: exploit-intelligence-feedback-api + app: morpheus-feedback-api spec: replicas: 1 selector: matchLabels: - app: exploit-intelligence-feedback-api + app: morpheus-feedback-api strategy: type: Recreate template: metadata: labels: - app: exploit-intelligence-feedback-api + app: morpheus-feedback-api spec: restartPolicy: Always imagePullSecrets: diff --git a/kustomize/base/argilla/service.yaml b/kustomize/base/argilla/service.yaml index 5ad95328f..545316330 100644 --- a/kustomize/base/argilla/service.yaml +++ b/kustomize/base/argilla/service.yaml @@ -1,12 +1,12 @@ apiVersion: v1 kind: Service metadata: - name: exploit-intelligence-feedback-api + name: morpheus-feedback-api labels: - app: exploit-intelligence-feedback-api + app: morpheus-feedback-api spec: selector: - app: exploit-intelligence-feedback-api + app: morpheus-feedback-api ports: - protocol: TCP port: 5001 diff --git a/kustomize/base/exploit-iq-config.yml.bak b/kustomize/base/exploit-iq-config.yml.bak new file mode 100644 index 000000000..abfa7d15b --- /dev/null +++ b/kustomize/base/exploit-iq-config.yml.bak @@ -0,0 +1,314 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +general: + front_end: + _type: fastapi + endpoints: + - path: /health + method: GET + description: Perform a health check. + function_name: health_check + use_uvloop: true + telemetry: + tracing: + phoenix: + _type: phoenix + endpoint: ${OTEL_TRACES_ENDPOINT:-http://localhost:6006/v1/traces} + project: cve_agent + +functions: + cve_generate_vdbs: + _type: cve_generate_vdbs + agent_name: cve_agent_executor # Used to determine which tools are enabled + embedder_name: nim_embedder + base_git_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}git + base_vdb_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}vdb + base_code_index_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}code_index + base_pickle_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}pickle + base_rpm_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}rpms + ignore_code_embedding: true + cve_fetch_intel: + _type: cve_fetch_intel + retry_on_client_errors: false + intel_plugin_config: + plugin_name: vuln_analysis.data_models.plugins.intel_plugin.SimpleHttpIntelPlugin + plugin_config: + source: Product Security research + endpoint: CALLBACK_URL_PLACEHOLDER/api/v1/vulnerabilities/{vuln_id}/comments + token_path: /var/run/secrets/kubernetes.io/serviceaccount/token + verify_path: /app/certs/service-ca.crt + + cve_process_sbom: + _type: cve_process_sbom + cve_check_vuln_deps : + _type: cve_check_vuln_deps + skip: true + cve_checklist: + _type: cve_checklist + llm_name: checklist_llm + Call Chain Analyzer: + _type: transitive_code_search + enable_transitive_search: true + Function Caller Finder: + _type: 
calling_function_name_extractor + enable_functions_usage_search: true + Function Locator: + _type: package_and_function_locator + Function Library Version Finder: + _type: calling_function_library_version_finder + Code Semantic Search: + _type: local_vdb_retriever + embedder_name: nim_embedder + llm_name: code_vdb_retriever_llm + vdb_type: code + return_source_documents: false + Docs Semantic Search: + _type: local_vdb_retriever + embedder_name: nim_embedder + llm_name: doc_vdb_retriever_llm + vdb_type: doc + return_source_documents: false + Code Keyword Search: + _type: lexical_code_search + top_k: 5 + Source Grep: + _type: source_grep + base_checker_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}checker + max_results: 50 + context_lines: 2 + CVE Web Search: + _type: serp_wrapper + max_retries: 5 + Container Analysis Data: + _type: container_image_analysis_data + cve_agent_executor: + _type: cve_agent_executor + llm_name: cve_agent_executor_llm + tool_names: + - Code Semantic Search + - Docs Semantic Search + - Code Keyword Search + - CVE Web Search + - Call Chain Analyzer + - Function Caller Finder + - Function Locator + - Function Library Version Finder + max_concurrency: null + max_iterations: 10 + prompt_examples: false + replace_exceptions: true + replace_exceptions_value: "I do not have a definitive answer for this checklist item." + return_intermediate_steps: false +# transitive_search_tool_enabled: false + cve_web_search_enabled: true + verbose: false + cve_generate_cvss: + _type: cve_generate_cvss + skip: true + llm_name: generate_cvss_llm + tool_names: + - Code Semantic Search + - Docs Semantic Search + - Code Keyword Search + - Container Analysis Data + max_concurrency: null + max_iterations: 10 + prompt_examples: true + replace_exceptions: false + replace_exceptions_value: "Failed to generate CVSS for this analysis." 
+ return_intermediate_steps: false + verbose: false + cve_summarize: + _type: cve_summarize + llm_name: summarize_llm + cve_justify: + _type: cve_justify + llm_name: justify_llm + cve_generate_vex: + _type: cve_generate_vex + skip: false + # vex_format: csaf + cve_http_output: + _type: cve_http_output + url: CALLBACK_URL_PLACEHOLDER + endpoint: /api/v1/reports + auth_type: bearer + token_path: /var/run/secrets/kubernetes.io/serviceaccount/token + verify_path: /app/certs/service-ca.crt + enable_mlops: ${ENABLE_MLOPS:-false} + mlops_config: + mlops_url: http://localhost:8080 + auth_type: "bearer" + token_path: "/var/run/secrets/kubernetes.io/serviceaccount/token" + verify_path: "/app/certs/service-ca.crt" + enable_verify: true + cve_calculate_intel_score: + _type: cve_calculate_intel_score + llm_name: intel_source_score_llm + generate_intel_score: true + intel_low_score: 51 + insist_analysis: false + cve_source_acquisition: + _type: cve_source_acquisition + base_git_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}git + base_pickle_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}pickle + base_rpm_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}rpms + base_checker_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}checker + rpm_user_type: ${RPM_USER_TYPE:-internal} + cve_checker_segmentation: + _type: cve_checker_segmentation + base_checker_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}checker + base_code_index_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}code_index + cve_package_code_agent: + _type: cve_package_code_agent + llm_name: cve_agent_executor_llm + base_checker_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}checker + base_code_index_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}code_index + rpm_user_type: ${RPM_USER_TYPE:-internal} + tool_names: + - Source Grep + - Code Keyword Search + cve_checker_report: + _type: cve_checker_report + llm_name: cve_agent_executor_llm + base_checker_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}checker + cve_build_agent: + _type: 
cve_build_agent + llm_name: cve_agent_executor_llm + base_checker_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}checker + max_iterations: 10 + tool_names: + - Source Grep + - Code Keyword Search + health_check: + _type: health_check + +llms: + checklist_llm: + _type: ${LLM_TYPE_CHECKLIST:-nim} + api_key: ${LLM_API_KEY_CHECKLIST:-"EMPTY"} + base_url: ${CHECKLIST_LLM_API_BASE:-https://integrate.api.nvidia.com/v1} + model_name: ${CHECKLIST_MODEL_NAME:-meta/llama-3.1-70b-instruct} + temperature: 0.0 + max_tokens: 2000 + top_p: 0.01 + code_vdb_retriever_llm: + _type: ${LLM_TYPE_VDB_CODE_RETRIEVER:-nim} + api_key: ${LLM_API_KEY_CODE_VDB_RETRIEVER:-"EMPTY"} + base_url: ${CODE_VDB_RETRIEVER_API_BASE:-https://integrate.api.nvidia.com/v1} + model_name: ${CODE_VDB_RETRIEVER_MODEL_NAME:-meta/llama-3.1-70b-instruct} + temperature: 0.0 + max_tokens: 2000 + top_p: 0.01 + doc_vdb_retriever_llm: + _type: ${LLM_TYPE_VDB_DOC_RETRIEVER:-nim} + api_key: ${LLM_API_KEY_DOC_VDB_RETRIEVER:-"EMPTY"} + base_url: ${DOC_VDB_RETRIEVER_API_BASE:-https://integrate.api.nvidia.com/v1} + model_name: ${DOC_VDB_RETRIEVER_MODEL_NAME:-meta/llama-3.1-70b-instruct} + temperature: 0.0 + max_tokens: 2000 + top_p: 0.01 + cve_agent_executor_llm: + _type: ${LLM_TYPE_AGENT_EXECUTOR:-nim} + api_key: ${LLM_API_KEY_AGENT_EXECUTOR:-"EMPTY"} + base_url: ${AGENT_EXECUTOR_LLM_API_BASE:-https://integrate.api.nvidia.com/v1} + model_name: ${AGENT_EXECUTOR_MODEL_NAME:-meta/llama-3.1-70b-instruct} + temperature: 0.0 + max_tokens: 2000 + top_p: 0.01 + generate_cvss_llm: + _type: ${LLM_TYPE_GENERATE_CVSS:-nim} + api_key: ${LLM_API_KEY_GENERATE_CVSS:-"EMPTY"} + base_url: ${GENERATE_CVSS_LLM_API_BASE:-https://integrate.api.nvidia.com/v1} + model_name: ${GENERATE_CVSS_MODEL_NAME:-meta/llama-3.1-70b-instruct} + temperature: 0.0 + max_tokens: 1024 + top_p: 0.01 + summarize_llm: + _type: ${LLM_TYPE_SUMMARIZE:-nim} + api_key: ${LLM_API_KEY_SUMMARIZE:-"EMPTY"} + base_url: 
${SUMMARIZE_LLM_API_BASE:-https://integrate.api.nvidia.com/v1} + model_name: ${SUMMARIZE_MODEL_NAME:-meta/llama-3.1-70b-instruct} + temperature: 0.0 + max_tokens: 1024 + top_p: 0.01 + justify_llm: + _type: ${LLM_TYPE_JUSTIFY:-nim} + api_key: ${LLM_API_KEY_JUSTIFY:-"EMPTY"} + base_url: ${JUSTIFY_LLM_API_BASE:-https://integrate.api.nvidia.com/v1} + model_name: ${JUSTIFY_MODEL_NAME:-meta/llama-3.1-70b-instruct} + temperature: 0.0 + max_tokens: 1024 + top_p: 0.01 + + intel_source_score_llm: + _type: ${LLM_TYPE_INTEL_SOURCE_SCORE:-nim} + api_key: ${LLM_API_KEY_INTEL_SOURCE_SCORE:-"EMPTY"} + base_url: ${INTEL_SOURCE_SCORE_LLM_API_BASE:-https://integrate.api.nvidia.com/v1} + model_name: ${INTEL_SOURCE_SCORE_MODEL_NAME:-meta/llama-3.1-70b-instruct} + temperature: 0.0 + max_tokens: 1024 + top_p: 0.01 + +embedders: + nim_embedder: + _type: nim + base_url: ${NIM_EMBED_BASE_URL:-https://integrate.api.nvidia.com/v1} + model_name: ${EMBEDDER_MODEL_NAME:-nvidia/nv-embedqa-e5-v5} + truncate: END + max_batch_size: 128 + +workflow: + _type: cve_agent + cve_generate_vdbs_name: cve_generate_vdbs + cve_fetch_intel_name: cve_fetch_intel + cve_calculate_intel_score_name: cve_calculate_intel_score + cve_process_sbom_name: cve_process_sbom + cve_check_vuln_deps_name: cve_check_vuln_deps + cve_checklist_name: cve_checklist + cve_agent_executor_name: cve_agent_executor + cve_generate_cvss_name: cve_generate_cvss + cve_generate_vex_name: cve_generate_vex + cve_summarize_name: cve_summarize + cve_justify_name: cve_justify + cve_output_config_name: cve_http_output + cve_source_acquisition_name: cve_source_acquisition + cve_checker_segmentation_name: cve_checker_segmentation + cve_package_code_agent_name: cve_package_code_agent + cve_checker_report_name: cve_checker_report + cve_build_agent_name: cve_build_agent + +eval: + general: + output_dir: ./.tmp/eval/cve_agent + dataset: + _type: json + file_path: data/eval_datasets/eval_dataset.json + + profiler: + token_uniqueness_forecast: true + 
workflow_runtime_forecast: true + compute_llm_metrics: true + csv_exclude_io_text: true + prompt_caching_prefixes: + enable: true + min_frequency: 0.1 + bottleneck_analysis: + # Can also be simple_stack + enable_nested_stack: true + concurrency_spike_analysis: + enable: true + spike_threshold: 7 diff --git a/kustomize/base/exploit_iq_client.yaml b/kustomize/base/exploit_iq_client.yaml index b4cd58564..d9a582bdd 100644 --- a/kustomize/base/exploit_iq_client.yaml +++ b/kustomize/base/exploit_iq_client.yaml @@ -26,8 +26,8 @@ spec: args: - ./application - -Dquarkus.http.host=0.0.0.0 - - -Dquarkus.log.category."com.redhat.ecosystemappeng.exploitintelligence".level=DEBUG - image: quay.io/ecosystem-appeng/exploit-intelligence-client:latest + - -Dquarkus.log.category."com.redhat.ecosystemappeng.exploitiq".level=DEBUG + image: quay.io/ecosystem-appeng/agent-morpheus-client:latest imagePullPolicy: Always ports: - name: http From 1df818d27059d089f0048700309b435fa26e8b1f Mon Sep 17 00:00:00 2001 From: Tamar Weisskopf Date: Wed, 24 Jun 2026 16:15:58 +0300 Subject: [PATCH 08/21] Remove unrelated changes --- kustomize/base/exploit-iq-config.yml.bak | 314 ----------------------- 1 file changed, 314 deletions(-) delete mode 100644 kustomize/base/exploit-iq-config.yml.bak diff --git a/kustomize/base/exploit-iq-config.yml.bak b/kustomize/base/exploit-iq-config.yml.bak deleted file mode 100644 index abfa7d15b..000000000 --- a/kustomize/base/exploit-iq-config.yml.bak +++ /dev/null @@ -1,314 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -general: - front_end: - _type: fastapi - endpoints: - - path: /health - method: GET - description: Perform a health check. - function_name: health_check - use_uvloop: true - telemetry: - tracing: - phoenix: - _type: phoenix - endpoint: ${OTEL_TRACES_ENDPOINT:-http://localhost:6006/v1/traces} - project: cve_agent - -functions: - cve_generate_vdbs: - _type: cve_generate_vdbs - agent_name: cve_agent_executor # Used to determine which tools are enabled - embedder_name: nim_embedder - base_git_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}git - base_vdb_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}vdb - base_code_index_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}code_index - base_pickle_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}pickle - base_rpm_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}rpms - ignore_code_embedding: true - cve_fetch_intel: - _type: cve_fetch_intel - retry_on_client_errors: false - intel_plugin_config: - plugin_name: vuln_analysis.data_models.plugins.intel_plugin.SimpleHttpIntelPlugin - plugin_config: - source: Product Security research - endpoint: CALLBACK_URL_PLACEHOLDER/api/v1/vulnerabilities/{vuln_id}/comments - token_path: /var/run/secrets/kubernetes.io/serviceaccount/token - verify_path: /app/certs/service-ca.crt - - cve_process_sbom: - _type: cve_process_sbom - cve_check_vuln_deps : - _type: cve_check_vuln_deps - skip: true - cve_checklist: - _type: cve_checklist - llm_name: checklist_llm - Call Chain Analyzer: - _type: transitive_code_search - enable_transitive_search: true - Function Caller Finder: - _type: 
calling_function_name_extractor - enable_functions_usage_search: true - Function Locator: - _type: package_and_function_locator - Function Library Version Finder: - _type: calling_function_library_version_finder - Code Semantic Search: - _type: local_vdb_retriever - embedder_name: nim_embedder - llm_name: code_vdb_retriever_llm - vdb_type: code - return_source_documents: false - Docs Semantic Search: - _type: local_vdb_retriever - embedder_name: nim_embedder - llm_name: doc_vdb_retriever_llm - vdb_type: doc - return_source_documents: false - Code Keyword Search: - _type: lexical_code_search - top_k: 5 - Source Grep: - _type: source_grep - base_checker_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}checker - max_results: 50 - context_lines: 2 - CVE Web Search: - _type: serp_wrapper - max_retries: 5 - Container Analysis Data: - _type: container_image_analysis_data - cve_agent_executor: - _type: cve_agent_executor - llm_name: cve_agent_executor_llm - tool_names: - - Code Semantic Search - - Docs Semantic Search - - Code Keyword Search - - CVE Web Search - - Call Chain Analyzer - - Function Caller Finder - - Function Locator - - Function Library Version Finder - max_concurrency: null - max_iterations: 10 - prompt_examples: false - replace_exceptions: true - replace_exceptions_value: "I do not have a definitive answer for this checklist item." - return_intermediate_steps: false -# transitive_search_tool_enabled: false - cve_web_search_enabled: true - verbose: false - cve_generate_cvss: - _type: cve_generate_cvss - skip: true - llm_name: generate_cvss_llm - tool_names: - - Code Semantic Search - - Docs Semantic Search - - Code Keyword Search - - Container Analysis Data - max_concurrency: null - max_iterations: 10 - prompt_examples: true - replace_exceptions: false - replace_exceptions_value: "Failed to generate CVSS for this analysis." 
- return_intermediate_steps: false - verbose: false - cve_summarize: - _type: cve_summarize - llm_name: summarize_llm - cve_justify: - _type: cve_justify - llm_name: justify_llm - cve_generate_vex: - _type: cve_generate_vex - skip: false - # vex_format: csaf - cve_http_output: - _type: cve_http_output - url: CALLBACK_URL_PLACEHOLDER - endpoint: /api/v1/reports - auth_type: bearer - token_path: /var/run/secrets/kubernetes.io/serviceaccount/token - verify_path: /app/certs/service-ca.crt - enable_mlops: ${ENABLE_MLOPS:-false} - mlops_config: - mlops_url: http://localhost:8080 - auth_type: "bearer" - token_path: "/var/run/secrets/kubernetes.io/serviceaccount/token" - verify_path: "/app/certs/service-ca.crt" - enable_verify: true - cve_calculate_intel_score: - _type: cve_calculate_intel_score - llm_name: intel_source_score_llm - generate_intel_score: true - intel_low_score: 51 - insist_analysis: false - cve_source_acquisition: - _type: cve_source_acquisition - base_git_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}git - base_pickle_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}pickle - base_rpm_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}rpms - base_checker_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}checker - rpm_user_type: ${RPM_USER_TYPE:-internal} - cve_checker_segmentation: - _type: cve_checker_segmentation - base_checker_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}checker - base_code_index_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}code_index - cve_package_code_agent: - _type: cve_package_code_agent - llm_name: cve_agent_executor_llm - base_checker_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}checker - base_code_index_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}code_index - rpm_user_type: ${RPM_USER_TYPE:-internal} - tool_names: - - Source Grep - - Code Keyword Search - cve_checker_report: - _type: cve_checker_report - llm_name: cve_agent_executor_llm - base_checker_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}checker - cve_build_agent: - _type: 
cve_build_agent - llm_name: cve_agent_executor_llm - base_checker_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}checker - max_iterations: 10 - tool_names: - - Source Grep - - Code Keyword Search - health_check: - _type: health_check - -llms: - checklist_llm: - _type: ${LLM_TYPE_CHECKLIST:-nim} - api_key: ${LLM_API_KEY_CHECKLIST:-"EMPTY"} - base_url: ${CHECKLIST_LLM_API_BASE:-https://integrate.api.nvidia.com/v1} - model_name: ${CHECKLIST_MODEL_NAME:-meta/llama-3.1-70b-instruct} - temperature: 0.0 - max_tokens: 2000 - top_p: 0.01 - code_vdb_retriever_llm: - _type: ${LLM_TYPE_VDB_CODE_RETRIEVER:-nim} - api_key: ${LLM_API_KEY_CODE_VDB_RETRIEVER:-"EMPTY"} - base_url: ${CODE_VDB_RETRIEVER_API_BASE:-https://integrate.api.nvidia.com/v1} - model_name: ${CODE_VDB_RETRIEVER_MODEL_NAME:-meta/llama-3.1-70b-instruct} - temperature: 0.0 - max_tokens: 2000 - top_p: 0.01 - doc_vdb_retriever_llm: - _type: ${LLM_TYPE_VDB_DOC_RETRIEVER:-nim} - api_key: ${LLM_API_KEY_DOC_VDB_RETRIEVER:-"EMPTY"} - base_url: ${DOC_VDB_RETRIEVER_API_BASE:-https://integrate.api.nvidia.com/v1} - model_name: ${DOC_VDB_RETRIEVER_MODEL_NAME:-meta/llama-3.1-70b-instruct} - temperature: 0.0 - max_tokens: 2000 - top_p: 0.01 - cve_agent_executor_llm: - _type: ${LLM_TYPE_AGENT_EXECUTOR:-nim} - api_key: ${LLM_API_KEY_AGENT_EXECUTOR:-"EMPTY"} - base_url: ${AGENT_EXECUTOR_LLM_API_BASE:-https://integrate.api.nvidia.com/v1} - model_name: ${AGENT_EXECUTOR_MODEL_NAME:-meta/llama-3.1-70b-instruct} - temperature: 0.0 - max_tokens: 2000 - top_p: 0.01 - generate_cvss_llm: - _type: ${LLM_TYPE_GENERATE_CVSS:-nim} - api_key: ${LLM_API_KEY_GENERATE_CVSS:-"EMPTY"} - base_url: ${GENERATE_CVSS_LLM_API_BASE:-https://integrate.api.nvidia.com/v1} - model_name: ${GENERATE_CVSS_MODEL_NAME:-meta/llama-3.1-70b-instruct} - temperature: 0.0 - max_tokens: 1024 - top_p: 0.01 - summarize_llm: - _type: ${LLM_TYPE_SUMMARIZE:-nim} - api_key: ${LLM_API_KEY_SUMMARIZE:-"EMPTY"} - base_url: 
${SUMMARIZE_LLM_API_BASE:-https://integrate.api.nvidia.com/v1} - model_name: ${SUMMARIZE_MODEL_NAME:-meta/llama-3.1-70b-instruct} - temperature: 0.0 - max_tokens: 1024 - top_p: 0.01 - justify_llm: - _type: ${LLM_TYPE_JUSTIFY:-nim} - api_key: ${LLM_API_KEY_JUSTIFY:-"EMPTY"} - base_url: ${JUSTIFY_LLM_API_BASE:-https://integrate.api.nvidia.com/v1} - model_name: ${JUSTIFY_MODEL_NAME:-meta/llama-3.1-70b-instruct} - temperature: 0.0 - max_tokens: 1024 - top_p: 0.01 - - intel_source_score_llm: - _type: ${LLM_TYPE_INTEL_SOURCE_SCORE:-nim} - api_key: ${LLM_API_KEY_INTEL_SOURCE_SCORE:-"EMPTY"} - base_url: ${INTEL_SOURCE_SCORE_LLM_API_BASE:-https://integrate.api.nvidia.com/v1} - model_name: ${INTEL_SOURCE_SCORE_MODEL_NAME:-meta/llama-3.1-70b-instruct} - temperature: 0.0 - max_tokens: 1024 - top_p: 0.01 - -embedders: - nim_embedder: - _type: nim - base_url: ${NIM_EMBED_BASE_URL:-https://integrate.api.nvidia.com/v1} - model_name: ${EMBEDDER_MODEL_NAME:-nvidia/nv-embedqa-e5-v5} - truncate: END - max_batch_size: 128 - -workflow: - _type: cve_agent - cve_generate_vdbs_name: cve_generate_vdbs - cve_fetch_intel_name: cve_fetch_intel - cve_calculate_intel_score_name: cve_calculate_intel_score - cve_process_sbom_name: cve_process_sbom - cve_check_vuln_deps_name: cve_check_vuln_deps - cve_checklist_name: cve_checklist - cve_agent_executor_name: cve_agent_executor - cve_generate_cvss_name: cve_generate_cvss - cve_generate_vex_name: cve_generate_vex - cve_summarize_name: cve_summarize - cve_justify_name: cve_justify - cve_output_config_name: cve_http_output - cve_source_acquisition_name: cve_source_acquisition - cve_checker_segmentation_name: cve_checker_segmentation - cve_package_code_agent_name: cve_package_code_agent - cve_checker_report_name: cve_checker_report - cve_build_agent_name: cve_build_agent - -eval: - general: - output_dir: ./.tmp/eval/cve_agent - dataset: - _type: json - file_path: data/eval_datasets/eval_dataset.json - - profiler: - token_uniqueness_forecast: true - 
workflow_runtime_forecast: true - compute_llm_metrics: true - csv_exclude_io_text: true - prompt_caching_prefixes: - enable: true - min_frequency: 0.1 - bottleneck_analysis: - # Can also be simple_stack - enable_nested_stack: true - concurrency_spike_analysis: - enable: true - spike_threshold: 7 From fd4ad14280c7455662d5ad25ba4dd6557151405d Mon Sep 17 00:00:00 2001 From: Tamar Weisskopf Date: Thu, 25 Jun 2026 09:41:18 +0300 Subject: [PATCH 09/21] Remove unrelated changes --- kustomize/README.md | 36 +++++++++---------- kustomize/base/exploit_iq_client.yaml | 6 ++-- kustomize/base/kustomization.yaml | 2 +- kustomize/network-policy.yaml | 6 ++-- .../batch-processing/kustomization.yaml | 4 +-- kustomize/overlays/tests/kustomization.yaml | 2 +- .../utils/chain_of_calls_retriever.py | 2 +- .../utils/chain_of_calls_retriever_base.py | 2 +- src/exploit_iq_commons/utils/dep_tree.py | 2 +- .../java_functions_parsers.py | 2 +- .../utils/java_chain_of_calls_retriever.py | 2 +- .../utils/transitive_code_searcher_tool.py | 2 +- src/vuln_analysis/register.py | 12 +++---- .../utils/function_name_extractor.py | 2 +- .../utils/function_name_locator.py | 2 +- src/vuln_analysis/utils/llm_engine_utils.py | 28 +++++++-------- .../vex/implementations/csaf_generator.py | 10 +++--- .../tests/test_csaf_generator_integration.py | 4 +-- 18 files changed, 63 insertions(+), 63 deletions(-) diff --git a/kustomize/README.md b/kustomize/README.md index b63230d94..8282be9c5 100644 --- a/kustomize/README.md +++ b/kustomize/README.md @@ -19,7 +19,7 @@ limitations under the License. ## Install and Run Locally -One can run the RHTPA exploit intelligence workflow on his local machine ( No GPU dependency is required!), for the purpose of testing, debugging and troubleshooting problems: +One can run the ExploitIQ on his local machine ( No GPU dependency is required!), for the purpose of testing, debugging and troubleshooting problems: 1. 
Install the lightweight [uv package manager](https://docs.astral.sh/uv/getting-started/installation). 2. Ensure Python 3.12 is installed for your operating system. @@ -98,7 +98,7 @@ export USE_CONTAINER_SOURCES=true ## Deploy And Run On OCP -1. Create a `base/secrets.env` file containing the API keys for external services `RHTPA exploit intelligence` might use. Not all keys are mandatory. Refer to the main [README](../README.md#obtain-api-keys) for details on how to create the Red Hat credentials and other API keys. +1. Create a `base/secrets.env` file containing the API keys for external services `ExploitIQ` might use. Not all keys are mandatory. Refer to the main [README](../README.md#obtain-api-keys) for details on how to create the Red Hat credentials and other API keys. ```shell cat > base/secrets.env << EOF @@ -128,7 +128,7 @@ argilla_api_key=your_argilla_api_key EOF ``` -4. Create an image pull secret to authorize pulling the `exploit-intelligence` and `Argilla` container images: +4. Create an image pull secret to authorize pulling the `ExploitIQ` and `Argilla` container images: ```shell oc create secret generic exploit-iq-pull-secret --from-file=.dockerconfigjson= --type=kubernetes.io/dockerconfigjson @@ -152,7 +152,7 @@ EOF >[!IMPORTANT] >This secret is essential for product scanning to authenticate and pull component images. If you skip this step, kustomize will still deploy, but authenticated pulls will not work until you provide real credentials. -6. Create the `oauth-secret.env` file containing the `client-secret` and `openshift-domain` values required by the [exploit-intelligence-client](./base/exploit_iq_client.yaml) configuration. +6. Create the `oauth-secret.env` file containing the `client-secret` and `openshift-domain` values required by the [ExploitIQ Client](./base/exploit_iq_client.yaml) configuration. 
If openshift resource of kind `OAuthClient` named `exploit-iq-client` exists, just get the secret from there: ```shell @@ -187,7 +187,7 @@ exploit-iq-password=$(openssl rand -base64 24 | tr -d '/+=' | head -c 32) EOF ``` -8. Update exploit intelligence configuration file with the correct callback URL for the client service. +8. Update `ExploitIQ` configuration file with the correct callback URL for the client service. ```shell export CALLBACK_URL="https://exploit-iq-client.$(oc project -q).svc:8443" @@ -196,7 +196,7 @@ find . -type f -name 'exploit-iq-config.yml' -exec sed -i "s|CALLBACK_URL_PLACEH ### Configuring Git SSL Certificate Authority for Custom CAs -If your Git server uses a certificate that is signed by a custom Certificate Authority (CA), you must provide the CA certificate bundle to enable the exploit intelligence workflow to verify the Git server identity. +If your Git server uses a certificate that is signed by a custom Certificate Authority (CA), you must provide the CA certificate bundle to enable ExploitIQ to verify the Git server identity. > [!IMPORTANT] > If you need to access Red Hat internal Git repositories such as `gitlab.cee.redhat.com`, you must complete this procedure. @@ -245,15 +245,15 @@ openssl crl2pkcs7 -nocrl -certfile kustomize/base/ca-certs/ca-bundle.crt | \ >[!IMPORTANT] You should only run one of the steps 9,10 or 11, depending on if you want to run the service with a self hosted LLM, self hosted LLM with MLOps or Nvidia remote NIM. -9. To deploy the exploit intelligence service with a self-hosted LLM , run: +9. To deploy `ExploitIQ` with a self-hosted LLM , run: ```shell -# Deploy exploit intelligence with self hosted llama3.1-70b-4bit LLM +# Deploy ExploitIQ with self hosted llama3.1-70b-4bit LLM oc kustomize overlays/self-hosted-llama3.1-70b-4bit | oc apply -f - -n $YOUR_NAMESPACE_NAME ``` -10. To deploy the exploit intelligence service with a self-hosted LLM and MLOps, run: +10. 
To deploy `ExploitIQ` with a self-hosted LLM and MLOps, run: ```shell # Patch overlay kustomization yaml with deployment namespace value (Grafana and Tempo) @@ -267,7 +267,7 @@ oc create secret generic grafana-bearer-token --from-literal=token='EXPLOIT_IQ_G ``` ```shell -# Deploy exploit intelligence with self hosted llama3.1-70b-4bit LLM and MLOps +# Deploy ExploitIQ with self hosted llama3.1-70b-4bit LLM and MLOps oc kustomize overlays/mlops | oc apply -f - -n $YOUR_NAMESPACE_NAME ``` @@ -299,9 +299,9 @@ oc kustomize overlays/mlops \ ``` -10. Alternatively, to deploy the exploit intelligence service with a fully remote nim LLM, run: +10. Alternatively, to deploy `ExploitIQ` with a fully remote nim LLM, run: ```shell -# Deploy exploit intelligence with remote nim llama-3.1-70b-16bit LLM +# Deploy ExploitIQ with remote nim llama-3.1-70b-16bit LLM oc kustomize overlays/remote-nim-all | oc apply -f - -n $YOUR_NAMESPACE_NAME ``` >[!WARNING] @@ -335,7 +335,7 @@ openshift-domain=$(oc get dns cluster -o jsonpath='{.spec.baseDomain}') EOF ``` -12. **(Optional) Enable OAuth for the exploit intelligence MCP Server.** If you want MCP clients (Claude Code, Cursor, etc.) to authenticate via OpenShift OAuth, create an `OAuthClient` CR for the MCP server: +12. **(Optional) Enable OAuth for the ExploitIQ MCP Server.** If you want MCP clients (Claude Code, Cursor, etc.) 
to authenticate via OpenShift OAuth, create an `OAuthClient` CR for the MCP server: ```bash oc create -f - < str: return f"{function_file_name};{function_name_to_search}" diff --git a/src/exploit_iq_commons/utils/dep_tree.py b/src/exploit_iq_commons/utils/dep_tree.py index f18b9ef68..ce915ea88 100644 --- a/src/exploit_iq_commons/utils/dep_tree.py +++ b/src/exploit_iq_commons/utils/dep_tree.py @@ -75,7 +75,7 @@ def _get_go_repo_lock(manifest_path) -> threading.Lock: return _go_repo_locks[key] -ROOT_LEVEL_SENTINEL = 'root-top-level-exploit-intelligence' +ROOT_LEVEL_SENTINEL = 'root-top-level-exploit-iq' TRANSITIVE_ENV_NAME = 'transitive_env' INSTALLED_PACKAGES_FILE = 'installed_packages.txt' diff --git a/src/exploit_iq_commons/utils/functions_parsers/java_functions_parsers.py b/src/exploit_iq_commons/utils/functions_parsers/java_functions_parsers.py index 2165aa584..a79b89223 100644 --- a/src/exploit_iq_commons/utils/functions_parsers/java_functions_parsers.py +++ b/src/exploit_iq_commons/utils/functions_parsers/java_functions_parsers.py @@ -28,7 +28,7 @@ strip_java_generics, JAVA_ANNOTATION_SYMBOL, extract_fqcn from exploit_iq_commons.logging.loggers_factory import LoggingFactory -logger = LoggingFactory.get_agent_logger(f"exploit-intelligence.{__name__}") +logger = LoggingFactory.get_agent_logger(f"exploit-iq.{__name__}") PARAMETER = "parameter" diff --git a/src/exploit_iq_commons/utils/java_chain_of_calls_retriever.py b/src/exploit_iq_commons/utils/java_chain_of_calls_retriever.py index c2fabf8f5..4dcbc25ee 100644 --- a/src/exploit_iq_commons/utils/java_chain_of_calls_retriever.py +++ b/src/exploit_iq_commons/utils/java_chain_of_calls_retriever.py @@ -36,7 +36,7 @@ create_inheritance_map, get_target_class_names, dummy_package_name from exploit_iq_commons.data_models.input import SourceDocumentsInfo -logger = LoggingFactory.get_agent_logger(f"exploit-intelligence.{__name__}") +logger = LoggingFactory.get_agent_logger(f"exploit-iq.{__name__}") # Lowercase package 
segments; class segments start with uppercase; allow dots or $ for inners _FQCN_STRICT_RE = re.compile( diff --git a/src/exploit_iq_commons/utils/transitive_code_searcher_tool.py b/src/exploit_iq_commons/utils/transitive_code_searcher_tool.py index 09956c637..5c259737c 100644 --- a/src/exploit_iq_commons/utils/transitive_code_searcher_tool.py +++ b/src/exploit_iq_commons/utils/transitive_code_searcher_tool.py @@ -25,7 +25,7 @@ from exploit_iq_commons.logging.loggers_factory import LoggingFactory, MULTI_LINE_MESSAGE_TRUE -logger = LoggingFactory.get_agent_logger(f"exploit-intelligence.{__name__}") +logger = LoggingFactory.get_agent_logger(f"exploit-iq.{__name__}") class TransitiveCodeSearcher: diff --git a/src/vuln_analysis/register.py b/src/vuln_analysis/register.py index 0c3d919ce..296391e0e 100644 --- a/src/vuln_analysis/register.py +++ b/src/vuln_analysis/register.py @@ -575,7 +575,7 @@ async def call_llm_engine_subgraph_node(message: AgentMorpheusEngineInput): graph = graph_builder.compile() #graph.get_graph().draw_mermaid_png(output_file_path="checker_flow.png") - def convert_str_to_exploit_intelligence_input(input: str) -> AgentMorpheusInput: + def convert_str_to_exploit_iq_input(input: str) -> AgentMorpheusInput: logger.debug("Converting JSON string input to AgentMorpheusInput (length: %d)", len(input)) try: return AgentMorpheusInput.model_validate_json(input) @@ -583,7 +583,7 @@ def convert_str_to_exploit_intelligence_input(input: str) -> AgentMorpheusInput: logger.error("Failed to convert input to AgentMorpheusInput: %s. 
Your input needs to be a json string.", e) raise e - def convert_textio_to_exploit_intelligence_input(input: TextIOWrapper) -> AgentMorpheusInput: + def convert_textio_to_exploit_iq_input(input: TextIOWrapper) -> AgentMorpheusInput: logger.debug("Converting TextIOWrapper input to AgentMorpheusInput") try: data = input.read() @@ -593,7 +593,7 @@ def convert_textio_to_exploit_intelligence_input(input: TextIOWrapper) -> AgentM "Failed to convert input to AgentMorpheusInput: %s. Your input needs to be a TextIOWrapper object.", e) raise e - def convert_exploit_intelligence_output_to_str(output: AgentMorpheusOutput) -> str: + def convert_exploit_iq_output_to_str(output: AgentMorpheusOutput) -> str: logger.debug("Converting AgentMorpheusOutput to JSON string") try: return output.model_dump_json() @@ -612,9 +612,9 @@ async def _response_fn(input_message: AgentMorpheusInput) -> AgentMorpheusOutput description=config.description, input_schema=AgentMorpheusInput, converters=[ - convert_str_to_exploit_intelligence_input, - convert_textio_to_exploit_intelligence_input, - convert_exploit_intelligence_output_to_str + convert_str_to_exploit_iq_input, + convert_textio_to_exploit_iq_input, + convert_exploit_iq_output_to_str ]) except GeneratorExit: logger.info("Workflow exited early!") diff --git a/src/vuln_analysis/utils/function_name_extractor.py b/src/vuln_analysis/utils/function_name_extractor.py index eca13427b..7f955f330 100644 --- a/src/vuln_analysis/utils/function_name_extractor.py +++ b/src/vuln_analysis/utils/function_name_extractor.py @@ -20,7 +20,7 @@ from exploit_iq_commons.logging.loggers_factory import LoggingFactory -logger = LoggingFactory.get_agent_logger(f"exploit-intelligence.{__name__}") +logger = LoggingFactory.get_agent_logger(f"exploit-iq.{__name__}") def traverse_all_parameters(function_ending_index_end, function_prefix_index_end, function_string): diff --git a/src/vuln_analysis/utils/function_name_locator.py 
b/src/vuln_analysis/utils/function_name_locator.py index 89054202b..08144ad27 100644 --- a/src/vuln_analysis/utils/function_name_locator.py +++ b/src/vuln_analysis/utils/function_name_locator.py @@ -25,7 +25,7 @@ from exploit_iq_commons.utils.source_rpm_downloader import RPMDependencyManager from vuln_analysis.utils.prompt_factory import FL_EXAMPLES -logger = LoggingFactory.get_agent_logger(f"exploit-intelligence.{__name__}") +logger = LoggingFactory.get_agent_logger(f"exploit-iq.{__name__}") class FunctionNameLocator: diff --git a/src/vuln_analysis/utils/llm_engine_utils.py b/src/vuln_analysis/utils/llm_engine_utils.py index 6ec5de30e..2991396f3 100644 --- a/src/vuln_analysis/utils/llm_engine_utils.py +++ b/src/vuln_analysis/utils/llm_engine_utils.py @@ -171,13 +171,13 @@ def _build_full_pipeline_details_md(patch_result: WebPatchResult | dict | None) return result or None -def parse_exploit_intelligence_engine_output(vuln_id: str, - checklist_results: list[dict[str, typing.Any]], - summary: str, - justification: dict[str, str], - intel_score: int, - cvss: dict[str, str] | None, - patch_result: WebPatchResult | dict | None = None) -> AgentMorpheusEngineOutput: +def parse_exploit_iq_engine_output(vuln_id: str, + checklist_results: list[dict[str, typing.Any]], + summary: str, + justification: dict[str, str], + intel_score: int, + cvss: dict[str, str] | None, + patch_result: WebPatchResult | dict | None = None) -> AgentMorpheusEngineOutput: """ Parse the output fields for a single vulnerability into an AgentMorpheusEngineOutput object. 
""" @@ -362,13 +362,13 @@ def postprocess_engine_output(message: AgentMorpheusEngineInput, justification = result.justifications[vuln_id] is_vulnerable = justification.get("affected_status") == "TRUE" output.append( - parse_exploit_intelligence_engine_output(vuln_id=vuln_id, - checklist_results=result.checklist_results[vuln_id], - summary=result.final_summaries[vuln_id], - justification=justification, - intel_score=intel_map[vuln_id].intel_score, - cvss=result.cvss_results.get(vuln_id, None), - patch_result=result.patch_results.get(vuln_id) if is_vulnerable else None)) + parse_exploit_iq_engine_output(vuln_id=vuln_id, + checklist_results=result.checklist_results[vuln_id], + summary=result.final_summaries[vuln_id], + justification=justification, + intel_score=intel_map[vuln_id].intel_score, + cvss=result.cvss_results.get(vuln_id, None), + patch_result=result.patch_results.get(vuln_id) if is_vulnerable else None)) elif vuln_id in deficient_intel: output.append(build_deficient_intel_output(vuln_id)) elif vuln_id in poor_quality_intel_vul: diff --git a/src/vuln_analysis/utils/vex/implementations/csaf_generator.py b/src/vuln_analysis/utils/vex/implementations/csaf_generator.py index cd2cf42b2..e60598bee 100644 --- a/src/vuln_analysis/utils/vex/implementations/csaf_generator.py +++ b/src/vuln_analysis/utils/vex/implementations/csaf_generator.py @@ -47,9 +47,9 @@ NOTE_TITLE_VULNERABILITY_DESCRIPTION = "Vulnerability description" NOTE_TITLE_VULNERABILITY_SUMMARY = "Vulnerability summary" NOTE_TITLE_RHSA_STATEMENT = "Red Hat Security Advisory Statement" -NOTE_TITLE_EXPLOITIQ_SUMMARY = "RHTPA exploit intelligence Analysis Summary" -NOTE_TITLE_EXPLOITIQ_JUSTIFICATION_REASONING = "RHTPA exploit intelligence Analysis Justification Reasoning" -NOTE_TITLE_EXPLOITIQ_JUSTIFICATION_LABEL = "RHTPA exploit intelligence Analysis Justification Label" +NOTE_TITLE_EXPLOITIQ_SUMMARY = "ExploitIQ Analysis Summary" +NOTE_TITLE_EXPLOITIQ_JUSTIFICATION_REASONING = "ExploitIQ Analysis 
Justification Reasoning" +NOTE_TITLE_EXPLOITIQ_JUSTIFICATION_LABEL = "ExploitIQ Analysis Justification Label" NOTE_TITLE_UNOFFICIAL_CONTENT = "Unofficial Content Notice" # Disclaimer text @@ -138,7 +138,7 @@ def _enrich_vulnerabilities_with_notes( "title": NOTE_TITLE_RHSA_STATEMENT }) - # Add RHTPA exploit intelligence Analysis Summary + # Add ExploitIQ Analysis Summary summary = final_summaries.get(vuln_id) notes.append({ "category": NOTE_CATEGORY_OTHER, @@ -176,7 +176,7 @@ def generate(self, state: AgentMorpheusEngineState) -> Dict[str, Any]: product_name = message.input.image.name product_tag = message.input.image.tag - csaf_gen.set_header_title(f"RHTPA exploit intelligence VEX Document - {product_name}{"@" if OCI_DIGEST_RE.fullmatch(product_tag) else ":"}{product_tag}") + csaf_gen.set_header_title(f"ExploitIQ VEX Document - {product_name}{"@" if OCI_DIGEST_RE.fullmatch(product_tag) else ":"}{product_tag}") csaf_gen.set_value("notes",[ { diff --git a/src/vuln_analysis/utils/vex/tests/test_csaf_generator_integration.py b/src/vuln_analysis/utils/vex/tests/test_csaf_generator_integration.py index 53f0dfe37..5bca102d3 100644 --- a/src/vuln_analysis/utils/vex/tests/test_csaf_generator_integration.py +++ b/src/vuln_analysis/utils/vex/tests/test_csaf_generator_integration.py @@ -119,7 +119,7 @@ def test_document_has_correct_title(self, mock_state): result = generator.generate(mock_state) title = result["document"].get("title") - assert "RHTPA exploit intelligence VEX Document - " + _DEFAULT_PRODUCT_NAME + ":" + _DEFAULT_PRODUCT_TAG in title + assert "ExploitIQ VEX Document - " + _DEFAULT_PRODUCT_NAME + ":" + _DEFAULT_PRODUCT_TAG in title def test_oci_digest_tag_uses_at_separator(self): """Test that OCI digest tags use @ separator instead of : in title.""" @@ -132,7 +132,7 @@ def test_oci_digest_tag_uses_at_separator(self): result = generator.generate(state) title = result["document"].get("title") - assert "RHTPA exploit intelligence VEX Document - " + 
_DEFAULT_PRODUCT_NAME + "@" + oci_digest in title + assert "ExploitIQ VEX Document - " + _DEFAULT_PRODUCT_NAME + "@" + oci_digest in title def test_document_has_disclaimer_note(self, mock_state): """Test that document includes the disclaimer note.""" From be688f1118d7355e3327cf99190a0999485a5b54 Mon Sep 17 00:00:00 2001 From: Tamar Weisskopf Date: Sun, 28 Jun 2026 03:33:44 +0300 Subject: [PATCH 10/21] Rename Agent morpheus to ExploitIQ --- .tekton/on-cm-runner.yaml | 2 +- .tekton/on-pull-request.yaml | 2 +- .tekton/on-push.yaml | 2 +- .tekton/on-tag.yaml | 2 +- ci/scripts/copyright.py | 2 +- ci/scripts/gitutils.py | 2 +- kustomize/README.md | 2 +- kustomize/base/argilla/argilla-service.yaml | 2 +- .../argilla/argilla-user-feedback-pvc.yaml | 2 +- kustomize/base/argilla/deployment.yaml | 8 +- kustomize/base/argilla/service.yaml | 6 +- kustomize/base/exploit-iq-config.yml | 4 +- kustomize/base/exploit_iq_client.yaml | 2 +- kustomize/base/exploit_iq_service.yaml | 4 +- kustomize/base/kustomization.yaml | 4 +- kustomize/network-policy.yaml | 4 +- .../template-variables.conf.template | 28 ++-- .../data_models/checker_status.py | 2 +- src/exploit_iq_commons/data_models/info.py | 4 +- src/exploit_iq_commons/data_models/input.py | 18 +-- .../utils/document_embedding.py | 2 +- .../utils/source_rpm_downloader.py | 4 +- .../configs/openapi/openapi.json | 50 +++---- .../data/eval_datasets/eval_dataset.json | 3 +- src/vuln_analysis/data_models/output.py | 14 +- src/vuln_analysis/data_models/state.py | 6 +- src/vuln_analysis/functions/cve_agent.py | 6 +- .../functions/cve_build_agent.py | 6 +- .../functions/cve_calculate_intel_score.py | 8 +- .../functions/cve_check_vuln_deps.py | 6 +- .../functions/cve_checker_report.py | 24 ++-- .../functions/cve_checker_segmentation.py | 8 +- src/vuln_analysis/functions/cve_checklist.py | 6 +- .../functions/cve_clone_and_deps.py | 14 +- .../functions/cve_fetch_intel.py | 6 +- .../functions/cve_fetch_patches.py | 6 +- 
.../functions/cve_file_output.py | 6 +- .../functions/cve_generate_cvss.py | 10 +- .../functions/cve_generate_vdbs.py | 18 +-- .../functions/cve_generate_vex.py | 6 +- .../functions/cve_http_output.py | 12 +- src/vuln_analysis/functions/cve_justify.py | 6 +- .../functions/cve_package_code_agent.py | 6 +- .../functions/cve_process_sbom.py | 18 +-- .../functions/cve_segmentation.py | 12 +- .../functions/cve_source_acquisition.py | 6 +- src/vuln_analysis/functions/cve_summarize.py | 6 +- .../functions/cve_verify_vuln_package.py | 10 +- src/vuln_analysis/register.py | 130 +++++++++--------- src/vuln_analysis/runtime_context.py | 2 +- src/vuln_analysis/tools/serp.py | 4 +- .../tools/tests/test_concurrency.py | 6 +- .../tests/test_transitive_code_search.py | 26 ++-- .../tools/transitive_code_search.py | 4 +- .../utils/function_name_locator.py | 4 +- src/vuln_analysis/utils/llm_engine_utils.py | 78 +++++------ src/vuln_analysis/utils/output_formatter.py | 38 ++--- src/vuln_analysis/utils/serp_api_wrapper.py | 6 +- .../vex/implementations/csaf_generator.py | 7 +- .../tests/test_csaf_generator_integration.py | 24 ++-- .../utils/vex/vex_generator_base.py | 4 +- tests/test_serp_api_key_rotation.py | 20 +-- 62 files changed, 371 insertions(+), 369 deletions(-) diff --git a/.tekton/on-cm-runner.yaml b/.tekton/on-cm-runner.yaml index 439d6114e..87a109717 100644 --- a/.tekton/on-cm-runner.yaml +++ b/.tekton/on-cm-runner.yaml @@ -26,7 +26,7 @@ spec: value: "{{ trigger_comment }}" # Point to the image ALREADY built by the PR pipeline - name: target-image - value: quay.io/ecosystem-appeng/agent-morpheus-rh:on-pr-{{revision}} + value: quay.io/ecosystem-appeng/exploit-iq-agent:on-pr-{{revision}} pipelineSpec: params: diff --git a/.tekton/on-pull-request.yaml b/.tekton/on-pull-request.yaml index e72258311..d2e3b8340 100644 --- a/.tekton/on-pull-request.yaml +++ b/.tekton/on-pull-request.yaml @@ -33,7 +33,7 @@ spec: - name: image-expires-after value: 5d - name: output-image - value: 
quay.io/ecosystem-appeng/agent-morpheus-rh:on-pr-{{revision}} + value: quay.io/ecosystem-appeng/exploit-iq-agent:on-pr-{{revision}} - name: path-context value: . - name: dockerfile diff --git a/.tekton/on-push.yaml b/.tekton/on-push.yaml index 5da2cc106..71316d17f 100644 --- a/.tekton/on-push.yaml +++ b/.tekton/on-push.yaml @@ -26,7 +26,7 @@ spec: - name: revision value: "{{ revision }}" - name: output-image - value: quay.io/ecosystem-appeng/agent-morpheus-rh:latest + value: quay.io/ecosystem-appeng/exploit-iq-agent:latest - name: path-context value: . - name: dockerfile diff --git a/.tekton/on-tag.yaml b/.tekton/on-tag.yaml index 08718fd32..c8721f1c1 100644 --- a/.tekton/on-tag.yaml +++ b/.tekton/on-tag.yaml @@ -26,7 +26,7 @@ spec: - name: revision value: "{{ revision }}" - name: output-image - value: 'quay.io/ecosystem-appeng/agent-morpheus-rh' + value: 'quay.io/ecosystem-appeng/exploit-iq-agent' - name: tag-name value: "{{ target_branch }}" - name: path-context diff --git a/ci/scripts/copyright.py b/ci/scripts/copyright.py index 56e7b7ab1..3a49e62a4 100755 --- a/ci/scripts/copyright.py +++ b/ci/scripts/copyright.py @@ -285,7 +285,7 @@ def _main(): repo, this script will just look for uncommitted files and in case of CI it compares between branches "$PR_TARGET_BRANCH" and "current-pr-branch" """ - log_level = logging.getLevelName(os.environ.get("MORPHEUS_LOG_LEVEL", "INFO")) + log_level = logging.getLevelName(os.environ.get("EXPLOIT_IQ_LOG_LEVEL", "INFO")) logging.basicConfig(format="%(levelname)s:%(message)s", level=log_level) ret_val = 0 diff --git a/ci/scripts/gitutils.py b/ci/scripts/gitutils.py index edfb44056..7415af369 100755 --- a/ci/scripts/gitutils.py +++ b/ci/scripts/gitutils.py @@ -632,7 +632,7 @@ def _parse_args(): def _main(): - log_level = logging.getLevelName(os.environ.get("MORPHEUS_LOG_LEVEL", "INFO")) + log_level = logging.getLevelName(os.environ.get("EXPLOIT_IQ_LOG_LEVEL", "INFO")) logging.basicConfig(format="%(levelname)s:%(message)s", 
level=log_level) args = _parse_args() diff --git a/kustomize/README.md b/kustomize/README.md index 8282be9c5..93de93d7b 100644 --- a/kustomize/README.md +++ b/kustomize/README.md @@ -503,7 +503,7 @@ To restrict MCP access to specific groups, edit the `EXPLOITIQ_ALLOWED_GROUPS` v 11. Deploy Self hosted LLM + nim Embedding model for the automation tests ( Integration tests and Confusion matrix runner): ```shell -helm upgrade --install --set nim_embed.enabled=false --set llama3_1_70b_instruct_4bit.storageClass.name=gp3-csi-throughput-2000 --set llama3_1_70b_instruct_4bit.readinessProbe.initialDelaySeconds=25 --set llama3_1_70b_instruct_4bit.readinessProbe.periodSeconds=10 --set global.tolerationsKey=p4d-gpu --set nim-embed.ngcSecret.apiKey=your_nvidia_ngc_api_key exploit-iq-tests ../../../exploit-iq-models/agent-morpheus-models +helm upgrade --install --set nim_embed.enabled=false --set llama3_1_70b_instruct_4bit.storageClass.name=gp3-csi-throughput-2000 --set llama3_1_70b_instruct_4bit.readinessProbe.initialDelaySeconds=25 --set llama3_1_70b_instruct_4bit.readinessProbe.periodSeconds=10 --set global.tolerationsKey=p4d-gpu --set nim-embed.ngcSecret.apiKey=your_nvidia_ngc_api_key exploit-iq-tests ../../../exploit-iq-models/exploit-iq-models ``` 12. 
Remove untracked decrypted secrets files diff --git a/kustomize/base/argilla/argilla-service.yaml b/kustomize/base/argilla/argilla-service.yaml index cc9f2840b..3c7f7d6ed 100644 --- a/kustomize/base/argilla/argilla-service.yaml +++ b/kustomize/base/argilla/argilla-service.yaml @@ -6,7 +6,7 @@ metadata: app: argilla spec: selector: - app: morpheus-feedback-api + app: exploit-iq-feedback-api ports: - protocol: TCP port: 6900 diff --git a/kustomize/base/argilla/argilla-user-feedback-pvc.yaml b/kustomize/base/argilla/argilla-user-feedback-pvc.yaml index 8a730ef7c..56d4479b8 100644 --- a/kustomize/base/argilla/argilla-user-feedback-pvc.yaml +++ b/kustomize/base/argilla/argilla-user-feedback-pvc.yaml @@ -4,7 +4,7 @@ kind: PersistentVolumeClaim metadata: name: argilla-user-feedback-pvc labels: - app: morpheus-feedback-api + app: exploit-iq-feedback-api spec: accessModes: - ReadWriteOnce diff --git a/kustomize/base/argilla/deployment.yaml b/kustomize/base/argilla/deployment.yaml index 0e1c6601d..08b1e1970 100644 --- a/kustomize/base/argilla/deployment.yaml +++ b/kustomize/base/argilla/deployment.yaml @@ -1,20 +1,20 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: morpheus-feedback-api + name: exploit-iq-feedback-api labels: - app: morpheus-feedback-api + app: exploit-iq-feedback-api spec: replicas: 1 selector: matchLabels: - app: morpheus-feedback-api + app: exploit-iq-feedback-api strategy: type: Recreate template: metadata: labels: - app: morpheus-feedback-api + app: exploit-iq-feedback-api spec: restartPolicy: Always imagePullSecrets: diff --git a/kustomize/base/argilla/service.yaml b/kustomize/base/argilla/service.yaml index 545316330..a85c00bdf 100644 --- a/kustomize/base/argilla/service.yaml +++ b/kustomize/base/argilla/service.yaml @@ -1,12 +1,12 @@ apiVersion: v1 kind: Service metadata: - name: morpheus-feedback-api + name: exploit-iq-feedback-api labels: - app: morpheus-feedback-api + app: exploit-iq-feedback-api spec: selector: - app: 
morpheus-feedback-api + app: exploit-iq-feedback-api ports: - protocol: TCP port: 5001 diff --git a/kustomize/base/exploit-iq-config.yml b/kustomize/base/exploit-iq-config.yml index 819084623..b75e69a4c 100644 --- a/kustomize/base/exploit-iq-config.yml +++ b/kustomize/base/exploit-iq-config.yml @@ -52,7 +52,7 @@ functions: plugin_name: vuln_analysis.data_models.plugins.intel_plugin.SimpleHttpIntelPlugin plugin_config: source: Product Security research - endpoint: CALLBACK_URL_PLACEHOLDER/api/v1/vulnerabilities/{vuln_id}/comments + endpoint: https://exploit-iq-client.tw-iq.svc:8443/api/v1/vulnerabilities/{vuln_id}/comments token_path: /var/run/secrets/kubernetes.io/serviceaccount/token verify_path: /app/certs/service-ca.crt @@ -145,7 +145,7 @@ functions: # vex_format: csaf cve_http_output: _type: cve_http_output - url: CALLBACK_URL_PLACEHOLDER + url: https://exploit-iq-client.tw-iq.svc:8443 endpoint: /api/v1/reports auth_type: bearer token_path: /var/run/secrets/kubernetes.io/serviceaccount/token diff --git a/kustomize/base/exploit_iq_client.yaml b/kustomize/base/exploit_iq_client.yaml index 51ebf53d1..d29bbe30d 100644 --- a/kustomize/base/exploit_iq_client.yaml +++ b/kustomize/base/exploit_iq_client.yaml @@ -27,7 +27,7 @@ spec: - ./application - -Dquarkus.http.host=0.0.0.0 - -Dquarkus.log.category."com.redhat.ecosystemappeng.exploitiq".level=DEBUG - image: quay.io/ecosystem-appeng/agent-morpheus-client:latest + image: exploit-iq-test-imagequay.io/ecosystem-appeng/exploit-iq-client:latest imagePullPolicy: Always ports: - name: http diff --git a/kustomize/base/exploit_iq_service.yaml b/kustomize/base/exploit_iq_service.yaml index 2f99c7411..46c36f43f 100644 --- a/kustomize/base/exploit_iq_service.yaml +++ b/kustomize/base/exploit_iq_service.yaml @@ -25,7 +25,7 @@ spec: serviceAccountName: exploit-iq-sa containers: - name: exploit-iq-phoenix-tracing - image: quay.io/ecosystem-appeng/agent-morpheus-rh:nat + image: quay.io/ecosystem-appeng/exploit-iq-agent:nat 
imagePullPolicy: Always workingDir: /workspace/ args: @@ -45,7 +45,7 @@ spec: memory: "1Gi" cpu: "100m" - name: exploit-iq - image: quay.io/ecosystem-appeng/agent-morpheus-rh:nat + image: quay.io/ecosystem-appeng/exploit-iq-agent:nat imagePullPolicy: Always workingDir: /workspace/ args: diff --git a/kustomize/base/kustomization.yaml b/kustomize/base/kustomization.yaml index 16abd4d8e..e6f1f6930 100644 --- a/kustomize/base/kustomization.yaml +++ b/kustomize/base/kustomization.yaml @@ -94,10 +94,10 @@ patches: kind: Deployment images: - - name: quay.io/ecosystem-appeng/agent-morpheus-rh + - name: quay.io/ecosystem-appeng/exploit-iq-agent newTag: latest - - name: quay.io/ecosystem-appeng/agent-morpheus-client + - name: quay.io/ecosystem-appeng/exploit-iq-client newTag: latest - name: quay.io/ecosystem-appeng/exploitiq-mcp-server diff --git a/kustomize/network-policy.yaml b/kustomize/network-policy.yaml index 1b3080632..3e16c3b0d 100644 --- a/kustomize/network-policy.yaml +++ b/kustomize/network-policy.yaml @@ -3,14 +3,14 @@ kind: NetworkPolicy metadata: labels: networking.knative.dev/ingress-provider: istio - name: allow-from-agent-exploit-iq-namespaces + name: allow-from-exploit-iq-namespaces namespace: exploit-iq-cn-models-nim spec: ingress: - from: - namespaceSelector: matchLabels: - application: agent-exploit-iq + application: exploit-iq podSelector: {} policyTypes: - Ingress diff --git a/nginx/templates/variables/template-variables.conf.template b/nginx/templates/variables/template-variables.conf.template index e2f122c52..ed6656889 100644 --- a/nginx/templates/variables/template-variables.conf.template +++ b/nginx/templates/variables/template-variables.conf.template @@ -1,31 +1,31 @@ map $http_authorization $nemo_http_authorization { - 'Bearer AGENT_MORPHEUS' 'Bearer ${NGC_API_KEY}'; - 'Bearer "AGENT_MORPHEUS"' 'Bearer ${NGC_API_KEY}'; + 'Bearer EXPLOIT_IQ' 'Bearer ${NGC_API_KEY}'; + 'Bearer "EXPLOIT_IQ"' 'Bearer ${NGC_API_KEY}'; 'Bearer CYBER_DEV_DAY' 'Bearer 
${NGC_API_KEY}'; 'Bearer "CYBER_DEV_DAY"' 'Bearer ${NGC_API_KEY}'; default $http_authorization; } map $http_organization_id $nemo_http_organization_id { - 'AGENT_MORPHEUS' '${NGC_ORG_ID}'; - '"AGENT_MORPHEUS"' '${NGC_ORG_ID}'; + 'EXPLOIT_IQ' '${NGC_ORG_ID}'; + '"EXPLOIT_IQ"' '${NGC_ORG_ID}'; 'CYBER_DEV_DAY' '${NGC_ORG_ID}'; '"CYBER_DEV_DAY"' '${NGC_ORG_ID}'; default $http_organization_id; } map $http_authorization $nim_http_authorization { - 'Bearer AGENT_MORPHEUS' 'Bearer ${NVIDIA_API_KEY}'; - 'Bearer "AGENT_MORPHEUS"' 'Bearer ${NVIDIA_API_KEY}'; + 'Bearer EXPLOIT_IQ' 'Bearer ${NVIDIA_API_KEY}'; + 'Bearer "EXPLOIT_IQ"' 'Bearer ${NVIDIA_API_KEY}'; 'Bearer CYBER_DEV_DAY' '${NVIDIA_API_KEY}'; 'Bearer "CYBER_DEV_DAY"' '${NVIDIA_API_KEY}'; default $http_authorization; } map $http_authorization $nvai_http_authorization { - 'Bearer AGENT_MORPHEUS' 'Bearer ${NVIDIA_API_KEY}'; - 'Bearer nvapi-AGENT_MORPHEUS' 'Bearer ${NVIDIA_API_KEY}'; - 'Bearer "nvapi-AGENT_MORPHEUS"' 'Bearer ${NVIDIA_API_KEY}'; + 'Bearer EXPLOIT_IQ' 'Bearer ${NVIDIA_API_KEY}'; + 'Bearer nvapi-EXPLOIT_IQ' 'Bearer ${NVIDIA_API_KEY}'; + 'Bearer "nvapi-EXPLOIT_IQ"' 'Bearer ${NVIDIA_API_KEY}'; 'Bearer CYBER_DEV_DAY' 'Bearer ${NVIDIA_API_KEY}'; 'Bearer nvapi-CYBER_DEV_DAY' 'Bearer ${NVIDIA_API_KEY}'; 'Bearer "nvapi-CYBER_DEV_DAY"' 'Bearer ${NVIDIA_API_KEY}'; @@ -33,22 +33,22 @@ map $http_authorization $nvai_http_authorization { } map $http_authorization $openai_http_authorization { - 'Bearer AGENT_MORPHEUS' 'Bearer ${OPENAI_API_KEY}'; - 'Bearer "AGENT_MORPHEUS"' 'Bearer ${OPENAI_API_KEY}'; + 'Bearer EXPLOIT_IQ' 'Bearer ${OPENAI_API_KEY}'; + 'Bearer "EXPLOIT_IQ"' 'Bearer ${OPENAI_API_KEY}'; 'Bearer CYBER_DEV_DAY' 'Bearer ${OPENAI_API_KEY}'; 'Bearer "CYBER_DEV_DAY"' 'Bearer ${OPENAI_API_KEY}'; default $http_authorization; } map $http_authorization $ghsa_http_authorization { - 'Bearer AGENT_MORPHEUS' 'Bearer ${GHSA_API_KEY}'; - 'Bearer "AGENT_MORPHEUS"' 'Bearer ${GHSA_API_KEY}'; + 'Bearer EXPLOIT_IQ' 'Bearer 
${GHSA_API_KEY}'; + 'Bearer "EXPLOIT_IQ"' 'Bearer ${GHSA_API_KEY}'; 'Bearer CYBER_DEV_DAY' 'Bearer ${GHSA_API_KEY}'; 'Bearer "CYBER_DEV_DAY"' 'Bearer ${GHSA_API_KEY}'; default $http_authorization; } map $http_apikey $nvd_http_api_key { - 'AGENT_MORPHEUS' '${NVD_API_KEY}'; + 'EXPLOIT_IQ' '${NVD_API_KEY}'; default $http_apikey; } diff --git a/src/exploit_iq_commons/data_models/checker_status.py b/src/exploit_iq_commons/data_models/checker_status.py index 4047d02ea..fd4dccc3f 100644 --- a/src/exploit_iq_commons/data_models/checker_status.py +++ b/src/exploit_iq_commons/data_models/checker_status.py @@ -298,7 +298,7 @@ class L2BuildResult(BaseModel): class PackageCheckerContext(BaseModel): - """Consolidates all checker-specific state on AgentMorpheusInfo.""" + """Consolidates all checker-specific state on ExploitIqInfo.""" status: PackageCheckerStatus | None = None source_key: str | None = None artifacts: AcquiredArtifacts = Field(default_factory=AcquiredArtifacts) diff --git a/src/exploit_iq_commons/data_models/info.py b/src/exploit_iq_commons/data_models/info.py index 4f7bd1ef1..09d9f9876 100644 --- a/src/exploit_iq_commons/data_models/info.py +++ b/src/exploit_iq_commons/data_models/info.py @@ -30,9 +30,9 @@ class SBOMPackage(BaseModel): system: str -class AgentMorpheusInfo(BaseModel): +class ExploitIqInfo(BaseModel): """ - Information used for decisioning in the Agent Morpheus engine. These information can all be automatically + Information used for decisioning in the ExploitIQ engine. These information can all be automatically generated or retrieved by the pipeline from the input information. 
- vdb: paths to source code and documentation vector databases (VDBs) used to understand whether a vulnerability diff --git a/src/exploit_iq_commons/data_models/input.py b/src/exploit_iq_commons/data_models/input.py index b15e2f09a..720f5bc54 100644 --- a/src/exploit_iq_commons/data_models/input.py +++ b/src/exploit_iq_commons/data_models/input.py @@ -34,7 +34,7 @@ from exploit_iq_commons.data_models.common import HashableModel from exploit_iq_commons.data_models.common import PipelineMode , TargetPackage from exploit_iq_commons.data_models.common import TypedBaseModel -from exploit_iq_commons.data_models.info import AgentMorpheusInfo +from exploit_iq_commons.data_models.info import ExploitIqInfo from exploit_iq_commons.data_models.info import SBOMPackage DEFAULT_FAILURE_REASON = "No failure reason provided" @@ -206,9 +206,9 @@ def check_conflicting_refs(cls, source_info: list[SourceDocumentsInfo]) -> list[ return source_info -class AgentMorpheusInput(HashableModel): +class ExploitIqInput(HashableModel): """ - Inputs required by the Agent Morpheus pipeline. + Inputs required by the ExploitIQ pipeline. """ scan: ScanInfoInput image: ImageInfoInput @@ -220,12 +220,12 @@ class AgentMorpheusInput(HashableModel): failure_reason: str | None = Field(default=DEFAULT_FAILURE_REASON) -class AgentMorpheusEngineInput(BaseModel): +class ExploitIqEngineInput(BaseModel): """ - Inputs required by the Agent Morpheus Engine. + Inputs required by the ExploitIQ Engine. - - input: AgentMorpheusInput object that must be provided to the pipeline. - - info: AgentMorpheusInfo object that is retrieved/generated by the pipeline. + - input: ExploitIqInput object that must be provided to the pipeline. + - info: ExploitIqInfo object that is retrieved/generated by the pipeline. 
""" - input: AgentMorpheusInput - info: AgentMorpheusInfo + input: ExploitIqInput + info: ExploitIqInfo diff --git a/src/exploit_iq_commons/utils/document_embedding.py b/src/exploit_iq_commons/utils/document_embedding.py index ad805cd1f..aee69a23a 100644 --- a/src/exploit_iq_commons/utils/document_embedding.py +++ b/src/exploit_iq_commons/utils/document_embedding.py @@ -692,7 +692,7 @@ def build_vdbs(self, # Determine the output path by combining the vdb_directory with the hash of the source documents vdb_output_dir = self.vdb_directory / source_type / str(self.hash_source_documents_info(source_infos)) - if (not vdb_output_dir.exists() or os.environ.get("MORPHEUS_ALWAYS_REBUILD_VDB", "0") == "1"): + if (not vdb_output_dir.exists() or os.environ.get("EXPLOIT_IQ_ALWAYS_REBUILD_VDB", "0") == "1"): vdb = self.create_vdb(source_infos=source_infos, output_path=vdb_output_dir) else: logger.info("Cache hit on VDB. Loading existing FAISS database: %s", vdb_output_dir) diff --git a/src/exploit_iq_commons/utils/source_rpm_downloader.py b/src/exploit_iq_commons/utils/source_rpm_downloader.py index 01fa0a240..cf8a85c9b 100644 --- a/src/exploit_iq_commons/utils/source_rpm_downloader.py +++ b/src/exploit_iq_commons/utils/source_rpm_downloader.py @@ -113,7 +113,7 @@ def __init__(self): RepoUrl(url="https://mirror.stream.centos.org/12-stream/AppStream/source/tree/", platform="el12") ] # Default RPM cache directory - self.rpm_cache_dir = "/morpheus-data/rpms/" + self.rpm_cache_dir = "/exploit-iq-data/rpms/" # Unit Test mode toggle (default disabled) self._unit_test_mode: bool = False self._initialized = True @@ -138,7 +138,7 @@ def set_rpm_cache_dir(self, cache_dir: str): except Exception as e: logger.error("Failed to create RPM cache directory %s: %s", cache_dir, e) # Fall back to default directory if creation fails - self.rpm_cache_dir = "/morpheus-data/rpms/" + self.rpm_cache_dir = "/exploit-iq-data/rpms/" logger.warning("Falling back to default cache directory: %s", 
self.rpm_cache_dir) else: logger.info("RPM cache directory updated to: %s", cache_dir) diff --git a/src/vuln_analysis/configs/openapi/openapi.json b/src/vuln_analysis/configs/openapi/openapi.json index e0cdb1ad2..efba2b9ed 100644 --- a/src/vuln_analysis/configs/openapi/openapi.json +++ b/src/vuln_analysis/configs/openapi/openapi.json @@ -14,7 +14,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/AgentMorpheusInput-Input" + "$ref": "#/components/schemas/ExploitIqInput-Input" } } }, @@ -26,7 +26,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/AgentMorpheusOutput" + "$ref": "#/components/schemas/ExploitIqOutput" } } } @@ -63,7 +63,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/AgentMorpheusInput-Input" + "$ref": "#/components/schemas/ExploitIqInput-Input" } } }, @@ -75,7 +75,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/AgentMorpheusOutput" + "$ref": "#/components/schemas/ExploitIqOutput" } } } @@ -131,7 +131,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/AgentMorpheusInput-Input" + "$ref": "#/components/schemas/ExploitIqInput-Input" } } } @@ -142,7 +142,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/AgentMorpheusOutput" + "$ref": "#/components/schemas/ExploitIqOutput" } } } @@ -943,7 +943,7 @@ "title": "AgentIntermediateStep", "description": "Represents info for an intermediate step taken by an agent." 
}, - "AgentMorpheusEngineOutput": { + "ExploitIqEngineOutput": { "properties": { "vuln_id": { "type": "string", @@ -987,10 +987,10 @@ "intel_score", "cvss" ], - "title": "AgentMorpheusEngineOutput", - "description": "Contains all output generated by the main Agent Morpheus LLM Engine for a given vulnerability.\n\n- vuln_id: the ID of the vulnerability being processed by the LLM engine.\n- checklist: a list of ChecklistItemOutput objects, each containing an input and a response from the LLM agent.\n- summary: a short summary of the checklist inputs and responses, generated by an LLM.\n- justification: a JustificationOutput object containing details of the model's justification decision.\n- cvss: a CVSSOutput object containing the CVSS score and vector string for the vulnerability." + "title": "ExploitIqEngineOutput", + "description": "Contains all output generated by the main ExploitIQ LLM Engine for a given vulnerability.\n\n- vuln_id: the ID of the vulnerability being processed by the LLM engine.\n- checklist: a list of ChecklistItemOutput objects, each containing an input and a response from the LLM agent.\n- summary: a short summary of the checklist inputs and responses, generated by an LLM.\n- justification: a JustificationOutput object containing details of the model's justification decision.\n- cvss: a CVSSOutput object containing the CVSS score and vector string for the vulnerability." }, - "AgentMorpheusInfo": { + "ExploitIqInfo": { "properties": { "vdb": { "anyOf": [ @@ -1052,10 +1052,10 @@ } }, "type": "object", - "title": "AgentMorpheusInfo", - "description": "Information used for decisioning in the Agent Morpheus engine. 
These information can all be automatically\ngenerated or retrieved by the pipeline from the input information.\n\n- vdb: paths to source code and documentation vector databases (VDBs) used to understand whether a vulnerability\n is exploitable in the source code.\n- intel: list of CveIntel objects representing intelligence for each vulnerability pulled from various vulnerability\n databases and APIs.\n- sbom: software bill of materials listing the packages and versions in the container image, used to understand\n whether the vulnerable package exists in the image.\n- vulnerable_dependencies: a list of VulnerableDependencies objects for each vuln_id, representing the SBOM packages\n and transitive dependencies that are vulnerable for the vuln_id." + "title": "ExploitIqInfo", + "description": "Information used for decisioning in the ExploitIQ engine. These information can all be automatically\ngenerated or retrieved by the pipeline from the input information.\n\n- vdb: paths to source code and documentation vector databases (VDBs) used to understand whether a vulnerability\n is exploitable in the source code.\n- intel: list of CveIntel objects representing intelligence for each vulnerability pulled from various vulnerability\n databases and APIs.\n- sbom: software bill of materials listing the packages and versions in the container image, used to understand\n whether the vulnerable package exists in the image.\n- vulnerable_dependencies: a list of VulnerableDependencies objects for each vuln_id, representing the SBOM packages\n and transitive dependencies that are vulnerable for the vuln_id." }, - "AgentMorpheusInput-Input": { + "ExploitIqInput-Input": { "properties": { "scan": { "$ref": "#/components/schemas/ScanInfoInput" @@ -1103,10 +1103,10 @@ "scan", "image" ], - "title": "AgentMorpheusInput", - "description": "Inputs required by the Agent Morpheus pipeline." + "title": "ExploitIqInput", + "description": "Inputs required by the ExploitIQ pipeline." 
}, - "AgentMorpheusInput-Output": { + "ExploitIqInput-Output": { "properties": { "scan": { "$ref": "#/components/schemas/ScanInfoInput" @@ -1154,16 +1154,16 @@ "scan", "image" ], - "title": "AgentMorpheusInput", - "description": "Inputs required by the Agent Morpheus pipeline." + "title": "ExploitIqInput", + "description": "Inputs required by the ExploitIQ pipeline." }, - "AgentMorpheusOutput": { + "ExploitIqOutput": { "properties": { "input": { - "$ref": "#/components/schemas/AgentMorpheusInput-Output" + "$ref": "#/components/schemas/ExploitIqInput-Output" }, "info": { - "$ref": "#/components/schemas/AgentMorpheusInfo" + "$ref": "#/components/schemas/ExploitIqInfo" }, "output": { "$ref": "#/components/schemas/OutputPayload" @@ -1175,8 +1175,8 @@ "info", "output" ], - "title": "AgentMorpheusOutput", - "description": "\"\nThe final output of the Agent Morpheus pipeline.\nContains all fields in the AgentMorpheusEngineInput, plus the AgentMorpheusEngineOuput for each input vulnerability." + "title": "ExploitIqOutput", + "description": "\"\nThe final output of the ExploitIQ pipeline.\nContains all fields in the ExploitIqEngineInput, plus the ExploitIqEngineOutput for each input vulnerability." }, "AnalysisType": { "type": "string", @@ -3775,7 +3775,7 @@ "properties": { "analysis": { "items": { - "$ref": "#/components/schemas/AgentMorpheusEngineOutput" + "$ref": "#/components/schemas/ExploitIqEngineOutput" }, "type": "array", "title": "Analysis" @@ -3854,7 +3854,7 @@ }, "type": "object", "title": "PackageCheckerContext", - "description": "Consolidates all checker-specific state on AgentMorpheusInfo." + "description": "Consolidates all checker-specific state on ExploitIqInfo."
}, "PackageCheckerStatus": { "type": "integer", diff --git a/src/vuln_analysis/data/eval_datasets/eval_dataset.json b/src/vuln_analysis/data/eval_datasets/eval_dataset.json index 1f4597f4e..5cf98cd54 100644 --- a/src/vuln_analysis/data/eval_datasets/eval_dataset.json +++ b/src/vuln_analysis/data/eval_datasets/eval_dataset.json @@ -1,7 +1,8 @@ [ { "id": 1, - "question": "{\"image\":{\"name\":\"nvcr.io\/nvidia\/morpheus\/morpheus\",\"tag\":\"23.11-runtime\",\"source_info\":[{\"type\":\"code\",\"git_repo\":\"https:\/\/github.com\/nv-morpheus\/Morpheus.git\",\"ref\":\"v23.11.01\",\"include\":[\"**\/*.cpp\",\"**\/*.cu\",\"**\/*.cuh\",\"**\/*.h\",\"**\/*.hpp\",\"**\/*.ipynb\",\"**\/*.py\",\"**\/*Dockerfile\"],\"exclude\":[\"tests\/**\/*\"]},{\"type\":\"doc\",\"git_repo\":\"https:\/\/github.com\/nv-morpheus\/Morpheus.git\",\"ref\":\"v23.11.01\",\"include\":[\"**\/*.md\",\"docs\/**\/*.rst\"]}],\"sbom_info\":{\"_type\":\"file\",\"file_path\":\"data\/sboms\/nvcr.io\/nvidia\/morpheus\/morpheus:v23.11.01-runtime.sbom\"}},\"scan\":{\"vulns\":[{\"vuln_id\":\"GHSA-3f63-hfp8-52jq\"},{\"vuln_id\":\"CVE-2023-50782\"}]}}", + "question": "{\"image\":{\"name\":\"nvcr.io\/nvidia\/morpheus\/morpheus\",\"tag\":\"23.11-runtime\",\"source_info\":[{\"type\":\"code\",\"git_repo\":\"https:\/\/github.com\/nv-morpheus\/Morpheus.git\",\"ref\":\"v23.11.01\",\"include\":[\"**\/*.cpp\",\"**\/*.cu\",\"**\/*.cuh\",\"**\/*.h\",\"**\/*.hpp\",\"**\/*.ipynb\",\"**\/*.py\",\"**\/*Dockerfile\"],\"exclude\":[\"tests\/**\/*\"]},{\"type\":\"doc\",\"git_repo\":\"https:\/\/github.com\/nv-morpheus\/Morpheus.git\",\"ref\":\"v23.11.01\",\"include\":[\"**\/*.md\",\"docs\/**\/*.rst\"]}],\"sbom_info\":{\"_type\":\"file\",\"file_path\":\"data\/sboms\/nvcr.io\/nvidia\/morpheus\/morpheus:v23.11.01-runtime.sbom\"}},\"scan\":{\"vulns\":[{\"vuln_id\":\"GHSA-3f63-hfp8-52jq\"},{\"vuln_id\":\"CVE-2023-50782\"}]}}", + "answer": "N/A" } ] diff --git a/src/vuln_analysis/data_models/output.py b/src/vuln_analysis/data_models/output.py
index b182c9fa2..76f52ce97 100644 --- a/src/vuln_analysis/data_models/output.py +++ b/src/vuln_analysis/data_models/output.py @@ -19,7 +19,7 @@ from pydantic import Field from pydantic import model_validator -from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput +from exploit_iq_commons.data_models.input import ExploitIqEngineInput class FailureReport(BaseModel): @@ -74,9 +74,9 @@ class JustificationOutput(BaseModel): status: typing.Literal["TRUE", "FALSE", "UNKNOWN"] -class AgentMorpheusEngineOutput(BaseModel): +class ExploitIqEngineOutput(BaseModel): """ - Contains all output generated by the main Agent Morpheus LLM Engine for a given vulnerability. + Contains all output generated by the main ExploitIQ LLM Engine for a given vulnerability. - vuln_id: the ID of the vulnerability being processed by the LLM engine. - checklist: a list of ChecklistItemOutput objects, each containing an input and a response from the LLM agent. @@ -99,14 +99,14 @@ class OutputPayload(BaseModel): - analysis: per-vulnerability analysis results - vex: the vulnerability exploitability exchange document JSON """ - analysis: list[AgentMorpheusEngineOutput] + analysis: list[ExploitIqEngineOutput] vex: dict[str, typing.Any] | None -class AgentMorpheusOutput(AgentMorpheusEngineInput): +class ExploitIqOutput(ExploitIqEngineInput): """" - The final output of the Agent Morpheus pipeline. - Contains all fields in the AgentMorpheusEngineInput, plus the AgentMorpheusEngineOuput for each input vulnerability. + The final output of the ExploitIQ pipeline. + Contains all fields in the ExploitIqEngineInput, plus the ExploitIqEngineOutput for each input vulnerability.
""" output: OutputPayload diff --git a/src/vuln_analysis/data_models/state.py b/src/vuln_analysis/data_models/state.py index 9d147cbb6..5264f9a69 100644 --- a/src/vuln_analysis/data_models/state.py +++ b/src/vuln_analysis/data_models/state.py @@ -18,16 +18,16 @@ from pydantic import BaseModel from exploit_iq_commons.data_models.cve_intel import CveIntel -from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput +from exploit_iq_commons.data_models.input import ExploitIqEngineInput -class AgentMorpheusEngineState(BaseModel): +class ExploitIqEngineState(BaseModel): code_vdb_path: str | None = None doc_vdb_path: str | None = None code_index_path: str | None = None cve_intel: list[CveIntel] transitive_code_searcher: typing.Any | None = None - original_input: AgentMorpheusEngineInput | None = None + original_input: ExploitIqEngineInput | None = None checklist_plans: dict[str, list[str]] = {} checklist_results: dict[str, list[dict[str, typing.Any]]] = {} final_summaries: dict[str, str] = {} diff --git a/src/vuln_analysis/functions/cve_agent.py b/src/vuln_analysis/functions/cve_agent.py index ac2a1d00b..85587139e 100644 --- a/src/vuln_analysis/functions/cve_agent.py +++ b/src/vuln_analysis/functions/cve_agent.py @@ -25,7 +25,7 @@ from langchain_core.messages import HumanMessage from pydantic import Field -from vuln_analysis.data_models.state import AgentMorpheusEngineState +from vuln_analysis.data_models.state import ExploitIqEngineState from vuln_analysis.utils.error_handling_decorator import ToolRaisedException from vuln_analysis.utils.intel_utils import build_critical_context, enrich_go_candidates, enrich_vulnerable_functions_from_patch from vuln_analysis.runtime_context import ctx_state @@ -223,7 +223,7 @@ def _postprocess_results(results: list[tuple], replace_exceptions: bool, replace async def cve_agent(config: CVEAgentExecutorToolConfig, builder: Builder): semaphore = asyncio.Semaphore(config.max_concurrency) if config.max_concurrency else None - 
async def _arun(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState: + async def _arun(state: ExploitIqEngineState) -> ExploitIqEngineState: trace_id.set(state.original_input.input.scan.id) state.uber_jar_file_threshold = config.uber_jar_file_threshold ctx_state.set(state) @@ -274,6 +274,6 @@ async def _arun(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState: yield FunctionInfo.from_fn( _arun, - input_schema=AgentMorpheusEngineState, + input_schema=ExploitIqEngineState, description=("Executes provided checklist of tasks mapped to flagged CVEs to investigate the " "exploitability of a software container by the flagged CVEs.")) diff --git a/src/vuln_analysis/functions/cve_build_agent.py b/src/vuln_analysis/functions/cve_build_agent.py index dfca3dce4..86b55a52f 100644 --- a/src/vuln_analysis/functions/cve_build_agent.py +++ b/src/vuln_analysis/functions/cve_build_agent.py @@ -39,7 +39,7 @@ from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, RemoveMessage from nat.builder.context import Context -from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput +from exploit_iq_commons.data_models.input import ExploitIqEngineInput from vuln_analysis.functions.react_internals import ( CheckerThought, @@ -127,7 +127,7 @@ class L2InvestigationPhase(StrEnum): async def create_graph_build_agent( config: CVEBuildAgentConfig, builder: Builder, - state: AgentMorpheusEngineInput, + state: ExploitIqEngineInput, tracer, ): """Build the L2 Build Agent LangGraph. 
@@ -723,7 +723,7 @@ async def investigation_phase_node(state: BuildAgentState) -> dict: async def cve_build_agent(config: CVEBuildAgentConfig, builder: Builder): """Level 2 Build Agent entry point.""" - async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + async def _arun(message: ExploitIqEngineInput) -> ExploitIqEngineInput: """Run L2 build analysis and populate l2_result on checker_context.""" trace_id.set(message.input.scan.id) tracer = Context.get() diff --git a/src/vuln_analysis/functions/cve_calculate_intel_score.py b/src/vuln_analysis/functions/cve_calculate_intel_score.py index 5f562e4d2..f060efaf0 100644 --- a/src/vuln_analysis/functions/cve_calculate_intel_score.py +++ b/src/vuln_analysis/functions/cve_calculate_intel_score.py @@ -31,17 +31,17 @@ class CVECalculateIntelScoreConfig(FunctionBaseConfig, name="cve_calculate_intel Defines a function that calculates the intel quality generating a score for it. """ llm_name: str = Field(description="The LLM model to use with the CVE agent.") - generate_intel_score: bool = Field(default=True, description="Whether to generate a CVE intel score for the agent morpheus analysis or not.") + generate_intel_score: bool = Field(default=True, description="Whether to generate a CVE intel score for the exploit-iq analysis or not.") intel_low_score: int = Field(default=51, description="The intel low score threshold to stop analysis.") insist_analysis: bool = Field(default=False, description="Whether to continue the analysis even when the intel score is below 'intel_low_score' threshold.") @register_function(config_type=CVECalculateIntelScoreConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) async def cve_calculate_intel_score(config: CVECalculateIntelScoreConfig, builder: Builder): # pylint: disable=unused-argument - from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput + from exploit_iq_commons.data_models.input import ExploitIqEngineInput from 
vuln_analysis.utils.intel_source_score import IntelScorer - async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + async def _arun(message: ExploitIqEngineInput) -> ExploitIqEngineInput: if config.generate_intel_score: intel_scorer = IntelScorer(config, builder) @@ -50,5 +50,5 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: return message yield FunctionInfo.from_fn(_arun, - input_schema=AgentMorpheusEngineInput, + input_schema=ExploitIqEngineInput, description="Calculates the CVE source score.") diff --git a/src/vuln_analysis/functions/cve_check_vuln_deps.py b/src/vuln_analysis/functions/cve_check_vuln_deps.py index 08dc972af..e7f3ce898 100644 --- a/src/vuln_analysis/functions/cve_check_vuln_deps.py +++ b/src/vuln_analysis/functions/cve_check_vuln_deps.py @@ -44,10 +44,10 @@ async def cve_check_vuln_deps(config: CVEVulnerableDepsChecksConfig, builder: Bu from exploit_iq_commons.data_models.cve_intel import CveIntel from exploit_iq_commons.data_models.dependencies import VulnerableDependencies from exploit_iq_commons.data_models.dependencies import VulnerableSBOMPackage - from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput + from exploit_iq_commons.data_models.input import ExploitIqEngineInput from vuln_analysis.utils.vulnerable_dependency_checker import VulnerableDependencyChecker - async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + async def _arun(message: ExploitIqEngineInput) -> ExploitIqEngineInput: trace_id.set(message.input.scan.id) @@ -144,5 +144,5 @@ async def _calc_dep(cve_intel: CveIntel): return message yield FunctionInfo.from_fn(_arun, - input_schema=AgentMorpheusEngineInput, + input_schema=ExploitIqEngineInput, description=("Cross-references every entry in the SBOM for known vulnerabilities.")) diff --git a/src/vuln_analysis/functions/cve_checker_report.py b/src/vuln_analysis/functions/cve_checker_report.py index 130d34d2c..b8992fce5 
100644 --- a/src/vuln_analysis/functions/cve_checker_report.py +++ b/src/vuln_analysis/functions/cve_checker_report.py @@ -18,7 +18,7 @@ This module provides the report generation node for the L1/L2 pipeline. It consumes L1InvestigationResult (and optionally L2BuildResult) from -checker_context and produces the final AgentMorpheusOutput. +checker_context and produces the final ExploitIqOutput. """ import re @@ -35,7 +35,7 @@ from pydantic import Field from exploit_iq_commons.logging.loggers_factory import LoggingFactory, trace_id -from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput +from exploit_iq_commons.data_models.input import ExploitIqEngineInput from exploit_iq_commons.data_models.checker_status import ( EnumIdentifyResult, L1InvestigationResult, @@ -45,8 +45,8 @@ from nat.builder.context import Context from vuln_analysis.data_models.output import ( - AgentMorpheusEngineOutput, - AgentMorpheusOutput, + ExploitIqEngineOutput, + ExploitIqOutput, JustificationOutput, OutputPayload, ) @@ -768,7 +768,7 @@ def _build_details_md(blocks: ReportBlocks) -> str | None: def _build_report_blocks( - message: AgentMorpheusEngineInput, + message: ExploitIqEngineInput, code_agent_report: CodeAgentReport, cve_description: str, downstream_report: DownstreamSearchReport | None, @@ -994,7 +994,7 @@ def _apply_l2_verdict( def _build_analysis( - message: AgentMorpheusEngineInput, + message: ExploitIqEngineInput, code_agent_report: CodeAgentReport, intel_score: int, cve_description: str = "", @@ -1002,7 +1002,7 @@ def _build_analysis( upstream_report: UpstreamSearchReport | None = None, l1_result: L1InvestigationResult | None = None, l2_result: L2BuildResult | None = None, -) -> list[AgentMorpheusEngineOutput]: +) -> list[ExploitIqEngineOutput]: """Build the final analysis output from the code agent report using ReportBlocks. 
- summary: LLM executive_summary (verdict, reconciliation, technical context) @@ -1045,7 +1045,7 @@ def _build_analysis( details = _build_details_md(blocks) return [ - AgentMorpheusEngineOutput( + ExploitIqEngineOutput( vuln_id=intel.vuln_id, checklist=[], summary=summary, @@ -1075,7 +1075,7 @@ class CVECheckerReportConfig(FunctionBaseConfig, name="cve_checker_report"): async def cve_checker_report(config: CVECheckerReportConfig, builder: Builder): """Report generation function for the L1/L2 checker pipeline.""" - async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: + async def _arun(message: ExploitIqEngineInput) -> ExploitIqOutput: """Generate the final checker report from L1 (and optionally L2) results.""" trace_id.set(message.input.scan.id) tracer = Context.get() @@ -1085,12 +1085,12 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: ctx = message.info.checker_context if ctx is None or ctx.l1_result is None: logger.error("cve_checker_report: no L1 result available") - return AgentMorpheusOutput( + return ExploitIqOutput( input=message.input, info=message.info, output=OutputPayload( analysis=[ - AgentMorpheusEngineOutput( + ExploitIqEngineOutput( vuln_id=intel.vuln_id, checklist=[], summary="Rpm scanning investigation did not produce results.", @@ -1201,7 +1201,7 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusOutput: if descriptions: cve_description = descriptions[0][1] - return AgentMorpheusOutput( + return ExploitIqOutput( input=message.input, info=message.info, output=OutputPayload( diff --git a/src/vuln_analysis/functions/cve_checker_segmentation.py b/src/vuln_analysis/functions/cve_checker_segmentation.py index bae7f31ee..129c9c1b1 100644 --- a/src/vuln_analysis/functions/cve_checker_segmentation.py +++ b/src/vuln_analysis/functions/cve_checker_segmentation.py @@ -107,11 +107,11 @@ class CVECheckerSegmentationConfig(FunctionBaseConfig, name="cve_checker_segment 
@register_function(config_type=CVECheckerSegmentationConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) async def cve_checker_segmentation(config: CVECheckerSegmentationConfig, builder: Builder): - from exploit_iq_commons.data_models.info import AgentMorpheusInfo - from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput + from exploit_iq_commons.data_models.info import ExploitIqInfo + from exploit_iq_commons.data_models.input import ExploitIqEngineInput from vuln_analysis.utils.full_text_search import FullTextSearch - async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + async def _arun(message: ExploitIqEngineInput) -> ExploitIqEngineInput: if not message.info.checker_context or not message.info.checker_context.source_key: logger.info("checker_segmentation: no checker_context.source_keys, skipping indexing") return message @@ -144,7 +144,7 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: elapsed = time.time() - start logger.info("checker_segmentation: indexing completed in %.2fs at %s", elapsed, index_path) - message.info.vdb = AgentMorpheusInfo.VdbPaths(code_index_path=str(index_path)) + message.info.vdb = ExploitIqInfo.VdbPaths(code_index_path=str(index_path)) return message yield FunctionInfo.from_fn( diff --git a/src/vuln_analysis/functions/cve_checklist.py b/src/vuln_analysis/functions/cve_checklist.py index ab6f3c985..ead68f119 100644 --- a/src/vuln_analysis/functions/cve_checklist.py +++ b/src/vuln_analysis/functions/cve_checklist.py @@ -48,7 +48,7 @@ class CVEChecklistToolConfig(FunctionBaseConfig, name="cve_checklist"): @register_function(config_type=CVEChecklistToolConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) async def cve_checklist(config: CVEChecklistToolConfig, builder: Builder): - from vuln_analysis.data_models.state import AgentMorpheusEngineState + from vuln_analysis.data_models.state import ExploitIqEngineState from 
vuln_analysis.utils.checklist_prompt_generator import _parse_list from vuln_analysis.utils.checklist_prompt_generator import generate_checklist @@ -71,7 +71,7 @@ async def generate_checklist_for_cve(cve_intel, ecosystem: str = ""): return cve_intel["vuln_id"], checklist[0] - async def _arun(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState: + async def _arun(state: ExploitIqEngineState) -> ExploitIqEngineState: trace_id.set(state.original_input.input.scan.id) ecosystem = state.original_input.input.image.ecosystem.value if state.original_input and state.original_input.input.image.ecosystem else "" intel_df = data_utils.merge_intel_and_plugin_data_convert_to_dataframe(state.cve_intel) @@ -84,5 +84,5 @@ async def _arun(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState: yield FunctionInfo.from_fn( _arun, - input_schema=AgentMorpheusEngineState, + input_schema=ExploitIqEngineState, description=("Generates tailored, context-sensitive task checklist for impact analysis.")) diff --git a/src/vuln_analysis/functions/cve_clone_and_deps.py b/src/vuln_analysis/functions/cve_clone_and_deps.py index a7e2117ab..74af45dd7 100644 --- a/src/vuln_analysis/functions/cve_clone_and_deps.py +++ b/src/vuln_analysis/functions/cve_clone_and_deps.py @@ -64,9 +64,9 @@ class CVECloneAndDepsConfig(FunctionBaseConfig, name="cve_clone_and_deps"): @register_function(config_type=CVECloneAndDepsConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) async def clone_and_deps(config: CVECloneAndDepsConfig, builder: Builder): - from exploit_iq_commons.data_models.info import AgentMorpheusInfo - from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput - from exploit_iq_commons.data_models.input import AgentMorpheusInput + from exploit_iq_commons.data_models.info import ExploitIqInfo + from exploit_iq_commons.data_models.input import ExploitIqEngineInput + from exploit_iq_commons.data_models.input import ExploitIqInput from exploit_iq_commons.data_models.input 
import ManualSBOMInfoInput from exploit_iq_commons.utils.document_embedding import DocumentEmbedding from exploit_iq_commons.utils.source_rpm_downloader import RPMDependencyManager @@ -80,7 +80,7 @@ async def clone_and_deps(config: CVECloneAndDepsConfig, builder: Builder): pickle_cache_directory=config.base_pickle_dir, ) - async def _arun(message: AgentMorpheusInput) -> AgentMorpheusEngineInput: + async def _arun(message: ExploitIqInput) -> ExploitIqEngineInput: """ Clone repositories and install dependencies. @@ -148,11 +148,11 @@ async def _arun(message: AgentMorpheusInput) -> AgentMorpheusEngineInput: if failure_reason: message.failure_reason = failure_reason - info = AgentMorpheusInfo() - return AgentMorpheusEngineInput(input=message, info=info) + info = ExploitIqInfo() + return ExploitIqEngineInput(input=message, info=info) yield FunctionInfo.from_fn( _arun, - input_schema=AgentMorpheusInput, + input_schema=ExploitIqInput, description="Clone repositories and install dependencies (early phase).", ) diff --git a/src/vuln_analysis/functions/cve_fetch_intel.py b/src/vuln_analysis/functions/cve_fetch_intel.py index b21b9238a..746a38837 100644 --- a/src/vuln_analysis/functions/cve_fetch_intel.py +++ b/src/vuln_analysis/functions/cve_fetch_intel.py @@ -46,10 +46,10 @@ class CVEFetchIntelConfig(FunctionBaseConfig, name="cve_fetch_intel"): @register_function(config_type=CVEFetchIntelConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) async def cve_fetch_intel(config: CVEFetchIntelConfig, builder: Builder): # pylint: disable=unused-argument - from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput + from exploit_iq_commons.data_models.input import ExploitIqEngineInput from vuln_analysis.utils.intel_retriever import IntelRetriever - async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + async def _arun(message: ExploitIqEngineInput) -> ExploitIqEngineInput: async def _inner(): async with aiohttp.ClientSession() as 
session: trace_id.set(message.input.scan.id) @@ -71,5 +71,5 @@ async def _inner(): return message yield FunctionInfo.from_fn(_arun, - input_schema=AgentMorpheusEngineInput, + input_schema=ExploitIqEngineInput, description=("Fetches details about CVEs from NIST and CVE Details websites.")) diff --git a/src/vuln_analysis/functions/cve_fetch_patches.py b/src/vuln_analysis/functions/cve_fetch_patches.py index a78b5acbe..168e35864 100644 --- a/src/vuln_analysis/functions/cve_fetch_patches.py +++ b/src/vuln_analysis/functions/cve_fetch_patches.py @@ -38,7 +38,7 @@ class CVEFetchPatchesConfig(FunctionBaseConfig, name="cve_fetch_patches"): @register_function(config_type=CVEFetchPatchesConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) async def cve_fetch_patches(config: CVEFetchPatchesConfig, builder: Builder): - from vuln_analysis.data_models.state import AgentMorpheusEngineState + from vuln_analysis.data_models.state import ExploitIqEngineState from vuln_analysis.utils.intel_utils import extract_commit_url_candidates from vuln_analysis.utils.web_patch_fetcher import fetch_patch_for_cve @@ -47,7 +47,7 @@ async def cve_fetch_patches(config: CVEFetchPatchesConfig, builder: Builder): if config.llm_name else None ) - async def _arun(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState: + async def _arun(state: ExploitIqEngineState) -> ExploitIqEngineState: trace_id.set(state.original_input.input.scan.id) intel_map = {intel.vuln_id: intel for intel in state.cve_intel} @@ -87,6 +87,6 @@ async def _throttled_fetch(vuln_id, intel): yield FunctionInfo.from_fn( _arun, - input_schema=AgentMorpheusEngineState, + input_schema=ExploitIqEngineState, description="Fetches vulnerability fix patches from intel references and OSV API.", ) diff --git a/src/vuln_analysis/functions/cve_file_output.py b/src/vuln_analysis/functions/cve_file_output.py index 6e41d198c..04b05d4b2 100644 --- a/src/vuln_analysis/functions/cve_file_output.py +++ 
b/src/vuln_analysis/functions/cve_file_output.py @@ -46,7 +46,7 @@ async def output_to_file(config: CVEFileOutputConfig, builder: Builder): # pyli import os from pathlib import Path - from vuln_analysis.data_models.output import AgentMorpheusOutput + from vuln_analysis.data_models.output import ExploitIqOutput from vuln_analysis.utils.output_formatter import generate_vulnerability_reports if (os.path.exists(config.file_path)): @@ -59,7 +59,7 @@ async def output_to_file(config: CVEFileOutputConfig, builder: Builder): # pyli # Ensure our directory exists os.makedirs(os.path.realpath(os.path.dirname(config.file_path)), exist_ok=True) - async def _arun(message: AgentMorpheusOutput) -> AgentMorpheusOutput: + async def _arun(message: ExploitIqOutput) -> ExploitIqOutput: file_path = Path(config.file_path) @@ -78,5 +78,5 @@ async def _arun(message: AgentMorpheusOutput) -> AgentMorpheusOutput: return message yield FunctionInfo.from_fn(_arun, - input_schema=AgentMorpheusOutput, + input_schema=ExploitIqOutput, description=("Outputs workflow results to a file.")) diff --git a/src/vuln_analysis/functions/cve_generate_cvss.py b/src/vuln_analysis/functions/cve_generate_cvss.py index 8343d1c1b..eac6a67d3 100644 --- a/src/vuln_analysis/functions/cve_generate_cvss.py +++ b/src/vuln_analysis/functions/cve_generate_cvss.py @@ -33,7 +33,7 @@ from cvss import CVSS3 from langchain.agents.mrkl.output_parser import MRKLOutputParser -from vuln_analysis.data_models.state import AgentMorpheusEngineState +from vuln_analysis.data_models.state import ExploitIqEngineState from vuln_analysis.tools.tool_names import ToolNames from vuln_analysis.utils.prompting import get_cvss_prompt @@ -183,7 +183,7 @@ def handle_parse_error(exception: OutputParserException) -> str: return handle_parse_error async def _create_agent(config: CVEGenerateCvssToolConfig, builder: Builder, - state: AgentMorpheusEngineState) -> AgentExecutor: + state: ExploitIqEngineState) -> AgentExecutor: tools = 
builder.get_tools(tool_names=config.tool_names, wrapper_type=LLMFrameworkEnum.LANGCHAIN) llm = await builder.get_llm(llm_name=config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN) is_openai = "openai" in llm.__class__.__module__.lower() @@ -225,7 +225,7 @@ async def _create_agent(config: CVEGenerateCvssToolConfig, builder: Builder, async def _run_for_vuln(agent: AgentExecutor, - state: AgentMorpheusEngineState, + state: ExploitIqEngineState, vuln_id: str, semaphore: asyncio.Semaphore | None) -> list: state_copy = state.model_copy(deep=True) @@ -293,7 +293,7 @@ async def cve_generate_cvss(config: CVEGenerateCvssToolConfig, builder: Builder) semaphore = asyncio.Semaphore(config.max_concurrency) if config.max_concurrency else None - async def _arun(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState: + async def _arun(state: ExploitIqEngineState) -> ExploitIqEngineState: if config.skip: logger.info("`config.skip` is set to True. Skipping CVSS generation.") @@ -322,5 +322,5 @@ async def _arun(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState: yield FunctionInfo.from_fn( _arun, - input_schema=AgentMorpheusEngineState, + input_schema=ExploitIqEngineState, description=("Generates the CVSS (Common Vulnerability Scoring System) score and vector string for the vulnerability analysis results.")) diff --git a/src/vuln_analysis/functions/cve_generate_vdbs.py b/src/vuln_analysis/functions/cve_generate_vdbs.py index 354631eea..0a30ff7eb 100644 --- a/src/vuln_analysis/functions/cve_generate_vdbs.py +++ b/src/vuln_analysis/functions/cve_generate_vdbs.py @@ -71,9 +71,9 @@ class CVEGenerateVDBsToolConfig(FunctionBaseConfig, name="cve_generate_vdbs"): @register_function(config_type=CVEGenerateVDBsToolConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) async def generate_vdb(config: CVEGenerateVDBsToolConfig, builder: Builder): - from exploit_iq_commons.data_models.info import AgentMorpheusInfo - from exploit_iq_commons.data_models.input import 
AgentMorpheusEngineInput - from exploit_iq_commons.data_models.input import AgentMorpheusInput + from exploit_iq_commons.data_models.info import ExploitIqInfo + from exploit_iq_commons.data_models.input import ExploitIqEngineInput + from exploit_iq_commons.data_models.input import ExploitIqInput from exploit_iq_commons.data_models.input import SourceDocumentsInfo from vuln_analysis.functions.cve_agent import CVEAgentExecutorToolConfig from exploit_iq_commons.utils.document_embedding import DocumentEmbedding @@ -168,7 +168,7 @@ def _build_code_index(source_infos: list[SourceDocumentsInfo]) -> Path | None: code_index_path = FullTextSearch.get_index_directory( base_path=config.base_code_index_dir, hash_value=embedder.hash_source_documents_info(source_infos)) - if (not code_index_path.exists() or os.environ.get("MORPHEUS_ALWAYS_REBUILD_VDB", "0") == "1"): + if (not code_index_path.exists() or os.environ.get("EXPLOIT_IQ_ALWAYS_REBUILD_VDB", "0") == "1"): documents_exists = _create_code_index(source_infos, embedder, code_index_path) else: logger.info("Cache hit on code index. Loading existing code index: %s", code_index_path) @@ -177,7 +177,7 @@ def _build_code_index(source_infos: list[SourceDocumentsInfo]) -> Path | None: else: return None - async def _arun(message: AgentMorpheusInput) -> AgentMorpheusEngineInput: + async def _arun(message: ExploitIqInput) -> ExploitIqEngineInput: """ Builds source code and documentation FAISS databases based upon the source repositories. For now we are only storing a path to the FAISS databases in the message. 
@@ -186,7 +186,7 @@ async def _arun(message: AgentMorpheusInput) -> AgentMorpheusEngineInput: Parameters ---------- - message : AgentMorpheusInput + message : ExploitIqInput The input message build_vdb_fn : typing.Callable[[str, str, list[SourceCodeRepo]], dict[str, str]] The function that builds the VDB database for a given vulnerability scan id, base image, and source code @@ -279,12 +279,12 @@ async def _arun(message: AgentMorpheusInput) -> AgentMorpheusEngineInput: if not code_index_success: message.failure_reason = failure_reason - info = AgentMorpheusInfo(vdb=AgentMorpheusInfo.VdbPaths( + info = ExploitIqInfo(vdb=ExploitIqInfo.VdbPaths( code_vdb_path=vdb_code_path, doc_vdb_path=vdb_doc_path, code_index_path=code_index_path)) message.code_index_success = code_index_success - return AgentMorpheusEngineInput(input=message, info=info) + return ExploitIqEngineInput(input=message, info=info) yield FunctionInfo.from_fn(_arun, - input_schema=AgentMorpheusInput, + input_schema=ExploitIqInput, description=("Generates vector database from code repositories and documentation.")) diff --git a/src/vuln_analysis/functions/cve_generate_vex.py b/src/vuln_analysis/functions/cve_generate_vex.py index 62a9d0598..3e3c227bc 100644 --- a/src/vuln_analysis/functions/cve_generate_vex.py +++ b/src/vuln_analysis/functions/cve_generate_vex.py @@ -20,7 +20,7 @@ from aiq.cli.register_workflow import register_function from aiq.data_models.function import FunctionBaseConfig from pydantic import Field -from vuln_analysis.data_models.state import AgentMorpheusEngineState +from vuln_analysis.data_models.state import ExploitIqEngineState from vuln_analysis.utils.vex.vex_generator_loader import load_vex_generator from exploit_iq_commons.logging.loggers_factory import LoggingFactory @@ -37,7 +37,7 @@ class CVEGenerateVexConfig(FunctionBaseConfig, name="cve_generate_vex"): @register_function(config_type=CVEGenerateVexConfig) async def cve_generate_vex(config: CVEGenerateVexConfig, builder:
Builder): - async def _arun(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState: + async def _arun(state: ExploitIqEngineState) -> ExploitIqEngineState: if config.skip: logger.info("`config.skip` is set to True. Skipping VEX generation.") return state @@ -58,5 +58,5 @@ async def _arun(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState: return state yield FunctionInfo.from_fn(_arun, - input_schema=AgentMorpheusEngineState, + input_schema=ExploitIqEngineState, description="Generates a custom VEX document for vulnerable components.") diff --git a/src/vuln_analysis/functions/cve_http_output.py b/src/vuln_analysis/functions/cve_http_output.py index 792b0f107..3f9c30e9e 100644 --- a/src/vuln_analysis/functions/cve_http_output.py +++ b/src/vuln_analysis/functions/cve_http_output.py @@ -36,7 +36,7 @@ import re if TYPE_CHECKING: - from vuln_analysis.data_models.output import AgentMorpheusOutput, FailureReport + from vuln_analysis.data_models.output import ExploitIqOutput, FailureReport logger = LoggingFactory.get_agent_logger(__name__) @@ -109,7 +109,7 @@ class OutputPayload: def _build_output_payload( - message: "AgentMorpheusOutput", + message: "ExploitIqOutput", config: CVEHttpOutputConfig, default_json: str, ) -> OutputPayload: @@ -159,10 +159,10 @@ def _build_output_payload( @register_function(config_type=CVEHttpOutputConfig) async def output_to_http(config: CVEHttpOutputConfig, builder: Builder): # pylint: disable=unused-argument - from vuln_analysis.data_models.output import AgentMorpheusOutput + from vuln_analysis.data_models.output import ExploitIqOutput from vuln_analysis.utils import http_utils - async def _arun(message: AgentMorpheusOutput) -> AgentMorpheusOutput: + async def _arun(message: ExploitIqOutput) -> ExploitIqOutput: trace_id.set(message.input.scan.id) @@ -219,7 +219,7 @@ async def _arun(message: AgentMorpheusOutput) -> AgentMorpheusOutput: return message - def _extract_job_data(message: AgentMorpheusOutput) -> Job: + def 
_extract_job_data(message: ExploitIqOutput) -> Job: agent_config = builder.get_workflow_config() job_id = message.input.scan.id start_time = datetime.fromisoformat(message.input.scan.started_at.replace('Z', '+00:00')) @@ -246,7 +246,7 @@ def _extract_job_data(message: AgentMorpheusOutput) -> Job: env_vars=env_vars, job_output=message_output) yield FunctionInfo.from_fn(_arun, - input_schema=AgentMorpheusOutput, + input_schema=ExploitIqOutput, description=("Sends CVE workflow output to HTTP endpoint.")) diff --git a/src/vuln_analysis/functions/cve_justify.py b/src/vuln_analysis/functions/cve_justify.py index 8124b4222..790b8b805 100644 --- a/src/vuln_analysis/functions/cve_justify.py +++ b/src/vuln_analysis/functions/cve_justify.py @@ -39,7 +39,7 @@ async def cve_justify(config: CVEJustifyToolConfig, builder: Builder): from langchain_core.prompts import PromptTemplate - from vuln_analysis.data_models.state import AgentMorpheusEngineState + from vuln_analysis.data_models.state import ExploitIqEngineState from vuln_analysis.utils.justification_parser import JustificationParser jp = JustificationParser() @@ -53,7 +53,7 @@ async def justify_cve(summary): justification_text = await chain.ainvoke({"summary": summary}) return justification_text.content - async def _arun(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState: + async def _arun(state: ExploitIqEngineState) -> ExploitIqEngineState: trace_id.set(state.original_input.input.scan.id) results = await asyncio.gather(*(justify_cve(summary) for summary in state.final_summaries.values())) parsed_justification = await asyncio.gather(jp._parse_justification(results)) @@ -69,5 +69,5 @@ async def _arun(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState: return state yield FunctionInfo.from_fn(_arun, - input_schema=AgentMorpheusEngineState, + input_schema=ExploitIqEngineState, description=("Assigns justification label and reason to each CVE based on summary.")) diff --git 
a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index 251a0d17a..4327153cd 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -32,7 +32,7 @@ from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, RemoveMessage from nat.builder.context import Context -from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput +from exploit_iq_commons.data_models.input import ExploitIqEngineInput from vuln_analysis.functions.code_agent_graph_defs import ( CodeAgentState, DownstreamSearchReport, @@ -408,7 +408,7 @@ class CVEPackageCodeAgentConfig(FunctionBaseConfig, name="cve_package_code_agent ) -async def create_graph_code_agent(config: CVEPackageCodeAgentConfig, builder: Builder, state: AgentMorpheusEngineInput, tracer): +async def create_graph_code_agent(config: CVEPackageCodeAgentConfig, builder: Builder, state: ExploitIqEngineInput, tracer): # Node name constants THOUGHT_NODE = "think_node" TOOL_NODE = "tool" @@ -1489,7 +1489,7 @@ def should_continue_after_intel(state: CodeAgentState) -> str: @register_function(config_type=CVEPackageCodeAgentConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) async def cve_package_code_agent(config: CVEPackageCodeAgentConfig, builder: Builder): - async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + async def _arun(message: ExploitIqEngineInput) -> ExploitIqEngineInput: """Run L1 investigation and return intermediate result for routing to L2 or report generation.""" trace_id.set(message.input.scan.id) tracer = Context.get() diff --git a/src/vuln_analysis/functions/cve_process_sbom.py b/src/vuln_analysis/functions/cve_process_sbom.py index 39a11788b..93a5de421 100644 --- a/src/vuln_analysis/functions/cve_process_sbom.py +++ b/src/vuln_analysis/functions/cve_process_sbom.py @@ -70,8 +70,8 @@ class CVEProcessSBOMConfig(FunctionBaseConfig, 
name="cve_process_sbom"): @register_function(config_type=CVEProcessSBOMConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) async def cve_process_sbom(config: CVEProcessSBOMConfig, builder: Builder): # pylint: disable=unused-argument - from exploit_iq_commons.data_models.info import AgentMorpheusInfo - from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput + from exploit_iq_commons.data_models.info import ExploitIqInfo + from exploit_iq_commons.data_models.input import ExploitIqEngineInput from exploit_iq_commons.data_models.input import FileSBOMInfoInput from exploit_iq_commons.data_models.input import HTTPSBOMInfoInput from exploit_iq_commons.data_models.input import ManualSBOMInfoInput @@ -79,7 +79,7 @@ async def cve_process_sbom(config: CVEProcessSBOMConfig, builder: Builder): # p from vuln_analysis.utils import http_utils from vuln_analysis.utils.http_utils import HTTPMethod - async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + async def _arun(message: ExploitIqEngineInput) -> ExploitIqEngineInput: def _parse_sbom_packages(sbom_lines: list[str]) -> list[SBOMPackage]: # Extract the packages @@ -112,7 +112,7 @@ def _parse_sbom_packages(sbom_lines: list[str]) -> list[SBOMPackage]: assert isinstance(message.input.image.sbom_info, ManualSBOMInfoInput) # Create the SBOM object - message.info.sbom = AgentMorpheusInfo.SBOMInfo(packages=message.input.image.sbom_info.packages) + message.info.sbom = ExploitIqInfo.SBOMInfo(packages=message.input.image.sbom_info.packages) elif (message.input.image.sbom_info.type == FileSBOMInfoInput.static_type()): assert isinstance(message.input.image.sbom_info, FileSBOMInfoInput) @@ -122,7 +122,7 @@ def _parse_sbom_packages(sbom_lines: list[str]) -> list[SBOMPackage]: sbom_lines = f.readlines() # Create the SBOM object - message.info.sbom = AgentMorpheusInfo.SBOMInfo(packages=_parse_sbom_packages(sbom_lines)) + message.info.sbom = 
ExploitIqInfo.SBOMInfo(packages=_parse_sbom_packages(sbom_lines)) elif (message.input.image.sbom_info.type == HTTPSBOMInfoInput.static_type()): assert isinstance(message.input.image.sbom_info, HTTPSBOMInfoInput) @@ -137,7 +137,7 @@ def _parse_sbom_packages(sbom_lines: list[str]) -> list[SBOMPackage]: ) except Exception as e: logger.error("Error fetching SBOM from %s: %s", message.input.image.sbom_info.url, e) - message.info.sbom = AgentMorpheusInfo.SBOMInfo(packages=[]) + message.info.sbom = ExploitIqInfo.SBOMInfo(packages=[]) return message try: @@ -146,14 +146,14 @@ def _parse_sbom_packages(sbom_lines: list[str]) -> list[SBOMPackage]: packages = _parse_sbom_packages(sbom_lines) except Exception as e: logger.error("Error parsing SBOM from %s: %s", message.input.image.sbom_info.url, e) - message.info.sbom = AgentMorpheusInfo.SBOMInfo(packages=[]) + message.info.sbom = ExploitIqInfo.SBOMInfo(packages=[]) return message # Create the SBOM object - message.info.sbom = AgentMorpheusInfo.SBOMInfo(packages=packages) + message.info.sbom = ExploitIqInfo.SBOMInfo(packages=packages) return message yield FunctionInfo.from_fn(_arun, - input_schema=AgentMorpheusEngineInput, + input_schema=ExploitIqEngineInput, description=("Prepares and validates input SBOM.")) diff --git a/src/vuln_analysis/functions/cve_segmentation.py b/src/vuln_analysis/functions/cve_segmentation.py index 7a1135d8b..361bb8f93 100644 --- a/src/vuln_analysis/functions/cve_segmentation.py +++ b/src/vuln_analysis/functions/cve_segmentation.py @@ -89,8 +89,8 @@ class DocumentCollectionError(Exception): @register_function(config_type=CVESegmentationConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) async def segmentation(config: CVESegmentationConfig, builder: Builder): - from exploit_iq_commons.data_models.info import AgentMorpheusInfo - from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput + from exploit_iq_commons.data_models.info import ExploitIqInfo + from 
exploit_iq_commons.data_models.input import ExploitIqEngineInput from exploit_iq_commons.data_models.input import SourceDocumentsInfo from exploit_iq_commons.utils.document_embedding import DocumentEmbedding from exploit_iq_commons.utils.standard_library_cache import StandardLibraryCache @@ -185,7 +185,7 @@ def _build_code_index(source_infos: list[SourceDocumentsInfo]) -> Path | None: hash_value=index_embedder.hash_source_documents_info(code_sources), ) - if not code_index_path.exists() or os.environ.get("MORPHEUS_ALWAYS_REBUILD_VDB", "0") == "1": + if not code_index_path.exists() or os.environ.get("EXPLOIT_IQ_ALWAYS_REBUILD_VDB", "0") == "1": documents_exist = _create_code_index(code_sources, index_embedder, code_index_path) if not documents_exist: return None @@ -194,7 +194,7 @@ def _build_code_index(source_infos: list[SourceDocumentsInfo]) -> Path | None: return code_index_path - async def _arun(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + async def _arun(state: ExploitIqEngineInput) -> ExploitIqEngineInput: """ Build VDBs and code indexes from already-cloned repositories. 
@@ -265,7 +265,7 @@ async def _arun(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: message.code_index_success = code_index_success - state.info.vdb = AgentMorpheusInfo.VdbPaths( + state.info.vdb = ExploitIqInfo.VdbPaths( code_vdb_path=vdb_code_path, doc_vdb_path=vdb_doc_path, code_index_path=code_index_path, @@ -275,6 +275,6 @@ async def _arun(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: yield FunctionInfo.from_fn( _arun, - input_schema=AgentMorpheusEngineInput, + input_schema=ExploitIqEngineInput, description="Build VDBs and code indexes from cloned repositories (late phase).", ) diff --git a/src/vuln_analysis/functions/cve_source_acquisition.py b/src/vuln_analysis/functions/cve_source_acquisition.py index eabb2ded4..6406873c8 100644 --- a/src/vuln_analysis/functions/cve_source_acquisition.py +++ b/src/vuln_analysis/functions/cve_source_acquisition.py @@ -115,9 +115,9 @@ class CVESourceAcquisitionConfig(FunctionBaseConfig, name="cve_source_acquisitio @register_function(config_type=CVESourceAcquisitionConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) async def cve_source_acquisition(config: CVESourceAcquisitionConfig, builder: Builder): - from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput + from exploit_iq_commons.data_models.input import ExploitIqEngineInput - async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + async def _arun(message: ExploitIqEngineInput) -> ExploitIqEngineInput: logger.info("source_acquisition: starting source code acquisition") rpm_manager = RPMDependencyManager.get_instance() @@ -237,6 +237,6 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: yield FunctionInfo.from_fn( _arun, - input_schema=AgentMorpheusEngineInput, + input_schema=ExploitIqEngineInput, description="Downloads source containers and locates package sources by purl and ecosystem.", ) diff --git a/src/vuln_analysis/functions/cve_summarize.py 
b/src/vuln_analysis/functions/cve_summarize.py index 6c0644ac6..3a7a51265 100644 --- a/src/vuln_analysis/functions/cve_summarize.py +++ b/src/vuln_analysis/functions/cve_summarize.py @@ -77,7 +77,7 @@ async def cve_summarize(config: CVESummarizeToolConfig, builder: Builder): from langchain_core.prompts import PromptTemplate - from vuln_analysis.data_models.state import AgentMorpheusEngineState + from vuln_analysis.data_models.state import ExploitIqEngineState from vuln_analysis.utils.prompting import SUMMARY_PROMPT llm = await builder.get_llm(llm_name=config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN) @@ -134,7 +134,7 @@ async def summarize_cve(results, ecosystem: str = ""): final_summary = await chain.ainvoke({"response": response}) return final_summary.content - async def _arun(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState: + async def _arun(state: ExploitIqEngineState) -> ExploitIqEngineState: trace_id.set(state.original_input.input.scan.id) ecosystem = state.original_input.input.image.ecosystem.value if state.original_input and state.original_input.input.image.ecosystem else "" results = await asyncio.gather(*(summarize_cve(results, ecosystem=ecosystem) for results in state.checklist_results.items())) @@ -143,5 +143,5 @@ async def _arun(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState: yield FunctionInfo.from_fn( _arun, - input_schema=AgentMorpheusEngineState, + input_schema=ExploitIqEngineState, description=("Generates concise, human-readable summarization paragraph from agent results.")) diff --git a/src/vuln_analysis/functions/cve_verify_vuln_package.py b/src/vuln_analysis/functions/cve_verify_vuln_package.py index 87a4c5dd5..2b33ca41f 100644 --- a/src/vuln_analysis/functions/cve_verify_vuln_package.py +++ b/src/vuln_analysis/functions/cve_verify_vuln_package.py @@ -380,7 +380,7 @@ async def cve_verify_vuln_package(config: CVEVerifyVulnPackageConfig, builder: B from langchain_core.messages import HumanMessage - from 
exploit_iq_commons.data_models.input import AgentMorpheusEngineInput + from exploit_iq_commons.data_models.input import ExploitIqEngineInput from vuln_analysis.utils.package_matchers import get_package_matcher, PackageMatch from vuln_analysis.utils.version_check import ( StdlibVulnerabilityResult, @@ -767,10 +767,10 @@ async def _process_cve_intel_loop( return vulnerable_dependencies async def _handle_cpp_image_analysis( - message: AgentMorpheusEngineInput, + message: ExploitIqEngineInput, version_checker, package_matcher, - ) -> AgentMorpheusEngineInput: + ) -> ExploitIqEngineInput: """ Handle C/C++ container IMAGE analysis using SBOM matching and LLM version checking. @@ -818,7 +818,7 @@ async def _handle_cpp_image_analysis( len(vulnerable_dependencies)) return message - async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + async def _arun(message: ExploitIqEngineInput) -> ExploitIqEngineInput: from vuln_analysis.utils.package_matchers import CppPackageMatcher from exploit_iq_commons.utils.source_rpm_downloader import RPMDependencyManager @@ -937,6 +937,6 @@ async def _arun(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: yield FunctionInfo.from_fn( _arun, - input_schema=AgentMorpheusEngineInput, + input_schema=ExploitIqEngineInput, description="Verifies vulnerable package presence in source dependencies before LLM analysis." 
) diff --git a/src/vuln_analysis/register.py b/src/vuln_analysis/register.py index 296391e0e..a11c1b8db 100644 --- a/src/vuln_analysis/register.py +++ b/src/vuln_analysis/register.py @@ -25,12 +25,12 @@ from exploit_iq_commons.data_models.common import PipelineMode from exploit_iq_commons.data_models.checker_status import PackageCheckerStatus, PACKAGE_CHECKER_STATUS_DESCRIPTIONS -from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput -from exploit_iq_commons.data_models.input import AgentMorpheusInput +from exploit_iq_commons.data_models.input import ExploitIqEngineInput +from exploit_iq_commons.data_models.input import ExploitIqInput from exploit_iq_commons.data_models.input import DEFAULT_FAILURE_REASON -from exploit_iq_commons.data_models.info import AgentMorpheusInfo -from vuln_analysis.data_models.output import AgentMorpheusEngineOutput, AgentMorpheusOutput, JustificationOutput, OutputPayload -from vuln_analysis.data_models.state import AgentMorpheusEngineState +from exploit_iq_commons.data_models.info import ExploitIqInfo +from vuln_analysis.data_models.output import ExploitIqEngineOutput, ExploitIqOutput, JustificationOutput, OutputPayload +from vuln_analysis.data_models.state import ExploitIqEngineState # pylint: disable=unused-import from vuln_analysis.functions import cve_agent from vuln_analysis.functions import cve_checklist @@ -176,32 +176,32 @@ async def cve_agent_workflow(config: CVEAgentWorkflowConfig, builder: Builder): # Define langgraph node functions @catch_pipeline_errors_async - async def add_start_time_node(state: AgentMorpheusInput) -> AgentMorpheusInput: + async def add_start_time_node(state: ExploitIqInput) -> ExploitIqInput: """Adds the start time to the input""" state.scan.started_at = datetime.now(timezone.utc).isoformat() return state @catch_pipeline_errors_async - async def fetch_intel_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + async def fetch_intel_node(state: ExploitIqEngineInput) -> 
ExploitIqEngineInput: """Fetch intel for CVE input""" return await cve_fetch_intel_fn.ainvoke(state.model_dump()) @catch_pipeline_errors_async - async def calculate_intel_score_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + async def calculate_intel_score_node(state: ExploitIqEngineInput) -> ExploitIqEngineInput: """Calculate score for intel source""" return await cve_calculate_intel_score_fn.ainvoke(state.model_dump()) @catch_pipeline_errors_async - async def process_sbom_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + async def process_sbom_node(state: ExploitIqEngineInput) -> ExploitIqEngineInput: """Process SBOMs for CVE input""" return await cve_process_sbom_fn.ainvoke(state.model_dump()) @catch_pipeline_errors_async - async def verify_vuln_package_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + async def verify_vuln_package_node(state: ExploitIqEngineInput) -> ExploitIqEngineInput: """Verify vulnerable package presence in source dependencies""" if cve_verify_vuln_package_fn: return await cve_verify_vuln_package_fn.ainvoke(state.model_dump()) @@ -210,12 +210,12 @@ async def verify_vuln_package_node(state: AgentMorpheusEngineInput) -> AgentMorp # --- Split pipeline nodes (clone_and_deps + segmentation) --- @catch_pipeline_errors_async - async def clone_and_deps_node(state: AgentMorpheusInput) -> AgentMorpheusEngineInput: + async def clone_and_deps_node(state: ExploitIqInput) -> ExploitIqEngineInput: """Clone repositories and install dependencies (early phase).""" return await cve_clone_and_deps_fn.ainvoke(state.model_dump()) @catch_pipeline_errors_async - async def segmentation_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + async def segmentation_node(state: ExploitIqEngineInput) -> ExploitIqEngineInput: """Build VDBs and code indexes from cloned repos (late phase).""" result = await cve_segmentation_fn.ainvoke(state.model_dump()) result_dict = result.model_dump() @@ -223,7 
+223,7 @@ async def segmentation_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEng result_dict["failure_reason"] = result.input.failure_reason return result_dict - def route_after_verify_vuln_package(state: AgentMorpheusEngineInput) -> str: + def route_after_verify_vuln_package(state: ExploitIqEngineInput) -> str: """Route to segmentation if any CVE is vulnerable, else skip to llm_engine.""" vuln_deps = state.info.vulnerable_dependencies if vuln_deps is None: @@ -231,14 +231,14 @@ def route_after_verify_vuln_package(state: AgentMorpheusEngineInput) -> str: any_vulnerable = any(len(v.vulnerable_sbom_packages) > 0 for v in vuln_deps) return "segmentation" if any_vulnerable else "llm_engine" - def route_after_clone_and_deps(state: AgentMorpheusEngineInput | AgentMorpheusInput) -> str: + def route_after_clone_and_deps(state: ExploitIqEngineInput | ExploitIqInput) -> str: """Route to fetch_intel on success, or failure if clone failed. - Note: state may be AgentMorpheusEngineInput (has .input.failure_reason) or - AgentMorpheusInput (has .failure_reason directly) depending on LangGraph's + Note: state may be ExploitIqEngineInput (has .input.failure_reason) or + ExploitIqInput (has .failure_reason directly) depending on LangGraph's state propagation behavior. 
""" - if isinstance(state, AgentMorpheusEngineInput): + if isinstance(state, ExploitIqEngineInput): failure_reason = state.input.failure_reason else: failure_reason = state.failure_reason @@ -247,57 +247,57 @@ def route_after_clone_and_deps(state: AgentMorpheusEngineInput | AgentMorpheusIn return "failure" return "fetch_intel" - async def failure_node(state: AgentMorpheusEngineInput) -> AgentMorpheusOutput: + async def failure_node(state: ExploitIqEngineInput) -> ExploitIqOutput: """Handles pipeline failure (e.g., clone/install failed).""" - return AgentMorpheusOutput( + return ExploitIqOutput( input=state.input, info=state.info, output=OutputPayload(analysis=[], vex=None), ) - async def checklist_node(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState: + async def checklist_node(state: ExploitIqEngineState) -> ExploitIqEngineState: """Generates a checklist based on CVE input""" return await cve_checklist_fn.ainvoke(state.model_dump()) - async def agent_executor_node(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState: + async def agent_executor_node(state: ExploitIqEngineState) -> ExploitIqEngineState: """Executes the checklist using an agent with ReAct prompt.""" return await cve_agent_executor_fn.ainvoke(state.model_dump()) - async def summarize_node(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState: + async def summarize_node(state: ExploitIqEngineState) -> ExploitIqEngineState: """Summarizes the results of the execution""" return await cve_summary_fn.ainvoke(state.model_dump()) - async def justify_node(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState: + async def justify_node(state: ExploitIqEngineState) -> ExploitIqEngineState: """Generates a justification for the final summary""" return await cve_justify_fn.ainvoke(state.model_dump()) - async def generate_vex_node(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState: + async def generate_vex_node(state: ExploitIqEngineState) -> ExploitIqEngineState: 
"""Generates VEX for vulnerable components""" return await cve_generate_vex_fn.ainvoke(state.model_dump()) - async def generate_cvss_node(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState: + async def generate_cvss_node(state: ExploitIqEngineState) -> ExploitIqEngineState: """Generates CVSS for the results of the execution""" return await cve_generate_cvss_fn.ainvoke(state.model_dump()) - async def fetch_patches_node(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState: + async def fetch_patches_node(state: ExploitIqEngineState) -> ExploitIqEngineState: """Fetches vulnerability fix patches from intel references and OSV.""" return await cve_fetch_patches_fn.ainvoke(state.model_dump()) @catch_pipeline_errors_async - async def add_completed_time_node(state: AgentMorpheusOutput) -> AgentMorpheusOutput: + async def add_completed_time_node(state: ExploitIqOutput) -> ExploitIqOutput: """Adds the completed time to the output""" state.input.scan.completed_at = datetime.now(timezone.utc).isoformat() return state @catch_pipeline_errors_async - async def output_results_node(state: AgentMorpheusOutput) -> AgentMorpheusOutput: + async def output_results_node(state: ExploitIqOutput) -> ExploitIqOutput: """Outputs results using configured output function""" return await cve_output_fn.ainvoke(state.model_dump()) if cve_output_fn else state @@ -305,22 +305,22 @@ async def output_results_node(state: AgentMorpheusOutput) -> AgentMorpheusOutput # --- Package checker path nodes --- @catch_pipeline_errors_async - async def checker_init_state_node(state: AgentMorpheusInput) -> AgentMorpheusEngineInput: - """Bridges AgentMorpheusInput -> AgentMorpheusEngineInput with empty info (skips VDB generation).""" - return AgentMorpheusEngineInput(input=state, info=AgentMorpheusInfo()) + async def checker_init_state_node(state: ExploitIqInput) -> ExploitIqEngineInput: + """Bridges ExploitIqInput -> ExploitIqEngineInput with empty info (skips VDB generation).""" + return 
ExploitIqEngineInput(input=state, info=ExploitIqInfo()) @catch_pipeline_errors_async - async def checker_fetch_intel_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + async def checker_fetch_intel_node(state: ExploitIqEngineInput) -> ExploitIqEngineInput: """Fetch intel for CVE input (package checker path). Reuses the same fetch_intel function.""" return await cve_fetch_intel_fn.ainvoke(state.model_dump()) @catch_pipeline_errors_async - async def checker_calculate_intel_score_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + async def checker_calculate_intel_score_node(state: ExploitIqEngineInput) -> ExploitIqEngineInput: """Calculate intel score for CVE input (package checker path).""" return await cve_calculate_intel_score_fn.ainvoke(state.model_dump()) @catch_pipeline_errors_async - async def source_acquisition_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + async def source_acquisition_node(state: ExploitIqEngineInput) -> ExploitIqEngineInput: """Acquires source code for the target package (source containers, git fallback).""" if cve_source_acquisition_fn: state = await cve_source_acquisition_fn.ainvoke(state.model_dump()) @@ -335,7 +335,7 @@ async def source_acquisition_node(state: AgentMorpheusEngineInput) -> AgentMorph return state @catch_pipeline_errors_async - async def checker_segmentation_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + async def checker_segmentation_node(state: ExploitIqEngineInput) -> ExploitIqEngineInput: """Builds scoped Tantivy code index from extracted checker sources.""" if cve_checker_segmentation_fn: state = await cve_checker_segmentation_fn.ainvoke(state.model_dump()) @@ -344,17 +344,17 @@ async def checker_segmentation_node(state: AgentMorpheusEngineInput) -> AgentMor return state @catch_pipeline_errors_async - async def l1_code_agent_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + async def l1_code_agent_node(state: 
ExploitIqEngineInput) -> ExploitIqEngineInput: """Level 1 Package Code Agent: investigates CVEs using extracted source and Tantivy code index. - Returns AgentMorpheusEngineInput with l1_result populated on checker_context. + Returns ExploitIqEngineInput with l1_result populated on checker_context. """ if cve_package_code_agent_fn: return await cve_package_code_agent_fn.ainvoke(state.model_dump()) logger.warning("Package code agent function not configured, passing state through") return state - def route_after_l1(state: AgentMorpheusEngineInput) -> str: + def route_after_l1(state: ExploitIqEngineInput) -> str: """Route to L2 Build Agent if vulnerable or uncertain, else to report generation.""" ctx = state.info.checker_context if ctx and ctx.l1_result: @@ -364,10 +364,10 @@ def route_after_l1(state: AgentMorpheusEngineInput) -> str: return "generate_report" @catch_pipeline_errors_async - async def l2_build_agent_node(state: AgentMorpheusEngineInput) -> AgentMorpheusEngineInput: + async def l2_build_agent_node(state: ExploitIqEngineInput) -> ExploitIqEngineInput: """Level 2 Build Agent: BuildCompilationCheck + HardeningCheck. - Returns AgentMorpheusEngineInput with l2_result populated on checker_context. + Returns ExploitIqEngineInput with l2_result populated on checker_context. 
""" if cve_build_agent_fn: return await cve_build_agent_fn.ainvoke(state.model_dump()) @@ -375,19 +375,19 @@ async def l2_build_agent_node(state: AgentMorpheusEngineInput) -> AgentMorpheusE return state @catch_pipeline_errors_async - async def generate_report_node(state: AgentMorpheusEngineInput) -> AgentMorpheusOutput: + async def generate_report_node(state: ExploitIqEngineInput) -> ExploitIqOutput: """Generate the final checker report from L1/L2 investigation results.""" if cve_checker_report_fn: return await cve_checker_report_fn.ainvoke(state.model_dump()) logger.warning("Checker report function not configured, producing empty output") - return AgentMorpheusOutput( + return ExploitIqOutput( input=state.input, info=state.info, output=OutputPayload(analysis=[], vex=None), ) @catch_pipeline_errors_async - async def checker_early_exit_node(state: AgentMorpheusEngineInput) -> AgentMorpheusOutput: + async def checker_early_exit_node(state: ExploitIqEngineInput) -> ExploitIqOutput: """Produces a proper output when source_acquisition exits with a non-OK status.""" ctx = state.info.checker_context status = ctx.status if ctx else None @@ -414,7 +414,7 @@ def _get_justification_label(s: PackageCheckerStatus | None) -> str: return "error" analysis = [ - AgentMorpheusEngineOutput( + ExploitIqEngineOutput( vuln_id=v.vuln_id, checklist=[], summary=reason, @@ -431,26 +431,26 @@ def _get_justification_label(s: PackageCheckerStatus | None) -> str: ) for v in state.input.scan.vulns ] - return AgentMorpheusOutput( + return ExploitIqOutput( input=state.input, info=state.info, output=OutputPayload(analysis=analysis, vex=None), ) - def route_after_source_acquisition(state: AgentMorpheusEngineInput): + def route_after_source_acquisition(state: ExploitIqEngineInput): """Route to checker_segmentation (happy path) or early exit on non-OK status.""" ctx = state.info.checker_context if ctx and ctx.status == PackageCheckerStatus.OK: return "checker_segmentation" return "checker_early_exit" 
- def route_after_add_start_time(state: AgentMorpheusInput): + def route_after_add_start_time(state: ExploitIqInput): """Route to full pipeline or package checker based on pipeline_mode.""" if state.image.pipeline_mode == PipelineMode.PACKAGE_CHECKER: return "checker_init_state" return "clone_and_deps" # build llm engine subgraph - subgraph_builder = StateGraph(AgentMorpheusEngineState) + subgraph_builder = StateGraph(ExploitIqEngineState) subgraph_builder.add_node("checklist", checklist_node) subgraph_builder.add_node("agent_executor", agent_executor_node) subgraph_builder.add_node("summarize", summarize_node) @@ -472,20 +472,20 @@ def route_after_add_start_time(state: AgentMorpheusInput): subgraph = subgraph_builder.compile() @catch_pipeline_errors_async - async def call_llm_engine_subgraph_node(message: AgentMorpheusEngineInput): + async def call_llm_engine_subgraph_node(message: ExploitIqEngineInput): trace_id.set(message.input.scan.id) subgraph_input = preprocess_engine_input(message) subgraph_input = finalize_preprocess_engine_input(message, subgraph_input, builder) if len(subgraph_input.cve_intel) > 0: results = await subgraph.ainvoke(subgraph_input) - subgraph_output = AgentMorpheusEngineState(**results) + subgraph_output = ExploitIqEngineState(**results) else: subgraph_output = subgraph_input output = postprocess_engine_output(message=message, result=subgraph_output) return output # build parent graph - graph_builder = StateGraph(AgentMorpheusOutput, input=AgentMorpheusInput) + graph_builder = StateGraph(ExploitIqOutput, input=ExploitIqInput) graph_builder.add_node("add_start_time", add_start_time_node) graph_builder.add_node("fetch_intel", fetch_intel_node) graph_builder.add_node("calculate_intel_score_node", calculate_intel_score_node) @@ -575,42 +575,42 @@ async def call_llm_engine_subgraph_node(message: AgentMorpheusEngineInput): graph = graph_builder.compile() #graph.get_graph().draw_mermaid_png(output_file_path="checker_flow.png") - def 
convert_str_to_exploit_iq_input(input: str) -> AgentMorpheusInput: - logger.debug("Converting JSON string input to AgentMorpheusInput (length: %d)", len(input)) + def convert_str_to_exploit_iq_input(input: str) -> ExploitIqInput: + logger.debug("Converting JSON string input to ExploitIqInput (length: %d)", len(input)) try: - return AgentMorpheusInput.model_validate_json(input) + return ExploitIqInput.model_validate_json(input) except Exception as e: - logger.error("Failed to convert input to AgentMorpheusInput: %s. Your input needs to be a json string.", e) + logger.error("Failed to convert input to ExploitIqInput: %s. Your input needs to be a json string.", e) raise e - def convert_textio_to_exploit_iq_input(input: TextIOWrapper) -> AgentMorpheusInput: - logger.debug("Converting TextIOWrapper input to AgentMorpheusInput") + def convert_textio_to_exploit_iq_input(input: TextIOWrapper) -> ExploitIqInput: + logger.debug("Converting TextIOWrapper input to ExploitIqInput") try: data = input.read() - return AgentMorpheusInput.model_validate_json(data) + return ExploitIqInput.model_validate_json(data) except Exception as e: logger.error( - "Failed to convert input to AgentMorpheusInput: %s. Your input needs to be a TextIOWrapper object.", e) + "Failed to convert input to ExploitIqInput: %s. Your input needs to be a TextIOWrapper object.", e) raise e - def convert_exploit_iq_output_to_str(output: AgentMorpheusOutput) -> str: - logger.debug("Converting AgentMorpheusOutput to JSON string") + def convert_exploit_iq_output_to_str(output: ExploitIqOutput) -> str: + logger.debug("Converting ExploitIqOutput to JSON string") try: return output.model_dump_json() except Exception as e: - logger.error("Failed to convert output to str: %s. Your input needs to be an AgentMorpheusOutput object.", + logger.error("Failed to convert output to str: %s.
Your input needs to be an ExploitIqOutput object.", e) raise e - async def _response_fn(input_message: AgentMorpheusInput) -> AgentMorpheusOutput: + async def _response_fn(input_message: ExploitIqInput) -> ExploitIqOutput: results = await graph.ainvoke(input_message) - graph_output = AgentMorpheusOutput(**results) + graph_output = ExploitIqOutput(**results) return graph_output try: yield FunctionInfo.from_fn(_response_fn, description=config.description, - input_schema=AgentMorpheusInput, + input_schema=ExploitIqInput, converters=[ convert_str_to_exploit_iq_input, convert_textio_to_exploit_iq_input, diff --git a/src/vuln_analysis/runtime_context.py b/src/vuln_analysis/runtime_context.py index adca2360e..088bd8097 100644 --- a/src/vuln_analysis/runtime_context.py +++ b/src/vuln_analysis/runtime_context.py @@ -18,7 +18,7 @@ import contextvars -# Holds the current AgentMorpheusEngineState for the active task +# Holds the current ExploitIqEngineState for the active task ctx_state = contextvars.ContextVar("ctx_state", default="default_value") # Source scope for CU agent tools (Docs Semantic Search, Code Keyword Search). 
diff --git a/src/vuln_analysis/tools/serp.py b/src/vuln_analysis/tools/serp.py index 7fe820b82..3ca61bf73 100644 --- a/src/vuln_analysis/tools/serp.py +++ b/src/vuln_analysis/tools/serp.py @@ -35,9 +35,9 @@ class SerpWrapperToolConfig(FunctionBaseConfig, name=("%s" % SERP_WRAPPER)): @register_function(config_type=SerpWrapperToolConfig) async def serp_wrapper(config: SerpWrapperToolConfig, builder: Builder): # pylint: disable=unused-argument - from vuln_analysis.utils.serp_api_wrapper import MorpheusSerpAPIWrapper + from vuln_analysis.utils.serp_api_wrapper import ExploitIqSerpAPIWrapper - search = MorpheusSerpAPIWrapper(max_retries=config.max_retries) + search = ExploitIqSerpAPIWrapper(max_retries=config.max_retries) @catch_tool_errors(SERP_WRAPPER) async def _arun(query: str) -> str: diff --git a/src/vuln_analysis/tools/tests/test_concurrency.py b/src/vuln_analysis/tools/tests/test_concurrency.py index 4d0df58c5..b6f4e5235 100644 --- a/src/vuln_analysis/tools/tests/test_concurrency.py +++ b/src/vuln_analysis/tools/tests/test_concurrency.py @@ -16,7 +16,7 @@ from exploit_iq_commons.utils.java_chain_of_calls_retriever import JavaChainOfCallsRetriever from exploit_iq_commons.utils.transitive_code_searcher_tool import TransitiveCodeSearcher -from vuln_analysis.data_models.state import AgentMorpheusEngineState +from vuln_analysis.data_models.state import ExploitIqEngineState from vuln_analysis.tools.transitive_code_search import ( _build_or_get_cached, _searcher_cache, @@ -24,7 +24,7 @@ _repo_build_locks, ) -_DEFAULT_THRESHOLD = AgentMorpheusEngineState.model_fields["uber_jar_file_threshold"].default +_DEFAULT_THRESHOLD = ExploitIqEngineState.model_fields["uber_jar_file_threshold"].default # --------------------------------------------------------------------------- @@ -563,7 +563,7 @@ def _make_vuln_dep(self, has_vulns: bool): return mock def _make_engine_input(self, vuln_deps): - """Create a mock AgentMorpheusEngineInput with vulnerable_dependencies.""" + """Create a 
mock ExploitIqEngineInput with vulnerable_dependencies.""" mock = MagicMock() mock.info.vulnerable_dependencies = vuln_deps return mock diff --git a/src/vuln_analysis/tools/tests/test_transitive_code_search.py b/src/vuln_analysis/tools/tests/test_transitive_code_search.py index b6b5180c1..c2984dbcb 100644 --- a/src/vuln_analysis/tools/tests/test_transitive_code_search.py +++ b/src/vuln_analysis/tools/tests/test_transitive_code_search.py @@ -5,11 +5,11 @@ from langchain_core.documents import Document from exploit_iq_commons.data_models.common import AnalysisType -from vuln_analysis.data_models.state import AgentMorpheusEngineState +from vuln_analysis.data_models.state import ExploitIqEngineState from vuln_analysis.tools.transitive_code_search import transitive_search, TransitiveCodeSearchToolConfig, _searcher_cache -from exploit_iq_commons.data_models.input import (AgentMorpheusEngineInput, AgentMorpheusInput, +from exploit_iq_commons.data_models.input import (ExploitIqEngineInput, ExploitIqInput, ImageInfoInput, SourceDocumentsInfo, ManualSBOMInfoInput -, SBOMPackage, ScanInfoInput, VulnInfo, AgentMorpheusInfo) +, SBOMPackage, ScanInfoInput, VulnInfo, ExploitIqInfo) from vuln_analysis.runtime_context import ctx_state from vuln_analysis.tools.tests.mock_documents import (python_script_example, python_init_function_example, python_full_document_example, python_parse_function_example, @@ -148,16 +148,16 @@ def set_input_for_next_run(git_repository: str, git_ref: str, included_extension sbom_info_input = ManualSBOMInfoInput(packages=sbom_packages) else: sbom_info_input = ManualSBOMInfoInput(packages=[SBOMPackage(name="a", version="1.0", system="blabla")]) - morpheus_input = AgentMorpheusInput(image=ImageInfoInput(source_info=source_code_info, - sbom_info=sbom_info_input, - analysis_type=AnalysisType.IMAGE), - scan=ScanInfoInput(vulns=[VulnInfo(vuln_id="CVE-2025-1234")])) - engine_input = AgentMorpheusEngineInput(input=morpheus_input, info=AgentMorpheusInfo()) - state: 
AgentMorpheusEngineState = AgentMorpheusEngineState(original_input=engine_input, - code_vdb_path="", - doc_vdb_path="", - code_index_path="", - cve_intel=[]) + exploit_iq_input = ExploitIqInput(image=ImageInfoInput(source_info=source_code_info, + sbom_info=sbom_info_input, + analysis_type=AnalysisType.IMAGE), + scan=ScanInfoInput(vulns=[VulnInfo(vuln_id="CVE-2025-1234")])) + engine_input = ExploitIqEngineInput(input=exploit_iq_input, info=ExploitIqInfo()) + state: ExploitIqEngineState = ExploitIqEngineState(original_input=engine_input, + code_vdb_path="", + doc_vdb_path="", + code_index_path="", + cve_intel=[]) ctx_state.set(state) diff --git a/src/vuln_analysis/tools/transitive_code_search.py b/src/vuln_analysis/tools/transitive_code_search.py index ec5e7c281..75892ba8e 100644 --- a/src/vuln_analysis/tools/transitive_code_search.py +++ b/src/vuln_analysis/tools/transitive_code_search.py @@ -26,7 +26,7 @@ from langchain.docstore.document import Document -from vuln_analysis.data_models.state import AgentMorpheusEngineState +from vuln_analysis.data_models.state import ExploitIqEngineState from exploit_iq_commons.utils.document_embedding import DocumentEmbedding from exploit_iq_commons.data_models.input import SourceDocumentsInfo from exploit_iq_commons.utils.chain_of_calls_retriever_base import ChainOfCallsRetrieverBase @@ -329,7 +329,7 @@ async def _build_or_get_cached(si, query: str, uber_jar_file_threshold: int) -> async def get_transitive_code_searcher(query: str): - state: AgentMorpheusEngineState = ctx_state.get() + state: ExploitIqEngineState = ctx_state.get() si = state.original_input.input.image.source_info threshold = state.uber_jar_file_threshold diff --git a/src/vuln_analysis/utils/function_name_locator.py b/src/vuln_analysis/utils/function_name_locator.py index 08144ad27..99f42e40e 100644 --- a/src/vuln_analysis/utils/function_name_locator.py +++ b/src/vuln_analysis/utils/function_name_locator.py @@ -20,7 +20,7 @@ from 
exploit_iq_commons.utils.chain_of_calls_retriever_base import ChainOfCallsRetrieverBase from exploit_iq_commons.utils.dep_tree import Ecosystem from exploit_iq_commons.utils.standard_library_cache import StandardLibraryCache -from vuln_analysis.utils.serp_api_wrapper import MorpheusSerpAPIWrapper +from vuln_analysis.utils.serp_api_wrapper import ExploitIqSerpAPIWrapper from exploit_iq_commons.utils.source_rpm_downloader import RPMDependencyManager from vuln_analysis.utils.prompt_factory import FL_EXAMPLES @@ -467,7 +467,7 @@ async def quick_standard_lib_check(package_name: str, ecosystem: Ecosystem) -> t True if package is standard library, False otherwise """ try: - search = MorpheusSerpAPIWrapper(max_retries=2) + search = ExploitIqSerpAPIWrapper(max_retries=2) result = await search.arun(f"Is '{package_name}' part of the {ecosystem.value} standard library?") logger.info("quick_standard_lib_check Standard library check result: %s", result) text = str(result).lower() diff --git a/src/vuln_analysis/utils/llm_engine_utils.py b/src/vuln_analysis/utils/llm_engine_utils.py index 2991396f3..bc3dd7d12 100644 --- a/src/vuln_analysis/utils/llm_engine_utils.py +++ b/src/vuln_analysis/utils/llm_engine_utils.py @@ -21,10 +21,10 @@ from exploit_iq_commons.data_models.common import AnalysisType from exploit_iq_commons.data_models.dependencies import CheckedNotVulnerablePackage, VulnerableDependencies -from exploit_iq_commons.data_models.input import AgentMorpheusEngineInput -from exploit_iq_commons.data_models.input import AgentMorpheusInput -from vuln_analysis.data_models.output import AgentMorpheusEngineOutput -from vuln_analysis.data_models.output import AgentMorpheusOutput +from exploit_iq_commons.data_models.input import ExploitIqEngineInput +from exploit_iq_commons.data_models.input import ExploitIqInput +from vuln_analysis.data_models.output import ExploitIqEngineOutput +from vuln_analysis.data_models.output import ExploitIqOutput from vuln_analysis.data_models.output 
import OutputPayload from vuln_analysis.data_models.output import ChecklistItemOutput from vuln_analysis.data_models.output import JustificationOutput @@ -34,7 +34,7 @@ from vuln_analysis.functions.code_agent_graph_defs import PatchFile from vuln_analysis.utils.intel_utils import TEST_FILE_RE from vuln_analysis.functions.cve_checker_report import infer_language_from_path, NON_CODE_LANGUAGES -from vuln_analysis.data_models.state import AgentMorpheusEngineState +from vuln_analysis.data_models.state import ExploitIqEngineState from aiq.builder.builder import Builder from vuln_analysis.functions.cve_calculate_intel_score import CVECalculateIntelScoreConfig @@ -44,7 +44,7 @@ logger = LoggingFactory.get_agent_logger(__name__) -def preprocess_engine_input(message: AgentMorpheusEngineInput) -> AgentMorpheusEngineState: +def preprocess_engine_input(message: ExploitIqEngineInput) -> ExploitIqEngineState: assert message.info.intel is not None, "The input message must have intel information" @@ -55,7 +55,7 @@ def preprocess_engine_input(message: AgentMorpheusEngineInput) -> AgentMorpheusE if not sbom: raise ValueError(f"No SBOM packages found for image {image}. 
Skipping the LLM Engine.") - am_input: AgentMorpheusInput = message.input + am_input: ExploitIqInput = message.input # Scan through the VDC output for CVE's to run through the agent if message.info.vulnerable_dependencies is None: @@ -93,11 +93,11 @@ def preprocess_engine_input(message: AgentMorpheusEngineInput) -> AgentMorpheusE logger.info("Passing %d vuln_id(s) with vulnerable dependencies to the LLM Engine", len(vulns_for_agent)) vdb = message.info.vdb - return AgentMorpheusEngineState(code_vdb_path=vdb.code_vdb_path if vdb else None, - doc_vdb_path=vdb.doc_vdb_path if vdb else None, - code_index_path=vdb.code_index_path if vdb else None, - cve_intel=filtered_intel, - original_input=message) + return ExploitIqEngineState(code_vdb_path=vdb.code_vdb_path if vdb else None, + doc_vdb_path=vdb.doc_vdb_path if vdb else None, + code_index_path=vdb.code_index_path if vdb else None, + cve_intel=filtered_intel, + original_input=message) _MAX_SNIPPET_LINES = 12 @@ -177,9 +177,9 @@ def parse_exploit_iq_engine_output(vuln_id: str, justification: dict[str, str], intel_score: int, cvss: dict[str, str] | None, - patch_result: WebPatchResult | dict | None = None) -> AgentMorpheusEngineOutput: + patch_result: WebPatchResult | dict | None = None) -> ExploitIqEngineOutput: """ - Parse the output fields for a single vulnerability into an AgentMorpheusEngineOutput object. + Parse the output fields for a single vulnerability into an ExploitIqEngineOutput object. 
""" # Convert list of checklist item dicts to list of ChecklistItemOutput objects checklist_output = [ @@ -201,16 +201,16 @@ def parse_exploit_iq_engine_output(vuln_id: str, details = _build_full_pipeline_details_md(patch_result) - return AgentMorpheusEngineOutput(vuln_id=vuln_id, - checklist=checklist_output, - summary=summary, - justification=justification_output, - intel_score=intel_score, - cvss=cvss_output, - details=details) + return ExploitIqEngineOutput(vuln_id=vuln_id, + checklist=checklist_output, + summary=summary, + justification=justification_output, + intel_score=intel_score, + cvss=cvss_output, + details=details) -def build_deficient_intel_output(vuln_id: str) -> AgentMorpheusEngineOutput: +def build_deficient_intel_output(vuln_id: str) -> ExploitIqEngineOutput: summary = ("There is insufficient intel available to determine vulnerability. " "This is either due to the CVE not existing or there is not enough " "gathered intel for the agent to make an informed decision.") @@ -219,7 +219,7 @@ def build_deficient_intel_output(vuln_id: str) -> AgentMorpheusEngineOutput: status="UNKNOWN") cvss = None - return AgentMorpheusEngineOutput( + return ExploitIqEngineOutput( vuln_id=vuln_id, checklist=[ ChecklistItemOutput(input="Agent bypassed: Insufficient intel gathered. No checklist generated.", @@ -235,7 +235,7 @@ def build_deficient_intel_output(vuln_id: str) -> AgentMorpheusEngineOutput: def build_no_vuln_packages_output( vuln_id: str, checked_not_vulnerable: list[CheckedNotVulnerablePackage] | None = None -) -> AgentMorpheusEngineOutput: +) -> ExploitIqEngineOutput: if checked_not_vulnerable: n = len(checked_not_vulnerable) reasons = {pkg.reason for pkg in checked_not_vulnerable} @@ -273,7 +273,7 @@ def build_no_vuln_packages_output( status="FALSE") cvss = None - return AgentMorpheusEngineOutput( + return ExploitIqEngineOutput( vuln_id=vuln_id, checklist=[ ChecklistItemOutput(input="Agent bypassed: no vulnerable packages detected. 
Checklist not generated.", @@ -286,7 +286,7 @@ def build_no_vuln_packages_output( cvss=cvss) -def build_no_sbom_output(vuln_id: str) -> AgentMorpheusEngineOutput: +def build_no_sbom_output(vuln_id: str) -> ExploitIqEngineOutput: summary = ("There were no SBOM packages found for the image. This is either due to " "an invalid SBOM input or empty SBOM. There is not enough information " "to make an informed decision.") @@ -295,20 +295,20 @@ def build_no_sbom_output(vuln_id: str) -> AgentMorpheusEngineOutput: status="UNKNOWN") cvss = None - return AgentMorpheusEngineOutput(vuln_id=vuln_id, - checklist=[ + return ExploitIqEngineOutput(vuln_id=vuln_id, + checklist=[ ChecklistItemOutput( input="Agent bypassed: no SBOM packages found. Checklist not generated.", response=summary, intermediate_steps=None) ], - summary=summary, - justification=justification, - intel_score=0, - cvss=cvss) + summary=summary, + justification=justification, + intel_score=0, + cvss=cvss) -def build_low_intel_score_output(vuln_id: str, intel_score: int) -> AgentMorpheusEngineOutput: +def build_low_intel_score_output(vuln_id: str, intel_score: int) -> ExploitIqEngineOutput: summary = ("There is poor quality intel available to determine vulnerability. 
There is not enough gathered intel" " for the agent to make an informed decision.") justification = JustificationOutput(label="poor_quality_intel", @@ -316,7 +316,7 @@ def build_low_intel_score_output(vuln_id: str, intel_score: int) -> AgentMorpheu status="UNKNOWN") cvss = None - return AgentMorpheusEngineOutput( + return ExploitIqEngineOutput( vuln_id=vuln_id, checklist=[], summary=summary, @@ -325,8 +325,8 @@ def build_low_intel_score_output(vuln_id: str, intel_score: int) -> AgentMorpheu cvss=cvss ) -def postprocess_engine_output(message: AgentMorpheusEngineInput, - result: AgentMorpheusEngineState) -> AgentMorpheusOutput: +def postprocess_engine_output(message: ExploitIqEngineInput, + result: ExploitIqEngineState) -> ExploitIqOutput: trace_id.set(message.input.scan.id) vulnerable_dependencies: list[VulnerableDependencies] | None = message.info.vulnerable_dependencies @@ -351,7 +351,7 @@ def postprocess_engine_output(message: AgentMorpheusEngineInput, if not message.input.image.analysis_type == AnalysisType.SOURCE and not message.info.sbom.packages: output = [build_no_sbom_output(vuln_id) for vuln_id in input_vuln_ids] else: - output: list[AgentMorpheusEngineOutput] = [] + output: list[ExploitIqEngineOutput] = [] output_vuln_ids = list(result.final_summaries.keys()) poor_quality_intel_vul = result.poor_quality_intel_vul @@ -387,9 +387,9 @@ def postprocess_engine_output(message: AgentMorpheusEngineInput, out.cvss.score if out.cvss else "-") payload = OutputPayload(analysis=output, vex=result.vex) - return AgentMorpheusOutput(input=message.input, info=message.info, output=payload) + return ExploitIqOutput(input=message.input, info=message.info, output=payload) -def finalize_preprocess_engine_input(message: AgentMorpheusEngineInput, engine_state: AgentMorpheusEngineState, builder: Builder) -> AgentMorpheusEngineState: +def finalize_preprocess_engine_input(message: ExploitIqEngineInput, engine_state: ExploitIqEngineState, builder: Builder) -> ExploitIqEngineState: 
config = builder.get_function_config("cve_calculate_intel_score") assert isinstance(config, CVECalculateIntelScoreConfig) diff --git a/src/vuln_analysis/utils/output_formatter.py b/src/vuln_analysis/utils/output_formatter.py index c01b9d9be..612d233a4 100644 --- a/src/vuln_analysis/utils/output_formatter.py +++ b/src/vuln_analysis/utils/output_formatter.py @@ -19,17 +19,17 @@ from dateutil.parser import parse -from vuln_analysis.data_models.output import AgentMorpheusOutput +from vuln_analysis.data_models.output import ExploitIqOutput from exploit_iq_commons.utils.data_utils import safe_getattr -def generate_vulnerability_reports(model_dict: AgentMorpheusOutput, output_dir): +def generate_vulnerability_reports(model_dict: ExploitIqOutput, output_dir): """ Creates a markdown file for each CVE ID in the markdown content dictionary. Parameters ---------- - model_dict : AgentMorpheusOutput + model_dict : ExploitIqOutput JSON data containing vulnerability information. output_dir : str The directory where the markdown files will be created. @@ -57,13 +57,13 @@ def generate_vulnerability_reports(model_dict: AgentMorpheusOutput, output_dir): f.write("\n".join(content)) -def _transform_to_markdown(model_dict: AgentMorpheusOutput): +def _transform_to_markdown(model_dict: ExploitIqOutput): """ Convert JSON data to Markdown content. Parameters ---------- - model_dict : AgentMorpheusOutput + model_dict : ExploitIqOutput JSON data containing vulnerability information. Returns @@ -86,7 +86,7 @@ def _transform_to_markdown(model_dict: AgentMorpheusOutput): return markdown_content -def _add_header(markdown_content, model_dict: AgentMorpheusOutput): +def _add_header(markdown_content, model_dict: ExploitIqOutput): """ Add header to Markdown content. @@ -94,7 +94,7 @@ def _add_header(markdown_content, model_dict: AgentMorpheusOutput): ---------- markdown_content : dict Markdown content for each CVE ID. 
- model_dict : AgentMorpheusOutput + model_dict : ExploitIqOutput JSON data containing vulnerability information. Returns @@ -114,7 +114,7 @@ def _add_header(markdown_content, model_dict: AgentMorpheusOutput): markdown_content[cve_id].append(f"> **Status:** {_get_expoiltability_text(output.justification.status)}") -def _add_cve_intel(markdown_content, model_dict: AgentMorpheusOutput): +def _add_cve_intel(markdown_content, model_dict: ExploitIqOutput): """ Add CVE intelligence details to Markdown content. @@ -122,7 +122,7 @@ def _add_cve_intel(markdown_content, model_dict: AgentMorpheusOutput): ---------- markdown_content : dict Markdown content for each CVE ID. - model_dict : AgentMorpheusOutput + model_dict : ExploitIqOutput JSON data containing vulnerability information. Returns @@ -263,7 +263,7 @@ def _get_cve_description(intel_obj): return None -def _add_table_of_contents(markdown_content, model_dict: AgentMorpheusOutput): +def _add_table_of_contents(markdown_content, model_dict: ExploitIqOutput): """ Add a table of contents for checklists per CVE. @@ -271,7 +271,7 @@ def _add_table_of_contents(markdown_content, model_dict: AgentMorpheusOutput): ---------- markdown_content : dict Markdown content for each CVE ID. - model_dict : AgentMorpheusOutput + model_dict : ExploitIqOutput JSON data containing vulnerability information. Returns @@ -297,7 +297,7 @@ def _add_table_of_contents(markdown_content, model_dict: AgentMorpheusOutput): markdown_content[cve_id].append(f"\t {j}. [{intermediate_step}](#checklist-step-{i}.{j})") -def _add_checklist_info(markdown_content, model_dict: AgentMorpheusOutput): +def _add_checklist_info(markdown_content, model_dict: ExploitIqOutput): """ Add detailed information for checklists associated with each CVE. @@ -305,7 +305,7 @@ def _add_checklist_info(markdown_content, model_dict: AgentMorpheusOutput): ---------- markdown_content : dict Markdown content for each CVE ID. 
- model_dict : AgentMorpheusOutput + model_dict : ExploitIqOutput JSON data containing vulnerability information. Returns @@ -408,7 +408,7 @@ def _process_tool_output(content): return content_markdown + table -def _add_vulnerability_analysis(markdown_content, model_dict: AgentMorpheusOutput): +def _add_vulnerability_analysis(markdown_content, model_dict: ExploitIqOutput): """ Add vulnerability analysis details to Markdown content. @@ -416,7 +416,7 @@ def _add_vulnerability_analysis(markdown_content, model_dict: AgentMorpheusOutpu ---------- markdown_content : dict Markdown content for each CVE ID. - model_dict : AgentMorpheusOutput + model_dict : ExploitIqOutput JSON data containing vulnerability information. Returns @@ -438,7 +438,7 @@ def _add_vulnerability_analysis(markdown_content, model_dict: AgentMorpheusOutpu markdown_content[cve_id].append(f"\n{justification.reason}") -def _add_vulnerable_sboms(markdown_content, model_dict: AgentMorpheusOutput): +def _add_vulnerable_sboms(markdown_content, model_dict: ExploitIqOutput): """ Add information about vulnerable SBOM dependencies to Markdown content. @@ -446,7 +446,7 @@ def _add_vulnerable_sboms(markdown_content, model_dict: AgentMorpheusOutput): ---------- markdown_content : dict Markdown content for each CVE ID. - model_dict : AgentMorpheusOutput + model_dict : ExploitIqOutput JSON data containing vulnerability information. Returns @@ -485,7 +485,7 @@ def _add_vulnerable_sboms(markdown_content, model_dict: AgentMorpheusOutput): unique_rows.add(row) # Add the row to the set of unique rows -def _add_references(markdown_content, model_dict: AgentMorpheusOutput): +def _add_references(markdown_content, model_dict: ExploitIqOutput): """ Add references for a CVE from all available sources in the intel object. @@ -493,7 +493,7 @@ def _add_references(markdown_content, model_dict: AgentMorpheusOutput): ---------- markdown_content : dict Markdown content for each CVE ID. 
- model_dict : AgentMorpheusOutput + model_dict : ExploitIqOutput JSON data containing references information. Returns diff --git a/src/vuln_analysis/utils/serp_api_wrapper.py b/src/vuln_analysis/utils/serp_api_wrapper.py index eecac3514..c8af45506 100644 --- a/src/vuln_analysis/utils/serp_api_wrapper.py +++ b/src/vuln_analysis/utils/serp_api_wrapper.py @@ -25,7 +25,7 @@ from vuln_analysis.utils.url_utils import url_join -class MorpheusSerpAPIWrapper(SerpAPIWrapper): +class ExploitIqSerpAPIWrapper(SerpAPIWrapper): """Custom SerpAPI wrapper with multi-key rotation support. This wrapper extends the standard SerpAPIWrapper to support multiple API keys @@ -59,7 +59,7 @@ def serp_api_key_index(self) -> int: """Shared current key index.""" return self.__class__._serp_api_key_index @model_validator(mode="after") - def validate_base_url(self) -> "MorpheusSerpAPIWrapper": + def validate_base_url(self) -> "ExploitIqSerpAPIWrapper": """Validate the base URL from the environment.""" self.base_url = get_from_env(key="base_url", env_key="SERPAPI_BASE_URL", default=self.base_url) if not self.base_url: @@ -68,7 +68,7 @@ def validate_base_url(self) -> "MorpheusSerpAPIWrapper": self.search_engine.BACKEND = self.base_url return self @model_validator(mode="after") - def validate_serp_api_keys(self) -> "MorpheusSerpAPIWrapper": + def validate_serp_api_keys(self) -> "ExploitIqSerpAPIWrapper": """Initialize API keys pool from SERPAPI_API_KEY environment variable. 
Parses comma-separated keys from the serpapi_api_key field (populated by parent class) diff --git a/src/vuln_analysis/utils/vex/implementations/csaf_generator.py b/src/vuln_analysis/utils/vex/implementations/csaf_generator.py index e60598bee..01cb03d05 100644 --- a/src/vuln_analysis/utils/vex/implementations/csaf_generator.py +++ b/src/vuln_analysis/utils/vex/implementations/csaf_generator.py @@ -23,7 +23,8 @@ from typing import Any, Dict from exploit_iq_commons.data_models.cve_intel import CveIntel -from vuln_analysis.data_models.state import AgentMorpheusEngineState +from exploit_iq_commons.data_models.input import ExploitIqEngineInput +from vuln_analysis.data_models.state import ExploitIqEngineState from ..vex_generator_base import VexGenerator from ..vex_utils import get_vex_validator, build_patch_recommendation @@ -167,9 +168,9 @@ class CsafVexGenerator(VexGenerator): CSAF VEX generator. Builds a CSAF JSON document and validates it with the csaf-tool. """ - def generate(self, state: AgentMorpheusEngineState) -> Dict[str, Any]: + def generate(self, state: ExploitIqEngineState) -> Dict[str, Any]: - message: AgentMorpheusEngineInput = state.original_input + message: ExploitIqEngineInput = state.original_input csaf_gen = CSAFGenerator() diff --git a/src/vuln_analysis/utils/vex/tests/test_csaf_generator_integration.py b/src/vuln_analysis/utils/vex/tests/test_csaf_generator_integration.py index 5bca102d3..096577320 100644 --- a/src/vuln_analysis/utils/vex/tests/test_csaf_generator_integration.py +++ b/src/vuln_analysis/utils/vex/tests/test_csaf_generator_integration.py @@ -23,17 +23,17 @@ from exploit_iq_commons.data_models.common import AnalysisType from exploit_iq_commons.data_models.cve_intel import CveIntel, CveIntelGhsa, CveIntelRhsa -from exploit_iq_commons.data_models.info import AgentMorpheusInfo, SBOMPackage +from exploit_iq_commons.data_models.info import ExploitIqInfo, SBOMPackage from exploit_iq_commons.data_models.input import ( - 
AgentMorpheusEngineInput, - AgentMorpheusInput, + ExploitIqEngineInput, + ExploitIqInput, ImageInfoInput, ManualSBOMInfoInput, ScanInfoInput, SourceDocumentsInfo, VulnInfo, ) -from vuln_analysis.data_models.state import AgentMorpheusEngineState +from vuln_analysis.data_models.state import ExploitIqEngineState from vuln_analysis.utils.vex.implementations.csaf_generator import CsafVexGenerator from vuln_analysis.utils.vex.vex_generator_loader import load_vex_generator @@ -56,8 +56,8 @@ @pytest.fixture(scope="module") -def mock_state() -> AgentMorpheusEngineState: - """Fixture providing a default mock AgentMorpheusEngineState for testing. +def mock_state() -> ExploitIqEngineState: + """Fixture providing a default mock ExploitIqEngineState for testing. This state has one vulnerable CVE with a known affected status, with no GHSA or RHSA intel data. """ return create_mock_state() @@ -70,8 +70,8 @@ def create_mock_state( product_name: str = _DEFAULT_PRODUCT_NAME, product_tag: str = _DEFAULT_PRODUCT_TAG, sbom_packages: list[SBOMPackage] | None = _DEFAULT_SBOM_PACKAGES, -) -> AgentMorpheusEngineState: - """Create a mock AgentMorpheusEngineState for testing.""" +) -> ExploitIqEngineState: + """Create a mock ExploitIqEngineState for testing.""" intel = intel or [CveIntel(vuln_id=v) for v in vulns] @@ -85,15 +85,15 @@ def create_mock_state( sbom_info=sbom_info, ) - engine_input = AgentMorpheusEngineInput( - input=AgentMorpheusInput( + engine_input = ExploitIqEngineInput( + input=ExploitIqInput( scan=ScanInfoInput(vulns=[VulnInfo(vuln_id=v) for v in vulns]), image=image_info, ), - info=AgentMorpheusInfo(intel=intel), + info=ExploitIqInfo(intel=intel), ) - return AgentMorpheusEngineState( + return ExploitIqEngineState( cve_intel=intel, original_input=engine_input, final_summaries={v: _DEFAULT_SUMMARY.format(v=v) for v in vulns}, diff --git a/src/vuln_analysis/utils/vex/vex_generator_base.py b/src/vuln_analysis/utils/vex/vex_generator_base.py index fa8d3c452..215748ca0 100644 --- 
a/src/vuln_analysis/utils/vex/vex_generator_base.py +++ b/src/vuln_analysis/utils/vex/vex_generator_base.py @@ -18,7 +18,7 @@ from abc import ABC, abstractmethod from typing import Any, Dict -from vuln_analysis.data_models.state import AgentMorpheusEngineState +from vuln_analysis.data_models.state import ExploitIqEngineState class VexGenerator(ABC): @@ -27,7 +27,7 @@ class VexGenerator(ABC): """ @abstractmethod - def generate(self, state: AgentMorpheusEngineState) -> Dict[str, Any]: + def generate(self, state: ExploitIqEngineState) -> Dict[str, Any]: """ Generate a VEX document as a JSON-serializable dict from the engine state. """ diff --git a/tests/test_serp_api_key_rotation.py b/tests/test_serp_api_key_rotation.py index acc8a1108..a6d477e95 100644 --- a/tests/test_serp_api_key_rotation.py +++ b/tests/test_serp_api_key_rotation.py @@ -23,7 +23,7 @@ import pytest from aioresponses import aioresponses from aiohttp import ClientResponseError -from vuln_analysis.utils.serp_api_wrapper import MorpheusSerpAPIWrapper +from vuln_analysis.utils.serp_api_wrapper import ExploitIqSerpAPIWrapper SERPAPI_SEARCH_URL_PATTERN = re.compile(r'https://serpapi\.com/search\?.*') TEST_PAYLOAD = {"results": ["test"]} @@ -36,18 +36,18 @@ def serpapi_wrapper_single_key(): """Create a wrapper with a single API key.""" # Reset class-level state before each test - MorpheusSerpAPIWrapper._serp_api_keys = [] - MorpheusSerpAPIWrapper._serp_api_key_index = 0 - return MorpheusSerpAPIWrapper(serpapi_api_key=SINGLE_KEY) + ExploitIqSerpAPIWrapper._serp_api_keys = [] + ExploitIqSerpAPIWrapper._serp_api_key_index = 0 + return ExploitIqSerpAPIWrapper(serpapi_api_key=SINGLE_KEY) @pytest.fixture def serpapi_wrapper_two_keys(): """Create a wrapper with two API keys.""" # Reset class-level state before each test - MorpheusSerpAPIWrapper._serp_api_keys = [] - MorpheusSerpAPIWrapper._serp_api_key_index = 0 - return MorpheusSerpAPIWrapper(serpapi_api_key=TWO_KEYS) + ExploitIqSerpAPIWrapper._serp_api_keys = [] 
+ ExploitIqSerpAPIWrapper._serp_api_key_index = 0 + return ExploitIqSerpAPIWrapper(serpapi_api_key=TWO_KEYS) @pytest.mark.asyncio @@ -124,9 +124,9 @@ async def test_key_rotation(error_code, serpapi_wrapper_two_keys): def test_concurrent_rotation(): """Test that concurrent key rotation is thread-safe.""" # Reset class-level state before test - MorpheusSerpAPIWrapper._serp_api_keys = [] - MorpheusSerpAPIWrapper._serp_api_key_index = 0 - wrapper = MorpheusSerpAPIWrapper(serpapi_api_key="key1,key2,key3") + ExploitIqSerpAPIWrapper._serp_api_keys = [] + ExploitIqSerpAPIWrapper._serp_api_key_index = 0 + wrapper = ExploitIqSerpAPIWrapper(serpapi_api_key="key1,key2,key3") num_threads = 10 iterations_per_thread = 5 From b1af633b06d15f1e1cbe6e0020af0cbe5a3aff6e Mon Sep 17 00:00:00 2001 From: Tamar Weisskopf Date: Sun, 28 Jun 2026 04:38:41 +0300 Subject: [PATCH 11/21] revert this commit after fixing quay authentication --- .tekton/on-cm-runner.yaml | 2 +- .tekton/on-pull-request.yaml | 2 +- .tekton/on-push.yaml | 2 +- .tekton/on-tag.yaml | 2 +- kustomize/base/exploit_iq_service.yaml | 4 ++-- kustomize/base/kustomization.yaml | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.tekton/on-cm-runner.yaml b/.tekton/on-cm-runner.yaml index 87a109717..439d6114e 100644 --- a/.tekton/on-cm-runner.yaml +++ b/.tekton/on-cm-runner.yaml @@ -26,7 +26,7 @@ spec: value: "{{ trigger_comment }}" # Point to the image ALREADY built by the PR pipeline - name: target-image - value: quay.io/ecosystem-appeng/exploit-iq-agent:on-pr-{{revision}} + value: quay.io/ecosystem-appeng/agent-morpheus-rh:on-pr-{{revision}} pipelineSpec: params: diff --git a/.tekton/on-pull-request.yaml b/.tekton/on-pull-request.yaml index d2e3b8340..e72258311 100644 --- a/.tekton/on-pull-request.yaml +++ b/.tekton/on-pull-request.yaml @@ -33,7 +33,7 @@ spec: - name: image-expires-after value: 5d - name: output-image - value: quay.io/ecosystem-appeng/exploit-iq-agent:on-pr-{{revision}} + value: 
quay.io/ecosystem-appeng/agent-morpheus-rh:on-pr-{{revision}} - name: path-context value: . - name: dockerfile diff --git a/.tekton/on-push.yaml b/.tekton/on-push.yaml index 71316d17f..5da2cc106 100644 --- a/.tekton/on-push.yaml +++ b/.tekton/on-push.yaml @@ -26,7 +26,7 @@ spec: - name: revision value: "{{ revision }}" - name: output-image - value: quay.io/ecosystem-appeng/exploit-iq-agent:latest + value: quay.io/ecosystem-appeng/agent-morpheus-rh:latest - name: path-context value: . - name: dockerfile diff --git a/.tekton/on-tag.yaml b/.tekton/on-tag.yaml index c8721f1c1..08718fd32 100644 --- a/.tekton/on-tag.yaml +++ b/.tekton/on-tag.yaml @@ -26,7 +26,7 @@ spec: - name: revision value: "{{ revision }}" - name: output-image - value: 'quay.io/ecosystem-appeng/exploit-iq-agent' + value: 'quay.io/ecosystem-appeng/agent-morpheus-rh' - name: tag-name value: "{{ target_branch }}" - name: path-context diff --git a/kustomize/base/exploit_iq_service.yaml b/kustomize/base/exploit_iq_service.yaml index 46c36f43f..2f99c7411 100644 --- a/kustomize/base/exploit_iq_service.yaml +++ b/kustomize/base/exploit_iq_service.yaml @@ -25,7 +25,7 @@ spec: serviceAccountName: exploit-iq-sa containers: - name: exploit-iq-phoenix-tracing - image: quay.io/ecosystem-appeng/exploit-iq-agent:nat + image: quay.io/ecosystem-appeng/agent-morpheus-rh:nat imagePullPolicy: Always workingDir: /workspace/ args: @@ -45,7 +45,7 @@ spec: memory: "1Gi" cpu: "100m" - name: exploit-iq - image: quay.io/ecosystem-appeng/exploit-iq-agent:nat + image: quay.io/ecosystem-appeng/agent-morpheus-rh:nat imagePullPolicy: Always workingDir: /workspace/ args: diff --git a/kustomize/base/kustomization.yaml b/kustomize/base/kustomization.yaml index e6f1f6930..aa362898a 100644 --- a/kustomize/base/kustomization.yaml +++ b/kustomize/base/kustomization.yaml @@ -94,7 +94,7 @@ patches: kind: Deployment images: - - name: quay.io/ecosystem-appeng/exploit-iq-agent + - name: quay.io/ecosystem-appeng/agent-morpheus-rh newTag: latest 
- name: quay.io/ecosystem-appeng/exploit-iq-client From 81e4017d76a787fc5cf673ab5f82c13050b93e04 Mon Sep 17 00:00:00 2001 From: Tamar Weisskopf Date: Sun, 28 Jun 2026 10:44:47 +0300 Subject: [PATCH 12/21] skip java script tests --- src/vuln_analysis/tools/tests/test_transitive_code_search.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/vuln_analysis/tools/tests/test_transitive_code_search.py b/src/vuln_analysis/tools/tests/test_transitive_code_search.py index c2984dbcb..9965dda09 100644 --- a/src/vuln_analysis/tools/tests/test_transitive_code_search.py +++ b/src/vuln_analysis/tools/tests/test_transitive_code_search.py @@ -488,6 +488,7 @@ async def test_transitive_search_java_4(): assert len(list_path) > 1 assert 'src/main/java/io/cryostat' in list_path[-1] +@pytest.mark.skip @pytest.mark.asyncio async def test_java_script_transitive_search_1(): """Test that runs with a real repository""" @@ -511,7 +512,7 @@ async def test_java_script_transitive_search_1(): assert path_found == True assert len(list_path) == 2 - +@pytest.mark.skip @pytest.mark.asyncio async def test_java_script_transitive_search_2(): """Test that runs with a real repository""" From 4b2d280ffb4ecc139234351bdc800263e0c821cb Mon Sep 17 00:00:00 2001 From: Tamar Weisskopf Date: Sun, 28 Jun 2026 10:59:08 +0300 Subject: [PATCH 13/21] merge main --- .gitignore | 2 + kustomize/README.md | 732 ++++++++++++------ kustomize/base/argilla/deployment.yaml | 2 - kustomize/base/exploit-iq-config.yml | 19 + kustomize/base/ips-patch-client.json | 6 - kustomize/base/ips-patch.json | 6 - kustomize/base/kustomization.yaml | 14 - .../components/oidc-ca/kustomization.yaml | 15 + .../components/oidc-ca/oidc-ca-patch.yaml | 22 + kustomize/deployer-rbac.yaml | 101 +++ .../exploit-iq-client-batch-patch.yaml | 31 + .../exploit-iq-resources-patch.yaml | 32 + .../batch-processing/kustomization.yaml | 90 +-- .../remote-nim-all/exploit-iq-nim-patch.yaml | 56 ++ .../remote-nim-all/kustomization.yaml | 81 
+- .../exploit-iq-llm-patch.yaml | 88 +++ .../kustomization.yaml | 136 +--- .../nginx-patch.yaml | 2 + kustomize/overlays/tests/kustomization.yaml | 7 +- pyproject.toml | 1 + .../functions/build_agent_graph_defs.py | 149 +++- .../functions/cve_build_agent.py | 53 +- .../functions/cve_generate_vex.py | 9 +- .../functions/cve_package_code_agent.py | 161 ++-- .../functions/react_internals.py | 68 +- src/vuln_analysis/tools/source_grep.py | 38 +- .../tools/tests/test_concurrency.py | 126 ++- .../tests/test_transitive_code_search.py | 1 - .../tools/transitive_code_search.py | 48 +- .../utils/rpm_checker_prompts.py | 183 ++++- .../test_vulnerability_intel_sanitizer.py | 80 +- src/vuln_analysis/utils/token_utils.py | 45 ++ .../vex/implementations/csaf_generator.py | 62 +- .../tests/test_csaf_generator_integration.py | 57 ++ src/vuln_analysis/utils/vex/vex_utils.py | 53 +- .../utils/vulnerability_intel_sanitizer.py | 39 + 36 files changed, 1868 insertions(+), 747 deletions(-) delete mode 100644 kustomize/base/ips-patch-client.json delete mode 100644 kustomize/base/ips-patch.json create mode 100644 kustomize/components/oidc-ca/kustomization.yaml create mode 100644 kustomize/components/oidc-ca/oidc-ca-patch.yaml create mode 100644 kustomize/deployer-rbac.yaml create mode 100644 kustomize/overlays/batch-processing/exploit-iq-client-batch-patch.yaml create mode 100644 kustomize/overlays/batch-processing/exploit-iq-resources-patch.yaml create mode 100644 kustomize/overlays/remote-nim-all/exploit-iq-nim-patch.yaml create mode 100644 kustomize/overlays/self-hosted-llama3.1-70b-4bit/exploit-iq-llm-patch.yaml diff --git a/.gitignore b/.gitignore index 92f47346b..7221e1240 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ ###### Place new entries directly below this line! 
###### +kustomize/components/oidc-ca/*.crt + CLAUDE.md # Ignore anything in the ./.tmp directory diff --git a/kustomize/README.md b/kustomize/README.md index 93de93d7b..7ce49156d 100644 --- a/kustomize/README.md +++ b/kustomize/README.md @@ -15,129 +15,83 @@ See the License for the specific language governing permissions and limitations under the License. --> -# Procedure to Run and Deploy -## Install and Run Locally +# Deploying Exploit Intelligence on OpenShift Container Platform -One can run the ExploitIQ on his local machine ( No GPU dependency is required!), for the purpose of testing, debugging and troubleshooting problems: +## Prerequisites -1. Install the lightweight [uv package manager](https://docs.astral.sh/uv/getting-started/installation). -2. Ensure Python 3.12 is installed for your operating system. -```shell - uv python install 3.12 -``` -3. Navigate to the project root, then create and activate a virtual environment using `uv`. +> [!NOTE] +> All commands in this guide assume that your current working directory is `kustomize/`. -```shell -# Make sure you're on the repo's root directory. -cd $(git rev-parse --show-toplevel) -# Create and activate the virtual environment -rm -rf .venv || true -uv venv --python 3.12 --no-cache -source .venv/bin/activate -``` +### Required Tools -4. **Install Dependencies**: Once the environment is active, install the required packages. +Install the following tools and verify that all binaries are available on your system path: -```shell -uv sync --no-cache -``` +- [`oc`](https://docs.openshift.com/container-platform/latest/cli_reference/openshift_cli/getting-started-cli.html) +- [`kustomize`](https://kubectl.docs.kubernetes.io/installation/kustomize/) +- `openssl` -5. **Set Environment Variables**: Define the following environment variables. 
+### Cluster Access -```shell -export CHECKLIST_MODEL_NAME=hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4 -export CODE_VDB_RETRIEVER_MODEL_NAME=hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4 -export CVE_AGENT_EXECUTOR_MODEL_NAME=hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4 -export DOC_VDB_RETRIEVER_MODEL_NAME=hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4 -export JUSTIFY_MODEL_NAME=hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4 -export NVIDIA_API_BASE=http://YOUR_SELF_HOSTED_OPENAI_LLM_ADDRESSS/v1 -export PYTHONUNBUFFERED=1 -export GOPROXY=https://proxy.golang.org,direct -export SERPAPI_API_KEY=YOUR_SERPAPI_KEY -export SUMMARIZE_MODEL_NAME=hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4 -export REGISTRY_REDHAT_USERNAME="your_username" -export REGISTRY_REDHAT_PASSWORD="your_password_or_token" -``` +Cluster-admin privileges are required. `OAuthClient` creation, anyuid SCC grants, and `ServiceMonitor` access are all cluster-scoped operations. If cluster-admin access is not available, a cluster administrator can optionally apply `deployer-rbac.yaml` to delegate the minimum required permissions. -6. **Run the Application**: After a successful installation, run the application. -```shell -nat --log-level debug serve --config_file=src/vuln_analysis/configs/config-http-openai.yml --host 0.0.0.0 --port 26466 -``` -7. In another terminal, in the same venv python env, start Arize phoenix service to enable tracing for local deployment: -```shell -phoenix serve -``` +### API Credentials -### Prerequisites -One need to install the following tools in order to run the agent locally without problems, all tools binaries expected -to be on the system path: -- [Java JDK >=21](https://www.oracle.com/il-en/java/technologies/downloads/#java21) -- [Maven Package manager](https://maven.apache.org/install.html) -- [Golang](https://go.dev/doc/install) +Obtain the following credentials before you begin. 
For instructions on creating each credential, refer to the main [README](../README.md#obtain-api-keys): -### Container Source Download Configuration +- GitHub Advisory Database (GHSA) API key +- National Vulnerability Database (NVD) API key +- NVIDIA API key (required for Remote NIM; a placeholder value is sufficient for self-hosted variants) +- Red Hat registry credentials (`registry.redhat.io`) +- SerpAPI key -For C/C++ projects, you can enable container source download to extract RPM dependencies from Red Hat container registries. This feature uses skopeo to download container source layers and automatically extracts RPM packages, excluding the main application RPMs to focus on dependencies only. -**Required Environment Variables:** -# Enable container source download mode (default behavior ) -export USE_CONTAINER_SOURCES=true +--- + +## Preparing Your Deployment -**How it works:** -1. Downloads container source layers using skopeo -2. Extracts RPM packages from the downloaded layers -3. Filters out main application RPMs (e.g., `postgresql-*` for PostgreSQL containers) -4. Copies dependency RPMs to the standard RPM cache directory +Complete the following steps before running any deployment command. +### Step 1. Create a Project Namespace -**Prerequisites:** -- `skopeo` must be installed on the system -- Valid Red Hat registry credentials -- Network access to `registry.redhat.io` +```shell +export YOUR_NAMESPACE_NAME= +oc new-project $YOUR_NAMESPACE_NAME +``` +### Step 2. Create API Credentials -## Deploy And Run On OCP +Create the `base/secrets.env` file with your API keys. The following table shows which keys are required for each deployment variant: -1. Create a `base/secrets.env` file containing the API keys for external services `ExploitIQ` might use. Not all keys are mandatory. Refer to the main [README](../README.md#obtain-api-keys) for details on how to create the Red Hat credentials and other API keys. 
+| Key | Self-Hosted LLM | Remote NIM | Description | +| --- | --- | --- | --- | +| `serpapi_api_key` | Required | Required | Web search for patch intelligence | +| `ghsa_api_key` | Required | Required | GitHub token for advisory lookups and repository scanning | +| `nvidia_api_key` | Placeholder | Required | A placeholder value is sufficient for self-hosted variants | +| `nvd_api_key` | Required | Required | National Vulnerability Database API key | +| `registry_redhat_username` | Required | Required | Red Hat registry credentials for container image scanning | +| `registry_redhat_password` | Required | Required | Red Hat registry credentials for container image scanning | ```shell cat > base/secrets.env << EOF -nvd_api_key=you_api_key -serpapi_api_key=your_api_key -nvidia_api_key=your_api_key -ghsa_api_key=your_api_key -registry_redhat_username=your_registry_username -registry_redhat_password=your_registry_pass_token - +serpapi_api_key= +ghsa_api_key= +nvidia_api_key= +nvd_api_key= +registry_redhat_username= +registry_redhat_password= EOF ``` -2. If a namespace does not exist, create one: +### Step 3. Configure Image Registry Credentials -```shell -export YOUR_NAMESPACE_NAME=yourNamespaceNameHere -oc new-project $YOUR_NAMESPACE_NAME -``` -3. Create a `base/argilla/feedback_secret.env` file containing the credentials for the Argilla feedback service: +> [!IMPORTANT] +> Product scanning requires valid registry credentials to pull product component images. If you skip this step, the deployment succeeds but authenticated image pulls fail. -```shell -cat > base/argilla/feedback_secret.env << EOF -argilla_username=your_argilla_username -argilla_password=your_argilla_password -argilla_api_key=your_argilla_api_key +Create the `base/image-registry-credentials.env` file with credentials for the registries that product scanning uses to pull component images. The `auth` value is the base64 encoding of `<username>:<password>`. -EOF -``` -4. 
Create an image pull secret to authorize pulling the `ExploitIQ` and `Argilla` container images: +For `registry.redhat.io`, Red Hat recommends creating a registry service account at [https://access.redhat.com/terms-based-registry/](https://access.redhat.com/terms-based-registry/) ```shell -oc create secret generic exploit-iq-pull-secret --from-file=.dockerconfigjson= --type=kubernetes.io/dockerconfigjson -``` - -5. Edit the `image-registry-credentials.env` and provide image registry credentials required by product scanning to access and pull component images. - -```shell -# Example: add your desired registries and credentials. "auth" is base64 of ":". cat > base/image-registry-credentials.env << 'EOF' { "auths": { @@ -149,166 +103,168 @@ cat > base/image-registry-credentials.env << 'EOF' EOF ``` ->[!IMPORTANT] ->This secret is essential for product scanning to authenticate and pull component images. If you skip this step, kustomize will still deploy, but authenticated pulls will not work until you provide real credentials. +### Step 4. Configure Argilla Feedback Credentials + +Argilla runs as an internal cluster service and does not connect to any external account. Use any values for the username, password, and API key. -6. Create the `oauth-secret.env` file containing the `client-secret` and `openshift-domain` values required by the [ExploitIQ Client](./base/exploit_iq_client.yaml) configuration. +Create the `base/argilla/feedback_secret.env` file: -If openshift resource of kind `OAuthClient` named `exploit-iq-client` exists, just get the secret from there: ```shell -export OAUTH_CLIENT_SECRET=$(oc get oauthclient exploit-iq-client -o jsonpath='{..secret}') +cat > base/argilla/feedback_secret.env << EOF +argilla_username= +argilla_password= +argilla_api_key= +EOF ``` -Otherwise, Replace `some-long-secret-used-by-the-oauth-client` with a more secure, unique secret of your own: + +### Step 5. 
Configure OAuth Credentials + +Exploit Intelligence uses OpenShift OAuth for user authentication. The OAuth client secret must be at least 32 bytes (256 bits) because Exploit Intelligence uses it to sign internal session tokens with HS256, which requires a minimum key length of 256 bits. + +> [!IMPORTANT] +> Save the value of `$OAUTH_CLIENT_SECRET` after running the commands below. You need it after deployment to create or update the `OAuthClient` resource. + +#### First-Time Deployment + +Use this procedure only if no `OAuthClient` named `exploit-iq-client` exists on the cluster. If another Exploit Intelligence installation already uses that `OAuthClient`, you must use the [Reusing an Existing OAuthClient](#reusing-an-existing-oauthclient) procedure instead — generating a new secret overwrites the existing one and breaks authentication for all users of that installation. + +Verify that the `OAuthClient` does not exist before proceeding: ```shell -export OAUTH_CLIENT_SECRET="some-long-secret-used-by-the-oauth-client" +oc get oauthclient exploit-iq-client 2>&1 | grep -q "not found" && echo "Safe to proceed" || echo "OAuthClient exists — use Reusing an Existing OAuthClient" ``` +Generate a new client secret and write the credentials file: + ```shell +export OAUTH_CLIENT_SECRET=$(openssl rand -base64 32) export OAUTH_SESSION_SECRET=$(openssl rand -base64 32) +export OCP_DOMAIN=$(oc whoami --show-server | sed -E 's#^https?://api\.##; s#:[0-9]+/?$##; s#/$##') + +cat > base/oauth-secrets.env << EOF +client-secret=$OAUTH_CLIENT_SECRET +session-secret=$OAUTH_SESSION_SECRET +openshift-domain=$OCP_DOMAIN +EOF ``` +#### Reusing an Existing OAuthClient + +Retrieve the secret from the existing `OAuthClient` resource and write the credentials file: + ```shell +export OAUTH_CLIENT_SECRET=$(oc get oauthclient exploit-iq-client -o jsonpath='{..secret}') +export OAUTH_SESSION_SECRET=$(openssl rand -base64 32) +export OCP_DOMAIN=$(oc whoami --show-server | sed -E 
's#^https?://api\.##; s#:[0-9]+/?$##; s#/$##') + cat > base/oauth-secrets.env << EOF client-secret=$OAUTH_CLIENT_SECRET session-secret=$OAUTH_SESSION_SECRET -openshift-domain=$(oc get dns cluster -o jsonpath='{.spec.baseDomain}') +openshift-domain=$OCP_DOMAIN EOF ``` -7. Create MongoDB credentials: +### Step 6. Create Database Credentials ```shell cat > base/mongodb-credentials.env << EOF admin-user=mongoadmin -admin-password=$(openssl rand -base64 24 | tr -d '/+=' | head -c 32) +admin-password=$(openssl rand -base64 24) exploit-iq-user=exploit-iq-user -exploit-iq-password=$(openssl rand -base64 24 | tr -d '/+=' | head -c 32) +exploit-iq-password=$(openssl rand -base64 24) EOF ``` -8. Update `ExploitIQ` configuration file with the correct callback URL for the client service. +### Step 7. Set the Application Callback URL ```shell export CALLBACK_URL="https://exploit-iq-client.$(oc project -q).svc:8443" find . -type f -name 'exploit-iq-config.yml' -exec sed -i "s|CALLBACK_URL_PLACEHOLDER|$CALLBACK_URL|g" {} + ``` -### Configuring Git SSL Certificate Authority for Custom CAs +--- -If your Git server uses a certificate that is signed by a custom Certificate Authority (CA), you must provide the CA certificate bundle to enable ExploitIQ to verify the Git server identity. +## Selecting a Deployment Variant -> [!IMPORTANT] -> If you need to access Red Hat internal Git repositories such as `gitlab.cee.redhat.com`, you must complete this procedure. +Exploit Intelligence supports the following deployment variants. Run only one deployment command in the next section. 
-#### Procedure +| Variant | Overlay | LLM | Use When | +| --- | --- | --- | --- | +| Self-Hosted LLM | `self-hosted-llama3.1-70b-4bit` | Any network-accessible LLM endpoint | You run your own LLM (on-cluster or external) | +| Self-Hosted LLM + MLOps | `mlops` | Any network-accessible LLM endpoint | You also need Grafana and Tempo observability | +| Remote NIM | `remote-nim-all` | NVIDIA-hosted NIM | You use NVIDIA-hosted inference | -1. Create the certificate directory: +--- -```shell -mkdir -p kustomize/base/ca-certs -``` +## Deploying Exploit Intelligence -2. Obtain your CA certificates. +### Deploy with a Self-Hosted LLM -For Red Hat internal Git repositories: +This overlay assumes the LLM is deployed using the [exploit-iq-models](https://github.com/RHEcosystemAppEng/exploit-iq-models) Helm chart, which creates the `llama3-1-70b-instruct-4bit` service in the `exploit-iq-models` namespace. If your LLM is deployed differently, update the upstream URLs in `overlays/self-hosted-llama3.1-70b-4bit/nginx-patch.yaml` to point to your model endpoint before deploying. ```shell -# Download the Red Hat Root CA -curl -o kustomize/base/ca-certs/internal-root-ca.pem \ - https://certs.corp.redhat.com/certs/2022-IT-Root-CA.pem - -# Download the Red Hat internal Intermediate CA -curl -o kustomize/base/ca-certs/rhcs-intermediate-ca.crt \ - https://certs.corp.redhat.com/chains/rhcs-ca-chain-2022-self-signed.crt +oc kustomize overlays/self-hosted-llama3.1-70b-4bit | oc apply -f - -n $YOUR_NAMESPACE_NAME ``` -For other custom CAs: +### Deploy with a Self-Hosted LLM and MLOps Observability -```shell -# Copy your custom CA certificate files to the directory -cp /path/to/your-custom-ca.pem kustomize/base/ca-certs/ -``` - -3. 
Create the CA bundle by concatenating all certificates: +Patch the overlay with your namespace: ```shell -cat kustomize/base/ca-certs/*.{pem,crt} > kustomize/base/ca-certs/ca-bundle.crt +sed -i "s/REPLACE_NAMESPACE/$YOUR_NAMESPACE_NAME/" overlays/mlops/grafana/kustomization.yaml +sed -i "s/REPLACE_NAMESPACE/$YOUR_NAMESPACE_NAME/" overlays/mlops/tempo/kustomization.yaml ``` -4. Verify that the bundle contains your certificates: +Create the Grafana token secret. Retrieve the token value from the Bitwarden vault entry **Exploit Intelligence Grafana SA Token**: ```shell -openssl crl2pkcs7 -nocrl -certfile kustomize/base/ca-certs/ca-bundle.crt | \ - openssl pkcs7 -print_certs -noout +oc create secret generic grafana-bearer-token \ + --from-literal=token='' ``` ->[!IMPORTANT] -You should only run one of the steps 9,10 or 11, depending on if you want to run the service with a self hosted LLM, self hosted LLM with MLOps or Nvidia remote NIM. -9. To deploy `ExploitIQ` with a self-hosted LLM , run: +Deploy: ```shell -# Deploy ExploitIQ with self hosted llama3.1-70b-4bit LLM -oc kustomize overlays/self-hosted-llama3.1-70b-4bit | oc apply -f - -n $YOUR_NAMESPACE_NAME - +oc kustomize overlays/mlops | oc apply -f - -n $YOUR_NAMESPACE_NAME ``` -10. To deploy `ExploitIQ` with a self-hosted LLM and MLOps, run: +> [!WARNING] +> Grafana custom resources may fail to create if the Grafana Operator has not yet reached the `Succeeded` phase. 
Check the operator status: +> +> ```shell +> oc get csv -n $YOUR_NAMESPACE_NAME | grep grafana-operator +> ``` +> +> When the PHASE column shows `Succeeded`, redeploy the Grafana resources: +> +> ```shell +> oc kustomize overlays/mlops \ +> | yq e 'select(.metadata.labels.managed-by == "grafana-operator")' - \ +> | oc replace -f - -n $YOUR_NAMESPACE_NAME +> ``` -```shell -# Patch overlay kustomization yaml with deployment namespace value (Grafana and Tempo) -sed -i "s/REPLACE_NAMESPACE/$YOUR_NAMESPACE_NAME/" overlays/mlops/grafana/kustomization.yaml -sed -i "s/REPLACE_NAMESPACE/$YOUR_NAMESPACE_NAME/" overlays/mlops/tempo/kustomization.yaml -``` - -```shell -# replace EXPLOIT_IQ_GRAFANA_SA_TOKEN with exploit intelligence Grafana SA Token from bitwarden vault (1 year expiration date) -oc create secret generic grafana-bearer-token --from-literal=token='EXPLOIT_IQ_GRAFANA_SA_TOKEN' -``` +### Deploy with Remote NIM ```shell -# Deploy ExploitIQ with self hosted llama3.1-70b-4bit LLM and MLOps -oc kustomize overlays/mlops | oc apply -f - -n $YOUR_NAMESPACE_NAME - +oc kustomize overlays/remote-nim-all | oc apply -f - -n $YOUR_NAMESPACE_NAME ``` -#### ⚠️ Note: Grafana Resources May Fail to Deploy - -When applying the Grafana overlay, some Grafana resources **may fail to be created**. This is usually because the **Grafana Operator CSV has not yet reached the `Succeeded` phase**. +--- -**How to check** +## Post-Deployment Configuration -Check the Grafana Operator CSV status: - -```shell -oc get csv -n $YOUR_NAMESPACE_NAME | grep grafana-operator -``` +### Configure OpenShift OAuth -Look at the PHASE column — it should show: -```text -Succeeded -``` +> [!WARNING] +> Complete this step before attempting to log in to the Exploit Intelligence UI. Authentication fails if the `OAuthClient` resource is not configured correctly. -**Redeploy Grafana resources** +After the deployment completes and the `exploit-iq-client` route is available, configure the OpenShift OAuth client. 
Select the procedure that matches your situation. -Once the CSV is in Succeeded state, you can safely redeploy the Grafana overlay filtered to Grafana CRs (all grafana CRs are labeled with managed-by: grafana-operator): -```shell -oc kustomize overlays/mlops \ - | yq e 'select(.metadata.labels.managed-by == "grafana-operator")' - \ - | oc replace -f - -n $YOUR_NAMESPACE_NAME -``` +#### Create the OAuthClient Resource +Complete this procedure if you followed Step 5, "First-Time Deployment": -10. Alternatively, to deploy `ExploitIQ` with a fully remote nim LLM, run: ```shell -# Deploy ExploitIQ with remote nim llama-3.1-70b-16bit LLM -oc kustomize overlays/remote-nim-all | oc apply -f - -n $YOUR_NAMESPACE_NAME -``` ->[!WARNING] -Without completing the following step with the correct secret from step 6, authentication and logging into the UI App will fail! -11. If it doesn't already exist, create the `OAuthClient` Custom Resource using the secret (from step 6) and generated route - -```bash oc create -f - < base/oauth-secrets.env << EOF -client-secret=$OAUTH_CLIENT_SECRET -openshift-domain=$(oc get dns cluster -o jsonpath='{.spec.baseDomain}') -EOF +oc adm groups new exploit-iq-view +oc adm groups add-users exploit-iq-view ``` -12. **(Optional) Enable OAuth for the ExploitIQ MCP Server.** If you want MCP clients (Claude Code, Cursor, etc.) to authenticate via OpenShift OAuth, create an `OAuthClient` CR for the MCP server: +### Enable OAuth for the MCP Server + +Complete this step if you want MCP clients such as Claude Code or Cursor to authenticate with OpenShift OAuth. -```bash +If the `OAuthClient` resource does not exist, create it: + +```shell oc create -f - < | oc apply -f - -n $YOUR_NAMESPACE_NAME +``` + +To restrict MCP access to specific OpenShift groups, set the `EXPLOITIQ_ALLOWED_GROUPS` environment variable in `base/exploitiq_mcp_server.yaml`. The default value is `exploitiq-users`. To allow any authenticated user, remove the environment variable entirely. 
+ +### Configure the Swagger UI Endpoint + +Complete this step to enable the Swagger UI endpoint for API testing. This configuration applies to non-production environments only. + +```shell +oc set env deployment -l component=exploit-iq-client \ + QUARKUS_SMALLRYE_OPENAPI_SERVERS=https://$(oc get route exploit-iq-client -o=jsonpath='{..spec.host}') +``` + +### Configure a Custom Git Server CA + +Complete this step if your Git server uses a certificate signed by a custom Certificate Authority (CA). + +> [!IMPORTANT] +> Complete this step if you access Red Hat internal Git repositories such as `gitlab.cee.redhat.com`. + +**1.** Create the certificate directory: + +```shell +mkdir -p base/ca-certs +``` + +**2.** Obtain your CA certificates. + +For Red Hat internal Git repositories: + +```shell +curl -o base/ca-certs/internal-root-ca.pem \ + https://certs.corp.redhat.com/certs/2022-IT-Root-CA.pem + +curl -o base/ca-certs/rhcs-intermediate-ca.crt \ + https://certs.corp.redhat.com/chains/rhcs-ca-chain-2022-self-signed.crt +``` + +For other custom CAs: + +```shell +cp /path/to/your-custom-ca.pem base/ca-certs/ +``` + +**3.** Create the CA bundle: + +```shell +cat base/ca-certs/*.{pem,crt} > base/ca-certs/ca-bundle.crt +``` + +**4.** Verify the bundle: + ```shell -export MCP_CALLBACK=https://$(oc get route exploitiq-mcp-server -o jsonpath='{.spec.host}')/oauth/callback -oc patch oauthclient exploitiq-mcp-server -p '{"redirectURIs":["'$MCP_CALLBACK'"]}' +openssl crl2pkcs7 -nocrl -certfile base/ca-certs/ca-bundle.crt | \ + openssl pkcs7 -print_certs -noout ``` -Then uncomment the OAuth env vars in `base/exploitiq_mcp_server.yaml` and re-apply: +Then redeploy: + ```shell oc kustomize overlays/ | oc apply -f - -n $YOUR_NAMESPACE_NAME ``` -To restrict MCP access to specific OpenShift groups, edit the `EXPLOITIQ_ALLOWED_GROUPS` value in `base/exploitiq_mcp_server.yaml` (defaults to `exploitiq-users`): -```yaml -- name: EXPLOITIQ_ALLOWED_GROUPS - value: 
"exploitiq-users,security-team" +### Configure the OAuth Endpoint CA + +Complete this step if you see the error `PKIX path building failed` in the `exploit-iq-client` pod logs after deployment. + +**1.** Fetch and merge CA chains from both OpenShift OIDC endpoints: + +- `oauth-openshift.apps.<cluster-domain>` (authorization/token endpoint chain) +- `api.<cluster-domain>:6443` (JWKS and user-info endpoint chain) + +The command extracts only CA certificates (skips endpoint leaf certificates, which are not trust anchors): + +```shell +export OCP_DOMAIN=$(oc whoami --show-server | sed -E 's#^https?://api\.##; s#:[0-9]+/?$##; s#/$##') +export OIDC_CA_DIR=components/oidc-ca +mkdir -p "${OIDC_CA_DIR}" + +openssl s_client -connect oauth-openshift.apps.${OCP_DOMAIN}:443 \ + -servername oauth-openshift.apps.${OCP_DOMAIN} \ + -showcerts </dev/null 2>/dev/null \ + | awk '/BEGIN CERT/,/END CERT/' \ + | awk 'BEGIN{n=0;buf=""} /BEGIN CERT/{n++;buf=""} {buf=buf $0"\n"} /END CERT/{if(n>1) printf buf}' \ + > "${OIDC_CA_DIR}/oidc-oauth-ca.crt" + +openssl s_client -connect api.${OCP_DOMAIN}:6443 \ + -servername api.${OCP_DOMAIN} \ + -showcerts </dev/null 2>/dev/null \ + | awk '/BEGIN CERT/,/END CERT/' \ + | awk 'BEGIN{n=0;buf=""} /BEGIN CERT/{n++;buf=""} {buf=buf $0"\n"} /END CERT/{if(n>1) printf buf}' \ + > "${OIDC_CA_DIR}/oidc-api-ca.crt" ``` -When set, group membership is checked during the OAuth callback — unauthorized users see a 403 error in the browser before a token is issued, instead of a misleading "Authentication successful" followed by a connection failure. A second group check runs as Express middleware on every `/mcp` request as defense-in-depth. To allow any authenticated user, remove the env var entirely. -13.
On Non Production environments, the Swagger-UI endpoint is enabled, and can be configured for testing on environment -that way: +**2.** Merge both chains into a single bundle, then verify it: + ```shell -oc set env deployment -l component=exploit-iq-client QUARKUS_SMALLRYE_OPENAPI_SERVERS=https://$(oc get route exploit-iq-client -o=jsonpath='{..spec.host}') +cat "${OIDC_CA_DIR}/oidc-oauth-ca.crt" "${OIDC_CA_DIR}/oidc-api-ca.crt" > "${OIDC_CA_DIR}/ca-bundle.crt" + +openssl crl2pkcs7 -nocrl -certfile "${OIDC_CA_DIR}/ca-bundle.crt" | \ + openssl pkcs7 -print_certs -noout ``` -14. To Uninstall the ExploitIQ System, kindly run the following command, after setting the Deployment variant environment variable, depending on your deployment variant of choice: +**3.** Create or update the `oidc-ca-bundle` ConfigMap in your namespace (no certificate files are stored in this repository): ```shell -DEPLOYMENT_VARIANT_NAME=remote-nim-all -#DEPLOYMENT_VARIANT_NAME=self-hosted-llama3.1-70b-4bit -# Delete all resources but keep all data saved in PVCs -kustomize build overlays/$DEPLOYMENT_VARIANT_NAME/ | oc delete -l purpose!=persistent -f - -# Or, Delete Everything -kustomize build overlays/$DEPLOYMENT_VARIANT_NAME/ | oc delete -f - +oc -n $YOUR_NAMESPACE_NAME create configmap oidc-ca-bundle \ + --from-file=ca-bundle.crt="${OIDC_CA_DIR}/ca-bundle.crt" \ + --dry-run=client -o yaml | oc apply -f - ``` -### Deploy Test overlay variant (Rapid deployment) -1. Download and install [GnuPG](https://www.gnupg.org/download/) and [sops](https://github.com/getsops/sops/releases) -2. Create new namespace/project: + +**4.** Add the `oidc-ca` component to your deployment variant and redeploy.
The component patches the `exploit-iq-client` deployment to mount the CA bundle and configure the named OIDC TLS trust store: + ```shell +(cd overlays/<deployment-variant> && kustomize edit add component ../../components/oidc-ca) +oc kustomize overlays/<deployment-variant> | oc apply -f - -n $YOUR_NAMESPACE_NAME +``` + +> [!NOTE] +> **Dev/temporary workaround:** If you need to unblock quickly without the certificate setup, set the following environment variable on the deployment. This disables TLS verification entirely and must not be used in production environments. +> +> ```shell +> oc set env deployment/exploit-iq-client QUARKUS_TLS_TRUST_ALL=true -n $YOUR_NAMESPACE_NAME +> ``` + +--- + +## Uninstalling Exploit Intelligence + +Set your deployment variant and run one of the following commands: + +```shell +export DEPLOYMENT_VARIANT_NAME=remote-nim-all +# export DEPLOYMENT_VARIANT_NAME=self-hosted-llama3.1-70b-4bit +# export DEPLOYMENT_VARIANT_NAME=mlops +``` + +To delete all resources but preserve data in PersistentVolumeClaims: + +```shell +kustomize build overlays/$DEPLOYMENT_VARIANT_NAME/ | oc delete -l purpose!=persistent -f - +``` + +To delete all resources including persistent data: + +```shell +kustomize build overlays/$DEPLOYMENT_VARIANT_NAME/ | oc delete -f - +``` + +--- + +## Running Exploit Intelligence Locally + +You can run Exploit Intelligence on a local machine without GPU hardware, for development, debugging, and troubleshooting.
+ +Before you begin, install the following tools and verify that all binaries are available on your system path: + +- [uv package manager](https://docs.astral.sh/uv/getting-started/installation) +- Python 3.12 +- [Java JDK 21 or later](https://www.oracle.com/il-en/java/technologies/downloads/#java21) +- [Maven](https://maven.apache.org/install.html) +- [Go](https://go.dev/doc/install) + +### Running the Application Locally + +**1.** Navigate to the repository root and create a Python virtual environment: + +```shell +cd $(git rev-parse --show-toplevel) +rm -rf .venv || true +uv venv --python 3.12 --no-cache +source .venv/bin/activate +``` + +**2.** Install dependencies: + +```shell +uv sync --no-cache +``` + +**3.** Set the required environment variables: + +```shell +export CHECKLIST_MODEL_NAME=hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4 +export CODE_VDB_RETRIEVER_MODEL_NAME=hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4 +export CVE_AGENT_EXECUTOR_MODEL_NAME=hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4 +export DOC_VDB_RETRIEVER_MODEL_NAME=hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4 +export JUSTIFY_MODEL_NAME=hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4 +export SUMMARIZE_MODEL_NAME=hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4 +export NVIDIA_API_BASE=http://<llm-endpoint-host>/v1 +export PYTHONUNBUFFERED=1 +export GOPROXY=https://proxy.golang.org,direct +export SERPAPI_API_KEY= +export REGISTRY_REDHAT_USERNAME="" +export REGISTRY_REDHAT_PASSWORD="" +``` + +**4.** Start the application: + +```shell +nat --log-level debug serve \ + --config_file=src/vuln_analysis/configs/config-http-openai.yml \ + --host 0.0.0.0 \ + --port 26466 +``` + +**5.** In a separate terminal with the virtual environment active, start the Arize Phoenix tracing service: + +```shell +phoenix serve +``` + +### Enabling Container Source Download + +Complete this step to extract RPM dependencies from Red Hat container registries for C/C++ projects.
+ +Before enabling this feature, verify that the following prerequisites are met: + +- `skopeo` is installed and available on your system path +- Valid Red Hat registry credentials are configured +- Network access to `registry.redhat.io` is available + +Set the following environment variable before starting the application: + +```shell +export USE_CONTAINER_SOURCES=true +``` + +--- + +## Deploying the Test Variant + +The test variant uses encrypted secret files. To decrypt them, you need the following tools and access: + +- [GnuPG](https://www.gnupg.org/download/) +- [SOPS](https://github.com/getsops/sops/releases) +- The private decryption key from the Bitwarden vault entry **Exploit Intelligence Tests Deployment Variant Private Key for Decryption** + +### Deploying the Test Overlay + +**1.** Create a project namespace: + ```shell export PROJECT_NAME=exploit-test oc new-project $PROJECT_NAME ``` -3. Take private key from vault ( `ExploitIQ Tests Deployment Variant Private Key for Decryption.`) and import it to GPG: + +**2.** Import the private decryption key: + ```shell gpg --import /path/to/sec-decryption.key ``` -4. 
Decrypt all secret files: + +**3.** Navigate to the test overlay directory and decrypt all secret files: + ```shell cd $(git rev-parse --show-toplevel)/kustomize/overlays/tests mkdir -p secrets -sops -d exploit-iq-ips.secret > secrets/exploit-iq-ips.json -sops -d google-sheets-secrets-enc.yaml > secrets/google-sheets-secrets.yaml -sops -d integration-tests-secrets-enc.yaml > secrets/integration-tests-secrets.yaml -sops -d mongodb-credentials.env2 > secrets/mongodb-credentials.env -sops -d oauth-secrets.env2 > secrets/oauth-secrets.env -sops -d registry-app-creds-enc.yaml > secrets/registry-app-creds.yaml -sops -d secrets.env2 > secrets/secrets.env -sops -d server-model-config-enc.yaml > secrets/server-model-config.yaml -sops -d exploit-iq-client-build-ips-enc.yaml > secrets/exploit-iq-client-build-ips.yaml -sops -d exploit-iq-automation-token-enc.yaml > secrets/exploit-iq-automation-token.yaml -sops -d credential-encryption-key.env2 > secrets/credential-encryption-key.env + +sops -d exploit-iq-ips.secret > secrets/exploit-iq-ips.json +sops -d google-sheets-secrets-enc.yaml > secrets/google-sheets-secrets.yaml +sops -d integration-tests-secrets-enc.yaml > secrets/integration-tests-secrets.yaml +sops -d mongodb-credentials.env2 > secrets/mongodb-credentials.env +sops -d oauth-secrets.env2 > secrets/oauth-secrets.env +sops -d registry-app-creds-enc.yaml > secrets/registry-app-creds.yaml +sops -d secrets.env2 > secrets/secrets.env +sops -d server-model-config-enc.yaml > secrets/server-model-config.yaml +sops -d exploit-iq-client-build-ips-enc.yaml > secrets/exploit-iq-client-build-ips.yaml +sops -d exploit-iq-automation-token-enc.yaml > secrets/exploit-iq-automation-token.yaml +sops -d credential-encryption-key.env2 > secrets/credential-encryption-key.env ``` -5. Override any secret that you need in the decrypted files, if not needed, you can continue to next step. +**4.** Override any decrypted secret values as needed, then continue to the next step. 
+ +**5.** Configure the OAuth client secret. ->[!WARNING] -Without completing the following step with the correct secret from step 6, authentication and logging into the UI App will fail! +> [!WARNING] +> Authentication fails if you do not complete this step with the correct OAuth secret before deployment. +If an `OAuthClient` named `exploit-iq-client` already exists on the cluster, retrieve its secret: -6. If openshift resource of kind `OAuthClient` named `exploit-iq-client` exists, just get the secret from there: ```shell export OAUTH_CLIENT_SECRET=$(oc get oauthclient exploit-iq-client -o jsonpath='{..secret}') -echo oauthClientSecret=$OAUTH_CLIENT_SECRET ``` -Otherwise, Replace `some-long-secret-used-by-the-oauth-client` with a more secure, unique secret of your own: + +Otherwise, generate a new secret (minimum 32 characters): ```shell -export OAUTH_CLIENT_SECRET="some-long-secret-used-by-the-oauth-client" +export OAUTH_CLIENT_SECRET=$(openssl rand -base64 32) ``` -Eventually, Run the following: + +Write the credentials file: + ```shell +export OCP_DOMAIN=$(oc whoami --show-server | sed -E 's#^https?://api\.##; s#:[0-9]+/?$##; s#/$##') + cat > secrets/oauth-secrets.env << EOF client-secret=$OAUTH_CLIENT_SECRET -openshift-domain=$(oc get dns cluster -o jsonpath='{.spec.baseDomain}') +openshift-domain=$OCP_DOMAIN EOF ``` -7. Update `ExploitIQ` configuration file with the correct callback URL for the client service +**6.** Set the application callback URL: + ```shell cd $(git rev-parse --show-toplevel)/kustomize export CALLBACK_URL="https://exploit-iq-client.$(oc project -q).svc:8443" @@ -444,15 +642,17 @@ find . -type f -name 'exploit-iq-config.yml' -exec sed -i "s|CALLBACK_URL_PLACEH cd $(git rev-parse --show-toplevel)/kustomize/overlays/tests ``` -8. Now deploy to the cluster the exploitIQ system ( minus agent) with all resources: +**7.** Deploy the test overlay: + ```shell kustomize build . | oc apply -f - ``` +**8.** Configure the `OAuthClient` resource. -9. 
If it doesn't already exist, create the `OAuthClient` Custom Resource using the secret (from step 6) and generated route +If the resource does not exist, create it: -```bash +```shell oc create -f - < \ + exploit-iq-tests ../../../exploit-iq-models/exploit-iq-models ``` -12. Remove untracked decrypted secrets files +**11.** Remove the decrypted secret files: + ```shell -rm -rf secrets/ +rm -rf secrets/ ``` -13. Tear down: +### Tearing Down the Test Environment + ```shell helm delete exploit-iq-tests - oc delete project $(oc project --short -q) ``` -14. Need to install on cluster [Openshift pipelines operator](https://docs.redhat.com/en/documentation/red_hat_openshift_pipelines/1.19/html/installing_and_configuring/installing-pipelines)If need to install the [exploit-iq-pac](https://github.com/apps/exploit-iq-pac/) PAC (pipeline as code) github application on a new cluster , you need to make sure to configure it according to the [PAC github application docs](https://pipelinesascode.com/docs/install/github_apps/#configure-pipelines-as-code-on-your-cluster-to-access-the-github-app). -In this case, you need to supply to the secret in the documentation github application private key generated in the github app settings, and webhook secret defined and set it in the github application settings. +### Installing Pipelines as Code + +If you need to install the OpenShift Pipelines Operator on a new cluster, refer to the [OpenShift Pipelines installation documentation](https://docs.redhat.com/en/documentation/red_hat_openshift_pipelines/1.19/html/installing_and_configuring/installing-pipelines). + +To configure the [Exploit Intelligence PAC GitHub application](https://github.com/apps/exploit-iq-pac/) on a new cluster, follow the [PAC GitHub application configuration guide](https://pipelinesascode.com/docs/install/github_apps/#configure-pipelines-as-code-on-your-cluster-to-access-the-github-app). 
You need the GitHub application private key and the webhook secret from the application settings. diff --git a/kustomize/base/argilla/deployment.yaml b/kustomize/base/argilla/deployment.yaml index 08b1e1970..afb3aedca 100644 --- a/kustomize/base/argilla/deployment.yaml +++ b/kustomize/base/argilla/deployment.yaml @@ -17,8 +17,6 @@ spec: app: exploit-iq-feedback-api spec: restartPolicy: Always - imagePullSecrets: - - name: exploit-iq-pull-secret serviceAccountName: argilla securityContext: fsGroup: 1000 diff --git a/kustomize/base/exploit-iq-config.yml b/kustomize/base/exploit-iq-config.yml index b75e69a4c..ef4a1be97 100644 --- a/kustomize/base/exploit-iq-config.yml +++ b/kustomize/base/exploit-iq-config.yml @@ -58,6 +58,11 @@ functions: cve_process_sbom: _type: cve_process_sbom + cve_verify_vuln_package: + _type: cve_verify_vuln_package + skip: false + base_git_dir: ${EXPLOIT_IQ_DATA_DIR:-/exploit-iq-data/}git + llm_name: checklist_llm cve_checklist: _type: cve_checklist llm_name: checklist_llm @@ -96,6 +101,13 @@ functions: max_retries: 5 Container Analysis Data: _type: container_image_analysis_data + Configuration Scanner: + _type: configuration_scanner + max_results: 15 + context_lines: 5 + Import Usage Analyzer: + _type: import_usage_analyzer + max_files: 20 cve_agent_executor: _type: cve_agent_executor llm_name: cve_agent_executor_llm @@ -108,6 +120,8 @@ functions: - Function Caller Finder - Function Locator - Function Library Version Finder + - Configuration Scanner + - Import Usage Analyzer max_concurrency: null max_iterations: 10 prompt_examples: false @@ -116,6 +130,7 @@ functions: return_intermediate_steps: false # transitive_search_tool_enabled: false cve_web_search_enabled: true + uber_jar_file_threshold: 600 verbose: false cve_generate_cvss: _type: cve_generate_cvss @@ -195,6 +210,8 @@ functions: tool_names: - Source Grep - Code Keyword Search + cve_fetch_patches: + _type: cve_fetch_patches health_check: _type: health_check @@ -280,6 +297,7 @@ 
workflow: cve_fetch_intel_name: cve_fetch_intel cve_calculate_intel_score_name: cve_calculate_intel_score cve_process_sbom_name: cve_process_sbom + cve_verify_vuln_package_name: cve_verify_vuln_package cve_checklist_name: cve_checklist cve_agent_executor_name: cve_agent_executor cve_generate_cvss_name: cve_generate_cvss @@ -292,6 +310,7 @@ workflow: cve_package_code_agent_name: cve_package_code_agent cve_checker_report_name: cve_checker_report cve_build_agent_name: cve_build_agent + cve_fetch_patches_name: cve_fetch_patches eval: general: diff --git a/kustomize/base/ips-patch-client.json b/kustomize/base/ips-patch-client.json deleted file mode 100644 index e2b53b801..000000000 --- a/kustomize/base/ips-patch-client.json +++ /dev/null @@ -1,6 +0,0 @@ -[{ - "op": "add", - "path": "/spec/template/spec/imagePullSecrets/0", - "value": {"name": "exploit-iq-pull-secret"} -} -] diff --git a/kustomize/base/ips-patch.json b/kustomize/base/ips-patch.json deleted file mode 100644 index e2b53b801..000000000 --- a/kustomize/base/ips-patch.json +++ /dev/null @@ -1,6 +0,0 @@ -[{ - "op": "add", - "path": "/spec/template/spec/imagePullSecrets/0", - "value": {"name": "exploit-iq-pull-secret"} -} -] diff --git a/kustomize/base/kustomization.yaml b/kustomize/base/kustomization.yaml index aa362898a..6837ed8c7 100644 --- a/kustomize/base/kustomization.yaml +++ b/kustomize/base/kustomization.yaml @@ -79,20 +79,6 @@ configMapGenerator: - ca-certs/ca-bundle.crt options: disableNameSuffixHash: true - -patches: - - path: ips-patch.json - - target: - name: exploit-iq - kind: Deployment - - - path: ips-patch-client.json - - target: - name: exploit-iq-client - kind: Deployment - images: - name: quay.io/ecosystem-appeng/agent-morpheus-rh newTag: latest diff --git a/kustomize/components/oidc-ca/kustomization.yaml b/kustomize/components/oidc-ca/kustomization.yaml new file mode 100644 index 000000000..e0887cfe9 --- /dev/null +++ b/kustomize/components/oidc-ca/kustomization.yaml @@ -0,0 +1,15 @@ 
+apiVersion: kustomize.config.k8s.io/v1alpha1 +kind: Component + +configMapGenerator: + - name: oidc-ca-bundle + files: + - ca-bundle.crt + options: + disableNameSuffixHash: true + +patches: + - path: oidc-ca-patch.yaml + target: + name: exploit-iq-client + kind: Deployment diff --git a/kustomize/components/oidc-ca/oidc-ca-patch.yaml b/kustomize/components/oidc-ca/oidc-ca-patch.yaml new file mode 100644 index 000000000..e61335c2f --- /dev/null +++ b/kustomize/components/oidc-ca/oidc-ca-patch.yaml @@ -0,0 +1,22 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: exploit-iq-client +spec: + template: + spec: + containers: + - name: exploit-iq-client + env: + - name: QUARKUS_TLS__OIDC__TRUST_STORE_PEM_CERTS + value: /etc/oidc-ca/ca-bundle.crt + - name: QUARKUS_OIDC_TLS_TLS_CONFIGURATION_NAME + value: oidc + volumeMounts: + - name: oidc-ca + mountPath: /etc/oidc-ca + readOnly: true + volumes: + - name: oidc-ca + configMap: + name: oidc-ca-bundle diff --git a/kustomize/deployer-rbac.yaml b/kustomize/deployer-rbac.yaml new file mode 100644 index 000000000..8be695176 --- /dev/null +++ b/kustomize/deployer-rbac.yaml @@ -0,0 +1,101 @@ +# deployer-rbac.yaml +# +# Grants a non-cluster-admin user the minimum permissions required to +# deploy Exploit Intelligence on OpenShift Container Platform. +# +# Please replace the following placeholders: +# — the OpenShift username of the deployer (e.g. jdoe) +# — the target namespace (e.g. exploit-iq) +# + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: exploit-intelligence-oauthclient-deploy +rules: + # get and patch scoped to the two project OAuthClients only. + - apiGroups: + - oauth.openshift.io + resources: + - oauthclients + resourceNames: + - exploit-iq-client + - exploitiq-mcp-server + verbs: + - get + - patch + # create cannot be restricted by resourceNames (resource does not exist yet). 
+ - apiGroups: + - oauth.openshift.io + resources: + - oauthclients + verbs: + - create +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: exploit-intelligence-oauthclient-deploy +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: exploit-intelligence-oauthclient-deploy +subjects: + - kind: User + name: +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: exploit-intelligence-rbac-deploy + namespace: +rules: + - apiGroups: + - rbac.authorization.k8s.io + resources: + - roles + verbs: + - get + - create + - update + - patch + - apiGroups: + - rbac.authorization.k8s.io + resources: + - rolebindings + verbs: + - get + - create + - update + - patch + - apiGroups: + - security.openshift.io + resources: + - securitycontextconstraints + resourceNames: + - anyuid + verbs: + - use + - apiGroups: + - monitoring.coreos.com + resources: + - servicemonitors + verbs: + - get + - create + - update + - patch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: exploit-intelligence-rbac-deploy + namespace: +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: exploit-intelligence-rbac-deploy +subjects: + - kind: User + name: diff --git a/kustomize/overlays/batch-processing/exploit-iq-client-batch-patch.yaml b/kustomize/overlays/batch-processing/exploit-iq-client-batch-patch.yaml new file mode 100644 index 000000000..a753910a2 --- /dev/null +++ b/kustomize/overlays/batch-processing/exploit-iq-client-batch-patch.yaml @@ -0,0 +1,31 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: exploit-iq-client + labels: + app: exploit-iq + component: exploit-iq-client +spec: + strategy: + type: Recreate + replicas: 1 + selector: + matchLabels: + app: exploit-iq + component: exploit-iq-client + template: + metadata: + labels: + app: exploit-iq + component: exploit-iq-client + spec: + imagePullSecrets: [] + serviceAccountName: exploit-iq-client-sa + 
containers: + - name: exploit-iq-client + imagePullPolicy: Always + env: + - name: MORPHEUS_QUEUE_TIMEOUT + value: 60m + - name: MORPHEUS_QUEUE_MAX_ACTIVE + value: "5" diff --git a/kustomize/overlays/batch-processing/exploit-iq-resources-patch.yaml b/kustomize/overlays/batch-processing/exploit-iq-resources-patch.yaml new file mode 100644 index 000000000..195e8792d --- /dev/null +++ b/kustomize/overlays/batch-processing/exploit-iq-resources-patch.yaml @@ -0,0 +1,32 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: exploit-iq + labels: + app: exploit-iq + component: exploit-iq +spec: + selector: + matchLabels: + app: exploit-iq + component: exploit-iq + template: + metadata: + labels: + app: exploit-iq + component: exploit-iq + spec: + containers: + - name: exploit-iq-service + imagePullPolicy: Always + workingDir: /workspace/ + # Deploy with QoS(Guaranteed) + resources: + limits: + memory: "12Gi" + cpu: "1000m" + nvidia.com/gpu: "1" + requests: + memory: "12Gi" + cpu: "1000m" + nvidia.com/gpu: "1" diff --git a/kustomize/overlays/batch-processing/kustomization.yaml b/kustomize/overlays/batch-processing/kustomization.yaml index 14792f837..5d7cc1acd 100644 --- a/kustomize/overlays/batch-processing/kustomization.yaml +++ b/kustomize/overlays/batch-processing/kustomization.yaml @@ -1,86 +1,22 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization -resources: -- ../local-llama3.1-70b-4bit/ +resources: +- ../self-hosted-llama3.1-70b-4bit commonAnnotations: deployment-variant: batch-processing -patchesStrategicMerge: - - |- - apiVersion: apps/v1 - kind: Deployment - metadata: - name: exploit-iq - labels: - app: exploit-iq - component: exploit-iq - spec: - selector: - matchLabels: - app: exploit-iq - component: exploit-iq - template: - metadata: - labels: - app: exploit-iq - component: exploit-iq - spec: - containers: - - name: exploit-iq-service - imagePullPolicy: Always - workingDir: /workspace/ - # Deploy with QoS(Guaranteed) - resources: - limits: 
- memory: "12Gi" - cpu: "1000m" - nvidia.com/gpu: "1" - requests: - memory: "12Gi" - cpu: "1000m" - nvidia.com/gpu: "1" - - |- - apiVersion: apps/v1 - kind: Deployment - metadata: - name: exploit-iq-client - labels: - app: exploit-iq - component: exploit-iq-client - spec: - strategy: - type: Recreate - replicas: 1 - selector: - matchLabels: - app: exploit-iq - component: exploit-iq-client - template: - metadata: - labels: - app: exploit-iq - component: exploit-iq-client - spec: - imagePullSecrets: [] - serviceAccountName: exploit-iq-client-sa - containers: - - name: exploit-iq-client - imagePullPolicy: Always - env: - - name: EXPLOIT_IQ_QUEUE_TIMEOUT - value: 60m - - name: EXPLOIT_IQ_QUEUE_MAX_ACTIVE - value: "5" +patches: +- path: exploit-iq-resources-patch.yaml +- path: exploit-iq-client-batch-patch.yaml configMapGenerator: - - behavior: replace - - name: nginx-cache-routes - files: - - nginx/templates/routes/intel.conf.template - - nginx/templates/routes/nemo.conf.template - - nginx/templates/routes/nim.conf.template - - nginx/templates/routes/nvidia.conf.template - - nginx/templates/routes/openai.conf.template +- behavior: replace + name: nginx-cache-routes + files: + - nginx/templates/routes/intel.conf.template + - nginx/templates/routes/nemo.conf.template + - nginx/templates/routes/nim.conf.template + - nginx/templates/routes/nvidia.conf.template + - nginx/templates/routes/openai.conf.template diff --git a/kustomize/overlays/remote-nim-all/exploit-iq-nim-patch.yaml b/kustomize/overlays/remote-nim-all/exploit-iq-nim-patch.yaml new file mode 100644 index 000000000..15b18a3a6 --- /dev/null +++ b/kustomize/overlays/remote-nim-all/exploit-iq-nim-patch.yaml @@ -0,0 +1,56 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: exploit-iq + labels: + app: exploit-iq + component: exploit-iq + annotations: + api-key: &api-key "EXPLOIT_IQ" + nim-llm-baseurl: &nim-llm-baseurl "http://nginx-cache:8080/nim_llm/v1" +spec: + selector: + matchLabels: + app: exploit-iq + 
component: exploit-iq + template: + metadata: + labels: + app: exploit-iq + component: exploit-iq + spec: + containers: + - name: exploit-iq + imagePullPolicy: Always + workingDir: /workspace/ + env: + - name: LLM_API_KEY_CHECKLIST + value: *api-key + - name: LLM_API_KEY_CODE_VDB_RETRIEVER + value: *api-key + - name: LLM_API_KEY_DOC_VDB_RETRIEVER + value: *api-key + - name: LLM_API_KEY_AGENT_EXECUTOR + value: *api-key + - name: LLM_API_KEY_SUMMARIZE + value: *api-key + - name: LLM_API_KEY_JUSTIFY + value: *api-key + - name: LLM_API_KEY_INTEL_SOURCE_SCORE + value: *api-key + - name: CHECKLIST_LLM_API_BASE + value: *nim-llm-baseurl + - name: CODE_VDB_RETRIEVER_API_BASE + value: *nim-llm-baseurl + - name: DOC_VDB_RETRIEVER_API_BASE + value: *nim-llm-baseurl + - name: AGENT_EXECUTOR_LLM_API_BASE + value: *nim-llm-baseurl + - name: GENERATE_CVSS_LLM_API_BASE + value: *nim-llm-baseurl + - name: SUMMARIZE_LLM_API_BASE + value: *nim-llm-baseurl + - name: JUSTIFY_LLM_API_BASE + value: *nim-llm-baseurl + - name: INTEL_SOURCE_SCORE_LLM_API_BASE + value: *nim-llm-baseurl diff --git a/kustomize/overlays/remote-nim-all/kustomization.yaml b/kustomize/overlays/remote-nim-all/kustomization.yaml index 994d919e1..56689c402 100644 --- a/kustomize/overlays/remote-nim-all/kustomization.yaml +++ b/kustomize/overlays/remote-nim-all/kustomization.yaml @@ -2,84 +2,11 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - - ../../base +- ../../base commonAnnotations: deployment-variant: remote-nim -patchesStrategicMerge: - - nginx-patch.yaml - - |- - apiVersion: apps/v1 - kind: Deployment - metadata: - name: exploit-iq - labels: - app: exploit-iq - component: exploit-iq - annotations: - api-key: &api-key "EXPLOIT_IQ" - nim-llm-baseurl: &nim-llm-baseurl "http://nginx-cache:8080/nim_llm/v1" - spec: - selector: - matchLabels: - app: exploit-iq - component: exploit-iq - template: - metadata: - labels: - app: exploit-iq - component: exploit-iq - spec: - containers: - - 
name: exploit-iq - imagePullPolicy: Always - workingDir: /workspace/ - env: - - name: LLM_API_KEY_CHECKLIST - value: *api-key - - - name: LLM_API_KEY_CODE_VDB_RETRIEVER - value: *api-key - - - name: LLM_API_KEY_DOC_VDB_RETRIEVER - value: *api-key - - - name: LLM_API_KEY_AGENT_EXECUTOR - value: *api-key - - - name: LLM_API_KEY_SUMMARIZE - value: *api-key - - - name: LLM_API_KEY_JUSTIFY - value: *api-key - - - name: LLM_API_KEY_INTEL_SOURCE_SCORE - value: *api-key - - - - name: CHECKLIST_LLM_API_BASE - value: *nim-llm-baseurl - - - name: CODE_VDB_RETRIEVER_API_BASE - value: *nim-llm-baseurl - - - - name: DOC_VDB_RETRIEVER_API_BASE - value: *nim-llm-baseurl - - - name: AGENT_EXECUTOR_LLM_API_BASE - value: *nim-llm-baseurl - - - name: GENERATE_CVSS_LLM_API_BASE - value: *nim-llm-baseurl - - - name: SUMMARIZE_LLM_API_BASE - value: *nim-llm-baseurl - - - name: JUSTIFY_LLM_API_BASE - value: *nim-llm-baseurl - - - name: INTEL_SOURCE_SCORE_LLM_API_BASE - value: *nim-llm-baseurl - +patches: +- path: nginx-patch.yaml +- path: exploit-iq-nim-patch.yaml diff --git a/kustomize/overlays/self-hosted-llama3.1-70b-4bit/exploit-iq-llm-patch.yaml b/kustomize/overlays/self-hosted-llama3.1-70b-4bit/exploit-iq-llm-patch.yaml new file mode 100644 index 000000000..1aad91928 --- /dev/null +++ b/kustomize/overlays/self-hosted-llama3.1-70b-4bit/exploit-iq-llm-patch.yaml @@ -0,0 +1,88 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: exploit-iq + labels: + app: exploit-iq + component: exploit-iq + annotations: + llm-type: &llm-type openai + api-key: &api-key "EMPTY" + openai-llm-baseurl: &openai-llm-baseurl http://nginx-cache:8080/openai/v1 + model-name: &model-name hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4 +spec: + selector: + matchLabels: + app: exploit-iq + component: exploit-iq + template: + metadata: + labels: + app: exploit-iq + component: exploit-iq + spec: + containers: + - name: exploit-iq + imagePullPolicy: Always + workingDir: /workspace/ + env: + - name: 
LLM_TYPE_CHECKLIST + value: *llm-type + - name: LLM_TYPE_VDB_CODE_RETRIEVER + value: *llm-type + - name: LLM_TYPE_VDB_DOC_RETRIEVER + value: *llm-type + - name: LLM_TYPE_AGENT_EXECUTOR + value: *llm-type + - name: LLM_TYPE_SUMMARIZE + value: *llm-type + - name: LLM_TYPE_JUSTIFY + value: *llm-type + - name: LLM_TYPE_INTEL_SOURCE_SCORE + value: *llm-type + - name: LLM_API_KEY_CHECKLIST + value: *api-key + - name: LLM_API_KEY_CODE_VDB_RETRIEVER + value: *api-key + - name: LLM_API_KEY_DOC_VDB_RETRIEVER + value: *api-key + - name: LLM_API_KEY_AGENT_EXECUTOR + value: *api-key + - name: LLM_API_KEY_SUMMARIZE + value: *api-key + - name: LLM_API_KEY_JUSTIFY + value: *api-key + - name: LLM_API_KEY_INTEL_SOURCE_SCORE + value: *api-key + - name: CHECKLIST_LLM_API_BASE + value: *openai-llm-baseurl + - name: CODE_VDB_RETRIEVER_API_BASE + value: *openai-llm-baseurl + - name: DOC_VDB_RETRIEVER_API_BASE + value: *openai-llm-baseurl + - name: AGENT_EXECUTOR_LLM_API_BASE + value: *openai-llm-baseurl + - name: GENERATE_CVSS_LLM_API_BASE + value: *openai-llm-baseurl + - name: SUMMARIZE_LLM_API_BASE + value: *openai-llm-baseurl + - name: JUSTIFY_LLM_API_BASE + value: *openai-llm-baseurl + - name: INTEL_SOURCE_SCORE_LLM_API_BASE + value: *openai-llm-baseurl + - name: CHECKLIST_MODEL_NAME + value: *model-name + - name: CODE_VDB_RETRIEVER_MODEL_NAME + value: *model-name + - name: DOC_VDB_RETRIEVER_MODEL_NAME + value: *model-name + - name: AGENT_EXECUTOR_MODEL_NAME + value: *model-name + - name: SUMMARIZE_MODEL_NAME + value: *model-name + - name: JUSTIFY_MODEL_NAME + value: *model-name + - name: INTEL_SOURCE_SCORE_MODEL_NAME + value: *model-name + - name: GENERATE_CVSS_MODEL_NAME + value: *model-name diff --git a/kustomize/overlays/self-hosted-llama3.1-70b-4bit/kustomization.yaml b/kustomize/overlays/self-hosted-llama3.1-70b-4bit/kustomization.yaml index f44ae4703..c0948da4c 100644 --- a/kustomize/overlays/self-hosted-llama3.1-70b-4bit/kustomization.yaml +++ 
b/kustomize/overlays/self-hosted-llama3.1-70b-4bit/kustomization.yaml @@ -2,139 +2,11 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - - ../../base +- ../../base commonAnnotations: deployment-variant: local-llama3.1-70b-4bit -patchesStrategicMerge: - - nginx-patch.yaml - - |- - apiVersion: apps/v1 - kind: Deployment - metadata: - name: exploit-iq - labels: - app: exploit-iq - component: exploit-iq - annotations: - llm-type: &llm-type openai - api-key: &api-key "EMPTY" - openai-llm-baseurl: &openai-llm-baseurl http://nginx-cache:8080/openai/v1 - model-name: &model-name hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4 - spec: - selector: - matchLabels: - app: exploit-iq - component: exploit-iq - template: - metadata: - labels: - app: exploit-iq - component: exploit-iq - spec: - containers: - - name: exploit-iq - imagePullPolicy: Always - workingDir: /workspace/ - env: - - name: LLM_TYPE_CHECKLIST - value: *llm-type - - - - name: LLM_TYPE_VDB_CODE_RETRIEVER - value: *llm-type - - - - name: LLM_TYPE_VDB_DOC_RETRIEVER - value: *llm-type - - - - name: LLM_TYPE_AGENT_EXECUTOR - value: *llm-type - - - name: LLM_TYPE_SUMMARIZE - value: *llm-type - - - - name: LLM_TYPE_JUSTIFY - value: *llm-type - - - name: LLM_TYPE_INTEL_SOURCE_SCORE - value: *llm-type - - - name: LLM_API_KEY_CHECKLIST - value: *api-key - - - name: LLM_API_KEY_CODE_VDB_RETRIEVER - value: *api-key - - - name: LLM_API_KEY_DOC_VDB_RETRIEVER - value: *api-key - - - name: LLM_API_KEY_AGENT_EXECUTOR - value: *api-key - - - name: LLM_API_KEY_SUMMARIZE - value: *api-key - - - name: LLM_API_KEY_JUSTIFY - value: *api-key - - - name: LLM_API_KEY_INTEL_SOURCE_SCORE - value: *api-key - - - - name: CHECKLIST_LLM_API_BASE - value: *openai-llm-baseurl - - - name: CODE_VDB_RETRIEVER_API_BASE - value: *openai-llm-baseurl - - - - name: DOC_VDB_RETRIEVER_API_BASE - value: *openai-llm-baseurl - - - name: AGENT_EXECUTOR_LLM_API_BASE - value: *openai-llm-baseurl - - - name: 
GENERATE_CVSS_LLM_API_BASE - value: *openai-llm-baseurl - - - name: SUMMARIZE_LLM_API_BASE - value: *openai-llm-baseurl - - - name: JUSTIFY_LLM_API_BASE - value: *openai-llm-baseurl - - - name: INTEL_SOURCE_SCORE_LLM_API_BASE - value: *openai-llm-baseurl - - - name: CHECKLIST_MODEL_NAME - value: *model-name - - - name: CODE_VDB_RETRIEVER_MODEL_NAME - value: *model-name - - - name: DOC_VDB_RETRIEVER_MODEL_NAME - value: *model-name - - - name: AGENT_EXECUTOR_MODEL_NAME - value: *model-name - - - name: SUMMARIZE_MODEL_NAME - value: *model-name - - - name: JUSTIFY_MODEL_NAME - value: *model-name - - - name: INTEL_SOURCE_SCORE_MODEL_NAME - value: *model-name - - - name: GENERATE_CVSS_MODEL_NAME - value: *model-name - - - - - +patches: +- path: nginx-patch.yaml +- path: exploit-iq-llm-patch.yaml diff --git a/kustomize/overlays/self-hosted-llama3.1-70b-4bit/nginx-patch.yaml b/kustomize/overlays/self-hosted-llama3.1-70b-4bit/nginx-patch.yaml index b3d598543..8e3fb5e70 100644 --- a/kustomize/overlays/self-hosted-llama3.1-70b-4bit/nginx-patch.yaml +++ b/kustomize/overlays/self-hosted-llama3.1-70b-4bit/nginx-patch.yaml @@ -10,5 +10,7 @@ spec: env: - name: NGINX_UPSTREAM_NIM_LLM value: http://llama3-1-70b-instruct-4bit.exploit-iq-models.svc.cluster.local:8000 + - name: NGINX_UPSTREAM_NIM_EMBED + value: http://llama3-1-70b-instruct-4bit.exploit-iq-models.svc.cluster.local:8000 - name: NGINX_UPSTREAM_OPENAI value: http://llama3-1-70b-instruct-4bit.exploit-iq-models.svc.cluster.local:8000 diff --git a/kustomize/overlays/tests/kustomization.yaml b/kustomize/overlays/tests/kustomization.yaml index db6df1cf6..74ea831cc 100644 --- a/kustomize/overlays/tests/kustomization.yaml +++ b/kustomize/overlays/tests/kustomization.yaml @@ -14,12 +14,7 @@ resources: - secrets/exploit-iq-automation-token.yaml secretGenerator: - - name: exploit-iq-pull-secret - files: - - .dockerconfigjson=secrets/exploit-iq-ips.json - type: kubernetes.io/dockerconfigjson - - - name: 
ecosystem-appeng-exploit-iq-quay + - name: ecosystem-appeng-morpheus-quay files: - .dockerconfigjson=secrets/exploit-iq-ips.json type: kubernetes.io/dockerconfigjson diff --git a/pyproject.toml b/pyproject.toml index 4a11258c4..7a72008a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ dependencies = [ "nemollm", "openinference-instrumentation-langchain~=0.1.31", "ordered_set", + "packageurl-python", "pydpkg==1.9.4", "rank_bm25==0.2.2", "tantivy==0.22.2", diff --git a/src/vuln_analysis/functions/build_agent_graph_defs.py b/src/vuln_analysis/functions/build_agent_graph_defs.py index 698ff1b84..5f98c5dbb 100644 --- a/src/vuln_analysis/functions/build_agent_graph_defs.py +++ b/src/vuln_analysis/functions/build_agent_graph_defs.py @@ -48,6 +48,12 @@ ) logger = logging.getLogger(__name__) +BASE_KERNEL_PACKAGE = "kernel" +KERNEL_RHEL_CONFIG_SUFFIX = "rhel.config" +KERNEL_DEBUG_FLAVOR = "debug" +KERNEL_RT_FLAVOR = "rt" +KERNEL_RT_DEBUG_FLAVOR = "rt-debug" + # --------------------------------------------------------------------------- # Data Models @@ -407,16 +413,82 @@ def _is_kernel_package(source_path: Path) -> bool: return False -def _find_kernel_config_file(source_path: Path, arch: str) -> Path | None: - """Find the kernel config file for a specific architecture. +def _kernel_package_flavor(package_name: str | None) -> str | None: + """Return the kernel sub-flavor from an RPM name (e.g. 
kernel-rt -> rt).""" + if not package_name or package_name == BASE_KERNEL_PACKAGE: + return None + prefix = f"{BASE_KERNEL_PACKAGE}-" + if not package_name.startswith(prefix): + return None + return package_name.removeprefix(prefix) + + +def _kernel_config_candidate_names(arch: str, package_name: str | None) -> list[str]: + """Build ordered RHEL kernel config basenames for a package and architecture.""" + flavor = _kernel_package_flavor(package_name) + candidates: list[str] = [] + + if flavor == KERNEL_RT_DEBUG_FLAVOR: + # kernel-rt-debug -> support both layouts seen in RHEL sources: + # kernel-rt-x86_64-debug-rhel.config + # kernel-x86_64-rt-debug-rhel.config + candidates.append( + f"kernel-{KERNEL_RT_FLAVOR}-{arch}-{KERNEL_DEBUG_FLAVOR}-{KERNEL_RHEL_CONFIG_SUFFIX}" + ) + candidates.append( + f"kernel-{arch}-{KERNEL_RT_FLAVOR}-{KERNEL_DEBUG_FLAVOR}-{KERNEL_RHEL_CONFIG_SUFFIX}" + ) + elif flavor and KERNEL_DEBUG_FLAVOR in flavor: + # kernel-debug -> kernel-x86_64-debug-rhel.config + candidates.append( + f"kernel-{arch}-{KERNEL_DEBUG_FLAVOR}-{KERNEL_RHEL_CONFIG_SUFFIX}" + ) + elif flavor: + # kernel-rt -> kernel-rt-x86_64-rhel.config; also try flavor-after-arch variant + candidates.append(f"kernel-{flavor}-{arch}-{KERNEL_RHEL_CONFIG_SUFFIX}") + candidates.append(f"kernel-{arch}-{flavor}-{KERNEL_RHEL_CONFIG_SUFFIX}") + else: + candidates.append(f"kernel-{arch}-{KERNEL_RHEL_CONFIG_SUFFIX}") + + return list(dict.fromkeys(candidates)) + + +def _score_kernel_config_basename(name: str, arch: str, flavor: str | None) -> int: + """Rank glob matches: exact segment match wins over substring.""" + if arch not in name: + return -1 + + score = 0 + if flavor: + if f"-{flavor}-" in name or name.startswith(f"kernel-{flavor}-"): + score += 50 + elif flavor in name: + score += 25 + + wants_debug = flavor is not None and KERNEL_DEBUG_FLAVOR in flavor + is_debug = f"-{KERNEL_DEBUG_FLAVOR}-" in name + if is_debug: + score += 20 if wants_debug else -40 - RHEL kernel packages store config 
files in the source root with naming - pattern: kernel-{arch}-rhel.config (base flavor) or - kernel-{arch}-{flavor}-rhel.config (debug, rt, etc.) + return score + + +def _find_kernel_config_file( + source_path: Path, + arch: str, + package_name: str | None = None, +) -> Path | None: + """Find the kernel config file for a specific architecture and package flavor. + + RHEL SRPMs use several naming layouts in the source root, for example: + - kernel-x86_64-rhel.config (base kernel) + - kernel-rt-x86_64-rhel.config (kernel-rt: flavor before arch) + - kernel-x86_64-debug-rhel.config (kernel-debug: flavor after arch) Args: source_path: Path to the source directory arch: Target architecture (e.g., 'x86_64', 'aarch64') + package_name: RPM package name (e.g., 'kernel', 'kernel-rt') Returns: Path to the config file, or None if not found @@ -424,20 +496,58 @@ def _find_kernel_config_file(source_path: Path, arch: str) -> Path | None: if not source_path or not source_path.exists() or not arch: return None - # Try base flavor first: kernel-{arch}-rhel.config - config_path = source_path / f"kernel-{arch}-rhel.config" - if config_path.exists(): - logger.info("_find_kernel_config_file: found config at %s", config_path) - return config_path + flavor = _kernel_package_flavor(package_name) + for candidate_name in _kernel_config_candidate_names(arch, package_name): + config_path = source_path / candidate_name + if config_path.is_file(): + logger.info( + "_find_kernel_config_file: matched candidate %s for package=%s arch=%s", + candidate_name, + package_name, + arch, + ) + return config_path + + glob_pattern = f"kernel*{arch}*{KERNEL_RHEL_CONFIG_SUFFIX}" + glob_matches = [path for path in source_path.glob(glob_pattern) if path.is_file()] + if not glob_matches: + logger.warning( + "_find_kernel_config_file: no config found for package=%s arch=%s in %s", + package_name, + arch, + source_path, + ) + return None - # Fallback: any kernel-{arch}*.config - for config in 
source_path.glob(f"kernel-{arch}*.config"): - if config.is_file(): - logger.info("_find_kernel_config_file: found fallback config at %s", config) - return config + best_match = max( + glob_matches, + key=lambda path: _score_kernel_config_basename(path.name, arch, flavor), + ) + best_score = _score_kernel_config_basename(best_match.name, arch, flavor) + if best_score < 0: + logger.warning( + "_find_kernel_config_file: glob results did not contain arch %s in %s", + arch, + source_path, + ) + return None + if flavor and best_score <= 0: + logger.warning( + "_find_kernel_config_file: fallback %s does not match flavor=%s for package=%s arch=%s", + best_match, + flavor, + package_name, + arch, + ) + return None - logger.warning("_find_kernel_config_file: no config found for arch %s in %s", arch, source_path) - return None + logger.info( + "_find_kernel_config_file: selected fallback %s for package=%s arch=%s", + best_match, + package_name, + arch, + ) + return best_match def _find_kernel_source_root(source_path: Path) -> Path | None: @@ -528,9 +638,10 @@ async def harvest_build_data( logger.info("harvest_build_data: detected kernel package") # Find kernel config file for the target architecture if arch: - config_file = _find_kernel_config_file(source_path, arch) + config_file = _find_kernel_config_file(source_path, arch, package_name) if config_file: - kernel_config_path = str(config_file) + # Basename only: L2 Source Grep uses it as a file_glob under source root. 
+ kernel_config_path = config_file.name # Find kernel source root (contains Kconfig, Makefiles) source_root = _find_kernel_source_root(source_path) if source_root: diff --git a/src/vuln_analysis/functions/cve_build_agent.py b/src/vuln_analysis/functions/cve_build_agent.py index 86b55a52f..0c5b80269 100644 --- a/src/vuln_analysis/functions/cve_build_agent.py +++ b/src/vuln_analysis/functions/cve_build_agent.py @@ -77,7 +77,7 @@ L2_KERNEL_THOUGHT_INSTRUCTIONS, ) from vuln_analysis.runtime_context import ctx_state -from vuln_analysis.utils.token_utils import truncate_tool_output +from vuln_analysis.utils.token_utils import truncate_tool_output, truncate_tool_output_list import uuid import tiktoken logger = LoggingFactory.get_agent_logger(__name__) @@ -455,7 +455,7 @@ async def observation_node(state: BuildAgentState) -> dict: tool_output_for_llm = tool_message.content # Check for empty/error outputs - bypass LLM if so to prevent hallucination - empty_findings = check_empty_output(tool_output_for_llm, tool_used, tool_input_detail) + empty_findings, _ = check_empty_output(tool_output_for_llm, tool_used, tool_input_detail) if empty_findings: # Build-specific: empty grep for file in logs = NOT_COMPILED evidence if tool_used == "Source Grep" and "logs:" in tool_input_detail: @@ -469,22 +469,39 @@ async def observation_node(state: BuildAgentState) -> dict: ) code_findings = empty_findings else: - # Step 1: Comprehension - extract findings from tool output - comp_prompt = L2_COMPREHENSION_PROMPT.format( - vuln_id=vuln_id, - target_package=target_package_name, - vulnerability_intel=vulnerability_intel_str, - disabled_features=", ".join(harvest_report.disabled_features) if harvest_report.disabled_features else "None", - spec_disabled_features=", ".join(harvest_report.spec_disabled_features) if harvest_report.spec_disabled_features else "None", - enabled_features=", ".join(harvest_report.enabled_features) if harvest_report.enabled_features else "None", - 
spec_enabled_features=", ".join(harvest_report.spec_enabled_features) if harvest_report.spec_enabled_features else "None", - tool_used=tool_used, - tool_input=tool_input_detail, - last_thought=last_thought_text, - tool_output=truncate_tool_output(tool_output_for_llm, tool_used, max_tokens=1000), - ) - code_findings: CodeFindings = await invoke_comprehension( - comprehension_llm, comp_prompt, tool_used, tool_input_detail, tool_output_for_llm, agent_label="L2", + # Step 1: Comprehension - split into chunks and process each + chunks = truncate_tool_output_list(tool_output_for_llm, tool_used, max_tokens=1000) + all_findings = [] + best_tool_outcome = "" + + for chunk in chunks: + comp_prompt = L2_COMPREHENSION_PROMPT.format( + vuln_id=vuln_id, + target_package=target_package_name, + vulnerability_intel=vulnerability_intel_str, + disabled_features=", ".join(harvest_report.disabled_features) if harvest_report.disabled_features else "None", + spec_disabled_features=", ".join(harvest_report.spec_disabled_features) if harvest_report.spec_disabled_features else "None", + enabled_features=", ".join(harvest_report.enabled_features) if harvest_report.enabled_features else "None", + spec_enabled_features=", ".join(harvest_report.spec_enabled_features) if harvest_report.spec_enabled_features else "None", + tool_used=tool_used, + tool_input=tool_input_detail, + last_thought=last_thought_text, + tool_output=chunk, + ) + chunk_findings: CodeFindings = await invoke_comprehension( + comprehension_llm, comp_prompt, tool_used, tool_input_detail, chunk, agent_label="L2", + ) + all_findings.extend(chunk_findings.findings) + # Keep tool_outcome from chunk with actual findings (not FAILED) + if not best_tool_outcome or ( + chunk_findings.findings and + not any("FAILED" in f for f in chunk_findings.findings) + ): + best_tool_outcome = chunk_findings.tool_outcome + + code_findings = CodeFindings( + findings=all_findings, + tool_outcome=best_tool_outcome or "No matches found" ) findings_text = 
"\n".join(f"- {f}" for f in code_findings.findings) diff --git a/src/vuln_analysis/functions/cve_generate_vex.py b/src/vuln_analysis/functions/cve_generate_vex.py index 3e3c227bc..7333554b5 100644 --- a/src/vuln_analysis/functions/cve_generate_vex.py +++ b/src/vuln_analysis/functions/cve_generate_vex.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json +import uuid from aiq.builder.builder import Builder from aiq.builder.function_info import FunctionInfo @@ -43,12 +43,15 @@ async def _arun(state: ExploitIqEngineState) -> ExploitIqEngineState: return state if not any(justification.get("justification_label") == "vulnerable" for justification in state.justifications.values()): - logger.info("No vulnerable CVE(s) found. Skipping VEX generation.") - return state + logger.info("No vulnerable CVE(s) found. Generating VEX with known_not_affected status.") try: generator = load_vex_generator(config.vex_format) vex_doc = generator.generate(state) + if vex_doc: + tracking = vex_doc.get("document", {}).get("tracking") + if tracking and tracking.get("id"): + tracking["id"] = f"{tracking['id']}-{uuid.uuid4()}" state.vex = vex_doc except ValueError as e: logger.error("VEX generator initialization failed: %s", e) diff --git a/src/vuln_analysis/functions/cve_package_code_agent.py b/src/vuln_analysis/functions/cve_package_code_agent.py index 4327153cd..a624bc569 100644 --- a/src/vuln_analysis/functions/cve_package_code_agent.py +++ b/src/vuln_analysis/functions/cve_package_code_agent.py @@ -51,17 +51,17 @@ ) from vuln_analysis.utils.rpm_checker_prompts import ( L1_AGENT_SYS_PROMPT_PATCH_AVAILABLE, - L1_AGENT_SYS_PROMPT_UPSTREAM_PATCH, L1_AGENT_SYS_PROMPT_REBASE_FIX, L1_AGENT_SYS_PROMPT_REBASE_NO_PATCH, L1_AGENT_PROMPT_TEMPLATE, L1_AGENT_PROMPT_TEMPLATE_NO_PATCH, L1_AGENT_THOUGHT_INSTRUCTIONS, - L1_AGENT_THOUGHT_UPSTREAM_INSTRUCTIONS, L1_AGENT_THOUGHT_REBASE_INSTRUCTIONS, 
L1_AGENT_THOUGHT_CVE_DESC_INSTRUCTIONS, + select_upstream_prompt_and_instructions, L1_COMPREHENSION_PROMPT, L1_MEMORY_UPDATE_PROMPT, + L1_EMPTY_RESULT_CLASSIFICATION_PROMPT, VULNERABILITY_INTEL_EXTRACTION_PROMPT, ) from vuln_analysis.tools.brew_downloader import BrewDownloader, BrewDownloaderError, resolve_brew_profile @@ -72,7 +72,7 @@ from vuln_analysis.utils.vulnerability_intel_sanitizer import VulnerabilityIntelSanitizer from vuln_analysis.utils.reference_fetcher import ReferenceFetcher from vuln_analysis.utils.reference_parser import ReflectiveReferenceParser, ParserConfig -from vuln_analysis.utils.token_utils import truncate_tool_output +from vuln_analysis.utils.token_utils import truncate_tool_output, truncate_tool_output_list from vuln_analysis.runtime_context import ctx_state logger = LoggingFactory.get_agent_logger(__name__) @@ -425,7 +425,7 @@ async def create_graph_code_agent(config: CVEPackageCodeAgentConfig, builder: Bu tools = builder.get_tools(tool_names=config.tool_names, wrapper_type=LLMFrameworkEnum.LANGCHAIN) thought_llm = llm.with_structured_output(CheckerThought) - comprehension_llm = llm.with_structured_output(CodeFindings) + structured_comprehension_llm = llm.with_structured_output(CodeFindings) observation_llm = llm.with_structured_output(Observation) vulnerability_intel_llm = llm.with_structured_output(VulnerabilityIntel) # Get tool names after filtering for dynamic guidance @@ -693,30 +693,37 @@ async def L1_agent(state: CodeAgentState) -> dict: }) # use case 3: in target patch was not found but patch is found in the rpm that was mention in cve that is fixed elif upstream_report and upstream_report.fixed_parsed_patch: + sys_prompt, tool_instructions = select_upstream_prompt_and_instructions( + vulnerability_intel.vulnerable_patterns + ) runtime_prompt = L1_AGENT_PROMPT_TEMPLATE.format( - sys_prompt=L1_AGENT_SYS_PROMPT_UPSTREAM_PATCH, + sys_prompt=sys_prompt, vuln_id=vuln_id, target_package=target_package.name, 
vulnerability_intel=vulnerability_intel.format_for_prompt(), tools=tools_str, tool_selection_strategy=tool_strategy, - tool_instructions=L1_AGENT_THOUGHT_UPSTREAM_INSTRUCTIONS, + tool_instructions=tool_instructions, ) span.set_output({ "mode": "upstream_patch_verification", "patch_filename": upstream_report.fixed_srpm_file_name, + "prompt_variant": "case_b" if not vulnerability_intel.vulnerable_patterns else "case_a", }) # use case 4: Fix commit discovered via git search elif git_search_report and git_search_report.parsed_patch: + sys_prompt, tool_instructions = select_upstream_prompt_and_instructions( + vulnerability_intel.vulnerable_patterns + ) runtime_prompt = L1_AGENT_PROMPT_TEMPLATE.format( - sys_prompt=L1_AGENT_SYS_PROMPT_UPSTREAM_PATCH, + sys_prompt=sys_prompt, vuln_id=vuln_id, target_package=target_package.name, vulnerability_intel=vulnerability_intel.format_for_prompt(), tools=tools_str, tool_selection_strategy=tool_strategy, - tool_instructions=L1_AGENT_THOUGHT_UPSTREAM_INSTRUCTIONS, + tool_instructions=tool_instructions, ) span.set_output({ @@ -724,6 +731,7 @@ async def L1_agent(state: CodeAgentState) -> dict: "commit_hash": git_search_report.best_result.commit_hash_short if git_search_report.best_result else "", "confidence": git_search_report.best_result.confidence if git_search_report.best_result else 0, "search_method": git_search_report.best_result.search_method if git_search_report.best_result else "", + "prompt_variant": "case_b" if not vulnerability_intel.vulnerable_patterns else "case_a", }) else: # Use case 4: Default prompt - no patch context, use VulnerabilityIntel from CVE description @@ -1320,60 +1328,99 @@ async def observation_node(state: CodeAgentState) -> dict: with tracer.push_active_function("observation node", input_data=f"tool used:{tool_used} + {tool_input_detail}") as span: tool_output_for_llm = tool_message.content - # Check for empty/error outputs - bypass LLM if so to prevent hallucination - empty_findings = 
check_empty_output(tool_output_for_llm, tool_used, tool_input_detail) + # Check for empty/error outputs + # For empty Source Grep on source code, let LLM classify against RAW_PATCH_DIFF + # to distinguish VULNERABLE_CODE_ABSENT vs FIX_CODE_ABSENT + empty_findings, needs_llm_classification = check_empty_output( + tool_output_for_llm, tool_used, tool_input_detail, + allow_llm_classification=True + ) + + # Get parsed_patch from state for raw diff context (needed for classification and comprehension) + downstream_report = state.get("downstream_report") + upstream_report = state.get("upstream_report") + parsed_patch = None + + if downstream_report: + if isinstance(downstream_report, dict): + parsed_patch = downstream_report.get('parsed_patch') + else: + parsed_patch = getattr(downstream_report, 'parsed_patch', None) + + if not parsed_patch and upstream_report: + if isinstance(upstream_report, dict): + parsed_patch = upstream_report.get('fixed_parsed_patch') + else: + parsed_patch = getattr(upstream_report, 'fixed_parsed_patch', None) + + # If parsed_patch is a dict, convert it to ParsedPatch model + if parsed_patch and isinstance(parsed_patch, dict): + try: + parsed_patch = ParsedPatch(**parsed_patch) + except Exception as e: + logger.warning("Failed to parse parsed_patch dict: %s", e) + parsed_patch = None + + logger.debug("observation_node: parsed_patch=%s, downstream=%s, upstream=%s", + parsed_patch is not None, + downstream_report is not None, + upstream_report is not None) + + # Extract relevant hunks based on grep target file + raw_patch_diff = "" + if tool_used == "Source Grep" and parsed_patch: + raw_patch_diff = get_relevant_hunks(parsed_patch, tool_input_detail) + if empty_findings: code_findings = empty_findings - else: - # Get parsed_patch from state for raw diff context - # Reports may be Pydantic models or dicts depending on state serialization - downstream_report = state.get("downstream_report") - upstream_report = state.get("upstream_report") - 
parsed_patch = None - - if downstream_report: - if isinstance(downstream_report, dict): - parsed_patch = downstream_report.get('parsed_patch') - else: - parsed_patch = getattr(downstream_report, 'parsed_patch', None) - - if not parsed_patch and upstream_report: - if isinstance(upstream_report, dict): - parsed_patch = upstream_report.get('fixed_parsed_patch') - else: - parsed_patch = getattr(upstream_report, 'fixed_parsed_patch', None) - - # If parsed_patch is a dict, convert it to ParsedPatch model - if parsed_patch and isinstance(parsed_patch, dict): - try: - parsed_patch = ParsedPatch(**parsed_patch) - except Exception as e: - logger.warning("Failed to parse parsed_patch dict: %s", e) - parsed_patch = None - - logger.debug("observation_node: parsed_patch=%s, downstream=%s, upstream=%s", - parsed_patch is not None, - downstream_report is not None, - upstream_report is not None) - - # Extract relevant hunks based on grep target file - raw_patch_diff = "" - if tool_used == "Source Grep" and parsed_patch: - raw_patch_diff = get_relevant_hunks(parsed_patch, tool_input_detail) - - # Step 1: Comprehension - extract key findings from raw tool output - comp_prompt = L1_COMPREHENSION_PROMPT.format( - vuln_id=vuln_id, - target_package=target_package_name, - vulnerability_intel=intel_formatted, - raw_patch_diff=raw_patch_diff, + elif needs_llm_classification: + # Empty source grep - use classification prompt to determine meaning + classification_prompt = L1_EMPTY_RESULT_CLASSIFICATION_PROMPT.format( tool_used=tool_used, - tool_input=tool_input_detail, last_thought=last_thought_text, - tool_output=truncate_tool_output(tool_output_for_llm, tool_used, max_tokens=1000), + tool_input=tool_input_detail, + raw_patch_diff=raw_patch_diff if raw_patch_diff else "No patch diff available", ) - code_findings: CodeFindings = await invoke_comprehension( - comprehension_llm, comp_prompt, tool_used, tool_input_detail, tool_output_for_llm, agent_label="L1", + code_findings = await 
structured_comprehension_llm.ainvoke( + [SystemMessage(content=classification_prompt)] + ) + logger.debug("Empty source grep classified: %s", code_findings.findings) + else: + # Has actual content - split into chunks and process each + chunks = truncate_tool_output_list(tool_output_for_llm, tool_used, max_tokens=1000) + all_findings = [] + best_tool_outcome = "" + + for chunk in chunks: + comp_prompt = L1_COMPREHENSION_PROMPT.format( + vuln_id=vuln_id, + target_package=target_package_name, + vulnerability_intel=intel_formatted, + raw_patch_diff=raw_patch_diff, + tool_used=tool_used, + tool_input=tool_input_detail, + last_thought=last_thought_text, + tool_output=chunk, + ) + chunk_findings = await invoke_comprehension( + structured_comprehension_llm, + comp_prompt, + tool_used, + tool_input_detail, + chunk, + agent_label="L1", + ) + all_findings.extend(chunk_findings.findings) + # Keep tool_outcome from chunk with actual findings (not FAILED) + if not best_tool_outcome or ( + chunk_findings.findings and + not any("FAILED" in f for f in chunk_findings.findings) + ): + best_tool_outcome = chunk_findings.tool_outcome + + code_findings = CodeFindings( + findings=all_findings, + tool_outcome=best_tool_outcome or "No matches found" ) findings_text = "\n".join(f"- {f}" for f in code_findings.findings) diff --git a/src/vuln_analysis/functions/react_internals.py b/src/vuln_analysis/functions/react_internals.py index 22300de74..4a7c74913 100644 --- a/src/vuln_analysis/functions/react_internals.py +++ b/src/vuln_analysis/functions/react_internals.py @@ -107,13 +107,25 @@ def check_empty_output( tool_output: str | list, tool_used: str, tool_input: str, -) -> CodeFindings | None: + allow_llm_classification: bool = False, +) -> tuple[CodeFindings | None, bool]: """Check if tool output is empty or an error, returning factual CodeFindings if so. - This bypasses LLM comprehension for empty/error outputs to prevent hallucination. 
+ For empty Source Grep on source code, we may want the LLM to classify the result + against the RAW_PATCH_DIFF (to distinguish VULNERABLE_CODE_ABSENT vs FIX_CODE_ABSENT). + + Args: + tool_output: The raw output from the tool. + tool_used: Name of the tool (e.g., "Source Grep"). + tool_input: The input/query passed to the tool. + allow_llm_classification: If True, signal that empty Source Grep on source + code needs LLM classification against the diff. Returns: - CodeFindings with factual empty/error message, or None if output has content. + Tuple of (CodeFindings | None, needs_llm_classification: bool). + - If CodeFindings is returned, use it directly. + - If CodeFindings is None and needs_llm_classification is True, call classification LLM. + - If CodeFindings is None and needs_llm_classification is False, output has content. """ is_empty = ( not tool_output @@ -126,29 +138,49 @@ def check_empty_output( and any(m in tool_output for m in ["Error:", "error:", "Failed:", "Exception:", "Traceback"]) ) - if is_empty: - return CodeFindings( - findings=[f"{tool_used} for '{tool_input}' returned empty - no matches found"], - tool_outcome=f"CALLED: {tool_used} with {tool_input} -> EMPTY (no results)" + if is_error: + return ( + CodeFindings( + findings=[ + f"FAILED: {tool_used} [{tool_input}] - tool error", + f"Details: {str(tool_output)[:150]}" + ], + tool_outcome=f"FAILED: {tool_used} with {tool_input} -> ERROR" + ), + False, ) - if is_error: - return CodeFindings( - findings=[ - f"FAILED: {tool_used} [{tool_input}] - tool error", - f"Details: {str(tool_output)[:150]}" - ], - tool_outcome=f"FAILED: {tool_used} with {tool_input} -> ERROR" + if is_empty: + is_source_grep_on_source = ( + tool_used == "Source Grep" + and not tool_input.startswith("logs:") + and not tool_input.startswith("patch:") + ) + if allow_llm_classification and is_source_grep_on_source: + return (None, True) + + return ( + CodeFindings( + findings=[f"{tool_used} for '{tool_input}' returned empty - no 
matches found"], + tool_outcome=f"CALLED: {tool_used} with {tool_input} -> EMPTY (no results)" + ), + False, ) - return None + return (None, False) -async def invoke_comprehension(llm, prompt: str, tool_used: str, tool_input: str, tool_output: str, - agent_label: str = "") -> CodeFindings: +async def invoke_comprehension( + structured_comprehension_llm, + prompt: str, + tool_used: str, + tool_input: str, + tool_output: str, + agent_label: str = "", +) -> CodeFindings: """Invoke comprehension LLM with fallback on token limit overflow.""" try: - return await llm.ainvoke([SystemMessage(content=prompt)]) + return await structured_comprehension_llm.ainvoke([SystemMessage(content=prompt)]) except LengthFinishReasonError: logger.warning("%s comprehension LLM hit token limit (tool=%s), using fallback", agent_label, tool_used) summary = tool_output[:500] if isinstance(tool_output, str) else str(tool_output)[:500] diff --git a/src/vuln_analysis/tools/source_grep.py b/src/vuln_analysis/tools/source_grep.py index b549df58f..9220937e0 100644 --- a/src/vuln_analysis/tools/source_grep.py +++ b/src/vuln_analysis/tools/source_grep.py @@ -55,6 +55,9 @@ class SourceGrepToolConfig(FunctionBaseConfig, name=SOURCE_GREP): VALID_TARGETS = ("source", "logs", "patch") +QUERY_FILE_SEPARATOR = "," +QUERY_MULTI_PATTERN_SEPARATOR = ";" +FILE_GLOB_WILDCARDS = ("*", "?", "/") TARGET_EXTENSIONS: dict[str, list[str]] = { "source": ["*.c", "*.h", "*.cpp", "*.hpp", "*.py", "*.go", "*.java", "*.spec", "*.cmake", "Makefile", "*.mk", "*.config"], @@ -63,6 +66,14 @@ class SourceGrepToolConfig(FunctionBaseConfig, name=SOURCE_GREP): } +def _looks_like_file_glob(token: str) -> bool: + """Return True when token appears to be a filename or glob.""" + if not token: + return False + filename = Path(token).name + return any(ch in token for ch in FILE_GLOB_WILDCARDS) or "." 
in filename + + def _parse_query(query: str) -> tuple[str | list[str], str | None, str, bool]: """Parse query string into (pattern(s), file_glob, target, word_boundary). @@ -74,6 +85,7 @@ def _parse_query(query: str) -> tuple[str | list[str], str | None, str, bool]: - "pattern -w" -> search with word boundary (whole words only) - "target:pattern,file_glob -w" -> full format with word boundary - "pattern1;pattern2,file.c" -> multiple patterns (only with file_glob) + - "pattern1,pattern2,file.c" -> compatibility syntax (normalized to multi-pattern + file) Valid targets: source, logs, patch @@ -94,16 +106,32 @@ def _parse_query(query: str) -> tuple[str | list[str], str | None, str, bool]: target = prefix query = rest - if "," in query: - parts = query.split(",", 1) + if QUERY_FILE_SEPARATOR in query: + parts = query.split(QUERY_FILE_SEPARATOR, 1) pattern_part = parts[0].strip() file_glob = parts[1].strip() if len(parts) > 1 else None # Multi-pattern support: only when file_glob is provided - if file_glob and ";" in pattern_part: - patterns = [p.strip() for p in pattern_part.split(";") if p.strip()] + if file_glob and QUERY_MULTI_PATTERN_SEPARATOR in pattern_part: + patterns = [p.strip() for p in pattern_part.split(QUERY_MULTI_PATTERN_SEPARATOR) if p.strip()] return patterns, file_glob, target, word_boundary + # Compatibility mode: + # LLMs sometimes emit "pattern1,pattern2,file.c" instead of + # the documented "pattern1;pattern2,file.c". Normalize it. 
+ if file_glob and QUERY_FILE_SEPARATOR in file_glob: + trailing_parts = [p.strip() for p in file_glob.split(QUERY_FILE_SEPARATOR) if p.strip()] + candidate_file_glob = trailing_parts[-1] if trailing_parts else "" + extra_patterns = trailing_parts[:-1] + + if _looks_like_file_glob(candidate_file_glob): + patterns = [pattern_part, *extra_patterns] + patterns = [p for p in patterns if p] + if len(patterns) > 1: + return patterns, candidate_file_glob, target, word_boundary + if len(patterns) == 1: + return patterns[0], candidate_file_glob, target, word_boundary + return pattern_part, file_glob, target, word_boundary return query, None, target, word_boundary @@ -145,12 +173,14 @@ async def _arun(query: str) -> str: Options: - -w: Match whole words only (word boundary) - Multiple patterns: use ';' separator ONLY with a specific file + - Compatibility: "pattern1,pattern2,file.c" is accepted and normalized Examples: - 'archive_read_open' - search source files - 'archive_read_open,*.c' - search only .c source files - 'archive_read_open -w' - search for whole word only - 'unsigned int cursor;unsigned int nodes,archive_read.c' - multiple patterns in one file + - 'sum2,s2length,match.c' - compatibility syntax for multiple patterns in one file - 'logs:undefined reference' - search build logs for link errors - 'logs:error:' - search build logs for error messages - 'patch:CVE-2026-5121' - find patch for specific CVE diff --git a/src/vuln_analysis/tools/tests/test_concurrency.py b/src/vuln_analysis/tools/tests/test_concurrency.py index b6f4e5235..8dcafd494 100644 --- a/src/vuln_analysis/tools/tests/test_concurrency.py +++ b/src/vuln_analysis/tools/tests/test_concurrency.py @@ -19,6 +19,8 @@ from vuln_analysis.data_models.state import ExploitIqEngineState from vuln_analysis.tools.transitive_code_search import ( _build_or_get_cached, + _build_searcher, + get_git_and_pickle_base_dirs, _searcher_cache, _searcher_building, _repo_build_locks, @@ -139,7 +141,7 @@ def 
_make_slow_builder(build_log, sleep_secs=0.2, java=True): """ lock = threading.Lock() - def slow_build(si, query, uber_jar_file_threshold=_DEFAULT_THRESHOLD): + def slow_build(si, query, uber_jar_file_threshold=_DEFAULT_THRESHOLD,base_dirs=()): tag = query.split(",")[0] start = time.monotonic() time.sleep(sleep_secs) @@ -234,8 +236,8 @@ async def test_java_same_repo_different_packages_are_serialized(): with patch("vuln_analysis.tools.transitive_code_search._build_searcher", side_effect=_make_slow_builder(build_log, java=True)): - task1 = asyncio.create_task(_build_or_get_cached(si, "pkg-a:art-a:1.0,ClassA.foo", _DEFAULT_THRESHOLD)) - task2 = asyncio.create_task(_build_or_get_cached(si, "pkg-b:art-b:2.0,ClassB.bar", _DEFAULT_THRESHOLD)) + task1 = asyncio.create_task(_build_or_get_cached(si=si, query="pkg-a:art-a:1.0,ClassA.foo", uber_jar_file_threshold=_DEFAULT_THRESHOLD )) + task2 = asyncio.create_task(_build_or_get_cached(si=si, query="pkg-b:art-b:2.0,ClassB.bar", uber_jar_file_threshold=_DEFAULT_THRESHOLD)) await asyncio.gather(task1, task2) assert len(build_log) == 2, f"Expected 2 Java builds (different packages), got {len(build_log)}" @@ -256,7 +258,7 @@ async def test_different_repos_can_build_concurrently(): si_b = _make_si("https://github.com/example/repo-b") build_log = [] - def slow_build(si, query, uber_jar_file_threshold=_DEFAULT_THRESHOLD): + def slow_build(si, query, uber_jar_file_threshold=_DEFAULT_THRESHOLD,base_dirs=()): tag = si[0].git_repo.split("/")[-1] start = time.monotonic() time.sleep(0.2) @@ -266,8 +268,8 @@ def slow_build(si, query, uber_jar_file_threshold=_DEFAULT_THRESHOLD): with patch("vuln_analysis.tools.transitive_code_search._build_searcher", side_effect=slow_build): - task1 = asyncio.create_task(_build_or_get_cached(si_a, "pkg-a:art-a:1.0,Foo.bar", _DEFAULT_THRESHOLD)) - task2 = asyncio.create_task(_build_or_get_cached(si_b, "pkg-b:art-b:2.0,Baz.qux", _DEFAULT_THRESHOLD)) + task1 = asyncio.create_task(_build_or_get_cached(si_a, 
query="pkg-a:art-a:1.0,Foo.bar", uber_jar_file_threshold=_DEFAULT_THRESHOLD, base_dirs=())) + task2 = asyncio.create_task(_build_or_get_cached(si_b, query="pkg-b:art-b:2.0,Baz.qux", uber_jar_file_threshold=_DEFAULT_THRESHOLD, base_dirs=())) await asyncio.gather(task1, task2) assert len(build_log) == 2 @@ -288,7 +290,7 @@ async def test_same_key_deduplicates_build(): build_count = 0 count_lock = threading.Lock() - def counting_build(build_si, q, uber_jar_file_threshold=_DEFAULT_THRESHOLD): + def counting_build(build_si, q, uber_jar_file_threshold=_DEFAULT_THRESHOLD,base_dirs=()): nonlocal build_count with count_lock: build_count += 1 @@ -297,8 +299,8 @@ def counting_build(build_si, q, uber_jar_file_threshold=_DEFAULT_THRESHOLD): with patch("vuln_analysis.tools.transitive_code_search._build_searcher", side_effect=counting_build): - task1 = asyncio.create_task(_build_or_get_cached(si, query, _DEFAULT_THRESHOLD)) - task2 = asyncio.create_task(_build_or_get_cached(si, query, _DEFAULT_THRESHOLD)) + task1 = asyncio.create_task(_build_or_get_cached(si, query=query, uber_jar_file_threshold=_DEFAULT_THRESHOLD, base_dirs=())) + task2 = asyncio.create_task(_build_or_get_cached(si, query=query, uber_jar_file_threshold=_DEFAULT_THRESHOLD, base_dirs=())) results = await asyncio.gather(task1, task2) assert build_count == 1, f"Expected 1 build (deduplicated), got {build_count}" @@ -319,14 +321,14 @@ async def test_cache_hit_skips_build(): build_count = 0 - def counting_build(build_si, q, uber_jar_file_threshold=_DEFAULT_THRESHOLD): + def counting_build(build_si, q, uber_jar_file_threshold=_DEFAULT_THRESHOLD,base_dirs=()): nonlocal build_count build_count += 1 return _make_nonjava_searcher() with patch("vuln_analysis.tools.transitive_code_search._build_searcher", side_effect=counting_build): - result = await _build_or_get_cached(si, query, _DEFAULT_THRESHOLD) + result = await _build_or_get_cached(si, query=query, uber_jar_file_threshold=_DEFAULT_THRESHOLD, base_dirs=()) assert 
build_count == 0, "Build should not run when cache hit exists" assert result is pre_cached, "Should return the pre-cached searcher" @@ -345,14 +347,14 @@ async def test_java_cache_hit_skips_build(): build_count = 0 - def counting_build(build_si, q, uber_jar_file_threshold=_DEFAULT_THRESHOLD): + def counting_build(build_si, q, uber_jar_file_threshold=_DEFAULT_THRESHOLD,base_dirs=()): nonlocal build_count build_count += 1 return _make_java_searcher() with patch("vuln_analysis.tools.transitive_code_search._build_searcher", side_effect=counting_build): - result = await _build_or_get_cached(si, query, _DEFAULT_THRESHOLD) + result = await _build_or_get_cached(si, query=query, uber_jar_file_threshold=_DEFAULT_THRESHOLD, base_dirs=()) assert build_count == 0, "Build should not run when Java cache hit exists" assert result is pre_cached, "Should return the pre-cached Java searcher" @@ -367,13 +369,13 @@ async def test_build_failure_cleans_up_building_marker(): full_key = ("https://github.com/example/repo", "main", "pkg-a:art-a:1.0") repo_key = ("https://github.com/example/repo", "main") - def failing_build(build_si, q, uber_jar_file_threshold=_DEFAULT_THRESHOLD): + def failing_build(build_si, q, uber_jar_file_threshold=_DEFAULT_THRESHOLD,base_dirs=()): raise RuntimeError("Maven failed") with patch("vuln_analysis.tools.transitive_code_search._build_searcher", side_effect=failing_build): with pytest.raises(RuntimeError, match="Maven failed"): - await _build_or_get_cached(si, query, _DEFAULT_THRESHOLD) + await _build_or_get_cached(si, query=query, uber_jar_file_threshold=_DEFAULT_THRESHOLD, base_dirs=()) assert full_key not in _searcher_building, "Building marker not cleaned up after failure" assert full_key not in _searcher_cache, "Failed build should not be cached" @@ -394,7 +396,7 @@ async def test_java_repo_lock_recheck_avoids_redundant_build(): build_count = 0 count_lock = threading.Lock() - def build_that_precaches_b(build_si, q, 
uber_jar_file_threshold=_DEFAULT_THRESHOLD): + def build_that_precaches_b(build_si, q, uber_jar_file_threshold=_DEFAULT_THRESHOLD,base_dirs=()): nonlocal build_count with count_lock: build_count += 1 @@ -406,9 +408,9 @@ def build_that_precaches_b(build_si, q, uber_jar_file_threshold=_DEFAULT_THRESHO with patch("vuln_analysis.tools.transitive_code_search._build_searcher", side_effect=build_that_precaches_b): - task1 = asyncio.create_task(_build_or_get_cached(si, query_a, _DEFAULT_THRESHOLD)) + task1 = asyncio.create_task(_build_or_get_cached(si, query=query_a, uber_jar_file_threshold=_DEFAULT_THRESHOLD, base_dirs=())) await asyncio.sleep(0.01) - task2 = asyncio.create_task(_build_or_get_cached(si, query_b, _DEFAULT_THRESHOLD)) + task2 = asyncio.create_task(_build_or_get_cached(si, query=query_b, uber_jar_file_threshold=_DEFAULT_THRESHOLD, base_dirs=())) await asyncio.gather(task1, task2) assert build_count == 1, ( @@ -429,7 +431,7 @@ async def test_nonjava_same_repo_different_packages_share_cache(): build_count = 0 count_lock = threading.Lock() - def counting_build(build_si, q, uber_jar_file_threshold=_DEFAULT_THRESHOLD): + def counting_build(build_si, q, uber_jar_file_threshold=_DEFAULT_THRESHOLD,base_dirs=()): nonlocal build_count with count_lock: build_count += 1 @@ -438,8 +440,9 @@ def counting_build(build_si, q, uber_jar_file_threshold=_DEFAULT_THRESHOLD): with patch("vuln_analysis.tools.transitive_code_search._build_searcher", side_effect=counting_build): - task1 = asyncio.create_task(_build_or_get_cached(si, "crypto/x509,ParsePKCS1PrivateKey", _DEFAULT_THRESHOLD)) - task2 = asyncio.create_task(_build_or_get_cached(si, "net/http,ListenAndServe", _DEFAULT_THRESHOLD)) + task1 = asyncio.create_task( + _build_or_get_cached(si, query="crypto/x509,ParsePKCS1PrivateKey", uber_jar_file_threshold=_DEFAULT_THRESHOLD, base_dirs=())) + task2 = asyncio.create_task(_build_or_get_cached(si, query="net/http,ListenAndServe", uber_jar_file_threshold=_DEFAULT_THRESHOLD, 
base_dirs=())) results = await asyncio.gather(task1, task2) # Non-Java: both should resolve to the same (repo, ref) cache entry. @@ -461,12 +464,91 @@ async def test_nonjava_caches_under_repo_key(): with patch("vuln_analysis.tools.transitive_code_search._build_searcher", return_value=_make_nonjava_searcher()): - await _build_or_get_cached(si, "crypto/x509,ParsePKCS1PrivateKey", _DEFAULT_THRESHOLD) + await _build_or_get_cached(si, query="crypto/x509,ParsePKCS1PrivateKey", uber_jar_file_threshold=_DEFAULT_THRESHOLD, base_dirs=()) assert repo_key in _searcher_cache, "Non-Java should cache under repo_key" assert full_key not in _searcher_cache, "Non-Java should NOT cache under full_key" +# --------------------------------------------------------------------------- +# Base Dirs Cache Path Alignment Tests +# --------------------------------------------------------------------------- + +class TestGetGitAndPickleBaseDirs: + """Tests for get_git_and_pickle_base_dirs — reads config from Builder.""" + + @pytest.mark.asyncio + async def test_returns_empty_tuple_when_builder_is_none(self): + result = await get_git_and_pickle_base_dirs(None) + assert result == () + + @pytest.mark.asyncio + async def test_returns_empty_tuple_when_config_is_not_segmentation(self): + builder = MagicMock() + builder.get_function_config.return_value = "some_other_config" + result = await get_git_and_pickle_base_dirs(builder) + assert result == () + builder.get_function_config.assert_called_once_with("cve_segmentation") + + @pytest.mark.asyncio + async def test_returns_dirs_from_segmentation_config(self): + from vuln_analysis.functions.cve_segmentation import CVESegmentationConfig + config = CVESegmentationConfig( + agent_name="cve_agent_executor", + embedder_name="nim_embedder", + base_git_dir="/data/git", + base_pickle_dir="/data/pickle", + ) + builder = MagicMock() + builder.get_function_config.return_value = config + result = await get_git_and_pickle_base_dirs(builder) + assert result == 
("/data/git", "/data/pickle") + + +class TestBuildSearcherBaseDirs: + """Tests for _build_searcher — ensures configured base_dirs reach DocumentEmbedding.""" + + def test_empty_base_dirs_uses_defaults(self): + """Empty tuple should create DocumentEmbedding with default dirs.""" + si = [SourceDocumentsInfo(git_repo="https://github.com/example/repo", ref="main", type="code")] + with patch("vuln_analysis.tools.transitive_code_search.get_call_of_chains_retriever") as mock_get_coc, \ + patch("vuln_analysis.tools.transitive_code_search.DocumentEmbedding") as mock_de: + mock_get_coc.return_value = MagicMock() + _build_searcher(si, "pkg,Func", _DEFAULT_THRESHOLD, base_dirs=()) + mock_de.assert_called_once_with(embedding=None) + + def test_valid_base_dirs_passed_to_document_embedding(self): + """Two-element tuple should pass git_directory and pickle_cache_directory.""" + si = [SourceDocumentsInfo(git_repo="https://github.com/example/repo", ref="main", type="code")] + with patch("vuln_analysis.tools.transitive_code_search.get_call_of_chains_retriever") as mock_get_coc, \ + patch("vuln_analysis.tools.transitive_code_search.DocumentEmbedding") as mock_de: + mock_get_coc.return_value = MagicMock() + _build_searcher(si, "pkg,Func", _DEFAULT_THRESHOLD, base_dirs=("/custom/git", "/custom/pickle")) + mock_de.assert_called_once_with( + embedding=None, + pickle_cache_directory="/custom/pickle", + git_directory="/custom/git", + ) + + def test_single_element_tuple_falls_back_to_defaults(self): + """Tuple with wrong length should fall back to defaults, not crash.""" + si = [SourceDocumentsInfo(git_repo="https://github.com/example/repo", ref="main", type="code")] + with patch("vuln_analysis.tools.transitive_code_search.get_call_of_chains_retriever") as mock_get_coc, \ + patch("vuln_analysis.tools.transitive_code_search.DocumentEmbedding") as mock_de: + mock_get_coc.return_value = MagicMock() + _build_searcher(si, "pkg,Func", _DEFAULT_THRESHOLD, base_dirs=("/only/one",)) + 
mock_de.assert_called_once_with(embedding=None) + + def test_default_parameter_uses_defaults(self): + """Omitting base_dirs entirely should use defaults.""" + si = [SourceDocumentsInfo(git_repo="https://github.com/example/repo", ref="main", type="code")] + with patch("vuln_analysis.tools.transitive_code_search.get_call_of_chains_retriever") as mock_get_coc, \ + patch("vuln_analysis.tools.transitive_code_search.DocumentEmbedding") as mock_de: + mock_get_coc.return_value = MagicMock() + _build_searcher(si, "pkg,Func", _DEFAULT_THRESHOLD) + mock_de.assert_called_once_with(embedding=None) + + # --------------------------------------------------------------------------- # Split Clone/Segmentation Pipeline Tests # --------------------------------------------------------------------------- diff --git a/src/vuln_analysis/tools/tests/test_transitive_code_search.py b/src/vuln_analysis/tools/tests/test_transitive_code_search.py index 9965dda09..1950d5c00 100644 --- a/src/vuln_analysis/tools/tests/test_transitive_code_search.py +++ b/src/vuln_analysis/tools/tests/test_transitive_code_search.py @@ -24,7 +24,6 @@ from exploit_iq_commons.utils.git_utils import sanitize_git_url_for_path, get_repo_path_with_ref from pathlib import Path - @pytest.fixture(autouse=True) def patch_repo_path_with_fallback(): """ diff --git a/src/vuln_analysis/tools/transitive_code_search.py b/src/vuln_analysis/tools/transitive_code_search.py index 75892ba8e..b1a911346 100644 --- a/src/vuln_analysis/tools/transitive_code_search.py +++ b/src/vuln_analysis/tools/transitive_code_search.py @@ -38,6 +38,7 @@ from exploit_iq_commons.logging.loggers_factory import LoggingFactory from exploit_iq_commons.utils.java_chain_of_calls_retriever import JavaChainOfCallsRetriever, _release_repo_data +from ..functions.cve_segmentation import CVESegmentationConfig PACKAGE_AND_FUNCTION_LOCATOR_TOOL_NAME = "package_and_function_locator" @@ -77,6 +78,14 @@ "Please retry with the correct format." 
) +async def get_git_and_pickle_base_dirs(builder: Builder) ->tuple: + if builder is None: + return () + segmentation_config = builder.get_function_config("cve_segmentation") + if isinstance(segmentation_config, CVESegmentationConfig): + return segmentation_config.base_git_dir, segmentation_config.base_pickle_dir + else: + return () def _summarize_call_chain(call_hierarchy_list: list[Document]) -> list[str]: """Summarize a call chain into concise strings for the agent scratchpad. @@ -209,7 +218,7 @@ def _get_cache_keys(si, query: str) -> tuple[tuple | None, tuple | None]: return None, None -def _build_searcher(si, query: str, uber_jar_file_threshold: int) -> TransitiveCodeSearcher: +def _build_searcher(si, query: str, uber_jar_file_threshold: int, base_dirs : tuple = ()) -> TransitiveCodeSearcher: """Synchronous helper that builds a TransitiveCodeSearcher. Separated so it can be offloaded to a thread via asyncio.to_thread(), @@ -218,12 +227,21 @@ def _build_searcher(si, query: str, uber_jar_file_threshold: int) -> TransitiveC package-specific retrievers via _JavaRepoData (in-memory cache keyed by (git_repo, ref), backed by pickle sub-caches on disk). """ - documents_embedder = DocumentEmbedding(embedding=None) + if len(base_dirs) == 2: + git_base_dir_config, pickle_base_dir_config = base_dirs + documents_embedder = DocumentEmbedding( + embedding=None, + pickle_cache_directory=pickle_base_dir_config, + git_directory=git_base_dir_config, + ) + else: + documents_embedder = DocumentEmbedding(embedding=None) + coc_retriever = get_call_of_chains_retriever(documents_embedder, si, query, uber_jar_file_threshold) return TransitiveCodeSearcher(chain_of_calls_retriever=coc_retriever) -async def _build_or_get_cached(si, query: str, uber_jar_file_threshold: int) -> TransitiveCodeSearcher: +async def _build_or_get_cached(si, query: str, uber_jar_file_threshold: int, base_dirs: tuple = ()) -> TransitiveCodeSearcher: """Build a TransitiveCodeSearcher, or return a cached one. 
Cache keys: @@ -239,6 +257,7 @@ async def _build_or_get_cached(si, query: str, uber_jar_file_threshold: int) -> - Per-repo serialization via asyncio.Lock (protects shared filesystem: git checkout, install_dependencies, document creation). - The expensive build runs in a thread via asyncio.to_thread(). + :param base_dirs: a tuple contains base dirs of cache - (git_base_dir, pickle_base_dir) """ repo_key, full_key = _get_cache_keys(si, query) @@ -296,7 +315,7 @@ async def _build_or_get_cached(si, query: str, uber_jar_file_threshold: int) -> # other tasks can read/write the cache while this build runs, # but no concurrent build on the same repo's filesystem. logger.info("Building TransitiveCodeSearcher for %s", full_key) - searcher = await asyncio.to_thread(_build_searcher, si, query, uber_jar_file_threshold) + searcher = await asyncio.to_thread(_build_searcher, si, query, uber_jar_file_threshold, base_dirs) async with _searcher_cache_lock: # Cache under the appropriate key based on ecosystem @@ -328,13 +347,13 @@ async def _build_or_get_cached(si, query: str, uber_jar_file_threshold: int) -> raise -async def get_transitive_code_searcher(query: str): +async def get_transitive_code_searcher(query: str, base_dirs: tuple): state: ExploitIqEngineState = ctx_state.get() si = state.original_input.input.image.source_info threshold = state.uber_jar_file_threshold if state.transitive_code_searcher is None: - state.transitive_code_searcher = await _build_or_get_cached(si, query, threshold) + state.transitive_code_searcher = await _build_or_get_cached(si, query, threshold, base_dirs) elif isinstance(state.transitive_code_searcher.chain_of_calls_retriever, JavaChainOfCallsRetriever): # Java: different queries produce different dep trees (build_tree uses # -DtargetIncludes for GAV queries), so rebuild when the package changes. 
@@ -346,7 +365,7 @@ async def get_transitive_code_searcher(query: str): if cached is not None and cached is state.transitive_code_searcher: pass # Same searcher, no change needed else: - state.transitive_code_searcher = await _build_or_get_cached(si, query, threshold) + state.transitive_code_searcher = await _build_or_get_cached(si, query, threshold, base_dirs) # Both Java and non-Java retrievers use per-search context objects (_JavaSearchCtx / _SearchCtx) # for mutable state, so the retriever instance is immutable after __init__ and needs no deep copy. @@ -376,7 +395,9 @@ async def _arun(query: str) -> tuple: if not is_valid: return False, [validation_result] transitive_code_searcher: TransitiveCodeSearcher - transitive_code_searcher = await get_transitive_code_searcher(validation_result) + base_dirs = await get_git_and_pickle_base_dirs(builder) + + transitive_code_searcher = await get_transitive_code_searcher(validation_result, base_dirs) found_path, call_hierarchy_list = transitive_code_searcher.search(validation_result) # Return concise call chain summary instead of full Document objects # to avoid blowing up the agent's context window with source code. 
@@ -427,7 +448,8 @@ async def functions_usage_search(config: CallingFunctionNameExtractorToolConfig, async def _arun(query: str) -> list: coc_retriever: ChainOfCallsRetrieverBase transitive_code_searcher: TransitiveCodeSearcher - transitive_code_searcher = await get_transitive_code_searcher(query) + base_dirs = await get_git_and_pickle_base_dirs(builder) + transitive_code_searcher = await get_transitive_code_searcher(query, base_dirs) coc_retriever = transitive_code_searcher.chain_of_calls_retriever function_name_extractor = FunctionNameExtractor(coc_retriever) result = function_name_extractor.fetch_list(query) @@ -458,7 +480,8 @@ async def _arun(query: str) -> dict: return {"error": validation_result} coc_retriever: ChainOfCallsRetrieverBase transitive_code_searcher: TransitiveCodeSearcher - transitive_code_searcher = await get_transitive_code_searcher(validation_result) + base_dirs = await get_git_and_pickle_base_dirs(builder) + transitive_code_searcher = await get_transitive_code_searcher(validation_result, base_dirs) coc_retriever = transitive_code_searcher.chain_of_calls_retriever locator = FunctionNameLocator(coc_retriever) result = await locator.locate_functions(validation_result) @@ -491,7 +514,8 @@ async def library_version_finder(config: FunctionLibraryVersionFinderToolConfig, @catch_tool_errors(FUNCTION_LIBRARY_VERSION_FINDER_TOOL_NAME) async def _arun(query: str) -> dict: - transitive_code_searcher = await get_transitive_code_searcher(query) + base_dirs = await get_git_and_pickle_base_dirs(builder) + transitive_code_searcher = await get_transitive_code_searcher(query, base_dirs) coc_retriever = transitive_code_searcher.chain_of_calls_retriever # Clean the query: strip whitespace, trailing junk after newlines, then quotes (including unicode smart quotes) @@ -535,4 +559,4 @@ async def _arun(query: str) -> dict: Returns: {'ecosystem': str, 'found': bool, 'message': str, 'matching_packages': list}. 
For Java, matching_packages contains Maven GAV coordinates like 'groupId:artifactId:version'. -""")) \ No newline at end of file +""")) diff --git a/src/vuln_analysis/utils/rpm_checker_prompts.py b/src/vuln_analysis/utils/rpm_checker_prompts.py index 3a7b82bf3..6007e8ad8 100644 --- a/src/vuln_analysis/utils/rpm_checker_prompts.py +++ b/src/vuln_analysis/utils/rpm_checker_prompts.py @@ -180,6 +180,30 @@ "- State confidence level based on evidence quality." ) +L1_AGENT_SYS_PROMPT_UPSTREAM_PATCH_CASE_B = ( + "You are a security analyst verifying whether a package remains VULNERABLE to a CVE.\n" + "The TARGET package does NOT contain a CVE-specific patch file.\n" + "Patterns were extracted from a FIXED RPM patch, but VULNERABLE_PATTERNS is EMPTY.\n\n" + "YOUR TASK: Determine whether the FIX is present in the target source.\n\n" + "VERIFICATION STRATEGY (CASE B - EMPTY VULNERABLE_PATTERNS):\n" + "1. FIRST search FIX_PATTERNS inside each file from AFFECTED_FILES.\n" + "2. If a fix marker is found, verify fix application at the call site.\n" + " - Definition-only matches are insufficient.\n" + " - The fix must be called and its result must be used.\n" + "3. CONCLUSION:\n" + " - If fix is applied at call site → Package is PATCHED via rebase.\n" + " - If fix is absent or unverified → investigate remaining affected files and use version fallback rules.\n\n" + "CRITICAL RULES:\n" + "- Do NOT start with vulnerable-pattern search in this mode.\n" + "- Do NOT assume vulnerability or safety without tool evidence.\n" + "- Base conclusions ONLY on tool results and version-range evidence when required.\n\n" + "ANSWER QUALITY:\n" + "- Cite specific file paths and line numbers from tool results.\n" + "- Quote the actual code found, not just describe it.\n" + "- Clearly state whether fix is present at call site, absent, or unverified.\n" + "- State confidence level based on evidence quality." 
+) + L1_AGENT_SYS_PROMPT_REBASE_FIX = ( "You are a security analyst verifying that a CVE fix is PRESENT in a rebased package.\n" "The TARGET package was REBASED to a newer upstream version that claims to fix this CVE.\n\n" @@ -372,9 +396,14 @@ - SEARCH_KEYWORDS: Terms to grep for PHASE 2 - SOURCE CODE INSPECTION (YOUR TASK): - For EACH item in VULNERABLE_FUNCTIONS and AFFECTED_FILES: + CASE A - VULNERABLE_PATTERNS is NOT empty: 1. Search for vulnerable pattern - it SHOULD exist in unpatched target 2. Search for fix pattern - it should NOT exist in unpatched target + + CASE B - VULNERABLE_PATTERNS is empty: + 1. Search for FIX_PATTERNS in AFFECTED_FILES first + 2. Verify fix is applied at call site (not definition-only) + 3. Do NOT force a vulnerable-pattern search in this case IMPORTANT: Do NOT stop after finding the first file. Check ALL AFFECTED_FILES. PHASE 3 - VERDICT: @@ -384,7 +413,9 @@ - Evidence is sufficient for confident verdict **VERSION-BASED FALLBACK (when code search is inconclusive):** - If your code searches found NO conclusive evidence (no vulnerable pattern, no fix pattern): + If your code searches found NO conclusive evidence: + - CASE A: no vulnerable pattern and no fix pattern found + - CASE B: no fix pattern or no fix call-site verification found - Check TARGET_IN_VULNERABLE_RANGE in VULNERABILITY_INTEL - If TARGET_IN_VULNERABLE_RANGE: YES and no fix was verified in code: → Conclude VULNERABLE based on version evidence @@ -400,11 +431,12 @@ 4. Source Grep: use query field with pattern from VULNERABILITY_INTEL (function name, variable, or code snippet). 5. Code Keyword Search: use query field for broader searches. 6. Do NOT call the same tool with the same input twice - CHECK KNOWLEDGE for prior calls. -7. FIRST search for VULNERABLE code - it SHOULD exist in target. -8. THEN search for FIX code - it should NOT exist in target. -9. If a pattern contains special regex characters, escape them or use literal substrings. -10. 
Before PATCHED: verify FIX_APPLIED_AT_CALL_SITE in ALL AFFECTED_FILES. FIX_DEFINITION_FOUND alone is insufficient. -11. Fix must be CALLED and result USED (assigned/in condition). Called but unused = not applied. +7. IF VULNERABLE_PATTERNS is non-empty: search VULNERABLE code first. +8. IF VULNERABLE_PATTERNS is empty: search FIX_PATTERNS first in AFFECTED_FILES. +9. Do NOT classify as vulnerable solely because VULNERABLE_PATTERNS is empty. +10. If a pattern contains special regex characters, escape them or use literal substrings. +11. Before PATCHED: verify FIX_APPLIED_AT_CALL_SITE in ALL AFFECTED_FILES. FIX_DEFINITION_FOUND alone is insufficient. +12. Fix must be CALLED and result USED (assigned/in condition). Called but unused = not applied. @@ -424,6 +456,9 @@ {{"thought": "No prior searches in KNOWLEDGE. Search for the vulnerable code pattern from the patch", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Locate vulnerable code that should exist in unpatched target"}}, "final_answer": null}} + +{{"thought": "VULNERABLE_PATTERNS is empty. Start with FIX_PATTERNS in affected files.", "mode": "act", "actions": {{"tool": "Source Grep", "query": ",", "reason": "CASE B: verify additive fix markers are present at call site"}}, "final_answer": null}} + {{"thought": "KNOWLEDGE shows function found at iso9660.c:2074. Now verify the fix is NOT present", "mode": "act", "actions": {{"tool": "Source Grep", "query": "", "reason": "Check if fix code is absent (confirms vulnerability)"}}, "final_answer": null}} @@ -442,10 +477,96 @@ {{"thought": "FIX_DEFINITION_FOUND but no call site evidence", "mode": "finish", "actions": null, "final_answer": "UNCERTAIN - fix function exists at [file:line] but usage in AFFECTED_FILES unverified. 
Manual review required."}} + +{{"thought": "VULNERABLE_PATTERNS is empty and fix application is unverified", "mode": "finish", "actions": null, "final_answer": "UNCERTAIN - vulnerable patterns were unavailable and fix call-site evidence is insufficient. Manual review required."}} + {{"thought": "Code searches found no vulnerable or fix patterns - affected module not located in source. However, TARGET_IN_VULNERABLE_RANGE is YES. No fix was verified in code. Version evidence indicates vulnerability.", "mode": "finish", "actions": null, "final_answer": "VULNERABLE (version-based). Target version is within the affected range per VULNERABILITY_INTEL. Code search could not locate the affected module or verify a fix. Based on version evidence and absence of verified fix, the package is vulnerable."}} """ +L1_AGENT_THOUGHT_UPSTREAM_CASE_B_INSTRUCTIONS = """ +You will receive KNOWLEDGE (cumulative findings) and LATEST FINDINGS (most recent tool results). +BEFORE ACTING, you MUST: +1. Review KNOWLEDGE to see what tools were already called (TOOL_CALL_RECORD entries) +2. Review LATEST FINDINGS for the most recent tool output analysis +3. NEVER repeat any action already in TOOL_CALL_RECORD +4. Your next action MUST build on findings - progress the investigation + + + +PHASE 1 - INTELLIGENCE (PRE-COMPLETED): + Review VULNERABILITY_INTEL above. It contains: + - AFFECTED_FILES: Files to verify + - FIX_PATTERNS: Code patterns indicating the fix + - SEARCH_KEYWORDS: Terms to grep for + +PHASE 2 - SOURCE CODE INSPECTION (CASE B ONLY): + VULNERABLE_PATTERNS is empty: + 1. Search FIX_PATTERNS in AFFECTED_FILES first + 2. Verify fix is applied at call site (not definition-only) + 3. Do NOT force a vulnerable-pattern search in this case + IMPORTANT: Do NOT stop after finding the first file. Check ALL AFFECTED_FILES. 
+ +PHASE 3 - VERDICT: + Only conclude when: + - ALL AFFECTED_FILES have been searched + - Evidence is sufficient for confident verdict + + **VERSION-BASED FALLBACK (when code search is inconclusive):** + If code searches found no fix pattern or no fix call-site verification: + - Check TARGET_IN_VULNERABLE_RANGE in VULNERABILITY_INTEL + - If TARGET_IN_VULNERABLE_RANGE: YES and no fix was verified in code: + → Conclude VULNERABLE based on version evidence + → Reason: "Target version is within affected range, and no fix was verified in code." + + + +1. You MUST select a tool ONLY from . Do NOT invent or use any other tool names. +2. Output valid JSON only. thought < 100 words. final_answer < 150 words. +3. mode="act" REQUIRES actions. mode="finish" REQUIRES final_answer. +4. Source Grep: use query field with pattern from VULNERABILITY_INTEL (function name, variable, or code snippet). +5. Code Keyword Search: use query field for broader searches. +6. Do NOT call the same tool with the same input twice - CHECK KNOWLEDGE for prior calls. +7. FIRST action in this mode MUST search FIX_PATTERNS in AFFECTED_FILES. +8. Do NOT perform vulnerable-pattern-first search in this mode. +9. If a pattern contains special regex characters, escape them or use literal substrings. +10. Before PATCHED: verify FIX_APPLIED_AT_CALL_SITE in ALL AFFECTED_FILES. FIX_DEFINITION_FOUND alone is insufficient. +11. Fix must be CALLED and result USED (assigned/in condition). Called but unused = not applied. 
+ + + +If a search returned results: +- Narrow down by searching within that specific file (e.g., "pattern,filename.c") +- Search for related symbols or variables from the code found +If a pattern wasn't found: +- Try simpler substrings or partial patterns +- Try a different tool (Source Grep <-> Code Keyword Search) +- Search for file paths from VULNERABILITY_INTEL AFFECTED_FILES +If KNOWLEDGE shows partial evidence: +- Investigate other files mentioned in VULNERABILITY_INTEL AFFECTED_FILES +- Search for key variables from the fix pattern +If FIX_DEFINITION_FOUND: search AFFECTED_FILES for actual usage before concluding PATCHED. + + + +{{"thought": "No prior searches in KNOWLEDGE. CASE B requires fix-first verification in affected file.", "mode": "act", "actions": {{"tool": "Source Grep", "query": ",", "reason": "CASE B: verify whether fix marker exists in affected file"}}, "final_answer": null}} + + +{{"thought": "Fix marker search completed for one file. Continue to remaining affected files and verify call-site usage where found.", "mode": "act", "actions": {{"tool": "Source Grep", "query": ",", "reason": "Ensure all affected files are checked before verdict"}}, "final_answer": null}} + + +{{"thought": "Fix was verified at call site in affected code", "mode": "finish", "actions": null, "final_answer": "PATCHED via rebase. Fix pattern is present and applied at call site in affected files."}} + + +{{"thought": "Fix not verified in code and target remains in affected version range", "mode": "finish", "actions": null, "final_answer": "VULNERABLE (version-based). 
Target version is within the affected range and no fix was verified at call site in affected files."}} +""" + +def select_upstream_prompt_and_instructions(vulnerable_patterns: list[str]) -> tuple[str, str]: + """Select upstream patch prompts based on vulnerable-pattern availability.""" + if vulnerable_patterns: + return L1_AGENT_SYS_PROMPT_UPSTREAM_PATCH, L1_AGENT_THOUGHT_UPSTREAM_INSTRUCTIONS + return L1_AGENT_SYS_PROMPT_UPSTREAM_PATCH_CASE_B, L1_AGENT_THOUGHT_UPSTREAM_CASE_B_INSTRUCTIONS + L1_AGENT_THOUGHT_REBASE_INSTRUCTIONS = """ You will receive KNOWLEDGE (cumulative findings) and LATEST FINDINGS (most recent tool results). BEFORE ACTING, you MUST: @@ -653,6 +774,54 @@ {{"thought": "KNOWLEDGE shows insufficient evidence AND TARGET_IN_VULNERABLE_RANGE is NO/UNKNOWN - cannot determine patch status from version alone", "mode": "finish", "actions": null, "final_answer": "INCONCLUSIVE. Could not find definitive evidence of fix or vulnerability, and version range is unknown. Manual review recommended."}} """ +# --------------------------------------------------------------------------- +# L1 Empty Result Classification Prompt +# --------------------------------------------------------------------------- + +L1_EMPTY_RESULT_CLASSIFICATION_PROMPT = """Classify an empty grep result for CVE patch verification. + +The grep search returned NO MATCHES. Determine what this means for patch verification. + +TOOL USED: {tool_used} +AGENT THOUGHT (why the search was performed): +{last_thought} + +SEARCHED PATTERN: {tool_input} + +RAW_PATCH_DIFF (for reference): +{raw_patch_diff} + +CLASSIFICATION RULES: +1. Read AGENT THOUGHT to understand what the agent was searching for +2. Determine if the agent was searching for VULNERABLE code or FIX code: + - Keywords like "verify vulnerable", "check if vulnerable", "removed", "absent" → searching for VULNERABLE code + - Keywords like "search for fix", "find fix", "added", "patched" → searching for FIX code +3. 
Classify the empty result: + - Searching for VULNERABLE code + NOT FOUND → VULNERABLE_CODE_ABSENT (good - fix removed it) + - Searching for FIX code + NOT FOUND → FIX_CODE_ABSENT (bad - fix not applied) + - Intent unclear → INCONCLUSIVE + +OUTPUT FORMAT (JSON - CodeFindings): +{{ + "findings": [": not found - "], + "tool_outcome": "{tool_used} [{tool_input}] -> NO MATCHES ()" +}} + +CLASSIFICATION VALUES for findings[0]: +- VULNERABLE_CODE_ABSENT: Agent searched for vulnerable code, it's not found = fix likely applied +- FIX_CODE_ABSENT: Agent searched for fix code, it's not found = fix NOT applied +- INCONCLUSIVE: Cannot determine search intent from agent thought + +EXAMPLE OUTPUTS: +If searching for vulnerable code: +{{"findings": ["VULNERABLE_CODE_ABSENT: switch (data->verdict.code & NF_VERDICT_MASK) not found - vulnerable pattern removed by fix"], "tool_outcome": "{tool_used} [pattern] -> NO MATCHES (vulnerable code absent = fix applied)"}} + +If searching for fix code: +{{"findings": ["FIX_CODE_ABSENT: safe_function() not found - fix pattern missing"], "tool_outcome": "{tool_used} [pattern] -> NO MATCHES (fix code absent = fix NOT applied)"}} + +RESPONSE: +{{""" + # --------------------------------------------------------------------------- # L1 Observation Prompts (Comprehension + Memory Update) # --------------------------------------------------------------------------- diff --git a/src/vuln_analysis/utils/tests/test_vulnerability_intel_sanitizer.py b/src/vuln_analysis/utils/tests/test_vulnerability_intel_sanitizer.py index d98ccbc35..5185ccacf 100644 --- a/src/vuln_analysis/utils/tests/test_vulnerability_intel_sanitizer.py +++ b/src/vuln_analysis/utils/tests/test_vulnerability_intel_sanitizer.py @@ -5,7 +5,7 @@ from exploit_iq_commons.data_models.checker_status import VulnerabilityIntel -from vuln_analysis.functions.code_agent_graph_defs import ParsedPatch, PatchFile +from vuln_analysis.functions.code_agent_graph_defs import ParsedPatch, PatchFile, 
PatchHunk from vuln_analysis.utils.vulnerability_intel_sanitizer import VulnerabilityIntelSanitizer @@ -22,6 +22,58 @@ def _patch_with_util_c() -> ParsedPatch: ) +def _additive_only_patch() -> ParsedPatch: + return ParsedPatch( + patch_filename="additive.patch", + files=[ + PatchFile( + source_path="a/net/sched/act_ct.c", + target_path="b/net/sched/act_ct.c", + hunks=[ + PatchHunk( + source_start=100, + source_length=0, + target_start=100, + target_length=4, + section_header="tcf_ct_init", + context_lines=[], + removed_lines=[], + added_lines=[ + "if (bind && !(flags & TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT)) {", + "return -EOPNOTSUPP;", + "}", + ], + ) + ], + ) + ], + ) + + +def _patch_with_removed_lines() -> ParsedPatch: + return ParsedPatch( + patch_filename="mixed.patch", + files=[ + PatchFile( + source_path="a/foo.c", + target_path="b/foo.c", + hunks=[ + PatchHunk( + source_start=10, + source_length=1, + target_start=10, + target_length=1, + section_header="foo", + context_lines=[], + removed_lines=["unsafe_call();"], + added_lines=["safe_call();"], + ) + ], + ) + ], + ) + + class TestSanitizeAffectedFiles: def test_clears_affected_files_when_no_patch(self): raw = VulnerabilityIntel(affected_files=["generator.c", "tar/util.c"]) @@ -83,6 +135,32 @@ def test_keeps_keyword_with_and(self): assert result.search_keywords == ["foo AND bar"] +class TestSanitizeAdditiveOnlyPatchIntel: + def test_clears_vulnerable_fields_for_additive_only_patch(self): + raw = VulnerabilityIntel( + vulnerable_functions=["classify"], + vulnerable_variables=["skb"], + vulnerable_patterns=["TC_ACT_CONSUMED"], + fix_patterns=["TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT"], + ) + result = VulnerabilityIntelSanitizer(_additive_only_patch()).apply(raw) + assert result.vulnerable_functions == [] + assert result.vulnerable_variables == [] + assert result.vulnerable_patterns == [] + assert result.fix_patterns == ["TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT"] + + def 
test_keeps_vulnerable_fields_when_patch_has_removed_lines(self): + raw = VulnerabilityIntel( + vulnerable_functions=["classify"], + vulnerable_variables=["skb"], + vulnerable_patterns=["TC_ACT_CONSUMED"], + ) + result = VulnerabilityIntelSanitizer(_patch_with_removed_lines()).apply(raw) + assert result.vulnerable_functions == ["classify"] + assert result.vulnerable_variables == ["skb"] + assert result.vulnerable_patterns == ["TC_ACT_CONSUMED"] + + class TestRsyncStyleNoPatch: def test_strips_hallucinated_paths_and_prose(self): raw = VulnerabilityIntel( diff --git a/src/vuln_analysis/utils/token_utils.py b/src/vuln_analysis/utils/token_utils.py index 3624e3e80..fb29bbf6b 100644 --- a/src/vuln_analysis/utils/token_utils.py +++ b/src/vuln_analysis/utils/token_utils.py @@ -112,3 +112,48 @@ def truncate_tool_output(tool_output: str, tool_name: str, max_tokens: int = 400 tail_tokens += lt truncated = token_count - head_tokens - tail_tokens return '\n'.join(head_lines) + f"\n[... truncated {truncated} tokens ...]\n" + '\n'.join(tail_lines) + + +def truncate_tool_output_list( + tool_output: str, + tool_name: str, + max_tokens: int = 1000, + max_chunks: int = 2 +) -> list[str]: + """Split tool output into chunks, each up to max_tokens. Returns up to max_chunks. + + Use this when you want to process large outputs in multiple passes rather than + truncating to a single smaller output. Each chunk can be processed separately + and findings merged. + + Calls truncate_tool_output internally to apply tool-specific truncation logic. 
+ """ + total_tokens = count_tokens(tool_output) + if total_tokens <= max_tokens: + return [truncate_tool_output(tool_output, tool_name, max_tokens)] + + # Split by lines and create raw chunks + lines = tool_output.split('\n') + chunks = [] + current_lines = [] + current_tokens = 0 + + for line in lines: + line_tokens = count_tokens(line) + if current_tokens + line_tokens > max_tokens and current_lines: + # Apply tool-specific truncation to each chunk + raw_chunk = '\n'.join(current_lines) + chunks.append(truncate_tool_output(raw_chunk, tool_name, max_tokens)) + if len(chunks) >= max_chunks: + break + current_lines = [] + current_tokens = 0 + current_lines.append(line) + current_tokens += line_tokens + + # Handle remaining lines as final chunk + if current_lines and len(chunks) < max_chunks: + raw_chunk = '\n'.join(current_lines) + chunks.append(truncate_tool_output(raw_chunk, tool_name, max_tokens)) + + return chunks if chunks else [truncate_tool_output(tool_output, tool_name, max_tokens)] diff --git a/src/vuln_analysis/utils/vex/implementations/csaf_generator.py b/src/vuln_analysis/utils/vex/implementations/csaf_generator.py index 01cb03d05..7078f7ece 100644 --- a/src/vuln_analysis/utils/vex/implementations/csaf_generator.py +++ b/src/vuln_analysis/utils/vex/implementations/csaf_generator.py @@ -27,7 +27,7 @@ from vuln_analysis.data_models.state import ExploitIqEngineState from ..vex_generator_base import VexGenerator -from ..vex_utils import get_vex_validator, build_patch_recommendation +from ..vex_utils import build_oci_image_purl, get_vex_validator, build_patch_recommendation from csaf.generator import CSAFGenerator from exploit_iq_commons.logging.loggers_factory import LoggingFactory @@ -72,6 +72,22 @@ # Justification labels JUSTIFICATION_LABEL_VULNERABLE = "vulnerable" +# ExploitIQ justification labels mapped to CSAF 2.0 VEX flag labels +EXPLOITIQ_TO_CSAF_JUSTIFICATION_MAP: dict[str, str] = { + "false_positive": "component_not_present", + "code_not_present": 
"vulnerable_code_not_present", + "code_not_reachable": "vulnerable_code_not_in_execute_path", + "requires_configuration": "vulnerable_code_cannot_be_controlled_by_adversary", + "requires_dependency": "component_not_present", + "requires_environment": "vulnerable_code_cannot_be_controlled_by_adversary", + "protected_by_compiler": "inline_mitigations_already_exist", + "protected_at_runtime": "inline_mitigations_already_exist", + "protected_at_perimeter": "vulnerable_code_cannot_be_controlled_by_adversary", + "protected_by_mitigating_control": "inline_mitigations_already_exist", + "uncertain": "component_not_present", +} +DEFAULT_CSAF_JUSTIFICATION = "component_not_present" + # Vulnerability statuses STATUS_KNOWN_AFFECTED = "known_affected" STATUS_KNOWN_NOT_AFFECTED = "known_not_affected" @@ -91,6 +107,12 @@ CSAF_SCHEMA_PATH = Path(__file__).resolve().parents[3] / "configs" / "vex" / "csaf" / "v2.0" / "csaf_json_schema.json" +def _map_justification_to_csaf_label(exploitiq_label: str | None) -> str: + if not exploitiq_label: + return DEFAULT_CSAF_JUSTIFICATION + return EXPLOITIQ_TO_CSAF_JUSTIFICATION_MAP.get(exploitiq_label, DEFAULT_CSAF_JUSTIFICATION) + + def _enrich_vulnerabilities_with_notes( csaf_json: Dict[str, Any], intel_map: Dict[str, CveIntel], @@ -163,6 +185,25 @@ def _enrich_vulnerabilities_with_notes( v["notes"] = notes +def _enrich_product_tree_with_purl(csaf_json: Dict[str, Any], purl: str | None) -> None: + """Add product_identification_helper.purl to each product in the product tree.""" + if not purl: + return + + def visit(obj: Any) -> None: + if isinstance(obj, dict): + if "product_id" in obj and "name" in obj: + helper = obj.setdefault("product_identification_helper", {}) + helper["purl"] = purl + for value in obj.values(): + visit(value) + elif isinstance(obj, list): + for item in obj: + visit(item) + + visit(csaf_json.get("product_tree", {})) + + class CsafVexGenerator(VexGenerator): """ CSAF VEX generator. 
Builds a CSAF JSON document and validates it with the csaf-tool. @@ -203,8 +244,10 @@ def generate(self, state: ExploitIqEngineState) -> Dict[str, Any]: ci = intel_map.get(vuln_id) impact = ci.rhsa.threat_severity if ci and ci.rhsa and ci.rhsa.threat_severity else DEFAULT_IMPACT - is_vulnerable = justification.get("justification_label") == JUSTIFICATION_LABEL_VULNERABLE - + justification_label = justification.get("justification_label") + is_vulnerable = justification_label == JUSTIFICATION_LABEL_VULNERABLE + csaf_justification = _map_justification_to_csaf_label(justification_label) + if is_vulnerable: patch_recommendation = build_patch_recommendation(ci, sbom_names) comment = ( @@ -223,7 +266,7 @@ def generate(self, state: ExploitIqEngineState) -> Dict[str, Any]: action=comment ) - else: + else: csaf_gen.add_vulnerability( product_name=product_name, release=product_tag, @@ -231,6 +274,7 @@ def generate(self, state: ExploitIqEngineState) -> Dict[str, Any]: status=STATUS_KNOWN_NOT_AFFECTED, description="", comment=impact, + justification=csaf_justification, ) csaf_gen.generate_csaf() @@ -244,9 +288,13 @@ def generate(self, state: ExploitIqEngineState) -> Dict[str, Any]: csaf_json = json.load(f) # Enrich the CSAF in memory - _enrich_vulnerabilities_with_notes( - csaf_json, intel_map, state.final_summaries, state.justifications - ) + image = message.input.image + if image.analysis_type == "image": + product_purl = build_oci_image_purl(image.name, image.tag, image.digest) + _enrich_product_tree_with_purl(csaf_json, product_purl) + _enrich_vulnerabilities_with_notes( + csaf_json, intel_map, state.final_summaries, state.justifications + ) # Validate the CSAF document against the JSON schema errors = list(get_vex_validator(CSAF_SCHEMA_PATH).iter_errors(csaf_json)) diff --git a/src/vuln_analysis/utils/vex/tests/test_csaf_generator_integration.py b/src/vuln_analysis/utils/vex/tests/test_csaf_generator_integration.py index 096577320..f8bdf4161 100644 --- 
a/src/vuln_analysis/utils/vex/tests/test_csaf_generator_integration.py +++ b/src/vuln_analysis/utils/vex/tests/test_csaf_generator_integration.py @@ -36,6 +36,7 @@ from vuln_analysis.data_models.state import ExploitIqEngineState from vuln_analysis.utils.vex.implementations.csaf_generator import CsafVexGenerator from vuln_analysis.utils.vex.vex_generator_loader import load_vex_generator +from vuln_analysis.utils.vex.vex_utils import build_oci_image_purl _DEFAULT_SOURCE_INFO = [ @@ -161,6 +162,42 @@ def test_product_tree_contains_product(self, mock_state): product_tree = result["product_tree"] assert _DEFAULT_PRODUCT_NAME in product_tree.get("branches")[0].get("branches")[0].get("name") assert _DEFAULT_PRODUCT_TAG in product_tree.get("branches")[0].get("branches")[0].get("branches")[0].get("name") + + def test_product_tree_includes_oci_purl(self, mock_state): + """Test that product tree includes product_identification_helper with OCI purl.""" + generator = CsafVexGenerator() + result = generator.generate(mock_state) + + product = ( + result["product_tree"] + .get("branches")[0] + .get("branches")[0] + .get("branches")[0] + .get("product") + ) + helper = product.get("product_identification_helper", {}) + assert helper.get("purl") == build_oci_image_purl(_DEFAULT_PRODUCT_NAME, _DEFAULT_PRODUCT_TAG) + + def test_product_tree_purl_prefers_digest_over_tag(self): + """Test that explicit digest is used in purl instead of tag.""" + oci_digest = "sha256:abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890" + state = create_mock_state(product_tag="v1.0.0") + state.original_input.input.image.digest = oci_digest + + generator = CsafVexGenerator() + result = generator.generate(state) + + product = ( + result["product_tree"] + .get("branches")[0] + .get("branches")[0] + .get("branches")[0] + .get("product") + ) + helper = product.get("product_identification_helper", {}) + assert helper.get("purl") == build_oci_image_purl( + _DEFAULT_PRODUCT_NAME, "v1.0.0", oci_digest + 
) def test_vulnerable_cve_has_known_affected_status(self, mock_state): """Test that vulnerable CVEs get 'known_affected' status.""" @@ -184,6 +221,26 @@ def test_not_vulnerable_cve_has_known_not_affected_status(self): product_status = vuln.get("product_status", {}) assert "known_not_affected" in product_status + def test_code_not_reachable_maps_to_csaf_execute_path_flag(self): + """Test that code_not_reachable maps to the CSAF execute-path flag.""" + state = create_mock_state( + justification={ + "justification": "Vulnerable function exists but is not called.", + "justification_label": "code_not_reachable", + }, + ) + + generator = CsafVexGenerator() + result = generator.generate(state) + + vuln = result["vulnerabilities"][0] + assert vuln["flags"][0]["label"] == "vulnerable_code_not_in_execute_path" + label_notes = [ + n for n in vuln.get("notes", []) + if n.get("title") == "ExploitIQ Analysis Justification Label" + ] + assert label_notes[0]["text"] == "code_not_reachable" + def test_vulnerable_cve_includes_remediation(self): """Test that vulnerable CVEs include remediation information when patch is available.""" ghsa = CveIntelGhsa( diff --git a/src/vuln_analysis/utils/vex/vex_utils.py b/src/vuln_analysis/utils/vex/vex_utils.py index e27bf227d..f41c92048 100644 --- a/src/vuln_analysis/utils/vex/vex_utils.py +++ b/src/vuln_analysis/utils/vex/vex_utils.py @@ -20,9 +20,11 @@ from pathlib import Path from jsonschema import Draft202012Validator +from packageurl import PackageURL from exploit_iq_commons.data_models.cve_intel import CveIntel from exploit_iq_commons.logging.loggers_factory import LoggingFactory +from urllib.parse import urlparse logger = LoggingFactory.get_agent_logger(__name__) @@ -58,6 +60,56 @@ def get_patched_package(vuln: dict) -> tuple[str | None, str | None]: return pkg.get("name"), vuln.get("first_patched_version") +def build_oci_image_purl( + image_name: str, + tag: str | None = None, + digest: str | None = None, +) -> str | None: + """ + Build 
an OCI package URL (purl) for a container image. + + Prefers an explicit digest, then falls back to the image tag. + """ + image_path = image_name + parsed = urlparse(f"//{image_path}") + registry = parsed.netloc + # qualifiers include registry and full name which all already exist in image_path + qualifiers = {"repository_url": image_path} if image_path else {} + path_parts = [part for part in parsed.path.strip("/").split("/") if part] + if path_parts: + if len(path_parts) > 1: + name = path_parts[-1] + namespace = "/".join(path_parts[:-1]) + else: + name = path_parts[0] + namespace = None + elif parsed.netloc: + name = parsed.netloc + namespace = None + else: + name = image_path + namespace = None + + version = digest or tag + # oci purl specification required to emit namespace, therefor it is set to None + purl = PackageURL( + type="oci", + namespace=None, + name=name, + version=version, + qualifiers=qualifiers if qualifiers else None, + ) + logger.debug( + "Building OCI image purl components: registry=%s, qualifiers=%s, name=%s, version=%s", + registry, + qualifiers, + name, + version, + ) + logger.debug("Resulting OCI image purl: %s", purl.to_string()) + return purl.to_string() + + def build_patch_recommendation(ci: CveIntel, sbom_package_names: set[str] | None) -> str: """ Build a patch recommendation string from GHSA data. 
@@ -93,4 +145,3 @@ def build_patch_recommendation(ci: CveIntel, sbom_package_names: set[str] | None if not name_to_version: return "" return ", ".join(f"{name}:{patch}" for name, patch in name_to_version.items()) - diff --git a/src/vuln_analysis/utils/vulnerability_intel_sanitizer.py b/src/vuln_analysis/utils/vulnerability_intel_sanitizer.py index a703d0e96..99632b919 100644 --- a/src/vuln_analysis/utils/vulnerability_intel_sanitizer.py +++ b/src/vuln_analysis/utils/vulnerability_intel_sanitizer.py @@ -27,6 +27,23 @@ def _has_boolean_operator(keyword: str) -> bool: return bool(_BOOLEAN_OP_RE.search(keyword)) +def _patch_line_presence(parsed_patch: ParsedPatch) -> tuple[bool, bool]: + """Return (has_removed_lines, has_added_lines) across all hunks.""" + has_removed_lines = False + has_added_lines = False + + for patch_file in parsed_patch.files: + for hunk in patch_file.hunks: + if hunk.removed_lines: + has_removed_lines = True + if hunk.added_lines: + has_added_lines = True + if has_removed_lines and has_added_lines: + return has_removed_lines, has_added_lines + + return has_removed_lines, has_added_lines + + class VulnerabilityIntelSanitizer: """Apply shape rules to L1 VulnerabilityIntel; extensible one method per rule.""" @@ -37,8 +54,17 @@ def __init__(self, parsed_patch: ParsedPatch | None = None) -> None: def _has_trusted_patch(self) -> bool: return self._parsed_patch is not None and bool(self._parsed_patch.files) + @property + def _is_additive_only_patch(self) -> bool: + if not self._has_trusted_patch: + return False + + has_removed_lines, has_added_lines = _patch_line_presence(self._parsed_patch) + return has_added_lines and not has_removed_lines + def apply(self, intel: VulnerabilityIntel) -> VulnerabilityIntel: intel = self.sanitize_affected_files(intel) + intel = self.sanitize_additive_only_patch_intel(intel) intel = self.filter_vulnerable_functions(intel) return self.filter_search_keywords(intel) @@ -59,6 +85,19 @@ def filter_vulnerable_functions(self, 
intel: VulnerabilityIntel) -> Vulnerabilit kept = [name for name in intel.vulnerable_functions if " " not in name] return intel.model_copy(update={"vulnerable_functions": kept}) + def sanitize_additive_only_patch_intel(self, intel: VulnerabilityIntel) -> VulnerabilityIntel: + """Drop vulnerable-side intel when patch has only added lines.""" + if not self._is_additive_only_patch: + return intel + + return intel.model_copy( + update={ + "vulnerable_functions": [], + "vulnerable_variables": [], + "vulnerable_patterns": [], + } + ) + def filter_search_keywords(self, intel: VulnerabilityIntel) -> VulnerabilityIntel: kept = [ kw From 9f36af918f832e517e046eba8a9c736dd578754a Mon Sep 17 00:00:00 2001 From: Tamar Weisskopf Date: Sun, 28 Jun 2026 11:59:49 +0300 Subject: [PATCH 14/21] fix image path --- .tekton/on-cm-runner.yaml | 2 +- .tekton/on-pull-request.yaml | 2 +- .tekton/on-push.yaml | 2 +- .tekton/on-tag.yaml | 2 +- kustomize/base/exploit_iq_service.yaml | 4 ++-- kustomize/base/kustomization.yaml | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.tekton/on-cm-runner.yaml b/.tekton/on-cm-runner.yaml index 439d6114e..87a109717 100644 --- a/.tekton/on-cm-runner.yaml +++ b/.tekton/on-cm-runner.yaml @@ -26,7 +26,7 @@ spec: value: "{{ trigger_comment }}" # Point to the image ALREADY built by the PR pipeline - name: target-image - value: quay.io/ecosystem-appeng/agent-morpheus-rh:on-pr-{{revision}} + value: quay.io/ecosystem-appeng/exploit-iq-agent:on-pr-{{revision}} pipelineSpec: params: diff --git a/.tekton/on-pull-request.yaml b/.tekton/on-pull-request.yaml index e72258311..d2e3b8340 100644 --- a/.tekton/on-pull-request.yaml +++ b/.tekton/on-pull-request.yaml @@ -33,7 +33,7 @@ spec: - name: image-expires-after value: 5d - name: output-image - value: quay.io/ecosystem-appeng/agent-morpheus-rh:on-pr-{{revision}} + value: quay.io/ecosystem-appeng/exploit-iq-agent:on-pr-{{revision}} - name: path-context value: . 
- name: dockerfile diff --git a/.tekton/on-push.yaml b/.tekton/on-push.yaml index 5da2cc106..71316d17f 100644 --- a/.tekton/on-push.yaml +++ b/.tekton/on-push.yaml @@ -26,7 +26,7 @@ spec: - name: revision value: "{{ revision }}" - name: output-image - value: quay.io/ecosystem-appeng/agent-morpheus-rh:latest + value: quay.io/ecosystem-appeng/exploit-iq-agent:latest - name: path-context value: . - name: dockerfile diff --git a/.tekton/on-tag.yaml b/.tekton/on-tag.yaml index 08718fd32..c8721f1c1 100644 --- a/.tekton/on-tag.yaml +++ b/.tekton/on-tag.yaml @@ -26,7 +26,7 @@ spec: - name: revision value: "{{ revision }}" - name: output-image - value: 'quay.io/ecosystem-appeng/agent-morpheus-rh' + value: 'quay.io/ecosystem-appeng/exploit-iq-agent' - name: tag-name value: "{{ target_branch }}" - name: path-context diff --git a/kustomize/base/exploit_iq_service.yaml b/kustomize/base/exploit_iq_service.yaml index 2f99c7411..46c36f43f 100644 --- a/kustomize/base/exploit_iq_service.yaml +++ b/kustomize/base/exploit_iq_service.yaml @@ -25,7 +25,7 @@ spec: serviceAccountName: exploit-iq-sa containers: - name: exploit-iq-phoenix-tracing - image: quay.io/ecosystem-appeng/agent-morpheus-rh:nat + image: quay.io/ecosystem-appeng/exploit-iq-agent:nat imagePullPolicy: Always workingDir: /workspace/ args: @@ -45,7 +45,7 @@ spec: memory: "1Gi" cpu: "100m" - name: exploit-iq - image: quay.io/ecosystem-appeng/agent-morpheus-rh:nat + image: quay.io/ecosystem-appeng/exploit-iq-agent:nat imagePullPolicy: Always workingDir: /workspace/ args: diff --git a/kustomize/base/kustomization.yaml b/kustomize/base/kustomization.yaml index 6837ed8c7..be4dfa5c5 100644 --- a/kustomize/base/kustomization.yaml +++ b/kustomize/base/kustomization.yaml @@ -80,7 +80,7 @@ configMapGenerator: options: disableNameSuffixHash: true images: - - name: quay.io/ecosystem-appeng/agent-morpheus-rh + - name: quay.io/ecosystem-appeng/exploit-iq-agent newTag: latest - name: quay.io/ecosystem-appeng/exploit-iq-client From 
014f8a662a30a117c0331549663c455870e27aac Mon Sep 17 00:00:00 2001 From: Tamar Weisskopf Date: Sun, 28 Jun 2026 13:55:28 +0300 Subject: [PATCH 15/21] fixes --- kustomize/README.md | 28 +++++++++---------- kustomize/base/exploit_iq_client.yaml | 4 +-- kustomize/deployer-rbac.yaml | 14 +++++----- .../exploit-iq-client-batch-patch.yaml | 4 +-- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/kustomize/README.md b/kustomize/README.md index 7ce49156d..92a62c139 100644 --- a/kustomize/README.md +++ b/kustomize/README.md @@ -16,7 +16,7 @@ limitations under the License. --> -# Deploying Exploit Intelligence on OpenShift Container Platform +# Deploying ExploitIQ on OpenShift Container Platform ## Prerequisites @@ -119,14 +119,14 @@ EOF ### Step 5. Configure OAuth Credentials -Exploit Intelligence uses OpenShift OAuth for user authentication. The OAuth client secret must be at least 32 bytes (256 bits) because Exploit Intelligence uses it to sign internal session tokens with HS256, which requires a minimum key length of 256 bits. +ExploitIQ uses OpenShift OAuth for user authentication. The OAuth client secret must be at least 32 bytes (256 bits) because ExploitIQ uses it to sign internal session tokens with HS256, which requires a minimum key length of 256 bits. > [!IMPORTANT] > Save the value of `$OAUTH_CLIENT_SECRET` after running the commands below. You need it after deployment to create or update the `OAuthClient` resource. #### First-Time Deployment -Use this procedure only if no `OAuthClient` named `exploit-iq-client` exists on the cluster. If another Exploit Intelligence installation already uses that `OAuthClient`, you must use the [Reusing an Existing OAuthClient](#reusing-an-existing-oauthclient) procedure instead — generating a new secret overwrites the existing one and breaks authentication for all users of that installation. +Use this procedure only if no `OAuthClient` named `exploit-iq-client` exists on the cluster. 
If another ExploitIQ installation already uses that `OAuthClient`, you must use the [Reusing an Existing OAuthClient](#reusing-an-existing-oauthclient) procedure instead — generating a new secret overwrites the existing one and breaks authentication for all users of that installation. Verify that the `OAuthClient` does not exist before proceeding: @@ -186,7 +186,7 @@ find . -type f -name 'exploit-iq-config.yml' -exec sed -i "s|CALLBACK_URL_PLACEH ## Selecting a Deployment Variant -Exploit Intelligence supports the following deployment variants. Run only one deployment command in the next section. +ExploitIQ supports the following deployment variants. Run only one deployment command in the next section. | Variant | Overlay | LLM | Use When | | --- | --- | --- | --- | @@ -196,7 +196,7 @@ Exploit Intelligence supports the following deployment variants. Run only one de --- -## Deploying Exploit Intelligence +## Deploying ExploitIQ ### Deploy with a Self-Hosted LLM @@ -215,7 +215,7 @@ sed -i "s/REPLACE_NAMESPACE/$YOUR_NAMESPACE_NAME/" overlays/mlops/grafana/kustom sed -i "s/REPLACE_NAMESPACE/$YOUR_NAMESPACE_NAME/" overlays/mlops/tempo/kustomization.yaml ``` -Create the Grafana token secret. Retrieve the token value from the Bitwarden vault entry **Exploit Intelligence Grafana SA Token**: +Create the Grafana token secret. Retrieve the token value from the Bitwarden vault entry **ExploitIQ Grafana SA Token**: ```shell oc create secret generic grafana-bearer-token \ @@ -256,7 +256,7 @@ oc kustomize overlays/remote-nim-all | oc apply -f - -n $YOUR_NAMESPACE_NAME ### Configure OpenShift OAuth > [!WARNING] -> Complete this step before attempting to log in to the Exploit Intelligence UI. Authentication fails if the `OAuthClient` resource is not configured correctly. +> Complete this step before attempting to log in to the ExploitIQ UI. Authentication fails if the `OAuthClient` resource is not configured correctly. 
After the deployment completes and the `exploit-iq-client` route is available, configure the OpenShift OAuth client. Select the procedure that matches your situation. @@ -291,9 +291,9 @@ oc patch oauthclient exploit-iq-client \ -p '{"redirectURIs":["http://exploit-iq-client:8080","'$HTTP_ROUTE'","'$HTTPS_ROUTE'"]}' ``` -### Grant Users Access to the Exploit Intelligence UI +### Grant Users Access to the ExploitIQ UI -Access to the Exploit Intelligence UI is controlled by OpenShift group membership. Add users to the `exploit-iq-view` group to grant UI access. Create the group if it does not exist: +Access to the ExploitIQ UI is controlled by OpenShift group membership. Add users to the `exploit-iq-view` group to grant UI access. Create the group if it does not exist: ```shell oc adm groups new exploit-iq-view @@ -458,7 +458,7 @@ oc kustomize overlays/ | oc apply -f - -n $YOUR_NAMESPACE_NAME --- -## Uninstalling Exploit Intelligence +## Uninstalling ExploitIQ Set your deployment variant and run one of the following commands: @@ -482,9 +482,9 @@ kustomize build overlays/$DEPLOYMENT_VARIANT_NAME/ | oc delete -f - --- -## Running Exploit Intelligence Locally +## Running ExploitIQ Locally -You can run Exploit Intelligence on a local machine without GPU hardware, for development, debugging, and troubleshooting. +You can run ExploitIQ on a local machine without GPU hardware, for development, debugging, and troubleshooting. Before you begin, install the following tools and verify that all binaries are available on your system path: @@ -567,7 +567,7 @@ The test variant uses encrypted secret files. 
To decrypt them, you need the foll - [GnuPG](https://www.gnupg.org/download/) - [SOPS](https://github.com/getsops/sops/releases) -- The private decryption key from the Bitwarden vault entry **Exploit Intelligence Tests Deployment Variant Private Key for Decryption** +- The private decryption key from the Bitwarden vault entry **ExploitIQ Tests Deployment Variant Private Key for Decryption** ### Deploying the Test Overlay @@ -737,4 +737,4 @@ oc delete project $(oc project --short -q) If you need to install the OpenShift Pipelines Operator on a new cluster, refer to the [OpenShift Pipelines installation documentation](https://docs.redhat.com/en/documentation/red_hat_openshift_pipelines/1.19/html/installing_and_configuring/installing-pipelines). -To configure the [Exploit Intelligence PAC GitHub application](https://github.com/apps/exploit-iq-pac/) on a new cluster, follow the [PAC GitHub application configuration guide](https://pipelinesascode.com/docs/install/github_apps/#configure-pipelines-as-code-on-your-cluster-to-access-the-github-app). You need the GitHub application private key and the webhook secret from the application settings. +To configure the [ExploitIQ PAC GitHub application](https://github.com/apps/exploit-iq-pac/) on a new cluster, follow the [PAC GitHub application configuration guide](https://pipelinesascode.com/docs/install/github_apps/#configure-pipelines-as-code-on-your-cluster-to-access-the-github-app). You need the GitHub application private key and the webhook secret from the application settings. 
diff --git a/kustomize/base/exploit_iq_client.yaml b/kustomize/base/exploit_iq_client.yaml index d29bbe30d..734b3f582 100644 --- a/kustomize/base/exploit_iq_client.yaml +++ b/kustomize/base/exploit_iq_client.yaml @@ -27,14 +27,14 @@ spec: - ./application - -Dquarkus.http.host=0.0.0.0 - -Dquarkus.log.category."com.redhat.ecosystemappeng.exploitiq".level=DEBUG - image: exploit-iq-test-imagequay.io/ecosystem-appeng/exploit-iq-client:latest + image: quay.io/ecosystem-appeng/exploit-iq-client:latest imagePullPolicy: Always ports: - name: http protocol: TCP containerPort: 8080 env: - - name: QUARKUS_REST-CLIENT_EXPLOIT_IQ_URL + - name: QUARKUS_REST_CLIENT_EXPLOIT_IQ_URL value: http://nginx-cache:8080/generate - name: QUARKUS_MONGODB_HOSTS value: exploit-iq-client-db:27017 diff --git a/kustomize/deployer-rbac.yaml b/kustomize/deployer-rbac.yaml index 8be695176..863d37c33 100644 --- a/kustomize/deployer-rbac.yaml +++ b/kustomize/deployer-rbac.yaml @@ -1,7 +1,7 @@ # deployer-rbac.yaml # # Grants a non-cluster-admin user the minimum permissions required to -# deploy Exploit Intelligence on OpenShift Container Platform. +# deploy ExploitIQ on OpenShift Container Platform. # # Please replace the following placeholders: # — the OpenShift username of the deployer (e.g. jdoe) @@ -12,7 +12,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: - name: exploit-intelligence-oauthclient-deploy + name: exploit-iq-oauthclient-deploy rules: # get and patch scoped to the two project OAuthClients only. 
- apiGroups: @@ -36,11 +36,11 @@ rules: apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: - name: exploit-intelligence-oauthclient-deploy + name: exploit-iq-oauthclient-deploy roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole - name: exploit-intelligence-oauthclient-deploy + name: exploit-iq-oauthclient-deploy subjects: - kind: User name: @@ -48,7 +48,7 @@ subjects: apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: - name: exploit-intelligence-rbac-deploy + name: exploit-iq-rbac-deploy namespace: rules: - apiGroups: @@ -90,12 +90,12 @@ rules: apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: - name: exploit-intelligence-rbac-deploy + name: exploit-iq-rbac-deploy namespace: roleRef: apiGroup: rbac.authorization.k8s.io kind: Role - name: exploit-intelligence-rbac-deploy + name: exploit-iq-rbac-deploy subjects: - kind: User name: diff --git a/kustomize/overlays/batch-processing/exploit-iq-client-batch-patch.yaml b/kustomize/overlays/batch-processing/exploit-iq-client-batch-patch.yaml index a753910a2..6391a08cc 100644 --- a/kustomize/overlays/batch-processing/exploit-iq-client-batch-patch.yaml +++ b/kustomize/overlays/batch-processing/exploit-iq-client-batch-patch.yaml @@ -25,7 +25,7 @@ spec: - name: exploit-iq-client imagePullPolicy: Always env: - - name: MORPHEUS_QUEUE_TIMEOUT + - name: EXPLOIT_IQ_QUEUE_TIMEOUT value: 60m - - name: MORPHEUS_QUEUE_MAX_ACTIVE + - name: EXPLOIT_IQ_QUEUE_MAX_ACTIVE value: "5" From 17470b5964e944831738cec2d023def5e150d003 Mon Sep 17 00:00:00 2001 From: Tamar Weisskopf Date: Sun, 28 Jun 2026 18:04:10 +0300 Subject: [PATCH 16/21] fixes after review --- .gitignore | 2 -- kustomize/base/exploit-iq-config.yml | 4 ++-- kustomize/base/exploit_iq_client.yaml | 2 +- kustomize/base/exploit_iq_service.yaml | 4 ++-- src/vuln_analysis/data/eval_datasets/eval_dataset.json | 3 +-- src/vuln_analysis/register.py | 2 +- 6 files changed, 7 insertions(+), 10 deletions(-) diff --git 
a/.gitignore b/.gitignore index 571ed1f1e..7221e1240 100644 --- a/.gitignore +++ b/.gitignore @@ -2,8 +2,6 @@ kustomize/components/oidc-ca/*.crt -kustomize/components/oidc-ca/*.crt - CLAUDE.md # Ignore anything in the ./.tmp directory diff --git a/kustomize/base/exploit-iq-config.yml b/kustomize/base/exploit-iq-config.yml index ef4a1be97..e5a9363bd 100644 --- a/kustomize/base/exploit-iq-config.yml +++ b/kustomize/base/exploit-iq-config.yml @@ -52,7 +52,7 @@ functions: plugin_name: vuln_analysis.data_models.plugins.intel_plugin.SimpleHttpIntelPlugin plugin_config: source: Product Security research - endpoint: https://exploit-iq-client.tw-iq.svc:8443/api/v1/vulnerabilities/{vuln_id}/comments + endpoint: CALLBACK_URL_PLACEHOLDER/api/v1/vulnerabilities/{vuln_id}/comments token_path: /var/run/secrets/kubernetes.io/serviceaccount/token verify_path: /app/certs/service-ca.crt @@ -160,7 +160,7 @@ functions: # vex_format: csaf cve_http_output: _type: cve_http_output - url: https://exploit-iq-client.tw-iq.svc:8443 + url: CALLBACK_URL_PLACEHOLDER endpoint: /api/v1/reports auth_type: bearer token_path: /var/run/secrets/kubernetes.io/serviceaccount/token diff --git a/kustomize/base/exploit_iq_client.yaml b/kustomize/base/exploit_iq_client.yaml index 734b3f582..d8c0072dd 100644 --- a/kustomize/base/exploit_iq_client.yaml +++ b/kustomize/base/exploit_iq_client.yaml @@ -34,7 +34,7 @@ spec: protocol: TCP containerPort: 8080 env: - - name: QUARKUS_REST_CLIENT_EXPLOIT_IQ_URL + - name: QUARKUS_REST-CLIENT_EXPLOIT_IQ_URL value: http://nginx-cache:8080/generate - name: QUARKUS_MONGODB_HOSTS value: exploit-iq-client-db:27017 diff --git a/kustomize/base/exploit_iq_service.yaml b/kustomize/base/exploit_iq_service.yaml index 46c36f43f..01dbbfcd8 100644 --- a/kustomize/base/exploit_iq_service.yaml +++ b/kustomize/base/exploit_iq_service.yaml @@ -25,7 +25,7 @@ spec: serviceAccountName: exploit-iq-sa containers: - name: exploit-iq-phoenix-tracing - image: 
quay.io/ecosystem-appeng/exploit-iq-agent:nat + image: quay.io/ecosystem-appeng/exploit-iq-agent:latest imagePullPolicy: Always workingDir: /workspace/ args: @@ -45,7 +45,7 @@ spec: memory: "1Gi" cpu: "100m" - name: exploit-iq - image: quay.io/ecosystem-appeng/exploit-iq-agent:nat + image: quay.io/ecosystem-appeng/exploit-iq-agent:latest imagePullPolicy: Always workingDir: /workspace/ args: diff --git a/src/vuln_analysis/data/eval_datasets/eval_dataset.json b/src/vuln_analysis/data/eval_datasets/eval_dataset.json index 5cf98cd54..1f4597f4e 100644 --- a/src/vuln_analysis/data/eval_datasets/eval_dataset.json +++ b/src/vuln_analysis/data/eval_datasets/eval_dataset.json @@ -1,8 +1,7 @@ [ { "id": 1, - "question": "{\"image\":{\"name\":\"nvcr.io\/nvidia\/morpheus\/morpheus\",\"tag\":\"23.11-runtime\",\"source_info\":[{\"type\":\"code\",\"git_repo\":\"https:\/\/github.com\/nv-\/Morpheus.git\",\"ref\":\"v23.11.01\",\"include\":[\"**\/*.cpp\",\"**\/*.cu\",\"**\/*.cuh\",\"**\/*.h\",\"**\/*.hpp\",\"**\/*.ipynb\",\"**\/*.py\",\"**\/*Dockerfile\"],\"exclude\":[\"tests\/**\/*\"]},{\"type\":\"doc\",\"git_repo\":\"https:\/\/github.com\/nv-morpheus\/Morpheus.git\",\"ref\":\"v23.11.01\",\"include\":[\"**\/*.md\",\"docs\/**\/*.rst\"]}],\"sbom_info\":{\"_type\":\"file\",\"file_path\":\"data\/sboms\/nvcr.io\/nvidia\/morpheus\/morpheus:v23.11.01-runtime.sbom\"}},\"scan\":{\"vulns\":[{\"vuln_id\":\"GHSA-3f63-hfp8-52jq\"},{\"vuln_id\":\"CVE-2023-50782\"}]}}", - + "question": 
"{\"image\":{\"name\":\"nvcr.io\/nvidia\/morpheus\/morpheus\",\"tag\":\"23.11-runtime\",\"source_info\":[{\"type\":\"code\",\"git_repo\":\"https:\/\/github.com\/nv-morpheus\/Morpheus.git\",\"ref\":\"v23.11.01\",\"include\":[\"**\/*.cpp\",\"**\/*.cu\",\"**\/*.cuh\",\"**\/*.h\",\"**\/*.hpp\",\"**\/*.ipynb\",\"**\/*.py\",\"**\/*Dockerfile\"],\"exclude\":[\"tests\/**\/*\"]},{\"type\":\"doc\",\"git_repo\":\"https:\/\/github.com\/nv-morpheus\/Morpheus.git\",\"ref\":\"v23.11.01\",\"include\":[\"**\/*.md\",\"docs\/**\/*.rst\"]}],\"sbom_info\":{\"_type\":\"file\",\"file_path\":\"data\/sboms\/nvcr.io\/nvidia\/morpheus\/morpheus:v23.11.01-runtime.sbom\"}},\"scan\":{\"vulns\":[{\"vuln_id\":\"GHSA-3f63-hfp8-52jq\"},{\"vuln_id\":\"CVE-2023-50782\"}]}}", "answer": "N/A" } ] diff --git a/src/vuln_analysis/register.py b/src/vuln_analysis/register.py index a11c1b8db..9115cbc00 100644 --- a/src/vuln_analysis/register.py +++ b/src/vuln_analysis/register.py @@ -576,7 +576,7 @@ async def call_llm_engine_subgraph_node(message: ExploitIqEngineInput): #graph.get_graph().draw_mermaid_png(output_file_path="checker_flow.png") def convert_str_to_exploit_iq_input(input: str) -> ExploitIqInput: - logger.debug("Converting JSON string input to AExploitIqInput (length: %d)", len(input)) + logger.debug("Converting JSON string input to ExploitIqInput (length: %d)", len(input)) try: return ExploitIqInput.model_validate_json(input) except Exception as e: From 0da6d2f93906a7558db72d1e294c71da5ae12fa0 Mon Sep 17 00:00:00 2001 From: Tamar Weisskopf Date: Tue, 30 Jun 2026 13:11:39 +0300 Subject: [PATCH 17/21] few review fixes --- .../configs/openapi/openapi.json | 1044 ++++++++++++----- .../data/eval_datasets/eval_dataset.json | 3 +- 2 files changed, 753 insertions(+), 294 deletions(-) diff --git a/src/vuln_analysis/configs/openapi/openapi.json b/src/vuln_analysis/configs/openapi/openapi.json index efba2b9ed..87ee11fd7 100644 --- a/src/vuln_analysis/configs/openapi/openapi.json +++ 
b/src/vuln_analysis/configs/openapi/openapi.json @@ -908,162 +908,53 @@ "title": "AcquiredArtifacts", "description": "Resolved file locations populated by source_acquisition, consumed by downstream checker nodes." }, - "AgentIntermediateStep": { + "Affect": { "properties": { - "tool_name": { - "type": "string", - "title": "Tool Name" - }, - "action_log": { - "type": "string", - "title": "Action Log" - }, - "tool_input": { + "ps_module": { "anyOf": [ { "type": "string" }, - { - "type": "object" - } - ], - "title": "Tool Input" - }, - "tool_output": { - "title": "Tool Output" - } - }, - "type": "object", - "required": [ - "tool_name", - "action_log", - "tool_input", - "tool_output" - ], - "title": "AgentIntermediateStep", - "description": "Represents info for an intermediate step taken by an agent." - }, - "ExploitIqEngineOutput": { - "properties": { - "vuln_id": { - "type": "string", - "title": "Vuln Id" - }, - "checklist": { - "items": { - "$ref": "#/components/schemas/ChecklistItemOutput" - }, - "type": "array", - "title": "Checklist" - }, - "summary": { - "type": "string", - "title": "Summary" - }, - "justification": { - "$ref": "#/components/schemas/JustificationOutput" - }, - "intel_score": { - "type": "integer", - "title": "Intel Score" - }, - "cvss": { - "anyOf": [ - { - "$ref": "#/components/schemas/CVSSOutput" - }, - { - "type": "null" - } - ] - } - }, - "type": "object", - "required": [ - "vuln_id", - "checklist", - "summary", - "justification", - "intel_score", - "cvss" - ], - "title": "ExploitIqEngineOutput", - "description": "Contains all output generated by the main ExploitIQ LLM Engine for a given vulnerability.\n\n- vuln_id: the ID of the vulnerability being processed by the LLM engine.\n- checklist: a list of ChecklistItemOutput objects, each containing an input and a response from the LLM agent.\n- summary: a short summary of the checklist inputs and responses, generated by an LLM.\n- justification: a JustificationOutput object containing details 
of the model's justification decision.\n- cvss: a CVSSOutput object containing the CVSS score and vector string for the vulnerability." - }, - "ExploitIqInfo": { - "properties": { - "vdb": { - "anyOf": [ - { - "$ref": "#/components/schemas/VdbPaths" - }, - { - "type": "null" - } - ] - }, - "intel": { - "anyOf": [ - { - "items": { - "$ref": "#/components/schemas/CveIntel" - }, - "type": "array" - }, { "type": "null" } ], - "title": "Intel" + "title": "Ps Module" }, - "sbom": { + "ps_product": { "anyOf": [ { - "$ref": "#/components/schemas/SBOMInfo" + "type": "string" }, { "type": "null" } - ] + ], + "title": "Ps Product" }, - "vulnerable_dependencies": { + "ps_component": { "anyOf": [ { - "items": { - "$ref": "#/components/schemas/VulnerableDependencies" - }, - "type": "array" + "type": "string" }, { "type": "null" } ], - "title": "Vulnerable Dependencies" + "title": "Ps Component" }, - "checker_context": { + "ps_update_stream": { "anyOf": [ { - "$ref": "#/components/schemas/PackageCheckerContext" + "type": "string" }, { "type": "null" } - ] - } - }, - "type": "object", - "title": "ExploitIqInfo", - "description": "Information used for decisioning in the ExploitIQ engine. These information can all be automatically\ngenerated or retrieved by the pipeline from the input information.\n\n- vdb: paths to source code and documentation vector databases (VDBs) used to understand whether a vulnerability\n is exploitable in the source code.\n- intel: list of CveIntel objects representing intelligence for each vulnerability pulled from various vulnerability\n databases and APIs.\n- sbom: software bill of materials listing the packages and versions in the container image, used to understand\n whether the vulnerable package exists in the image.\n- vulnerable_dependencies: a list of VulnerableDependencies objects for each vuln_id, representing the SBOM packages\n and transitive dependencies that are vulnerable for the vuln_id." 
- }, - "ExploitIqInput-Input": { - "properties": { - "scan": { - "$ref": "#/components/schemas/ScanInfoInput" - }, - "image": { - "$ref": "#/components/schemas/ImageInfoInput-Input" + ], + "title": "Ps Update Stream" }, - "credential_id": { + "resolution": { "anyOf": [ { "type": "string" @@ -1072,20 +963,20 @@ "type": "null" } ], - "title": "Credential Id" + "title": "Resolution" }, - "code_index_success": { + "affectedness": { "anyOf": [ { - "type": "boolean" + "type": "string" }, { "type": "null" } ], - "title": "Code Index Success" + "title": "Affectedness" }, - "failure_reason": { + "purl": { "anyOf": [ { "type": "string" @@ -1094,89 +985,47 @@ "type": "null" } ], - "title": "Failure Reason", - "default": "No failure reason provided" + "title": "Purl" } }, + "additionalProperties": true, "type": "object", - "required": [ - "scan", - "image" - ], - "title": "ExploitIqInput", - "description": "Inputs required by the ExploitIQ pipeline." + "title": "Affect" }, - "ExploitIqInput-Output": { + "AgentIntermediateStep": { "properties": { - "scan": { - "$ref": "#/components/schemas/ScanInfoInput" - }, - "image": { - "$ref": "#/components/schemas/ImageInfoInput-Output" - }, - "credential_id": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Credential Id" + "tool_name": { + "type": "string", + "title": "Tool Name" }, - "code_index_success": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ], - "title": "Code Index Success" + "action_log": { + "type": "string", + "title": "Action Log" }, - "failure_reason": { + "tool_input": { "anyOf": [ { "type": "string" }, { - "type": "null" + "type": "object" } ], - "title": "Failure Reason", - "default": "No failure reason provided" - } - }, - "type": "object", - "required": [ - "scan", - "image" - ], - "title": "ExploitIqInput", - "description": "Inputs required by the ExploitIQ pipeline." 
- }, - "ExploitIqOutput": { - "properties": { - "input": { - "$ref": "#/components/schemas/ExploitIqInput-Output" - }, - "info": { - "$ref": "#/components/schemas/ExploitIqInfo" + "title": "Tool Input" }, - "output": { - "$ref": "#/components/schemas/OutputPayload" + "tool_output": { + "title": "Tool Output" } }, "type": "object", "required": [ - "input", - "info", - "output" + "tool_name", + "action_log", + "tool_input", + "tool_output" ], - "title": "ExploitIqOutput", - "description": "\"\nThe final output of the ExploitIQ pipeline.\nContains all fields in the ExploitIqEngineInput, plus the ExploitIQEngineOuput for each input vulnerability." + "title": "AgentIntermediateStep", + "description": "Represents info for an intermediate step taken by an agent." }, "AnalysisType": { "type": "string", @@ -2051,6 +1900,30 @@ "title": "ChatResponseChunk", "description": "ChatResponseChunk is a data model that represents a response chunk from the NAT chat streaming API.\nFully compatible with OpenAI Chat Completions API specification." 
}, + "CheckedNotVulnerablePackage": { + "properties": { + "name": { + "type": "string", + "title": "Name" + }, + "version": { + "type": "string", + "title": "Version" + }, + "reason": { + "type": "string", + "title": "Reason" + } + }, + "type": "object", + "required": [ + "name", + "version", + "reason" + ], + "title": "CheckedNotVulnerablePackage", + "description": "Information about a package that was checked against CVE intel but determined not vulnerable.\n\n- name: package name that was checked\n- version: installed version of the package\n- reason: LLM-generated explanation of why the package is not vulnerable" + }, "ChecklistItemOutput": { "properties": { "input": { @@ -2328,6 +2201,16 @@ } ] }, + "osidb": { + "anyOf": [ + { + "$ref": "#/components/schemas/CveIntelOsidb" + }, + { + "type": "null" + } + ] + }, "plugin_data": { "items": { "$ref": "#/components/schemas/IntelPluginData" @@ -2694,26 +2577,165 @@ "title": "CveIntelNvd", "description": "Information about an NVD (National Vulnerability Database) entry." 
}, - "CveIntelRhsa": { + "CveIntelOsidb": { "properties": { - "bugzilla": { - "$ref": "#/components/schemas/Bugzilla" - }, - "details": { + "cve_id": { "anyOf": [ { - "items": { - "type": "string" - }, - "type": "array" + "type": "string" }, { "type": "null" } ], - "title": "Details" + "title": "Cve Id" }, - "statement": { + "impact": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Impact" + }, + "title": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Title" + }, + "comment_zero": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Comment Zero" + }, + "cve_description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Cve Description" + }, + "statement": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Statement" + }, + "cwe_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Cwe Id" + }, + "source": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Source" + }, + "mitigation": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Mitigation" + }, + "upstream_purls": { + "items": { + "$ref": "#/components/schemas/UpstreamPurl" + }, + "type": "array", + "title": "Upstream Purls", + "default": [] + }, + "affects": { + "items": { + "$ref": "#/components/schemas/Affect" + }, + "type": "array", + "title": "Affects", + "default": [] + }, + "cvss_scores": { + "items": { + "$ref": "#/components/schemas/CvssScore" + }, + "type": "array", + "title": "Cvss Scores", + "default": [] + }, + "references": { + "items": { + "type": "string" + }, + "type": "array", + "title": "References", + "default": [] + } + }, + "additionalProperties": true, + "type": "object", + "title": "CveIntelOsidb", + "description": "Information about an OSIDB (Open Source Information Database) flaw entry.\nAvailable only for 
internal Red Hat users on VPN." + }, + "CveIntelRhsa": { + "properties": { + "bugzilla": { + "$ref": "#/components/schemas/Bugzilla" + }, + "details": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Details" + }, + "statement": { "anyOf": [ { "type": "string" @@ -2862,6 +2884,57 @@ "title": "CveIntelUbuntu", "description": "Information about a Ubuntu CVE entry." }, + "CvssScore": { + "properties": { + "issuer": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Issuer" + }, + "score": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Score" + }, + "vector": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Vector" + }, + "cvss_version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Cvss Version" + } + }, + "additionalProperties": true, + "type": "object", + "title": "CvssScore" + }, "DependencyPackage": { "properties": { "system": { @@ -2946,7 +3019,271 @@ "title": "Config File", "description": "Path to the configuration file for evaluation" }, - "job_id": { + "job_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Job Id", + "description": "Unique identifier for the evaluation job" + }, + "reps": { + "type": "integer", + "exclusiveMinimum": 0, + "title": "Reps", + "description": "Number of repetitions for the evaluation, defaults to 1", + "default": 1 + }, + "expiry_seconds": { + "type": "integer", + "exclusiveMinimum": 0, + "title": "Expiry Seconds", + "description": "Optional time (in seconds) before the job expires. Clamped between 600 (10 min) and 86400 (24h).", + "default": 3600 + } + }, + "type": "object", + "required": [ + "config_file" + ], + "title": "EvaluateRequest", + "description": "Request model for the evaluate endpoint." 
+ }, + "EvaluateResponse": { + "properties": { + "job_id": { + "type": "string", + "title": "Job Id", + "description": "Unique identifier for the job" + }, + "status": { + "type": "string", + "title": "Status", + "description": "Current status of the job" + } + }, + "type": "object", + "required": [ + "job_id", + "status" + ], + "title": "EvaluateResponse", + "description": "Response model for the evaluate endpoint." + }, + "EvaluateStatusResponse": { + "properties": { + "job_id": { + "type": "string", + "title": "Job Id", + "description": "Unique identifier for the evaluation job" + }, + "status": { + "type": "string", + "title": "Status", + "description": "Current status of the evaluation job" + }, + "error": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Error", + "description": "Error message if the job failed" + }, + "created_at": { + "type": "string", + "format": "date-time", + "title": "Created At", + "description": "Timestamp when the job was created" + }, + "updated_at": { + "type": "string", + "format": "date-time", + "title": "Updated At", + "description": "Timestamp when the job was last updated" + }, + "expires_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Expires At", + "description": "Timestamp when the job will expire" + }, + "config_file": { + "type": "string", + "title": "Config File", + "description": "Path to the configuration file used for evaluation" + }, + "output_path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Output Path", + "description": "Path to the output file if the job completed successfully" + } + }, + "type": "object", + "required": [ + "job_id", + "status", + "created_at", + "updated_at", + "config_file" + ], + "title": "EvaluateStatusResponse", + "description": "Response model for the evaluate status endpoint." 
+ }, + "ExploitIqEngineOutput": { + "properties": { + "vuln_id": { + "type": "string", + "title": "Vuln Id" + }, + "checklist": { + "items": { + "$ref": "#/components/schemas/ChecklistItemOutput" + }, + "type": "array", + "title": "Checklist" + }, + "summary": { + "type": "string", + "title": "Summary" + }, + "justification": { + "$ref": "#/components/schemas/JustificationOutput" + }, + "intel_score": { + "type": "integer", + "title": "Intel Score" + }, + "cvss": { + "anyOf": [ + { + "$ref": "#/components/schemas/CVSSOutput" + }, + { + "type": "null" + } + ] + }, + "details": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Details" + } + }, + "type": "object", + "required": [ + "vuln_id", + "checklist", + "summary", + "justification", + "intel_score", + "cvss" + ], + "title": "ExploitIqEngineOutput", + "description": "ExploitIqEngineOutputEngine for a given vulnerability.\n\n- vuln_id: the ID of the vulnerability being processed by the LLM engine.\n- checklist: a list of ChecklistItemOutput objects, each containing an input and a response from the LLM agent.\n- summary: a short summary of the checklist inputs and responses, generated by an LLM.\n- justification: a JustificationOutput object containing details of the model's justification decision.\n- cvss: a CVSSOutput object containing the CVSS score and vector string for the vulnerability." 
+ }, + "ExploitIqInfo": { + "properties": { + "vdb": { + "anyOf": [ + { + "$ref": "#/components/schemas/VdbPaths" + }, + { + "type": "null" + } + ] + }, + "intel": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/CveIntel" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Intel" + }, + "sbom": { + "anyOf": [ + { + "$ref": "#/components/schemas/SBOMInfo" + }, + { + "type": "null" + } + ] + }, + "vulnerable_dependencies": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/VulnerableDependencies" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Vulnerable Dependencies" + }, + "checker_context": { + "anyOf": [ + { + "$ref": "#/components/schemas/PackageCheckerContext" + }, + { + "type": "null" + } + ] + } + }, + "type": "object", + "title": "ExploitIqInfo", + "description": "Information used for decisioning in the ExploitIQ engine. These information can all be automatically\ngenerated or retrieved by the pipeline from the input information.\n\n- vdb: paths to source code and documentation vector databases (VDBs) used to understand whether a vulnerability\n is exploitable in the source code.\n- intel: list of CveIntel objects representing intelligence for each vulnerability pulled from various vulnerability\n databases and APIs.\n- sbom: software bill of materials listing the packages and versions in the container image, used to understand\n whether the vulnerable package exists in the image.\n- vulnerable_dependencies: a list of VulnerableDependencies objects for each vuln_id, representing the SBOM packages\n and transitive dependencies that are vulnerable for the vuln_id." 
+ }, + "ExploitIqInput-Input": { + "properties": { + "scan": { + "$ref": "#/components/schemas/ScanInfoInput" + }, + "image": { + "$ref": "#/components/schemas/ImageInfoInput-Input" + }, + "credential_id": { "anyOf": [ { "type": "string" @@ -2955,65 +3292,49 @@ "type": "null" } ], - "title": "Job Id", - "description": "Unique identifier for the evaluation job" - }, - "reps": { - "type": "integer", - "exclusiveMinimum": 0, - "title": "Reps", - "description": "Number of repetitions for the evaluation, defaults to 1", - "default": 1 + "title": "Credential Id" }, - "expiry_seconds": { - "type": "integer", - "exclusiveMinimum": 0, - "title": "Expiry Seconds", - "description": "Optional time (in seconds) before the job expires. Clamped between 600 (10 min) and 86400 (24h).", - "default": 3600 - } - }, - "type": "object", - "required": [ - "config_file" - ], - "title": "EvaluateRequest", - "description": "Request model for the evaluate endpoint." - }, - "EvaluateResponse": { - "properties": { - "job_id": { - "type": "string", - "title": "Job Id", - "description": "Unique identifier for the job" + "code_index_success": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Code Index Success" }, - "status": { - "type": "string", - "title": "Status", - "description": "Current status of the job" + "failure_reason": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Failure Reason", + "default": "No failure reason provided" } }, "type": "object", "required": [ - "job_id", - "status" + "scan", + "image" ], - "title": "EvaluateResponse", - "description": "Response model for the evaluate endpoint." + "title": "ExploitIqInput", + "description": "Inputs required by the ExploitIQ pipeline." 
}, - "EvaluateStatusResponse": { + "ExploitIqInput-Output": { "properties": { - "job_id": { - "type": "string", - "title": "Job Id", - "description": "Unique identifier for the evaluation job" + "scan": { + "$ref": "#/components/schemas/ScanInfoInput" }, - "status": { - "type": "string", - "title": "Status", - "description": "Current status of the evaluation job" + "image": { + "$ref": "#/components/schemas/ImageInfoInput-Output" }, - "error": { + "credential_id": { "anyOf": [ { "type": "string" @@ -3022,40 +3343,20 @@ "type": "null" } ], - "title": "Error", - "description": "Error message if the job failed" - }, - "created_at": { - "type": "string", - "format": "date-time", - "title": "Created At", - "description": "Timestamp when the job was created" - }, - "updated_at": { - "type": "string", - "format": "date-time", - "title": "Updated At", - "description": "Timestamp when the job was last updated" + "title": "Credential Id" }, - "expires_at": { + "code_index_success": { "anyOf": [ { - "type": "string", - "format": "date-time" + "type": "boolean" }, { "type": "null" } ], - "title": "Expires At", - "description": "Timestamp when the job will expire" - }, - "config_file": { - "type": "string", - "title": "Config File", - "description": "Path to the configuration file used for evaluation" + "title": "Code Index Success" }, - "output_path": { + "failure_reason": { "anyOf": [ { "type": "string" @@ -3064,20 +3365,38 @@ "type": "null" } ], - "title": "Output Path", - "description": "Path to the output file if the job completed successfully" + "title": "Failure Reason", + "default": "No failure reason provided" } }, "type": "object", "required": [ - "job_id", - "status", - "created_at", - "updated_at", - "config_file" + "scan", + "image" ], - "title": "EvaluateStatusResponse", - "description": "Response model for the evaluate status endpoint." + "title": "ExploitIqInput", + "description": "Inputs required by the ExploitIQ pipeline." 
+ }, + "ExploitIqOutput": { + "properties": { + "input": { + "$ref": "#/components/schemas/ExploitIqInput-Output" + }, + "info": { + "$ref": "#/components/schemas/ExploitIqInfo" + }, + "output": { + "$ref": "#/components/schemas/OutputPayload" + } + }, + "type": "object", + "required": [ + "input", + "info", + "output" + ], + "title": "ExploitIqOutput", + "description": "\"\nThe final output of the ExploitIQ pipeline.\nContains all fields in the ExploitIqEngineInput, plus the ExploitIqEngineOutput for each input vulnerability." }, "FileSBOMInfoInput": { "properties": { @@ -3533,7 +3852,7 @@ } ], "title": "Downstream Report", - "description": "Serialized DownstreamSearchReport from L1 investigation" + "description": "DownstreamSearchReport from L1 investigation (as dict)" }, "upstream_report": { "anyOf": [ @@ -3545,7 +3864,19 @@ } ], "title": "Upstream Report", - "description": "Serialized UpstreamSearchReport from L1 investigation" + "description": "UpstreamSearchReport from L1 investigation (as dict)" + }, + "git_search_report": { + "anyOf": [ + { + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Git Search Report", + "description": "GitSearchReport from git commit search (as dict)" }, "l1_agent_answer": { "anyOf": [ @@ -3667,6 +3998,14 @@ null], "title": "L2 Override Verdict", "description": "L2 verdict override (if any)" + }, + "evidence_sources": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Evidence Sources", + "description": "Sources used for analysis: 'build_log', 'spec_file', 'build_system_files', 'binary'" } }, "type": "object", @@ -3858,7 +4197,7 @@ }, "PackageCheckerStatus": { "type": "integer", - "enum": [0, 1, 2, 3, 4, 5], + "enum": [0, 1, 2, 3, 4, 5, 6], "title": "PackageCheckerStatus", "description": "Per-CVE status codes produced by the PackageIdentify phase." 
}, @@ -3888,6 +4227,18 @@ "$ref": "#/components/schemas/EnumIdentifyResult", "default": "unknown" }, + "rhsa_fix_state": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Rhsa Fix State", + "description": "Raw RHSA fix_state value when is_target_package_affected is based on RHSA assessment" + }, "conclusion_reason": { "type": "string", "title": "Conclusion Reason", @@ -4216,6 +4567,46 @@ "type": "object", "title": "TextContent" }, + "UpstreamPurl": { + "properties": { + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name" + }, + "purl": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Purl" + }, + "ecosystem": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Ecosystem" + } + }, + "additionalProperties": true, + "type": "object", + "title": "UpstreamPurl" + }, "Usage": { "properties": { "prompt_tokens": { @@ -4510,17 +4901,40 @@ "title": "Search Keywords", "description": "Recommended grep patterns ordered by specificity (most specific first)" }, - "affected_architectures": { + "component_names": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Component Names", + "description": "Module or component names explicitly mentioned in CVE intel (e.g., mod_http2, libxml2_sax). Only extract if explicitly named in advisory/description - do NOT infer or guess." 
+ }, + "affected_bitness": { "type": "string", "enum": [ "32-bit", "64-bit", "both" ], - "title": "Affected Architectures", - "description": "Which CPU architectures are affected: 32-bit only, 64-bit only, or both (default)", + "title": "Affected Bitness", + "description": "Which bitness is affected: 32-bit only, 64-bit only, or both (default)", "default": "both" }, + "affected_architectures": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Affected Architectures", + "description": "CPU families affected (e.g., ['x86', 'arm']). None means all architectures." + }, "is_downstream_patch_available": { "type": "boolean", "title": "Is Downstream Patch Available", @@ -4538,6 +4952,42 @@ "title": "Patch File Name", "description": "Name of the CVE-specific patch file (if available)", "default": "" + }, + "known_mitigations": { + "type": "string", + "title": "Known Mitigations", + "description": "Vendor-provided mitigations from RHSA or other intel sources (e.g., compiler flags, config changes)", + "default": "" + }, + "affected_version_range": { + "type": "string", + "title": "Affected Version Range", + "description": "Affected version range from NVD configurations (e.g., '\u003C 2.4.68')", + "default": "" + }, + "fixed_version": { + "type": "string", + "title": "Fixed Version", + "description": "First fixed version from NVD intel (e.g., '2.4.68')", + "default": "" + }, + "target_version_in_vulnerable_range": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Target Version In Vulnerable Range", + "description": "True if target package version is within the vulnerable range (from Identify phase)" + }, + "target_package_version": { + "type": "string", + "title": "Target Package Version", + "description": "Actual version of the target package being analyzed (e.g., '2.4.63')", + "default": "" } }, "type": "object", @@ -4563,6 +5013,14 @@ }, "type": "array", "title": 
"Vulnerable Sbom Packages" + }, + "checked_not_vulnerable": { + "items": { + "$ref": "#/components/schemas/CheckedNotVulnerablePackage" + }, + "type": "array", + "title": "Checked Not Vulnerable", + "default": [] } }, "type": "object", @@ -4572,7 +5030,7 @@ "vulnerable_sbom_packages" ], "title": "VulnerableDependencies", - "description": "Information about the vulnerable SBOM packages associated with the vuln_id.\n\n- vuln_id: vulnerability ID (e.g. CVE ID, GHSA ID) associated with the vulnerable package list.\n- vuln_package_intel_sources: list of sources (e.g. \"ghsa\", \"nvd\", \"ubuntu\", \"rhsa\") that provided\n the vulnerable package/version intel for the vuln_id.\n- vulnerable_sbom_packages: list of VulnerableSBOMPackage objects, representing the SBOM packages that are\n vulnerable for a given vuln_id." + "description": "Information about the vulnerable SBOM packages associated with the vuln_id.\n\n- vuln_id: vulnerability ID (e.g. CVE ID, GHSA ID) associated with the vulnerable package list.\n- vuln_package_intel_sources: list of sources (e.g. \"ghsa\", \"nvd\", \"ubuntu\", \"rhsa\") that provided\n the vulnerable package/version intel for the vuln_id.\n- vulnerable_sbom_packages: list of VulnerableSBOMPackage objects, representing the SBOM packages that are\n vulnerable for a given vuln_id.\n- checked_not_vulnerable: list of CheckedNotVulnerablePackage objects, representing packages that were\n checked but determined not vulnerable, along with the LLM reasoning." 
}, "VulnerableSBOMPackage": { "properties": { diff --git a/src/vuln_analysis/data/eval_datasets/eval_dataset.json b/src/vuln_analysis/data/eval_datasets/eval_dataset.json index 1f4597f4e..5cf98cd54 100644 --- a/src/vuln_analysis/data/eval_datasets/eval_dataset.json +++ b/src/vuln_analysis/data/eval_datasets/eval_dataset.json @@ -1,7 +1,8 @@ [ { "id": 1, - "question": "{\"image\":{\"name\":\"nvcr.io\/nvidia\/morpheus\/morpheus\",\"tag\":\"23.11-runtime\",\"source_info\":[{\"type\":\"code\",\"git_repo\":\"https:\/\/github.com\/nv-morpheus\/Morpheus.git\",\"ref\":\"v23.11.01\",\"include\":[\"**\/*.cpp\",\"**\/*.cu\",\"**\/*.cuh\",\"**\/*.h\",\"**\/*.hpp\",\"**\/*.ipynb\",\"**\/*.py\",\"**\/*Dockerfile\"],\"exclude\":[\"tests\/**\/*\"]},{\"type\":\"doc\",\"git_repo\":\"https:\/\/github.com\/nv-morpheus\/Morpheus.git\",\"ref\":\"v23.11.01\",\"include\":[\"**\/*.md\",\"docs\/**\/*.rst\"]}],\"sbom_info\":{\"_type\":\"file\",\"file_path\":\"data\/sboms\/nvcr.io\/nvidia\/morpheus\/morpheus:v23.11.01-runtime.sbom\"}},\"scan\":{\"vulns\":[{\"vuln_id\":\"GHSA-3f63-hfp8-52jq\"},{\"vuln_id\":\"CVE-2023-50782\"}]}}", + "question": "{\"image\":{\"name\":\"nvcr.io\/nvidia\/morpheus\/morpheus\",\"tag\":\"23.11-runtime\",\"source_info\":[{\"type\":\"code\",\"git_repo\":\"https:\/\/github.com\/nv-\/Morpheus.git\",\"ref\":\"v23.11.01\",\"include\":[\"**\/*.cpp\",\"**\/*.cu\",\"**\/*.cuh\",\"**\/*.h\",\"**\/*.hpp\",\"**\/*.ipynb\",\"**\/*.py\",\"**\/*Dockerfile\"],\"exclude\":[\"tests\/**\/*\"]},{\"type\":\"doc\",\"git_repo\":\"https:\/\/github.com\/nv-morpheus\/Morpheus.git\",\"ref\":\"v23.11.01\",\"include\":[\"**\/*.md\",\"docs\/**\/*.rst\"]}],\"sbom_info\":{\"_type\":\"file\",\"file_path\":\"data\/sboms\/nvcr.io\/nvidia\/morpheus\/morpheus:v23.11.01-runtime.sbom\"}},\"scan\":{\"vulns\":[{\"vuln_id\":\"GHSA-3f63-hfp8-52jq\"},{\"vuln_id\":\"CVE-2023-50782\"}]}}", + "answer": "N/A" } ] From cc16abf4c29a9f74370a3da49ee7301a3374cdbc Mon Sep 17 00:00:00 2001 From: Tamar Weisskopf Date: 
Tue, 30 Jun 2026 13:22:16 +0300 Subject: [PATCH 18/21] Register exploit-iq-models submodule --- exploit-iq-models | 1 + 1 file changed, 1 insertion(+) create mode 160000 exploit-iq-models diff --git a/exploit-iq-models b/exploit-iq-models new file mode 160000 index 000000000..43de21f51 --- /dev/null +++ b/exploit-iq-models @@ -0,0 +1 @@ +Subproject commit 43de21f513538ef8580227584a6e609fe8e5029a From 5851d0a60c6c509e7ae227fad9bfc23e1fa4f6f6 Mon Sep 17 00:00:00 2001 From: Tamar Weisskopf Date: Tue, 30 Jun 2026 13:27:58 +0300 Subject: [PATCH 19/21] align readme --- kustomize/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kustomize/README.md b/kustomize/README.md index 6e8decfdc..f7a23180e 100644 --- a/kustomize/README.md +++ b/kustomize/README.md @@ -716,7 +716,7 @@ helm upgrade --install \ --set llama3_1_70b_instruct_4bit.readinessProbe.periodSeconds=10 \ --set global.tolerationsKey=p4d-gpu \ --set nim-embed.ngcSecret.apiKey= \ - exploit-iq-tests ../../../exploit-iq-models/exploit-iq-models + exploit-iq-tests ../exploit-iq-models/exploit-iq-models ``` **11.** Remove the decrypted secret files: From eef5a82185fe85c32fbd5394d531009c1dd8e1ec Mon Sep 17 00:00:00 2001 From: Tamar Weisskopf <98809100+TamarW0@users.noreply.github.com> Date: Tue, 30 Jun 2026 16:47:02 +0300 Subject: [PATCH 20/21] Update eval_dataset.json --- src/vuln_analysis/data/eval_datasets/eval_dataset.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/vuln_analysis/data/eval_datasets/eval_dataset.json b/src/vuln_analysis/data/eval_datasets/eval_dataset.json index 5cf98cd54..2813623b4 100644 --- a/src/vuln_analysis/data/eval_datasets/eval_dataset.json +++ b/src/vuln_analysis/data/eval_datasets/eval_dataset.json @@ -1,8 +1,7 @@ [ { "id": 1, - "question": 
"{\"image\":{\"name\":\"nvcr.io\/nvidia\/morpheus\/morpheus\",\"tag\":\"23.11-runtime\",\"source_info\":[{\"type\":\"code\",\"git_repo\":\"https:\/\/github.com\/nv-\/Morpheus.git\",\"ref\":\"v23.11.01\",\"include\":[\"**\/*.cpp\",\"**\/*.cu\",\"**\/*.cuh\",\"**\/*.h\",\"**\/*.hpp\",\"**\/*.ipynb\",\"**\/*.py\",\"**\/*Dockerfile\"],\"exclude\":[\"tests\/**\/*\"]},{\"type\":\"doc\",\"git_repo\":\"https:\/\/github.com\/nv-morpheus\/Morpheus.git\",\"ref\":\"v23.11.01\",\"include\":[\"**\/*.md\",\"docs\/**\/*.rst\"]}],\"sbom_info\":{\"_type\":\"file\",\"file_path\":\"data\/sboms\/nvcr.io\/nvidia\/morpheus\/morpheus:v23.11.01-runtime.sbom\"}},\"scan\":{\"vulns\":[{\"vuln_id\":\"GHSA-3f63-hfp8-52jq\"},{\"vuln_id\":\"CVE-2023-50782\"}]}}", - + "question": "{\"image\":{\"name\":\"nvcr.io\/nvidia\/morpheus\/morpheus\",\"tag\":\"23.11-runtime\",\"source_info\":[{\"type\":\"code\",\"git_repo\":\"https:\/\/github.com\/nv-\/Morpheus.git\",\"ref\":\"v23.11.01\",\"include\":[\"**\/*.cpp\",\"**\/*.cu\",\"**\/*.cuh\",\"**\/*.h\",\"**\/*.hpp\",\"**\/*.ipynb\",\"**\/*.py\",\"**\/*Dockerfile\"],\"exclude\":[\"tests\/**\/*\"]},{\"type\":\"doc\",\"git_repo\":\"https:\/\/github.com\/nv-morpheus\/Morpheus.git\",\"ref\":\"v23.11.01\",\"include\":[\"**\/*.md\",\"docs\/**\/*.rst\"]}],\"sbom_info\":{\"_type\":\"file\",\"file_path\":\"data\/sboms\/nvcr.io\/nvidia\/morpheus\/morpheus:v23.11.01-runtime.sbom\"}},\"scan\":{\"vulns\":[{\"vuln_id\":\"GHSA-3f63-hfp8-52jq\"},{\"vuln_id\":\"CVE-2023-50782\"}]}}", "answer": "N/A" } ] From d5d0013e205cc32e133e24a33291320ca5b9d92a Mon Sep 17 00:00:00 2001 From: Tamar Weisskopf <98809100+TamarW0@users.noreply.github.com> Date: Tue, 30 Jun 2026 16:52:10 +0300 Subject: [PATCH 21/21] Update eval_dataset.json --- src/vuln_analysis/data/eval_datasets/eval_dataset.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vuln_analysis/data/eval_datasets/eval_dataset.json b/src/vuln_analysis/data/eval_datasets/eval_dataset.json index 
2813623b4..1f4597f4e 100644 --- a/src/vuln_analysis/data/eval_datasets/eval_dataset.json +++ b/src/vuln_analysis/data/eval_datasets/eval_dataset.json @@ -1,7 +1,7 @@ [ { "id": 1, - "question": "{\"image\":{\"name\":\"nvcr.io\/nvidia\/morpheus\/morpheus\",\"tag\":\"23.11-runtime\",\"source_info\":[{\"type\":\"code\",\"git_repo\":\"https:\/\/github.com\/nv-\/Morpheus.git\",\"ref\":\"v23.11.01\",\"include\":[\"**\/*.cpp\",\"**\/*.cu\",\"**\/*.cuh\",\"**\/*.h\",\"**\/*.hpp\",\"**\/*.ipynb\",\"**\/*.py\",\"**\/*Dockerfile\"],\"exclude\":[\"tests\/**\/*\"]},{\"type\":\"doc\",\"git_repo\":\"https:\/\/github.com\/nv-morpheus\/Morpheus.git\",\"ref\":\"v23.11.01\",\"include\":[\"**\/*.md\",\"docs\/**\/*.rst\"]}],\"sbom_info\":{\"_type\":\"file\",\"file_path\":\"data\/sboms\/nvcr.io\/nvidia\/morpheus\/morpheus:v23.11.01-runtime.sbom\"}},\"scan\":{\"vulns\":[{\"vuln_id\":\"GHSA-3f63-hfp8-52jq\"},{\"vuln_id\":\"CVE-2023-50782\"}]}}", + "question": "{\"image\":{\"name\":\"nvcr.io\/nvidia\/morpheus\/morpheus\",\"tag\":\"23.11-runtime\",\"source_info\":[{\"type\":\"code\",\"git_repo\":\"https:\/\/github.com\/nv-morpheus\/Morpheus.git\",\"ref\":\"v23.11.01\",\"include\":[\"**\/*.cpp\",\"**\/*.cu\",\"**\/*.cuh\",\"**\/*.h\",\"**\/*.hpp\",\"**\/*.ipynb\",\"**\/*.py\",\"**\/*Dockerfile\"],\"exclude\":[\"tests\/**\/*\"]},{\"type\":\"doc\",\"git_repo\":\"https:\/\/github.com\/nv-morpheus\/Morpheus.git\",\"ref\":\"v23.11.01\",\"include\":[\"**\/*.md\",\"docs\/**\/*.rst\"]}],\"sbom_info\":{\"_type\":\"file\",\"file_path\":\"data\/sboms\/nvcr.io\/nvidia\/morpheus\/morpheus:v23.11.01-runtime.sbom\"}},\"scan\":{\"vulns\":[{\"vuln_id\":\"GHSA-3f63-hfp8-52jq\"},{\"vuln_id\":\"CVE-2023-50782\"}]}}", "answer": "N/A" } ]