RHEcosystemAppEng · tmihalac · Jun 17, 2026 · Jun 21, 2026 · Jun 23, 2026 · Jun 24, 2026
diff --git a/.tekton/on-cm-runner.yaml b/.tekton/on-cm-runner.yaml
@@ -147,10 +147,10 @@ spec:
 
               resources:
                 requests:
-                  cpu: "1000m"      # CPU request (1 core)
+                  cpu: "3000m"      # CPU request (3 cores)
                   memory: "12Gi"     # Memory request (8 gigabytes)
                 limits:
-                  cpu: "2000m"      # CPU limit (2 cores)
+                  cpu: "3000m"      # CPU limit (3 cores)
                   memory: "32Gi"    # Memory limit (16 gigabytes)
 
               volumeMounts:
@@ -188,6 +188,10 @@ spec:
                   value: "$(params.TRIGGER_COMMENT)"
                 - name: GOMODCACHE
                   value: "/exploit-iq-data/go/pkg/mod"
+                - name: GOCACHE
+                  value: "/exploit-iq-data/go/cache"
+                - name: MAVEN_OPTS
+                  value: "-Dmaven.repo.local=/exploit-iq-data/maven"
                 - name: UV_CACHE_DIR
                   value: "/tmp/uv-cache"
                 - name: SERPAPI_BASE_URL

diff --git a/.tekton/on-pull-request.yaml b/.tekton/on-pull-request.yaml
@@ -281,7 +281,8 @@ spec:
                 export MAVEN_HOME="$HOME/maven-sdk/apache-maven-${MAVEN_VERSION}"
                 export M2_HOME="$MAVEN_HOME"
                 export PATH="$MAVEN_HOME/bin:$PATH"
-
+                export MAVEN_OPTS="-Dmaven.repo.local=/exploit-iq-data/maven"
+
                 echo "Maven version:"
                 mvn -v
 
@@ -368,6 +369,10 @@ spec:
                 # Pass the raw comment text into the container
                 - name: GOMODCACHE
                   value: "/exploit-iq-data/go/pkg/mod"
+                - name: GOCACHE
+                  value: "/exploit-iq-data/go/cache"
+                - name: MAVEN_OPTS
+                  value: "-Dmaven.repo.local=/exploit-iq-data/maven"
                 - name: UV_CACHE_DIR
                   value: "/tmp/uv-cache"
                 - name: UV_PYTHON_INSTALL_DIR

diff --git a/kustomize/base/exploit_iq_service.yaml b/kustomize/base/exploit_iq_service.yaml
@@ -144,6 +144,10 @@ spec:
                   fieldPath: metadata.namespace
             - name: GOMODCACHE
               value: /exploit-iq-package-cache/go/pkg/mod
+            - name: GOCACHE
+              value: /exploit-iq-package-cache/go/cache
+            - name: MAVEN_OPTS
+              value: "-Dmaven.repo.local=/exploit-iq-package-cache/maven"
             - name: ENABLE_MLOPS
               value: "true"
             - name: CREDENTIAL_ENCRYPTION_KEY

diff --git a/src/exploit_iq_commons/data_models/checker_status.py b/src/exploit_iq_commons/data_models/checker_status.py
@@ -222,6 +222,12 @@ def format_for_prompt(self) -> str:
             lines.append(f"AFFECTED_VERSION_RANGE: {self.affected_version_range}")
         if self.fixed_version:
             lines.append(f"FIXED_VERSION: {self.fixed_version}")
+        # Emit version-in-range so the L1 agent can apply the VERSION GUARD
+        # and VERSION-BASED FALLBACK rules defined in the Case B thought instructions.
+        # Without this, the agent is told to check TARGET_IN_VULNERABLE_RANGE but never sees it.
+        if self.target_version_in_vulnerable_range is not None:
+            label = "YES" if self.target_version_in_vulnerable_range else "NO"
+            lines.append(f"TARGET_IN_VULNERABLE_RANGE: {label}")
         return "\n".join(lines)
 
 

diff --git a/src/exploit_iq_commons/utils/c_segmenter_custom.py b/src/exploit_iq_commons/utils/c_segmenter_custom.py
@@ -17,6 +17,24 @@
 from langchain_community.document_loaders.parsers.language.c import CSegmenter
 from typing import List
 
+
+def _comment_replacer(match):
+    """Preserve string literals while removing C/C++ comments."""
+    if match.group(1) is not None:  # string literal — keep it
+        return match.group(0)
+    return ' '  # comment — replace with space to preserve token boundaries
+
+
+_COMMENT_OR_STRING = re.compile(
+    r'("(?:[^"\\]|\\.)*"|\'(?:[^\'\\]|\\.)*\')'  # group 1: string or character literal
+    r'|'
+    r'(/\*[\s\S]*?\*/)'   # block comment
+    r'|'
+    r'(//[^\n]*)',         # line comment
+    re.DOTALL
+)
+
+
 #class extened CSegmenter
 class CSegmenterExtended(CSegmenter):
 
@@ -32,11 +50,8 @@ def __init__(self, code: str):
 
     @staticmethod
     def remove_comments(code: str) -> str:
-        # Remove all multi-line comments (/* ... */)
-        code = re.sub(r'/\*.*?\*/', '', code, flags=re.DOTALL)
-        # Remove all single-line comments (//...)
-        code = re.sub(r'//.*', '', code)
-        return code
+        # Remove comments while preserving comment-like patterns inside string literals
+        return _COMMENT_OR_STRING.sub(_comment_replacer, code)
 
     @staticmethod
     def remove_macro_blocks(text: str) -> str:

diff --git a/src/exploit_iq_commons/utils/chain_of_calls_retriever.py b/src/exploit_iq_commons/utils/chain_of_calls_retriever.py
@@ -95,7 +95,6 @@ def __init__(self, documents: List[Document], ecosystem: Ecosystem, manifest_pat
         used here to build the dependency tree for a more efficient lookup and search.
         """
 
-        logger.debug("Creating Chain of Calls Retriever")
         logger.debug("Starting building Chain of Calls Retriever")
         self.ecosystem = ecosystem
         logger.debug("Chain of Calls Retriever - creating dependency tree")
@@ -113,8 +112,8 @@ def __init__(self, documents: List[Document], ecosystem: Ecosystem, manifest_pat
         # Build a dependency tree using the dependency tree builder logic.
         tree = self.dependency_tree.builder.build_tree(manifest_path=manifest_path)
         for package, parents in tree.items():
-            parents.extend([package])
-            self.tree_dict[package] = parents
+            parents.append(package)
+            self.tree_dict[package] = list(dict.fromkeys(parents))
         self.supported_packages = list(self.tree_dict.keys())
         logger.debug("Chain of Calls Retriever - populating functions documents")
 
@@ -159,8 +158,18 @@ def __init__(self, documents: List[Document], ecosystem: Ecosystem, manifest_pat
         self.functions_local_variables_index = self.language_parser.create_map_of_local_vars(self.documents_of_functions)
         logger.debug("Chain of Calls Retriever - after functions_local_variables_index")
 
-        if not self.language_parser.is_search_algo_dfs():
-            self.sort_docs = self.__group_docs_by_pkg()
+        # Pre-index docs by package name for O(package_size) lookups instead of O(all_docs).
+        # sort_docs is used by the BFS path; get_possible_docs reads _root_docs and _source_path_index.
+        self.sort_docs = self.__group_docs_by_pkg()
+        # Pre-filter root-level docs to avoid scanning all documents in the root-package
+        # search path (sources_location_packages=False) of get_possible_docs.
+        self._root_docs = [doc for doc in self.documents if self.language_parser.is_root_package(doc)]
+        # Pre-index non-root docs by their full 'source' path for vendor-package lookups.
+        # Maps each distinct source string to the list of docs that carry that source.
+        self._source_path_index: dict[str, list[Document]] = defaultdict(list)
+        for doc in self.documents:
+            if not self.language_parser.is_root_package(doc):
+                self._source_path_index[doc.metadata.get('source', '')].append(doc)
 
     def _resolve_tree_key(self, package: str, ctx: _SearchCtx) -> str | None:
         """Find the canonical tree_dict key for a package name.
@@ -226,6 +235,17 @@ def __find_caller_function_dfs(self, document_function: Document, function_packa
             parents = self._get_parents(package_name, ctx)
             if parents:
                 direct_parents.extend(parents)
+        # Search root-level parents first so the DFS finds root callers
+        # before exploring library-internal call chains.
+        root_first = []
+        non_root = []
+        for p in direct_parents:
+            pp = self._get_parents(p, ctx)
+            if pp and pp[0] == ROOT_LEVEL_SENTINEL:
+                root_first.append(p)
+            else:
+                non_root.append(p)
+        direct_parents = root_first + non_root
         function_name_to_search = self.language_parser.get_function_name(document_function)
         if not function_name_to_search:
             return None
@@ -281,9 +301,6 @@ def __find_caller_function_dfs(self, document_function: Document, function_packa
             # match, and add it to exclusions so it will not consider it when backtracking in order to prevent cycles.
             if function_is_being_called:
                 package_exclusions.append(doc)
-                # update index of last scanned package for backtracking
-                # hashed_value = calculate_hashable_string_for_function(function_file_name, function_name_to_search)
-                # self.last_visited_parent_package_indexes[hashed_value] = last_visited_package_index + package_index
                 return doc
 
         # If didn't find a matching caller function document, returns None.
@@ -292,38 +309,55 @@ def __find_caller_function_dfs(self, document_function: Document, function_packa
     def _is_doc_excluded(self, doc: Document, exclusions: list[Document]) -> bool:
         """
         Checks if a document is in the exclusions list based on its
-        function name, function body and source metadata.
+        function body and source metadata.
+        Compares source first (cheap string compare) before falling back
+        to the more expensive content comparison.
         """
+        if not exclusions:
+            return False
         doc_function_content = doc.page_content.strip()
         doc_source = doc.metadata.get('source').strip()
 
         for exclusion_doc in exclusions:
-            exclusion_function_content = exclusion_doc.page_content.strip()
+            # Compare source path first — cheaper and usually different
             exclusion_source = exclusion_doc.metadata.get('source').strip()
-
-            if doc_function_content == exclusion_function_content and doc_source == exclusion_source:
+            if exclusion_source != doc_source:
+                continue
+            exclusion_function_content = exclusion_doc.page_content.strip()
+            if doc_function_content == exclusion_function_content:
                 return True
         return False
 
 
-    # This helper method filter out irrelevant function ( that cannot be caller functions), it filter out all
-    # excluded functions, and all function that their body doesn't contain the target function name to search for.
     def get_possible_docs(self, function_name_to_search: str, package: str, exclusions: list[Document],
                           sources_location_packages: bool,
                           target_class_names: frozenset[str],
                           method_exclusions: dict) -> (list[Document], bool):
-        if sources_location_packages:
-            filter_1 = [doc for doc in self.documents if package in doc.metadata.get('source')
-                        and self.language_parser.is_function(doc) and
-                        not self._is_doc_excluded(doc, exclusions)]
-        else:
-            filter_1 = [doc for doc in self.documents if self.language_parser.is_root_package(doc) and
-                        (self.language_parser.is_function(doc) or self.language_parser.is_script_language()) and
-                        not self._is_doc_excluded(doc, exclusions)]
+        """Filter documents to those that could be callers of function_name_to_search.
 
+        Applies the cheapest check first (search_token substring match) to
+        short-circuit before more expensive checks (is_function, _is_doc_excluded).
+        For root-package searches, uses pre-filtered _root_docs instead of scanning
+        all documents.
+        """
         if not function_name_to_search:
             return []
-        return [doc for doc in filter_1 if doc.page_content.__contains__(f"{function_name_to_search}(")]
+        search_token = f"{function_name_to_search}("
+        if sources_location_packages:
+            # Scan only the indexed source paths that contain the package name instead of
+            # every document. NOTE(review): the index omits root-package docs — confirm intended.
+            candidates = [doc for path, docs in self._source_path_index.items()
+                          if package in path for doc in docs]
+            return [doc for doc in candidates
+                    if search_token in doc.page_content
+                    and self.language_parser.is_function(doc)
+                    and not self._is_doc_excluded(doc, exclusions)]
+        else:
+            # Use pre-filtered _root_docs to avoid scanning all documents
+            return [doc for doc in self._root_docs
+                    if search_token in doc.page_content
+                    and (self.language_parser.is_function(doc) or self.language_parser.is_script_language())
+                    and not self._is_doc_excluded(doc, exclusions)]
 
     def __find_caller_functions_bfs(self, document_function: Document, function_package: str,
                                      ctx: _SearchCtx) -> List[Document]:
@@ -407,6 +441,7 @@ def __find_caller_functions_bfs(self, document_function: Document, function_pack
                                 documents_of_functions=
                                 self.documents_of_functions)
 
+
                             if found and self.language_parser.is_call_allowed( pkg_docs, doc, document_function):
                                 log_entries.append((file_name, func_name, function_name_to_search))
                                 relevant_docs_to_search_in.append(doc)
@@ -552,19 +587,32 @@ def get_relevant_documents(self, query: str) -> tuple[List[Document], bool]:
         matching_documents = []
         standard_libs_cache = StandardLibraryCache.get_instance()
         # If it's a standard library package, then skip checking the package in dependency tree.
+        subpackage_filter = None
         if not standard_libs_cache.is_standard_library(package_name, self.ecosystem):
             # Check if input package is in dependency tree
             for package in self.tree_dict:
                 if self.language_parser.is_tree_key_match(package_name, package):
                     package_name = package
                     found_package = True
                     break
+            # Sub-package fallback: query may be a sub-path of a module in tree_dict
+            if not found_package:
+                for package in self.tree_dict:
+                    suffix = self.language_parser.resolve_subpackage_to_module(package_name, package)
+                    if suffix is not None:
+                        subpackage_filter = suffix
+                        logger.debug("Sub-package resolved: '%s' → module '%s' (filter='%s')",
+                                     package_name, package, subpackage_filter)
+                        package_name = package
+                        found_package = True
+                        break
         # If it's , then create a document for it.
         if found_package:
             target_function_doc = self.__find_initial_function(function, package_name=package_name,
                                                                documents=self.documents,
                                                                ctx=ctx,
-                                                               class_name=class_name)
+                                                               class_name=class_name,
+                                                               subpackage_filter=subpackage_filter)
             if not target_function_doc and self.language_parser.get_constructor_method_name():
                 target_function_doc = self.__find_initial_function(function_name=self.language_parser.get_constructor_method_name(),
                                                                    package_name=package_name,
@@ -620,8 +668,6 @@ def get_relevant_documents(self, query: str) -> tuple[List[Document], bool]:
             matching_documents, ctx.found_path = self._breadth_first_search(
                 matching_documents, target_function_doc, current_package_name, ctx)
 
-        # When the loop is finished, return list of documents ( path) and boolean indicating whether a path was
-        # found or not.
         return matching_documents, ctx.found_path
 
     def __determine_doc_package_name(self, target_function_doc, ctx: _SearchCtx):
@@ -645,7 +691,8 @@ def __determine_doc_package_name(self, target_function_doc, ctx: _SearchCtx):
         return fallback
 
     def __find_initial_function(self, function_name: str, package_name: str, documents: list[Document],
-                                ctx: _SearchCtx, class_name: str = None) -> Document:
+                                ctx: _SearchCtx, class_name: str = None,
+                                subpackage_filter: str | None = None) -> Document:
 
         if self.language_parser.is_search_algo_dfs():
             pkg_docs = documents
@@ -657,6 +704,14 @@ def __find_initial_function(self, function_name: str, package_name: str, documen
             relevant_docs = [doc for doc in relevant_docs if doc.page_content.endswith(
                 f'{self.language_parser.get_comment_line_notation()}(class: {class_name})')]
 
+        if subpackage_filter:
+            pre_count = len(relevant_docs)
+            relevant_docs = [
+                doc for doc in relevant_docs
+                if subpackage_filter in doc.metadata.get("source", "")
+            ]
+            logger.debug("Sub-package filter '%s': %d → %d docs", subpackage_filter, pre_count, len(relevant_docs))
+
         package_exclusions = ctx.exclusions[package_name]
         #for index, document in enumerate(get_functions_for_package(package_name, relevant_docs, language_parser)):
         from itertools import chain