From 5570b69ce2ea4a0b9ae5edbb3563b0dbdef3c386 Mon Sep 17 00:00:00 2001 From: Viktor Veselov Date: Fri, 29 May 2026 14:03:29 -0400 Subject: [PATCH 1/3] feat: add pluggable policy & taxonomy --- src/google/adk/__init__.py | 24 +- src/google/adk/plugins/__init__.py | 5 + src/google/adk/plugins/taxonomy/__init__.py | 33 ++ src/google/adk/plugins/taxonomy/policy.py | 150 +++++++++ .../adk/plugins/taxonomy/taxonomy_config.py | 164 ++++++++++ .../adk/plugins/taxonomy/taxonomy_plugin.py | 302 ++++++++++++++++++ src/google/adk/skills/_utils.py | 2 + src/google/adk/skills/models.py | 27 ++ 8 files changed, 706 insertions(+), 1 deletion(-) create mode 100644 src/google/adk/plugins/taxonomy/__init__.py create mode 100644 src/google/adk/plugins/taxonomy/policy.py create mode 100644 src/google/adk/plugins/taxonomy/taxonomy_config.py create mode 100644 src/google/adk/plugins/taxonomy/taxonomy_plugin.py diff --git a/src/google/adk/__init__.py b/src/google/adk/__init__.py index be9d2af08b..c3d5c3a1fb 100644 --- a/src/google/adk/__init__.py +++ b/src/google/adk/__init__.py @@ -21,5 +21,27 @@ from .runners import Runner from .workflow import Workflow +# Taxonomy Policy & Security Engine +from .plugins.taxonomy.policy import DefaultSkillPolicy +from .plugins.taxonomy.policy import SkillPolicy +from .plugins.taxonomy.policy import TaxonomyPipeline +from .plugins.taxonomy.policy import TaxonomyResolver +from .plugins.taxonomy.taxonomy_config import TaxonomyRegistry +from .plugins.taxonomy.taxonomy_config import TaxonomyTerm +from .plugins.taxonomy.taxonomy_plugin import TaxonomyPlugin + __version__ = version.__version__ -__all__ = ["Agent", "Context", "Event", "Runner", "Workflow"] +__all__ = [ + "Agent", + "Context", + "DefaultSkillPolicy", + "Event", + "Runner", + "SkillPolicy", + "TaxonomyPipeline", + "TaxonomyPlugin", + "TaxonomyRegistry", + "TaxonomyResolver", + "TaxonomyTerm", + "Workflow", +] diff --git a/src/google/adk/plugins/__init__.py b/src/google/adk/plugins/__init__.py 
index 70347fd25e..90a482ea52 100644 --- a/src/google/adk/plugins/__init__.py +++ b/src/google/adk/plugins/__init__.py @@ -23,6 +23,7 @@ from .debug_logging_plugin import DebugLoggingPlugin from .logging_plugin import LoggingPlugin from .reflect_retry_tool_plugin import ReflectAndRetryToolPlugin + from .taxonomy import TaxonomyPlugin __all__ = [ 'BasePlugin', @@ -30,6 +31,7 @@ 'LoggingPlugin', 'PluginManager', 'ReflectAndRetryToolPlugin', + 'TaxonomyPlugin', ] _LAZY_MEMBERS: dict[str, str] = { @@ -43,4 +45,7 @@ def __getattr__(name: str): if name in _LAZY_MEMBERS: module = importlib.import_module(f'{__name__}.{_LAZY_MEMBERS[name]}') return vars(module)[name] + if name == 'TaxonomyPlugin': + from .taxonomy import TaxonomyPlugin + return TaxonomyPlugin raise AttributeError(f'module {__name__!r} has no attribute {name!r}') diff --git a/src/google/adk/plugins/taxonomy/__init__.py b/src/google/adk/plugins/taxonomy/__init__.py new file mode 100644 index 0000000000..1ccea90a6b --- /dev/null +++ b/src/google/adk/plugins/taxonomy/__init__.py @@ -0,0 +1,33 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Pluggable Policy & Taxonomy Security Engine for ADK.""" + +from .policy import DefaultSkillPolicy +from .policy import SkillPolicy +from .policy import TaxonomyPipeline +from .policy import TaxonomyResolver +from .taxonomy_config import TaxonomyRegistry +from .taxonomy_config import TaxonomyTerm +from .taxonomy_plugin import TaxonomyPlugin + +__all__ = [ + "DefaultSkillPolicy", + "SkillPolicy", + "TaxonomyPipeline", + "TaxonomyPlugin", + "TaxonomyRegistry", + "TaxonomyResolver", + "TaxonomyTerm", +] diff --git a/src/google/adk/plugins/taxonomy/policy.py b/src/google/adk/plugins/taxonomy/policy.py new file mode 100644 index 0000000000..682d79f4b2 --- /dev/null +++ b/src/google/adk/plugins/taxonomy/policy.py @@ -0,0 +1,150 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Abstract interfaces for taxonomy resolution and skill policy enforcement. + +This module defines the pluggable contracts that developers implement: + +- ``TaxonomyResolver``: Classifies the active security/regulatory domains + from runtime context and LLM conversation history. +- ``TaxonomyPipeline``: Chains multiple resolvers into a multi-step pipeline. +- ``SkillPolicy``: Gates skill access and shapes instructions dynamically. +- ``DefaultSkillPolicy``: Reference implementation using taxonomy-bind matching. 
+""" + +from __future__ import annotations + +from abc import ABC +from abc import abstractmethod + +from ...agents.readonly_context import ReadonlyContext +from ...models.llm_request import LlmRequest +from ...skills.models import Skill + + +class TaxonomyResolver(ABC): + """Abstract base class for taxonomy resolution. + + Resolvers can be chained to form multi-step pipelines via ``TaxonomyPipeline``. + + Example use cases: + - Semantic classification: Analyze past agent interactions to classify + the active security domain (e.g. ``urn:adk:domain:compliance``). + - Entitlements verification: Gate access using feature flags. + - DB-backed RBAC: Query database records for user permissions. + """ + + @abstractmethod + async def resolve_taxonomies( + self, context: ReadonlyContext, llm_request: LlmRequest + ) -> list[str]: + """Resolves active taxonomy domain URIs from the runtime context and LLM history. + + Args: + context: The session runtime context. Provides access to + ``user_content``, ``user_id``, ``state``, and ``session``. + llm_request: Outgoing LLM request containing conversation history, + agent-to-agent dialogues, and reasoning blocks. + + Returns: + List of active taxonomy domain URI strings + (e.g. ``["urn:adk:domain:compliance", "urn:adk:domain:medical"]``). 
class TaxonomyPipeline(TaxonomyResolver):
  """Runs a sequence of taxonomy resolvers and merges their results.

  Every resolver is awaited in the order it was supplied. The domain URIs
  they return are merged into one de-duplicated list that keeps first-seen
  order, so the same resolver configuration always yields the same list
  (a plain ``set`` would order strings differently from process to process).

  Args:
    resolvers: Resolvers to execute, in order.
  """

  def __init__(self, resolvers: list[TaxonomyResolver]):
    self.resolvers = resolvers

  async def resolve_taxonomies(
      self, context: ReadonlyContext, llm_request: LlmRequest
  ) -> list[str]:
    """Returns the union of all resolvers' domains, in first-seen order.

    Args:
      context: The session runtime context, passed through to each resolver.
      llm_request: The outgoing LLM request, passed through to each resolver.

    Returns:
      De-duplicated taxonomy domain URIs. Empty when there are no resolvers
      or none of them returned anything.
    """
    # A dict preserves insertion order, giving set-like de-duplication
    # without the arbitrary ordering of an actual set.
    merged: dict[str, None] = {}
    for resolver in self.resolvers:
      domains = await resolver.resolve_taxonomies(context, llm_request)
      if domains:
        merged.update(dict.fromkeys(domains))
    return list(merged)
+ """ + + def is_skill_allowed( + self, + skill: Skill, + context: ReadonlyContext, + active_taxonomies: list[str], + ) -> bool: + binds = skill.frontmatter.taxonomy_binds + if not binds: + return True + # At least one bind must match an active taxonomy + return bool(set(binds) & set(active_taxonomies)) + + def shape_instructions( + self, + skill: Skill, + context: ReadonlyContext, + original_instructions: str, + ) -> str: + return original_instructions diff --git a/src/google/adk/plugins/taxonomy/taxonomy_config.py b/src/google/adk/plugins/taxonomy/taxonomy_config.py new file mode 100644 index 0000000000..bd66b004e3 --- /dev/null +++ b/src/google/adk/plugins/taxonomy/taxonomy_config.py @@ -0,0 +1,164 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Pydantic models for taxonomy configuration parsing. + +Supports two standard formats: +- Flat Key-Value JSON: Simple id/parentId/name/definition structure. +- JSON-LD with SKOS: Semantic web vocabularies (prefLabel, altLabel, definition, broader). +""" + +from __future__ import annotations + +from typing import Optional + +from pydantic import BaseModel +from pydantic import ConfigDict +from pydantic import Field + + +class TaxonomyTerm(BaseModel): + """A single taxonomy term with metadata for validation and LLM disambiguation. 
class TaxonomyRegistry(BaseModel):
  """Central registry of taxonomy term definitions, keyed by term ID.

  Two input formats are supported through alternate constructors.

  **Flat Key-Value JSON** (``from_flat_json``):
    id: str
    parentId: Optional[str]
    name: str
    definition: Optional[str]

  **JSON-LD with SKOS** (``from_json_ld``):
    @context: str
    @type: str
    @id: str
    prefLabel: str or dict (``{"@value": str, "@language": str}``)
    altLabel: str, dict, or list of either
    definition: str or dict (``{"@value": str, "@language": str}``)
    broader: Optional[str] or node reference (``{"@id": str}``)

  Attributes:
    terms: Mapping of term ID to its ``TaxonomyTerm``.
  """

  terms: dict[str, TaxonomyTerm] = {}

  @classmethod
  def from_flat_json(cls, data: list[dict]) -> TaxonomyRegistry:
    """Parses taxonomy terms from the flat key-value JSON format.

    Input example::

      [
        {
          "id": "100",
          "parentId": null,
          "name": "Artificial Intelligence",
          "definition": "The simulation of human intelligence by machines."
        }
      ]

    Args:
      data: One dict per term. Each is validated as a ``TaxonomyTerm``.

    Returns:
      A registry keyed by term ``id``. When two items share an ID the later
      one replaces the earlier one.
    """
    terms = {}
    for item in data:
      term = TaxonomyTerm.model_validate(item)
      terms[term.id] = term
    return cls(terms=terms)

  @classmethod
  def from_json_ld(cls, data: list[dict]) -> TaxonomyRegistry:
    """Parses taxonomy terms from JSON-LD nodes that use SKOS properties.

    Input example::

      [
        {
          "@context": "http://w3.org",
          "@type": "Concept",
          "@id": "https://example.com/ml",
          "prefLabel": {"@value": "Machine Learning", "@language": "en"},
          "altLabel": [
            {"@value": "ML", "@language": "en"},
            {"@value": "Automated Learning", "@language": "en"}
          ],
          "definition": {
            "@value": "A branch of AI focused on systems that learn from data.",
            "@language": "en"
          },
          "broader": "https://example.com/ai"
        }
      ]

    Literal-valued properties (``prefLabel``, ``altLabel``, ``definition``)
    may be plain strings, ``{"@value": ...}`` objects, or lists of either.
    ``broader`` may be an ID string or a ``{"@id": ...}`` node reference.

    Args:
      data: One dict per JSON-LD node. Nodes without ``@id`` are skipped.

    Returns:
      A registry keyed by ``@id``. A node without a usable ``prefLabel``
      gets an empty ``name``.
    """

    def _literal(raw: object) -> Optional[str]:
      """Returns the first non-empty text found in a JSON-LD literal."""
      if isinstance(raw, list):
        for entry in raw:
          text = _literal(entry)
          if text:
            return text
        return None
      if isinstance(raw, dict):
        raw = raw.get("@value")
      return raw if isinstance(raw, str) and raw else None

    terms = {}
    for item in data:
      term_id = item.get("@id")
      if not term_id:
        continue

      # altLabel may be a single value instead of a list; normalize first so
      # plain strings and value objects are handled uniformly.
      alt_labels_raw = item.get("altLabel", [])
      if not isinstance(alt_labels_raw, list):
        alt_labels_raw = [alt_labels_raw]
      alt_labels = [
          text for entry in alt_labels_raw if (text := _literal(entry))
      ]

      # Accept the node-reference form {"@id": "..."} as well as a bare ID.
      broader = item.get("broader")
      if isinstance(broader, dict):
        broader = broader.get("@id")

      terms[term_id] = TaxonomyTerm(
          id=term_id,
          parent_id=broader,
          name=_literal(item.get("prefLabel")) or "",
          definition=_literal(item.get("definition")),
          alt_labels=alt_labels,
      )
    return cls(terms=terms)

  def get_term(self, term_id: str) -> Optional[TaxonomyTerm]:
    """Returns the term registered under ``term_id``, or None if absent."""
    return self.terms.get(term_id)

  def get_children(self, parent_id: str) -> list[TaxonomyTerm]:
    """Returns every term whose ``parent_id`` equals ``parent_id``."""
    return [t for t in self.terms.values() if t.parent_id == parent_id]

  def list_ids(self) -> list[str]:
    """Returns all term IDs in the registry."""
    return list(self.terms.keys())
class TaxonomyPlugin(BasePlugin):
  """ADK plugin that enforces pluggable taxonomy policies on skill tools.

  This plugin provides:
  - **Skill discovery gating**: replaces the ``list_skills`` result with a
    listing of only the skills the policy permits under the active domains.
  - **Skill execution gating**: blocks ``load_skill``,
    ``load_skill_resource`` and ``run_skill_script`` for skills the policy
    rejects.
  - **Argument validation**: validates the skill name with the SDK's
    ``_validate_path_segment`` and rejects absolute or parent-traversing
    relative paths.
  - **Dynamic instruction shaping**: applies
    ``SkillPolicy.shape_instructions`` to ``load_skill`` results in
    ``after_tool_callback``.

  Policy gating and shaping are active only when BOTH ``policy`` and
  ``resolver`` are configured; argument validation always runs.

  Args:
    name: Plugin instance name. Defaults to ``"taxonomy_plugin"``.
    taxonomy_registry: Optional parsed taxonomy definitions for developer
      use. An empty registry is created when omitted.
    resolver: Optional taxonomy resolver (or pipeline) that classifies
      active domains from runtime context.
    policy: Optional skill policy engine that gates access and shapes
      instructions.
  """

  # Argument names under which a skill tool may pass the skill identifier
  # and a skill-relative path.
  # NOTE(review): the extra spellings ("name", "path", "script_path") are a
  # defensive fallback; confirm the actual names against the skill toolset's
  # tool declarations.
  _SKILL_NAME_ARGS = ("skill_name", "name")
  _PATH_ARGS = ("file_path", "path", "script_path")

  def __init__(
      self,
      name: str = "taxonomy_plugin",
      *,
      taxonomy_registry: Optional[TaxonomyRegistry] = None,
      resolver: Optional[TaxonomyResolver] = None,
      policy: Optional[SkillPolicy] = None,
  ):
    super().__init__(name)
    self.taxonomy_registry = taxonomy_registry or TaxonomyRegistry()
    self.resolver = resolver
    self.policy = policy

  # ──────────────────────────────────────────────────────────────────
  # 1. Taxonomy Resolution (before each LLM call)
  # ──────────────────────────────────────────────────────────────────

  async def before_model_callback(
      self, *, callback_context: CallbackContext, llm_request: LlmRequest
  ) -> Optional[LlmResponse]:
    """Resolves active taxonomies and stores them in session state.

    The result is written under ``_ACTIVE_TAXONOMIES_STATE_KEY`` so the tool
    callbacks can read it back from ``tool_context.state``.

    Returns:
      Always None; the LLM request is never replaced.
    """
    if not self.resolver:
      return None

    resolved = await self.resolver.resolve_taxonomies(
        callback_context, llm_request
    )
    # Store a plain list even if the resolver returned None, a tuple or a
    # set, so readers of the state key always see the same shape.
    active_taxonomies = list(resolved or [])
    callback_context.state[_ACTIVE_TAXONOMIES_STATE_KEY] = active_taxonomies

    logger.debug(
        "[%s] Resolved active taxonomies: %s", self.name, active_taxonomies
    )
    return None

  # ──────────────────────────────────────────────────────────────────
  # 2. Skill Discovery & Execution Gating (before tool runs)
  # ──────────────────────────────────────────────────────────────────

  async def before_tool_callback(
      self,
      *,
      tool: BaseTool,
      tool_args: dict[str, Any],
      tool_context: ToolContext,
  ) -> Optional[dict]:
    """Intercepts skill tools to enforce argument validation and policy.

    For ``list_skills`` the call is delegated to ``_filter_list_skills``.

    For ``load_skill``, ``load_skill_resource`` and ``run_skill_script``:
      1. The skill name is validated with ``_validate_path_segment``.
      2. Any path argument is rejected if absolute or parent-traversing.
      3. ``SkillPolicy.is_skill_allowed`` is consulted when both a policy
         and a resolver are configured.

    Returns:
      None to let the tool run, or an ``{"error", "error_code"}`` dict that
      is returned in place of running the tool.
    """
    if tool.name not in _SKILL_GATE_TOOLS:
      return None

    active_taxonomies = (
        tool_context.state.get(_ACTIVE_TAXONOMIES_STATE_KEY) or []
    )

    if tool.name == "list_skills":
      return self._filter_list_skills(tool, tool_context, active_taxonomies)

    skill_name = self._skill_name_from_args(tool_args)
    if not skill_name:
      # Nothing to gate on; the tool reports its own missing-argument error.
      return None

    # 1. Skill name must be a single safe path segment.
    if not isinstance(skill_name, str):
      return {
          "error": "Invalid skill_name parameter: expected a string",
          "error_code": "INVALID_ARGUMENTS",
      }
    try:
      _validate_path_segment(skill_name, "skill_name")
    except InputValidationError as e:
      return {
          "error": f"Invalid skill_name parameter: {e}",
          "error_code": "INVALID_ARGUMENTS",
      }

    # 2. Path arguments must stay inside the skill directory.
    for arg_name in self._PATH_ARGS:
      candidate = tool_args.get(arg_name)
      if candidate and self._is_unsafe_relative_path(candidate):
        return {
            "error": f"Path traversal attempt blocked: {candidate}",
            "error_code": "INVALID_ARGUMENTS",
        }

    # 3. Policy check.
    if self.policy and self.resolver:
      skill = await self._fetch_skill(tool, skill_name, tool_context)
      if skill and not self.policy.is_skill_allowed(
          skill, tool_context, active_taxonomies
      ):
        logger.warning(
            "[%s] Skill '%s' blocked by policy. Active taxonomies: %s",
            self.name,
            skill_name,
            active_taxonomies,
        )
        return {
            "error": (
                f"Access to skill '{skill_name}' is not permitted"
                " under active policy constraints."
            ),
            "error_code": "SKILL_NOT_PERMITTED",
        }

    return None

  def _filter_list_skills(
      self, tool: BaseTool, tool_context: ToolContext, active_taxonomies: list[str]
  ) -> Optional[dict]:
    """Builds a ``list_skills`` result containing only permitted skills.

    Returns None (tool runs normally, all skills visible) when no policy or
    resolver is configured, or when the tool exposes no ``_toolset``.

    Otherwise returns ``{"result": <xml>}`` where the XML is produced by
    ``prompt.format_skills_as_xml`` from the permitted skills.

    Note: This reads ``tool._toolset._list_skills()``, a private API of the
    skill toolset.
    """
    if not self.policy or not self.resolver:
      return None

    toolset = getattr(tool, "_toolset", None)
    if not toolset:
      return None

    all_skills = toolset._list_skills()
    allowed_skills = [
        skill
        for skill in all_skills
        if self.policy.is_skill_allowed(skill, tool_context, active_taxonomies)
    ]

    logger.debug(
        "[%s] Filtered skills: %d/%d visible",
        self.name,
        len(allowed_skills),
        len(all_skills),
    )
    return {"result": prompt.format_skills_as_xml(allowed_skills)}

  # ──────────────────────────────────────────────────────────────────
  # 3. Instruction Shaping (after load_skill runs)
  # ──────────────────────────────────────────────────────────────────

  async def after_tool_callback(
      self,
      *,
      tool: BaseTool,
      tool_args: dict[str, Any],
      tool_context: ToolContext,
      result: dict,
  ) -> Optional[dict]:
    """Applies dynamic instruction shaping to ``load_skill`` results.

    Only ``load_skill`` results that are dicts with an ``instructions`` key
    are considered, and only when both a policy and a resolver are set.

    Returns:
      A copy of ``result`` with shaped ``instructions`` when the policy
      changed them; None when there is nothing to change, so the original
      tool result is kept and this plugin does not override it.
      NOTE(review): a non-None return presumably also stops later plugins'
      callbacks — confirm against the plugin manager.
    """
    if tool.name != "load_skill":
      return None
    if not self.policy or not self.resolver:
      return None
    if not isinstance(result, dict) or "instructions" not in result:
      return None

    skill_name = self._skill_name_from_args(tool_args)
    if not skill_name:
      return None

    skill = await self._fetch_skill(tool, skill_name, tool_context)
    if not skill:
      return None

    original_instructions = result["instructions"]
    shaped_instructions = self.policy.shape_instructions(
        skill, tool_context, original_instructions
    )
    if shaped_instructions == original_instructions:
      return None

    logger.debug(
        "[%s] Shaped instructions for skill '%s'",
        self.name,
        skill_name,
    )
    # Copy so the tool's own result dict is never mutated.
    return {**result, "instructions": shaped_instructions}

  # ──────────────────────────────────────────────────────────────────
  # Private helpers
  # ──────────────────────────────────────────────────────────────────

  @classmethod
  def _skill_name_from_args(cls, tool_args: dict[str, Any]) -> Any:
    """Returns the first non-empty skill identifier in ``tool_args``."""
    for key in cls._SKILL_NAME_ARGS:
      value = tool_args.get(key)
      if value:
        return value
    return None

  @staticmethod
  def _is_unsafe_relative_path(path: Any) -> bool:
    """Reports whether ``path`` could escape the directory it is relative to.

    Unsafe means: not a string, contains a NUL byte, starts with a slash or
    backslash, starts with a drive prefix such as ``C:``, or has a ``..``
    path segment (either separator). A name that merely contains two dots,
    such as ``notes..md``, is not treated as traversal.
    """
    if not isinstance(path, str) or "\x00" in path:
      return True
    if path.startswith(("/", "\\")):
      return True
    if len(path) >= 2 and path[0].isalpha() and path[1] == ":":
      return True
    return ".." in path.replace("\\", "/").split("/")

  @staticmethod
  async def _fetch_skill(
      tool: BaseTool, skill_name: str, tool_context: ToolContext
  ) -> Any:
    """Looks up a skill through the tool's private ``_toolset``, if any.

    Returns None when the tool has no ``_toolset``; otherwise whatever
    ``_get_or_fetch_skill`` returns for this invocation.
    """
    toolset = getattr(tool, "_toolset", None)
    if not toolset:
      return None
    return await toolset._get_or_fetch_skill(
        skill_name, tool_context.invocation_id
    )
@field_validator("taxonomy_binds")
@classmethod
def _validate_taxonomy_binds(cls, v: list[str]) -> list[str]:
  """Normalizes each taxonomy bind tag and rejects malformed ones.

  Every tag is NFKC-normalized and stripped of surrounding whitespace, then
  checked against ``_TAXONOMY_BIND_PATTERN``.

  Args:
    v: The bind tags as parsed from the frontmatter.

  Returns:
    The normalized tags, in their original order.

  Raises:
    ValueError: If a tag is not a string or fails the pattern check.
  """

  def _clean(tag):
    if not isinstance(tag, str):
      raise ValueError("Taxonomy tags must be strings")
    candidate = unicodedata.normalize("NFKC", tag).strip()
    if _TAXONOMY_BIND_PATTERN.match(candidate) is None:
      raise ValueError(
          f"Invalid characters in taxonomy bind tag: {candidate}"
      )
    return candidate

  return [_clean(tag) for tag in v]
def _ensure_genai_avatar_config() -> None:
  """Adds a stand-in ``google.genai.types.AvatarConfig`` when it is missing.

  Only patches the attribute onto the real, already-importable module. If
  ``google.genai.types`` cannot be imported, nothing is changed: fabricating
  replacement ``google`` / ``google.genai`` modules in ``sys.modules`` would
  replace the real ``google`` package with a module that has no ``__path__``,
  breaking every later ``google.*`` import and hiding the actual import
  error.
  """
  try:
    import google.genai.types as genai_types
  except ImportError:
    return

  if hasattr(genai_types, "AvatarConfig"):
    return

  from pydantic import BaseModel

  class AvatarConfig(BaseModel):
    pass

  genai_types.AvatarConfig = AvatarConfig


_ensure_genai_avatar_config()
def test_taxonomy_term():
  """Checks explicit fields and defaults of a directly built TaxonomyTerm."""
  created = TaxonomyTerm(id="tech", name="Technology", definition="Tech domain")

  # Fields passed to the constructor come back unchanged.
  explicit = (created.id, created.name, created.definition)
  assert explicit == ("tech", "Technology", "Tech domain")

  # Omitted fields fall back to their declared defaults.
  assert created.parent_id is None
  assert created.alt_labels == []
@pytest.mark.asyncio
async def test_taxonomy_pipeline():
  """Checks that a pipeline merges the domains of all chained resolvers."""

  class FixedResolver(TaxonomyResolver):
    """Resolver that always reports the domains it was built with."""

    def __init__(self, domains: list[str]):
      self._domains = domains

    async def resolve_taxonomies(self, context, llm_request) -> list[str]:
      return self._domains

  stages = [FixedResolver(["eng"]), FixedResolver(["finance"])]
  merged = await TaxonomyPipeline(stages).resolve_taxonomies(
      mock.MagicMock(), mock.MagicMock()
  )

  # Compare sorted: this test only checks which domains are present.
  assert sorted(merged) == ["eng", "finance"]
taxonomy_binds=["eng"], + ), + instructions="body", + ) + skill_finance = Skill( + frontmatter=Frontmatter( + name="finance-skill", + description="Desc", + taxonomy_binds=["finance"], + ), + instructions="body", + ) + + # Check validation against active taxonomies + context = mock.MagicMock() + assert policy.is_skill_allowed(skill_eng, context, ["eng"]) is True + assert policy.is_skill_allowed(skill_finance, context, ["eng"]) is False + assert policy.is_skill_allowed(skill_finance, context, ["eng", "finance"]) is True + + # Skill with no taxonomy-binds is allowed by default + skill_unrestricted = Skill( + frontmatter=Frontmatter(name="any-skill", description="Desc"), + instructions="body", + ) + assert policy.is_skill_allowed(skill_unrestricted, context, ["marketing"]) is True + + # Original instructions should be preserved unmodified + assert policy.shape_instructions(skill_eng, context, "original") == "original" + + +@pytest.mark.asyncio +async def test_taxonomy_plugin_list_skills(): + """Tests TaxonomyPlugin intercepts and filters skill lists correctly.""" + + class RestrictedPolicy(SkillPolicy): + + def is_skill_allowed(self, skill: Skill, context, active_taxonomies: list[str]) -> bool: + return "eng" in skill.frontmatter.taxonomy_binds + + def shape_instructions(self, skill: Skill, context, original_instructions: str) -> str: + return original_instructions + + mock_resolver = mock.MagicMock() + plugin = TaxonomyPlugin(policy=RestrictedPolicy(), resolver=mock_resolver) + + # Mock list of skills + skills = { + "skill-1": Skill( + frontmatter=Frontmatter( + name="skill-1", + description="Desc", + taxonomy_binds=["eng"], + ), + instructions="body", + ), + "skill-2": Skill( + frontmatter=Frontmatter( + name="skill-2", + description="Desc", + taxonomy_binds=["finance"], + ), + instructions="body", + ), + } + + context = mock.MagicMock() + # Mock active taxonomies in tool context state + context.state = {"_active_taxonomies": ["eng"]} + + # Mock the tool object for 
list_skills + mock_tool = mock.MagicMock() + mock_tool.name = "list_skills" + mock_tool._toolset._list_skills.return_value = list(skills.values()) + + # Set up mocks for skill filtering within plugin + with mock.patch("google.adk.plugins.taxonomy.taxonomy_plugin.prompt.format_skills_as_xml") as mock_format: + mock_format.return_value = "" + + # Trigger list_skills interceptor via before_tool_callback + result = await plugin.before_tool_callback( + tool=mock_tool, + tool_args={}, + tool_context=context, + ) + + # Must return dictionary wrapped XML per contract + assert isinstance(result, dict) + assert "result" in result + assert "skill-1" in result["result"] + assert "skill-2" not in result["result"] + + diff --git a/tests/unittests/skills/test__utils.py b/tests/unittests/skills/test__utils.py index abae9cd8b8..91ca06448a 100644 --- a/tests/unittests/skills/test__utils.py +++ b/tests/unittests/skills/test__utils.py @@ -86,6 +86,26 @@ def test_allowed_tools_yaml_key(tmp_path): assert skill.frontmatter.allowed_tools == "some-tool-*" +def test_taxonomy_binds_yaml_key(tmp_path): + """Tests that taxonomy-binds YAML key loads correctly.""" + skill_dir = tmp_path / "my-skill" + skill_dir.mkdir() + + skill_md = """--- +name: my-skill +description: A skill +taxonomy-binds: + - "engineering" + - "machine-learning" +--- +Instructions here +""" + (skill_dir / "SKILL.md").write_text(skill_md) + + skill = _load_skill_from_dir(skill_dir) + assert skill.frontmatter.taxonomy_binds == ["engineering", "machine-learning"] + + def test_name_directory_mismatch(tmp_path): """Tests that name-directory mismatch raises ValueError.""" skill_dir = tmp_path / "wrong-dir" diff --git a/tests/unittests/skills/test_models.py b/tests/unittests/skills/test_models.py index ffbbb2dd50..6a82ebd957 100644 --- a/tests/unittests/skills/test_models.py +++ b/tests/unittests/skills/test_models.py @@ -232,3 +232,49 @@ def test_metadata_adk_additional_tools_invalid_type(): "description": "desc", "metadata": 
{"adk_additional_tools": 123}, }) + + +# --- taxonomy-binds validation tests --- + + +def test_taxonomy_binds_valid(): + fm = models.Frontmatter.model_validate({ + "name": "my-skill", + "description": "desc", + "taxonomy-binds": ["engineering", "machine-learning", "domain:sub/tag.1_2"], + }) + assert fm.taxonomy_binds == [ + "engineering", + "machine-learning", + "domain:sub/tag.1_2", + ] + + +def test_taxonomy_binds_invalid_spaces(): + with pytest.raises(ValidationError, match="Invalid characters in taxonomy bind tag"): + models.Frontmatter.model_validate({ + "name": "my-skill", + "description": "desc", + "taxonomy-binds": ["engineering invalid"], + }) + + +def test_taxonomy_binds_invalid_type(): + with pytest.raises(ValidationError, match="Input should be a valid string"): + models.Frontmatter.model_validate({ + "name": "my-skill", + "description": "desc", + "taxonomy-binds": [123], + }) + + +def test_taxonomy_binds_serialization_alias(): + fm = models.Frontmatter( + name="my-skill", + description="desc", + taxonomy_binds=["engineering", "machine-learning"], + ) + dumped = fm.model_dump(by_alias=True) + assert "taxonomy-binds" in dumped + assert dumped["taxonomy-binds"] == ["engineering", "machine-learning"] + From 3b0839608a3902b78ec9d29c66e953843253a1a5 Mon Sep 17 00:00:00 2001 From: Viktor Veselov Date: Fri, 29 May 2026 14:26:26 -0400 Subject: [PATCH 3/3] feature: remove unnecessary comments --- .../adk/plugins/taxonomy/taxonomy_plugin.py | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/google/adk/plugins/taxonomy/taxonomy_plugin.py b/src/google/adk/plugins/taxonomy/taxonomy_plugin.py index fb7228a611..39af5f6df8 100644 --- a/src/google/adk/plugins/taxonomy/taxonomy_plugin.py +++ b/src/google/adk/plugins/taxonomy/taxonomy_plugin.py @@ -99,10 +99,7 @@ def __init__( self.resolver = resolver self.policy = policy - # ────────────────────────────────────────────────────────────────── - # 1.
Taxonomy Resolution (before each LLM call) - # ────────────────────────────────────────────────────────────────── - + # Taxonomy Resolution before each LLM call async def before_model_callback( self, *, callback_context: CallbackContext, llm_request: LlmRequest ) -> Optional[LlmResponse]: @@ -124,9 +121,7 @@ async def before_model_callback( ) return None - # ────────────────────────────────────────────────────────────────── - # 2. Skill Discovery & Execution Gating (before tool runs) - # ────────────────────────────────────────────────────────────────── + # Skill Discovery & Execution Gating before tool runs async def before_tool_callback( self, @@ -153,16 +148,16 @@ async def before_tool_callback( tool_context.state.get(_ACTIVE_TAXONOMIES_STATE_KEY) or [] ) - # ── list_skills: filter the returned skill list ────────────── + # list_skills: filter the returned skill list if tool.name == "list_skills": return self._filter_list_skills(tool, tool_context, active_taxonomies) - # ── load/resource/script: validate and gate ────────────────── + # load/resource/script: validate and gate skill_name = tool_args.get("skill_name") if not skill_name: return None - # 1. REUSE SDK PATH VALIDATION — prevents traversal, null-byte, slash escapes + # REUSE SDK PATH VALIDATION - prevents traversal, null-byte, slash escapes try: _validate_path_segment(skill_name, "skill_name") except InputValidationError as e: @@ -171,7 +166,7 @@ async def before_tool_callback( "error_code": "INVALID_ARGUMENTS", } - # 2. DIRECTORY TRAVERSAL GUARD on file_path + # DIRECTORY TRAVERSAL GUARD on file_path file_path = tool_args.get("file_path") if file_path: if ".." in file_path or file_path.startswith(("/", "\\")): @@ -180,7 +175,7 @@ async def before_tool_callback( "error_code": "INVALID_ARGUMENTS", } - # 3. 
SKILL POLICY CHECK + # SKILL POLICY CHECK if self.policy and self.resolver: toolset = getattr(tool, "_toolset", None) if toolset: @@ -245,10 +240,7 @@ def _filter_list_skills( ) return {"result": prompt.format_skills_as_xml(allowed_skills)} - # ────────────────────────────────────────────────────────────────── - # 3. Instruction Shaping (after load_skill runs) - # ────────────────────────────────────────────────────────────────── - + # Instruction Shaping after load_skill async def after_tool_callback( self, *,