From 737dcb8e0875c4bccbbf3a59f3ead36f4a38d8f3 Mon Sep 17 00:00:00 2001 From: haya-asif Date: Thu, 18 Jun 2026 16:32:49 +0500 Subject: [PATCH] fix(sdk): redact plugin and extension source credentials on serialization (#3752) --- .../conversation/impl/local_conversation.py | 8 ++- .../sdk/extensions/installation/info.py | 8 ++- openhands-sdk/openhands/sdk/plugin/types.py | 31 +++++++--- .../test_local_conversation_plugins.py | 34 +++++++++++ .../installation/test_installation_info.py | 15 +++++ .../test_installation_metadata.py | 25 ++++++++ tests/sdk/git/test_url_redaction.py | 57 +++++++++++++++++-- 7 files changed, 159 insertions(+), 19 deletions(-) diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 9936e5ef68..825984f4cd 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -78,6 +78,7 @@ from openhands.sdk.tool.client_tool import ClientToolSpec from openhands.sdk.tool.schema import Action, Observation from openhands.sdk.utils.cipher import Cipher +from openhands.sdk.utils.redact import redact_url_credentials from openhands.sdk.workspace import LocalWorkspace @@ -680,15 +681,16 @@ def _expand_secret_refs(value: str) -> str: # Store resolved ref for persistence. Build this from the # ORIGINAL spec (not the expanded values) so the persisted # record keeps the ${VAR} placeholder rather than the raw - # secret. from_plugin_source() additionally redacts any inline - # credentials. Resume re-fetches via the resolved commit SHA. + # secret. The source field serializer redacts any inline + # credentials on dump. Resume re-fetches via the resolved SHA. resolved = ResolvedPluginSource.from_plugin_source(spec, resolved_ref) self._resolved_plugins.append(resolved) # Load the plugin plugin = Plugin.load(path) logger.debug( - f"Loaded plugin '{plugin.manifest.name}' from {spec.source}" + f"Loaded plugin '{plugin.manifest.name}' from " + f"{redact_url_credentials(spec.source)}" + (f" @ {resolved_ref[:8]}" if resolved_ref else "") ) diff --git a/openhands-sdk/openhands/sdk/extensions/installation/info.py b/openhands-sdk/openhands/sdk/extensions/installation/info.py index d1ef16ae60..b583bd809d 100644 --- a/openhands-sdk/openhands/sdk/extensions/installation/info.py +++ b/openhands-sdk/openhands/sdk/extensions/installation/info.py @@ -3,9 +3,10 @@ from datetime import UTC, datetime from pathlib import Path -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, field_serializer from openhands.sdk.extensions.installation.interface import ExtensionProtocol +from openhands.sdk.utils.redact import redact_url_credentials class InstallationInfo(BaseModel): @@ -36,6 +37,11 @@ class InstallationInfo(BaseModel): ) install_path: Path = Field(description="Path where the extension is installed") + @field_serializer("source") + def _redact_source(self, source: str) -> str: + """Redact inline URL credentials on dump (kept raw in memory).""" + return redact_url_credentials(source) + @staticmethod def from_extension( extension: ExtensionProtocol, diff --git a/openhands-sdk/openhands/sdk/plugin/types.py b/openhands-sdk/openhands/sdk/plugin/types.py index 48fd4a6228..883941dbbc 100644 --- a/openhands-sdk/openhands/sdk/plugin/types.py +++ b/openhands-sdk/openhands/sdk/plugin/types.py @@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Any import frontmatter -from pydantic import BaseModel, Field, field_validator +from pydantic import BaseModel, Field, field_serializer, field_validator from openhands.sdk.utils.path import to_posix_path from openhands.sdk.utils.redact import redact_url_credentials @@ -64,6 +64,11 @@ def validate_repo_path(cls, v: str | None) -> str | None: ) return v + @field_serializer("source") + def _redact_source(self, source: str) -> str: + """Redact inline URL credentials on dump (kept raw in memory for fetch).""" + return redact_url_credentials(source) + @property def source_url(self) -> str | None: """Convert the plugin source to a canonical URL. @@ -119,10 +124,12 @@ class ResolvedPluginSource(BaseModel): branches are updated between pause and resume. Security Note: - The source URL is redacted when created via from_plugin_source() to - prevent credential exposure in persisted state. Any credentials in - the original URL (e.g., https://oauth2:TOKEN@host) are replaced with - "****" (e.g., https://****@host). This is safe because: + A field serializer redacts the source URL whenever this model is + dumped (``model_dump`` / ``model_dump_json``), so credentials never + reach persisted state or logs. Any credentials in the original URL + (e.g., https://oauth2:TOKEN@host) are replaced with "****" (e.g., + https://****@host). The raw value is kept in memory. This is safe + because: 1. The plugin is fetched and cached BEFORE this object is created 2. The resolved_ref (commit SHA) uniquely identifies the exact version 3. Resume operations can re-fetch using the SHA from the local cache @@ -151,18 +158,24 @@ class ResolvedPluginSource(BaseModel): description="Original ref from PluginSource (for debugging/display)", ) + @field_serializer("source") + def _redact_source(self, source: str) -> str: + """Redact inline URL credentials on dump (kept raw in memory for fetch).""" + return redact_url_credentials(source) + @classmethod def from_plugin_source( cls, plugin_source: PluginSource, resolved_ref: str | None ) -> ResolvedPluginSource: """Create a ResolvedPluginSource from a PluginSource and resolved ref. - The source URL is automatically redacted to prevent credential exposure - in persisted state. This is safe because the plugin should already be - fetched and cached before creating the ResolvedPluginSource. + The source URL is kept verbatim in memory; the field serializer redacts + any embedded credentials when this model is dumped to persisted state. + This is safe because the plugin should already be fetched and cached + before creating the ResolvedPluginSource. """ return cls( - source=redact_url_credentials(plugin_source.source), + source=plugin_source.source, resolved_ref=resolved_ref, repo_path=plugin_source.repo_path, original_ref=plugin_source.ref, diff --git a/tests/sdk/conversation/test_local_conversation_plugins.py b/tests/sdk/conversation/test_local_conversation_plugins.py index 5c2be91d89..41cf9b99ac 100644 --- a/tests/sdk/conversation/test_local_conversation_plugins.py +++ b/tests/sdk/conversation/test_local_conversation_plugins.py @@ -1,6 +1,7 @@ """Tests for plugin loading via LocalConversation and Conversation factory.""" import json +import logging from pathlib import Path from unittest.mock import MagicMock, patch @@ -749,3 +750,36 @@ def fake_fetch(source, ref=None, repo_path=None, **kwargs): assert captured["ref"] == "v1.2.3" conversation.close() + + def test_plugin_load_log_redacts_credentials( + self, tmp_path: Path, basic_agent, caplog + ): + """The plugin-load debug log must not leak inline URL credentials.""" + source = "https://oauth2:LITERAL_SECRET@host.example.com/org/repo.git" + conversation, plugin_dir = self._make_conversation( + tmp_path, basic_agent, source + ) + + def fake_fetch(source, ref=None, repo_path=None, **kwargs): + return plugin_dir, "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef" + + with patch( + "openhands.sdk.conversation.impl.local_conversation." + "fetch_plugin_with_resolution", + side_effect=fake_fetch, + ): + with caplog.at_level( + logging.DEBUG, + logger="openhands.sdk.conversation.impl.local_conversation", + ): + conversation._ensure_plugins_loaded() + + load_logs = [ + r.getMessage() for r in caplog.records if "Loaded plugin" in r.getMessage() + ] + assert load_logs, "expected a 'Loaded plugin' debug log line" + joined = "\n".join(load_logs) + assert "LITERAL_SECRET" not in joined + assert "****" in joined + + conversation.close() diff --git a/tests/sdk/extensions/installation/test_installation_info.py b/tests/sdk/extensions/installation/test_installation_info.py index c334c51e3c..cb78cdfde9 100644 --- a/tests/sdk/extensions/installation/test_installation_info.py +++ b/tests/sdk/extensions/installation/test_installation_info.py @@ -34,3 +34,18 @@ def test_installation_info_from_extension(): assert info.repo_path is None assert datetime.fromisoformat(info.installed_at) + + +def test_source_credentials_redacted_on_dump(): + """InstallationInfo keeps the raw source in memory but redacts it on dump.""" + cred = "https://oauth2:SUPER_SECRET@github.com/org/repo.git" + info = InstallationInfo(name="x", source=cred, install_path=Path("/tmp/x")) + + # Raw value retained in memory. + assert info.source == cred + + # Redacted at the serialization boundary (model_dump / model_dump_json). + dumped = info.model_dump_json() + assert "SUPER_SECRET" not in dumped + assert "****" in dumped + assert info.model_dump()["source"] == "https://****@github.com/org/repo.git" diff --git a/tests/sdk/extensions/installation/test_installation_metadata.py b/tests/sdk/extensions/installation/test_installation_metadata.py index bf9d662192..568757537d 100644 --- a/tests/sdk/extensions/installation/test_installation_metadata.py +++ b/tests/sdk/extensions/installation/test_installation_metadata.py @@ -226,6 +226,31 @@ def test_load_from_dir_and_save_to_dir(tmp_path: Path): assert metadata == loaded_metadata +def test_credentials_redacted_in_persisted_metadata(tmp_path: Path): + """Credential-bearing sources must never be written to .installed.json.""" + installation_dir = tmp_path / "installed" + installation_dir.mkdir() + + cred = "https://oauth2:SUPER_SECRET@github.com/org/repo.git" + info = InstallationInfo( + name="private-ext", + source=cred, + install_path=installation_dir / "private-ext", + ) + metadata = InstallationMetadata(extensions={"private-ext": info}) + metadata.save_to_dir(installation_dir) + + on_disk = InstallationMetadata.get_metadata_path(installation_dir).read_text() + assert "SUPER_SECRET" not in on_disk + assert "****" in on_disk + + # Reloaded metadata carries the redacted source (credentials gone for good). + loaded = InstallationMetadata.load_from_dir(installation_dir) + assert loaded.extensions["private-ext"].source == ( + "https://****@github.com/org/repo.git" + ) + + def test_load_from_dir_invalid_json(tmp_path: Path): """Test loading invalid JSON returns empty metadata.""" installation_dir = tmp_path / "installed" diff --git a/tests/sdk/git/test_url_redaction.py b/tests/sdk/git/test_url_redaction.py index bd0b8357f6..5539945e7a 100644 --- a/tests/sdk/git/test_url_redaction.py +++ b/tests/sdk/git/test_url_redaction.py @@ -4,6 +4,7 @@ from unittest.mock import patch import pytest +from pydantic import BaseModel from openhands.sdk.git.exceptions import GitCommandError from openhands.sdk.git.utils import ( @@ -118,8 +119,8 @@ def test_special_characters_in_token(self): class TestResolvedPluginSourceCredentialRedaction: """Tests for credential redaction in ResolvedPluginSource.""" - def test_from_plugin_source_redacts_credentials(self): - """Should redact credentials when creating from PluginSource.""" + def test_from_plugin_source_keeps_raw_source_redacts_on_dump(self): + """Source is kept raw in memory; credentials are redacted on dump.""" plugin_source = PluginSource( source="https://oauth2:SECRET_TOKEN@gitlab.com/org/repo.git", ref="main", @@ -127,8 +128,11 @@ def test_from_plugin_source_redacts_credentials(self): resolved = ResolvedPluginSource.from_plugin_source( plugin_source, resolved_ref="abc123def456" ) - assert resolved.source == "https://****@gitlab.com/org/repo.git" - assert "SECRET_TOKEN" not in resolved.source + # Raw value retained in memory (needed to re-fetch/clone). + assert resolved.source == "https://oauth2:SECRET_TOKEN@gitlab.com/org/repo.git" + # ...but redacted at the serialization boundary. + assert "SECRET_TOKEN" not in resolved.model_dump_json() + assert resolved.model_dump()["source"] == "https://****@gitlab.com/org/repo.git" assert resolved.resolved_ref == "abc123def456" assert resolved.original_ref == "main" @@ -154,7 +158,7 @@ def test_from_plugin_source_preserves_local_path(self): assert resolved.resolved_ref is None def test_from_plugin_source_preserves_repo_path(self): - """Should preserve repo_path when redacting credentials.""" + """Should preserve repo_path; credentials redacted on dump.""" plugin_source = PluginSource( source="https://token@github.com/org/monorepo.git", ref="main", @@ -163,8 +167,11 @@ def test_from_plugin_source_preserves_repo_path(self): resolved = ResolvedPluginSource.from_plugin_source( plugin_source, resolved_ref="abc123" ) - assert resolved.source == "https://****@github.com/org/monorepo.git" + assert resolved.source == "https://token@github.com/org/monorepo.git" assert resolved.repo_path == "plugins/my-plugin" + assert resolved.model_dump()["source"] == ( + "https://****@github.com/org/monorepo.git" + ) def test_to_plugin_source_uses_redacted_url(self): """When converting back to PluginSource, should use the redacted URL.""" @@ -193,6 +200,44 @@ def test_serialization_does_not_expose_credentials(self): assert "****" in json_str +class TestPluginSourceCredentialRedaction: + """PluginSource redacts inline URL credentials at the serialization boundary.""" + + CRED = "https://oauth2:SUPER_SECRET@gitlab.com/org/repo.git" + REDACTED = "https://****@gitlab.com/org/repo.git" + + def test_model_dump_json_redacts_credentials(self): + dumped = PluginSource(source=self.CRED).model_dump_json() + assert "SUPER_SECRET" not in dumped + assert "****" in dumped + + def test_model_dump_python_redacts_credentials(self): + assert PluginSource(source=self.CRED).model_dump()["source"] == self.REDACTED + + def test_raw_source_kept_in_memory_for_fetch(self): + # The attribute keeps the real URL so the plugin can still be cloned. + assert PluginSource(source=self.CRED).source == self.CRED + + def test_nested_serialization_redacts(self): + class Wrapper(BaseModel): + plugin: PluginSource + + wrapper = Wrapper(plugin=PluginSource(source=self.CRED)) + assert "SUPER_SECRET" not in wrapper.model_dump_json() + + @pytest.mark.parametrize( + "source", + [ + "github:owner/repo", + "/path/to/local/plugin", + "git@github.com:owner/repo.git", + "https://github.com/owner/repo.git", + ], + ) + def test_non_credential_sources_unchanged(self, source): + assert PluginSource(source=source).model_dump()["source"] == source + + class TestRedactUrlCredentialsCentralModule: """Verify redact_url_credentials is accessible from the central redact module."""