From 0b60ee2e447d92e2e44745c165c01917b8ea07c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Laurenz=20Altenm=C3=BCller?= Date: Mon, 5 Aug 2024 16:42:10 +0200 Subject: [PATCH 1/2] Quote strings that contain `:` --- docs/index.md | 1 + src/yamlfix/adapters.py | 87 +++++++++++++++++++++++++++++- tests/unit/test_services.py | 104 ++++++++++++++++++++++++++++++++++++ 3 files changed, 191 insertions(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index 00aeb96..35d78ab 100644 --- a/docs/index.md +++ b/docs/index.md @@ -87,6 +87,7 @@ variable, use `fix_code`: - item - item ``` +- Quote strings that contain `:` to prevent some parsers from misinterpreting them as mappings. # Configuration diff --git a/src/yamlfix/adapters.py b/src/yamlfix/adapters.py index ac2d0d4..267aa93 100644 --- a/src/yamlfix/adapters.py +++ b/src/yamlfix/adapters.py @@ -2,10 +2,12 @@ import logging import re +from collections.abc import Mapping from functools import partial from io import StringIO -from typing import Any, Callable, List, Match, Optional, Tuple +from typing import Any, Callable, List, Match, NamedTuple, Optional, Tuple +from ruyaml.comments import CommentedMap, CommentedSeq from ruyaml.main import YAML from ruyaml.nodes import MappingNode, Node, ScalarNode, SequenceNode from ruyaml.representer import RoundTripRepresenter @@ -349,6 +351,7 @@ def fix(self, source_code: str) -> str: self._fix_truthy_strings, self._fix_jinja_variables, self._ruamel_yaml_fixer, + self._quote_strings_with_colons, self._restore_truthy_strings, self._restore_jinja_variables, self._restore_double_exclamations, @@ -795,3 +798,85 @@ def _restore_jinja_variables(source_code: str) -> str: fixed_source_lines.append(line) return "\n".join(fixed_source_lines) + + def _quote_strings_with_colons(self, source_code: str) -> str: + """Fix strings with colons to be quoted. + + Example: + volumes: [/root:/mapped] + becomes + volumes: ["/root:/mapped"] + + We do this by + 1. loading the yaml + 2. recursively scanning for strings that + * contain colons + * are not already quoted + * are not multi-line strings (see note). + 3. Adding quotes at the string start and end locations in the source_code. + + Note: Multi-line strings are not supported because ruyaml only provides the + start location of a scalar, but not the end location. For single-line strings + you can calculate the end location by adding the length of the string to the + start, but for strings broken over multiple lines this is not straightforward. + """ + log.debug("Fixing unquoted strings with colons...") + + class ToFix(NamedTuple): + """Where to insert quotes.""" + + line: int + start: int + end: int + + positions_to_quote = set() + lines = source_code.splitlines() + + def add(item: str, line: int, col: int) -> None: + is_quoted = lines[line][col] in ['"', "'"] + if ":" in item and not is_quoted: + to_fix = ToFix( + line=line, + start=col, + end=col + len(item), + ) + if to_fix.end <= len(lines[to_fix.line]): + positions_to_quote.add(to_fix) + else: + log.debug("Skipping %r which is multi-line", item) + + def check(value: CommentedSeq | Mapping[str, Any]) -> None: + if isinstance(value, CommentedSeq): + for i, item in enumerate(value): + if isinstance(item, str): + line, col = value.lc.item(i) + add(item, line, col) + else: + check(item) + elif isinstance(value, CommentedMap): + for key, item in value.items(): + if isinstance(item, str): + try: + line, col = value.lc.value(key) + except (KeyError, TypeError): + # May not be available if merged from an anchor + pass + else: + add(item, line, col) + else: + check(item) + + source_dicts = self.yaml.load_all(source_code) + for source_dict in source_dicts: + check(source_dict) + + for to_fix in sorted(positions_to_quote, reverse=True): + lines[to_fix.line] = (self.config.quote_representation or "'").join( + [ + lines[to_fix.line][: to_fix.start], # noqa: E203: black disagrees + lines[to_fix.line][to_fix.start : to_fix.end], # noqa: E203 + lines[to_fix.line][to_fix.end :], # noqa: E203 + ] + ) + + return "\n".join(lines) diff --git a/tests/unit/test_services.py b/tests/unit/test_services.py index ca23f1b..f185c9f 100644 --- a/tests/unit/test_services.py +++ b/tests/unit/test_services.py @@ -497,6 +497,7 @@ def test_fix_code_functions_emit_debug_logs( "Fixing comments...", "Fixing top level lists...", "Fixing flow-style lists...", + "Fixing unquoted strings with colons...", } assert set(caplog.messages) == expected_logs for record in caplog.records: @@ -1034,3 +1035,106 @@ def test_fix_code_fix_whitelines( result = fix_code(source_code=source, config=config) assert result == desired_source + + @pytest.mark.parametrize( + ("source", "config", "desired_source"), + [ + ( + "volumes: [/root:/mapped, a:b, 'c:d']", + YamlfixConfig(sequence_style=YamlNodeStyle.FLOW_STYLE), + dedent( + """\ + --- + volumes: ['/root:/mapped', 'a:b', 'c:d'] + """ + ), + ), + ( + dedent( + """\ + volumes: + - /root:/mapped + - a:b + - 'c:d' + """ + ), + YamlfixConfig(sequence_style=YamlNodeStyle.BLOCK_STYLE), + dedent( + """\ + --- + volumes: + - '/root:/mapped' + - 'a:b' + - 'c:d' + """ + ), + ), + ( + dedent( + """\ + test: + - "this one:\ + is ok" + - fix this:one + - | + multiline strings: + are not supported yet + - >- + multiline strings: + are not supported yet + """ + ), + YamlfixConfig(sequence_style=YamlNodeStyle.BLOCK_STYLE), + dedent( + """\ + --- + test: + - 'this one:\ + is ok' + - 'fix this:one' + - | + multiline strings: + are not supported yet + - >- + multiline strings: + are not supported yet + """ + ), + ), + ( + dedent( + """\ + merge0: &anchor + host: host.docker.internal:host-gateway + merge1: + <<: *anchor + merge2: + <<: *anchor + """ + ), + None, + dedent( + """\ + --- + merge0: &anchor + host: 'host.docker.internal:host-gateway' + merge1: + <<: *anchor + merge2: + <<: *anchor + """ + ), + ), + ], + ) + def test_strings_with_colons_are_quoted( + self, source: str, config: Optional[YamlfixConfig], desired_source: str + ) -> None: + """ + Given: Code with a string containing `:` + When: fix_code is run + Then: The string is quoted and not turned into a mapping + """ + result = fix_code(source, config=config) + + assert result == desired_source From 238c47d953d6d51d9c512b4f384816794a6a9731 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Laurenz=20Altenm=C3=BCller?= Date: Mon, 5 Aug 2024 23:36:24 +0200 Subject: [PATCH 2/2] Quote jobs.*.containers.volumes entries in GitHub Actions workflow yamls --- docs/index.md | 2 +- src/yamlfix/adapters.py | 102 ++++++------------------------------ tests/unit/test_services.py | 101 +++++++++++++---------------------- 3 files changed, 54 insertions(+), 151 deletions(-) diff --git a/docs/index.md b/docs/index.md index 35d78ab..79d157d 100644 --- a/docs/index.md +++ b/docs/index.md @@ -87,7 +87,7 @@ variable, use `fix_code`: - item - item ``` -- Quote strings that contain `:` to prevent some parsers from misinterpreting them as mappings. +- Quote jobs.*.containers.volumes entries in GitHub Actions workflow yamls to not confuse GitHubs Yaml parser. # Configuration diff --git a/src/yamlfix/adapters.py b/src/yamlfix/adapters.py index 267aa93..9955ff7 100644 --- a/src/yamlfix/adapters.py +++ b/src/yamlfix/adapters.py @@ -2,15 +2,15 @@ import logging import re -from collections.abc import Mapping from functools import partial from io import StringIO -from typing import Any, Callable, List, Match, NamedTuple, Optional, Tuple +from typing import Any, Callable, List, Match, Optional, Tuple from ruyaml.comments import CommentedMap, CommentedSeq from ruyaml.main import YAML from ruyaml.nodes import MappingNode, Node, ScalarNode, SequenceNode from ruyaml.representer import RoundTripRepresenter +from ruyaml.scalarstring import DoubleQuotedScalarString, SingleQuotedScalarString from ruyaml.tokens import CommentToken from yamlfix.model import YamlfixConfig, YamlNodeStyle @@ -351,7 +351,6 @@ def fix(self, source_code: str) -> str: self._fix_truthy_strings, self._fix_jinja_variables, self._ruamel_yaml_fixer, - self._quote_strings_with_colons, self._restore_truthy_strings, self._restore_jinja_variables, self._restore_double_exclamations, @@ -382,12 +381,27 @@ def _ruamel_yaml_fixer(self, source_code: str) -> str: # Return the output to a string string_stream = StringIO() for source_dict in source_dicts: + self._quote_gha_container_volumes(source_dict) self.yaml.dump(source_dict, string_stream) source_code = string_stream.getvalue() string_stream.close() return source_code.strip() + def _quote_gha_container_volumes( + self, source_dict: CommentedMap | CommentedSeq + ) -> None: + """Quote jobs.*.container.volumes entries.""" + if not isinstance(source_dict, CommentedMap): + return + scalar_type = SingleQuotedScalarString + if self.config.quote_representation == '"': + scalar_type = DoubleQuotedScalarString + for job in source_dict.get("jobs", {}).values(): + if volumes := job.get("container", {}).get("volumes", []): + for i, _ in enumerate(volumes): + volumes[i] = scalar_type(volumes[i]) + @staticmethod def _fix_top_level_lists(source_code: str) -> str: """Deindent the source with a top level list. @@ -798,85 +812,3 @@ def _restore_jinja_variables(source_code: str) -> str: fixed_source_lines.append(line) return "\n".join(fixed_source_lines) - - def _quote_strings_with_colons(self, source_code: str) -> str: - """Fix strings with colons to be quoted. - - Example: - volumes: [/root:/mapped] - becomes - volumes: ["/root:/mapped"] - - We do this by - 1. loading the yaml - 2. recursively scanning for strings that - * contain colons - * are not already quoted - * are not multi-line strings (see note). - 3. Adding quotes at the string start and end locations in the source_code. - - Note: Multi-line strings are not supported because ruyaml only provides the - start location of a scalar, but not the end location. For single-line strings - you can calculate the end location by adding the length of the string to the - start, but for strings broken over multiple lines this is not straightforward. - """ - log.debug("Fixing unquoted strings with colons...") - - class ToFix(NamedTuple): - """Where to insert quotes.""" - - line: int - start: int - end: int - - positions_to_quote = set() - lines = source_code.splitlines() - - def add(item: str, line: int, col: int) -> None: - is_quoted = lines[line][col] in ['"', "'"] - if ":" in item and not is_quoted: - to_fix = ToFix( - line=line, - start=col, - end=col + len(item), - ) - if to_fix.end <= len(lines[to_fix.line]): - positions_to_quote.add(to_fix) - else: - log.debug("Skipping %r which is multi-line", item) - - def check(value: CommentedSeq | Mapping[str, Any]) -> None: - if isinstance(value, CommentedSeq): - for i, item in enumerate(value): - if isinstance(item, str): - line, col = value.lc.item(i) - add(item, line, col) - else: - check(item) - elif isinstance(value, CommentedMap): - for key, item in value.items(): - if isinstance(item, str): - try: - line, col = value.lc.value(key) - except (KeyError, TypeError): - # May not be available if merged from an anchor - pass - else: - add(item, line, col) - else: - check(item) - - source_dicts = self.yaml.load_all(source_code) - for source_dict in source_dicts: - check(source_dict) - - for to_fix in sorted(positions_to_quote, reverse=True): - lines[to_fix.line] = (self.config.quote_representation or "'").join( - [ - lines[to_fix.line][: to_fix.start], # noqa: E203: black disagrees - lines[to_fix.line][to_fix.start : to_fix.end], # noqa: E203 - lines[to_fix.line][to_fix.end :], # noqa: E203 - ] - ) - - return "\n".join(lines) diff --git a/tests/unit/test_services.py b/tests/unit/test_services.py index f185c9f..cd1cfd6 100644 --- a/tests/unit/test_services.py +++ b/tests/unit/test_services.py @@ -497,7 +497,6 @@ def test_fix_code_functions_emit_debug_logs( "Fixing comments...", "Fixing top level lists...", "Fixing flow-style lists...", - "Fixing unquoted strings with colons...", } assert set(caplog.messages) == expected_logs for record in caplog.records: @@ -1040,88 +1039,60 @@ def test_fix_code_fix_whitelines( ("source", "config", "desired_source"), [ ( - "volumes: [/root:/mapped, a:b, 'c:d']", - YamlfixConfig(sequence_style=YamlNodeStyle.FLOW_STYLE), dedent( """\ --- - volumes: ['/root:/mapped', 'a:b', 'c:d'] - """ - ), - ), - ( - dedent( - """\ - volumes: - - /root:/mapped - - a:b - - 'c:d' + jobs: + test: + container: + volumes: + - /data:/data + - a:b # commented + - 'c:d' + - >- + multi: + line """ ), - YamlfixConfig(sequence_style=YamlNodeStyle.BLOCK_STYLE), + YamlfixConfig(sequence_style=YamlNodeStyle.FLOW_STYLE), dedent( """\ --- - volumes: - - '/root:/mapped' - - 'a:b' - - 'c:d' + jobs: + test: + container: + volumes: + - '/data:/data' + - 'a:b' # commented + - 'c:d' + - 'multi: line' """ ), ), ( - dedent( - """\ - test: - - "this one:\ - is ok" - - fix this:one - - | - multiline strings: - are not supported yet - - >- - multiline strings: - are not supported yet - """ - ), - YamlfixConfig(sequence_style=YamlNodeStyle.BLOCK_STYLE), dedent( """\ --- - test: - - 'this one:\ - is ok' - - 'fix this:one' - - | - multiline strings: - are not supported yet - - >- - multiline strings: - are not supported yet - """ - ), - ), - ( - dedent( - """\ - merge0: &anchor - host: host.docker.internal:host-gateway - merge1: - <<: *anchor - merge2: - <<: *anchor + jobs: + test2: + container: + volumes: + - /data:/data + - a:b + - 'c:d' + - >- + multi: + line """ ), - None, + YamlfixConfig(sequence_style=YamlNodeStyle.FLOW_STYLE), dedent( """\ --- - merge0: &anchor - host: 'host.docker.internal:host-gateway' - merge1: - <<: *anchor - merge2: - <<: *anchor + jobs: + test2: + container: + volumes: ['/data:/data', 'a:b', 'c:d', 'multi: line'] """ ), ), @@ -1131,9 +1102,9 @@ def test_strings_with_colons_are_quoted( self, source: str, config: Optional[YamlfixConfig], desired_source: str ) -> None: """ - Given: Code with a string containing `:` + Given: A GitHub Action workflow yaml containing jobs.*.containers.volumes When: fix_code is run - Then: The string is quoted and not turned into a mapping + Then: The volumes entries are quoted """ result = fix_code(source, config=config)