diff --git a/CHANGELOG.md b/CHANGELOG.md index 3852caeb..11e91752 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## \[Unreleased\] -- Nothing yet. +- Fixed parsing of strings that contain both interpolations and escaped interpolations. ([#239](https://github.com/amplify-education/python-hcl2/pull/239)) ## \[7.2.1\] - 2025-05-16 diff --git a/hcl2/hcl2.lark b/hcl2/hcl2.lark index 56dd104a..78ba3ca6 100644 --- a/hcl2/hcl2.lark +++ b/hcl2/hcl2.lark @@ -1,7 +1,7 @@ start : body body : (new_line_or_comment? (attribute | block))* new_line_or_comment? attribute : identifier EQ expression -block : identifier (identifier | STRING_LIT | string_with_interpolation)* new_line_or_comment? "{" body "}" +block : identifier (identifier | string)* new_line_or_comment? "{" body "}" new_line_or_comment: ( NL_OR_COMMENT )+ NL_OR_COMMENT: /\n[ \t]*/ | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/ @@ -44,8 +44,7 @@ COLON : ":" expr_term : LPAR new_line_or_comment? expression new_line_or_comment? RPAR | float_lit | int_lit - | STRING_LIT - | string_with_interpolation + | string | tuple | object | function_call @@ -60,11 +59,13 @@ expr_term : LPAR new_line_or_comment? expression new_line_or_comment? RPAR | for_tuple_expr | for_object_expr -STRING_LIT : "\"" STRING_CHARS? "\"" -STRING_CHARS : /(?:(?!\${)([^"\\]|\\.|\$\$))+/ // any character except '"', including escaped $$ -string_with_interpolation: "\"" (STRING_CHARS)* interpolation_maybe_nested (STRING_CHARS | interpolation_maybe_nested)* "\"" -interpolation_maybe_nested: "${" expression "}" - +string: "\"" string_part* "\"" +string_part: STRING_CHARS + | ESCAPED_INTERPOLATION + | interpolation +interpolation: "${" expression "}" +ESCAPED_INTERPOLATION.2: /\$\$\{[^}]*\}/ +STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)[^"\\]|\\.|(?:\$(?!\$?\{)))+/ int_lit : NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+ !float_lit: (NEGATIVE_DECIMAL? 
DECIMAL+ | NEGATIVE_DECIMAL+) "." DECIMAL+ (EXP_MARK)? @@ -77,7 +78,7 @@ EQ : /[ \t]*=(?!=|>)/ tuple : "[" (new_line_or_comment* expression new_line_or_comment* ",")* (new_line_or_comment* expression)? new_line_or_comment* "]" object : "{" new_line_or_comment? (new_line_or_comment* (object_elem | (object_elem COMMA)) new_line_or_comment*)* "}" object_elem : object_elem_key ( EQ | COLON ) expression -object_elem_key : float_lit | int_lit | identifier | STRING_LIT | object_elem_key_dot_accessor | object_elem_key_expression | string_with_interpolation +object_elem_key : float_lit | int_lit | identifier | string | object_elem_key_dot_accessor | object_elem_key_expression object_elem_key_expression : LPAR expression RPAR object_elem_key_dot_accessor : identifier (DOT identifier)+ diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index faf0ee89..16fc843b 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -1,7 +1,6 @@ """A reconstructor for HCL2 implemented using Lark's experimental reconstruction functionality""" import re -import json from typing import List, Dict, Callable, Optional, Union, Any, Tuple from lark import Lark, Tree @@ -137,7 +136,7 @@ def _is_equals_sign(self, terminal) -> bool: ) # pylint: disable=too-many-branches, too-many-return-statements - def _should_add_space(self, rule, current_terminal): + def _should_add_space(self, rule, current_terminal, is_block_label: bool = False): """ This method documents the situations in which we add space around certain tokens while reconstructing the generated HCL. @@ -155,6 +154,7 @@ def _should_add_space(self, rule, current_terminal): This should be sufficient to make a spacing decision. 
""" + # we don't need to add multiple spaces if self._last_char_space: return False @@ -166,6 +166,14 @@ def _should_add_space(self, rule, current_terminal): if self._is_equals_sign(current_terminal): return True + if is_block_label and isinstance(rule, Token) and rule.value == "string": + if ( + current_terminal == self._last_terminal == Terminal("DBLQUOTE") + or current_terminal == Terminal("DBLQUOTE") + and self._last_terminal == Terminal("NAME") + ): + return True + # if we're in a ternary or binary operator, add space around the operator if ( isinstance(rule, Token) @@ -235,7 +243,7 @@ def _should_add_space(self, rule, current_terminal): return True # always add space between string literals - if current_terminal == Terminal("STRING_LIT"): + if current_terminal == Terminal("STRING_CHARS"): return True # if we just opened a block, add a space, unless the block is empty @@ -257,7 +265,7 @@ def _should_add_space(self, rule, current_terminal): # preceded by a space if they're following a comma in a tuple or # function arg if current_terminal in [ - Terminal("STRING_LIT"), + Terminal("DBLQUOTE"), Terminal("DECIMAL"), Terminal("NAME"), Terminal("NEGATIVE_DECIMAL"), @@ -267,13 +275,15 @@ def _should_add_space(self, rule, current_terminal): # the catch-all case, we're not sure, so don't add a space return False - def _reconstruct(self, tree): + def _reconstruct(self, tree, is_block_label=False): unreduced_tree = self.match_tree(tree, tree.data) res = self.write_tokens.transform(unreduced_tree) for item in res: # any time we encounter a child tree, we recurse if isinstance(item, Tree): - yield from self._reconstruct(item) + yield from self._reconstruct( + item, (unreduced_tree.data == "block" and item.data != "body") + ) # every leaf should be a tuple, which contains information about # which terminal the leaf represents @@ -309,7 +319,7 @@ def _reconstruct(self, tree): self._deferred_item = None # potentially add a space before the next token - if 
self._should_add_space(rule, terminal): + if self._should_add_space(rule, terminal, is_block_label): yield " " self._last_char_space = True @@ -353,21 +363,21 @@ def _name_to_identifier(name: str) -> Tree: @staticmethod def _escape_interpolated_str(interp_s: str) -> str: - if interp_s.strip().startswith('<<-') or interp_s.strip().startswith('<<'): + if interp_s.strip().startswith("<<-") or interp_s.strip().startswith("<<"): # For heredoc strings, preserve their format exactly return reverse_quotes_within_interpolation(interp_s) # Escape backslashes first (very important to do this first) - escaped = interp_s.replace('\\', '\\\\') + escaped = interp_s.replace("\\", "\\\\") # Escape quotes escaped = escaped.replace('"', '\\"') # Escape control characters - escaped = escaped.replace('\n', '\\n') - escaped = escaped.replace('\r', '\\r') - escaped = escaped.replace('\t', '\\t') - escaped = escaped.replace('\b', '\\b') - escaped = escaped.replace('\f', '\\f') + escaped = escaped.replace("\n", "\\n") + escaped = escaped.replace("\r", "\\r") + escaped = escaped.replace("\t", "\\t") + escaped = escaped.replace("\b", "\\b") + escaped = escaped.replace("\f", "\\f") # find each interpolation within the string and remove the backslashes - interp_s = reverse_quotes_within_interpolation(f'"{escaped}"') + interp_s = reverse_quotes_within_interpolation(f"{escaped}") return interp_s @staticmethod @@ -420,6 +430,48 @@ def _newline(self, level: int, count: int = 1) -> Tree: [Token("NL_OR_COMMENT", f"\n{' ' * level}") for _ in range(count)], ) + def _build_string_rule(self, string: str, level: int = 0) -> Tree: + # grammar in hcl2.lark defines that a string is built of any number of string parts, + # each string part can be either interpolation expression, escaped interpolation string + # or regular string + # this method build hcl2 string rule based on arbitrary string, + # splitting such string into individual parts and building a lark tree out of them + # + result = [] + + pattern = 
re.compile(r"(\${1,2}\{(?:[^{}]|\{[^{}]*})*})") + parts = re.split(pattern, string) + # e.g. 'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}'] + + if parts[-1] == "": + parts.pop() + if len(parts) > 0 and parts[0] == "": + parts.pop(0) + + for part in parts: + if part.startswith("$${") and part.endswith("}"): + result.append(Token("ESCAPED_INTERPOLATION", part)) + + # unwrap interpolation expression and recurse into it + elif part.startswith("${") and part.endswith("}"): + part = part[2:-1] + if part.startswith('"') and part.endswith('"'): + part = part[1:-1] + part = self._transform_value_to_expr_term(part, level) + else: + part = Tree( + Token("RULE", "expr_term"), + [Tree(Token("RULE", "identifier"), [Token("NAME", part)])], + ) + + result.append(Tree(Token("RULE", "interpolation"), [part])) + + else: + result.append(Token("STRING_CHARS", part)) + + result = [Tree(Token("RULE", "string_part"), [element]) for element in result] + return Tree(Token("RULE", "string"), result) + def _is_block(self, value: Any) -> bool: if isinstance(value, dict): block_body = value @@ -485,8 +537,8 @@ def _transform_dict_to_body(self, hcl_dict: dict, level: int) -> Tree: block_labels, block_body_dict = self._calculate_block_labels( block_v ) - block_label_tokens = [ - Token("STRING_LIT", f'"{block_label}"') + block_label_trees = [ + self._build_string_rule(block_label, level) for block_label in block_labels ] block_body = self._transform_dict_to_body( @@ -496,7 +548,7 @@ def _transform_dict_to_body(self, hcl_dict: dict, level: int) -> Tree: # create our actual block to add to our own body block = Tree( Token("RULE", "block"), - [identifier_name] + block_label_tokens + [block_body], + [identifier_name] + block_label_trees + [block_body], ) children.append(block) # add empty line after block @@ -675,10 +727,10 @@ def _transform_value_to_expr_term(self, value, level) -> Union[Token, Tree]: parsed_value = attribute.children[2] return parsed_value - # 
otherwise it's just a string. + # otherwise it's a string return Tree( Token("RULE", "expr_term"), - [Token("STRING_LIT", self._escape_interpolated_str(value))], + [self._build_string_rule(self._escape_interpolated_str(value), level)], ) # otherwise, we don't know the type diff --git a/hcl2/transformer.py b/hcl2/transformer.py index 6f13f9c4..382092d6 100644 --- a/hcl2/transformer.py +++ b/hcl2/transformer.py @@ -247,7 +247,8 @@ def heredoc_template(self, args: List) -> str: raise RuntimeError(f"Invalid Heredoc token: {args[0]}") trim_chars = "\n\t " - return f'"{match.group(2).rstrip(trim_chars)}"' + result = match.group(2).rstrip(trim_chars) + return f'"{result}"' def heredoc_template_trim(self, args: List) -> str: # See https://github.com/hashicorp/hcl2/blob/master/hcl/hclsyntax/spec.md#template-expressions @@ -297,12 +298,17 @@ def for_object_expr(self, args: List) -> str: # e.g. f"{2 + 2} {{2 + 2}}" == "4 {2 + 2}" return f"{{{for_expr}}}" - def string_with_interpolation(self, args: List) -> str: - return '"' + ("".join(args)) + '"' + def string(self, args: List) -> str: + return '"' + "".join(args) + '"' - def interpolation_maybe_nested(self, args: List) -> str: - # return "".join(args) - return "${" + ("".join(args)) + "}" + def string_part(self, args: List) -> str: + value = self.to_tf_inline(args[0]) + if value.startswith('"') and value.endswith('"'): + value = value[1:-1] + return value + + def interpolation(self, args: List) -> str: + return '"${' + str(args[0]) + '}"' def strip_new_line_tokens(self, args: List) -> List: """ diff --git a/test/helpers/terraform-config-json/string_interpolations.json b/test/helpers/terraform-config-json/string_interpolations.json index 136920e6..885baf89 100644 --- a/test/helpers/terraform-config-json/string_interpolations.json +++ b/test/helpers/terraform-config-json/string_interpolations.json @@ -1 +1,13 @@ -{"locals": [{"simple_interpolation": "prefix:${var.foo}-suffix", "embedded_interpolation": "(long substring without 
interpolation); ${module.special_constants.aws_accounts[\"aaa-${local.foo}-${local.bar}\"]}/us-west-2/key_foo", "deeply_nested_interpolation": "prefix1-${\"prefix2-${\"prefix3-${local.foo}\"}\"}", "escaped_interpolation": "prefix:$${aws:username}-suffix"}]} +{ + "locals": [ + { + "simple_interpolation": "prefix:${var.foo}-suffix", + "embedded_interpolation": "(long substring without interpolation); ${module.special_constants.aws_accounts[\"aaa-${local.foo}-${local.bar}\"]}/us-west-2/key_foo", + "deeply_nested_interpolation": "prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}", + "escaped_interpolation": "prefix:$${aws:username}-suffix", + "simple_and_escaped": "${\"bar\"}$${baz:bat}", + "simple_and_escaped_reversed": "$${baz:bat}${\"bar\"}", + "nested_escaped": "bar-${\"$${baz:bat}\"}" + } + ] +} diff --git a/test/helpers/terraform-config/string_interpolations.tf b/test/helpers/terraform-config/string_interpolations.tf index 3b6ddea9..582b4aac 100644 --- a/test/helpers/terraform-config/string_interpolations.tf +++ b/test/helpers/terraform-config/string_interpolations.tf @@ -1,6 +1,9 @@ locals { simple_interpolation = "prefix:${var.foo}-suffix" embedded_interpolation = "(long substring without interpolation); ${module.special_constants.aws_accounts["aaa-${local.foo}-${local.bar}"]}/us-west-2/key_foo" - deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-${local.foo}"}"}" + deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}" escaped_interpolation = "prefix:$${aws:username}-suffix" + simple_and_escaped = "${"bar"}$${baz:bat}" + simple_and_escaped_reversed = "$${baz:bat}${"bar"}" + nested_escaped = "bar-${"$${baz:bat}"}" } diff --git a/test/unit/test_builder.py b/test/unit/test_builder.py index 9c913505..2ce0cfed 100644 --- a/test/unit/test_builder.py +++ b/test/unit/test_builder.py @@ -73,8 +73,11 @@ def test_locals_embedded_interpolation_tf(self): "simple_interpolation": "prefix:${var.foo}-suffix", "embedded_interpolation": "(long 
substring without interpolation); " '${module.special_constants.aws_accounts["aaa-${local.foo}-${local.bar}"]}/us-west-2/key_foo', - "deeply_nested_interpolation": 'prefix1-${"prefix2-${"prefix3-${local.foo}"}"}', + "deeply_nested_interpolation": 'prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}', "escaped_interpolation": "prefix:$${aws:username}-suffix", + "simple_and_escaped": '${"bar"}$${baz:bat}', + "simple_and_escaped_reversed": '$${baz:bat}${"bar"}', + "nested_escaped": 'bar-${"$${baz:bat}"}', } builder.block("locals", **attributes)