diff --git a/hcl2/rules/base.py b/hcl2/rules/base.py index ce457f45..dacec8b4 100644 --- a/hcl2/rules/base.py +++ b/hcl2/rules/base.py @@ -85,8 +85,8 @@ def serialize( attribute_names.add(child.identifier.serialize(options)) result.update(child.serialize(options)) if options.with_comments: - # collect in-line comments from attribute assignments, expressions etc inline_comments.extend(child.expression.inline_comments()) + comments.extend(child.expression.absorbed_comments()) if isinstance(child, NewLineOrCommentRule) and options.with_comments: child_comments = child.to_list() diff --git a/hcl2/rules/expressions.py b/hcl2/rules/expressions.py index f29eea5a..057e1ffc 100644 --- a/hcl2/rules/expressions.py +++ b/hcl2/rules/expressions.py @@ -242,6 +242,38 @@ def binary_term(self) -> BinaryTermRule: """Return the binary term (operator + right-hand operand).""" return self._children[1] + @property + def _trailing_nl(self) -> Optional[NewLineOrCommentRule]: + """Return the trailing new_line_or_comment child, if present.""" + child = self._children[2] + if isinstance(child, NewLineOrCommentRule): + return child + return None + + def inline_comments(self): + """Collect inline comments, excluding absorbed body-level comments.""" + trailing = self._trailing_nl + result = [] + for child in self._children: + if isinstance(child, NewLineOrCommentRule): + # Trailing NL_OR_COMMENT with a leading newline contains + # body-level comments absorbed by the grammar, not inline ones. + if child is trailing and not child.is_inline: + continue + comments = child.to_list() + if comments is not None: + result.extend(comments) + elif isinstance(child, InlineCommentMixIn): + result.extend(child.inline_comments()) + return result + + def absorbed_comments(self): + """Return body-level comments absorbed into the trailing NL_OR_COMMENT.""" + trailing = self._trailing_nl + if trailing is not None and not trailing.is_inline: + return trailing.to_list() or [] + return [] + def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: diff --git a/hcl2/rules/whitespace.py b/hcl2/rules/whitespace.py index 9488a77b..9cb464f7 100644 --- a/hcl2/rules/whitespace.py +++ b/hcl2/rules/whitespace.py @@ -26,7 +26,17 @@ def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: """Serialize to the raw comment/newline string.""" - return self.token.serialize() + return "".join(child.serialize() for child in self._children) + + @property + def is_inline(self) -> bool: + """True if this comment is on the same line as preceding code. + + A raw string starting with ``\\n`` means the comment sits on its own + line (standalone). One starting with ``#``, ``//``, or ``/*`` is + inline — it follows code on the same line. + """ + return not self.serialize().startswith("\n") def to_list( self, options: SerializationOptions = SerializationOptions() @@ -91,3 +101,11 @@ def inline_comments(self): result.extend(child.inline_comments()) return result + + def absorbed_comments(self): + """Return body-level comments absorbed by grammar into this expression. + + Default: empty. ``BinaryOpRule`` overrides this because its trailing + ``new_line_or_comment?`` can swallow the next body-level comment. + """ + return [] diff --git a/test/integration/specialized/comments.json b/test/integration/specialized/comments.json new file mode 100644 index 00000000..5d7e6ef4 --- /dev/null +++ b/test/integration/specialized/comments.json @@ -0,0 +1,57 @@ +{ + "resource": [ + { + "\"aws_instance\"": { + "\"web\"": { + "ami": "\"abc-123\"", + "instance_type": "\"t2.micro\"", + "count": "${1 + 2}", + "tags": { + "Name": "\"web\"", + "Env": "\"prod\"" + }, + "enabled": "true", + "nested": [ + { + "key": "\"value\"", + "__comments__": [ + { + "value": "comment inside nested block" + } + ], + "__is_block__": true + } + ], + "__comments__": [ + { + "value": "standalone comment inside block" + }, + { + "value": "hash standalone comment" + }, + { + "value": "absorbed standalone after binary_op" + }, + { + "value": "multi-line\n block comment" + } + ], + "__inline_comments__": [ + { + "value": "comment inside object" + }, + { + "value": "inline after value" + } + ], + "__is_block__": true + } + } + } + ], + "__comments__": [ + { + "value": "top-level standalone comment" + } + ] +} diff --git a/test/integration/specialized/comments.tf b/test/integration/specialized/comments.tf new file mode 100644 index 00000000..6755f2d3 --- /dev/null +++ b/test/integration/specialized/comments.tf @@ -0,0 +1,28 @@ +// top-level standalone comment +resource "aws_instance" "web" { + ami = "abc-123" + + // standalone comment inside block + instance_type = "t2.micro" + + # hash standalone comment + count = 1 + 2 + # absorbed standalone after binary_op + + tags = { + Name = "web" + # comment inside object + Env = "prod" # inline after value + } + + /* + multi-line + block comment + */ + enabled = true + + nested { + // comment inside nested block + key = "value" + } +} diff --git a/test/integration/test_specialized.py b/test/integration/test_specialized.py index 399b486a..60faf194 100644 --- a/test/integration/test_specialized.py +++ b/test/integration/test_specialized.py @@ -150,6 +150,86 @@ def test_full_round_trip(self): self.assertEqual(reserialized, serialized) +class TestCommentSerialization(TestCase): + """Test that comments are correctly classified during HCL → JSON serialization. + + Covers: + - Standalone comments (// and #) at body level → __comments__ + - Standalone comments absorbed by binary_op grammar → __comments__ + - Comments inside expressions (objects) → __inline_comments__ + - Multi-line block comments → __comments__ + - Comments in nested blocks + - Top-level comments + """ + + maxDiff = None + _OPTIONS = SerializationOptions(with_comments=True) + + def test_comment_classification(self): + hcl_path = SPECIAL_DIR / "comments.tf" + json_path = SPECIAL_DIR / "comments.json" + + actual = _parse_and_serialize(hcl_path.read_text(), options=self._OPTIONS) + expected = json.loads(json_path.read_text()) + + self.assertEqual(actual, expected) + + def test_top_level_comments(self): + actual = _parse_and_serialize("// file header\nx = 1\n", options=self._OPTIONS) + self.assertEqual(actual["__comments__"], [{"value": "file header"}]) + + def test_standalone_in_body(self): + actual = _parse_and_serialize( + 'resource "a" "b" {\n # standalone\n x = 1\n}\n', + options=self._OPTIONS, + ) + block = actual["resource"][0]['"a"']['"b"'] + self.assertEqual(block["__comments__"], [{"value": "standalone"}]) + self.assertNotIn("__inline_comments__", block) + + def test_absorbed_after_binary_op(self): + actual = _parse_and_serialize( + "x {\n a = 1 + 2\n # absorbed\n b = 3\n}\n", + options=self._OPTIONS, + ) + block = actual["x"][0] + self.assertIn({"value": "absorbed"}, block["__comments__"]) + self.assertNotIn("__inline_comments__", block) + + def test_inline_after_binary_op(self): + actual = _parse_and_serialize( + "x {\n a = 1 + 2 # inline\n b = 3\n}\n", + options=self._OPTIONS, + ) + block = actual["x"][0] + self.assertEqual(block["__inline_comments__"], [{"value": "inline"}]) + + def test_comment_inside_object(self): + actual = _parse_and_serialize( + "x {\n m = {\n # inside\n k = 1\n }\n}\n", + options=self._OPTIONS, + ) + block = actual["x"][0] + self.assertEqual(block["__inline_comments__"], [{"value": "inside"}]) + self.assertNotIn("__comments__", block) + + def test_multiline_block_comment(self): + actual = _parse_and_serialize( + "x {\n /*\n multi\n line\n */\n a = 1\n}\n", + options=self._OPTIONS, + ) + block = actual["x"][0] + self.assertEqual(block["__comments__"], [{"value": "multi\n line"}]) + + def test_no_comments_without_option(self): + actual = _parse_and_serialize( + "// comment\nx = 1\n", + options=SerializationOptions(with_comments=False), + ) + self.assertNotIn("__comments__", actual) + self.assertNotIn("__inline_comments__", actual) + + class TestHeredocs(TestCase): """Test heredoc serialization, flattening, restoration, and round-trips.