diff --git a/cli/hq.py b/cli/hq.py index c67c4417..7b0532ca 100755 --- a/cli/hq.py +++ b/cli/hq.py @@ -667,12 +667,6 @@ def main(): # pylint: disable=too-many-branches,too-many-statements serialization_options = None if args.with_comments: serialization_options = SerializationOptions(with_comments=True) - print( - "Warning: --with-comments only includes comments for top-level body " - "queries (e.g. 'resource[*]' on a single file). Comments adjacent to " - "individual blocks are not yet captured by sub-block queries.", - file=sys.stderr, - ) # --schema: dump schema and exit if args.schema: diff --git a/hcl2/query/attributes.py b/hcl2/query/attributes.py index bf795a21..567bb037 100644 --- a/hcl2/query/attributes.py +++ b/hcl2/query/attributes.py @@ -1,15 +1,25 @@ """AttributeView facade.""" -from typing import Any +from typing import Any, List, Optional from hcl2.query._base import NodeView, register_view, view_for +from hcl2.rules.abstract import LarkElement from hcl2.rules.base import AttributeRule +from hcl2.utils import SerializationOptions @register_view(AttributeRule) class AttributeView(NodeView): """View over an HCL2 attribute (AttributeRule).""" + def __init__( + self, + node: LarkElement, + adjacent_comments: Optional[List[dict]] = None, + ): + super().__init__(node) + self._adjacent_comments = adjacent_comments + @property def name(self) -> str: """Return the attribute name as a plain string.""" @@ -27,3 +37,15 @@ def value_node(self) -> "NodeView": """Return a view over the expression node.""" node: AttributeRule = self._node # type: ignore[assignment] return view_for(node.expression) + + def to_dict(self, options: Optional[SerializationOptions] = None) -> Any: + """Serialize, merging adjacent comments from the parent body.""" + result = super().to_dict(options=options) + if ( + self._adjacent_comments + and options is not None + and options.with_comments + and isinstance(result, dict) + ): + result["__comments__"] = self._adjacent_comments + return result diff --git a/hcl2/query/blocks.py b/hcl2/query/blocks.py index 1c14a0f8..2a5fd6cf 100644 --- a/hcl2/query/blocks.py +++ b/hcl2/query/blocks.py @@ -1,11 +1,14 @@ """BlockView facade.""" -from typing import List, Optional +from typing import Any, List, Optional +from hcl2.const import COMMENTS_KEY from hcl2.query._base import NodeView, register_view +from hcl2.rules.abstract import LarkElement from hcl2.rules.base import BlockRule from hcl2.rules.literal_rules import IdentifierRule from hcl2.rules.strings import StringRule +from hcl2.utils import SerializationOptions def _label_to_str(label) -> str: @@ -25,6 +28,14 @@ def _label_to_str(label) -> str: class BlockView(NodeView): """View over an HCL2 block (BlockRule).""" + def __init__( + self, + node: LarkElement, + adjacent_comments: Optional[List[dict]] = None, + ): + super().__init__(node) + self._adjacent_comments = adjacent_comments + @property def block_type(self) -> str: """Return the block type (first label) as a plain string.""" @@ -50,6 +61,21 @@ def body(self) -> "NodeView": node: BlockRule = self._node # type: ignore[assignment] return BodyView(node.body) + def to_dict(self, options: Optional[SerializationOptions] = None) -> Any: + """Serialize, merging adjacent comments from the parent body.""" + result = super().to_dict(options=options) + if ( + self._adjacent_comments + and options is not None + and options.with_comments + and isinstance(result, dict) + ): + # Place adjacent comments at the outer level of the block dict, + # alongside the label keys — not drilled into the body dict. + existing = result.get(COMMENTS_KEY, []) + result[COMMENTS_KEY] = self._adjacent_comments + existing + return result + def blocks( self, block_type: Optional[str] = None, *labels: str ) -> List["NodeView"]: diff --git a/hcl2/query/body.py b/hcl2/query/body.py index fdb6aa42..a3bce89e 100644 --- a/hcl2/query/body.py +++ b/hcl2/query/body.py @@ -4,6 +4,32 @@ from hcl2.query._base import NodeView, register_view from hcl2.rules.base import AttributeRule, BlockRule, BodyRule, StartRule +from hcl2.rules.whitespace import NewLineOrCommentRule + + +def _collect_leading_comments(body: BodyRule, child_index: int) -> List[dict]: + """Collect comments from NewLineOrCommentRule siblings preceding *child_index*. + + Walks backward through ``body.children`` from ``child_index - 1``, + collecting comment dicts via ``to_list()``, stopping at the first + ``BlockRule`` or ``AttributeRule`` (the previous semantic sibling) or + the start of the children list. + """ + chunks: List[List[dict]] = [] + for i in range(child_index - 1, -1, -1): + sibling = body.children[i] + if isinstance(sibling, (BlockRule, AttributeRule)): + break + if isinstance(sibling, NewLineOrCommentRule): + comments = sibling.to_list() + if comments: + chunks.append(comments) + # Reverse node order (walked backward) but keep each node's comments in order + chunks.reverse() + result: List[dict] = [] + for chunk in chunks: + result.extend(chunk) + return result @register_view(StartRule) @@ -63,7 +89,8 @@ def blocks( for child in node.children: if not isinstance(child, BlockRule): continue - block_view = BlockView(child) + adjacent = _collect_leading_comments(node, child.index) or None + block_view = BlockView(child, adjacent_comments=adjacent) if block_type is not None and block_view.block_type != block_type: continue if labels: @@ -84,7 +111,8 @@ def attributes(self, name: Optional[str] = None) -> List["NodeView"]: for child in node.children: if not isinstance(child, AttributeRule): continue - attr_view = AttributeView(child) + adjacent = _collect_leading_comments(node, child.index) or None + attr_view = AttributeView(child, adjacent_comments=adjacent) if name is not None and attr_view.name != name: continue results.append(attr_view) diff --git a/hcl2/rules/base.py b/hcl2/rules/base.py index ce457f45..dacec8b4 100644 --- a/hcl2/rules/base.py +++ b/hcl2/rules/base.py @@ -85,8 +85,8 @@ def serialize( attribute_names.add(child.identifier.serialize(options)) result.update(child.serialize(options)) if options.with_comments: - # collect in-line comments from attribute assignments, expressions etc inline_comments.extend(child.expression.inline_comments()) + comments.extend(child.expression.absorbed_comments()) if isinstance(child, NewLineOrCommentRule) and options.with_comments: child_comments = child.to_list() diff --git a/hcl2/rules/expressions.py b/hcl2/rules/expressions.py index f29eea5a..057e1ffc 100644 --- a/hcl2/rules/expressions.py +++ b/hcl2/rules/expressions.py @@ -242,6 +242,38 @@ def binary_term(self) -> BinaryTermRule: """Return the binary term (operator + right-hand operand).""" return self._children[1] + @property + def _trailing_nl(self) -> Optional[NewLineOrCommentRule]: + """Return the trailing new_line_or_comment child, if present.""" + child = self._children[2] + if isinstance(child, NewLineOrCommentRule): + return child + return None + + def inline_comments(self): + """Collect inline comments, excluding absorbed body-level comments.""" + trailing = self._trailing_nl + result = [] + for child in self._children: + if isinstance(child, NewLineOrCommentRule): + # Trailing NL_OR_COMMENT with a leading newline contains + # body-level comments absorbed by the grammar, not inline ones. + if child is trailing and not child.is_inline: + continue + comments = child.to_list() + if comments is not None: + result.extend(comments) + elif isinstance(child, InlineCommentMixIn): + result.extend(child.inline_comments()) + return result + + def absorbed_comments(self): + """Return body-level comments absorbed into the trailing NL_OR_COMMENT.""" + trailing = self._trailing_nl + if trailing is not None and not trailing.is_inline: + return trailing.to_list() or [] + return [] + def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: diff --git a/hcl2/rules/whitespace.py b/hcl2/rules/whitespace.py index 9488a77b..9cb464f7 100644 --- a/hcl2/rules/whitespace.py +++ b/hcl2/rules/whitespace.py @@ -26,7 +26,17 @@ def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: """Serialize to the raw comment/newline string.""" - return self.token.serialize() + return "".join(child.serialize() for child in self._children) + + @property + def is_inline(self) -> bool: + """True if this comment is on the same line as preceding code. + + A raw string starting with ``\\n`` means the comment sits on its own + line (standalone). One starting with ``#``, ``//``, or ``/*`` is + inline — it follows code on the same line. + """ + return not self.serialize().startswith("\n") def to_list( self, options: SerializationOptions = SerializationOptions() @@ -91,3 +101,11 @@ def inline_comments(self): result.extend(child.inline_comments()) return result + + def absorbed_comments(self): + """Return body-level comments absorbed by grammar into this expression. + + Default: empty. ``BinaryOpRule`` overrides this because its trailing + ``new_line_or_comment?`` can swallow the next body-level comment. + """ + return [] diff --git a/test/integration/specialized/comments.json b/test/integration/specialized/comments.json new file mode 100644 index 00000000..5d7e6ef4 --- /dev/null +++ b/test/integration/specialized/comments.json @@ -0,0 +1,57 @@ +{ + "resource": [ + { + "\"aws_instance\"": { + "\"web\"": { + "ami": "\"abc-123\"", + "instance_type": "\"t2.micro\"", + "count": "${1 + 2}", + "tags": { + "Name": "\"web\"", + "Env": "\"prod\"" + }, + "enabled": "true", + "nested": [ + { + "key": "\"value\"", + "__comments__": [ + { + "value": "comment inside nested block" + } + ], + "__is_block__": true + } + ], + "__comments__": [ + { + "value": "standalone comment inside block" + }, + { + "value": "hash standalone comment" + }, + { + "value": "absorbed standalone after binary_op" + }, + { + "value": "multi-line\n block comment" + } + ], + "__inline_comments__": [ + { + "value": "comment inside object" + }, + { + "value": "inline after value" + } + ], + "__is_block__": true + } + } + } + ], + "__comments__": [ + { + "value": "top-level standalone comment" + } + ] +} diff --git a/test/integration/specialized/comments.tf b/test/integration/specialized/comments.tf new file mode 100644 index 00000000..6755f2d3 --- /dev/null +++ b/test/integration/specialized/comments.tf @@ -0,0 +1,28 @@ +// top-level standalone comment +resource "aws_instance" "web" { + ami = "abc-123" + + // standalone comment inside block + instance_type = "t2.micro" + + # hash standalone comment + count = 1 + 2 + # absorbed standalone after binary_op + + tags = { + Name = "web" + # comment inside object + Env = "prod" # inline after value + } + + /* + multi-line + block comment + */ + enabled = true + + nested { + // comment inside nested block + key = "value" + } +} diff --git a/test/integration/test_specialized.py b/test/integration/test_specialized.py index 399b486a..60faf194 100644 --- a/test/integration/test_specialized.py +++ b/test/integration/test_specialized.py @@ -150,6 +150,86 @@ def test_full_round_trip(self): self.assertEqual(reserialized, serialized) +class TestCommentSerialization(TestCase): + """Test that comments are correctly classified during HCL → JSON serialization. + + Covers: + - Standalone comments (// and #) at body level → __comments__ + - Standalone comments absorbed by binary_op grammar → __comments__ + - Comments inside expressions (objects) → __inline_comments__ + - Multi-line block comments → __comments__ + - Comments in nested blocks + - Top-level comments + """ + + maxDiff = None + _OPTIONS = SerializationOptions(with_comments=True) + + def test_comment_classification(self): + hcl_path = SPECIAL_DIR / "comments.tf" + json_path = SPECIAL_DIR / "comments.json" + + actual = _parse_and_serialize(hcl_path.read_text(), options=self._OPTIONS) + expected = json.loads(json_path.read_text()) + + self.assertEqual(actual, expected) + + def test_top_level_comments(self): + actual = _parse_and_serialize("// file header\nx = 1\n", options=self._OPTIONS) + self.assertEqual(actual["__comments__"], [{"value": "file header"}]) + + def test_standalone_in_body(self): + actual = _parse_and_serialize( + 'resource "a" "b" {\n # standalone\n x = 1\n}\n', + options=self._OPTIONS, + ) + block = actual["resource"][0]['"a"']['"b"'] + self.assertEqual(block["__comments__"], [{"value": "standalone"}]) + self.assertNotIn("__inline_comments__", block) + + def test_absorbed_after_binary_op(self): + actual = _parse_and_serialize( + "x {\n a = 1 + 2\n # absorbed\n b = 3\n}\n", + options=self._OPTIONS, + ) + block = actual["x"][0] + self.assertIn({"value": "absorbed"}, block["__comments__"]) + self.assertNotIn("__inline_comments__", block) + + def test_inline_after_binary_op(self): + actual = _parse_and_serialize( + "x {\n a = 1 + 2 # inline\n b = 3\n}\n", + options=self._OPTIONS, + ) + block = actual["x"][0] + self.assertEqual(block["__inline_comments__"], [{"value": "inline"}]) + + def test_comment_inside_object(self): + actual = _parse_and_serialize( + "x {\n m = {\n # inside\n k = 1\n }\n}\n", + options=self._OPTIONS, + ) + block = actual["x"][0] + self.assertEqual(block["__inline_comments__"], [{"value": "inside"}]) + self.assertNotIn("__comments__", block) + + def test_multiline_block_comment(self): + actual = _parse_and_serialize( + "x {\n /*\n multi\n line\n */\n a = 1\n}\n", + options=self._OPTIONS, + ) + block = actual["x"][0] + self.assertEqual(block["__comments__"], [{"value": "multi\n line"}]) + + def test_no_comments_without_option(self): + actual = _parse_and_serialize( + "// comment\nx = 1\n", + options=SerializationOptions(with_comments=False), + ) + self.assertNotIn("__comments__", actual) + self.assertNotIn("__inline_comments__", actual) + + class TestHeredocs(TestCase): """Test heredoc serialization, flattening, restoration, and round-trips. diff --git a/test/unit/query/test_attributes.py b/test/unit/query/test_attributes.py index be09030e..07144719 100644 --- a/test/unit/query/test_attributes.py +++ b/test/unit/query/test_attributes.py @@ -2,6 +2,7 @@ from unittest import TestCase from hcl2.query.body import DocumentView +from hcl2.utils import SerializationOptions class TestAttributeView(TestCase): @@ -38,3 +39,27 @@ def test_to_dict(self): attr = doc.attribute("x") result = attr.to_dict() self.assertEqual(result, {"x": 42}) + + +class TestAttributeViewAdjacentComments(TestCase): + """Tests for adjacent comment merging in AttributeView.to_dict().""" + + _OPTS = SerializationOptions(with_comments=True) + + def test_adjacent_comment(self): + doc = DocumentView.parse("# about x\nx = 1\n") + attr = doc.body.attributes("x")[0] + result = attr.to_dict(options=self._OPTS) + self.assertEqual(result["__comments__"], [{"value": "about x"}]) + + def test_no_comments_without_option(self): + doc = DocumentView.parse("# about x\nx = 1\n") + attr = doc.body.attributes("x")[0] + result = attr.to_dict() + self.assertNotIn("__comments__", result) + + def test_no_adjacent_comments(self): + doc = DocumentView.parse("x = 1\n") + attr = doc.body.attributes("x")[0] + result = attr.to_dict(options=self._OPTS) + self.assertNotIn("__comments__", result) diff --git a/test/unit/query/test_blocks.py b/test/unit/query/test_blocks.py index ca365046..80f3a7ac 100644 --- a/test/unit/query/test_blocks.py +++ b/test/unit/query/test_blocks.py @@ -2,6 +2,7 @@ from unittest import TestCase from hcl2.query.body import DocumentView +from hcl2.utils import SerializationOptions class TestBlockView(TestCase): @@ -65,3 +66,59 @@ def test_attributes_filtered(self): attrs = block.attributes("a") self.assertEqual(len(attrs), 1) self.assertEqual(attrs[0].name, "a") + + +class TestBlockViewAdjacentComments(TestCase): + """Tests for adjacent comment merging in BlockView.to_dict().""" + + _OPTS = SerializationOptions(with_comments=True) + + def test_adjacent_comments_at_outer_level(self): + doc = DocumentView.parse( + '# about resource\nresource "type" "name" {\n x = 1\n}\n' + ) + block = doc.blocks("resource")[0] + result = block.to_dict(options=self._OPTS) + # Adjacent comments go at outer level, alongside the label key + self.assertEqual(result["__comments__"], [{"value": "about resource"}]) + self.assertNotIn("__comments__", result['"type"']['"name"']) + + def test_adjacent_separate_from_inner_comments(self): + doc = DocumentView.parse( + '# adjacent\nresource "type" "name" {\n # inner\n x = 1\n}\n' + ) + block = doc.blocks("resource")[0] + result = block.to_dict(options=self._OPTS) + # Adjacent at outer level + self.assertEqual(result["__comments__"], [{"value": "adjacent"}]) + # Inner stays in body dict under __comments__ + body = result['"type"']['"name"'] + self.assertEqual(body["__comments__"], [{"value": "inner"}]) + + def test_no_comments_without_option(self): + doc = DocumentView.parse('# about\nresource "type" "name" {}\n') + block = doc.blocks("resource")[0] + result = block.to_dict() + self.assertNotIn("__comments__", result) + + def test_no_labels_block_merges_adjacent_and_inner(self): + doc = DocumentView.parse("# about locals\nlocals {\n # inner\n x = 1\n}\n") + block = doc.blocks("locals")[0] + result = block.to_dict(options=self._OPTS) + # No name labels -> body dict IS the top level, so they merge + self.assertEqual( + result["__comments__"], + [{"value": "about locals"}, {"value": "inner"}], + ) + + def test_single_label_block(self): + doc = DocumentView.parse('# about var\nvariable "name" {\n default = 1\n}\n') + block = doc.blocks("variable")[0] + result = block.to_dict(options=self._OPTS) + self.assertEqual(result["__comments__"], [{"value": "about var"}]) + + def test_no_adjacent_comments(self): + doc = DocumentView.parse('resource "type" "name" {\n x = 1\n}\n') + block = doc.blocks("resource")[0] + result = block.to_dict(options=self._OPTS) + self.assertNotIn("__comments__", result) diff --git a/test/unit/query/test_body.py b/test/unit/query/test_body.py index 00467e82..0a8b75bf 100644 --- a/test/unit/query/test_body.py +++ b/test/unit/query/test_body.py @@ -1,7 +1,7 @@ # pylint: disable=C0103,C0114,C0115,C0116 from unittest import TestCase -from hcl2.query.body import DocumentView, BodyView +from hcl2.query.body import DocumentView, BodyView, _collect_leading_comments class TestDocumentView(TestCase): @@ -94,3 +94,82 @@ def test_attributes(self): body = doc.body attrs = body.attributes() self.assertEqual(len(attrs), 2) + + +class TestCollectLeadingComments(TestCase): + """Tests for _collect_leading_comments helper.""" + + def _body(self, hcl: str): + doc = DocumentView.parse(hcl) + return doc.body.raw # BodyRule + + def test_comment_before_block(self): + body = self._body('# about resource\nresource "a" "b" {}\n') + # Find the BlockRule child + from hcl2.rules.base import BlockRule + + for child in body.children: + if isinstance(child, BlockRule): + result = _collect_leading_comments(body, child.index) + self.assertEqual(result, [{"value": "about resource"}]) + return + self.fail("No BlockRule found") + + def test_comment_before_attribute(self): + body = self._body("# about x\nx = 1\n") + from hcl2.rules.base import AttributeRule + + for child in body.children: + if isinstance(child, AttributeRule): + result = _collect_leading_comments(body, child.index) + self.assertEqual(result, [{"value": "about x"}]) + return + self.fail("No AttributeRule found") + + def test_stops_at_previous_semantic_sibling(self): + body = self._body("x = 1\n# about y\ny = 2\n") + from hcl2.rules.base import AttributeRule + + attrs = [c for c in body.children if isinstance(c, AttributeRule)] + # First attribute (x) — comment before it is empty (only bare newlines) + result_x = _collect_leading_comments(body, attrs[0].index) + self.assertEqual(result_x, []) + # Second attribute (y) — has "about y" above it + result_y = _collect_leading_comments(body, attrs[1].index) + self.assertEqual(result_y, [{"value": "about y"}]) + + def test_bare_newlines_not_collected(self): + body = self._body("\n\nx = 1\n") + from hcl2.rules.base import AttributeRule + + for child in body.children: + if isinstance(child, AttributeRule): + result = _collect_leading_comments(body, child.index) + self.assertEqual(result, []) + return + self.fail("No AttributeRule found") + + def test_multiple_comments_in_order(self): + body = self._body("# first\n# second\nx = 1\n") + from hcl2.rules.base import AttributeRule + + for child in body.children: + if isinstance(child, AttributeRule): + result = _collect_leading_comments(body, child.index) + self.assertEqual(result, [{"value": "first"}, {"value": "second"}]) + return + self.fail("No AttributeRule found") + + def test_comment_between_two_blocks(self): + body = self._body('resource "a" "b" {}\n# about variable\nvariable "c" {}\n') + from hcl2.rules.base import BlockRule + + blocks = [c for c in body.children if isinstance(c, BlockRule)] + self.assertEqual(len(blocks), 2) + # First block: no leading comments + self.assertEqual(_collect_leading_comments(body, blocks[0].index), []) + # Second block: "about variable" + self.assertEqual( + _collect_leading_comments(body, blocks[1].index), + [{"value": "about variable"}], + )