Skip to content

Commit c3a8e3c

Browse files
authored
Strip markdown HTML comments within paragraph (#1698)
Standalone HTML comments are converted to `RDoc::Markup::Raw`. However, HTML comments inside a paragraph were not handled correctly. Since an RDoc paragraph can't contain Raw, inline HTML comments should be stripped. An HTML comment on its own line within a paragraph (`"\n<!-- -->\n"`) leaves a double newline `"\n\n"` once it is stripped, so ToHtml also needs to handle this double newline.

Fix the rendering of this markdown comment:

```ruby
##
# :markup: markdown
# <!-- comment1 -->
# hello
# <!-- comment2 -->
# world
```

Before

```html
<!-- comment1 -->
<p>hello &lt;!– comment2 –&gt; world</p>
```

After

```html
<!-- comment1 -->
<p>hello world</p>
```
1 parent 40d464d commit c3a8e3c

4 files changed

Lines changed: 56 additions & 11 deletions

File tree

lib/rdoc/markdown.kpeg

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -975,6 +975,7 @@ Inline = Str
975975
| InlineNote
976976
| Code
977977
| RawHtml
978+
| StrippedComment
978979
| Entity
979980
| EscapedChar
980981
| Symbol
@@ -1152,8 +1153,8 @@ Code = ( Ticks1 < (
11521153
)
11531154
{ code text }
11541155

1155-
RawHtml = < (HtmlComment | HtmlBlockScript | HtmlTag) >
1156-
{ if html? then text else '' end }
1156+
RawHtml = < HtmlBlockScript | HtmlTag > { html? ? text : '' }
1157+
StrippedComment = < HtmlComment > { '' }
11571158

11581159
BlankLine = @Sp @Newline { "\n" }
11591160

lib/rdoc/markdown.rb

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9787,7 +9787,7 @@ def _Inlines
97879787
return _tmp
97889788
end
97899789

9790-
# Inline = (Str | @Endline | UlOrStarLine | @Space | Strong | Emph | Strike | Image | Link | NoteReference | InlineNote | Code | RawHtml | Entity | EscapedChar | Symbol)
9790+
# Inline = (Str | @Endline | UlOrStarLine | @Space | Strong | Emph | Strike | Image | Link | NoteReference | InlineNote | Code | RawHtml | StrippedComment | Entity | EscapedChar | Symbol)
97919791
def _Inline
97929792

97939793
_save = self.pos
@@ -9831,6 +9831,9 @@ def _Inline
98319831
_tmp = apply(:_RawHtml)
98329832
break if _tmp
98339833
self.pos = _save
9834+
_tmp = apply(:_StrippedComment)
9835+
break if _tmp
9836+
self.pos = _save
98349837
_tmp = apply(:_Entity)
98359838
break if _tmp
98369839
self.pos = _save
@@ -14131,7 +14134,7 @@ def _Code
1413114134
return _tmp
1413214135
end
1413314136

14134-
# RawHtml = < (HtmlComment | HtmlBlockScript | HtmlTag) > { if html? then text else '' end }
14137+
# RawHtml = < (HtmlBlockScript | HtmlTag) > { html? ? text : '' }
1413514138
def _RawHtml
1413614139

1413714140
_save = self.pos
@@ -14140,9 +14143,6 @@ def _RawHtml
1414014143

1414114144
_save1 = self.pos
1414214145
while true # choice
14143-
_tmp = apply(:_HtmlComment)
14144-
break if _tmp
14145-
self.pos = _save1
1414614146
_tmp = apply(:_HtmlBlockScript)
1414714147
break if _tmp
1414814148
self.pos = _save1
@@ -14159,7 +14159,7 @@ def _RawHtml
1415914159
self.pos = _save
1416014160
break
1416114161
end
14162-
@result = begin; if html? then text else '' end ; end
14162+
@result = begin; html? ? text : '' ; end
1416314163
_tmp = true
1416414164
unless _tmp
1416514165
self.pos = _save
@@ -14171,6 +14171,32 @@ def _RawHtml
1417114171
return _tmp
1417214172
end
1417314173

14174+
# StrippedComment = < HtmlComment > { '' }
14175+
def _StrippedComment
14176+
14177+
_save = self.pos
14178+
while true # sequence
14179+
_text_start = self.pos
14180+
_tmp = apply(:_HtmlComment)
14181+
if _tmp
14182+
text = get_text(_text_start)
14183+
end
14184+
unless _tmp
14185+
self.pos = _save
14186+
break
14187+
end
14188+
@result = begin; '' ; end
14189+
_tmp = true
14190+
unless _tmp
14191+
self.pos = _save
14192+
end
14193+
break
14194+
end # end sequence
14195+
14196+
set_failed_rule :_StrippedComment unless _tmp
14197+
return _tmp
14198+
end
14199+
1417414200
# BlankLine = @Sp @Newline { "\n" }
1417514201
def _BlankLine
1417614202

@@ -16733,7 +16759,7 @@ def _DefinitionListDefinition
1673316759
Rules[:_InStyleTags] = rule_info("InStyleTags", "StyleOpen (!StyleClose .)* StyleClose")
1673416760
Rules[:_StyleBlock] = rule_info("StyleBlock", "< InStyleTags > @BlankLine* { if css? then RDoc::Markup::Raw.new text end }")
1673516761
Rules[:_Inlines] = rule_info("Inlines", "(!@Endline Inline:i { i } | @Endline:c !(&{ github? } Ticks3 /[^`\\n]*$/) &Inline { c })+:chunks @Endline? { chunks }")
16736-
Rules[:_Inline] = rule_info("Inline", "(Str | @Endline | UlOrStarLine | @Space | Strong | Emph | Strike | Image | Link | NoteReference | InlineNote | Code | RawHtml | Entity | EscapedChar | Symbol)")
16762+
Rules[:_Inline] = rule_info("Inline", "(Str | @Endline | UlOrStarLine | @Space | Strong | Emph | Strike | Image | Link | NoteReference | InlineNote | Code | RawHtml | StrippedComment | Entity | EscapedChar | Symbol)")
1673716763
Rules[:_Space] = rule_info("Space", "@Spacechar+ { \" \" }")
1673816764
Rules[:_Str] = rule_info("Str", "@StartList:a < @NormalChar+ > { a = text } (StrChunk:c { a << c })* { rdoc_escape(a) }")
1673916765
Rules[:_StrChunk] = rule_info("StrChunk", "< (@NormalChar | /_+/ &Alphanumeric)+ > { text }")
@@ -16785,7 +16811,8 @@ def _DefinitionListDefinition
1678516811
Rules[:_Ticks4] = rule_info("Ticks4", "\"````\" !\"`\"")
1678616812
Rules[:_Ticks5] = rule_info("Ticks5", "\"`````\" !\"`\"")
1678716813
Rules[:_Code] = rule_info("Code", "(Ticks1 < ((!\"`\" Nonspacechar)+ | !Ticks1 /`+/ | !Ticks1 (@Spacechar | @Newline !@BlankLine))+ > Ticks1 | Ticks2 < ((!\"`\" Nonspacechar)+ | !Ticks2 /`+/ | !Ticks2 (@Spacechar | @Newline !@BlankLine))+ > Ticks2 | Ticks3 < ((!\"`\" Nonspacechar)+ | !Ticks3 /`+/ | !Ticks3 (@Spacechar | @Newline !@BlankLine))+ > Ticks3 | Ticks4 < ((!\"`\" Nonspacechar)+ | !Ticks4 /`+/ | !Ticks4 (@Spacechar | @Newline !@BlankLine))+ > Ticks4 | Ticks5 < ((!\"`\" Nonspacechar)+ | !Ticks5 /`+/ | !Ticks5 (@Spacechar | @Newline !@BlankLine))+ > Ticks5) { code text }")
16788-
Rules[:_RawHtml] = rule_info("RawHtml", "< (HtmlComment | HtmlBlockScript | HtmlTag) > { if html? then text else '' end }")
16814+
Rules[:_RawHtml] = rule_info("RawHtml", "< (HtmlBlockScript | HtmlTag) > { html? ? text : '' }")
16815+
Rules[:_StrippedComment] = rule_info("StrippedComment", "< HtmlComment > { '' }")
1678916816
Rules[:_BlankLine] = rule_info("BlankLine", "@Sp @Newline { \"\\n\" }")
1679016817
Rules[:_Quoted] = rule_info("Quoted", "(\"\\\"\" (!\"\\\"\" .)* \"\\\"\" | \"'\" (!\"'\" .)* \"'\")")
1679116818
Rules[:_HtmlAttribute] = rule_info("HtmlAttribute", "(AlphanumericAscii | \"-\")+ Spnl (\"=\" Spnl (Quoted | (!\">\" Nonspacechar)+))? Spnl")

lib/rdoc/markup/to_html.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ def accept_block_quote(block_quote)
314314
def accept_paragraph(paragraph)
315315
@res << "\n<p>"
316316
text = paragraph.text @hard_break
317-
text = text.gsub(/(#{SPACE_SEPARATED_LETTER_CLASS})?\K\r?\n(?=(?(1)(#{SPACE_SEPARATED_LETTER_CLASS})?))/o) {
317+
text = text.gsub(/(#{SPACE_SEPARATED_LETTER_CLASS})?\K(?:\r?\n)+(?=(?(1)(#{SPACE_SEPARATED_LETTER_CLASS})?))/o) {
318318
defined?($2) && ' '
319319
}
320320
@res << to_html(text)

test/rdoc/rdoc_markdown_test.rb

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1590,4 +1590,21 @@ def test_escape_tilde_not_supported
15901590
assert_match(/\\~/, html)
15911591
end
15921592

1593+
def test_html_comment_suppressed
1594+
# Standalone HTML block tags and comments are preserved as raw text.
1595+
# But if comments are inside a paragraph, they should be stripped
1596+
# because raw inside para is not permitted and RDoc format doesn't have comment nodes.
1597+
markdown = <<~MARKDOWN
1598+
<!-- comment -->
1599+
a<!-- comment -->b
1600+
<!-- comment -->
1601+
c
1602+
MARKDOWN
1603+
1604+
expected_doc = doc(raw('<!-- comment -->'), para("ab\n\nc"))
1605+
assert_equal expected_doc, parse(markdown)
1606+
1607+
expected_html = "<!-- comment -->\n<p>ab c</p>\n"
1608+
assert_equal expected_html, render(markdown)
1609+
end
15931610
end

0 commit comments

Comments
 (0)