@@ -1555,9 +1555,10 @@ impl<'src> LexerWithCheckpoint<'src> for HtmlLexer<'src> {
15551555 }
15561556}
15571557
1558- /// Tracks whether the lexer is currently inside an open string literal while
1559- /// scanning Astro frontmatter. Used to determine whether a `---` sequence is
1560- /// a genuine closing fence or merely three dashes that appear inside a string.
/// Tracks whether the lexer is currently inside an open string literal, regex
/// literal, or comment while scanning Astro frontmatter. Used to determine
/// whether a `---` sequence is a genuine closing fence or merely three dashes
/// that appear inside a string, regex, or comment.
15611562///
15621563/// ## Design
15631564///
@@ -1568,6 +1569,11 @@ impl<'src> LexerWithCheckpoint<'src> for HtmlLexer<'src> {
15681569/// already open; it closes the string only when it **matches** the opening
15691570/// quote. For example, a `'` inside a `"…"` string is treated as a literal
15701571/// character, not as a new string opener.
1572+ /// - The **regex flag** (`in_regex`): set when a `/` is encountered in a
1573+ /// position where it starts a regex literal (determined by the previous
1574+ /// non-whitespace byte). While set, all bytes are consumed until an
1575+ /// unescaped `/` closes the regex. Quotes and dashes inside a regex are
1576+ /// not treated as string delimiters or fence markers.
15711577/// - The **comment state** (`comment`): distinguishes single-line (`//`) from
15721578/// multi-line (`/* … */`) comments, so that quote characters inside comments
15731579/// are not counted as string delimiters.
@@ -1578,13 +1584,17 @@ impl<'src> LexerWithCheckpoint<'src> for HtmlLexer<'src> {
15781584struct QuotesSeen {
15791585 /// The quote character that opened the current string, if any.
15801586 current_quote : Option < u8 > ,
1587+ /// Whether we are currently inside a regex literal (`/…/`).
1588+ in_regex : bool ,
15811589 /// Current comment state.
15821590 comment : QuotesSeenComment ,
15831591 /// Whether the previous byte was an unescaped backslash.
15841592 escaped : bool ,
15851593 /// The previous byte, needed to detect `//` and `/* */` comment markers
15861594 /// and the `*/` block-comment terminator.
15871595 prev_byte : Option < u8 > ,
1596+ /// The previous non-whitespace byte, used for the regex-start heuristic.
1597+ prev_non_ws_byte : Option < u8 > ,
15881598}
15891599
15901600/// Distinguishes the kind of comment the lexer is currently inside.
@@ -1602,9 +1612,11 @@ impl QuotesSeen {
16021612 fn new ( ) -> Self {
16031613 Self {
16041614 current_quote : None ,
1615+ in_regex : false ,
16051616 comment : QuotesSeenComment :: None ,
16061617 escaped : false ,
16071618 prev_byte : None ,
1619+ prev_non_ws_byte : None ,
16081620 }
16091621 }
16101622
@@ -1617,13 +1629,21 @@ impl QuotesSeen {
16171629 self . comment = QuotesSeenComment :: None ;
16181630 }
16191631 self . prev_byte = Some ( byte) ;
1632+ if !byte. is_ascii_whitespace ( ) {
1633+ self . prev_non_ws_byte = Some ( byte) ;
1634+ }
16201635 // Quotes inside comments are ignored.
16211636 return ;
16221637 }
16231638 QuotesSeenComment :: MultiLine => {
16241639 // Multi-line comment ends at `*/`.
16251640 if self . prev_byte == Some ( b'*' ) && byte == b'/' {
16261641 self . comment = QuotesSeenComment :: None ;
1642+ // Use a neutral prev_byte so the closing `/` of `*/` is
1643+ // not mistaken for a potential regex or comment opener.
1644+ self . prev_byte = None ;
1645+ self . prev_non_ws_byte = Some ( b'/' ) ;
1646+ return ;
16271647 }
16281648 self . prev_byte = Some ( byte) ;
16291649 // Quotes inside comments are ignored.
@@ -1632,11 +1652,31 @@ impl QuotesSeen {
16321652 QuotesSeenComment :: None => { }
16331653 }
16341654
1655+ // Inside a regex literal: consume bytes until an unescaped `/` closes it.
1656+ if self . in_regex {
1657+ if byte == b'\\' {
1658+ self . escaped = !self . escaped ;
1659+ self . prev_byte = Some ( byte) ;
1660+ } else if byte == b'/' && !self . escaped {
1661+ self . in_regex = false ;
1662+ self . escaped = false ;
1663+ // Use a neutral prev_byte so the closing `/` of the regex is
1664+ // not mistaken for a deferred slash (comment/regex opener).
1665+ self . prev_byte = None ;
1666+ self . prev_non_ws_byte = Some ( b'/' ) ;
1667+ } else {
1668+ self . escaped = false ;
1669+ self . prev_byte = Some ( byte) ;
1670+ }
1671+ return ;
1672+ }
1673+
16351674 // Handle escape sequences: a `\` that is not itself escaped toggles the
16361675 // escape flag for the next character.
16371676 if byte == b'\\' {
16381677 self . escaped = !self . escaped ;
16391678 self . prev_byte = Some ( byte) ;
1679+ self . prev_non_ws_byte = Some ( byte) ;
16401680 return ;
16411681 }
16421682
@@ -1647,24 +1687,60 @@ impl QuotesSeen {
16471687
16481688 if was_escaped {
16491689 self . prev_byte = Some ( byte) ;
1690+ if !byte. is_ascii_whitespace ( ) {
1691+ self . prev_non_ws_byte = Some ( byte) ;
1692+ }
16501693 return ;
16511694 }
16521695
1653- // Detect comment openers — only valid outside of open strings.
1696+ // Detect comment openers and regex literals — only valid outside of open strings.
1697+ if self . current_quote . is_none ( ) && byte == b'/' {
1698+ // Check if the previous byte was also `/` → single-line comment.
1699+ if self . prev_byte == Some ( b'/' ) {
1700+ self . comment = QuotesSeenComment :: SingleLine ;
1701+ self . prev_byte = Some ( byte) ;
1702+ // Don't update prev_non_ws_byte — it was already preserved
1703+ // when we deferred the first `/`.
1704+ return ;
1705+ }
1706+
1707+ // The `/` might start a comment (if followed by `/` or `*`), a
1708+ // regex literal, or be a division operator. We defer the decision:
1709+ // store it as prev_byte and decide on the *next* byte.
1710+ // Crucially, do NOT update prev_non_ws_byte here — we need to
1711+ // preserve the byte before the `/` for the regex heuristic.
1712+ self . prev_byte = Some ( byte) ;
1713+ return ;
1714+ }
1715+
1716+ // If the *previous* byte was `/` (outside a string), decide now whether
1717+ // it was a comment opener, a regex opener, or plain division.
16541718 if self . current_quote . is_none ( ) && self . prev_byte == Some ( b'/' ) {
1655- match byte {
1656- b'/' => {
1657- self . comment = QuotesSeenComment :: SingleLine ;
1658- self . prev_byte = Some ( byte) ;
1659- return ;
1719+ if byte == b'*' {
1720+ self . comment = QuotesSeenComment :: MultiLine ;
1721+ self . prev_byte = Some ( byte) ;
1722+ self . prev_non_ws_byte = Some ( byte) ;
1723+ return ;
1724+ }
1725+
1726+ // Not `//` or `/*`, so the previous `/` was either a regex opener
1727+ // or a division operator. Use the previous non-whitespace byte
1728+ // before the `/` to decide.
1729+ if self . slash_starts_regex ( ) {
1730+ // The `/` opened a regex. The current byte is the first byte
1731+ // inside the regex body.
1732+ self . in_regex = true ;
1733+ if byte == b'\\' {
1734+ self . escaped = true ;
16601735 }
1661- b'*' => {
1662- self . comment = QuotesSeenComment :: MultiLine ;
1663- self . prev_byte = Some ( byte) ;
1664- return ;
1736+ self . prev_byte = Some ( byte) ;
1737+ if !byte. is_ascii_whitespace ( ) {
1738+ self . prev_non_ws_byte = Some ( byte) ;
16651739 }
1666- _ => { }
1740+ return ;
16671741 }
1742+ // It was division; update prev_non_ws_byte to `/` now.
1743+ self . prev_non_ws_byte = Some ( b'/' ) ;
16681744 }
16691745
16701746 // Track string delimiters.
@@ -1689,13 +1765,42 @@ impl QuotesSeen {
16891765 }
16901766
16911767 self . prev_byte = Some ( byte) ;
1768+ if !byte. is_ascii_whitespace ( ) {
1769+ self . prev_non_ws_byte = Some ( byte) ;
1770+ }
16921771 }
16931772
1694- /// Returns `true` when the tracker is not currently inside an open string literal
1695- /// or a comment. Both states must be absent for a `---` fence to be a valid
1696- /// frontmatter closing delimiter.
1773+ /// Returns whether a deferred `/` starts a regex literal based on
1774+ /// `prev_non_ws_byte`. After an identifier character, closing
1775+ /// paren/bracket, number, or `++`/`--` suffix, `/` is division. In all
1776+ /// other positions `/` starts a regex.
1777+ fn slash_starts_regex ( & self ) -> bool {
1778+ match self . prev_non_ws_byte {
1779+ None => true ,
1780+ Some ( b) => !matches ! (
1781+ b,
1782+ b'a' ..=b'z'
1783+ | b'A' ..=b'Z'
1784+ | b'0' ..=b'9'
1785+ | b'_'
1786+ | b'$'
1787+ | b')'
1788+ | b']'
1789+ | b'+'
1790+ | b'-'
1791+ ) ,
1792+ }
1793+ }
1794+
1795+ /// Returns `true` when the tracker is not currently inside an open string
1796+ /// literal, regex literal, or comment, and there is no pending deferred
1797+ /// slash that might open a regex. All conditions must be absent for a
1798+ /// `---` fence to be a valid frontmatter closing delimiter.
16971799 fn is_empty ( & self ) -> bool {
1698- self . current_quote . is_none ( ) && self . comment == QuotesSeenComment :: None
1800+ self . current_quote . is_none ( )
1801+ && !self . in_regex
1802+ && self . comment == QuotesSeenComment :: None
1803+ && self . prev_byte != Some ( b'/' )
16991804 }
17001805}
17011806
@@ -1952,4 +2057,45 @@ const f = "something" "#;
19522057 "double backslash followed by closing quote must close the string"
19532058 ) ;
19542059 }
2060+
2061+ // --- Tests for issue #9187: regex literals in frontmatter ---
2062+
/// A `'` that appears inside a regex literal is part of the pattern, not a
/// string opener, so the tracker must be empty again once the regex has closed.
#[test]
fn issue_9187_regex_with_single_quote() {
    let mut tracker = QuotesSeen::new();
    track("const test = /'/\n ", &mut tracker);
    let outside_everything = tracker.is_empty();
    assert!(
        outside_everything,
        "regex literal containing single quote must not open a string"
    );
}
2075+
/// A `"` that appears inside a regex literal is part of the pattern, not a
/// string opener, so the tracker must be empty again once the regex has closed.
#[test]
fn issue_9187_regex_with_double_quote() {
    let mut tracker = QuotesSeen::new();
    track("const test = /\" /\n ", &mut tracker);
    let outside_everything = tracker.is_empty();
    assert!(
        outside_everything,
        "regex literal containing double quote must not open a string"
    );
}
2088+
/// A regex literal containing `---` must not be mistaken for the closing
/// fence.
///
/// Two properties are checked:
/// - while the regex is still open (input stops right after the dashes) the
///   tracker reports non-empty, which is what stops the dashes from being
///   accepted as a fence;
/// - once the regex has closed, the tracker is empty again.
#[test]
fn issue_9187_regex_with_dashes() {
    // Stop feeding bytes before the closing `/`: the tracker is mid-regex.
    let mut inside_regex = QuotesSeen::new();
    track("const test = /---", &mut inside_regex);
    assert!(
        !inside_regex.is_empty(),
        "dashes inside an open regex literal must not be seen as a fence position"
    );

    // Full literal followed by a newline: the regex has closed.
    let source = "const test = /---/\n ";
    let mut quotes_seen = QuotesSeen::new();
    track(source, &mut quotes_seen);
    assert!(
        quotes_seen.is_empty(),
        "regex literal containing dashes must not confuse the tracker"
    );
}
19552101}
0 commit comments