@@ -13,12 +13,20 @@ use crate::rules::ruff::rules::Context;
1313use crate :: settings:: LinterSettings ;
1414
1515/// ## What it does
16- /// Checks for ambiguous unicode characters in strings.
16+ /// Checks for ambiguous Unicode characters in strings.
1717///
1818/// ## Why is this bad?
19- /// The use of ambiguous unicode characters can confuse readers and cause
19+ /// Some Unicode characters are visually similar to ASCII characters, but have
20+ /// different code points. For example, `GREEK CAPITAL LETTER ALPHA` (`U+0391`) is
21+ /// visually similar, but not identical, to the ASCII character `A`.
22+ ///
23+ /// The use of ambiguous Unicode characters can confuse readers and cause
2024/// subtle bugs.
2125///
26+ /// In [preview], this rule will also flag Unicode characters that are
27+ /// confusable with other, non-preferred Unicode characters. For example, the
28+ /// Unicode Standard recommends `GREEK CAPITAL LETTER OMEGA` over `OHM SIGN`.
29+ ///
2230/// ## Example
2331/// ```python
2432/// print("Ηello, world!") # "Η" is the Greek eta (`U+0397`).
@@ -28,6 +36,8 @@ use crate::settings::LinterSettings;
2836/// ```python
2937/// print("Hello, world!") # "H" is the Latin capital H (`U+0048`).
3038/// ```
39+ ///
40+ /// [preview]: https://docs.astral.sh/ruff/preview/
3141#[ violation]
3242pub struct AmbiguousUnicodeCharacterString {
3343 confusable : char ,
@@ -50,12 +60,20 @@ impl Violation for AmbiguousUnicodeCharacterString {
5060}
5161
5262/// ## What it does
53- /// Checks for ambiguous unicode characters in docstrings.
63+ /// Checks for ambiguous Unicode characters in docstrings.
5464///
5565/// ## Why is this bad?
56- /// The use of ambiguous unicode characters can confuse readers and cause
66+ /// Some Unicode characters are visually similar to ASCII characters, but have
67+ /// different code points. For example, `GREEK CAPITAL LETTER ALPHA` (`U+0391`) is
68+ /// visually similar, but not identical, to the ASCII character `A`.
69+ ///
70+ /// The use of ambiguous Unicode characters can confuse readers and cause
5771/// subtle bugs.
5872///
73+ /// In [preview], this rule will also flag Unicode characters that are
74+ /// confusable with other, non-preferred Unicode characters. For example, the
75+ /// Unicode Standard recommends `GREEK CAPITAL LETTER OMEGA` over `OHM SIGN`.
76+ ///
5977/// ## Example
6078/// ```python
6179/// """A lovely docstring (with a `U+FF09` parenthesis)."""
@@ -65,6 +83,8 @@ impl Violation for AmbiguousUnicodeCharacterString {
6583/// ```python
6684/// """A lovely docstring (with no strange parentheses)."""
6785/// ```
86+ ///
87+ /// [preview]: https://docs.astral.sh/ruff/preview/
6888#[ violation]
6989pub struct AmbiguousUnicodeCharacterDocstring {
7090 confusable : char ,
@@ -87,12 +107,20 @@ impl Violation for AmbiguousUnicodeCharacterDocstring {
87107}
88108
89109/// ## What it does
90- /// Checks for ambiguous unicode characters in comments.
110+ /// Checks for ambiguous Unicode characters in comments.
91111///
92112/// ## Why is this bad?
93- /// The use of ambiguous unicode characters can confuse readers and cause
113+ /// Some Unicode characters are visually similar to ASCII characters, but have
114+ /// different code points. For example, `GREEK CAPITAL LETTER ALPHA` (`U+0391`) is
115+ /// visually similar, but not identical, to the ASCII character `A`.
116+ ///
117+ /// The use of ambiguous Unicode characters can confuse readers and cause
94118/// subtle bugs.
95119///
120+ /// In [preview], this rule will also flag Unicode characters that are
121+ /// confusable with other, non-preferred Unicode characters. For example, the
122+ /// Unicode Standard recommends `GREEK CAPITAL LETTER OMEGA` over `OHM SIGN`.
123+ ///
96124/// ## Example
97125/// ```python
98126/// foo() # nоqa # "о" is Cyrillic (`U+043E`)
@@ -102,6 +130,8 @@ impl Violation for AmbiguousUnicodeCharacterDocstring {
102130/// ```python
103131/// foo() # noqa # "o" is Latin (`U+006F`)
104132/// ```
133+ ///
134+ /// [preview]: https://docs.astral.sh/ruff/preview/
105135#[ violation]
106136pub struct AmbiguousUnicodeCharacterComment {
107137 confusable : char ,
@@ -159,7 +189,9 @@ pub(crate) fn ambiguous_unicode_character(
159189 // Check if the boundary character is itself an ambiguous unicode character, in which
160190 // case, it's always included as a diagnostic.
161191 if !current_char. is_ascii ( ) {
162- if let Some ( representant) = confusable ( current_char as u32 ) {
192+ if let Some ( representant) = confusable ( current_char as u32 )
193+ . filter ( |representant| settings. preview . is_enabled ( ) || representant. is_ascii ( ) )
194+ {
163195 let candidate = Candidate :: new (
164196 TextSize :: try_from ( relative_offset) . unwrap ( ) + range. start ( ) ,
165197 current_char,
@@ -173,7 +205,9 @@ pub(crate) fn ambiguous_unicode_character(
173205 } else if current_char. is_ascii ( ) {
174206 // The current word contains at least one ASCII character.
175207 word_flags |= WordFlags :: ASCII ;
176- } else if let Some ( representant) = confusable ( current_char as u32 ) {
208+ } else if let Some ( representant) = confusable ( current_char as u32 )
209+ . filter ( |representant| settings. preview . is_enabled ( ) || representant. is_ascii ( ) )
210+ {
177211 // The current word contains an ambiguous unicode character.
178212 word_candidates. push ( Candidate :: new (
179213 TextSize :: try_from ( relative_offset) . unwrap ( ) + range. start ( ) ,
0 commit comments