Skip to content

Commit 2a03907

Browse files
authored
Emit escape sequences for private-use characters in expanded mode (#1430)
1 parent e0e132e commit 2a03907

5 files changed

Lines changed: 167 additions & 11 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
## 1.37.6
22

3+
* In expanded mode, emit characters in Unicode private-use areas as escape
4+
sequences rather than literal characters.
5+
36
* Fix a bug where quotes would be omitted for an attribute selector whose value
47
was a single backslash.
58

lib/src/util/character.dart

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,34 @@ bool isHex(int? character) {
5353

5454
/// Returns whether [character] is the beginning of a UTF-16 surrogate pair.
5555
bool isHighSurrogate(int character) =>
56-
character >= 0xD800 && character <= 0xDBFF;
56+
// A character is a high surrogate exactly if it matches 0b110110XXXXXXXXXX.
57+
// 0x36 == 0b110110.
58+
character >> 10 == 0x36;
59+
60+
/// Returns whether [character] is a Unicode private-use code point in the Basic
61+
/// Multilingual Plane.
62+
///
63+
/// See https://en.wikipedia.org/wiki/Private_Use_Areas for details.
64+
bool isPrivateUseBMP(int character) =>
65+
character >= 0xE000 && character <= 0xF8FF;
66+
67+
/// Returns whether [character] is the high surrogate for a code point in a
68+
/// Unicode private-use supplementary plane.
69+
///
70+
/// See https://en.wikipedia.org/wiki/Private_Use_Areas for details.
71+
bool isPrivateUseHighSurrogate(int character) =>
72+
// Supplementary Private Use Area-A's and B's high surrogates range from
73+
// 0xDB80 to 0xDBFF, which covers exactly the range 0b110110111XXXXXXX.
74+
// 0b110110111 == 0x1B7.
75+
character >> 7 == 0x1B7;
76+
77+
/// Combines a UTF-16 high and low surrogate pair into a single code point.
78+
///
79+
/// See https://en.wikipedia.org/wiki/UTF-16 for details.
80+
int combineSurrogates(int highSurrogate, int lowSurrogate) =>
81+
// 0x3FF == 0b0000001111111111, which masks out the six bits that indicate
82+
// high/low surrogates.
83+
0x10000 + ((highSurrogate & 0x3FF) << 10) + (lowSurrogate & 0x3FF);
5784

5885
// Returns whether [character] can start a simple selector other than a type
5986
// selector.

lib/src/visitor/serialize.dart

Lines changed: 61 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -948,15 +948,7 @@ class _SerializeVisitor
948948
case $gs:
949949
case $rs:
950950
case $us:
951-
buffer.writeCharCode($backslash);
952-
if (char > 0xF) buffer.writeCharCode(hexCharFor(char >> 4));
953-
buffer.writeCharCode(hexCharFor(char & 0xF));
954-
if (string.length == i + 1) break;
955-
956-
var next = string.codeUnitAt(i + 1);
957-
if (isHex(next) || next == $space || next == $tab) {
958-
buffer.writeCharCode($space);
959-
}
951+
_writeEscape(buffer, char, string, i);
960952
break;
961953

962954
case $backslash:
@@ -965,6 +957,12 @@ class _SerializeVisitor
965957
break;
966958

967959
default:
960+
var newIndex = _tryPrivateUseCharacter(buffer, char, string, i);
961+
if (newIndex != null) {
962+
i = newIndex;
963+
break;
964+
}
965+
968966
buffer.writeCharCode(char);
969967
break;
970968
}
@@ -996,13 +994,66 @@ class _SerializeVisitor
996994
break;
997995

998996
default:
999-
_buffer.writeCharCode(char);
1000997
afterNewline = false;
998+
var newIndex = _tryPrivateUseCharacter(_buffer, char, string, i);
999+
if (newIndex != null) {
1000+
i = newIndex;
1001+
break;
1002+
}
1003+
1004+
_buffer.writeCharCode(char);
10011005
break;
10021006
}
10031007
}
10041008
}
10051009

1010+
/// If [codeUnit] is (the beginning of) a private-use character and Sass isn't
1011+
/// emitting compressed CSS, writes that character as an escape to [buffer].
1012+
///
1013+
/// The [string] is the string from which [codeUnit] was read, and [i] is the
1014+
/// index it was read from. If this successfully writes the character, returns
1015+
/// the index of the *last* code unit that was consumed for it. Otherwise,
1016+
/// returns `null`.
1017+
///
1018+
/// In expanded mode, we print all characters in Private Use Areas as escape
1019+
/// codes since there's no useful way to render them directly. These
1020+
/// characters are often used for glyph fonts, where it's useful for readers
1021+
/// to be able to distinguish between them in the rendered stylesheet.
1022+
int? _tryPrivateUseCharacter(
1023+
StringBuffer buffer, int codeUnit, String string, int i) {
1024+
if (_isCompressed) return null;
1025+
1026+
if (isPrivateUseBMP(codeUnit)) {
1027+
_writeEscape(buffer, codeUnit, string, i);
1028+
return i;
1029+
}
1030+
1031+
if (isPrivateUseHighSurrogate(codeUnit) && string.length > i + 1) {
1032+
_writeEscape(buffer,
1033+
combineSurrogates(codeUnit, string.codeUnitAt(i + 1)), string, i + 1);
1034+
return i + 1;
1035+
}
1036+
1037+
return null;
1038+
}
1039+
1040+
/// Writes [character] as a hexadecimal escape sequence to [buffer].
1041+
///
1042+
/// The [string] is the string from which the escape is being written, and [i]
1043+
/// is the index of the last code unit of [character] in that string. These
1044+
/// are used to write a trailing space after the escape if necessary to
1045+
/// disambiguate it from the next character.
1046+
void _writeEscape(StringBuffer buffer, int character, String string, int i) {
1047+
buffer.writeCharCode($backslash);
1048+
buffer.write(character.toRadixString(16));
1049+
1050+
if (string.length == i + 1) return;
1051+
var next = string.codeUnitAt(i + 1);
1052+
if (isHex(next) || next == $space || next == $tab) {
1053+
buffer.writeCharCode($space);
1054+
}
1055+
}
1056+
10061057
// ## Selectors
10071058

10081059
void visitAttributeSelector(AttributeSelector attribute) {

test/compressed_test.dart

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,35 @@ void main() {
143143
expect(_compile("a {b: #cc3232}"), equals("a{b:#cc3232}"));
144144
});
145145
});
146+
147+
group("strings", () {
148+
group("emits private-use area characters as literal characters", () {
149+
var testCharacter = (int character) {
150+
var escape = "\\${character.toRadixString(16)}";
151+
test("$escape", () {
152+
expect(
153+
_compile("a {b: $escape}"),
154+
equalsIgnoringWhitespace(
155+
"a{b:${String.fromCharCode(character)}}"));
156+
});
157+
};
158+
159+
group("in the basic multilingual plane", () {
160+
testCharacter(0xe000);
161+
testCharacter(0xf000);
162+
testCharacter(0xf8ff);
163+
});
164+
165+
group("in the supplementary planes", () {
166+
testCharacter(0xf0000);
167+
testCharacter(0xfabcd);
168+
testCharacter(0xffffd);
169+
testCharacter(0x100000);
170+
testCharacter(0x10abcd);
171+
testCharacter(0x10fffd);
172+
});
173+
});
174+
});
146175
});
147176

148177
group("the top level", () {

test/output_test.dart

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,52 @@ import 'package:test/test.dart';
1111
import 'package:sass/sass.dart';
1212

1313
void main() {
14+
group("emits private-use area characters as escapes in expanded mode", () {
15+
var testCharacter = (String escape) {
16+
test("$escape", () {
17+
expect(compileString("a {b: $escape}"),
18+
equalsIgnoringWhitespace("a { b: $escape; }"));
19+
});
20+
};
21+
22+
group("in the basic multilingual plane", () {
23+
testCharacter(r"\e000");
24+
testCharacter(r"\f000");
25+
testCharacter(r"\f8ff");
26+
});
27+
28+
group("in the supplementary planes", () {
29+
testCharacter(r"\f0000");
30+
testCharacter(r"\fabcd");
31+
testCharacter(r"\ffffd");
32+
testCharacter(r"\100000");
33+
testCharacter(r"\10abcd");
34+
testCharacter(r"\10fffd");
35+
36+
// Although these aren't technically in private-use areas, they're in
37+
// private-use planes and they have no visual representation so we
38+
// escape them as well.
39+
group("that aren't technically in PUAs", () {
40+
testCharacter(r"\ffffe");
41+
testCharacter(r"\fffff");
42+
testCharacter(r"\10fffe");
43+
testCharacter(r"\10ffff");
44+
});
45+
});
46+
47+
group("adds a space", () {
48+
test("if followed by a hex character", () {
49+
expect(compileString(r"a {b: '\e000 a'}"),
50+
equalsIgnoringWhitespace(r'a { b: "\e000 a"; }'));
51+
});
52+
53+
test("if followed by a space", () {
54+
expect(compileString(r"a {b: '\e000 '}"),
55+
equalsIgnoringWhitespace(r'a { b: "\e000 "; }'));
56+
});
57+
});
58+
});
59+
1460
// Regression test for sass/dart-sass#623. This needs to be tested here
1561
// because sass-spec normalizes CR LF newlines.
1662
group("normalizes newlines in a loud comment", () {

0 commit comments

Comments
 (0)