Skip to content

Commit 1cc494a

Browse files
authored
Merge pull request #256 from facelessuser/enhance/lang
Changes to `:lang()` as defined in recent CSS spec update
2 parents f38a93a + 9c02bb8 commit 1cc494a

5 files changed

Lines changed: 56 additions & 7 deletions

File tree

docs/src/dictionary/en-custom.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ tuples
7373
un
7474
unmatchable
7575
unpickle
76+
untagged
7677
unvisited
7778
whitespace
7879
wildcard

docs/src/markdown/about/changelog.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
# Changelog
22

3-
## 2.4.0
3+
## 2.5
4+
5+
- **NEW**: Update to support changes related to `:lang()` in the official CSS spec. `:lang("")` should match unspecified
6+
languages, e.g. `lang=""`, but not `lang=und`.
7+
8+
## 2.4
49

510
- **NEW**: `:nth-child()` and `:nth-last-child()` will forgive irregular comma usage.
611
- **NEW**: Formally drop Python 3.6.

soupsieve/__meta__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,5 +193,5 @@ def parse_version(ver: str) -> Version:
193193
return Version(major, minor, micro, release, pre, post, dev)
194194

195195

196-
__version_info__ = Version(2, 4, 0, "final", post=1)
196+
__version_info__ = Version(2, 5, 0, "final", post=1)
197197
__version__ = __version_info__._get_canonical()

soupsieve/css_match.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -601,13 +601,18 @@ def extended_language_filter(self, lang_range: str, lang_tag: str) -> bool:
601601
ranges = lang_range.split('-')
602602
subtags = lang_tag.lower().split('-')
603603
length = len(ranges)
604+
slength = len(subtags)
604605
rindex = 0
605606
sindex = 0
606607
r = ranges[rindex]
607608
s = subtags[sindex]
608609

610+
# Empty specified language should match unspecified language attributes
611+
if length == 1 and slength == 1 and not r and r == s:
612+
return True
613+
609614
# Primary tag needs to match
610-
if r != '*' and r != s:
615+
if (r != '*' and r != s) or (r == '*' and slength == 1 and not s):
611616
match = False
612617

613618
rindex += 1
@@ -1184,7 +1189,7 @@ def match_lang(self, el: bs4.Tag, langs: tuple[ct.SelectorLang, ...]) -> bool:
11841189
break
11851190

11861191
# Use cached meta language.
1187-
if not found_lang and self.cached_meta_lang:
1192+
if found_lang is None and self.cached_meta_lang:
11881193
for cache in self.cached_meta_lang:
11891194
if root is cache[0]:
11901195
found_lang = cache[1]
@@ -1218,13 +1223,13 @@ def match_lang(self, el: bs4.Tag, langs: tuple[ct.SelectorLang, ...]) -> bool:
12181223
found_lang = content
12191224
self.cached_meta_lang.append((cast(str, root), cast(str, found_lang)))
12201225
break
1221-
if found_lang:
1226+
if found_lang is not None:
12221227
break
1223-
if not found_lang:
1228+
if found_lang is None:
12241229
self.cached_meta_lang.append((cast(str, root), ''))
12251230

12261231
# If we determined a language, compare.
1227-
if found_lang:
1232+
if found_lang is not None:
12281233
for patterns in langs:
12291234
match = False
12301235
for pattern in patterns:

tests/test_level4/test_lang.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,44 @@ def test_avoid_implicit_language(self):
155155
flags=util.HTML
156156
)
157157

158+
def test_language_und(self):
159+
"""Test that `*` matches the explicit `und` language tag but not empty or missing `lang` values."""
160+
161+
markup = """
162+
<div id="1" lang=""></div>
163+
<div id="2" lang="und"></div>
164+
<div id="3" lang=>
165+
<div id="4"></div>
166+
</div>
167+
<div id="5"></div>
168+
"""
169+
170+
self.assert_selector(
171+
markup,
172+
"div:lang('*')",
173+
['2'],
174+
flags=util.HTML
175+
)
176+
177+
def test_language_empty_string(self):
178+
"""Test that an empty string language will only match untagged languages `lang=""`."""
179+
180+
markup = """
181+
<div id="1" lang=""></div>
182+
<div id="2" lang="und"></div>
183+
<div id="3" lang=>
184+
<div id="4"></div>
185+
</div>
186+
<div id="5"></div>
187+
"""
188+
189+
self.assert_selector(
190+
markup,
191+
"div:lang('')",
192+
['1', '3', '4'],
193+
flags=util.HTML
194+
)
195+
158196
def test_language_list(self):
159197
"""Test language list."""
160198

0 commit comments

Comments
 (0)