File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -54,7 +54,19 @@ This module also defines utility functions.
5454 "!", "?", and "=" are forbidden.
5555 The name cannot start with a digit or a character like "-", ".", and "·".
5656
57- ..versionadded:: next
57+ .. versionadded:: next
58+
59+
60+ .. function:: is_valid_text(data)
61+
62+ Return ``True`` if the string is a sequence of legal XML 1.0 characters,
63+ ``False`` otherwise.
64+
65+ Almost all characters are permitted in XML 1.0 documents, except C0 control
66+ characters (excluding TAB, CR and LF), surrogate characters and special
67+ Unicode characters U+FFFE and U+FFFF.
68+
69+ .. versionadded:: next
5870
5971
6072.. _xml-security :
Original file line number Diff line number Diff line change @@ -1714,6 +1714,10 @@ xml
17141714 whether a string can be used as an element or attribute name in XML.
17151715 (Contributed by Serhiy Storchaka in :gh: `139489 `.)
17161716
1717+ * Add the :func:`xml.is_valid_text` function, which checks whether
1718+ a string consists only of characters that are legal in an XML 1.0 document.
1719+ (Contributed by Serhiy Storchaka in :gh:`139489`.)
1720+
17171721
17181722xml.parsers.expat
17191723-----------------
Original file line number Diff line number Diff line change @@ -22,6 +22,22 @@ def test_is_valid_name(self):
2222 for c in '<>/!?=\x00 \x01 \x7f \ud800 \udfff \ufffe \uffff \U000F0000 ' :
2323 self .assertFalse (is_valid_name ('name' + c ))
2424
def test_is_valid_text(self):
    # Exercise xml.is_valid_text() at the boundaries of the XML 1.0
    # character set.
    is_valid_text = xml.is_valid_text

    # The empty string and ordinary text (including TAB, CR, LF and
    # non-ASCII characters) are accepted.
    self.assertTrue(is_valid_text(''))
    self.assertTrue(is_valid_text('!0Aa_~ \r\n\t\x85\xa0'))
    # Characters adjacent to the excluded surrogate and U+FFFE/U+FFFF
    # ranges, and non-BMP characters, are accepted.
    self.assertTrue(is_valid_text('\ud7ff\ue000\ufffd\U00010000\U0010ffff'))

    # C0 control characters other than TAB, CR and LF are rejected.
    self.assertFalse(is_valid_text('\x00'))
    self.assertFalse(is_valid_text('\x01'))
    self.assertFalse(is_valid_text('\x1f'))
    # DEL and the C1 control characters are accepted.
    self.assertTrue(is_valid_text('\x7f'))
    self.assertTrue(is_valid_text('\x80'))
    self.assertTrue(is_valid_text('\x9f'))

    # Surrogates and the special characters U+FFFE and U+FFFF are rejected.
    self.assertFalse(is_valid_text('\ud800'))
    self.assertFalse(is_valid_text('\udfff'))
    self.assertFalse(is_valid_text('\ufffe'))
    self.assertFalse(is_valid_text('\uffff'))
40+
2541
2642if __name__ == '__main__' :
2743 unittest .main ()
Original file line number Diff line number Diff line change @@ -23,3 +23,15 @@ def is_valid_name(name):
2323 '\uF900 -\uFDCF \uFDF0 -\uFFFD \U00010000 -\U000EFFFF '
2424 ']*+' ,
2525 name ) is not None
26+
# Characters excluded by the XML 1.0 "Char" production:
# https://www.w3.org/TR/xml/#charsets
_ILLEGAL_XML_CHAR = (
    '['
    '\x00-\x08\x0B\x0C\x0E-\x1F'  # C0 controls except TAB, CR and LF
    '\uD800-\uDFFF'  # the surrogate blocks
    '\uFFFE\uFFFF'  # special Unicode characters
    ']')


def is_valid_text(data):
    """Test whether a string is a sequence of legal XML 1.0 characters.

    Return True if *data* contains none of the characters matched by
    _ILLEGAL_XML_CHAR (C0 control characters other than TAB, CR and LF,
    surrogate code points, U+FFFE and U+FFFF), and False otherwise.
    The empty string is valid.
    """
    # One illegal character anywhere invalidates the whole string, so the
    # pattern is searched for rather than anchored at the start.
    return _re.search(_ILLEGAL_XML_CHAR, data) is None
Original file line number Diff line number Diff line change 1+ Add the :func:`xml.is_valid_text` function, which checks whether
2+ a string consists only of characters that are legal in an XML 1.0 document.
You can’t perform that action at this time.
0 commit comments