Z|[+-]\d\d:?\d\d)?"
).set_name("ISO8601 datetime")
"ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"
# Hexadecimal digits (either letter case) in the hyphenated 8-4-4-4-12 layout.
uuid = Regex(
    r"[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}"
).set_name("UUID")
"UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"
# Matches any opening or closing tag and suppresses it, so that
# transform_string() drops the tags and leaves the surrounding text.
_html_stripper = any_open_tag.suppress() | any_close_tag.suppress()

@staticmethod
def strip_html_tags(s: str, l: int, tokens: ParseResults):
    """Parse action to remove HTML tags from web page HTML source

    Only ``tokens[0]`` is processed; ``s`` and ``l`` are part of the
    parse-action signature and are not used here.

    Example::

        # strip HTML links from normal text
        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        td, td_end = make_html_tags("TD")
        table_text = td + SkipTo(td_end).set_parse_action(pyparsing_common.strip_html_tags)("body") + td_end
        print(table_text.parse_string(text).body)

    Prints::

        More info at the pyparsing wiki page
    """
    tag_stripper = pyparsing_common._html_stripper
    return tag_stripper.transform_string(tokens[0])
# One unquoted list item: one or more comma-free words of printable
# characters, combined together with any whitespace between them.
_commasepitem = Combine(
    OneOrMore(
        # never start a word at a delimiter or at the end of the line
        ~Literal(",")
        + ~LineEnd()
        + Word(printables, exclude_chars=",")
        # take spaces/tabs only when they are not followed by a line end or
        # a comma, so trailing blanks do not become part of the item
        + Opt(White(" \t") + ~FollowedBy(LineEnd() | ","))
    )
).streamline().set_name("commaItem")

# Each element is a quoted string or an unquoted item; when neither matches
# (nothing between two delimiters) the element is the default "".
comma_separated_list = DelimitedList(
    Opt(quoted_string.copy() | _commasepitem, default="")
).set_name("comma separated list")
"""Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
# Both actions are built with token_map from a one-argument callable and
# wrapped in staticmethod so that access through the class does not bind them.
upcase_tokens = staticmethod(token_map(lambda tok: tok.upper()))
"""Parse action to convert tokens to upper case."""

downcase_tokens = staticmethod(token_map(lambda tok: tok.lower()))
"""Parse action to convert tokens to lower case."""
# fmt: off
# The pattern is assembled from adjacent raw-string pieces so that each part
# can carry its own comment.  Named groups it captures:
#   url      - the complete match
#   scheme   - "http", "https" or "ftp" (optional; the "//" is still required)
#   auth     - "user" or "user:pass" before an "@" (optional)
#   host     - dotted IPv4 address or host/domain name
#   port     - 2 to 5 digits after ":" (optional)
#   path     - "/"-led resource path (optional)
#   query    - text after "?" up to a "#" (optional)
#   fragment - non-whitespace text after "#" (optional)
url = Regex(
    # https://mathiasbynens.be/demo/url-regex
    # https://gist.github.com/dperini/729294
    r"(?P<url>" +
    # protocol identifier (optional)
    # short syntax // still required
    r"(?:(?:(?P<scheme>https?|ftp):)?\/\/)" +
    # user:pass BasicAuth (optional)
    r"(?:(?P<auth>\S+(?::\S*)?)@)?" +
    r"(?P<host>" +
    # IP address exclusion
    # private & local networks
    r"(?!(?:10|127)(?:\.\d{1,3}){3})" +
    r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})" +
    r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})" +
    # IP address dotted notation octets
    # excludes loopback network 0.0.0.0
    # excludes reserved space >= 224.0.0.0
    # excludes network & broadcast addresses
    # (first & last IP address of each class)
    r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])" +
    r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}" +
    r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" +
    r"|" +
    # host & domain names, may end with dot
    # can be replaced by a shortest alternative
    # (?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+
    r"(?:" +
    r"(?:" +
    r"[a-z0-9\u00a1-\uffff]" +
    r"[a-z0-9\u00a1-\uffff_-]{0,62}" +
    r")?" +
    r"[a-z0-9\u00a1-\uffff]\." +
    r")+" +
    # TLD identifier name, may end with dot
    r"(?:[a-z\u00a1-\uffff]{2,}\.?)" +
    r")" +
    # port number (optional)
    r"(:(?P<port>\d{2,5}))?" +
    # resource path (optional)
    r"(?P<path>\/[^?# ]*)?" +
    # query string (optional)
    r"(\?(?P<query>[^#]*))?" +
    # fragment (optional)
    r"(#(?P<fragment>\S*))?" +
    r")"
).set_name("url")
"""URL (http/https/ftp scheme)"""
# fmt: on
# pre-PEP8 compatibility names
# Each camelCase name below is a plain alias: it is bound to the very same
# object as its snake_case counterpart, so either spelling can be used.
# The string after each alias marks it as deprecated and names the
# snake_case replacement.
convertToInteger = convert_to_integer
"""Deprecated - use :class:`convert_to_integer`"""
convertToFloat = convert_to_float
"""Deprecated - use :class:`convert_to_float`"""
convertToDate = convert_to_date
"""Deprecated - use :class:`convert_to_date`"""
convertToDatetime = convert_to_datetime
"""Deprecated - use :class:`convert_to_datetime`"""
stripHTMLTags = strip_html_tags
"""Deprecated - use :class:`strip_html_tags`"""
upcaseTokens = upcase_tokens
"""Deprecated - use :class:`upcase_tokens`"""
downcaseTokens = downcase_tokens
"""Deprecated - use :class:`downcase_tokens`"""
# Every attribute of pyparsing_common that is a ParserElement instance,
# in the order the class namespace holds them.
_builtin_exprs = [
    member
    for member in vars(pyparsing_common).values()
    if isinstance(member, ParserElement)
]