hexlib/hexlib/regex_util.py

8 lines
260 B
Python

import re
# Finds http(s) links embedded in free text or markup.
#   group 1: the whole URL
#   group 2: everything after the dotted host suffix (path/query/...), possibly ""
# NOTE: the ``|$`` alternative can never be selected, because ``[^\s<'\"]*``
# already succeeds on an empty match; it is kept so the pattern is unchanged.
LINK_RE = re.compile(
    r"(https?://"        # group 1 opens: "http://" or "https://"
    r"[\w\-_.]+"         # host characters (``\w`` already covers "_")
    r"\.[a-z]{2,4}"      # a dot followed by 2-4 lowercase ASCII letters
    r"([^\s<'\"]*|$)"    # group 2: run until whitespace, "<" or a quote
    r")"                 # group 1 closes
)
# Captures the value of a double-quoted ``href="..."`` attribute in group 1.
# The match is case-sensitive, and single-quoted, unquoted or empty values
# are not matched.
HTML_HREF_RE = re.compile(
    r"href=\""     # literal ``href=`` and the opening double quote
    r"([^\"]+)"    # group 1: one or more characters that are not a double quote
    r"\""          # closing double quote
)
# Matches a run of one or more whitespace characters (``\s``), so a whole
# run can be collapsed or split on in a single operation.
WHITESPACE_RE = re.compile(
    r"\s+",
)
# Matches a run of one or more punctuation characters from a fixed set:
#   . , ; : " “ ” ! ? / ( ) | * = >
# Apostrophes, hyphens and "<" are not in the set and are left alone.
# The closing curly quote (”) is included alongside the opening one (“) so
# that text such as “quoted” has both quotes matched instead of leaving a
# stray ” behind.
PUNCTUATION_RE = re.compile(r"[.,;:\"“”!?/()|*=>]+")
# Matches a single XML/HTML entity or character reference, e.g. ``&amp;``,
# ``&Auml;``, ``&frac12;``, ``&#39;`` or ``&#x27;``.
# The previous pattern (``&[a-z]+;``) only recognised all-lowercase named
# entities, so numeric references and names containing uppercase letters or
# digits were missed. Everything the old pattern matched is still matched.
# The alternation is non-capturing so ``findall`` keeps returning the full
# matched text.
XML_ENTITY_RE = re.compile(
    r"&(?:"
    r"[A-Za-z][A-Za-z0-9]*"   # named reference: &amp; &Auml; &frac12;
    r"|#[0-9]+"               # decimal character reference: &#39;
    r"|#[xX][0-9A-Fa-f]+"     # hexadecimal character reference: &#x27;
    r");"
)