Hackfut Security File Manager
Current Path:
/usr/lib64/python2.6/site-packages/lxml/html
usr
/
lib64
/
python2.6
/
site-packages
/
lxml
/
html
/
📁
..
📄
ElementSoup.py
(319 B)
📄
ElementSoup.pyc
(622 B)
📄
ElementSoup.pyo
(622 B)
📄
__init__.py
(50.92 KB)
📄
__init__.pyc
(55.04 KB)
📄
__init__.pyo
(54.79 KB)
📄
_dictmixin.py
(3.46 KB)
📄
_dictmixin.pyc
(4.58 KB)
📄
_dictmixin.pyo
(4.58 KB)
📄
_diffcommand.py
(2.03 KB)
📄
_diffcommand.pyc
(2.79 KB)
📄
_diffcommand.pyo
(2.79 KB)
📄
_html5builder.py
(3.05 KB)
📄
_html5builder.pyc
(4.39 KB)
📄
_html5builder.pyo
(4.39 KB)
📄
_setmixin.py
(2.43 KB)
📄
_setmixin.pyc
(5.07 KB)
📄
_setmixin.pyo
(5.07 KB)
📄
builder.py
(4.21 KB)
📄
builder.pyc
(3.83 KB)
📄
builder.pyo
(3.83 KB)
📄
clean.py
(24.43 KB)
📄
clean.pyc
(18.46 KB)
📄
clean.pyo
(18.36 KB)
📄
defs.py
(3.71 KB)
📄
defs.pyc
(3.49 KB)
📄
defs.pyo
(3.49 KB)
📄
diff.py
(29.66 KB)
📄
diff.pyc
(28.02 KB)
📄
diff.pyo
(27.68 KB)
📄
formfill.py
(9.53 KB)
📄
formfill.pyc
(9.78 KB)
📄
formfill.pyo
(9.53 KB)
📄
html5parser.py
(5.18 KB)
📄
html5parser.pyc
(5.73 KB)
📄
html5parser.pyo
(5.73 KB)
📄
soupparser.py
(4.17 KB)
📄
soupparser.pyc
(4.72 KB)
📄
soupparser.pyo
(4.72 KB)
📄
usedoctest.py
(249 B)
📄
usedoctest.pyc
(464 B)
📄
usedoctest.pyo
(464 B)
Editing: html5parser.py
""" An interface to html5lib. """ import urllib from html5lib import HTMLParser as _HTMLParser, XHTMLParser as _XHTMLParser from lxml import etree from lxml.html import _contains_block_level_tag, XHTML_NAMESPACE from lxml.html._html5builder import TreeBuilder # python3 compatibility try: _strings = basestring except NameError: _strings = (bytes, str) class HTMLParser(_HTMLParser): """An html5lib HTML parser with lxml as tree.""" def __init__(self, strict=False): _HTMLParser.__init__(self, strict=strict, tree=TreeBuilder) class XHTMLParser(_XHTMLParser): """An html5lib XHTML Parser with lxml as tree.""" def __init__(self, strict=False): _XHTMLParser.__init__(self, strict=strict, tree=TreeBuilder) def _find_tag(tree, tag): elem = tree.find(tag) if elem is not None: return elem return tree.find('{%s}%s' % (XHTML_NAMESPACE, tag)) def document_fromstring(html, guess_charset=True, parser=None): """Parse a whole document into a string.""" if not isinstance(html, _strings): raise TypeError('string required') if parser is None: parser = html_parser return parser.parse(html, useChardet=guess_charset).getroot() def fragments_fromstring(html, no_leading_text=False, guess_charset=False, parser=None): """Parses several HTML elements, returning a list of elements. The first item in the list may be a string. If no_leading_text is true, then it will be an error if there is leading text, and it will always be a list of only elements. If `guess_charset` is `True` and the text was not unicode but a bytestring, the `chardet` library will perform charset guessing on the string. """ if not isinstance(html, _strings): raise TypeError('string required') if parser is None: parser = html_parser children = parser.parseFragment(html, 'div', useChardet=guess_charset) if children and isinstance(children[0], _strings): if no_leading_text: if children[0].strip(): raise etree.ParserError('There is leading text: %r' % children[0]) del children[0] return children def fragment_fromstring(html, create_parent=False, guess_charset=False, parser=None): """Parses a single HTML element; it is an error if there is more than one element, or if anything but whitespace precedes or follows the element. If create_parent is true (or is a tag name) then a parent node will be created to encapsulate the HTML in a single element. """ if not isinstance(html, _strings): raise TypeError('string required') if create_parent: container = create_parent or 'div' html = '<%s>%s</%s>' % (container, html, container) children = fragments_fromstring(html, True, guess_charset, parser) if not children: raise etree.ParserError('No elements found') if len(children) > 1: raise etree.ParserError('Multiple elements found') result = children[0] if result.tail and result.tail.strip(): raise etree.ParserError('Element followed by text: %r' % result.tail) result.tail = None return result def fromstring(html, guess_charset=True, parser=None): """Parse the html, returning a single element/document. This tries to minimally parse the chunk of text, without knowing if it is a fragment or a document. base_url will set the document's base_url attribute (and the tree's docinfo.URL) """ if not isinstance(html, _strings): raise TypeError('string required') doc = document_fromstring(html, parser=parser, guess_charset=guess_charset) # document starts with doctype or <html>, full document! start = html[:50].lstrip().lower() if start.startswith('<html') or start.startswith('<!doctype'): return doc head = _find_tag(doc, 'head') # if the head is not empty we have a full document if len(head): return doc body = _find_tag(doc, 'body') # The body has just one element, so it was probably a single # element passed in if (len(body) == 1 and (not body.text or not body.text.strip()) and (not body[-1].tail or not body[-1].tail.strip())): return body[0] # Now we have a body which represents a bunch of tags which have the # content that was passed in. We will create a fake container, which # is the body tag, except <body> implies too much structure. if _contains_block_level_tag(body): body.tag = 'div' else: body.tag = 'span' return body def parse(filename_url_or_file, guess_charset=True, parser=None): """Parse a filename, URL, or file-like object into an HTML document tree. Note: this returns a tree, not an element. Use ``parse(...).getroot()`` to get the document root. """ if parser is None: parser = html_parser if isinstance(filename_url_or_file, basestring): fp = urllib.urlopen(filename_url_or_file) else: fp = filename_url_or_file return parser.parse(fp, useChardet=guess_charset) html_parser = HTMLParser() xhtml_parser = XHTMLParser()
Upload File
Create Folder