Tryag File Manager
Home
-
Turbo Force
Current Path :
/
usr
/
lib
/
python2.4
/
site-packages
/
_xmlplus
/
dom
/
ext
/
reader
/
Upload File :
New :
File
Dir
//usr/lib/python2.4/site-packages/_xmlplus/dom/ext/reader/HtmlLib.py
########################################################################
#
# File Name:            HtmlLib.py
#
#
"""
Components for reading HTML files using htmllib.py.
WWW: http://4suite.com/4DOM         e-mail: support@4suite.com

Copyright (c) 2000 Fourthought Inc, USA.   All Rights Reserved.
See  http://4suite.com/COPYRIGHT  for license and copyright information
"""

import os
import urllib

from xml.dom.ext import reader
from xml.dom import Node

import Sgmlop


class Reader(reader.Reader):
    """HTML reader that delegates parsing to ``Sgmlop.HtmlParser``."""

    def __init__(self):
        # One parser instance is kept per reader and re-initialised on
        # every fromStream() call.
        self.parser = Sgmlop.HtmlParser()

    def fromStream(self, stream, ownerDoc=None, charset=''):
        """Parse HTML from *stream* and return the resulting DOM node.

        stream   -- object handed unchanged to the parser's parse() method.
        ownerDoc -- optional document passed to the parser's initState().
        charset  -- character set name passed to the parser's initState().

        Returns the parser's root node (a document fragment) when
        *ownerDoc* is given; otherwise returns that fragment's owner
        document after the parsed content has been moved into it.
        """
        parser = self.parser
        parser.initParser()
        parser.initState(ownerDoc, charset)
        parser.parse(stream)
        fragment = parser.rootNode

        if ownerDoc is not None:
            # The caller supplied the document: hand back the fragment as is.
            return fragment

        # No owner document was supplied, so the parser created one together
        # with a fragment belonging to it.  That fresh document already has
        # the shape
        # '<HTML><HEAD><TITLE></TITLE></HEAD><BODY></BODY></HTML>',
        # so look for an HTML element among the fragment's children first.
        document = fragment.ownerDocument
        for candidate in fragment.childNodes:
            if candidate.nodeType != Node.ELEMENT_NODE:
                continue
            if candidate.tagName != 'HTML':
                continue

            # Throw away the skeleton generated by the HTMLDomImplementation.
            while document.documentElement.firstChild:
                discarded = document.documentElement.removeChild(
                    document.documentElement.firstChild)
                self.releaseNode(discarded)

            # Move the parsed HTML element's children under the document
            # element, one at a time, until none are left.
            while candidate.firstChild:
                document.documentElement.appendChild(candidate.firstChild)

            self.releaseNode(fragment)
            return document

        # No HTML element was found in the fragment: put everything that was
        # parsed under the last child of the document element (the BODY of
        # the generated skeleton).
        body = document.documentElement.lastChild
        body.appendChild(fragment)
        self.releaseNode(fragment)
        return document

    def fromUri(self, uri, ownerDoc=None, charset=''):
        """Resolve *uri* to a stream, parse it, and always close the stream.

        The stream comes from reader.BASIC_RESOLVER.resolve(uri); the return
        value is whatever fromStream() returns for it.
        """
        source = reader.BASIC_RESOLVER.resolve(uri)
        try:
            return self.fromStream(source, ownerDoc, charset)
        finally:
            source.close()

    def fromString(self, str, ownerDoc=None, charset=''):
        """Parse the HTML text *str* by wrapping it in a reader.StrStream.

        The wrapping stream is closed whether or not parsing succeeds; the
        return value is whatever fromStream() returns for it.
        """
        # NOTE: the parameter name shadows the builtin but is kept because
        # callers may pass it by keyword.
        source = reader.StrStream(str)
        try:
            return self.fromStream(source, ownerDoc, charset)
        finally:
            source.close()


########################## Deprecated ##############################

def FromHtmlStream(fp, ownerDoc=None, charset=''):
    """Deprecated: parse the stream *fp* with a fresh Reader."""
    html_reader = Reader()
    return html_reader.fromStream(fp, ownerDoc, charset)


def FromHtmlFile(fileName, ownerDoc=None, charset=''):
    """Deprecated: parse *fileName* with a fresh Reader via fromUri()."""
    html_reader = Reader()
    return html_reader.fromUri(fileName, ownerDoc, charset)


def FromHtmlUrl(url, ownerDoc=None, charset=''):
    """Deprecated: parse *url* with a fresh Reader via fromUri()."""
    html_reader = Reader()
    return html_reader.fromUri(url, ownerDoc, charset)


def FromHtml(text, ownerDoc=None, charset=''):
    """Deprecated: parse the string *text* with a fresh Reader."""
    html_reader = Reader()
    return html_reader.fromString(text, ownerDoc, charset)