"""HTML parser that extracts form information.
This is intended to support functional tests that need to extract
information from HTML forms returned by the publisher.
See *formparser.txt* for documentation.
This isn't intended to simulate a browser session; that's provided by
the `zope.testbrowser` package.
"""
# Markup language used by the docstrings in this module.
__docformat__ = "reStructuredText"
import HTMLParser
import urlparse
def parse(data, base=None):
    """Parse `data` and return the collection of forms found in it.

    `data` is the HTML source text.  `base` should be the URL from which
    `data` was retrieved; it is passed through to `FormParser` unchanged.
    """
    return FormParser(data, base).parse()
class FormParser(object):
    """Collect the forms of an HTML document into a `FormCollection`.

    A standard-library `HTMLParser` instance does the tokenizing; its
    callbacks are replaced with bound methods of this object, which keep
    track of the form, select and textarea currently being read.

    `data` is the HTML source.  `base`, if given, is the URL used to
    resolve relative ``action``, ``src`` and ``<base href>`` values.
    """

    def __init__(self, data, base=None):
        self.data = data
        self.base = base
        self._parser = HTMLParser.HTMLParser()
        self._parser.handle_data = self._handle_data
        self._parser.handle_endtag = self._handle_endtag
        self._parser.handle_starttag = self._handle_starttag
        # Self-closing tags are processed exactly like start tags.
        self._parser.handle_startendtag = self._handle_starttag
        self._buffer = []      # text seen since the last start tag
        self.current = None    # current form
        self.textarea = None   # textarea waiting for its closing tag
        self.select = None     # select whose options are being read
        self.forms = FormCollection()

    def parse(self):
        """Parse the document, returning the collection of forms."""
        self._parser.feed(self.data)
        self._parser.close()
        return self.forms

    # HTMLParser handlers

    def _handle_data(self, data):
        """Accumulate character data until the next start or end tag."""
        self._buffer.append(data)

    def _handle_endtag(self, tag):
        """Finish the textarea, select or option that `tag` closes.

        Closing tags that have no matching open element are ignored.
        """
        if tag == "textarea":
            if self.textarea is not None:
                self.textarea.value = "".join(self._buffer)
                self.textarea = None
        elif tag == "select":
            self.select = None
        elif tag == "option":
            if self.select is None or not self.select.options:
                return
            option = self.select.options[-1]
            label = "".join(self._buffer)
            # A missing or empty label attribute falls back to the text.
            if not option.label:
                option.label = label
            # Only a *missing* value falls back to the text; an explicit
            # value="" is a legitimate (empty) value and must be kept.
            if option.value is None:
                option.value = label
            if option.selected:
                if self.select.multiple:
                    self.select.value.append(option.value)
                else:
                    self.select.value = option.value

    def _handle_starttag(self, tag, attrs):
        """Create the object describing `tag` and attach it where it belongs.

        `attrs` is the list of ``(name, value)`` pairs supplied by
        `HTMLParser`; a value is None for attributes written without one.
        """
        # Text collected so far belongs to whatever preceded this tag.
        del self._buffer[:]
        d = dict(attrs)
        name = d.get("name")
        ident = d.get("id") or d.get("xml:id")
        if tag == "form":
            method = kwattr(d, "method", "get")
            action = self._url_attr(d, "action")
            enctype = kwattr(d, "enctype", "application/x-www-form-urlencoded")
            self.current = Form(name, ident, method, action, enctype)
            self.forms.append(self.current)
        elif tag == "input":
            input_type = kwattr(d, "type", "text")
            checked = "checked" in d
            disabled = "disabled" in d
            readonly = "readonly" in d
            src = self._url_attr(d, "src")
            value = d.get("value")
            size = intattr(d, "size")
            maxlength = intattr(d, "maxlength")
            self._add_field(
                Input(name, ident, input_type, value, checked,
                      disabled, readonly, src, size, maxlength))
        elif tag == "button":
            # Buttons are not recorded.
            pass
        elif tag == "textarea":
            disabled = "disabled" in d
            readonly = "readonly" in d
            self.textarea = Input(name, ident, "textarea", None,
                                  None, disabled, readonly,
                                  None, None, None)
            self.textarea.rows = intattr(d, "rows")
            self.textarea.cols = intattr(d, "cols")
            self._add_field(self.textarea)
            # The value will be set when the </textarea> is seen.
        elif tag == "base":
            href = self._url_attr(d, "href")
            # A <base> without a usable href (e.g. only a target
            # attribute) must not wipe out the base URL already known.
            if href is not None:
                self.base = href
        elif tag == "select":
            disabled = "disabled" in d
            multiple = "multiple" in d
            size = intattr(d, "size")
            self.select = Select(name, ident, disabled, multiple, size)
            self._add_field(self.select)
        elif tag == "option":
            # An <option> outside any <select> has nowhere to go.
            if self.select is not None:
                disabled = "disabled" in d
                selected = "selected" in d
                value = d.get("value")
                label = d.get("label")
                option = Option(ident, value, selected, label, disabled)
                self.select.options.append(option)

    # Helpers:

    def _url_attr(self, d, name):
        """Return URL attribute `name` joined with the base URL, or None.

        None is returned when the attribute is absent, has no value, or
        is blank.  Without a base URL the stripped value is returned as is.
        """
        url = (d.get(name) or "").strip()
        if not url:
            return None
        if self.base:
            url = urlparse.urljoin(self.base, url)
        return url

    def _add_field(self, field):
        """Store `field` in the current form under ``field.name``.

        Fields sharing a name are collected into a list in the order they
        are added.  A field met while no form is current is ignored.
        """
        if self.current is None:
            return
        if field.name in self.current:
            ob = self.current[field.name]
            if isinstance(ob, list):
                ob.append(field)
            else:
                self.current[field.name] = [ob, field]
        else:
            self.current[field.name] = field
def kwattr(d, name, default=None):
    """Return the keyword-like attribute `name` from `d` in lowercase.

    The value is stripped of surrounding whitespace and lowercased.
    `default` is returned when the attribute is missing or ends up empty.
    """
    raw = d.get(name, default)
    if raw is None or raw == default:
        # Nothing to normalize: either absent or already the default.
        normalized = raw
    else:
        normalized = raw.strip().lower()
    return normalized or default
def intattr(d, name):
    """Return attribute as an integer, or None.

    None is returned when the attribute is absent, was written without a
    value (the parser reports those as None), or does not hold a valid
    integer, so that sloppy markup does not abort the whole parse.
    """
    v = d.get(name)
    if v is None:
        return None
    try:
        return int(v.strip())
    except ValueError:
        return None
class FormCollection(list):
    """A list of forms that also exposes each form as an attribute.

    Looking up an attribute that is not otherwise defined returns the
    first contained form whose `name` equals the attribute name.
    """

    def __getattr__(self, name):
        # Python only calls this after normal attribute lookup has failed.
        for candidate in self:
            if candidate.name == name:
                break
        else:
            raise AttributeError(name)
        return candidate
class Form(dict):
    """A single form of a page.

    The dictionary part starts out empty; the form's own attributes
    (`name`, `id`, `method`, `action`, `enctype`) are stored on the
    instance.
    """

    # TODO: offer a method that builds a dictionary suitable for passing
    # straight to the `form` argument of the `http()` function of the
    # Zope functional test.
    #
    # Probably low priority, since the `zope.testbrowser` package is
    # available.

    def __init__(self, name, id, method, action, enctype):
        super(Form, self).__init__()
        # Identification of the form.
        self.name, self.id = name, id
        # How and where the form is submitted.
        self.method, self.action, self.enctype = method, action, enctype
class Input(object):
    """Input element.

    A plain attribute holder: every constructor argument is stored under
    the attribute of the same name.
    """

    # Class-level defaults; instances may override them.
    rows = None
    cols = None

    def __init__(self, name, id, type, value, checked, disabled, readonly,
                 src, size, maxlength):
        super(Input, self).__init__()
        # Identity
        self.name, self.id, self.type = name, id, type
        # Current state
        self.value, self.checked = value, checked
        self.disabled, self.readonly = disabled, readonly
        # Presentation-related attributes
        self.src, self.size, self.maxlength = src, size, maxlength
class Select(Input):
    """Select element.

    Stored as an `Input` of type ``"select"`` with two extra attributes:
    `options`, an initially empty list, and `multiple`.  `value` starts
    as an empty list when `multiple` is true and as None otherwise.
    """

    def __init__(self, name, id, disabled, multiple, size):
        super(Select, self).__init__(
            name, id, type="select", value=None, checked=None,
            disabled=disabled, readonly=None, src=None, size=size,
            maxlength=None)
        self.multiple = multiple
        self.options = []
        if multiple:
            # Multiple selection: values are accumulated in a list.
            self.value = []
class Option(object):
    """One selectable value of a select element.

    A plain attribute holder for `id`, `value`, `selected`, `label` and
    `disabled`.
    """

    def __init__(self, id, value, selected, label, disabled):
        super(Option, self).__init__()
        self.id = id
        # What is submitted and what is shown.
        self.value, self.label = value, label
        # State flags.
        self.selected, self.disabled = selected, disabled