[cvs] / gkb / ClientForm.py  

cvs: gkb/ClientForm.py


1 : tmcnulty 1.1 """HTML form handling for web clients.
2 :    
3 :     ClientForm is a Python module for handling HTML forms on the client
4 :     side, useful for parsing HTML forms, filling them in and returning the
5 :     completed forms to the server. It has developed from a port of Gisle
6 :     Aas' Perl module HTML::Form, from the libwww-perl library, but the
7 :     interface is not the same.
8 :    
9 :     The most useful docstring is the one for HTMLForm.
10 :    
11 :     RFC 1866: HTML 2.0
12 :     RFC 1867: Form-based File Upload in HTML
13 :     RFC 2388: Returning Values from Forms: multipart/form-data
14 :     HTML 3.2 Specification, W3C Recommendation 14 January 1997 (for ISINDEX)
15 :     HTML 4.01 Specification, W3C Recommendation 24 December 1999
16 :    
17 :    
18 :     Copyright 2002-2003 John J. Lee <jjl@pobox.com>
19 :     Copyright 1998-2000 Gisle Aas.
20 :    
21 :     This code is free software; you can redistribute it and/or modify it
22 :     under the terms of the BSD License (see the file COPYING included with
23 :     the distribution).
24 :    
25 :     """
26 :    
27 :     # XXX
28 :     # Controls can have name=None (eg. forms constructed partly with
29 :     # JavaScript), but find_control can't be told to find a control
30 :     # with that name, because None there means 'unspecified'. Can still
31 :     # get at by nr, but would be nice to be able to specify something
32 :     # equivalent to name=None, too.
33 :     # Unicode: see Wichert Akkerman's 2004-01-22 message to c.l.py.
34 :     # Add some functional tests
35 :     # Especially single and multiple file upload on the internet.
36 :     # Does file upload work when name is missing? Sourceforge tracker form
37 :     # doesn't like it. Check standards, and test with Apache. Test
38 :     # binary upload with Apache.
39 :     # Support for list item ids. How to handle missing ids? (How do I deal
40 :     # with duplicate OPTION labels ATM? Can't remember...)
41 :     # Get rid of MapBase, AList and MimeWriter.
42 :     # Deal with character sets properly. Not sure what the issues are here.
43 :     # Do URL encodings need any attention?
44 :     # I don't *think* any encoding of control names, filenames or data is
45 :     # necessary -- HTML spec. doesn't require it, and Mozilla Firebird 0.6
46 :     # doesn't seem to do it.
47 :     # Add charset parameter to Content-type headers? How to find value??
48 :     # Add label support for CHECKBOX and RADIO. Actually, I may not bother
49 :     # to fix this, since a discussion with Gisle on libwww-perl list seemed
50 :     # to show that it wouldn't be very useful.
51 :     # I'm not going to fix this unless somebody tells me what real servers
52 :     # that want this encoding actually expect: If enctype is
53 :     # application/x-www-form-urlencoded and there's a FILE control present.
54 :     # Strictly, it should be 'name=data' (see HTML 4.01 spec., section
55 :     # 17.13.2), but I send "name=" ATM. What about multiple file upload??
56 :     # Remove single-selection code: can be special case of multi-selection,
57 :     # with a few variations, I think.
58 :     # Factor out multiple-selection list code? May not be easy. Maybe like
59 :     # this:
60 :    
61 :     # ListControl
62 :     # ^
63 :     # | MultipleListControlMixin
64 :     # | ^
65 :     # SelectControl /
66 :     # ^ /
67 :     # \ /
68 :     # MultiSelectControl
69 :    
70 :    
71 :     # Plan
72 :     # ----
73 :     # Maybe a 0.2.x, cleaned up a bit and with id support for list items?
74 :     # Not sure it's worth it...
75 :     # Unify single / multiple selection code.
76 :     # action should probably be an absolute URI, like DOMForm.
77 :     # Remove toggle methods.
78 :     # Replace by_label with choice between value / id / label /
79 :     # element contents (see discussion with Gisle about labels on
80 :     # libwww-perl list).
81 :     # ...what else?
82 :     # Work on DOMForm.
83 :     # XForms? Don't know if there's a need here.
84 :    
85 :    
86 :     try: True
87 :     except NameError:
88 :     True = 1
89 :     False = 0
90 :    
91 :     try: bool
92 :     except NameError:
93 :     def bool(expr):
94 :     if expr: return True
95 :     else: return False
96 :    
97 :     import sys, urllib, urllib2, types, string, mimetools, copy
98 :     from urlparse import urljoin
99 :     from cStringIO import StringIO
100 :     try:
101 :     from types import UnicodeType
102 :     except ImportError:
103 :     UNICODE = False
104 :     else:
105 :     UNICODE = True
106 :    
107 :     VERSION = "0.1.16"
108 :    
109 :     CHUNK = 1024 # size of chunks fed to parser, in bytes
110 :    
111 :     # This version of urlencode is from my Python 1.5.2 back-port of the
112 :     # Python 2.1 CVS maintenance branch of urllib. It will accept a sequence
113 :     # of pairs instead of a mapping -- the 2.0 version only accepts a mapping.
def urlencode(query, doseq=False):
    """Encode a mapping or a sequence of two-element tuples as a URL query
    string.

    query: object with an items() method, or a sequence of (key, value)
     tuples; when a sequence is given, the output preserves its order
    doseq: if true, any value that is itself a (non-string) sequence is
     expanded into one key=value parameter per element

    Keys and values are converted with str() and quoted with
    urllib.quote_plus.  With doseq true, unicode values are encoded to ASCII
    using the "replace" error handler before quoting.

    Raises TypeError if query is neither a mapping nor a sequence of tuples
    (for example, a non-empty string).

    """
    if hasattr(query, "items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # len() raises TypeError for non-sequences; non-empty strings
            # fail the tuple test on their first element
            if len(query) and type(query[0]) != types.TupleType:
                raise TypeError()
            # zero-length sequences of all types get here and succeed, which
            # matches the acceptance of empty dicts
        except TypeError:
            # Raise a fresh exception carrying only the message: a traceback
            # object does not belong in the exception's arguments.
            raise TypeError("not a valid non-string sequence or mapping "
                            "object")

    pairs = []
    if not doseq:
        # preserve old behavior: every value is stringified as a whole
        for k, v in query:
            k = urllib.quote_plus(str(k))
            v = urllib.quote_plus(str(v))
            pairs.append(k + '=' + v)
    else:
        for k, v in query:
            k = urllib.quote_plus(str(k))
            if type(v) == types.StringType:
                pairs.append(k + '=' + urllib.quote_plus(v))
            elif UNICODE and type(v) == types.UnicodeType:
                # "replace" loses information, but "strict" could raise
                # UnicodeError
                v = urllib.quote_plus(v.encode("ASCII", "replace"))
                pairs.append(k + '=' + v)
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    len(v)
                except TypeError:
                    # not a sequence
                    pairs.append(k + '=' + urllib.quote_plus(str(v)))
                else:
                    # one parameter per element
                    for elt in v:
                        pairs.append(k + '=' + urllib.quote_plus(str(elt)))
    return string.join(pairs, '&')
179 :    
def startswith(string, initial):
    """Return true if the sequence string begins with initial."""
    prefix_length = len(initial)
    if prefix_length > len(string):
        # a longer prefix can never match
        return False
    return string[:prefix_length] == initial
183 :    
def issequence(x):
    """Return true if x can be indexed with an integer like a sequence.

    An IndexError on x[0] (an empty sequence) still counts as a sequence;
    a TypeError or KeyError does not.

    """
    try:
        x[0]
    except (TypeError, KeyError):
        # not indexable at all, or a mapping lacking the key 0
        return False
    except IndexError:
        # empty, but still a sequence
        return True
    return True
192 :    
def isstringlike(x):
    """Return true if x supports concatenation with a string."""
    try:
        x + ""
    except:
        # any failure at all means x does not behave like a string
        return False
    return True
197 :    
198 :    
199 :     # XXX don't really want to drag this along (MapBase, AList, MimeWriter)
200 :    
201 :     # This is essentially the same as UserDict.DictMixin. I wrote this before
202 :     # that, and DictMixin isn't available in 1.5.2 anyway.
class MapBase:
    """Mapping designed to be easily derived from.

    Subclass it and override __init__, __setitem__, __getitem__, __delitem__
    and keys.  Nothing else should need to be overridden, unlike UserDict.
    This significantly simplifies dictionary-like classes.

    Also different from UserDict in that it has a readonly flag, and can be
    updated (and initialised) with a sequence of pairs (key, value).

    """
    def __init__(self, init=None):
        """Create the mapping, optionally filled from init.

        init: None, a mapping, or a sequence of (key, value) tuples

        """
        self._data = {}
        self.readonly = False
        if init is not None:
            self.update(init)

    def __getitem__(self, key):
        return self._data[key]

    def __setitem__(self, key, item):
        """Store item under key; raises TypeError if the map is readonly."""
        if self.readonly:
            raise TypeError("object doesn't support item assignment")
        self._data[key] = item

    def __delitem__(self, key):
        """Remove key; raises TypeError if the map is readonly."""
        if self.readonly:
            raise TypeError("object doesn't support item deletion")
        del self._data[key]

    def keys(self):
        return self._data.keys()

    # now the internal workings, there should be no need to override these:

    def clear(self):
        """Delete every key (subject to the readonly check in __delitem__)."""
        # iterate over a snapshot, since deleting changes the key collection
        for k in list(self.keys()):
            del self[k]

    def __repr__(self):
        rep = []
        for k, v in self.items():
            rep.append("%s: %s" % (repr(k), repr(v)))
        return self.__class__.__name__+"{"+(string.join(rep, ", "))+"}"

    def copy(self):
        """Return a shallow copy made with copy.copy."""
        return copy.copy(self)

    def __cmp__(self, dict):
        """Return 0 if every item of self is also in dict, otherwise 1.

        Note: the return value is *not* boolean.

        """
        # NOTE(review): only self's items are checked, so a strict subset of
        # dict also compares as "the same" -- confirm whether that is wanted.
        for k, v in self.items():
            if not (dict.has_key(k) and dict[k] == v):
                return 1  # different
        return 0  # the same

    def __len__(self):
        return len(self.keys())

    def values(self):
        """Return a list of the values, in the order of keys()."""
        r = []
        for k in self.keys():
            r.append(self[k])
        return r

    def items(self):
        """Return a list of (key, value) tuples, in the order of keys()."""
        # Look each key up directly; the original looped over the integer
        # len(self), which raises TypeError.
        r = []
        for k in self.keys():
            r.append((k, self[k]))
        return r

    def has_key(self, key):
        return key in self.keys()

    def update(self, map):
        """Add the items of map, overwriting existing keys.

        map: a mapping (anything with an items() method) or a sequence of
         (key, value) tuples

        Raises TypeError if any item is not a tuple.

        """
        if issequence(map) and not isstringlike(map):
            items = map
        else:
            items = map.items()
        for tup in items:
            # TupleType lives in the types module; the bare name is not
            # imported in this file
            if not isinstance(tup, types.TupleType):
                raise TypeError(
                    "MapBase.update requires a map or a sequence of pairs")
            k, v = tup
            self[k] = v

    def get(self, key, failobj=None):
        """Return self[key], or failobj if key is absent."""
        if key in self.keys():
            return self[key]
        else:
            return failobj

    def setdefault(self, key, failobj=None):
        """Return self[key], first setting it to failobj if key is absent."""
        if not self.has_key(key):
            self[key] = failobj
        return self[key]
301 :    
302 :    
class AList(MapBase):
    """Read-only ordered mapping.

    Built from a sequence of (key, value) pairs whose order is kept.  The
    lookup direction can be swapped with set_inverted, after which the
    original values act as keys.

    """
    def __init__(self, seq=()):
        """seq: sequence of (key, value) pairs."""
        self.readonly = True
        self._inverted = False
        self._data = list(seq)
        self._keys = []
        self._values = []
        for key, value in self._data:
            self._keys.append(key)
            self._values.append(value)

    def set_inverted(self, inverted):
        """Select lookup direction: true means look up keys by value."""
        wanted = bool(inverted)
        if wanted != bool(self._inverted):
            # changing direction just swaps the two parallel lists
            self._keys, self._values = self._values, self._keys
        self._inverted = wanted

    def __getitem__(self, key):
        """Return the value paired with the first occurrence of key."""
        try:
            i = self._keys.index(key)
        except ValueError:
            raise KeyError(key)
        return self._values[i]

    def __delitem__(self, key):
        """Remove the first pair whose key is key.

        Raises TypeError while the mapping is readonly (the default) and
        KeyError if key is not present.

        """
        if self.readonly:
            raise TypeError("object doesn't support item deletion")
        try:
            i = self._keys.index(key)
        except ValueError:
            raise KeyError(key)
        # keep the three parallel lists in step; positions are the same in
        # all of them regardless of inversion
        del self._keys[i]
        del self._values[i]
        del self._data[i]

    def keys(self):
        return self._keys[:]

    def values(self):
        return self._values[:]

    def items(self):
        """Return the (key, value) pairs, swapped if inverted."""
        if not self._inverted:
            return self._data[:]
        swapped = []
        for k, v in self._data:
            swapped.append((v, k))
        return swapped
347 :    
348 :    
def choose_boundary():
    """Return a MIME boundary string containing no '.' characters.

    The boundary comes from mimetools.choose_boundary(), with every dot
    removed.

    """
    # string.replace returns a new string; the result must be kept
    return string.replace(mimetools.choose_boundary(), ".", "")
353 :    
354 :     # This cut-n-pasted MimeWriter from standard library is here so can add
355 :     # to HTTP headers rather than message body when appropriate. It also uses
356 :     # \r\n in place of \n. This is nasty.
class MimeWriter:

    """Generic MIME writer that can also divert headers to an HTTP header list.

    Methods:

    __init__()
    addheader()
    flushheaders()
    startbody()
    startmultipartbody()
    nextpart()
    lastpart()

    The writer never seeks on the output file, so parts must be written in
    the order they are to appear.  Headers are buffered until flushed, which
    lets them be added in any order.

    General usage is:

    f = <open the output file>
    w = MimeWriter(f)
    ...call w.addheader(key, value) 0 or more times...

    followed by either:

    f = w.startbody(content_type)
    ...call f.write(data) for body data...

    or:

    w.startmultipartbody(subtype)
    for each part:
        subwriter = w.nextpart()
        ...use the subwriter's methods to create the subpart...
    w.lastpart()

    Each subwriter is another instance of the same class writing to the same
    file, so nested multipart bodies are handled the same way as the top
    level.  Do not forget to call lastpart().

    Notes:

    - Lines are terminated with \\r\\n.

    - startbody() and startmultipartbody() return the file given to the
      constructor.

    - flushheaders() writes out and forgets the headers gathered so far.

    - prefix (on addheader() and the start*body() methods): false appends
      the header after those already buffered, true inserts it before them.
      The default is to append for addheader() and to insert for
      start*body(), which use it to position the Content-type header.

    - add_to_http_hdrs: when true, the header is appended as a (key, value)
      pair to the http_hdrs list given to the constructor instead of being
      written to the file.

    """

    def __init__(self, fp, http_hdrs=None):
        self._http_hdrs = http_hdrs
        self._fp = fp
        self._headers = []
        self._boundary = []
        self._first_part = True

    def addheader(self, key, value, prefix=0,
                  add_to_http_hdrs=0):
        """
        prefix is ignored if add_to_http_hdrs is true.
        """
        pieces = string.split(value, "\r\n")
        # drop empty lines from both ends of the value
        while pieces and not pieces[-1]:
            del pieces[-1]
        while pieces and not pieces[0]:
            del pieces[0]

        if add_to_http_hdrs:
            # HTTP header values are stored unfolded, on a single line
            self._http_hdrs.append((key, string.join(pieces, "")))
            return

        # continuation lines are stripped and given a leading space
        for index in range(1, len(pieces)):
            pieces[index] = " " + string.strip(pieces[index])
        header = key + ": " + string.join(pieces, "\r\n") + "\r\n"
        if prefix:
            self._headers.insert(0, header)
        else:
            self._headers.append(header)

    def flushheaders(self):
        """Write the buffered headers to the file and forget them."""
        pending = self._headers
        self._fp.writelines(pending)
        self._headers = []

    def startbody(self, ctype=None, plist=[], prefix=1,
                  add_to_http_hdrs=0, content_type=1):
        """
        prefix is ignored if add_to_http_hdrs is true.
        """
        if content_type and ctype:
            header_value = ctype
            for param_name, param_value in plist:
                header_value = header_value + ';\r\n %s=%s' % (
                    param_name, param_value)
            self.addheader("Content-type", header_value, prefix=prefix,
                           add_to_http_hdrs=add_to_http_hdrs)
        self.flushheaders()
        if not add_to_http_hdrs:
            # blank line separating headers from body
            self._fp.write("\r\n")
        self._first_part = True
        return self._fp

    def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1,
                           add_to_http_hdrs=0, content_type=1):
        """Begin a multipart/<subtype> body; return the output file.

        A boundary is generated with choose_boundary() if none is given.

        """
        if not boundary:
            boundary = choose_boundary()
        self._boundary.append(boundary)
        params = [("boundary", boundary)] + plist
        return self.startbody("multipart/" + subtype, params,
                              prefix=prefix,
                              add_to_http_hdrs=add_to_http_hdrs,
                              content_type=content_type)

    def nextpart(self):
        """Write the delimiter for a new part; return a writer for it."""
        current = self._boundary[-1]
        if not self._first_part:
            # parts after the first are preceded by a line break
            self._fp.write("\r\n")
        else:
            self._first_part = False
        self._fp.write("--" + current + "\r\n")
        return self.__class__(self._fp)

    def lastpart(self):
        """Write the closing delimiter of the innermost multipart body."""
        if self._first_part:
            # no part has been started yet: emit one opening delimiter first
            self.nextpart()
        finished = self._boundary.pop()
        self._fp.write("\r\n--" + finished + "--\r\n")
498 :    
499 :    
# Errors derived from ValueError.
class ControlNotFoundError(ValueError):
    pass


class ItemNotFoundError(ValueError):
    pass


class ItemCountError(ValueError):
    pass


# Raised by the form parsers when the markup is structurally invalid.
class ParseError(Exception):
    pass
505 :    
506 :    
507 :     class _AbstractFormParser:
508 :     """forms attribute contains HTMLForm instances on completion."""
509 :     # pinched (and modified) from Moshe Zadka
510 :     def __init__(self, entitydefs=None):
511 :     if entitydefs is not None:
512 :     self.entitydefs = entitydefs
513 :     self.base = None
514 :     self.forms = []
515 :     self._current_form = None
516 :     self._select = None
517 :     self._optgroup = None
518 :     self._option = None
519 :     self._textarea = None
520 :    
521 :     def do_base(self, attrs):
522 :     for key, value in attrs:
523 :     if key == "href":
524 :     self.base = value
525 :    
526 :     def start_form(self, attrs):
527 :     if self._current_form is not None:
528 :     raise ParseError("nested FORMs")
529 :     name = None
530 :     action = None
531 :     enctype = "application/x-www-form-urlencoded"
532 :     method = "GET"
533 :     d = {}
534 :     for key, value in attrs:
535 :     if key == "name":
536 :     name = value
537 :     elif key == "action":
538 :     action = value
539 :     elif key == "method":
540 :     method = string.upper(value)
541 :     elif key == "enctype":
542 :     enctype = string.lower(value)
543 :     d[key] = value
544 :     controls = []
545 :     self._current_form = (name, action, method, enctype), d, controls
546 :    
547 :     def end_form(self):
548 :     if self._current_form is None:
549 :     raise ParseError("end of FORM before start")
550 :     self.forms.append(self._current_form)
551 :     self._current_form = None
552 :    
553 :     def start_select(self, attrs):
554 :     if self._current_form is None:
555 :     raise ParseError("start of SELECT before start of FORM")
556 :     if self._select is not None:
557 :     raise ParseError("nested SELECTs")
558 :     if self._textarea is not None:
559 :     raise ParseError("SELECT inside TEXTAREA")
560 :     d = {}
561 :     for key, val in attrs:
562 :     d[key] = val
563 :    
564 :     self._select = d
565 :    
566 :     self._append_select_control({"__select": d})
567 :    
568 :     def end_select(self):
569 :     if self._current_form is None:
570 :     raise ParseError("end of SELECT before start of FORM")
571 :     if self._select is None:
572 :     raise ParseError("end of SELECT before start")
573 :    
574 :     if self._option is not None:
575 :     self._end_option()
576 :    
577 :     self._select = None
578 :    
579 :     def start_optgroup(self, attrs):
580 :     if self._select is None:
581 :     raise ParseError("OPTGROUP outside of SELECT")
582 :     d = {}
583 :     for key, val in attrs:
584 :     d[key] = val
585 :    
586 :     self._optgroup = d
587 :    
588 :     def end_optgroup(self):
589 :     if self._optgroup is None:
590 :     raise ParseError("end of OPTGROUP before start")
591 :     self._optgroup = None
592 :    
593 :     def _start_option(self, attrs):
594 :     if self._select is None:
595 :     raise ParseError("OPTION outside of SELECT")
596 :     if self._option is not None:
597 :     self._end_option()
598 :    
599 :     d = {}
600 :     for key, val in attrs:
601 :     d[key] = val
602 :    
603 :     self._option = {}
604 :     self._option.update(d)
605 :     if (self._optgroup and self._optgroup.has_key("disabled") and
606 :     not self._option.has_key("disabled")):
607 :     self._option["disabled"] = None
608 :    
609 :     def _end_option(self):
610 :     if self._option is None:
611 :     raise ParseError("end of OPTION before start")
612 :    
613 :     contents = string.strip(self._option.get("contents", ""))
614 :     self._option["contents"] = contents
615 :     if not self._option.has_key("value"):
616 :     self._option["value"] = contents
617 :     if not self._option.has_key("label"):
618 :     self._option["label"] = contents
619 :     # stuff dict of SELECT HTML attrs into a special private key
620 :     # (gets deleted again later)
621 :     self._option["__select"] = self._select
622 :     self._append_select_control(self._option)
623 :     self._option = None
624 :    
625 :     def _append_select_control(self, attrs):
626 :     controls = self._current_form[2]
627 :     name = self._select.get("name")
628 :     controls.append(("select", name, attrs))
629 :    
630 :     def start_textarea(self, attrs):
631 :     if self._current_form is None:
632 :     raise ParseError("start of TEXTAREA before start of FORM")
633 :     if self._textarea is not None:
634 :     raise ParseError("nested TEXTAREAs")
635 :     if self._select is not None:
636 :     raise ParseError("TEXTAREA inside SELECT")
637 :     d = {}
638 :     for key, val in attrs:
639 :     d[key] = val
640 :    
641 :     self._textarea = d
642 :    
643 :     def end_textarea(self):
644 :     if self._current_form is None:
645 :     raise ParseError("end of TEXTAREA before start of FORM")
646 :     if self._textarea is None:
647 :     raise ParseError("end of TEXTAREA before start")
648 :     controls = self._current_form[2]
649 :     name = self._textarea.get("name")
650 :     controls.append(("textarea", name, self._textarea))
651 :     self._textarea = None
652 :    
653 :     def handle_data(self, data):
654 :     if self._option is not None:
655 :     # self._option is a dictionary of the OPTION element's HTML
656 :     # attributes, but it has two special keys, one of which is the
657 :     # special "contents" key contains text between OPTION tags (the
658 :     # other is the "__select" key: see the end_option method)
659 :     map = self._option
660 :     key = "contents"
661 :     elif self._textarea is not None:
662 :     map = self._textarea
663 :     key = "value"
664 :     else:
665 :     return
666 :    
667 :     if not map.has_key(key):
668 :     map[key] = data
669 :     else:
670 :     map[key] = map[key] + data
671 :    
672 :     def do_button(self, attrs):
673 :     if self._current_form is None:
674 :     raise ParseError("start of BUTTON before start of FORM")
675 :     d = {}
676 :     d["type"] = "submit" # default
677 :     for key, val in attrs:
678 :     d[key] = val
679 :     controls = self._current_form[2]
680 :    
681 :     type = d["type"]
682 :     name = d.get("name")
683 :     # we don't want to lose information, so use a type string that
684 :     # doesn't clash with INPUT TYPE={SUBMIT,RESET,BUTTON}
685 :     # eg. type for BUTTON/RESET is "resetbutton"
686 :     # (type for INPUT/RESET is "reset")
687 :     type = type+"button"
688 :     controls.append((type, name, d))
689 :    
690 :     def do_input(self, attrs):
691 :     if self._current_form is None:
692 :     raise ParseError("start of INPUT before start of FORM")
693 :     d = {}
694 :     d["type"] = "text" # default
695 :     for key, val in attrs:
696 :     d[key] = val
697 :     controls = self._current_form[2]
698 :    
699 :     type = d["type"]
700 :     name = d.get("name")
701 :     controls.append((type, name, d))
702 :    
703 :     def do_isindex(self, attrs):
704 :     if self._current_form is None:
705 :     raise ParseError("start of ISINDEX before start of FORM")
706 :     d = {}
707 :     for key, val in attrs:
708 :     d[key] = val
709 :     controls = self._current_form[2]
710 :    
711 :     # isindex doesn't have type or name HTML attributes
712 :     controls.append(("isindex", None, d))
713 :    
714 :     # HTMLParser is recent, so live without it if it's not available
try:
    import HTMLParser
except ImportError:
    # Placeholder that fails on instantiation when HTMLParser is missing.
    class XHTMLCompatibleFormParser:
        def __init__(self, entitydefs=None):
            raise ValueError("HTMLParser could not be imported")
else:
    class XHTMLCompatibleFormParser(_AbstractFormParser, HTMLParser.HTMLParser):
        """Form parser built on HTMLParser.HTMLParser; works best for XHTML.

        Unknown entity and character references are silently dropped.

        """
        # thanks to Michael Howitz for this!
        def __init__(self, entitydefs=None):
            HTMLParser.HTMLParser.__init__(self)
            _AbstractFormParser.__init__(self, entitydefs)

        def start_option(self, attrs):
            _AbstractFormParser._start_option(self, attrs)

        def end_option(self):
            _AbstractFormParser._end_option(self)

        def handle_starttag(self, tag, attrs):
            # Dispatch to start_<tag> if it exists, otherwise to do_<tag>;
            # tags with neither handler are ignored.
            for prefix in ('start_', 'do_'):
                try:
                    handler = getattr(self, prefix + tag)
                except AttributeError:
                    continue
                handler(attrs)
                return

        def handle_endtag(self, tag):
            try:
                handler = getattr(self, 'end_' + tag)
            except AttributeError:
                return  # unknown tag
            handler()

        # handle_charref, handle_entityref and default entitydefs are taken
        # from sgmllib
        def handle_charref(self, name):
            try:
                code = int(name)
            except ValueError:
                code = None
            # only decimal references in the range 0-255 are translated
            if code is None or not 0 <= code <= 255:
                self.unknown_charref(name)
                return
            self.handle_data(chr(code))

        # Definition of entities -- derived classes may override
        entitydefs = \
                {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}

        def handle_entityref(self, name):
            known = self.entitydefs
            if name in known:
                self.handle_data(known[name])
            else:
                self.unknown_entityref(name)

        # These methods would have passed through the ref intact if I'd thought
        # of it earlier, but since the old parser silently swallows unknown
        # refs, so does this new parser.
        def unknown_entityref(self, ref):
            pass

        def unknown_charref(self, ref):
            pass
788 :    
789 :     import htmllib, formatter
class FormParser(_AbstractFormParser, htmllib.HTMLParser):
    """Form parser built on htmllib.HTMLParser with a null formatter."""

    def __init__(self, entitydefs=None):
        null_formatter = formatter.NullFormatter()
        htmllib.HTMLParser.__init__(self, null_formatter)
        _AbstractFormParser.__init__(self, entitydefs)

    def do_option(self, attrs):
        # OPTION is handled as a "do_" tag: there is no end_option handler
        # in this class
        _AbstractFormParser._start_option(self, attrs)
797 :    
798 :     #FormParser = XHTMLCompatibleFormParser # testing hack
799 :    
def ParseResponse(response, select_default=False,
                  ignore_errors=False,  # ignored!
                  form_parser_class=FormParser):
    """Parse HTTP response and return a list of HTMLForm instances.

    The return value of urllib2.urlopen can be conveniently passed to this
    function as the response parameter.

    ClientForm.ParseError is raised on parse errors.

    response: file-like object (supporting read() method) with a method
     geturl(), returning the URI of the HTTP response
    select_default: for multiple-selection SELECT controls and RADIO controls,
     pick the first item as the default if none are selected in the HTML
    ignore_errors: accepted but ignored
    form_parser_class: class to instantiate and use to parse the HTML

    Pass a true value for select_default if you want the behaviour specified by
    RFC 1866 (the HTML 2.0 standard), which is to select the first item in a
    RADIO or multiple-selection SELECT control if none were selected in the
    HTML.  Most browsers (including Microsoft Internet Explorer (IE) and
    Netscape Navigator) instead leave all items unselected in these cases.  The
    W3C HTML 4.0 standard leaves this behaviour undefined in the case of
    multiple-selection SELECT controls, but insists that at least one RADIO
    button should be checked at all times, in contradiction to browser
    behaviour.

    There is a choice of parsers.  ClientForm.XHTMLCompatibleFormParser (uses
    HTMLParser.HTMLParser) works best for XHTML, ClientForm.FormParser (uses
    htmllib.HTMLParser) (the default) works best for ordinary grubby HTML.
    Note that HTMLParser is only available in Python 2.2 and later.  You can
    pass your own class in here as a hack to work around bad HTML, but at your
    own risk: there is no well-defined interface.

    """
    # the document's own URI serves as the base URI
    document_uri = response.geturl()
    return ParseFile(response, document_uri, select_default,
                     False,
                     form_parser_class)
837 :    
def ParseFile(file, base_uri, select_default=False,
              ignore_errors=False,  # ignored!
              form_parser_class=FormParser):
    """Parse HTML and return a list of HTMLForm instances.

    ClientForm.ParseError is raised on parse errors; the exception is given
    a base_uri attribute holding the base_uri argument.

    file: file-like object (supporting read() method) containing HTML with zero
     or more forms to be parsed
    base_uri: the URI of the document (note that the base URI used to submit
     the form will be that given in the BASE element if present, not that of
     the document)
    ignore_errors: accepted but ignored

    For the other arguments and further details, see ParseResponse.__doc__.

    """
    fp = form_parser_class()
    while 1:
        data = file.read(CHUNK)
        try:
            fp.feed(data)
        except ParseError:
            # annotate the error with the document URI, then let it
            # propagate to the caller
            e = sys.exc_info()[1]
            e.base_uri = base_uri
            raise
        # a short read means the end of the input has been reached
        if len(data) != CHUNK: break
    if fp.base is not None:
        # HTML BASE element takes precedence over document URI
        base_uri = fp.base
    forms = []
    for (name, action, method, enctype), attrs, controls in fp.forms:
        # a missing action means the form submits to the base URI itself
        if action is None:
            action = base_uri
        else:
            action = urljoin(base_uri, action)
        form = HTMLForm(action, method, enctype, name, attrs)
        for control_type, control_name, control_attrs in controls:
            form.new_control(control_type, control_name, control_attrs,
                             select_default=select_default)
        forms.append(form)
    # controls are only fully initialised once fixup has run
    for form in forms:
        form.fixup()
    return forms
879 :    
880 :    
class Control:
    """An HTML form control.

    An HTMLForm holds a sequence of Controls and delegates much of its work
    to them, so most of the methods here are, in effect, documented by the
    HTMLForm docstrings.

    Get at the Controls in an HTMLForm through HTMLForm.find_control or the
    HTMLForm.controls attribute.

    Controls are normally built by ParseFile / ParseResponse, in which case
    the rest of this paragraph can be ignored.  A Control is only properly
    initialised once its fixup method has been called.  Strictly, that only
    matters for ListControl instances: a ListControl is assembled from
    ListControls that each hold a single item, so its initial value(s) are
    only known when the whole sequence has been seen.

    Which types and values may be assigned to the value attribute is decided
    by subclasses.

    disabled: a true value stands for the state browsers usually show by
    `greying out' a control.  While disabled, the Control raises
    AttributeError on any attempt to change its value, and it is not
    `successful' in the W3C HTML 4 sense -- ie. it contributes no data to the
    return value of the HTMLForm.click* methods.  Set disabled to a false
    value to enable the control.

    readonly: while true, the Control raises AttributeError on any attempt to
    change its value.  Set readonly to a false value to make it writable.

    Every control has the disabled and readonly attributes, not only those
    whose HTML elements may carry attributes of those names.

    Assigning to the value attribute may raise TypeError, AttributeError (if
    value should not be assigned to -- because the control is disabled, for
    example) and ValueError.

    A control is not successful if its name or value is None, if its value is
    an empty list, or if it is disabled.

    Public attributes:

    type: string describing type of control (see the keys of the
     HTMLForm.type2class dictionary for the allowable values) (readonly)
    name: name of control (readonly)
    value: current value of control (subclasses may allow a single value, a
     sequence of values, or either)
    disabled: disabled state
    readonly: readonly state
    id: value of id HTML attribute

    """
    def __init__(self, type, name, attrs):
        """Abstract constructor; subclasses provide the implementation.

        type: string describing type of control (see the keys of the
         HTMLForm.type2class dictionary for the allowable values)
        name: control name
        attrs: HTML attributes of control's HTML element

        """
        raise NotImplementedError()

    def add_to_form(self, form):
        """Append this control to the form's list of controls."""
        form.controls.append(self)

    def fixup(self):
        """Finish initialisation; the base class has nothing to do."""

    def is_of_kind(self, kind):
        """Abstract: say whether this control belongs to the named kind."""
        raise NotImplementedError()

    def __getattr__(self, name):
        raise NotImplementedError()

    def __setattr__(self, name, value):
        raise NotImplementedError()

    def pairs(self):
        """Return list of (key, value) pairs suitable for passing to urlencode.
        """
        raise NotImplementedError()

    def _write_mime_data(self, mw):
        """Write data for this control to a MimeWriter."""
        # called by HTMLForm
        # One form-data part is written per (key, value) pair.
        for key, data in self.pairs():
            part = mw.nextpart()
            disposition = 'form-data; name="%s"' % key
            part.addheader("Content-disposition", disposition, 1)
            body = part.startbody(prefix=0)
            body.write(data)

    def __str__(self):
        raise NotImplementedError()
975 :    
976 :    
977 :     #---------------------------------------------------
class ScalarControl(Control):
    """Control whose value is not restricted to one of a prescribed set.

    Some ScalarControls accept no value at all.  The others take a single,
    string-like value.

    Additional read-only public attribute:

    attrs: dictionary mapping the names of original HTML attributes of the
     control to their values

    """
    def __init__(self, type, name, attrs):
        # type and name are rejected by __setattr__, so they are written
        # straight into the instance dictionary.
        state = self.__dict__
        state["type"] = string.lower(type)
        state["name"] = name
        self._value = attrs.get("value")
        # The mere presence of the HTML attribute switches the state on.
        self.disabled = attrs.has_key("disabled")
        self.readonly = attrs.has_key("readonly")
        self.id = attrs.get("id")

        # Keep a private copy of the attributes that were passed in.
        self.attrs = attrs.copy()

        self._clicked = False

    def __getattr__(self, name):
        # Only reached when normal lookup fails; "value" is served from the
        # privately stored _value.
        if name != "value":
            raise AttributeError("%s instance has no attribute '%s'" %
                                 (self.__class__.__name__, name))
        return self.__dict__["_value"]

    def __setattr__(self, name, value):
        if name == "value":
            # Validate the type first, then the control's state.
            if not isstringlike(value):
                raise TypeError("must assign a string")
            if self.readonly:
                raise AttributeError("control '%s' is readonly" % self.name)
            if self.disabled:
                raise AttributeError("control '%s' is disabled" % self.name)
            self.__dict__["_value"] = value
            return
        if name in ("name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        self.__dict__[name] = value

    def pairs(self):
        """Return [(name, value)], or [] if the control is not successful."""
        key = self.name
        current = self.value
        successful = (key is not None and current is not None
                      and not self.disabled)
        if successful:
            return [(key, current)]
        return []

    def __str__(self):
        shown_name = self.name
        shown_value = self.value
        if shown_name is None:
            shown_name = "<None>"
        if shown_value is None:
            shown_value = "<None>"

        flags = []
        if self.disabled:
            flags.append("disabled")
        if self.readonly:
            flags.append("readonly")
        suffix = string.join(flags, ", ")
        if suffix:
            suffix = " (%s)" % suffix

        return "<%s(%s=%s)%s>" % (self.__class__.__name__,
                                  shown_name, shown_value, suffix)
1043 :    
1044 :    
1045 :     #---------------------------------------------------
class TextControl(ScalarControl):
    """Textual input control.

    Covers:

    INPUT/TEXT
    INPUT/PASSWORD
    INPUT/FILE
    INPUT/HIDDEN
    TEXTAREA

    """
    def __init__(self, type, name, attrs):
        ScalarControl.__init__(self, type, name, attrs)
        # Hidden fields cannot be edited.
        is_hidden = self.type == "hidden"
        if is_hidden:
            self.readonly = True
        # A missing value HTML attribute is treated as the empty string.
        if self._value is None:
            self._value = ""

    def is_of_kind(self, kind):
        """Return true only for the "text" kind."""
        return kind == "text"
1065 :    
1066 :     #---------------------------------------------------
class FileControl(ScalarControl):
    """File upload with INPUT TYPE=FILE.

    The value attribute of a FileControl is always None.  Use add_file instead.

    Additional public method: add_file

    """

    def __init__(self, type, name, attrs):
        ScalarControl.__init__(self, type, name, attrs)
        self._value = None
        # List of (file_object, content_type, filename), in add_file order.
        self._upload_data = []

    def is_of_kind(self, kind): return kind == "file"

    def __setattr__(self, name, value):
        if name in ("value", "name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        else:
            self.__dict__[name] = value

    def add_file(self, file_object, content_type=None, filename=None):
        """Queue a file for upload when the form is submitted.

        file_object: object with a read method; it is read when the MIME data
         is written, not here
        content_type: None or string-like; None means
         "application/octet-stream"
        filename: None or string-like; sent in the Content-disposition header
         when true

        TypeError is raised if any argument has the wrong type.

        """
        if not hasattr(file_object, "read"):
            raise TypeError("file-like object must have read method")
        if content_type is not None and not isstringlike(content_type):
            raise TypeError("content type must be None or string-like")
        if filename is not None and not isstringlike(filename):
            raise TypeError("filename must be None or string-like")
        if content_type is None:
            content_type = "application/octet-stream"
        self._upload_data.append((file_object, content_type, filename))

    def pairs(self):
        # XXX should it be successful even if unnamed?
        if self.name is None or self.disabled:
            return []
        return [(self.name, "")]

    def _write_mime_data(self, mw):
        """Write the queued files for this control to a MimeWriter."""
        # called by HTMLForm
        if self.name is None or self.disabled:
            # Same rule as pairs(): an unnamed or disabled control is not
            # successful, so it must not contribute any upload data either.
            return
        if len(self._upload_data) == 1:
            # single file
            file_object, content_type, filename = self._upload_data[0]
            mw2 = mw.nextpart()
            fn_part = filename and ('; filename="%s"' % filename) or ''
            disp = 'form-data; name="%s"%s' % (self.name, fn_part)
            mw2.addheader("Content-disposition", disp, prefix=1)
            fh = mw2.startbody(content_type, prefix=0)
            fh.write(file_object.read())
        elif len(self._upload_data) != 0:
            # multiple files: one multipart/mixed part holding a sub-part
            # per file
            mw2 = mw.nextpart()
            disp = 'form-data; name="%s"' % self.name
            mw2.addheader("Content-disposition", disp, prefix=1)
            fh = mw2.startmultipartbody("mixed", prefix=0)
            for file_object, content_type, filename in self._upload_data:
                mw3 = mw2.nextpart()
                fn_part = filename and ('; filename="%s"' % filename) or ''
                disp = 'file%s' % fn_part
                mw3.addheader("Content-disposition", disp, prefix=1)
                fh2 = mw3.startbody(content_type, prefix=0)
                fh2.write(file_object.read())
            mw2.lastpart()

    def __str__(self):
        name = self.name
        if name is None: name = "<None>"

        if not self._upload_data:
            value = "<No files added>"
        else:
            # Show the queued files by filename.
            value = []
            for file_object, ctype, filename in self._upload_data:
                if filename is None:
                    value.append("<Unnamed file>")
                else:
                    value.append(filename)
            value = string.join(value, ", ")

        info = []
        if self.disabled: info.append("disabled")
        if self.readonly: info.append("readonly")
        info = string.join(info, ", ")
        if info: info = " (%s)" % info

        return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, info)
1154 :    
1155 :    
1156 :     #---------------------------------------------------
class IsindexControl(ScalarControl):
    """ISINDEX control.

    ISINDEX is the odd one out among HTML form controls.  It is not really
    part of regular HTML forms at all, and predates them.  Only one ISINDEX is
    allowed per HTML document, and submitting it is mutually exclusive with
    regular form submission -- you submit either a form or the ISINDEX.

    All the same, ISINDEX controls may turn up inside forms (probably bad
    HTML), so ParseFile / ParseResponse include them in the HTMLForm instances
    they return.  Its value can be set like that of any other control (but an
    ISINDEX has no name, so use the type argument of set_value!).  On form
    submission the ISINDEX is not successful (ie., its presence causes no data
    to be returned to the server) unless the ISINDEX control itself is
    clicked, in which case the ISINDEX is submitted instead of the form:

    form.set_value("my isindex value", type="isindex")
    urllib2.urlopen(form.click(type="isindex"))

    ISINDEX elements outside of FORMs are ignored.  To submit one by hand:

    url = urlparse.urljoin(page_uri, "?"+urllib.quote_plus("my isindex value"))
    result = urllib2.urlopen(url)

    """
    def __init__(self, type, name, attrs):
        ScalarControl.__init__(self, type, name, attrs)
        # A missing value HTML attribute is treated as the empty string.
        if self._value is None:
            self._value = ""

    def is_of_kind(self, kind):
        """An ISINDEX counts as both "text" and "clickable"."""
        return kind in ["text", "clickable"]

    def pairs(self):
        """Always []: an ISINDEX adds nothing to a normal form submission."""
        return []

    def _click(self, form, coord, return_type):
        # Relative URL for ISINDEX submission: instead of "foo=bar+baz",
        # want "bar+baz".
        # The HTML 4.01 spec doesn't seem to specify this (ISINDEX is
        # deprecated there, but it should still say how to submit it).  The
        # HTML 3.2 spec does explain ISINDEX submission.
        query = "?"+urllib.quote_plus(self.value)
        url = urljoin(form.action, query)

        if return_type == "pairs":
            return []
        if return_type == "request_data":
            return (url, None, [])
        return urllib2.Request(url)

    def __str__(self):
        shown_value = self.value
        if shown_value is None:
            shown_value = "<None>"

        flags = []
        if self.disabled:
            flags.append("disabled")
        if self.readonly:
            flags.append("readonly")
        suffix = string.join(flags, ", ")
        if suffix:
            suffix = " (%s)" % suffix

        return "<%s(%s)%s>" % (self.__class__.__name__, shown_value, suffix)
1221 :    
1222 :    
1223 :     #---------------------------------------------------
class IgnoreControl(ScalarControl):
    """Control that we're not interested in.

    Covers:

    INPUT/RESET
    BUTTON/RESET
    INPUT/BUTTON
    BUTTON/BUTTON

    In HTML 4 terminology these controls are never successful (ie. they never
    cause any information to be returned to the server).

    BUTTON/BUTTON exists to generate events for script embedded in HTML.

    The value attribute of IgnoreControl is always None.

    """
    def __init__(self, type, name, attrs):
        ScalarControl.__init__(self, type, name, attrs)
        # Discard whatever value the HTML supplied.
        self._value = None

    def is_of_kind(self, kind):
        """An ignored control belongs to no kind."""
        return False

    def __setattr__(self, name, value):
        if name == "value":
            raise AttributeError(
                "control '%s' is ignored, hence read-only" % self.name)
        if name in ("name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        self.__dict__[name] = value
1256 :    
1257 :    
1258 :     #---------------------------------------------------
1259 :     class ListControl(Control):
1260 :     """Control representing a sequence of items.
1261 :    
1262 :     The value attribute of a ListControl represents the selected list items in
1263 :     the control.
1264 :    
1265 :     ListControl implements both list controls that take a single value and
1266 :     those that take multiple values.
1267 :    
1268 :     ListControls accept sequence values only. Some controls only accept
1269 :     sequences of length 0 or 1 (RADIO, and single-selection SELECT).
1270 :     In those cases, ItemCountError is raised if len(sequence) > 1. CHECKBOXes
1271 :     and multiple-selection SELECTs (those having the "multiple" HTML attribute)
1272 :     accept sequences of any length.
1273 :    
1274 :     Note the following mistake:
1275 :    
1276 :     control.value = some_value
1277 :     assert control.value == some_value # not necessarily true
1278 :    
1279 :     The reason for this is that the value attribute always gives the list items
1280 :     in the order they were listed in the HTML.
1281 :    
1282 :     ListControl items can also be referred to by their labels instead of names.
1283 :     Use the by_label argument, and the set_value_by_label, get_value_by_label
1284 :     methods.
1285 :    
1286 :     XXX RadioControl and CheckboxControl don't implement by_label yet.
1287 :    
1288 :     Note that, rather confusingly, though SELECT controls are represented in
1289 :     HTML by SELECT elements (which contain OPTION elements, representing
1290 :     individual list items), CHECKBOXes and RADIOs are not represented by *any*
1291 :     element. Instead, those controls are represented by a collection of INPUT
1292 :     elements. For example, this is a SELECT control, named "control1":
1293 :    
1294 :     <select name="control1">
1295 :     <option>foo</option>
1296 :     <option value="1">bar</option>
1297 :     </select>
1298 :    
1299 :     and this is a CHECKBOX control, named "control2":
1300 :    
1301 :     <input type="checkbox" name="control2" value="foo" id="cbe1">
1302 :     <input type="checkbox" name="control2" value="bar" id="cbe2">
1303 :    
1304 :     The id attribute of a CHECKBOX or RADIO ListControl is always that of its
1305 :     first element (for example, "cbe1" above).
1306 :    
1307 :    
1308 :     Additional read-only public attribute: multiple.
1309 :    
1310 :     """
1311 :    
1312 :     # ListControls are built up by the parser from their component items by
1313 :     # creating one ListControl per item, consolidating them into a single
1314 :     # master ListControl held by the HTMLForm:
1315 :    
1316 :     # -User calls form.new_control(...)
1317 :     # -Form creates Control, and calls control.add_to_form(self).
1318 :     # -Control looks for a Control with the same name and type in the form,
1319 :     # and if it finds one, merges itself with that control by calling
1320 :     # control.merge_control(self). The first Control added to the form, of
1321 :     # a particular name and type, is the only one that survives in the
1322 :     # form.
1323 :     # -Form calls control.fixup for all its controls. ListControls in the
1324 :     # form know they can now safely pick their default values.
1325 :    
1326 :     # To create a ListControl without an HTMLForm, use:
1327 :    
1328 :     # control.merge_control(new_control)
1329 :    
1330 :     # (actually, it's much easier just to use ParseFile)
1331 :    
1332 :     def __init__(self, type, name, attrs={}, select_default=False,
1333 :     called_as_base_class=False):
1334 :     """
1335 :     select_default: for RADIO and multiple-selection SELECT controls, pick
1336 :     the first item as the default if no 'selected' HTML attribute is
1337 :     present
1338 :    
1339 :     """
1340 :     if not called_as_base_class:
1341 :     raise NotImplementedError()
1342 :    
1343 :     self.__dict__["type"] = string.lower(type)
1344 :     self.__dict__["name"] = name
1345 :     self._value = attrs.get("value")
1346 :     self.disabled = False
1347 :     self.readonly = False
1348 :     self.id = attrs.get("id")
1349 :    
1350 :     self._attrs = attrs.copy()
1351 :     # As Controls are merged in with .merge_control(), self._attrs will
1352 :     # refer to each Control in turn -- always the most recently merged
1353 :     # control. Each merged-in Control instance corresponds to a single
1354 :     # list item: see ListControl.__doc__.
1355 :     if attrs:
1356 :     self._attrs_list = [self._attrs] # extended by .merge_control()
1357 :     self._disabled_list = [self._attrs.has_key("disabled")] # ditto
1358 :     else:
1359 :     self._attrs_list = [] # extended by .merge_control()
1360 :     self._disabled_list = [] # ditto
1361 :    
1362 :     self._select_default = select_default
1363 :     self._clicked = False
1364 :     # Some list controls can have their default set only after all items
1365 :     # are known. If so, self._value_is_set is false, and the self.fixup
1366 :     # method, called after all items have been added, sets the default.
1367 :     self._value_is_set = False
1368 :    
1369 :     def is_of_kind(self, kind):
1370 :     if kind == "list":
1371 :     return True
1372 :     elif kind == "multilist":
1373 :     return bool(self.multiple)
1374 :     elif kind == "singlelist":
1375 :     return not self.multiple
1376 :     else:
1377 :     return False
1378 :    
1379 :     def _value_from_label(self, label):
1380 :     raise NotImplementedError("control '%s' does not yet support "
1381 :     "by_label" % self.name)
1382 :    
1383 :     def toggle(self, name, by_label=False):
1384 :     return self._set_selected_state(name, 2, by_label)
1385 :     def set(self, selected, name, by_label=False):
1386 :     action = int(bool(selected))
1387 :     return self._set_selected_state(name, action, by_label)
1388 :    
1389 :     def _set_selected_state(self, name, action, by_label):
1390 :     """
1391 :     name: item name
1392 :     action:
1393 :     0: clear
1394 :     1: set
1395 :     2: toggle
1396 :    
1397 :     """
1398 :     if not isstringlike(name):
1399 :     raise TypeError("item name must be string-like")
1400 :     if self.disabled:
1401 :     raise AttributeError("control '%s' is disabled" % self.name)
1402 :     if self.readonly:
1403 :     raise AttributeError("control '%s' is readonly" % self.name)
1404 :     if by_label:
1405 :     name = self._value_from_label(name)
1406 :     try:
1407 :     i = self._menu.index(name)
1408 :     except ValueError:
1409 :     raise ItemNotFoundError("no item named '%s'" % name)
1410 :    
1411 :     if self.multiple:
1412 :     if action == 2:
1413 :     action = not self._selected[i]
1414 :     if action and self._disabled_list[i]:
1415 :     raise AttributeError("item '%s' is disabled" % name)
1416 :     self._selected[i] = bool(action)
1417 :     else:
1418 :     if action == 2:
1419 :     if self._selected == name:
1420 :     action = 0
1421 :     else:
1422 :     action = 1
1423 :     if action == 0 and self._selected == name:
1424 :     self._selected = None
1425 :     elif action == 1:
1426 :     if self._disabled_list[i]:
1427 :     raise AttributeError("item '%s' is disabled" % name)
1428 :     self._selected = name
1429 :    
1430 :     def toggle_single(self, by_label=False):
1431 :     self._set_single_selected_state(2, by_label)
1432 :     def set_single(self, selected, by_label=False):
1433 :     action = int(bool(selected))
1434 :     self._set_single_selected_state(action, by_label)
1435 :    
1436 :     def _set_single_selected_state(self, action, by_label):
1437 :     if len(self._menu) != 1:
1438 :     raise ItemCountError("'%s' is not a single-item control" %
1439 :     self.name)
1440 :    
1441 :     name = self._menu[0]
1442 :     if by_label:
1443 :     name = self._value_from_label(name)
1444 :     self._set_selected_state(name, action, by_label)
1445 :    
1446 :     def get_item_disabled(self, name, by_label=False):
1447 :     """Get disabled state of named list item in a ListControl."""
1448 :     if by_label:
1449 :     name = self._value_from_label(name)
1450 :     try:
1451 :     i = self._menu.index(name)
1452 :     except ValueError:
1453 :     raise ItemNotFoundError()
1454 :     else:
1455 :     return self._disabled_list[i]
1456 :    
1457 :     def set_item_disabled(self, disabled, name, by_label=False):
1458 :     """Set disabled state of named list item in a ListControl.
1459 :    
1460 :     disabled: boolean disabled state
1461 :    
1462 :     """
1463 :     if by_label:
1464 :     name = self._value_from_label(name)
1465 :     try:
1466 :     i = self._menu.index(name)
1467 :     except ValueError:
1468 :     raise ItemNotFoundError()
1469 :     else:
1470 :     self._disabled_list[i] = bool(disabled)
1471 :    
1472 :     def set_all_items_disabled(self, disabled):
1473 :     """Set disabled state of all list items in a ListControl.
1474 :    
1475 :     disabled: boolean disabled state
1476 :    
1477 :     """
1478 :     for i in range(len(self._disabled_list)):
1479 :     self._disabled_list[i] = bool(disabled)
1480 :    
1481 :     def get_item_attrs(self, name, by_label=False):
1482 :     """Return dictionary of HTML attributes for a single ListControl item.
1483 :    
1484 :     The HTML element types that describe list items are: OPTION for SELECT
1485 :     controls, INPUT for the rest. These elements have HTML attributes that
1486 :     you may occasionally want to know about -- for example, the "alt" HTML
1487 :     attribute gives a text string describing the item (graphical browsers
1488 :     usually display this as a tooltip).
1489 :    
1490 :     The returned dictionary maps HTML attribute names to values. The names
1491 :     and values are taken from the original HTML.
1492 :    
1493 :     Note that for SELECT controls, the returned dictionary contains a
1494 :     special key "contents" -- see SelectControl.__doc__.
1495 :    
1496 :     """
1497 :     if by_label:
1498 :     name = self._value_from_label(name)
1499 :     try:
1500 :     i = self._menu.index(name)
1501 :     except ValueError:
1502 :     raise ItemNotFoundError()
1503 :     return self._attrs_list[i]
1504 :    
1505 :     def add_to_form(self, form):
1506 :     try:
1507 :     control = form.find_control(self.name, self.type)
1508 :     except ControlNotFoundError:
1509 :     Control.add_to_form(self, form)
1510 :     else:
1511 :     control.merge_control(self)
1512 :    
1513 :     def merge_control(self, control):
1514 :     assert bool(control.multiple) == bool(self.multiple)
1515 :     assert isinstance(control, self.__class__)
1516 :     self._menu.extend(control._menu)
1517 :     self._attrs_list.extend(control._attrs_list)
1518 :     self._disabled_list.extend(control._disabled_list)
1519 :     if control.multiple:
1520 :     self._selected.extend(control._selected)
1521 :     else:
1522 :     if control._value_is_set:
1523 :     self._selected = control._selected
1524 :     if control._value_is_set:
1525 :     self._value_is_set = True
1526 :    
1527 :     def fixup(self):
1528 :     """
1529 :     ListControls are built up from component list items (which are also
1530 :     ListControls) during parsing. This method should be called after all
1531 :     items have been added. See ListControl.__doc__ for the reason this is
1532 :     required.
1533 :    
1534 :     """
1535 :     # Need to set default selection where no item was indicated as being
1536 :     # selected by the HTML:
1537 :    
1538 :     # CHECKBOX:
1539 :     # Nothing should be selected.
1540 :     # SELECT/single, SELECT/multiple and RADIO:
1541 :     # RFC 1866 (HTML 2.0): says first item should be selected.
1542 :     # W3C HTML 4.01 Specification: says that client behaviour is
1543 :     # undefined in this case. For RADIO, exactly one must be selected,
1544 :     # though which one is undefined.
1545 :     # Both Netscape and Microsoft Internet Explorer (IE) choose first
1546 :     # item for SELECT/single. However, both IE5 and Mozilla (both 1.0
1547 :     # and Firebird 0.6) leave all items unselected for RADIO and
1548 :     # SELECT/multiple.
1549 :    
1550 :     # Since both Netscape and IE all choose the first item for
1551 :     # SELECT/single, we do the same. OTOH, both Netscape and IE
1552 :     # leave SELECT/multiple with nothing selected, in violation of RFC 1866
1553 :     # (but not in violation of the W3C HTML 4 standard); the same is true
1554 :     # of RADIO (which *is* in violation of the HTML 4 standard). We follow
1555 :     # RFC 1866 if the select_default attribute is set, and Netscape and IE
1556 :     # otherwise. RFC 1866 and HTML 4 are always violated insofar as you
1557 :     # can deselect all items in a RadioControl.
1558 :    
1559 :     raise NotImplementedError()
1560 :    
1561 :     def __getattr__(self, name):
1562 :     if name == "value":
1563 :     menu = self._menu
1564 :     if self.multiple:
1565 :     values = []
1566 :     for i in range(len(menu)):
1567 :     if self._selected[i]: values.append(menu[i])
1568 :     return values
1569 :     else:
1570 :     if self._selected is None: return []
1571 :     else: return [self._selected]
1572 :     else:
1573 :     raise AttributeError("%s instance has no attribute '%s'" %
1574 :     (self.__class__.__name__, name))
1575 :    
1576 :     def __setattr__(self, name, value):
1577 :     if name == "value":
1578 :     if self.disabled:
1579 :     raise AttributeError("control '%s' is disabled" % self.name)
1580 :     if self.readonly:
1581 :     raise AttributeError("control '%s' is readonly" % self.name)
1582 :     self._set_value(value)
1583 :     elif name in ("name", "type", "multiple"):
1584 :     raise AttributeError("%s attribute is readonly" % name)
1585 :     else:
1586 :     self.__dict__[name] = value
1587 :    
1588 :     def _set_value(self, value):
1589 :     if self.multiple:
1590 :     self._multiple_set_value(value)
1591 :     else:
1592 :     self._single_set_value(value)
1593 :    
1594 :     def _single_set_value(self, value):
1595 :     if value is None or isstringlike(value):
1596 :     raise TypeError("ListControl, must set a sequence")
1597 :     nr = len(value)
1598 :     if not (0 <= nr <= 1):
1599 :     raise ItemCountError("single selection list, must set sequence of "
1600 :     "length 0 or 1")
1601 :    
1602 :     if nr == 0:
1603 :     self._selected = None
1604 :     else:
1605 :     value = value[0]
1606 :     try:
1607 :     i = self._menu.index(value)
1608 :     except ValueError:
1609 :     raise ItemNotFoundError("no item named '%s'" %
1610 :     repr(value))
1611 :     if self._disabled_list[i]:
1612 :     raise AttributeError("item '%s' is disabled" % value)
1613 :     self._selected = value
1614 :    
1615 :     def _multiple_set_value(self, value):
1616 :     if value is None or isstringlike(value):
1617 :     raise TypeError("ListControl, must set a sequence")
1618 :    
1619 :     selected = [False]*len(self._selected)
1620 :     menu = self._menu
1621 :     disabled_list = self._disabled_list
1622 :    
1623 :     for v in value:
1624 :     found = False
1625 :     for i in range(len(menu)):
1626 :     item_name = menu[i]
1627 :     if v == item_name:
1628 :     if disabled_list[i]:
1629 :     raise AttributeError("item '%s' is disabled" % value)
1630 :     selected[i] = True
1631 :     found = True
1632 :     break
1633 :     if not found:
1634 :     raise ItemNotFoundError("no item named '%s'" % repr(v))
1635 :     self._selected = selected
1636 :    
def set_value_by_label(self, value):
    """Unsupported here: always raises NotImplementedError."""
    message = "control '%s' does not yet support by_label" % self.name
    raise NotImplementedError(message)
def get_value_by_label(self):
    """Unsupported here: always raises NotImplementedError."""
    message = "control '%s' does not yet support by_label" % self.name
    raise NotImplementedError(message)
1643 :    
def possible_items(self, by_label=False):
    """Return a copy of the list of item names in this control's menu.

    Asking for labels (by_label true) raises NotImplementedError.

    """
    if not by_label:
        return copy.copy(self._menu)
    raise NotImplementedError(
        "control '%s' does not yet support by_label" % self.name)
1649 :    
def pairs(self):
    """Return the list of (control name, item name) pairs for this control.

    A disabled control yields no pairs.  A single-selection control yields
    at most one pair, and none if it has no name or nothing is selected.  A
    multiple-selection control yields one pair per selected item, in menu
    order.

    """
    if self.disabled:
        return []

    control_name = self.name  # usually the name HTML attribute
    if self.multiple:
        result = []
        index = 0
        for item_name in self._menu:  # usually the value HTML attribute
            if self._selected[index]:
                result.append((control_name, item_name))
            index = index + 1
        return result

    chosen = self._selected
    if control_name is None or chosen is None:
        return []
    return [(control_name, chosen)]
1668 :    
def _item_str(self, i):
    """Return the display text for menu item number i.

    A selected item gets a leading "*"; a disabled item is wrapped in
    parentheses.

    """
    text = self._menu[i]
    if self.multiple:
        is_selected = self._selected[i]
    else:
        is_selected = self._selected == text
    if is_selected:
        text = "*"+text
    if self._disabled_list[i]:
        text = "(%s)" % text
    return text
1680 :    
1681 :     def __str__(self):
1682 :     name = self.name
1683 :     if name is None: name = "<None>"
1684 :    
1685 :     display = []
1686 :     for i in range(len(self._menu)):
1687 :     s = self._item_str(i)
1688 :     display.append(s)
1689 :    
1690 :     infos = []
1691 :     if self.disabled: infos.append("disabled")
1692 :     if self.readonly: infos.append("readonly")
1693 :     info = string.join(infos, ", ")
1694 :     if info: info = " (%s)" % info
1695 :    
1696 :     return "<%s(%s=[%s])%s>" % (self.__class__.__name__,
1697 :     name, string.join(display, ", "), info)
1698 :    
1699 :    
class RadioControl(ListControl):
    """
    Covers:

    INPUT/RADIO

    """
    def __init__(self, type, name, attrs, select_default=False):
        """Build a control holding the single item described by attrs.

        The item name is the "value" HTML attribute (default "on"); the
        item starts selected if a "checked" attribute is present.

        """
        ListControl.__init__(self, type, name, attrs, select_default,
                             called_as_base_class=True)
        item_value = attrs.get("value", "on")
        # Written through __dict__ rather than by plain assignment --
        # presumably because ListControl treats "multiple" as readonly.
        self.__dict__["multiple"] = False
        self._menu = [item_value]
        if attrs.has_key("checked"):
            self._value_is_set = True
            self._selected = item_value
        else:
            self._selected = None

    def fixup(self):
        """Apply the default selection if no item was explicitly selected."""
        if self._value_is_set:
            return
        # no item explicitly selected
        assert self._selected is None
        if self._select_default:
            self._selected = self._menu[0]
        self._value_is_set = True
1727 :    
1728 :    
class CheckboxControl(ListControl):
    """
    Covers:

    INPUT/CHECKBOX

    """
    def __init__(self, type, name, attrs, select_default=False):
        """Build a control holding the single item described by attrs.

        The item name is the "value" HTML attribute (default "on"); the
        item starts selected if a "checked" attribute is present.

        """
        ListControl.__init__(self, type, name, attrs, select_default,
                             called_as_base_class=True)
        # Written through __dict__ rather than by plain assignment --
        # presumably because ListControl treats "multiple" as readonly.
        self.__dict__["multiple"] = True
        self._menu = [attrs.get("value", "on")]
        self._selected = [attrs.has_key("checked")]
        self._value_is_set = True

    def fixup(self):
        """Nothing to normalise: only checks the value was marked as set."""
        # Items that were not explicitly checked in the HTML must stay
        # unchecked, so no defaulting happens here.
        assert self._value_is_set
1750 :    
1751 :    
class SelectControl(ListControl):
    """
    Covers:

    SELECT (and OPTION)

    SELECT control values and labels are subject to some messy defaulting
    rules.  For example, if the HTML representation of the control is:

    <SELECT name=year>
      <OPTION value=0 label="2002">current year</OPTION>
      <OPTION value=1>2001</OPTION>
      <OPTION>2000</OPTION>
    </SELECT>

    The items, in order, have labels "2002", "2001" and "2000", whereas their
    values are "0", "1" and "2000" respectively.  Note that the value of the
    last OPTION in this example defaults to its contents, as specified by RFC
    1866, as do the labels of the second and third OPTIONs.

    The OPTION labels are sometimes more meaningful than the OPTION values,
    which can make for more maintainable code.

    Additional read-only public attribute: attrs

    The attrs attribute is a dictionary of the original HTML attributes of the
    SELECT element.  Other ListControls do not have this attribute, because in
    other cases the control as a whole does not correspond to any single HTML
    element.  The get_item_attrs method may be used as usual to get at the
    HTML attributes of the HTML elements corresponding to individual list items
    (for SELECT controls, these are OPTION elements).

    Another special case is that the attributes dictionaries returned by
    get_item_attrs have a special key "contents" which does not correspond to
    any real HTML attribute, but rather contains the contents of the OPTION
    element:

    <OPTION>this bit</OPTION>

    """
    # HTML attributes here are treated slightly differently from other list
    # controls:
    # -The SELECT HTML attributes dictionary is stuffed into the OPTION
    #  HTML attributes dictionary under the "__select" key.
    # -The content of each OPTION element is stored under the special
    #  "contents" key of the dictionary.
    # After all this, the dictionary is passed to the SelectControl constructor
    # as the attrs argument, as usual.  However:
    # -The first SelectControl constructed when building up a SELECT control
    #  has a constructor attrs argument containing only the __select key -- so
    #  this SelectControl represents an empty SELECT control.
    # -Subsequent SelectControls have both OPTION HTML-attribute in attrs and
    #  the __select dictionary containing the SELECT HTML-attributes.
    def __init__(self, type, name, attrs, select_default=False):
        """Build a SELECT control holding at most one OPTION item.

        attrs: OPTION HTML attributes plus the SELECT HTML attributes under
         the "__select" key (see the comment above); if nothing but
         "__select" is present the control starts with no items

        """
        # fish out the SELECT HTML attributes from the OPTION HTML attributes
        # dictionary
        self.attrs = attrs["__select"].copy()
        attrs = attrs.copy()
        del attrs["__select"]

        ListControl.__init__(self, type, name, attrs, select_default,
                             called_as_base_class=True)

        self._label_map = None
        self.disabled = self.attrs.has_key("disabled")
        self.id = self.attrs.get("id")

        self._menu = []
        self._value_is_set = False
        # "multiple" is written through __dict__ rather than by plain
        # assignment -- presumably because ListControl treats it as readonly.
        if self.attrs.has_key("multiple"):
            self.__dict__["multiple"] = True
            self._selected = []  # one boolean per menu item
        else:
            self.__dict__["multiple"] = False
            self._selected = None  # name of the selected item, if any

        if attrs:  # OPTION item data was provided
            value = attrs["value"]
            self._menu.append(value)
            selected = attrs.has_key("selected")
            if selected:
                self._value_is_set = True
            if self.attrs.has_key("multiple"):
                self._selected.append(selected)
            elif selected:
                self._selected = value

    def _build_select_label_map(self):
        """Return an ordered mapping of labels to values.

        For example, if the HTML representation of the control is as given in
        SelectControl.__doc__, this function will return a mapping like:

        {"2002": "0", "2001": "1", "2000": "2000"}

        """
        alist = []
        for val in self._menu:
            attrs = self.get_item_attrs(val)
            alist.append((attrs["label"], val))
        return AList(alist)

    def _value_from_label(self, label):
        """Return the item value for label, or raise ItemNotFoundError."""
        try:
            return self._label_map[label]
        except KeyError:
            raise ItemNotFoundError("no item has label '%s'" % label)

    def fixup(self):
        """Apply the default selection and build the label map."""
        if not self._value_is_set:
            # No item explicitly selected.
            if len(self._menu) > 0:
                if self.multiple:
                    if self._select_default:
                        self._selected[0] = True
                else:
                    assert self._selected is None
                    self._selected = self._menu[0]
            self._value_is_set = True
        self._label_map = self._build_select_label_map()

    def _delete_items(self):
        """Remove every item and reset the selection state."""
        # useful for simulating JavaScript code, but not a stable interface yet
        self._menu = []
        self._value_is_set = False
        if self.multiple:
            self._selected = []
        else:
            self._selected = None

    def possible_items(self, by_label=False):
        """Return the item values (or, if by_label, item labels) in menu order.

        The by_label form needs the label map, which is built by fixup.

        """
        if not by_label:
            return copy.copy(self._menu)
        else:
            # Temporarily look values up to get labels; always switch the
            # map back, even if a lookup fails.
            self._label_map.set_inverted(True)
            try:
                r = map(lambda v, self=self: self._label_map[v], self._menu)
            finally:
                self._label_map.set_inverted(False)
            return r

    def set_value_by_label(self, value):
        """Set the control's value from a sequence of item labels.

        Raises TypeError if value is a string, AttributeError if the control
        is disabled or readonly, and ItemNotFoundError if a label matches no
        item.  The translated values are then passed to _set_value.

        """
        if isstringlike(value):
            raise TypeError("ListControl, must set a sequence, not a string")
        if self.disabled:
            raise AttributeError("control '%s' is disabled" % self.name)
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)

        # Translate every label through the shared helper, so the error
        # names the failing label directly.
        values = []
        for label in value:
            values.append(self._value_from_label(label))
        self._set_value(values)

    def get_value_by_label(self):
        """Return the labels of the selected items, as a list.

        A single-selection control with nothing selected gives an empty
        list.

        """
        menu = self._menu
        if not self.multiple and self._selected is None:
            # Nothing selected: there is no value to look a label up for.
            return []
        self._label_map.set_inverted(True)
        try:
            if self.multiple:
                values = []
                for i in range(len(menu)):
                    if self._selected[i]:
                        values.append(self._label_map[menu[i]])
                return values
            else:
                return [self._label_map[self._selected]]
        finally:
            self._label_map.set_inverted(False)
1921 :    
1922 :    
1923 :     #---------------------------------------------------
class SubmitControl(ScalarControl):
    """
    Covers:

    INPUT/SUBMIT
    BUTTON/SUBMIT

    """
    def __init__(self, type, name, attrs):
        """Build the control; a missing value becomes "" and it is readonly."""
        ScalarControl.__init__(self, type, name, attrs)
        # IE5 defaults SUBMIT value to "Submit Query"; Firebird 0.6 leaves it
        # blank, Konqueror 3.1 defaults to "Submit".  HTML spec. doesn't seem
        # to define this.
        if self.value is None: self.value = ""
        self.readonly = True

    def is_of_kind(self, kind):
        """Return true only for the kind "clickable"."""
        return kind == "clickable"

    def _click(self, form, coord, return_type):
        """Mark this control as clicked while the form builds its result.

        Returns whatever form._switch_click(return_type) returns.

        """
        self._clicked = coord
        try:
            return form._switch_click(return_type)
        finally:
            # Always clear the mark, even if the form raised; otherwise
            # later pairs() calls would still report this control as
            # clicked.
            self._clicked = False

    def pairs(self):
        """Return ScalarControl's pairs if clicked, else an empty list."""
        if not self._clicked:
            return []
        return ScalarControl.pairs(self)
1952 :    
1953 :    
1954 :     #---------------------------------------------------
class ImageControl(SubmitControl):
    """
    Covers:

    INPUT/IMAGE

    The value attribute of an ImageControl is always None.  Coordinates are
    specified using one of the HTMLForm.click* methods.

    """
    def __init__(self, type, name, attrs):
        """Build the control and force its value to None."""
        ScalarControl.__init__(self, type, name, attrs)
        # value is rejected by __setattr__ below, so store it directly
        self.__dict__["value"] = None

    def __setattr__(self, name, value):
        """Store an attribute; value, name and type are readonly."""
        if name not in ("value", "name", "type"):
            self.__dict__[name] = value
            return
        raise AttributeError("%s attribute is readonly" % name)

    def pairs(self):
        """Return the "<name>.x" / "<name>.y" click coordinate pairs.

        Nothing is returned if the control is disabled, has not been
        clicked, or has no name.

        """
        click_coord = self._clicked
        if self.disabled or not click_coord:
            return []
        control_name = self.name
        if control_name is None:
            return []
        x_pair = ("%s.x" % control_name, str(click_coord[0]))
        y_pair = ("%s.y" % control_name, str(click_coord[1]))
        return [x_pair, y_pair]
1983 :    
1984 :    
# aliases, just to make str(control) and str(form) clearer
# (each subclass adds nothing to its base class; only the class name
# differs)
class PasswordControl(TextControl): pass
class HiddenControl(TextControl): pass
class TextareaControl(TextControl): pass
class SubmitButtonControl(SubmitControl): pass
1990 :    
1991 :    
def is_listcontrol(control):
    """Return whether control reports itself as being of kind "list"."""
    result = control.is_of_kind("list")
    return result
1993 :    
1994 :    
1995 :     class HTMLForm:
1996 :     """Represents a single HTML <form> ... </form> element.
1997 :    
1998 :     A form consists of a sequence of controls that usually have names, and
1999 :     which can take on various values. The values of the various types of
2000 :     controls represent variously: text, zero-or-one-of-many or many-of-many
2001 :     choices, and files to be uploaded. Some controls can be clicked on to
2002 :     submit the form, and clickable controls' values sometimes include the
2003 :     coordinates of the click.
2004 :    
2005 :     Forms can be filled in with data to be returned to the server, and then
2006 :     submitted, using the click method to generate a request object suitable for
2007 :     passing to urllib2.urlopen (or the click_request_data or click_pairs
2008 :     methods if you're not using urllib2).
2009 :    
2010 :     import ClientForm
2011 :     forms = ClientForm.ParseFile(html, base_uri)
2012 :     form = forms[0]
2013 :    
2014 :     form["query"] = "Python"
2015 :     form.set("lots", "nr_results")
2016 :    
2017 :     response = urllib2.urlopen(form.click())
2018 :    
2019 :     Usually, HTMLForm instances are not created directly. Instead, the
2020 :     ParseFile or ParseResponse factory functions are used. If you do construct
2021 :     HTMLForm objects yourself, however, note that an HTMLForm instance is only
2022 :     properly initialised after the fixup method has been called (ParseFile and
2023 :     ParseResponse do this for you). See ListControl.__doc__ for the reason
2024 :     this is required.
2025 :    
2026 :     Indexing a form (form["control_name"]) returns the named Control's value
2027 :     attribute. Assignment to a form index (form["control_name"] = something)
2028 :     is equivalent to assignment to the named Control's value attribute. If you
2029 :     need to be more specific than just supplying the control's name, use the
2030 :     set_value and get_value methods.
2031 :    
2032 :     ListControl values are lists of item names. The list item's name is the
2033 :     value of the corresponding HTML element's "value" attribute.
2034 :    
2035 :     Example:
2036 :    
2037 :     <INPUT type="CHECKBOX" name="cheeses" value="leicester"></INPUT>
2038 :     <INPUT type="CHECKBOX" name="cheeses" value="cheddar"></INPUT>
2039 :    
2040 :     defines a CHECKBOX control with name "cheeses" which has two items, named
2041 :     "leicester" and "cheddar".
2042 :    
2043 :     Another example:
2044 :    
2045 :     <SELECT name="more_cheeses">
2046 :     <OPTION>1</OPTION>
2047 :     <OPTION value="2" label="CHEDDAR">cheddar</OPTION>
2048 :     </SELECT>
2049 :    
2050 :     defines a SELECT control with name "more_cheeses" which has two items,
2051 :     named "1" and "2" (because the OPTION element's value HTML attribute
2052 :     defaults to the element contents).
2053 :    
2054 :     To set, clear or toggle individual list items, use the set and toggle
2055 :     methods. To set the whole value, do as for any other control: use indexing
2056 :     or the set_/get_value methods.
2057 :    
2058 :     Example:
2059 :    
2060 :     # select *only* the item named "cheddar"
2061 :     form["cheeses"] = ["cheddar"]
2062 :     # select "cheddar", leave other items unaffected
2063 :     form.set("cheddar", "cheeses")
2064 :    
2065 :     Some controls (RADIO and SELECT without the multiple attribute) can only
2066 :     have zero or one items selected at a time. Some controls (CHECKBOX and
2067 :     SELECT with the multiple attribute) can have multiple items selected at a
2068 :     time. To set the whole value of a ListControl, assign a sequence to a form
2069 :     index:
2070 :    
2071 :     form["cheeses"] = ["cheddar", "leicester"]
2072 :    
2073 :     If the ListControl is not multiple-selection, the assigned list must be of
2074 :     length zero or one.
2075 :    
2076 :     To check whether a control has an item, or whether an item is selected,
2077 :     respectively:
2078 :    
2079 :     "cheddar" in form.possible_items("cheeses")
2080 :     "cheddar" in form["cheeses"] # (or "cheddar" in form.get_value("cheeses"))
2081 :    
2082 :     Note that some list items may be disabled (see below).
2083 :    
2084 :     Note the following mistake:
2085 :    
2086 :     form[control_name] = control_value
2087 :     assert form[control_name] == control_value # not necessarily true
2088 :    
2089 :     The reason for this is that form[control_name] always gives the list items
2090 :     in the order they were listed in the HTML.
2091 :    
2092 :     List items (hence list values, too) can be referred to in terms of list
2093 :     item labels rather than list item names. Currently, this is only possible
2094 :     for SELECT controls (this is a bug). To use this feature, use the by_label
2095 :     arguments to the various HTMLForm methods. Note that it is *item* names
2096 :     (hence ListControl values also), not *control* names, that can be referred
2097 :     to by label.
2098 :    
2099 :     The question of default values of OPTION contents, labels and values is
2100 :     somewhat complicated: see SelectControl.__doc__ and
2101 :     ListControl.get_item_attrs.__doc__ if you think you need to know.
2102 :    
2103 :     Controls can be disabled or readonly. In either case, the control's value
2104 :     cannot be changed until you clear those flags (see example below).
2105 :     Disabled is the state typically represented by browsers by `greying out' a
2106 :     control. Disabled controls are not `successful' -- they don't cause data
2107 :     to get returned to the server. Readonly controls usually appear in
2108 :     browsers as read-only text boxes. Readonly controls are successful. List
2109 :     items can also be disabled. Attempts to select disabled items (with
2110 :     form[name] = value, or using the ListControl.set method, for example) fail.
2111 :     Attempts to clear disabled items are allowed.
2112 :    
2113 :     If a lot of controls are readonly, it can be useful to do this:
2114 :    
2115 :     form.set_all_readonly(False)
2116 :    
2117 :     When you want to do several things with a single control, or want to do
2118 :     less common things, like changing which controls and items are disabled,
2119 :     you can get at a particular control:
2120 :    
2121 :     control = form.find_control("cheeses")
2122 :     control.disabled = False
2123 :     control.readonly = False
2124 :     control.set_item_disabled(False, "gruyere")
2125 :     control.set("gruyere")
2126 :    
2127 :     Most methods on HTMLForm just delegate to the contained controls, so see
2128 :     the docstrings of the various Control classes for further documentation.
2129 :     Most of these delegating methods take name, type, kind, id and nr arguments
2130 :     to specify the control to be operated on: see
2131 :     HTMLForm.find_control.__doc__.
2132 :    
2133 :     ControlNotFoundError (subclass of ValueError) is raised if the specified
2134 :     control can't be found. This includes occasions where a non-ListControl
2135 :     is found, but the method (set, for example) requires a ListControl.
2136 :     ItemNotFoundError (subclass of ValueError) is raised if a list item can't
2137 :     be found. ItemCountError (subclass of ValueError) is raised if an attempt
2138 :     is made to select more than one item and the control doesn't allow that, or
2139 :     set/get_single are called and the control contains more than one item.
2140 :     AttributeError is raised if a control or item is readonly or disabled and
2141 :     an attempt is made to alter its value.
2142 :    
2143 :     XXX CheckboxControl and RadioControl don't yet support item access by label
2144 :    
2145 :     Security note: Remember that any passwords you store in HTMLForm instances
2146 :     will be saved to disk in the clear if you pickle them (directly or
2147 :     indirectly). The simplest solution to this is to avoid pickling HTMLForm
2148 :     objects. You could also pickle before filling in any password, or just set
2149 :     the password to "" before pickling.
2150 :    
2151 :    
2152 :     Public attributes:
2153 :    
2154 :     action: full (absolute URI) form action
2155 :     method: "GET" or "POST"
2156 :     enctype: form transfer encoding MIME type
2157 :     name: name of form (None if no name was specified)
2158 :     attrs: dictionary mapping original HTML form attributes to their values
2159 :    
2160 :     controls: list of Control instances; do not alter this list
2161 :     (instead, call form.new_control to make a Control and add it to the
2162 :     form, or control.add_to_form if you already have a Control instance)
2163 :    
2164 :    
2165 :    
2166 :     Methods for form filling:
2167 :     -------------------------
2168 :    
2169 :     Most of these methods have very similar arguments. See
2170 :     HTMLForm.find_control.__doc__ for details of the name, type, kind and nr
2171 :     arguments. See above for a description of by_label.
2172 :    
2173 :     def find_control(self,
2174 :     name=None, type=None, kind=None, id=None, predicate=None,
2175 :     nr=None)
2176 :    
2177 :     get_value(name=None, type=None, kind=None, id=None, nr=None,
2178 :     by_label=False)
2179 :     set_value(value,
2180 :     name=None, type=None, kind=None, id=None, nr=None,
2181 :     by_label=False)
2182 :    
2183 :     set_all_readonly(readonly)
2184 :    
2185 :    
2186 :     Methods applying only to ListControls:
2187 :    
2188 :     possible_items(name=None, type=None, kind=None, id=None, nr=None,
2189 :     by_label=False)
2190 :    
2191 :     set(selected, item_name,
2192 :     name=None, type=None, kind=None, id=None, nr=None,
2193 :     by_label=False)
2194 :     toggle(item_name,
2195 :     name=None, type=None, kind=None, id=None, nr=None,
2196 :     by_label=False)
2197 :    
2198 :     set_single(selected,
2199 :     name=None, type=None, kind=None, id=None, nr=None,
2200 :     by_label=False)
2201 :     toggle_single(name=None, type=None, kind=None, id=None, nr=None,
2202 :     by_label=False)
2203 :    
2204 :    
2205 :     Method applying only to FileControls:
2206 :    
2207 :     add_file(file_object,
2208 :     content_type=None, filename=None,
2209 :     name=None, id=None, nr=None)
2210 :    
2211 :    
2212 :     Methods applying only to clickable controls:
2213 :    
2214 :     click(name=None, type=None, id=None, nr=0, coord=(1,1))
2215 :     click_request_data(name=None, type=None, id=None, nr=0, coord=(1,1))
2216 :     click_pairs(name=None, type=None, id=None, nr=0, coord=(1,1))
2217 :    
2218 :     """
2219 :    
# Maps a lower-case control type string to the Control class that
# new_control instantiates for it.  Types missing from this table are
# handled by new_control's ignore_unknown argument.
type2class = {
    "text": TextControl,
    "password": PasswordControl,
    "hidden": HiddenControl,
    "textarea": TextareaControl,

    "isindex": IsindexControl,

    "file": FileControl,

    # plain buttons and reset buttons are given the IgnoreControl class
    "button": IgnoreControl,
    "buttonbutton": IgnoreControl,
    "reset": IgnoreControl,
    "resetbutton": IgnoreControl,

    "submit": SubmitControl,
    "submitbutton": SubmitButtonControl,
    "image": ImageControl,

    "radio": RadioControl,
    "checkbox": CheckboxControl,
    "select": SelectControl,
    }
2243 :    
2244 :     #---------------------------------------------------
2245 :     # Initialisation. Use ParseResponse / ParseFile instead.
2246 :    
def __init__(self, action, method="GET",
             enctype="application/x-www-form-urlencoded",
             name=None, attrs=None):
    """
    In the usual case, use ParseResponse (or ParseFile) to create new
    HTMLForm objects.

    action: full (absolute URI) form action
    method: "GET" or "POST"
    enctype: form transfer encoding MIME type
    name: name of form
    attrs: dictionary mapping original HTML form attributes to their values
     (a copy is stored; an empty dictionary is used if None)

    """
    if attrs is None:
        own_attrs = {}
    else:
        own_attrs = attrs.copy()

    self.action = action
    self.method = method
    self.enctype = enctype
    self.name = name
    self.attrs = own_attrs
    self.controls = []
2270 :    
def new_control(self, type, name, attrs,
                ignore_unknown=False, select_default=False):
    """Adds a new control to the form.

    This is usually called by ParseFile and ParseResponse.  Don't call it
    yourself unless you're building your own Control instances.

    Note that controls representing lists of items are built up from
    controls holding only a single list item.  See ListControl.__doc__ for
    further information.

    type: type of control (see Control.__doc__ for a list); matched
     case-insensitively
    name: name passed on to the new control
    attrs: HTML attributes of control (the control is given a copy)
    ignore_unknown: if true, use a dummy Control instance for controls of
     unknown type; otherwise, raise ValueError
    select_default: for RADIO and multiple-selection SELECT controls, pick
     the first item as the default if no 'selected' HTML attribute is
     present (this defaulting happens when the HTMLForm.fixup method is
     called)

    """
    type = string.lower(type)
    control_class = self.type2class.get(type)
    if control_class is None:
        if not ignore_unknown:
            raise ValueError("Unknown control type '%s'" % type)
        control_class = IgnoreControl

    attrs_copy = attrs.copy()
    # only list controls take the select_default argument
    if issubclass(control_class, ListControl):
        control = control_class(type, name, attrs_copy, select_default)
    else:
        control = control_class(type, name, attrs_copy)
    control.add_to_form(self)
2306 :    
def fixup(self):
    """Normalise form after all controls have been added.

    This is usually called by ParseFile and ParseResponse.  Don't call it
    yourself unless you're building your own Control instances.

    This method should only be called once, after all controls have been
    added to the form.  It calls fixup on each control in turn.

    """
    for member in self.controls:
        member.fixup()
2319 :    
2320 :     #---------------------------------------------------
2321 :     def __str__(self):
2322 :     header = "%s %s %s" % (self.method, self.action, self.enctype)
2323 :     rep = [header]
2324 :     for control in self.controls:
2325 :     rep.append(" %s" % str(control))
2326 :     return "<%s>" % string.join(rep, "\n")
2327 :    
2328 :     #---------------------------------------------------
2329 :     # Form-filling methods.
2330 :    
def __getitem__(self, name):
    """Return the value attribute of the control found by name."""
    control = self.find_control(name)
    return control.value
2333 :     def __setitem__(self, name, value):
2334 :     control = self.find_control(name)
2335 :     try:
2336 :     control.value = value
2337 :     except AttributeError, e:
2338 :     raise ValueError(str(e))
2339 :    
def get_value(self,
              name=None, type=None, kind=None, id=None, nr=None,
              by_label=False):
    """Return value of control.

    If only the name argument is supplied, equivalent to

    form[name]

    With by_label true, the control's get_value_by_label method is used
    instead; NotImplementedError is raised if the control has no such
    method.

    """
    control = self.find_control(name, type, kind, id, nr=nr)
    if not by_label:
        return control.value
    try:
        getter = control.get_value_by_label
    except AttributeError:
        raise NotImplementedError(
            "control '%s' does not yet support by_label" % control.name)
    return getter()
def set_value(self, value,
              name=None, type=None, kind=None, id=None, nr=None,
              by_label=False):
    """Set value of control.

    If only name and value arguments are supplied, equivalent to

    form[name] = value

    With by_label true, the control's set_value_by_label method is used
    instead; NotImplementedError is raised if the control has no such
    method.

    """
    control = self.find_control(name, type, kind, id, nr=nr)
    if not by_label:
        control.value = value
        return
    try:
        setter = control.set_value_by_label
    except AttributeError:
        raise NotImplementedError(
            "control '%s' does not yet support by_label" % control.name)
    setter(value)
2382 :    
def set_all_readonly(self, readonly):
    """Set the readonly attribute of every control to bool(readonly)."""
    for member in self.controls:
        member.readonly = bool(readonly)
2386 :    
2387 :    
2388 :     #---------------------------------------------------
2389 :     # Form-filling methods applying only to ListControls.
2390 :    
def possible_items(self,
                   name=None, type=None, kind=None, id=None, nr=None,
                   by_label=False):
    """Return a list of all values that the specified control can take."""
    control = self._find_list_control(name, type, kind, id, nr)
    return control.possible_items(by_label)
2397 :    
def set(self, selected, item_name,
        name=None, type=None, kind=None, id=None, nr=None,
        by_label=False):
    """Select / deselect named list item.

    selected: boolean selected state

    The call is passed on to the set method of the list control found
    from the remaining arguments.

    """
    control = self._find_list_control(name, type, kind, id, nr)
    control.set(selected, item_name, by_label)
def toggle(self, item_name,
           name=None, type=None, kind=None, id=None, nr=None,
           by_label=False):
    """Toggle selected state of named list item."""
    control = self._find_list_control(name, type, kind, id, nr)
    control.toggle(item_name, by_label)
2414 :    
def set_single(self, selected,
               name=None, type=None, kind=None, id=None, nr=None,
               by_label=False):
    """Select / deselect list item in a control having only one item.

    If the control has multiple list items, ItemCountError is raised.

    This is just a convenience method, so you don't need to know the item's
    name -- the item name in these single-item controls is usually
    something meaningless like "1" or "on".

    For example, if a checkbox has a single item named "on", the following
    two calls are equivalent:

    control.set(True, "on")
    control.set_single(True)

    """
    control = self._find_list_control(name, type, kind, id, nr)
    control.set_single(selected, by_label)
2435 :     def toggle_single(self, name=None, type=None, kind=None, id=None, nr=None,
2436 :     by_label=False):
2437 :     """Toggle selected state of list item in control having only one item.
2438 :    
2439 :     The rest is as for HTMLForm.set_single.__doc__.
2440 :    
2441 :     """
2442 :     self._find_list_control(name, type, kind, id, nr).toggle_single(
2443 :     by_label)
2444 :    
2445 :     #---------------------------------------------------
2446 :     # Form-filling method applying only to FileControls.
2447 :    
2448 :     def add_file(self, file_object, content_type=None, filename=None,
2449 :     name=None, id=None, nr=None):
2450 :     """Add a file to be uploaded.
2451 :    
2452 :     file_object: file-like object (with read method) from which to read
2453 :     data to upload
2454 :     content_type: MIME content type of data to upload
2455 :     filename: filename to pass to server
2456 :    
2457 :     If filename is None, no filename is sent to the server.
2458 :    
2459 :     If content_type is None, the content type is guessed based on the
2460 :     filename and the data from read from the file object.
2461 :    
2462 :     XXX
2463 :     At the moment, guessed content type is always application/octet-stream.
2464 :     Use sndhdr, imghdr modules. Should also try to guess HTML, XML, and
2465 :     plain text.
2466 :    
2467 :     Note the following useful HTML attributes of file upload controls (see
2468 :     HTML 4.01 spec, section 17):
2469 :    
2470 :     accept: comma-separated list of content types that the server will
2471 :     handle correctly; you can use this to filter out non-conforming files
2472 :     size: XXX IIRC, this is indicative of whether form wants multiple or
2473 :     single files
2474 :     maxlength: XXX hint of max content length in bytes?
2475 :    
2476 :     """
2477 :     self.find_control(name, "file", id=id, nr=nr).add_file(
2478 :     file_object, content_type, filename)
2479 :    
2480 :     #---------------------------------------------------
2481 :     # Form submission methods, applying only to clickable controls.
2482 :    
2483 :     def click(self, name=None, type=None, id=None, nr=0, coord=(1,1)):
2484 :     """Return request that would result from clicking on a control.
2485 :    
2486 :     The request object is a urllib2.Request instance, which you can pass to
2487 :     urllib2.urlopen (or ClientCookie.urlopen).
2488 :    
2489 :     Only some control types (INPUT/SUBMIT & BUTTON/SUBMIT buttons and
2490 :     IMAGEs) can be clicked.
2491 :    
2492 :     Will click on the first clickable control, subject to the name, type
2493 :     and nr arguments (as for find_control). If no name, type, id or number
2494 :     is specified and there are no clickable controls, a request will be
2495 :     returned for the form in its current, un-clicked, state.
2496 :    
2497 :     IndexError is raised if any of name, type, id or nr is specified but no
2498 :     matching control is found. ValueError is raised if the HTMLForm has an
2499 :     enctype attribute that is not recognised.
2500 :    
2501 :     You can optionally specify a coordinate to click at, which only makes a
2502 :     difference if you clicked on an image.
2503 :    
2504 :     """
2505 :     return self._click(name, type, id, nr, coord, "request")
2506 :    
2507 :     def click_request_data(self,
2508 :     name=None, type=None, id=None, nr=0, coord=(1,1)):
2509 :     """As for click method, but return a tuple (url, data, headers).
2510 :    
2511 :     You can use this data to send a request to the server. This is useful
2512 :     if you're using httplib or urllib rather than urllib2. Otherwise, use
2513 :     the click method.
2514 :    
2515 :     # Untested. Have to subclass to add headers, I think -- so use urllib2
2516 :     # instead!
2517 :     import urllib
2518 :     url, data, hdrs = form.click_request_data()
2519 :     r = urllib.urlopen(url, data)
2520 :    
2521 :     # Untested. I don't know of any reason to use httplib -- you can get
2522 :     # just as much control with urllib2.
2523 :     import httplib, urlparse
2524 :     url, data, hdrs = form.click_request_data()
2525 :     tup = urlparse(url)
2526 :     host, path = tup[1], urlparse.urlunparse((None, None)+tup[2:])
2527 :     conn = httplib.HTTPConnection(host)
2528 :     if data:
2529 :     httplib.request("POST", path, data, hdrs)
2530 :     else:
2531 :     httplib.request("GET", path, headers=hdrs)
2532 :     r = conn.getresponse()
2533 :    
2534 :     """
2535 :     return self._click(name, type, id, nr, coord, "request_data")
2536 :    
2537 :     def click_pairs(self, name=None, type=None, id=None, nr=0, coord=(1,1)):
2538 :     """As for click_request_data, but returns a list of (key, value) pairs.
2539 :    
2540 :     You can use this list as an argument to ClientForm.urlencode. This is
2541 :     usually only useful if you're using httplib or urllib rather than
2542 :     urllib2 or ClientCookie. It may also be useful if you want to manually
2543 :     tweak the keys and/or values, but this should not be necessary.
2544 :     Otherwise, use the click method.
2545 :    
2546 :     Note that this method is only useful for forms of MIME type
2547 :     x-www-form-urlencoded. In particular, it does not return the
2548 :     information required for file upload. If you need file upload and are
2549 :     not using urllib2, use click_request_data.
2550 :    
2551 :     Also note that Python 2.0's urllib.urlencode is slightly broken: it
2552 :     only accepts a mapping, not a sequence of pairs, as an argument. This
2553 :     messes up any ordering in the argument. Use ClientForm.urlencode
2554 :     instead.
2555 :    
2556 :     """
2557 :     return self._click(name, type, id, nr, coord, "pairs")
2558 :    
2559 :     #---------------------------------------------------
2560 :    
2561 :     def find_control(self,
2562 :     name=None, type=None, kind=None, id=None, predicate=None,
2563 :     nr=None):
2564 :     """Locate and return some specific control within the form.
2565 :    
2566 :     At least one of the name, type, kind, predicate and nr arguments must
2567 :     be supplied. If no matching control is found, ControlNotFoundError is
2568 :     raised.
2569 :    
2570 :     If name is specified, then the control must have the indicated name.
2571 :    
2572 :     If type is specified then the control must have the specified type (in
2573 :     addition to the types possible for <input> HTML tags: "text",
2574 :     "password", "hidden", "submit", "image", "button", "radio", "checkbox",
2575 :     "file" we also have "reset", "buttonbutton", "submitbutton",
2576 :     "resetbutton", "textarea", "select" and "isindex").
2577 :    
2578 :     If kind is specified, then the control must fall into the specified
2579 :     group, each of which satisfies a particular interface. The types are
2580 :     "text", "list", "multilist", "singlelist", "clickable" and "file".
2581 :    
2582 :     If id is specified, then the control must have the indicated id.
2583 :    
2584 :     If predicate is specified, then the control must match that function.
2585 :     The predicate function is passed the control as its single argument,
2586 :     and should return a boolean value indicating whether the control
2587 :     matched.
2588 :    
2589 :     nr, if supplied, is the sequence number of the control (where 0 is the
2590 :     first). Note that control 0 is the first control matching all the
2591 :     other arguments (if supplied); it is not necessarily the first control
2592 :     in the form.
2593 :    
2594 :     """
2595 :     if ((name is None) and (type is None) and (kind is None) and
2596 :     (id is None) and (predicate is None) and (nr is None)):
2597 :     raise ValueError(
2598 :     "at least one argument must be supplied to specify control")
2599 :     if nr is None: nr = 0
2600 :    
2601 :     return self._find_control(name, type, kind, id, predicate, nr)
2602 :    
2603 :     #---------------------------------------------------
2604 :     # Private methods.
2605 :    
2606 :     def _find_list_control(self,
2607 :     name=None, type=None, kind=None, id=None, nr=None):
2608 :     if ((name is None) and (type is None) and (kind is None) and
2609 :     (id is None) and (nr is None)):
2610 :     raise ValueError(
2611 :     "at least one argument must be supplied to specify control")
2612 :     if nr is None: nr = 0
2613 :    
2614 :     return self._find_control(name, type, kind, id, is_listcontrol, nr)
2615 :    
2616 :     def _find_control(self, name, type, kind, id, predicate, nr):
2617 :     if (name is not None) and not isstringlike(name):
2618 :     raise TypeError("control name must be string-like")
2619 :     if (type is not None) and not isstringlike(type):
2620 :     raise TypeError("control type must be string-like")
2621 :     if (kind is not None) and not isstringlike(kind):
2622 :     raise TypeError("control kind must be string-like")
2623 :     if (id is not None) and not isstringlike(id):
2624 :     raise TypeError("control id must be string-like")
2625 :     if (predicate is not None) and not callable(predicate):
2626 :     raise TypeError("control predicate must be callable")
2627 :     if nr < 0: raise ValueError("control number must be a positive "
2628 :     "integer")
2629 :    
2630 :     orig_nr = nr
2631 :    
2632 :     for control in self.controls:
2633 :     if name is not None and name != control.name:
2634 :     continue
2635 :     if type is not None and type != control.type:
2636 :     continue
2637 :     if kind is not None and not control.is_of_kind(kind):
2638 :     continue
2639 :     if id is not None and id != control.id:
2640 :     continue
2641 :     if predicate and not predicate(control):
2642 :     continue
2643 :     if nr:
2644 :     nr = nr - 1
2645 :     continue
2646 :     return control
2647 :    
2648 :     description = []
2649 :     if name is not None: description.append("name '%s'" % name)
2650 :     if type is not None: description.append("type '%s'" % type)
2651 :     if kind is not None: description.append("kind '%s'" % kind)
2652 :     if id is not None: description.append("id '%s'" % id)
2653 :     if predicate is not None:
2654 :     description.append("predicate %s" % predicate)
2655 :     if orig_nr: description.append("nr %d" % orig_nr)
2656 :     description = string.join(description, ", ")
2657 :     raise ControlNotFoundError("no control matching "+description)
2658 :    
2659 :     def _click(self, name, type, id, nr, coord, return_type):
2660 :     try:
2661 :     control = self._find_control(name, type, "clickable", id, None, nr)
2662 :     except ControlNotFoundError:
2663 :     if ((name is not None) or (type is not None) or (id is not None) or
2664 :     (nr != 0)):
2665 :     raise
2666 :     # no clickable controls, but no control was explicitly requested,
2667 :     # so return state without clicking any control
2668 :     return self._switch_click(return_type)
2669 :     else:
2670 :     return control._click(self, coord, return_type)
2671 :    
2672 :     def _pairs(self):
2673 :     """Return sequence of (key, value) pairs suitable for urlencoding."""
2674 :     pairs = []
2675 :     for control in self.controls:
2676 :     pairs.extend(control.pairs())
2677 :     return pairs
2678 :    
2679 :     def _request_data(self):
2680 :     """Return a tuple (url, data, headers)."""
2681 :     method = string.upper(self.method)
2682 :     if method == "GET":
2683 :     if self.enctype != "application/x-www-form-urlencoded":
2684 :     raise ValueError(
2685 :     "unknown GET form encoding type '%s'" % self.enctype)
2686 :     if "?" in self.action:
2687 :     fmt = "%s&%s"
2688 :     else:
2689 :     fmt = "%s?%s"
2690 :     uri = fmt % (self.action, urlencode(self._pairs()))
2691 :     return uri, None, []
2692 :     elif method == "POST":
2693 :     if self.enctype == "application/x-www-form-urlencoded":
2694 :     return (self.action, urlencode(self._pairs()),
2695 :     [("Content-type", self.enctype)])
2696 :     elif self.enctype == "multipart/form-data":
2697 :     data = StringIO()
2698 :     http_hdrs = []
2699 :     mw = MimeWriter(data, http_hdrs)
2700 :     f = mw.startmultipartbody("form-data", add_to_http_hdrs=True,
2701 :     prefix=0)
2702 :     for control in self.controls:
2703 :     control._write_mime_data(mw)
2704 :     mw.lastpart()
2705 :     return self.action, data.getvalue(), http_hdrs
2706 :     else:
2707 :     raise ValueError(
2708 :     "unknown POST form encoding type '%s'" % self.enctype)
2709 :     else:
2710 :     raise ValueError("Unknown method '%s'" % method)
2711 :    
2712 :     def _switch_click(self, return_type):
2713 :     # This is called by HTMLForm and clickable Controls to hide switching
2714 :     # on return_type.
2715 :     if return_type == "pairs":
2716 :     return self._pairs()
2717 :     elif return_type == "request_data":
2718 :     return self._request_data()
2719 :     else:
2720 :     req_data = self._request_data()
2721 :     req = urllib2.Request(req_data[0], req_data[1])
2722 :     for key, val in req_data[2]:
2723 :     req.add_header(key, val)
2724 :     return req

Tobias McNulty

Powered by ViewCVS 1.0-dev
(Powered by ViewCVS)

ViewCVS and CVS Help