##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""HTTP Request Parser

This server uses asyncore to accept connections and do initial
processing but threads to do work.

$Id: httprequestparser.py 38178 2005-08-30 21:50:19Z mj $
"""
import re
from urllib import unquote

from zope.server.fixedstreamreceiver import FixedStreamReceiver
from zope.server.buffers import OverflowableBuffer
from zope.server.utilities import find_double_newline
from zope.server.interfaces import IStreamConsumer
from zope.interface import implements

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO


class HTTPRequestParser(object):
    """A structure that collects the HTTP request.

    Once the stream is completed, the instance is passed to
    a server task constructor.
    """

    implements(IStreamConsumer)

    completed = 0        # Set once request is completed.
    empty = 0            # Set if no request was made.
    header_plus = ''     # Header bytes accumulated so far (incomplete header).
    chunked = 0          # Set when the body uses chunked transfer coding.
    content_length = 0   # Parsed Content-Length (0 if absent or invalid).
    body_rcv = None      # Receiver object for the body, if there is one.
    # Other attributes: first_line, header, headers, command, uri, version,
    # path, query, fragment

    # headers is a mapping containing keys translated to uppercase
    # with dashes turned into underscores.

    def __init__(self, adj):
        """Create a parser for a single request.

        adj is an Adjustments object; this class reads its
        ``inbuf_overflow`` attribute when creating body buffers.
        """
        self.headers = {}
        self.adj = adj

    def received(self, data):
        """Receive the HTTP stream for one request.

        Returns the number of bytes of ``data`` that were consumed.
        Sets the ``completed`` flag once both the header and the body
        have been received, and the ``empty`` flag if the header block
        contained nothing but whitespace.
        """
        if self.completed:
            return 0  # Can't consume any more.
        datalen = len(data)
        br = self.body_rcv
        if br is None:
            # In header.
            s = self.header_plus + data
            index = find_double_newline(s)
            if index >= 0:
                # Header finished.
                header_plus = s[:index]
                # Everything in s from index onwards is left unconsumed;
                # the earlier part of s was already counted as consumed
                # by previous calls.
                consumed = len(data) - (len(s) - index)
                self.in_header = 0
                # Remove preceding blank lines.
                header_plus = header_plus.lstrip()
                if not header_plus:
                    self.empty = 1
                    self.completed = 1
                else:
                    self.parse_header(header_plus)
                    if self.body_rcv is None:
                        # No body expected: the request is complete.
                        self.completed = 1
                return consumed
            else:
                # Header not finished yet.
                self.header_plus = s
                return datalen
        else:
            # In body.
            consumed = br.received(data)
            if br.completed:
                self.completed = 1
            return consumed

    def parse_header(self, header_plus):
        """Parse the header_plus block of text.

        header_plus is the first line of the request plus the headers.

        Sets ``first_line``, ``header``, ``command``, ``uri`` and
        ``version``, fills ``headers``, calls ``split_uri()`` (which may
        raise ValueError), and creates ``body_rcv`` when a body is
        expected (chunked transfer coding on HTTP/1.1, or a positive
        Content-Length).
        """
        index = header_plus.find('\n')
        if index >= 0:
            first_line = header_plus[:index].rstrip()
            header = header_plus[index + 1:]
        else:
            first_line = header_plus.rstrip()
            header = ''
        self.first_line = first_line
        self.header = header

        lines = self.get_header_lines()
        headers = self.headers
        for line in lines:
            index = line.find(':')
            if index > 0:
                key = line[:index]
                value = line[index + 1:].strip()
                key1 = key.upper().replace('-', '_')
                # If a header already exists, we append subsequent values
                # separated by a comma. Applications already need to handle
                # the comma separated values, as HTTP front ends might do
                # the concatenation for you (behavior specified in RFC2616).
                try:
                    headers[key1] += ', %s' % value
                except KeyError:
                    headers[key1] = value
            # else there's garbage in the headers?

        # NOTE(review): when the first line does not match, all three
        # values are None and str() turns command and uri into 'None'.
        command, uri, version = self.crack_first_line()
        self.command = str(command)
        self.uri = str(uri)
        self.version = version
        self.split_uri()

        if version == '1.1':
            te = headers.get('TRANSFER_ENCODING', '')
            # Transfer-coding names are case-insensitive (RFC 2616, 3.6).
            if te.lower() == 'chunked':
                from zope.server.http.chunking import ChunkedReceiver
                self.chunked = 1
                buf = OverflowableBuffer(self.adj.inbuf_overflow)
                self.body_rcv = ChunkedReceiver(buf)
        if not self.chunked:
            try:
                cl = int(headers.get('CONTENT_LENGTH', 0))
            except ValueError:
                # An unparsable Content-Length is treated as "no body".
                cl = 0
            self.content_length = cl
            if cl > 0:
                buf = OverflowableBuffer(self.adj.inbuf_overflow)
                self.body_rcv = FixedStreamReceiver(cl, buf)

    def get_header_lines(self):
        """Split the header into lines, putting multi-line headers together.

        A line starting with a space or tab is a continuation: it is
        appended (minus its first character) to the previous line.  A
        continuation line with no previous line to continue is malformed
        and is dropped instead of raising IndexError.

        Returns the list of logical header lines.
        """
        r = []
        for line in self.header.split('\n'):
            if line and line[0] in ' \t':
                if r:
                    r[-1] = r[-1] + line[1:]
                # else: nothing to continue; ignore the garbage line.
            else:
                r.append(line)
        return r

    first_line_re = re.compile(
        '([^ ]+) (?:[^ :?#]+://[^ ?#/]*)?([^ ]+)(( HTTP/([0-9.]+))$|$)')

    def crack_first_line(self):
        """Split ``first_line`` into its components.

        Returns a ``(command, uri, version)`` tuple.  The command is
        uppercased, a leading ``scheme://host`` part is left out of the
        uri, and version is None when the line carries no ``HTTP/x.y``
        suffix.  Returns ``(None, None, None)`` when the line does not
        match ``first_line_re`` in full.
        """
        r = self.first_line
        m = self.first_line_re.match(r)
        if m is not None and m.end() == len(r):
            if m.group(3):
                version = m.group(5)
            else:
                version = None
            return m.group(1).upper(), m.group(2), version
        else:
            return None, None, None

    path_regex = re.compile(
        #     path    query     fragment
        r'([^?#]*)(\?[^#]*)?(#.*)?'
        )

    def split_uri(self):
        """Split ``uri`` into ``path``, ``query`` and ``fragment``.

        The path is URL-unquoted when it contains a ``%``.  The query
        is stored without its leading ``?``; the fragment keeps its
        leading ``#``.  Parts that are absent are stored as None.

        Raises ValueError if ``path_regex`` does not match the whole uri.
        """
        m = self.path_regex.match(self.uri)
        if m.end() != len(self.uri):
            raise ValueError("Broken URI")
        else:
            path, query, self.fragment = m.groups()
            if path and '%' in path:
                path = unquote(path)
            self.path = path
            if query:
                # Strip the leading '?'.
                query = query[1:]
            self.query = query

    def getBodyStream(self):
        """Return a file-like object for the request body.

        This is the body receiver's ``getfile()`` result when a body
        receiver exists, otherwise an empty StringIO.
        """
        body_rcv = self.body_rcv
        if body_rcv is not None:
            return body_rcv.getfile()
        else:
            return StringIO('')