Package wsgitools :: Module filters
[hide private]
[frames] | no frames]

Source Code for Module wsgitools.filters

  1  """ 
  2  This module contains a generic way to create middlewares that filter data. 
  3  The work is mainly done by the L{WSGIFilterMiddleware} class. One can write 
  4  filters by extending the L{BaseWSGIFilter} class and passing this class 
  5  (not an instance) to the L{WSGIFilterMiddleware} constructor. 
  6  """ 
  7   
  8  __all__ = [] 
  9   
 10  import sys 
 11  import time 
 12  import gzip 
 13  import io 
 14   
 15  from wsgitools.internal import str2bytes 
 16   
 17  __all__.append("CloseableIterator") 
class CloseableIterator(object):
    """Concatenating iterator with an optional C{close} attribute.

    The iterators handed to the constructor are consumed one after another in
    the order given.
    """

    def __init__(self, close_function, *iterators):
        """If close_function is not C{None}, it will be the C{close} attribute
        of the created iterator object. Further parameters specify iterators
        that are to be concatenated.
        @type close_function: a function or C{None}
        @param iterators: any number of iterables; C{iter} is applied to each
        """
        if close_function is not None:
            self.close = close_function
        self.iterators = list(map(iter, iterators))

    def __iter__(self):
        """iterator interface
        @rtype: gen()
        """
        return self

    def __next__(self):
        """iterator interface

        Returns the next element of the first iterator that still has
        elements. Exhausted iterators are dropped in a loop (not by
        recursion), so an arbitrarily long run of empty iterators cannot
        exceed the interpreter's recursion limit.
        @raise StopIteration: once every iterator is exhausted
        """
        while self.iterators:
            try:
                return next(self.iterators[0])
            except StopIteration:
                # The front iterator is done; continue with the next one.
                self.iterators.pop(0)
        raise StopIteration

    def next(self):
        """Python 2 spelling of the iterator interface; delegates to
        C{__next__}."""
        return self.__next__()
45 46 __all__.append("CloseableList")
class CloseableList(list):
    """A C{list} subclass that can carry a C{close} attribute."""

    def __init__(self, close_function, *args):
        """Build the list from C{args} and optionally attach a C{close}
        attribute.
        @type close_function: a function or C{None}
        @param close_function: becomes the C{close} attribute of the new list
            unless it is C{None}
        @param args: forwarded unchanged to the C{list} constructor
        """
        super(CloseableList, self).__init__(*args)
        if close_function is not None:
            self.close = close_function

    def __iter__(self):
        """iterator interface

        The returned L{CloseableIterator} is given this list's C{close}
        attribute if there is one, C{None} otherwise.
        """
        closer = getattr(self, "close", None)
        plain_iterator = super(CloseableList, self).__iter__()
        return CloseableIterator(closer, plain_iterator)
62 63 __all__.append("BaseWSGIFilter")
class BaseWSGIFilter(object):
    """Generic WSGI filter class to be used with L{WSGIFilterMiddleware}.

    A new filter object is created for every request. The hooks are then
    used as follows:
     - the environment is passed through L{filter_environ},
     - a possible exception tuple is passed through L{filter_exc_info},
     - the status string is passed through L{filter_status},
     - each C{(header, value)} tuple is passed through L{filter_header},
     - the resulting header list is passed through L{filter_headers},
     - every piece of body data is passed through L{filter_data},
     - L{append_data} is asked for data to append to the response,
     - L{handle_close} is invoked when the request has finished.

    None of the default implementations modifies what it is given, so
    handing the L{BaseWSGIFilter} class itself to a L{WSGIFilterMiddleware}
    leaves requests and responses untouched.
    """

    def __init__(self):
        """Does nothing and may safely be overridden. It is listed only to
        document that the constructor must be callable without additional
        parameters."""

    def filter_environ(self, environ):
        """Receives the environment dict passed to the wsgi application and
        must return a C{dict}. By default the very same dict is returned.
        @type environ: {str: str}
        @rtype: {str: str}
        """
        return environ

    def filter_exc_info(self, exc_info):
        """Receives what the wrapped wsgi application passed as third
        argument to C{start_response}: either C{None} or an exception tuple.
        Either C{None} or such a tuple must be returned."""
        return exc_info

    def filter_status(self, status):
        """Receives the status string the wrapped wsgi application passed as
        first argument to C{start_response}. A valid HTTP status string must
        be returned.
        @type status: str
        @rtype: str
        """
        return status

    def filter_header(self, headername, headervalue):
        """Invoked once for each C{(headername, headervalue)} tuple in the
        second argument to C{start_response} from the wrapped wsgi
        application. Return such a tuple to keep the header, or C{None} to
        discard it.
        @type headername: str
        @type headervalue: str
        @rtype: (str, str) or None
        """
        return (headername, headervalue)

    def filter_headers(self, headers):
        """Receives the list of headers that remain after L{filter_header}
        has been applied and must return such a list as well.
        @type headers: [(str, str)]
        @rtype: [(str, str)]
        """
        return headers

    def filter_data(self, data):
        """Invoked for each piece of body data, whether it is written via
        the C{write} callable or yielded by the wrapped wsgi application.
        The (possibly modified) data must be returned.
        @type data: bytes
        @rtype: bytes
        """
        return data

    def append_data(self):
        """Can be used to append data to the response. A list or any other
        iterable of byte strings has to be returned; the default is an empty
        list.
        @rtype: gen([bytes])
        """
        return []

    def handle_close(self):
        """This method is invoked after the request has finished."""
139 140 __all__.append("WSGIFilterMiddleware")
class WSGIFilterMiddleware(object):
    """This wsgi middleware can be used with specialized L{BaseWSGIFilter}s to
    modify wsgi requests and/or responses."""

    def __init__(self, app, filterclass):
        """
        @param app: is a wsgi application.
        @type filterclass: L{BaseWSGIFilter}s subclass
        @param filterclass: is a subclass of L{BaseWSGIFilter} or some class
                that implements the interface. It is called without
                arguments once per request."""
        self.app = app
        self.filterclass = filterclass

    def __call__(self, environ, start_response):
        """wsgi interface

        Creates a fresh filter object, runs the environment, status, headers
        and body of the wrapped application through it and returns the
        filtered body. The returned object has a C{close} attribute that
        first invokes the filter's C{handle_close} and then the C{close} of
        the wrapped application's response (if it has one).
        @type environ: {str, str}
        @rtype: gen([bytes])
        """
        assert isinstance(environ, dict)
        reqfilter = self.filterclass()
        environ = reqfilter.filter_environ(environ)

        def modified_start_response(status, headers, exc_info=None):
            assert isinstance(status, str)
            assert isinstance(headers, list)
            exc_info = reqfilter.filter_exc_info(exc_info)
            status = reqfilter.filter_status(status)
            headers = (reqfilter.filter_header(h, v) for h, v in headers)
            # filter_header returns None for headers that are to be dropped
            headers = [h for h in headers if h]
            headers = reqfilter.filter_headers(headers)
            write = start_response(status, headers, exc_info)

            def modified_write(data):
                write(reqfilter.filter_data(data))
            return modified_write

        app_iterable = self.app(environ, modified_start_response)
        assert hasattr(app_iterable, "__iter__")

        # Objects that may provide the close() to invoke, in order of
        # preference. The object returned by the application comes first;
        # its iterator is appended further down as a fallback, so that the
        # iterable's own close() is no longer lost once iter() has been
        # applied to it.
        close_candidates = [app_iterable]

        def modified_close():
            try:
                reqfilter.handle_close()
            finally:
                # Run the application's close() even if handle_close raised.
                for candidate in close_candidates:
                    close = getattr(candidate, "close", None)
                    if close is not None:
                        close()
                        break

        if isinstance(app_iterable, list):
            return CloseableList(modified_close,
                                 list(map(reqfilter.filter_data, app_iterable))
                                 + list(reqfilter.append_data()))

        app_iterator = iter(app_iterable)
        close_candidates.append(app_iterator)

        def late_append_data():
            """Invoke C{reqfilter.append_data()} after C{filter_data()} has seen
            all data."""
            for data in reqfilter.append_data():
                yield data

        return CloseableIterator(modified_close,
                                 (reqfilter.filter_data(data)
                                  for data in app_iterator),
                                 late_append_data())
# Using map and lambda here since pylint cannot handle list comprehension in
# default arguments. Also note that neither ' nor " are considered printable.
# For escape_string to be reversible \ is also not considered printable.
def escape_string(string, replacer=list(map(
        lambda i: chr(i) if str2bytes(chr(i)).isalnum() or
                    chr(i) in '!#$%&()*+,-./:;<=>?@[]^_`{|}~ ' else
                    r"\x%2.2x" % i,
        range(256)))):
    """Encodes non-printable characters in a string using \\xXX escapes.

    Characters whose code point is not covered by the replacer table (with
    the default table: code points of 256 and above) are encoded as \\uXXXX
    or, beyond the basic multilingual plane, as \\UXXXXXXXX instead of
    raising an C{IndexError}.

    @type string: str
    @param replacer: table mapping a code point to its replacement string.
            The default is built once, when the function is defined, and is
            only ever read.
    @rtype: str
    """
    table_size = len(replacer)
    pieces = []
    for char in string:
        code = ord(char)
        if code < table_size:
            pieces.append(replacer[code])
        elif code <= 0xFFFF:
            pieces.append(r"\u%4.4x" % code)
        else:
            pieces.append(r"\U%8.8x" % code)
    return "".join(pieces)
209 210 __all__.append("RequestLogWSGIFilter")
class RequestLogWSGIFilter(BaseWSGIFilter):
    """This filter logs all requests in the apache log file format."""

    @classmethod
    def creator(cls, log, flush=True):
        """Returns a function creating L{RequestLogWSGIFilter}s on given log
        file. log has to be a file-like object.
        @type log: file-like
        @param log: elements of type str are written to the log. That means in
                Py3.X the contents are decoded and in Py2.X the log is assumed
                to be encoded in latin1. This follows the spirit of WSGI.
        @type flush: bool
        @param flush: if True, invoke the flush method on log after each
                write invocation
        """
        return lambda: cls(log, flush)

    def __init__(self, log=sys.stdout, flush=True):
        """
        @type log: file-like
        @param log: object with a C{write} method (and a C{flush} method if
                flush is true) that receives one line per request
        @type flush: bool
        @param flush: if True, invoke the flush method on log after each
                write invocation
        """
        BaseWSGIFilter.__init__(self)
        assert hasattr(log, "write")
        assert hasattr(log, "flush") or not flush
        self.log = log
        self.flush = flush
        self.remote = "?"
        self.user = "-"
        # %H:%M:%S is the portable spelling of the POSIX-only %T directive.
        self.time = time.strftime("%d/%b/%Y:%H:%M:%S %z")
        self.reqmethod = ""
        self.path = ""
        self.proto = None
        self.status = ""
        self.length = 0
        self.referrer = None
        self.useragent = None

    def filter_environ(self, environ):
        """BaseWSGIFilter interface

        Records the request details needed for the log line. The environ is
        returned unmodified.
        @type environ: {str: str}
        @rtype: {str: str}
        """
        assert isinstance(environ, dict)
        self.remote = environ.get("REMOTE_ADDR", self.remote)
        self.user = environ.get("REMOTE_USER", self.user)
        self.reqmethod = environ["REQUEST_METHOD"]
        # A server may omit SCRIPT_NAME and PATH_INFO when they are empty,
        # so a missing key is treated like an empty string.
        self.path = environ.get("SCRIPT_NAME", "") + \
                environ.get("PATH_INFO", "")
        if environ.get("QUERY_STRING"):
            self.path = "%s?%s" % (self.path, environ["QUERY_STRING"])
        self.proto = environ.get("SERVER_PROTOCOL", self.proto)
        self.referrer = environ.get("HTTP_REFERER", self.referrer)
        self.useragent = environ.get("HTTP_USER_AGENT", self.useragent)
        return environ

    def filter_status(self, status):
        """BaseWSGIFilter interface

        Remembers the first whitespace separated word of the status for the
        log line.
        @type status: str
        @rtype: str
        """
        assert isinstance(status, str)
        self.status = status.split()[0]
        return status

    def filter_data(self, data):
        """BaseWSGIFilter interface

        Adds the size of the data to the logged response length.
        @type data: bytes
        @rtype: bytes
        """
        assert isinstance(data, bytes)
        self.length += len(data)
        return data

    def handle_close(self):
        """BaseWSGIFilter interface

        Writes the log line for the finished request and flushes the log if
        requested."""
        line = '%s %s - [%s]' % (self.remote, self.user, self.time)
        line = '%s "%s %s' % (line, escape_string(self.reqmethod),
                              escape_string(self.path))
        if self.proto is not None:
            line = "%s %s" % (line, self.proto)
        line = '%s" %s %d' % (line, self.status, self.length)
        # Referrer and user agent are logged as " -" when unknown.
        if self.referrer is not None:
            line = '%s "%s"' % (line, escape_string(self.referrer))
        else:
            line += " -"
        if self.useragent is not None:
            line = '%s "%s"' % (line, escape_string(self.useragent))
        else:
            line += " -"
        self.log.write("%s\n" % line)
        if self.flush:
            self.log.flush()
295 296 __all__.append("TimerWSGIFilter")
class TimerWSGIFilter(BaseWSGIFilter):
    """Replaces a specific string in the data returned from the filtered wsgi
    application with the time the request took. The string is expected to be
    exactly eight bytes long (this is not checked), defaults to
    C{b"?GenTime"} and must be an element of its own in the iterable returned
    by the filtered application. If the application returns something like
    C{["spam?GenTime", "?GenTime spam", "?GenTime"]} only the last element
    gets replaced, because only it equals the pattern."""

    @classmethod
    def creator(cls, pattern):
        """Returns a function creating L{TimerWSGIFilter}s with a given pattern
        being a string of exactly eight bytes.
        @type pattern: bytes
        """
        def create_filter():
            return cls(pattern)
        return create_filter

    def __init__(self, pattern=b"?GenTime"):
        """Stores the pattern and takes the start time of the request.
        @type pattern: bytes
        """
        BaseWSGIFilter.__init__(self)
        assert isinstance(pattern, bytes)
        self.start = time.time()
        self.pattern = pattern

    def filter_data(self, data):
        """BaseWSGIFilter interface

        Data equal to the pattern is replaced by the number of seconds
        elapsed since construction, formatted with C{"%8.3g"}; anything else
        is passed through.
        @type data: bytes
        @rtype: bytes
        """
        if data != self.pattern:
            return data
        elapsed = time.time() - self.start
        return str2bytes("%8.3g" % elapsed)
327 328 __all__.append("EncodeWSGIFilter")
class EncodeWSGIFilter(BaseWSGIFilter):
    """Encodes all body data (no headers) with given charset.
    @note: This violates the wsgi standard as it requires unicode objects
        whereas wsgi mandates the use of bytes.
    """

    @classmethod
    def creator(cls, charset):
        """Returns a function creating L{EncodeWSGIFilter}s with a given
        charset.
        @type charset: str
        """
        def create_filter():
            return cls(charset)
        return create_filter

    def __init__(self, charset="utf-8"):
        """
        @type charset: str
        @param charset: name of the encoding applied to all body data and
                announced in the content-type header
        """
        BaseWSGIFilter.__init__(self)
        self.charset = charset

    def filter_data(self, data):
        """BaseWSGIFilter interface

        Encodes the data with the configured charset.
        @type data: str
        @rtype: bytes
        """
        encoded = data.encode(self.charset)
        return encoded

    def filter_header(self, header, value):
        """BaseWSGIFilter interface

        Appends a C{charset} parameter to the content-type header; every
        other header is passed through unchanged.
        @type header: str
        @type value: str
        @rtype: (str, str)
        """
        if header.lower() == "content-type":
            value = "%s; charset=%s" % (value, self.charset)
        return (header, value)
362 363 __all__.append("GzipWSGIFilter")
class GzipWSGIFilter(BaseWSGIFilter):
    """Compresses content using gzip."""

    @classmethod
    def creator(cls, flush=True):
        """
        Returns a function creating L{GzipWSGIFilter}s.
        @type flush: bool
        @param flush: whether or not the filter should always flush the buffer
        """
        return lambda: cls(flush)

    def __init__(self, flush=True):
        """
        @type flush: bool
        @param flush: whether or not the filter should always flush the buffer
        """
        BaseWSGIFilter.__init__(self)
        self.flush = flush
        # The following three are set up by filter_environ once the client
        # is known to accept gzip.
        self.compress = False
        self.sio = None
        self.gzip = None

    @staticmethod
    def _accepts_gzip(header):
        """Tells whether an Accept-Encoding header value lists C{gzip} with
        a non-zero quality. The coding name is compared case-insensitively
        and parameters such as C{;q=0.8} are understood; an unparsable
        quality counts as not acceptable.
        @type header: str
        @rtype: bool
        """
        for item in header.split(","):
            parts = item.split(";")
            if parts[0].strip().lower() != "gzip":
                continue
            for param in parts[1:]:
                name, _, value = param.partition("=")
                if name.strip().lower() == "q":
                    try:
                        return float(value.strip()) > 0
                    except ValueError:
                        return False
            # gzip listed without a quality value: acceptable
            return True
        return False

    def filter_environ(self, environ):
        """BaseWSGIFilter interface

        Enables compression if the request's Accept-Encoding header allows
        gzip. The environ is returned unmodified.
        @type environ: {str: str}
        """
        assert isinstance(environ, dict)
        if self._accepts_gzip(environ.get("HTTP_ACCEPT_ENCODING", "")):
            self.compress = True
            self.sio = io.BytesIO()
            self.gzip = gzip.GzipFile(fileobj=self.sio, mode="w")
        return environ

    def filter_header(self, headername, headervalue):
        """ BaseWSGIFilter interface

        Drops the content-length header when compressing, since it would
        describe the uncompressed size.
        @type headername: str
        @type headervalue: str
        @rtype: (str, str) or None
        """
        if self.compress:
            if headername.lower() == "content-length":
                return None
        return (headername, headervalue)

    def filter_headers(self, headers):
        """BaseWSGIFilter interface

        Announces the gzip content encoding when compressing.
        @type headers: [(str, str)]
        @rtype: [(str, str)]
        """
        assert isinstance(headers, list)
        if self.compress:
            headers.append(("Content-encoding", "gzip"))
        return headers

    def filter_data(self, data):
        """BaseWSGIFilter interface

        When compressing, feeds the data to the gzip stream and returns
        whatever compressed output has accumulated in the buffer so far.
        @type data: bytes
        @rtype: bytes
        """
        if not self.compress:
            return data
        self.gzip.write(data)
        if self.flush:
            self.gzip.flush()
        data = self.sio.getvalue()
        # Empty the buffer so the same output is not returned twice.
        self.sio.truncate(0)
        self.sio.seek(0)
        return data

    def append_data(self):
        """BaseWSGIFilter interface

        When compressing, closes the gzip stream and returns the output that
        is still in the buffer.
        @rtype: [bytes]
        """
        if not self.compress:
            return []
        self.gzip.close()
        data = self.sio.getvalue()
        return [data]
432
class ReusableWSGIInputFilter(BaseWSGIFilter):
    """Make C{environ["wsgi.input"]} readable multiple times. Although this is
    not required by the standard it is sometimes desirable to read C{wsgi.input}
    multiple times. This filter will therefore replace that variable with a
    C{BytesIO} instance which provides a C{seek} method.
    """

    @classmethod
    def creator(cls, maxrequestsize):
        """
        Returns a function creating L{ReusableWSGIInputFilter}s with desired
        maxrequestsize being set. If more data than maxrequestsize is
        available in C{wsgi.input} the rest will be ignored. (It is up to the
        adapter to eat this data.)
        @type maxrequestsize: int
        @param maxrequestsize: is the maximum number of bytes to store in the
            C{BytesIO}
        """
        def create_filter():
            return cls(maxrequestsize)
        return create_filter

    def __init__(self, maxrequestsize=65536):
        """ReusableWSGIInputFilters constructor.
        @type maxrequestsize: int
        @param maxrequestsize: is the maximum number of bytes to store in the
            C{BytesIO}, see L{creator}
        """
        BaseWSGIFilter.__init__(self)
        self.maxrequestsize = maxrequestsize

    def filter_environ(self, environ):
        """BaseWSGIFilter interface

        Reads at most C{maxrequestsize} bytes from C{wsgi.input} and replaces
        it with a C{BytesIO} holding them. The previous input object stays
        available as C{environ["wsgitools.oldinput"]}. An input that already
        is a C{BytesIO} is left alone.
        @type environ: {str: str}
        """
        original_input = environ["wsgi.input"]
        if isinstance(original_input, io.BytesIO):
            return environ # nothing to be done

        # XXX: is this really a good idea? use with care
        environ["wsgitools.oldinput"] = original_input
        buffered = io.BytesIO(original_input.read(self.maxrequestsize))
        environ["wsgi.input"] = buffered
        return environ
474