1 """
2 This module contains a generic way to create middlewares that filter data.
3 The work is mainly done by the L{WSGIFilterMiddleware} class. One can write
4 filters by extending the L{BaseWSGIFilter} class and passing this class
5 (not an instance) to the L{WSGIFilterMiddleware} constructor.
6 """
7
8 __all__ = []
9
10 import sys
11 import time
12 import gzip
13 import io
14
15 from wsgitools.internal import str2bytes
16
17 __all__.append("CloseableIterator")
19 """Concatenating iterator with close attribute."""
def __init__(self, close_function, *iterators):
    """Prepare the concatenation of the given iterables.

    @type close_function: a function or C{None}
    @param close_function: unless it is C{None}, it becomes the C{close}
            attribute of this object; with C{None} no attribute is set
            here.
    @param iterators: the iterables to concatenate; C{iter} is applied to
            each of them and the results are kept, in order, in
            C{self.iterators}.
    """
    self.iterators = [iter(source) for source in iterators]
    if close_function is None:
        return
    self.close = close_function
30 """iterator interface
31 @rtype: gen()
32 """
33 return self
35 """iterator interface"""
36 if not self.iterators:
37 raise StopIteration
38 try:
39 return next(self.iterators[0])
40 except StopIteration:
41 self.iterators.pop(0)
42 return next(self)
45
46 __all__.append("CloseableList")
48 """A list with a close attribute."""
def __init__(self, close_function, *args):
    """Build the list and optionally attach a C{close} attribute.

    @type close_function: a function or C{None}
    @param close_function: unless it is C{None}, it becomes the C{close}
            attribute of this list; with C{None} no attribute is set here.
    @param args: handed on unchanged to the C{list} constructor.
    """
    list.__init__(self, *args)
    if close_function is None:
        return
    self.close = close_function
62
63 __all__.append("BaseWSGIFilter")
65 """Generic WSGI filter class to be used with L{WSGIFilterMiddleware}.
66
67 For each request a filter object gets created.
68 The environment is then passed through L{filter_environ}.
69 Possible exceptions are filtered by L{filter_exc_info}.
70 After that for each C{(header, value)} tuple L{filter_header} is used.
71 The resulting list is filtered through L{filter_headers}.
72 Any data is filtered through L{filter_data}.
73 In order to possibly append data the L{append_data} method is invoked.
74 When the request has finished L{handle_close} is invoked.
75
76 All methods do not modify the passed data by default. Passing the
77 L{BaseWSGIFilter} class to a L{WSGIFilterMiddleware} will result in not
78 modifying requests at all.
79 """
81 """This constructor does nothing and can safely be overwritten. It is
82 only listed here to document that it must be callable without additional
83 parameters."""
85 """Receives a dict with the environment passed to the wsgi application
86 and a C{dict} must be returned. The default is to return the same dict.
87 @type environ: {str: str}
88 @rtype: {str: str}
89 """
90 return environ
92 """Receives either C{None} or a tuple passed as third argument to
93 C{start_response} from the wrapped wsgi application. Either C{None} or
94 such a tuple must be returned."""
95 return exc_info
97 """Receives a status string passed as first argument to
98 C{start_response} from the wrapped wsgi application. A valid HTTP status
99 string must be returned.
100 @type status: str
101 @rtype: str
102 """
103 return status
105 """This function is invoked for each C{(headername, headervalue)} tuple
106 in the second argument to the C{start_response} from the wrapped wsgi
107 application. Such a value or C{None} for discarding the header must be
108 returned.
109 @type headername: str
110 @type headervalue: str
111 @rtype: (str, str)
112 """
113 return (headername, headervalue)
115 """A list of headers passed as the second argument to the
116 C{start_response} from the wrapped wsgi application is passed to this
117 function and such a list must also be returned.
118 @type headers: [(str, str)]
119 @rtype: [(str, str)]
120 """
121 return headers
123 """For each string that is either written by the C{write} callable or
124 returned from the wrapped wsgi application this method is invoked. It
125 must return a string.
126 @type data: bytes
127 @rtype: bytes
128 """
129 return data
131 """This function can be used to append data to the response. A list of
132 strings or some kind of iterable yielding strings has to be returned.
133 The default is to return an empty list.
134 @rtype: gen([bytes])
135 """
136 return []
138 """This method is invoked after the request has finished."""
139
140 __all__.append("WSGIFilterMiddleware")
142 """This wsgi middleware can be used with specialized L{BaseWSGIFilter}s to
143 modify wsgi requests and/or responses."""
145 """
146 @param app: is a wsgi application.
147 @type filterclass: L{BaseWSGIFilter}s subclass
148 @param filterclass: is a subclass of L{BaseWSGIFilter} or some class
149 that implements the interface."""
150 self.app = app
151 self.filterclass = filterclass
def __call__(self, environ, start_response):
    """wsgi interface

    A fresh filter object is created from C{self.filterclass} for every
    call. The environment, the arguments of C{start_response}, every chunk
    of body data (written or returned) and the close notification are
    routed through that filter object.

    @type environ: {str: str}
    @param start_response: the C{start_response} callable of the server;
            it receives the filtered status, headers and exc_info.
    @rtype: gen([bytes])
    @return: a L{CloseableList} if the application returned a C{list},
            otherwise a L{CloseableIterator}. In both cases the C{close}
            attribute invokes C{handle_close} of the filter and then the
            C{close} method of the application's return value, if any.
    """
    assert isinstance(environ, dict)
    reqfilter = self.filterclass()
    environ = reqfilter.filter_environ(environ)

    def modified_start_response(status, headers, exc_info=None):
        assert isinstance(status, str)
        assert isinstance(headers, list)
        exc_info = reqfilter.filter_exc_info(exc_info)
        status = reqfilter.filter_status(status)
        # filter_header may return None to discard a header; such falsy
        # results are dropped before filter_headers sees the list.
        filtered = (reqfilter.filter_header(h, v) for h, v in headers)
        headers = [header for header in filtered if header]
        headers = reqfilter.filter_headers(headers)
        write = start_response(status, headers, exc_info)

        def modified_write(data):
            write(reqfilter.filter_data(data))
        return modified_write

    app_result = self.app(environ, modified_start_response)
    assert hasattr(app_result, "__iter__")

    def modified_close():
        # close must be looked up on the object the application returned,
        # not on an iterator derived from it: an iterable's __iter__ may
        # hand out a separate iterator object that has no close method.
        try:
            reqfilter.handle_close()
        finally:
            # Release the application's resources even if the filter's
            # handle_close raised.
            app_close = getattr(app_result, "close", None)
            if app_close is not None:
                app_close()

    if isinstance(app_result, list):
        # A list is filtered eagerly so that the result is a list again.
        return CloseableList(modified_close,
                             list(map(reqfilter.filter_data, app_result))
                             + list(reqfilter.append_data()))

    body = iter(app_result)

    def late_append_data():
        """Invoke C{reqfilter.append_data()} after C{filter_data()} has seen
        all data."""
        for data in reqfilter.append_data():
            yield data

    return CloseableIterator(modified_close,
                             (reqfilter.filter_data(data) for data in body),
                             late_append_data())
194
195
196
197
def escape_string(string, replacer=[
        chr(code)
        if (code < 128 and chr(code).isalnum())
        or chr(code) in '!#$%&()*+,-./:;<=>?@[]^_`{|}~ '
        else r"\x%2.2x" % code
        for code in range(256)]):
    """Encodes non-printable characters in a string using \\xXX escapes.

    ASCII letters and digits, the space and the punctuation characters
    listed in the default table are passed through unchanged. Every other
    character below code point 256 (this includes the double quote, the
    single quote and the backslash) becomes a \\xXX escape. Characters the
    table has no entry for are written as \\uXXXX or \\UXXXXXXXX instead of
    raising C{IndexError}.

    @type string: str
    @param replacer: lookup table indexed by code point. The default is
            computed once at definition time and must not be modified.
    @rtype: str
    """
    pieces = []
    for char in string:
        code = ord(char)
        try:
            pieces.append(replacer[code])
        except IndexError:
            # Outside the table (code point >= 256 with the default table):
            # \xXX cannot represent it, so use a wider escape.
            if code <= 0xFFFF:
                pieces.append("\\u%4.4x" % code)
            else:
                pieces.append("\\U%8.8x" % code)
    return "".join(pieces)
209
210 __all__.append("RequestLogWSGIFilter")
212 """This filter logs all requests in the apache log file format."""
@classmethod
def creator(cls, log, flush=True):
    """Build a factory for L{RequestLogWSGIFilter}s bound to one log.

    The returned callable takes no arguments and creates a new filter
    writing to C{log} each time it is invoked.

    @type log: file-like
    @param log: elements of type str are written to the log. That means in
            Py3.X the contents are decoded and in Py2.X the log is assumed
            to be encoded in latin1. This follows the spirit of WSGI.
    @type flush: bool
    @param flush: if True, invoke the flush method on log after each
            write invocation
    """
    def make_filter():
        return cls(log, flush)
    return make_filter
def __init__(self, log=sys.stdout, flush=True):
    """Initialise the per-request log record with placeholder values.

    The timestamp of the log entry is taken here, i.e. when the filter
    object is created. All other fields start out as placeholders.

    @type log: file-like
    @param log: object with a C{write} method; it also needs a C{flush}
            method if C{flush} is true.
    @type flush: bool
    @param flush: if True, invoke the flush method on log after each
            write invocation
    """
    BaseWSGIFilter.__init__(self)
    assert hasattr(log, "write")
    assert hasattr(log, "flush") or not flush
    self.log = log
    self.flush = flush
    self.remote = "?"
    self.user = "-"
    # "%H:%M:%S" is spelled out instead of "%T": the latter is not among
    # the directives Python documents as portable, whereas the former
    # yields the same text on every platform.
    self.time = time.strftime("%d/%b/%Y:%H:%M:%S %z")
    self.reqmethod = ""
    self.path = ""
    self.proto = None
    self.status = ""
    self.length = 0
    self.referrer = None
    self.useragent = None
249 """BaseWSGIFilter interface
250 @type environ: {str: str}
251 @rtype: {str: str}
252 """
253 assert isinstance(environ, dict)
254 self.remote = environ.get("REMOTE_ADDR", self.remote)
255 self.user = environ.get("REMOTE_USER", self.user)
256 self.reqmethod = environ["REQUEST_METHOD"]
257 self.path = environ["SCRIPT_NAME"] + environ["PATH_INFO"]
258 if environ.get("QUERY_STRING"):
259 self.path = "%s?%s" % (self.path, environ["QUERY_STRING"])
260 self.proto = environ.get("SERVER_PROTOCOL", self.proto)
261 self.referrer = environ.get("HTTP_REFERER", self.referrer)
262 self.useragent = environ.get("HTTP_USER_AGENT", self.useragent)
263 return environ
265 """BaseWSGIFilter interface
266 @type status: str
267 @rtype: str
268 """
269 assert isinstance(status, str)
270 self.status = status.split()[0]
271 return status
273 assert isinstance(data, bytes)
274 self.length += len(data)
275 return data
277 """BaseWSGIFilter interface"""
278 line = '%s %s - [%s]' % (self.remote, self.user, self.time)
279 line = '%s "%s %s' % (line, escape_string(self.reqmethod),
280 escape_string(self.path))
281 if self.proto is not None:
282 line = "%s %s" % (line, self.proto)
283 line = '%s" %s %d' % (line, self.status, self.length)
284 if self.referrer is not None:
285 line = '%s "%s"' % (line, escape_string(self.referrer))
286 else:
287 line += " -"
288 if self.useragent is not None:
289 line = '%s "%s"' % (line, escape_string(self.useragent))
290 else:
291 line += " -"
292 self.log.write("%s\n" % line)
293 if self.flush:
294 self.log.flush()
295
296 __all__.append("TimerWSGIFilter")
298 """Replaces a specific string in the data returned from the filtered wsgi
299 application with the time the request took. The string has to be exactly
300 eight bytes long, defaults to C{"?GenTime"} and must be an element of the
301 iterable returned by the filtered application. If the application returns
302 something like C{["spam?GenTime", "?GenTime spam", "?GenTime"]} only the
303 last occurrence gets replaced."""
304 @classmethod
306 """Returns a function creating L{TimerWSGIFilter}s with a given pattern
307 being a string of exactly eight bytes.
308 @type pattern: bytes
309 """
310 return lambda:cls(pattern)
def __init__(self, pattern=b"?GenTime"):
    """Remember the pattern and take the start time of the request.

    @type pattern: bytes
    @param pattern: stored as C{self.pattern}; the current time is stored
            as C{self.start}.
    """
    BaseWSGIFilter.__init__(self)
    assert isinstance(pattern, bytes)
    self.start = time.time()
    self.pattern = pattern
320 """BaseWSGIFilter interface
321 @type data: bytes
322 @rtype: bytes
323 """
324 if data == self.pattern:
325 return str2bytes("%8.3g" % (time.time() - self.start))
326 return data
327
328 __all__.append("EncodeWSGIFilter")
330 """Encodes all body data (no headers) with given charset.
331 @note: This violates the wsgi standard as it requires unicode objects
332 whereas wsgi mandates the use of bytes.
333 """
334 @classmethod
336 """Returns a function creating L{EncodeWSGIFilter}s with a given
337 charset.
338 @type charset: str
339 """
340 return lambda:cls(charset)
348 """BaseWSGIFilter interface
349 @type data: str
350 @rtype: bytes
351 """
352 return data.encode(self.charset)
354 """BaseWSGIFilter interface
355 @type header: str
356 @type value: str
357 @rtype: (str, str)
358 """
359 if header.lower() != "content-type":
360 return (header, value)
361 return (header, "%s; charset=%s" % (value, self.charset))
362
363 __all__.append("GzipWSGIFilter")
365 """Compresses content using gzip."""
366 @classmethod
368 """
369 Returns a function creating L{GzipWSGIFilter}s.
370 @type flush: bool
371 @param flush: whether or not the filter should always flush the buffer
372 """
373 return lambda:cls(flush)
375 """
376 @type flush: bool
377 @param flush: whether or not the filter should always flush the buffer
378 """
379 BaseWSGIFilter.__init__(self)
380 self.flush = flush
381 self.compress = False
382 self.sio = None
383 self.gzip = None
385 """BaseWSGIFilter interface
386 @type environ: {str: str}
387 """
388 assert isinstance(environ, dict)
389 if "HTTP_ACCEPT_ENCODING" in environ:
390 acceptenc = environ["HTTP_ACCEPT_ENCODING"].split(',')
391 acceptenc = map(str.strip, acceptenc)
392 if "gzip" in acceptenc:
393 self.compress = True
394 self.sio = io.BytesIO()
395 self.gzip = gzip.GzipFile(fileobj=self.sio, mode="w")
396 return environ
398 """ BaseWSGIFilter interface
399 @type headername: str
400 @type headervalue: str
401 @rtype: (str, str) or None
402 """
403 if self.compress:
404 if headername.lower() == "content-length":
405 return None
406 return (headername, headervalue)
408 """BaseWSGIFilter interface
409 @type headers: [(str, str)]
410 @rtype: [(str, str)]
411 """
412 assert isinstance(headers, list)
413 if self.compress:
414 headers.append(("Content-encoding", "gzip"))
415 return headers
417 if not self.compress:
418 return data
419 self.gzip.write(data)
420 if self.flush:
421 self.gzip.flush()
422 data = self.sio.getvalue()
423 self.sio.truncate(0)
424 self.sio.seek(0)
425 return data
427 if not self.compress:
428 return []
429 self.gzip.close()
430 data = self.sio.getvalue()
431 return [data]
432
474