forked from twisted/treq
-
Notifications
You must be signed in to change notification settings - Fork 0
/
content.py
125 lines (88 loc) · 3.58 KB
/
content.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import cgi
import json
from twisted.internet.defer import Deferred, succeed
from twisted.internet.protocol import Protocol
from twisted.web.client import ResponseDone
from twisted.web.http import PotentialDataLoss
def _encoding_from_headers(headers):
    """
    Determine the character encoding declared by the ``Content-Type`` header.

    When several ``Content-Type`` headers are present only the last one is
    considered.  An explicit, non-empty ``charset`` parameter wins; failing
    that, an ``application/json`` media type (matched case-insensitively)
    implies ``UTF-8``.

    :param headers: An object whose ``getRawHeaders(name)`` returns a list of
        header values, or ``None`` when the header is absent.

    :rtype: str or None
    :returns: The charset with surrounding quote characters removed,
        ``'UTF-8'`` for JSON without a usable charset, or ``None`` when no
        encoding can be determined.
    """
    # ``cgi.parse_header`` is deprecated by PEP 594 and removed in Python
    # 3.13; ``email.message.Message`` is the documented replacement.  The
    # imports are local so this function carries its own dependencies.
    from email.message import Message
    from email.utils import collapse_rfc2231_value

    content_types = headers.getRawHeaders(u'content-type')
    # Covers both a missing header (None) and an empty list of values.
    if not content_types:
        return None

    # This seems to be the choice browsers make when encountering multiple
    # content-type headers.
    message = Message()
    message['content-type'] = content_types[-1]

    charset = message.get_param('charset')
    if charset is not None:
        # RFC 2231 encoded parameters come back as a 3-tuple; collapse them
        # to a plain string before stripping stray quote characters.
        charset = collapse_rfc2231_value(charset).strip("'\"")
        # An empty charset is useless to ``decode``; treat it as absent.
        if charset:
            return charset

    # get_content_type() lower-cases the media type, which makes this
    # comparison case-insensitive.
    if message.get_content_type() == 'application/json':
        return 'UTF-8'
    return None
class _BodyCollector(Protocol):
    """
    Protocol that passes each received chunk to a callable and fires a
    Deferred once the connection is lost.
    """

    def __init__(self, finished, collector):
        """
        :param finished: Deferred that is fired from ``connectionLost``.
        :param collector: Callable invoked with every chunk of data received.
        """
        self.collector = collector
        self.finished = finished

    def dataReceived(self, data):
        """
        Forward a chunk of data to the collector.
        """
        self.collector(data)

    def connectionLost(self, reason):
        """
        Fire ``finished``: with ``None`` when ``reason`` is ``ResponseDone``
        or ``PotentialDataLoss``, otherwise with ``reason`` as the error.
        """
        # PotentialDataLoss is deliberately treated as success, see
        # http://twistedmatrix.com/trac/ticket/4840
        completed = (
            reason.check(ResponseDone) or reason.check(PotentialDataLoss)
        )
        if completed:
            self.finished.callback(None)
            return
        self.finished.errback(reason)
def collect(response, collector):
    """
    Stream the body of a response to ``collector`` as it arrives.

    This function may only be called **once** for a given response.

    When ``response.length`` is ``0`` the body is not requested at all and
    an already-fired Deferred is returned.

    :param IResponse response: The HTTP response to collect the body from.

    :param collector: A callable to be called each time data is available
        from the response body.
    :type collector: single argument callable

    :rtype: Deferred that fires with None when the entire body has been read.
    """
    if response.length != 0:
        finished = Deferred()
        response.deliverBody(_BodyCollector(finished, collector))
        return finished

    # Nothing to read: report completion immediately.
    return succeed(None)
def content(response):
    """
    Read the entire body of an HTTP response.

    Every chunk delivered through :py:func:`collect` is kept in a list and
    joined into one byte string once the body is complete.

    NOTE(review): this function keeps no cache of its own; each call runs
    :py:func:`collect` again.  Whether it may be called more than once for
    the same response therefore depends on the response object -- confirm
    against the caller.

    :param IResponse response: The HTTP Response to get the contents of.

    :rtype: Deferred that fires with the content as a byte string.
    """
    chunks = []

    def _join(_ignored):
        # The result of collect() is always None; the data is in ``chunks``.
        return b''.join(chunks)

    finished = collect(response, chunks.append)
    finished.addCallback(_join)
    return finished
def json_content(response, **kwargs):
    """
    Read the contents of an HTTP response and parse the text as JSON.

    The body is obtained through :py:func:`text_content` with ``utf-8`` as
    the fallback encoding, then handed to :py:func:`json.loads`.

    NOTE(review): whether this may be called more than once for a given
    response depends on :py:func:`content` -- confirm.

    :param IResponse response: The HTTP Response to get the contents of.

    :param kwargs: Any keyword arguments accepted by :py:func:`json.loads`

    :rtype: Deferred that fires with the decoded JSON.
    """
    def _parse(decoded):
        return json.loads(decoded, **kwargs)

    # RFC7159 (8.1): Default JSON character encoding is UTF-8
    pending = text_content(response, encoding='utf-8')
    pending.addCallback(_parse)
    return pending
def text_content(response, encoding='ISO-8859-1'):
    """
    Read the contents of an HTTP response and decode the bytes to text.

    The charset reported by ``_encoding_from_headers`` for the response
    headers takes precedence; ``encoding`` is used only when that returns
    ``None``.

    :param IResponse response: The HTTP Response to get the contents of.

    :param str encoding: A charset, such as ``UTF-8`` or ``ISO-8859-1``,
        used if the response does not specify an encoding.

    :rtype: Deferred that fires with a unicode string.
    """
    def _decode_content(body):
        # The headers are consulted only once the whole body is available.
        charset = _encoding_from_headers(response.headers)
        if charset is None:
            charset = encoding
        return body.decode(charset)

    decoded = content(response)
    decoded.addCallback(_decode_content)
    return decoded