root/scrapy/tests/test_http_response.py @ 1809:ad834af83918

Revision 1809:ad834af83918, 11.3 kB (checked in by Pablo Hoffman <pablo@…>, 11 months ago)

added DEFAULT_RESPONSE_ENCODING setting

Line 
1import unittest
2import weakref
3
4from scrapy.http import Response, TextResponse, HtmlResponse, XmlResponse, Headers
5from scrapy.conf import settings
6
7
class BaseResponseTest(unittest.TestCase):
    """Tests for the behaviour exercised through ``response_class``.

    Subclasses override ``response_class`` so that every test defined here is
    run again against that class.

    Checks use the unittest assertion methods rather than bare ``assert``
    statements, so they are still executed when Python runs with ``-O``.
    """

    response_class = Response

    def test_init(self):
        # Response requires url in the constructor
        self.assertRaises(Exception, self.response_class)
        self.assertTrue(isinstance(self.response_class('http://example.com/'), self.response_class))
        # body can be str or None
        self.assertTrue(isinstance(self.response_class('http://example.com/', body=''), self.response_class))
        self.assertTrue(isinstance(self.response_class('http://example.com/', body='body'), self.response_class))
        # test presence of all optional parameters
        self.assertTrue(isinstance(self.response_class('http://example.com/', headers={}, status=200, body=''), self.response_class))

        # default attribute values when only the url is given
        r = self.response_class("http://www.example.com")
        self.assertTrue(isinstance(r.url, str))
        self.assertEqual(r.url, "http://www.example.com")
        self.assertEqual(r.status, 200)

        self.assertTrue(isinstance(r.headers, Headers))
        self.assertEqual(r.headers, {})
        self.assertEqual(r.meta, {})

        meta = {"lala": "lolo"}
        headers = {"caca": "coco"}
        body = "a body"
        r = self.response_class("http://www.example.com", meta=meta, headers=headers, body=body)

        # the response must hold its own meta/headers objects with equal
        # contents, not the very objects passed in
        self.assertTrue(r.meta is not meta)
        self.assertEqual(r.meta, meta)
        self.assertTrue(r.headers is not headers)
        self.assertEqual(r.headers["caca"], "coco")

        # status may be given as int or numeric string; anything else is an error
        r = self.response_class("http://www.example.com", status=301)
        self.assertEqual(r.status, 301)
        r = self.response_class("http://www.example.com", status='301')
        self.assertEqual(r.status, 301)
        self.assertRaises(ValueError, self.response_class, "http://example.com", status='lala200')

    def test_copy(self):
        """Test Response copy"""

        r1 = self.response_class("http://www.example.com", body="Some body")
        r1.meta['foo'] = 'bar'
        r1.flags.append('cached')
        r2 = r1.copy()

        self.assertEqual(r1.status, r2.status)
        self.assertEqual(r1.body, r2.body)

        # make sure meta dict is shallow copied
        self.assertTrue(r1.meta is not r2.meta, "meta must be a shallow copy, not identical")
        self.assertEqual(r1.meta, r2.meta)

        # make sure flags list is shallow copied
        self.assertTrue(r1.flags is not r2.flags, "flags must be a shallow copy, not identical")
        self.assertEqual(r1.flags, r2.flags)

        # make sure headers attribute is shallow copied
        self.assertTrue(r1.headers is not r2.headers, "headers must be a shallow copy, not identical")
        self.assertEqual(r1.headers, r2.headers)

    def test_copy_inherited_classes(self):
        """Test Response children copies preserve their class"""

        class CustomResponse(self.response_class):
            pass

        r1 = CustomResponse('http://www.example.com')
        r2 = r1.copy()

        # exact type comparison on purpose: a base-class instance would not do
        self.assertTrue(type(r2) is CustomResponse)

    def test_replace(self):
        """Test Response.replace() method"""
        hdrs = Headers({"key": "value"})
        r1 = self.response_class("http://www.example.com")
        r2 = r1.replace(status=301, body="New body", headers=hdrs)
        # the original response must be left untouched
        self.assertEqual(r1.body, '')
        self.assertEqual(r1.url, r2.url)
        self.assertEqual((r1.status, r2.status), (200, 301))
        self.assertEqual((r1.body, r2.body), ('', "New body"))
        self.assertEqual((r1.headers, r2.headers), ({}, hdrs))

        # Empty attributes (which may fail if not compared properly)
        r3 = self.response_class("http://www.example.com", meta={'a': 1}, flags=['cached'])
        r4 = r3.replace(body='', meta={}, flags=[])
        self.assertEqual(r4.body, '')
        self.assertEqual(r4.meta, {})
        self.assertEqual(r4.flags, [])

    def test_weakref_slots(self):
        """Check that classes are using slots and are weak-referenceable"""
        x = self.response_class('http://www.example.com')
        # raises TypeError if instances cannot be weakly referenced
        weakref.ref(x)
        self.assertFalse(hasattr(x, '__dict__'), "%s does not use __slots__" % \
            x.__class__.__name__)

    def _assert_response_values(self, response, encoding, body):
        """Check a response's encoding and both representations of its body.

        ``body`` may be a ``str`` or a ``unicode`` object; the other form is
        derived from it using ``encoding``. The response must report
        ``encoding``, expose the ``str`` form as ``body`` and return the
        ``unicode`` form from ``body_as_unicode()``.
        """
        if isinstance(body, unicode):
            body_unicode = body
            body_str = body.encode(encoding)
        else:
            body_unicode = body.decode(encoding)
            body_str = body

        self.assertTrue(isinstance(response.body, str))
        self.assertEqual(response.encoding, encoding)
        self.assertEqual(response.body, body_str)
        self.assertEqual(response.body_as_unicode(), body_unicode)
118
class ResponseText(BaseResponseTest):
    """Runs the base tests against the default ``response_class`` and adds a
    check on unicode urls."""

    def test_no_unicode_url(self):
        # a unicode url must be rejected with TypeError
        unicode_url = u'http://www.example.com'
        self.assertRaises(TypeError, self.response_class, unicode_url)
123   
124
class TextResponseTest(BaseResponseTest):
    """Runs the base tests against ``TextResponse`` and adds encoding checks
    for unicode urls and bodies.

    Checks use the unittest assertion methods rather than bare ``assert``
    statements, so they are still executed when Python runs with ``-O``.
    """

    response_class = TextResponse

    def test_replace(self):
        super(TextResponseTest, self).test_replace()
        r1 = self.response_class("http://www.example.com", body="hello", encoding="cp852")
        r2 = r1.replace(url="http://www.example.com/other")
        r3 = r1.replace(url="http://www.example.com/other", encoding="latin1")

        # the encoding is kept unless replace() is given a new one
        self.assertTrue(isinstance(r2, self.response_class))
        self.assertEqual(r2.url, "http://www.example.com/other")
        self.assertEqual(r2.encoding, "cp852")
        self.assertEqual(r3.url, "http://www.example.com/other")
        self.assertEqual(r3.encoding, "latin1")

    def test_unicode_url(self):
        # instantiate with unicode url without encoding (should set default encoding)
        resp = self.response_class(u"http://www.example.com/")
        self.assertEqual(resp.encoding, settings['DEFAULT_RESPONSE_ENCODING'])

        # make sure urls are converted to str
        resp = self.response_class(url=u"http://www.example.com/", encoding='utf-8')
        self.assertTrue(isinstance(resp.url, str))

        # the url is encoded with the explicit encoding argument ...
        resp = self.response_class(url=u"http://www.example.com/price/\xa3", encoding='utf-8')
        self.assertEqual(resp.url, 'http://www.example.com/price/\xc2\xa3')
        resp = self.response_class(url=u"http://www.example.com/price/\xa3", encoding='latin-1')
        self.assertEqual(resp.url, 'http://www.example.com/price/\xa3')
        # ... also when the url is assigned after construction ...
        resp = self.response_class(url="http://www.example.com/price/", encoding='utf-8')
        resp.url = u'http://www.example.com/price/\xa3'
        self.assertEqual(resp.url, 'http://www.example.com/price/\xc2\xa3')
        # ... or with the charset declared in the Content-type header
        resp = self.response_class(u"http://www.example.com/price/\xa3", headers={"Content-type": ["text/html; charset=utf-8"]})
        self.assertEqual(resp.url, 'http://www.example.com/price/\xc2\xa3')
        resp = self.response_class(u"http://www.example.com/price/\xa3", headers={"Content-type": ["text/html; charset=iso-8859-1"]})
        self.assertEqual(resp.url, 'http://www.example.com/price/\xa3')

    def test_unicode_body(self):
        unicode_string = u'\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0447\u0435\u0441\u043a\u0438\u0439 \u0442\u0435\u043a\u0441\u0442'
        # a unicode body without an encoding is rejected
        self.assertRaises(TypeError, self.response_class, 'http://www.example.com', body=u'unicode body')

        original_string = unicode_string.encode('cp1251')
        r1 = self.response_class('http://www.example.com', body=original_string, encoding='cp1251')

        # check body_as_unicode
        self.assertTrue(isinstance(r1.body_as_unicode(), unicode))
        self.assertEqual(r1.body_as_unicode(), unicode_string)

    def test_encoding(self):
        r1 = self.response_class("http://www.example.com", headers={"Content-type": ["text/html; charset=utf-8"]}, body="\xc2\xa3")
        r2 = self.response_class("http://www.example.com", encoding='utf-8', body=u"\xa3")
        r3 = self.response_class("http://www.example.com", headers={"Content-type": ["text/html; charset=iso-8859-1"]}, body="\xa3")
        r4 = self.response_class("http://www.example.com", body="\xa2\xa3")

        self.assertEqual(r1.headers_encoding(), "utf-8")
        self.assertEqual(r2.headers_encoding(), None)
        self.assertEqual(r2.encoding, 'utf-8')
        self.assertEqual(r3.headers_encoding(), "iso-8859-1")
        self.assertEqual(r3.encoding, 'iso-8859-1')
        self.assertEqual(r4.headers_encoding(), None)
        # with no header and no explicit encoding, an encoding must still be
        # reported for the non-ascii body, and it must not be plain ascii
        self.assertTrue(r4.body_encoding() is not None)
        self.assertNotEqual(r4.body_encoding(), 'ascii')
        self._assert_response_values(r1, 'utf-8', u"\xa3")
        self._assert_response_values(r2, 'utf-8', u"\xa3")
        self._assert_response_values(r3, 'iso-8859-1', u"\xa3")

        # TextResponse (and subclasses) must be passed a encoding when instantiating with unicode bodies
        self.assertRaises(TypeError, self.response_class, "http://www.example.com", body=u"\xa3")
192
class HtmlResponseTest(TextResponseTest):
    """Runs the text response tests against ``HtmlResponse`` and adds checks
    for encodings declared inside the document body."""

    response_class = HtmlResponse

    def test_html_encoding(self):
        url = "http://www.example.com"
        expected_encoding = 'iso-8859-1'

        # charset declared in a <meta http-equiv="Content-Type"> tag
        meta_body = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
        </head><body>Price: \xa3100</body></html>'
        """
        meta_response = self.response_class(url, body=meta_body)
        self._assert_response_values(meta_response, expected_encoding, meta_body)

        # charset declared in an XML prolog
        prolog_body = """<?xml version="1.0" encoding="iso-8859-1"?>
        <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
        Price: \xa3100
        """
        prolog_response = self.response_class(url, body=prolog_body)
        self._assert_response_values(prolog_response, expected_encoding, prolog_body)

        # for conflicting declarations headers must take precedence
        conflicting_body = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=utf-8">
        </head><body>Price: \xa3100</body></html>'
        """
        latin1_headers = {"Content-type": ["text/html; charset=iso-8859-1"]}
        header_response = self.response_class(url, headers=latin1_headers, body=conflicting_body)
        self._assert_response_values(header_response, expected_encoding, conflicting_body)

        # make sure replace() preserves the encoding of the original response
        new_body = "New body \xa3"
        replaced = header_response.replace(body=new_body)
        self._assert_response_values(replaced, expected_encoding, new_body)
223
224
225
class XmlResponseTest(TextResponseTest):
    """Runs the text response tests against ``XmlResponse`` and adds checks
    for encodings declared in the XML prolog."""

    response_class = XmlResponse

    def test_xml_encoding(self):
        url = "http://www.example.com"

        # no declaration at all: the configured default encoding is expected
        plain_doc = "<xml></xml>"
        plain = self.response_class(url, body=plain_doc)
        self._assert_response_values(plain, settings['DEFAULT_RESPONSE_ENCODING'], plain_doc)

        # encoding declared in the XML prolog
        latin1_doc = """<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>"""
        declared = self.response_class(url, body=latin1_doc)
        self._assert_response_values(declared, 'iso-8859-1', latin1_doc)

        # make sure replace() preserves the explicit encoding passed in the constructor
        latin1_doc = """<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>"""
        explicit = self.response_class(url, body=latin1_doc, encoding='utf-8')
        replacement = "New body"
        explicit_replaced = explicit.replace(body=replacement)
        self._assert_response_values(explicit_replaced, 'utf-8', replacement)

        # make sure replace() rediscovers the encoding (if not given explicitly) when changing the body
        latin1_doc = """<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>"""
        before = self.response_class(url, body=latin1_doc)
        utf8_doc = """<?xml version="1.0" encoding="utf-8"?><xml></xml>"""
        after = before.replace(body=utf8_doc)
        self._assert_response_values(before, 'iso-8859-1', latin1_doc)
        self._assert_response_values(after, 'utf-8', utf8_doc)
254
255
# Run the tests in this module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Note: See TracBrowser for help on using the browser.