| 1 | import unittest |
|---|
| 2 | import weakref |
|---|
| 3 | |
|---|
| 4 | from scrapy.http import Response, TextResponse, HtmlResponse, XmlResponse, Headers |
|---|
| 5 | from scrapy.conf import settings |
|---|
| 6 | |
|---|
| 7 | |
|---|
class BaseResponseTest(unittest.TestCase):
    """Test cases shared by ``Response`` and the classes derived from it.

    Subclasses set ``response_class`` to the class they want exercised; every
    test defined here builds its instances through that attribute.
    """

    # Class under test; overridden by the subclasses of this test case.
    response_class = Response

    def test_init(self):
        """Check constructor arguments and the attributes they produce."""
        # Response requires url in the constructor
        self.assertRaises(Exception, self.response_class)
        self.assertTrue(isinstance(self.response_class('http://example.com/'), self.response_class))
        # body can be str or None
        self.assertTrue(isinstance(self.response_class('http://example.com/', body=''), self.response_class))
        self.assertTrue(isinstance(self.response_class('http://example.com/', body='body'), self.response_class))
        # test presence of all optional parameters
        self.assertTrue(isinstance(self.response_class('http://example.com/', headers={}, status=200, body=''), self.response_class))

        r = self.response_class("http://www.example.com")
        self.assertTrue(isinstance(r.url, str))
        self.assertEqual(r.url, "http://www.example.com")
        self.assertEqual(r.status, 200)

        self.assertTrue(isinstance(r.headers, Headers))
        self.assertEqual(r.headers, {})
        self.assertEqual(r.meta, {})

        meta = {"lala": "lolo"}
        headers = {"caca": "coco"}
        body = "a body"
        r = self.response_class("http://www.example.com", meta=meta, headers=headers, body=body)

        # the response must keep its own objects, not the ones passed in
        self.assertTrue(r.meta is not meta)
        self.assertEqual(r.meta, meta)
        self.assertTrue(r.headers is not headers)
        self.assertEqual(r.headers["caca"], "coco")

        # status may be given as an int or as a numeric string
        r = self.response_class("http://www.example.com", status=301)
        self.assertEqual(r.status, 301)
        r = self.response_class("http://www.example.com", status='301')
        self.assertEqual(r.status, 301)
        self.assertRaises(ValueError, self.response_class, "http://example.com", status='lala200')

    def test_copy(self):
        """Test Response copy"""

        r1 = self.response_class("http://www.example.com", body="Some body")
        r1.meta['foo'] = 'bar'
        r1.flags.append('cached')
        r2 = r1.copy()

        self.assertEqual(r1.status, r2.status)
        self.assertEqual(r1.body, r2.body)

        # make sure meta dict is shallow copied
        self.assertTrue(r1.meta is not r2.meta, "meta must be a shallow copy, not identical")
        self.assertEqual(r1.meta, r2.meta)

        # make sure flags list is shallow copied
        self.assertTrue(r1.flags is not r2.flags, "flags must be a shallow copy, not identical")
        self.assertEqual(r1.flags, r2.flags)

        # make sure headers attribute is shallow copied
        self.assertTrue(r1.headers is not r2.headers, "headers must be a shallow copy, not identical")
        self.assertEqual(r1.headers, r2.headers)

    def test_copy_inherited_classes(self):
        """Test Response children copies preserve their class"""

        class CustomResponse(self.response_class):
            pass

        r1 = CustomResponse('http://www.example.com')
        r2 = r1.copy()

        self.assertTrue(type(r2) is CustomResponse)

    def test_replace(self):
        """Test Response.replace() method"""
        hdrs = Headers({"key": "value"})
        r1 = self.response_class("http://www.example.com")
        r2 = r1.replace(status=301, body="New body", headers=hdrs)
        # the original response must be left untouched
        self.assertEqual(r1.body, '')
        self.assertEqual(r1.url, r2.url)
        self.assertEqual((r1.status, r2.status), (200, 301))
        self.assertEqual((r1.body, r2.body), ('', "New body"))
        self.assertEqual((r1.headers, r2.headers), ({}, hdrs))

        # Empty attributes (which may fail if not compared properly)
        r3 = self.response_class("http://www.example.com", meta={'a': 1}, flags=['cached'])
        r4 = r3.replace(body='', meta={}, flags=[])
        self.assertEqual(r4.body, '')
        self.assertEqual(r4.meta, {})
        self.assertEqual(r4.flags, [])

    def test_weakref_slots(self):
        """Check that classes are using slots and are weak-referenceable"""
        x = self.response_class('http://www.example.com')
        # raises TypeError if the instance cannot be weakly referenced
        weakref.ref(x)
        self.assertFalse(hasattr(x, '__dict__'),
                         "%s does not use __slots__" % x.__class__.__name__)

    def _assert_response_values(self, response, encoding, body):
        """Assert that ``response`` reports ``encoding`` and carries ``body``.

        ``body`` may be given as ``unicode`` or ``str``; the other form is
        derived from it with ``encoding`` and both are compared against the
        response (``body`` and ``body_as_unicode()`` respectively).
        """
        if isinstance(body, unicode):
            body_unicode = body
            body_str = body.encode(encoding)
        else:
            body_unicode = body.decode(encoding)
            body_str = body

        self.assertTrue(isinstance(response.body, str))
        self.assertEqual(response.encoding, encoding)
        self.assertEqual(response.body, body_str)
        self.assertEqual(response.body_as_unicode(), body_unicode)
|---|
| 118 | |
|---|
class ResponseText(BaseResponseTest):
    """Base tests plus a check that is specific to the inherited ``response_class``.

    NOTE(review): the class name looks like a typo for ``ResponseTest`` --
    confirm before renaming, since the name is what test runners report.
    """

    def test_no_unicode_url(self):
        """Constructing the response with a unicode url must raise TypeError."""
        unicode_url = u'http://www.example.com'
        self.assertRaises(TypeError, self.response_class, unicode_url)
|---|
| 123 | |
|---|
| 124 | |
|---|
class TextResponseTest(BaseResponseTest):
    """Run the base tests against ``TextResponse`` and add encoding-related ones."""

    response_class = TextResponse

    def test_replace(self):
        """replace() keeps the encoding unless a new one is passed explicitly."""
        super(TextResponseTest, self).test_replace()
        r1 = self.response_class("http://www.example.com", body="hello", encoding="cp852")
        r2 = r1.replace(url="http://www.example.com/other")
        r3 = r1.replace(url="http://www.example.com/other", encoding="latin1")

        self.assertTrue(isinstance(r2, self.response_class))
        self.assertEqual(r2.url, "http://www.example.com/other")
        self.assertEqual(r2.encoding, "cp852")
        self.assertEqual(r3.url, "http://www.example.com/other")
        self.assertEqual(r3.encoding, "latin1")

    def test_unicode_url(self):
        """Unicode urls are accepted and stored as str in the response encoding."""
        # instantiate with unicode url without encoding (should set default encoding)
        resp = self.response_class(u"http://www.example.com/")
        self.assertEqual(resp.encoding, settings['DEFAULT_RESPONSE_ENCODING'])

        # make sure urls are converted to str
        resp = self.response_class(url=u"http://www.example.com/", encoding='utf-8')
        self.assertTrue(isinstance(resp.url, str))

        # encoding passed explicitly to the constructor
        resp = self.response_class(url=u"http://www.example.com/price/\xa3", encoding='utf-8')
        self.assertEqual(resp.url, 'http://www.example.com/price/\xc2\xa3')
        resp = self.response_class(url=u"http://www.example.com/price/\xa3", encoding='latin-1')
        self.assertEqual(resp.url, 'http://www.example.com/price/\xa3')
        # url assigned after construction
        resp = self.response_class(url="http://www.example.com/price/", encoding='utf-8')
        resp.url = u'http://www.example.com/price/\xa3'
        self.assertEqual(resp.url, 'http://www.example.com/price/\xc2\xa3')
        # encoding declared only in the Content-type header
        resp = self.response_class(u"http://www.example.com/price/\xa3", headers={"Content-type": ["text/html; charset=utf-8"]})
        self.assertEqual(resp.url, 'http://www.example.com/price/\xc2\xa3')
        resp = self.response_class(u"http://www.example.com/price/\xa3", headers={"Content-type": ["text/html; charset=iso-8859-1"]})
        self.assertEqual(resp.url, 'http://www.example.com/price/\xa3')

    def test_unicode_body(self):
        """body_as_unicode() decodes the body with the response encoding."""
        unicode_string = u'\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0447\u0435\u0441\u043a\u0438\u0439 \u0442\u0435\u043a\u0441\u0442'
        # a unicode body without an encoding is rejected
        self.assertRaises(TypeError, self.response_class, 'http://www.example.com', body=u'unicode body')

        original_string = unicode_string.encode('cp1251')
        r1 = self.response_class('http://www.example.com', body=original_string, encoding='cp1251')

        # check body_as_unicode
        self.assertTrue(isinstance(r1.body_as_unicode(), unicode))
        self.assertEqual(r1.body_as_unicode(), unicode_string)

    def test_encoding(self):
        """Check encoding resolution from headers, constructor argument and body."""
        r1 = self.response_class("http://www.example.com", headers={"Content-type": ["text/html; charset=utf-8"]}, body="\xc2\xa3")
        r2 = self.response_class("http://www.example.com", encoding='utf-8', body=u"\xa3")
        r3 = self.response_class("http://www.example.com", headers={"Content-type": ["text/html; charset=iso-8859-1"]}, body="\xa3")
        r4 = self.response_class("http://www.example.com", body="\xa2\xa3")

        self.assertEqual(r1.headers_encoding(), "utf-8")
        self.assertEqual(r2.headers_encoding(), None)
        self.assertEqual(r2.encoding, 'utf-8')
        self.assertEqual(r3.headers_encoding(), "iso-8859-1")
        self.assertEqual(r3.encoding, 'iso-8859-1')
        self.assertEqual(r4.headers_encoding(), None)
        # a non-ascii body with no declared encoding must still yield a
        # body encoding, and it must not be plain ascii
        r4_body_encoding = r4.body_encoding()
        self.assertTrue(r4_body_encoding is not None)
        self.assertNotEqual(r4_body_encoding, 'ascii')
        self._assert_response_values(r1, 'utf-8', u"\xa3")
        self._assert_response_values(r2, 'utf-8', u"\xa3")
        self._assert_response_values(r3, 'iso-8859-1', u"\xa3")

        # TextResponse (and subclasses) must be passed a encoding when instantiating with unicode bodies
        self.assertRaises(TypeError, self.response_class, "http://www.example.com", body=u"\xa3")
|---|
| 192 | |
|---|
class HtmlResponseTest(TextResponseTest):
    """Run the text response tests against ``HtmlResponse``, plus HTML encoding checks."""

    response_class = HtmlResponse

    def test_html_encoding(self):
        """Encoding comes from the markup declaration unless the headers give one."""
        url = "http://www.example.com"

        # encoding declared through a <meta http-equiv> tag
        meta_markup = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head><body>Price: \xa3100</body></html>'
"""
        from_meta = self.response_class(url, body=meta_markup)
        self._assert_response_values(from_meta, 'iso-8859-1', meta_markup)

        # encoding declared through an XML declaration
        xml_decl_markup = """<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
Price: \xa3100
"""
        from_xml_decl = self.response_class(url, body=xml_decl_markup)
        self._assert_response_values(from_xml_decl, 'iso-8859-1', xml_decl_markup)

        # for conflicting declarations headers must take precedence
        conflicting_markup = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head><body>Price: \xa3100</body></html>'
"""
        latin1_headers = {"Content-type": ["text/html; charset=iso-8859-1"]}
        from_headers = self.response_class(url, headers=latin1_headers, body=conflicting_markup)
        self._assert_response_values(from_headers, 'iso-8859-1', conflicting_markup)

        # make sure replace() preserves the encoding of the original response
        new_body = "New body \xa3"
        replaced = from_headers.replace(body=new_body)
        self._assert_response_values(replaced, 'iso-8859-1', new_body)
|---|
| 223 | |
|---|
| 224 | |
|---|
| 225 | |
|---|
class XmlResponseTest(TextResponseTest):
    """Run the text response tests against ``XmlResponse``, plus XML encoding checks."""

    response_class = XmlResponse

    def test_xml_encoding(self):
        """Check encoding detection from the XML declaration and across replace()."""
        url = "http://www.example.com"
        latin1_doc = """<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>"""

        # no declaration at all: the configured default encoding applies
        bare_doc = "<xml></xml>"
        undeclared = self.response_class(url, body=bare_doc)
        self._assert_response_values(undeclared, settings['DEFAULT_RESPONSE_ENCODING'], bare_doc)

        # encoding taken from the XML declaration
        declared = self.response_class(url, body=latin1_doc)
        self._assert_response_values(declared, 'iso-8859-1', latin1_doc)

        # make sure replace() preserves the explicit encoding passed in the constructor
        explicit = self.response_class(url, body=latin1_doc, encoding='utf-8')
        plain_body = "New body"
        explicit_replaced = explicit.replace(body=plain_body)
        self._assert_response_values(explicit_replaced, 'utf-8', plain_body)

        # make sure replace() rediscovers the encoding (if not given explicitly) when changing the body
        detected = self.response_class(url, body=latin1_doc)
        utf8_doc = """<?xml version="1.0" encoding="utf-8"?><xml></xml>"""
        redetected = detected.replace(body=utf8_doc)
        self._assert_response_values(detected, 'iso-8859-1', latin1_doc)
        self._assert_response_values(redetected, 'utf-8', utf8_doc)
|---|
| 254 | |
|---|
| 255 | |
|---|
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|---|