Changeset 1809:ad834af83918
- Timestamp:
- 10/21/09 16:13:41 (9 months ago)
- Author:
- Pablo Hoffman <pablo@…>
- Branch:
- default
- Message:
-
added DEFAULT_RESPONSE_ENCODING setting
- Files:
-
Legend:
- Unmodified
- Added
- Removed
-
|
r1785
|
r1809
|
|
| 339 | 339 | The default headers used for Scrapy HTTP Requests. They're populated in the |
| 340 | 340 | :class:`~scrapy.contrib.downloadermiddleware.defaultheaders.DefaultHeadersMiddleware`. |
| | 341 | |
| | 342 | .. setting:: DEFAULT_RESPONSE_ENCODING |
| | 343 | |
| | 344 | DEFAULT_RESPONSE_ENCODING |
| | 345 | ------------------------- |
| | 346 | |
| | 347 | Default: ``'ascii'`` |
| | 348 | |
| | 349 | The default encoding to use for :class:`~scrapy.http.TextResponse` objects (and |
| | 350 | subclasses) when no encoding is declared and no encoding could be inferred from |
| | 351 | the body. |
| 341 | 352 | |
| 342 | 353 | .. setting:: DEPTH_LIMIT |
-
|
r1782
|
r1809
|
|
| 37 | 37 | 'Accept-Language': 'en', |
| 38 | 38 | } |
| | 39 | |
| | 40 | DEFAULT_RESPONSE_ENCODING = 'ascii' |
| 39 | 41 | |
| 40 | 42 | DEPTH_LIMIT = 0 |
-
|
r1693
|
r1809
|
|
| 12 | 12 | from scrapy.http.response import Response |
| 13 | 13 | from scrapy.utils.python import memoizemethod_noargs |
| | 14 | from scrapy.conf import settings |
| 14 | 15 | |
| 15 | 16 | class TextResponse(Response): |
| 16 | 17 | |
| | 18 | _DEFAULT_ENCODING = settings['DEFAULT_RESPONSE_ENCODING'] |
| 17 | 19 | _ENCODING_RE = re.compile(r'charset=([\w-]+)', re.I) |
| 18 | 20 | |
| … |
… |
|
| 72 | 74 | dammit = UnicodeDammit(self.body, possible_encodings) |
| 73 | 75 | self._body_inferred_encoding = dammit.originalEncoding |
| | 76 | if self._body_inferred_encoding in ('ascii', None): |
| | 77 | self._body_inferred_encoding = self._DEFAULT_ENCODING |
| 74 | 78 | return dammit.unicode |
| 75 | 79 | |
-
|
r1693
|
r1809
|
|
| 3 | 3 | |
| 4 | 4 | from scrapy.http import Response, TextResponse, HtmlResponse, XmlResponse, Headers |
| | 5 | from scrapy.conf import settings |
| 5 | 6 | |
| 6 | 7 | |
| … |
… |
|
| 139 | 140 | |
| 140 | 141 | def test_unicode_url(self): |
| 141 | | # instantiate with unicode url without encoding |
| 142 | | self.assertRaises(TypeError, self.response_class, u"http://www.example.com/") |
| | 142 | # instantiate with unicode url without encoding (should set default encoding) |
| | 143 | resp = self.response_class(u"http://www.example.com/") |
| | 144 | self.assertEqual(resp.encoding, settings['DEFAULT_RESPONSE_ENCODING']) |
| | 145 | |
| 143 | 146 | # make sure urls are converted to str |
| 144 | 147 | resp = self.response_class(url=u"http://www.example.com/", encoding='utf-8') |
| … |
… |
|
| 188 | 191 | self.assertRaises(TypeError, self.response_class, "http://www.example.com", body=u"\xa3") |
| 189 | 192 | |
| 190 | | |
| 191 | 193 | class HtmlResponseTest(TextResponseTest): |
| 192 | 194 | |
| … |
… |
|
| 230 | 232 | body = "<xml></xml>" |
| 231 | 233 | r1 = self.response_class("http://www.example.com", body=body) |
| 232 | | # XXX: we may want to switch default XmlResponse encoding to utf-8 |
| 233 | | self._assert_response_values(r1, 'ascii', body) |
| | 234 | self._assert_response_values(r1, settings['DEFAULT_RESPONSE_ENCODING'], body) |
| 234 | 235 | |
| 235 | 236 | body = """<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>""" |