Changeset 1809:ad834af83918

Show
Ignore:
Timestamp:
10/21/09 16:13:41 (9 months ago)
Author:
Pablo Hoffman <pablo@…>
Branch:
default
Message:

added DEFAULT_RESPONSE_ENCODING setting

Files:
4 modified

Legend:

Unmodified
Added
Removed
  • docs/topics/settings.rst

    r1785 r1809  
    339 339  The default headers used for Scrapy HTTP Requests. They're populated in the
    340 340  :class:`~scrapy.contrib.downloadermiddleware.defaultheaders.DefaultHeadersMiddleware`.
        341
        342  .. setting:: DEFAULT_RESPONSE_ENCODING
        343
        344  DEFAULT_RESPONSE_ENCODING
        345  -------------------------
        346
        347  Default: ``'ascii'``
        348
        349  The default encoding to use for :class:`~scrapy.http.TextResponse` objects (and
        350  subclasses) when no encoding is declared and no encoding could be inferred from
        351  the body.
    341 352
    342 353  .. setting:: DEPTH_LIMIT
  • scrapy/conf/default_settings.py

    r1782 r1809  
    3737    'Accept-Language': 'en', 
    3838} 
     39 
     40DEFAULT_RESPONSE_ENCODING = 'ascii' 
    3941 
    4042DEPTH_LIMIT = 0 
  • scrapy/http/response/text.py

    r1693 r1809  
    1212from scrapy.http.response import Response 
    1313from scrapy.utils.python import memoizemethod_noargs 
     14from scrapy.conf import settings 
    1415 
    1516class TextResponse(Response): 
    1617 
     18    _DEFAULT_ENCODING = settings['DEFAULT_RESPONSE_ENCODING'] 
    1719    _ENCODING_RE = re.compile(r'charset=([\w-]+)', re.I) 
    1820 
     
    7274        dammit = UnicodeDammit(self.body, possible_encodings) 
    7375        self._body_inferred_encoding = dammit.originalEncoding 
     76        if self._body_inferred_encoding in ('ascii', None): 
     77            self._body_inferred_encoding = self._DEFAULT_ENCODING 
    7478        return dammit.unicode 
    7579 
  • scrapy/tests/test_http_response.py

    r1693 r1809  
    33 
    44from scrapy.http import Response, TextResponse, HtmlResponse, XmlResponse, Headers 
     5from scrapy.conf import settings 
    56 
    67 
     
    139140 
    140141    def test_unicode_url(self): 
    141         # instantiate with unicode url without encoding 
    142         self.assertRaises(TypeError, self.response_class, u"http://www.example.com/") 
     142        # instantiate with unicode url without encoding (should set default encoding) 
     143        resp = self.response_class(u"http://www.example.com/") 
     144        self.assertEqual(resp.encoding, settings['DEFAULT_RESPONSE_ENCODING']) 
     145 
    143146        # make sure urls are converted to str 
    144147        resp = self.response_class(url=u"http://www.example.com/", encoding='utf-8') 
     
    188191        self.assertRaises(TypeError, self.response_class, "http://www.example.com", body=u"\xa3") 
    189192 
    190  
    191193class HtmlResponseTest(TextResponseTest): 
    192194 
     
    230232        body = "<xml></xml>" 
    231233        r1 = self.response_class("http://www.example.com", body=body) 
    232         # XXX: we may want to switch default XmlResponse encoding to utf-8 
    233         self._assert_response_values(r1, 'ascii', body) 
     234        self._assert_response_values(r1, settings['DEFAULT_RESPONSE_ENCODING'], body) 
    234235 
    235236        body = """<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>"""