Show
Ignore:
Files:
28 added
7 modified

Legend:

Unmodified
Added
Removed
  • docs/experimental/index.rst

    r1789 r1932  
    2222   djangoitems 
    2323   scheduler-middleware 
     24   crawlspider-v2  
  • docs/intro/tutorial.rst

    r1827 r1928  
    421421Now doing a crawl on the dmoz.org domain yields ``DmozItem``'s:: 
    422422 
    423    [dmoz.org] DEBUG: Scraped DmozItem({'title': [u'Text Processing in Python'], 'link': [u'http://gnosis.cx/TPiP/'], 'desc': [u' - By David Mertz; Addison Wesley. Book in progress, full text, ASCII format. Asks for feedback. [author website, Gnosis Software, Inc.]\n']}) in <http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> 
    424    [dmoz.org] DEBUG: Scraped DmozItem({'title': [u'XML Processing with Python'], 'link': [u'http://www.informit.com/store/product.aspx?isbn=0130211192'], 'desc': [u' - By Sean McGrath; Prentice Hall PTR, 2000, ISBN 0130211192, has CD-ROM. Methods to build XML applications fast, Python tutorial, DOM and SAX, new Pyxie open source XML processing library. [Prentice Hall PTR]\n']}) in <http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> 
     423   [dmoz.org] DEBUG: Scraped DmozItem(desc=[u' - By David Mertz; Addison Wesley. Book in progress, full text, ASCII format. Asks for feedback. [author website, Gnosis Software, Inc.]\n'], link=[u'http://gnosis.cx/TPiP/'], title=[u'Text Processing in Python']) in <http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> 
     424   [dmoz.org] DEBUG: Scraped DmozItem(desc=[u' - By Sean McGrath; Prentice Hall PTR, 2000, ISBN 0130211192, has CD-ROM. Methods to build XML applications fast, Python tutorial, DOM and SAX, new Pyxie open source XML processing library. [Prentice Hall PTR]\n'], link=[u'http://www.informit.com/store/product.aspx?isbn=0130211192'], title=[u'XML Processing with Python']) in <http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> 
    425425 
    426426 
  • docs/topics/item-pipeline.rst

    r1911 r1929  
    9999 
    100100        def process_item(self, spider, item): 
    101             if item.id in self.duplicates[spider]: 
     101            if item['id'] in self.duplicates[spider]: 
    102102                raise DropItem("Duplicate item found: %s" % item) 
    103103            else: 
    104                 self.duplicates[spider].add(item.id) 
     104                self.duplicates[spider].add(item['id']) 
    105105                return item 
    106106 
     
    179179  using the :setting:`EXPORT_FIELDS` setting. 
    180180 
    181 * ``jsonlines``: uses a :class:`~jsonlines.JsonLinesItemExporter` 
     181* ``json``: uses a :class:`~jsonlines.JsonLinesItemExporter` 
    182182 
    183183* ``pickle``: uses a :class:`PickleItemExporter` 
  • scrapy/templates/project/module/pipelines.py.tmpl

    r1606 r1927  
    55 
    66class ${ProjectName}Pipeline(object): 
    7     def process_item(self, domain, item): 
     7    def process_item(self, spider, item): 
    88        return item 
  • scrapy/templates/spiders/crawl.tmpl

    r1752 r1927  
    1111 
    1212    rules = ( 
    13         Rule(SgmlLinkExtractor(allow=(r'Items/', )), 'parse_item', follow=True), 
     13        Rule(SgmlLinkExtractor(allow=r'Items/'), callback='parse_item', follow=True), 
    1414    ) 
    1515 
    1616    def parse_item(self, response): 
    17         xs = HtmlXPathSelector(response) 
     17        hxs = HtmlXPathSelector(response) 
    1818        i = ${ProjectName}Item() 
    19         #i['site_id'] = xs.select('//input[@id="sid"]/@value').extract() 
    20         #i['name'] = xs.select('//div[@id="name"]').extract() 
    21         #i['description'] = xs.select('//div[@id="description"]').extract() 
     19        #i['site_id'] = hxs.select('//input[@id="sid"]/@value').extract() 
     20        #i['name'] = hxs.select('//div[@id="name"]').extract() 
     21        #i['description'] = hxs.select('//div[@id="description"]').extract() 
    2222        return i 
    2323 
  • scrapy/tests/test_utils_python.py

    r1562 r1930  
     1import operator 
    12import unittest 
    23 
    34from scrapy.utils.python import str_to_unicode, unicode_to_str, \ 
    4     memoizemethod_noargs, isbinarytext 
     5    memoizemethod_noargs, isbinarytext, equal_attributes 
    56 
    67class UtilsPythonTestCase(unittest.TestCase): 
     
    6263        assert isbinarytext("\x02\xa3") 
    6364 
     65    def test_equal_attributes(self): 
     66        class Obj: 
     67            pass 
     68 
     69        a = Obj() 
     70        b = Obj() 
     71        # no attributes given: returns False 
     72        self.failIf(equal_attributes(a, b, [])) 
     73        # non-existent attributes 
     74        self.failIf(equal_attributes(a, b, ['x', 'y'])) 
     75 
     76        a.x = 1 
     77        b.x = 1 
     78        # equal attribute 
     79        self.failUnless(equal_attributes(a, b, ['x'])) 
     80 
     81        b.y = 2 
     82        # obj1 has no attribute y 
     83        self.failIf(equal_attributes(a, b, ['x', 'y'])) 
     84 
     85        a.y = 2 
     86        # equal attributes 
     87        self.failUnless(equal_attributes(a, b, ['x', 'y'])) 
     88 
     89        a.y = 1 
     90        # different attributes 
     91        self.failIf(equal_attributes(a, b, ['x', 'y'])) 
     92 
     93        # test callable 
     94        a.meta = {} 
     95        b.meta = {} 
     96        self.failUnless(equal_attributes(a, b, ['meta'])) 
     97 
     98        # compare ['meta']['z'] 
     99        a.meta['z'] = 1 
     100        b.meta['z'] = 1 
     101 
     102        get_z = operator.itemgetter('z') 
     103        get_meta = operator.attrgetter('meta') 
     104        compare_z = lambda obj: get_z(get_meta(obj)) 
     105 
     106        self.failUnless(equal_attributes(a, b, [compare_z, 'x'])) 
     107        # fail z equality 
     108        a.meta['z'] = 2 
     109        self.failIf(equal_attributes(a, b, [compare_z, 'x'])) 
     110 
     111 
    64112if __name__ == "__main__": 
    65113    unittest.main() 
  • scrapy/utils/python.py

    r1561 r1930  
    217217        raise TypeError('%s is not callable' % type(func)) 
    218218    return func_args 
     219 
     220 
     221def equal_attributes(obj1, obj2, attributes): 
     222    """Compare two objects attributes""" 
     223    # no attributes given: return False by default 
     224    if not attributes: 
     225        return False 
     226 
     227    for attr in attributes: 
     228        # support callables like itemgetter 
     229        if callable(attr): 
     230            if not attr(obj1) == attr(obj2): 
     231                return False 
     232        else: 
     233            # check that both objects have the attribute 
     234            if not hasattr(obj1, attr): 
     235                return False 
     236            if not hasattr(obj2, attr): 
     237                return False 
     238            # compare object attributes 
     239            if not getattr(obj1, attr) == getattr(obj2, attr): 
     240                return False 
     241    # all attributes equal 
     242    return True 
     243