root/scrapy/contrib/closedomain.py @ 1822:af6645fd14ed

Revision 1822:af6645fd14ed, 1.5 kB (checked in by Pablo Hoffman <pablo@…>, 10 months ago)

* Renamed domain_{opened,closed,idle} signals to spider_{opened,closed,idle}
* Changed them to pass spider instances only (no domains) (refs #105)

Line 
1"""CloseDomain is an extension that forces spiders to be closed after certain
2conditions are met.
3
4See documentation in docs/topics/extensions.rst
5"""
6
7from collections import defaultdict
8
9from twisted.internet import reactor
10from scrapy.xlib.pydispatch import dispatcher
11
12from scrapy.core import signals
13from scrapy.core.engine import scrapyengine
14from scrapy.conf import settings
15
class CloseDomain(object):
    """Extension that forces spiders to be closed once a limit is reached.

    Two independent limits are read from the settings when the extension is
    instantiated:

    * ``CLOSEDOMAIN_TIMEOUT`` -- delay handed to ``reactor.callLater`` when a
      spider is opened; when the delayed call fires, the spider is closed
      with reason ``'closedomain_timeout'``.
    * ``CLOSEDOMAIN_ITEMPASSED`` -- number of ``item_passed`` signals for a
      spider after which it is closed with reason
      ``'closedomain_itempassed'``.

    A limit whose setting evaluates to false (e.g. ``0``) is not enforced:
    the corresponding signal handler is simply never connected.
    """

    def __init__(self):
        """Read the limits from the settings and connect signal handlers."""
        self.timeout = settings.getint('CLOSEDOMAIN_TIMEOUT')
        self.itempassed = settings.getint('CLOSEDOMAIN_ITEMPASSED')

        # Per-spider bookkeeping, keyed by spider instance.
        self.counts = defaultdict(int)  # spider -> items passed so far
        self.tasks = {}                 # spider -> pending delayed call

        if self.timeout:
            dispatcher.connect(self.spider_opened,
                               signal=signals.spider_opened)
        if self.itempassed:
            dispatcher.connect(self.item_passed, signal=signals.item_passed)
        # Always connected so the per-spider state above is released.
        dispatcher.connect(self.spider_closed, signal=signals.spider_closed)

    def spider_opened(self, spider):
        """Schedule the timeout-triggered close for ``spider``.

        The delayed call is stored so that ``spider_closed`` can cancel it
        if the spider finishes before the timeout expires.
        """
        self.tasks[spider] = reactor.callLater(
            self.timeout, scrapyengine.close_spider,
            spider=spider, reason='closedomain_timeout')

    def item_passed(self, item, spider):
        """Count an item passed by ``spider`` and close it at the limit.

        ``item`` is accepted because the signal sends it; it is not used.
        """
        self.counts[spider] += 1
        # Equality (not >=): the close is requested only by the item that
        # reaches the limit, not again by any item counted afterwards.
        if self.counts[spider] == self.itempassed:
            scrapyengine.close_spider(spider, 'closedomain_itempassed')

    def spider_closed(self, spider):
        """Drop the state kept for ``spider`` and cancel its pending timeout.

        Safe to call for a spider that has no counter and no scheduled task.
        """
        self.counts.pop(spider, None)
        task = self.tasks.pop(spider, None)
        # active() is the IDelayedCall check for "neither called nor
        # cancelled"; cancel() raises on a call in either of those states.
        if task is not None and task.active():
            task.cancel()
Note: See TracBrowser for help on using the browser.