Changeset 1833:99f58d4e1866
- Timestamp:
- 11/06/09 15:54:17 (9 months ago)
- Author:
- Pablo Hoffman <pablo@…>
- Branch:
- default
- Message:
-
renamed CloseDomain extension to CloseSpider, and renamed CLOSEDOMAIN_* settings to CLOSESPIDER_*
- Files:
-
Legend:
- Unmodified
- Added
- Removed
-
|
r1822
|
r1833
|
|
| 280 | 280 | ~~~~~~~~~~~~~~~~~~~~~~ |
| 281 | 281 | |
| 282 | | .. module:: scrapy.contrib.closedomain |
| | 282 | .. module:: scrapy.contrib.closespider |
| 283 | 283 | :synopsis: Close domain extension |
| 284 | 284 | |
| 285 | | .. class:: scrapy.contrib.closedomain.CloseDomain |
| | 285 | .. class:: scrapy.contrib.closespider.CloseSpider |
| 286 | 286 | |
| 287 | 287 | Closes a domain/spider automatically when some conditions are met, using a |
| … |
… |
|
| 291 | 291 | settings. Other conditions will be supported in the future. |
| 292 | 292 | |
| 293 | | .. setting:: CLOSEDOMAIN_TIMEOUT |
| 294 | | |
| 295 | | CLOSEDOMAIN_TIMEOUT |
| | 293 | .. setting:: CLOSESPIDER_TIMEOUT |
| | 294 | |
| | 295 | CLOSESPIDER_TIMEOUT |
| 296 | 296 | """"""""""""""""""" |
| 297 | 297 | |
| … |
… |
|
| 300 | 300 | An integer which specifies a number of seconds. If the domain remains open for |
| 301 | 301 | more than that number of seconds, it will be automatically closed with the |
| 302 | | reason ``closedomain_timeout``. If zero (or not set) domains won't be closed by |
| | 302 | reason ``closespider_timeout``. If zero (or not set) domains won't be closed by |
| 303 | 303 | timeout. |
| 304 | 304 | |
| 305 | | .. setting:: CLOSEDOMAIN_ITEMPASSED |
| 306 | | |
| 307 | | CLOSEDOMAIN_ITEMPASSED |
| | 305 | .. setting:: CLOSESPIDER_ITEMPASSED |
| | 306 | |
| | 307 | CLOSESPIDER_ITEMPASSED |
| 308 | 308 | """""""""""""""""""""" |
| 309 | 309 | |
| … |
… |
|
| 312 | 312 | An integer which specifies a number of items. If the spider scrapes more than |
| 313 | 313 | that amount of items and those items are passed by the item pipeline, the |
| 314 | | domain will be closed with the reason ``closedomain_itempassed``. If zero (or |
| | 314 | domain will be closed with the reason ``closespider_itempassed``. If zero (or |
| 315 | 315 | not set) domains won't be closed by number of passed items. |
| 316 | 316 | |
-
|
r1831
|
r1833
|
|
| 19 | 19 | BOT_VERSION = '1.0' |
| 20 | 20 | |
| 21 | | CLOSEDOMAIN_TIMEOUT = 0 |
| 22 | | CLOSEDOMAIN_ITEMPASSED = 0 |
| | 21 | CLOSESPIDER_TIMEOUT = 0 |
| | 22 | CLOSESPIDER_ITEMPASSED = 0 |
| 23 | 23 | |
| 24 | 24 | COMMANDS_MODULE = '' |
| … |
… |
|
| 87 | 87 | 'scrapy.contrib.memusage.MemoryUsage': 0, |
| 88 | 88 | 'scrapy.contrib.memdebug.MemoryDebugger': 0, |
| 89 | | 'scrapy.contrib.closedomain.CloseDomain': 0, |
| | 89 | 'scrapy.contrib.closespider.CloseSpider': 0, |
| 90 | 90 | } |
| 91 | 91 | |
-
|
r1822
|
r1833
|
|
| 1 | | """CloseDomain is an extension that forces spiders to be closed after certain |
| | 1 | """CloseSpider is an extension that forces spiders to be closed after certain |
| 2 | 2 | conditions are met. |
| 3 | 3 | |
| … |
… |
|
| 14 | 14 | from scrapy.conf import settings |
| 15 | 15 | |
| 16 | | class CloseDomain(object): |
| | 16 | class CloseSpider(object): |
| 17 | 17 | |
| 18 | 18 | def __init__(self): |
| 19 | | self.timeout = settings.getint('CLOSEDOMAIN_TIMEOUT') |
| 20 | | self.itempassed = settings.getint('CLOSEDOMAIN_ITEMPASSED') |
| | 19 | self.timeout = settings.getint('CLOSESPIDER_TIMEOUT') |
| | 20 | self.itempassed = settings.getint('CLOSESPIDER_ITEMPASSED') |
| 21 | 21 | |
| 22 | 22 | self.counts = defaultdict(int) |
| … |
… |
|
| 31 | 31 | def spider_opened(self, spider): |
| 32 | 32 | self.tasks[spider] = reactor.callLater(self.timeout, scrapyengine.close_spider, \ |
| 33 | | spider=spider, reason='closedomain_timeout') |
| | 33 | spider=spider, reason='closespider_timeout') |
| 34 | 34 | |
| 35 | 35 | def item_passed(self, item, spider): |
| 36 | 36 | self.counts[spider] += 1 |
| 37 | 37 | if self.counts[spider] == self.itempassed: |
| 38 | | scrapyengine.close_spider(spider, 'closedomain_itempassed') |
| | 38 | scrapyengine.close_spider(spider, 'closespider_itempassed') |
| 39 | 39 | |
| 40 | 40 | def spider_closed(self, spider): |