root/scrapy/conf/default_settings.py @ 1833:99f58d4e1866

Revision 1833:99f58d4e1866, 5.7 kB (checked in by Pablo Hoffman <pablo@…>, 10 months ago)

renamed CloseDomain extension to CloseSpider, and renamed CLOSEDOMAIN_* settings to CLOSESPIDER_*

Line 
"""
This module contains the default values for all settings used by Scrapy.

For more information about these settings you can read the settings
documentation in docs/topics/settings.rst

Scrapy developers, if you add a setting here remember to:

* add it in alphabetical order
* group similar settings without leaving blank lines
* add its documentation to the available settings documentation
  (docs/topics/settings.rst)

"""

from os.path import join, abspath, dirname

BOT_NAME = 'scrapybot'
BOT_VERSION = '1.0'

CLOSESPIDER_ITEMPASSED = 0
CLOSESPIDER_TIMEOUT = 0

COMMANDS_MODULE = ''
COMMANDS_SETTINGS_MODULE = ''

CONCURRENT_ITEMS = 100

CONCURRENT_SPIDERS = 8

COOKIES_DEBUG = False

DEFAULT_ITEM_CLASS = 'scrapy.item.Item'

DEFAULT_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en',
}

DEFAULT_RESPONSE_ENCODING = 'ascii'

DEPTH_LIMIT = 0
DEPTH_STATS = True

DOWNLOAD_DELAY = 0
DOWNLOAD_TIMEOUT = 180      # 3mins

DOWNLOADER_DEBUG = False

DOWNLOADER_HTTPCLIENTFACTORY = 'scrapy.core.downloader.webclient.ScrapyHTTPClientFactory'

DOWNLOADER_MIDDLEWARES = {}

# Maps each middleware's dotted path to an integer; entries are listed in
# ascending order from the engine side to the downloader side.
DOWNLOADER_MIDDLEWARES_BASE = {
    # Engine side
    'scrapy.contrib.downloadermiddleware.robotstxt.RobotsTxtMiddleware': 100,
    'scrapy.contrib.downloadermiddleware.httpauth.HttpAuthMiddleware': 300,
    'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware': 400,
    'scrapy.contrib.downloadermiddleware.retry.RetryMiddleware': 500,
    'scrapy.contrib.downloadermiddleware.defaultheaders.DefaultHeadersMiddleware': 550,
    'scrapy.contrib.downloadermiddleware.redirect.RedirectMiddleware': 600,
    'scrapy.contrib.downloadermiddleware.cookies.CookiesMiddleware': 700,
    'scrapy.contrib.downloadermiddleware.httpproxy.HttpProxyMiddleware': 750,
    'scrapy.contrib.downloadermiddleware.httpcompression.HttpCompressionMiddleware': 800,
    'scrapy.contrib.downloadermiddleware.stats.DownloaderStats': 850,
    'scrapy.contrib.downloadermiddleware.httpcache.HttpCacheMiddleware': 900,
    # Downloader side
}

DOWNLOADER_STATS = True

DUPEFILTER_CLASS = 'scrapy.contrib.dupefilter.RequestFingerprintDupeFilter'

EXTENSIONS = {}

EXTENSIONS_BASE = {
    'scrapy.contrib.corestats.CoreStats': 0,
    'scrapy.management.web.WebConsole': 0,
    'scrapy.management.telnet.TelnetConsole': 0,
    'scrapy.contrib.webconsole.scheduler.SchedulerQueue': 0,
    'scrapy.contrib.webconsole.livestats.LiveStats': 0,
    'scrapy.contrib.webconsole.spiderctl.Spiderctl': 0,
    'scrapy.contrib.webconsole.enginestatus.EngineStatus': 0,
    'scrapy.contrib.webconsole.stats.StatsDump': 0,
    'scrapy.contrib.memusage.MemoryUsage': 0,
    'scrapy.contrib.memdebug.MemoryDebugger': 0,
    'scrapy.contrib.closespider.CloseSpider': 0,
}

GROUPSETTINGS_ENABLED = False
GROUPSETTINGS_MODULE = ''

HTTPCACHE_DIR = ''
HTTPCACHE_EXPIRATION_SECS = 0
HTTPCACHE_IGNORE_MISSING = False
HTTPCACHE_SECTORIZE = True

# Item pipelines are typically set in specific commands settings
ITEM_PIPELINES = []

ITEM_PROCESSOR = 'scrapy.contrib.pipeline.ItemPipelineManager'

LOG_ENABLED = True
LOG_FILE = None
LOG_FORMATTER_CRAWLED = 'scrapy.contrib.logformatter.crawled_logline'
LOG_LEVEL = 'DEBUG'
LOG_STDOUT = False

MAIL_FROM = 'scrapy@localhost'
MAIL_HOST = 'localhost'

MEMDEBUG_ENABLED = False        # enable memory debugging
MEMDEBUG_NOTIFY = []            # send memory debugging report by mail at engine shutdown

# Boolean like every other *_ENABLED flag in this module (True == 1, so
# integer-style consumers keep working).
MEMUSAGE_ENABLED = True
MEMUSAGE_LIMIT_MB = 0
MEMUSAGE_NOTIFY_MAIL = []
MEMUSAGE_REPORT = False
MEMUSAGE_WARNING_MB = 0

MYSQL_CONNECTION_SETTINGS = {}

NEWSPIDER_MODULE = ''

REDIRECT_MAX_METAREFRESH_DELAY = 100
REDIRECT_MAX_TIMES = 20 # uses Firefox default setting
REDIRECT_PRIORITY_ADJUST = +2

REQUEST_HANDLERS = {}
# Maps a URL scheme to the dotted path of its download handler.
REQUEST_HANDLERS_BASE = {
    'file': 'scrapy.core.downloader.handlers.file.download_file',
    'http': 'scrapy.core.downloader.handlers.http.download_http',
    'https': 'scrapy.core.downloader.handlers.http.download_http',
}

REQUESTS_PER_SPIDER = 8     # max simultaneous requests per spider
REQUESTS_QUEUE_SIZE = 0

# scrapy.contrib.downloadermiddleware.retry.RetryMiddleware default settings
RETRY_HTTP_CODES = ['500', '503', '504', '400', '408']
RETRY_PRIORITY_ADJUST = -1
RETRY_TIMES = 2 # initial response + 2 retries = 3 requests

ROBOTSTXT_OBEY = False

SCHEDULER = 'scrapy.core.scheduler.Scheduler'

SCHEDULER_MIDDLEWARES = {}

SCHEDULER_MIDDLEWARES_BASE = {
    'scrapy.contrib.schedulermiddleware.duplicatesfilter.DuplicatesFilterMiddleware': 500,
}

SCHEDULER_ORDER = 'BFO'   # available orders: BFO (default), DFO

SPIDER_MANAGER_CLASS = 'scrapy.contrib.spidermanager.TwistedPluginSpiderManager'

SPIDER_MIDDLEWARES = {}

# Maps each middleware's dotted path to an integer; entries are listed in
# ascending order from the engine side to the spider side.
SPIDER_MIDDLEWARES_BASE = {
    # Engine side
    'scrapy.contrib.spidermiddleware.httperror.HttpErrorMiddleware': 50,
    'scrapy.contrib.itemsampler.ItemSamplerMiddleware': 100,
    'scrapy.contrib.spidermiddleware.requestlimit.RequestLimitMiddleware': 200,
    'scrapy.contrib.spidermiddleware.offsite.OffsiteMiddleware': 500,
    'scrapy.contrib.spidermiddleware.referer.RefererMiddleware': 700,
    'scrapy.contrib.spidermiddleware.urllength.UrlLengthMiddleware': 800,
    'scrapy.contrib.spidermiddleware.depth.DepthMiddleware': 900,
    # Spider side
}

SPIDER_MODULES = []

SPIDER_SCHEDULER = 'scrapy.contrib.spiderscheduler.FifoSpiderScheduler'

SPIDERPROFILER_ENABLED = False

STATS_CLASS = 'scrapy.stats.collector.MemoryStatsCollector'
STATS_DUMP = False
STATS_ENABLED = True

STATS_SDB_ASYNC = False
STATS_SDB_DOMAIN = 'scrapy_stats'

STATSMAILER_RCPTS = []

# Boolean like every other *_ENABLED flag in this module (True == 1).
TELNETCONSOLE_ENABLED = True
TELNETCONSOLE_PORT = 6023  # if None, uses a dynamic port

# Absolute path of the 'templates' directory that sits one level above the
# directory containing this module.
TEMPLATES_DIR = abspath(join(dirname(__file__), '..', 'templates'))

URLLENGTH_LIMIT = 2083

# Built from BOT_NAME and BOT_VERSION as defined in this module.
USER_AGENT = '%s/%s' % (BOT_NAME, BOT_VERSION)

WEBCONSOLE_ENABLED = True
WEBCONSOLE_LOGFILE = None
WEBCONSOLE_PORT = 6080
Note: See TracBrowser for help on using the browser.