root/scrapy/conf/default_settings.py @ 1782:ba8d73a2c322

Revision 1782:ba8d73a2c322, 5.7 kB (checked in by Pablo Hoffman <pablo@…>, 11 months ago)

some improvements to 'Crawled' log lines, delegating the formatting to a pluggable function

Line 
"""
This module contains the default values for all settings used by Scrapy.

For more information about these settings you can read the settings
documentation in docs/topics/settings.rst

Scrapy developers, if you add a setting here remember to:

* add it in alphabetical order
* group similar settings without leaving blank lines
* add its documentation to the available settings documentation
  (docs/topics/settings.rst)

"""

from os.path import join, abspath, dirname

BOT_NAME = 'scrapybot'
BOT_VERSION = '1.0'

CLOSEDOMAIN_TIMEOUT = 0
CLOSEDOMAIN_ITEMPASSED = 0

COMMANDS_MODULE = ''
COMMANDS_SETTINGS_MODULE = ''

CONCURRENT_DOMAINS = 8

CONCURRENT_ITEMS = 100

COOKIES_DEBUG = False

DEFAULT_ITEM_CLASS = 'scrapy.item.Item'

DEFAULT_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en',
}

DEPTH_LIMIT = 0
DEPTH_STATS = True

DOWNLOAD_DELAY = 0
DOWNLOAD_TIMEOUT = 180      # 3mins

DOWNLOADER_DEBUG = False

DOWNLOADER_HTTPCLIENTFACTORY = 'scrapy.core.downloader.webclient.ScrapyHTTPClientFactory'

DOWNLOADER_MIDDLEWARES = {}

# NOTE(review): the integer values look like ordering keys running from the
# engine side (low) to the downloader side (high) -- confirm against the
# middleware manager before relying on it.
DOWNLOADER_MIDDLEWARES_BASE = {
    # Engine side
    'scrapy.contrib.downloadermiddleware.robotstxt.RobotsTxtMiddleware': 100,
    'scrapy.contrib.downloadermiddleware.httpauth.HttpAuthMiddleware': 300,
    'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware': 400,
    'scrapy.contrib.downloadermiddleware.retry.RetryMiddleware': 500,
    'scrapy.contrib.downloadermiddleware.defaultheaders.DefaultHeadersMiddleware': 550,
    'scrapy.contrib.downloadermiddleware.redirect.RedirectMiddleware': 600,
    'scrapy.contrib.downloadermiddleware.cookies.CookiesMiddleware': 700,
    'scrapy.contrib.downloadermiddleware.httpproxy.HttpProxyMiddleware': 750,
    'scrapy.contrib.downloadermiddleware.httpcompression.HttpCompressionMiddleware': 800,
    'scrapy.contrib.downloadermiddleware.stats.DownloaderStats': 850,
    'scrapy.contrib.downloadermiddleware.httpcache.HttpCacheMiddleware': 900,
    # Downloader side
}

DOWNLOADER_STATS = True

DUPEFILTER_CLASS = 'scrapy.contrib.dupefilter.RequestFingerprintDupeFilter'

EXTENSIONS = {}

EXTENSIONS_BASE = {
    'scrapy.contrib.corestats.CoreStats': 0,
    'scrapy.management.web.WebConsole': 0,
    'scrapy.management.telnet.TelnetConsole': 0,
    'scrapy.contrib.webconsole.scheduler.SchedulerQueue': 0,
    'scrapy.contrib.webconsole.livestats.LiveStats': 0,
    'scrapy.contrib.webconsole.spiderctl.Spiderctl': 0,
    'scrapy.contrib.webconsole.enginestatus.EngineStatus': 0,
    'scrapy.contrib.webconsole.stats.StatsDump': 0,
    'scrapy.contrib.memusage.MemoryUsage': 0,
    'scrapy.contrib.memdebug.MemoryDebugger': 0,
    'scrapy.contrib.closedomain.CloseDomain': 0,
}

GROUPSETTINGS_ENABLED = False
GROUPSETTINGS_MODULE = ''

HTTPCACHE_DIR = ''
HTTPCACHE_IGNORE_MISSING = False
HTTPCACHE_SECTORIZE = True
HTTPCACHE_EXPIRATION_SECS = 0

ITEM_PROCESSOR = 'scrapy.contrib.pipeline.ItemPipelineManager'

# Item pipelines are typically set in specific commands settings
ITEM_PIPELINES = []

LOG_ENABLED = True
LOG_FORMATTER_CRAWLED = 'scrapy.contrib.logformatter.crawled_logline'
LOG_STDOUT = False
LOG_LEVEL = 'DEBUG'
LOG_FILE = None

MAIL_HOST = 'localhost'
MAIL_FROM = 'scrapy@localhost'

MEMDEBUG_ENABLED = False        # enable memory debugging
MEMDEBUG_NOTIFY = []            # send memory debugging report by mail at engine shutdown

MEMUSAGE_ENABLED = 1
MEMUSAGE_LIMIT_MB = 0
MEMUSAGE_NOTIFY_MAIL = []
MEMUSAGE_REPORT = False
MEMUSAGE_WARNING_MB = 0

MYSQL_CONNECTION_SETTINGS = {}

NEWSPIDER_MODULE = ''

REDIRECT_MAX_METAREFRESH_DELAY = 100
REDIRECT_MAX_TIMES = 20  # uses Firefox default setting
REDIRECT_PRIORITY_ADJUST = +2

REQUEST_HANDLERS = {}
REQUEST_HANDLERS_BASE = {
    'file': 'scrapy.core.downloader.handlers.file.download_file',
    'http': 'scrapy.core.downloader.handlers.http.download_http',
    'https': 'scrapy.core.downloader.handlers.http.download_http',
}

REQUESTS_QUEUE_SIZE = 0
REQUESTS_PER_DOMAIN = 8     # max simultaneous requests per domain

# scrapy.contrib.downloadermiddleware.retry.RetryMiddleware default settings
RETRY_TIMES = 2  # initial response + 2 retries = 3 requests
RETRY_HTTP_CODES = ['500', '503', '504', '400', '408']
RETRY_PRIORITY_ADJUST = -1

ROBOTSTXT_OBEY = False

SCHEDULER = 'scrapy.core.scheduler.Scheduler'

SCHEDULER_MIDDLEWARES = {}

SCHEDULER_MIDDLEWARES_BASE = {
    'scrapy.contrib.schedulermiddleware.duplicatesfilter.DuplicatesFilterMiddleware': 500,
}

SCHEDULER_ORDER = 'BFO'   # available orders: BFO (default), DFO

SPIDER_MANAGER_CLASS = 'scrapy.contrib.spidermanager.TwistedPluginSpiderManager'

SPIDER_MIDDLEWARES = {}

SPIDER_MIDDLEWARES_BASE = {
    # Engine side
    'scrapy.contrib.spidermiddleware.httperror.HttpErrorMiddleware': 50,
    'scrapy.contrib.itemsampler.ItemSamplerMiddleware': 100,
    'scrapy.contrib.spidermiddleware.requestlimit.RequestLimitMiddleware': 200,
    'scrapy.contrib.spidermiddleware.offsite.OffsiteMiddleware': 500,
    'scrapy.contrib.spidermiddleware.referer.RefererMiddleware': 700,
    'scrapy.contrib.spidermiddleware.urllength.UrlLengthMiddleware': 800,
    'scrapy.contrib.spidermiddleware.depth.DepthMiddleware': 900,
    # Spider side
}

SPIDER_MODULES = []

SPIDER_SCHEDULER = 'scrapy.contrib.spiderscheduler.FifoSpiderScheduler'

SPIDERPROFILER_ENABLED = False

STATS_CLASS = 'scrapy.stats.collector.MemoryStatsCollector'
STATS_ENABLED = True
STATS_DUMP = False

STATS_SDB_DOMAIN = 'scrapy_stats'
STATS_SDB_ASYNC = False

STATSMAILER_RCPTS = []

TELNETCONSOLE_ENABLED = 1
TELNETCONSOLE_PORT = 6023  # if None, uses a dynamic port

# Absolute path of the 'templates' directory located one level above the
# directory that contains this module.
TEMPLATES_DIR = abspath(join(dirname(__file__), '..', 'templates'))

URLLENGTH_LIMIT = 2083

# Built from the BOT_NAME and BOT_VERSION defaults above ('scrapybot/1.0').
USER_AGENT = '%s/%s' % (BOT_NAME, BOT_VERSION)

WEBCONSOLE_ENABLED = True
WEBCONSOLE_PORT = 6080
WEBCONSOLE_LOGFILE = None
Note: See TracBrowser for help on using the browser.