"""
This module contains the default values for all settings used by Scrapy.

For more information about these settings, read the settings documentation
in docs/topics/settings.rst

Scrapy developers, if you add a setting here remember to:

* add it in alphabetical order
* group similar settings without leaving blank lines
* add its documentation to the available settings documentation
  (docs/topics/settings.rst)

"""

from os.path import join, abspath, dirname

# Bot identity; both values are combined into the default USER_AGENT below.
BOT_NAME = 'scrapybot'
BOT_VERSION = '1.0'

CLOSEDOMAIN_ITEMPASSED = 0
CLOSEDOMAIN_TIMEOUT = 0

COMMANDS_MODULE = ''
COMMANDS_SETTINGS_MODULE = ''

CONCURRENT_DOMAINS = 8
CONCURRENT_ITEMS = 100

COOKIES_DEBUG = False

DEFAULT_ITEM_CLASS = 'scrapy.item.Item'

DEFAULT_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en',
}

DEPTH_LIMIT = 0
DEPTH_STATS = True

DOWNLOAD_DELAY = 0
DOWNLOAD_TIMEOUT = 180      # in seconds (3 minutes)

DOWNLOADER_DEBUG = False

DOWNLOADER_HTTPCLIENTFACTORY = 'scrapy.core.downloader.webclient.ScrapyHTTPClientFactory'

# Project-level counterpart of DOWNLOADER_MIDDLEWARES_BASE; empty by default.
DOWNLOADER_MIDDLEWARES = {}

# Maps middleware class path -> order value. Entries are listed in ascending
# order value, running from the engine side down to the downloader side.
DOWNLOADER_MIDDLEWARES_BASE = {
    # Engine side
    'scrapy.contrib.downloadermiddleware.robotstxt.RobotsTxtMiddleware': 100,
    'scrapy.contrib.downloadermiddleware.httpauth.HttpAuthMiddleware': 300,
    'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware': 400,
    'scrapy.contrib.downloadermiddleware.retry.RetryMiddleware': 500,
    'scrapy.contrib.downloadermiddleware.defaultheaders.DefaultHeadersMiddleware': 550,
    'scrapy.contrib.downloadermiddleware.redirect.RedirectMiddleware': 600,
    'scrapy.contrib.downloadermiddleware.cookies.CookiesMiddleware': 700,
    'scrapy.contrib.downloadermiddleware.httpproxy.HttpProxyMiddleware': 750,
    'scrapy.contrib.downloadermiddleware.httpcompression.HttpCompressionMiddleware': 800,
    'scrapy.contrib.downloadermiddleware.stats.DownloaderStats': 850,
    'scrapy.contrib.downloadermiddleware.httpcache.HttpCacheMiddleware': 900,
    # Downloader side
}

DOWNLOADER_STATS = True

DUPEFILTER_CLASS = 'scrapy.contrib.dupefilter.RequestFingerprintDupeFilter'

# Project-level counterpart of EXTENSIONS_BASE; empty by default.
EXTENSIONS = {}

# Maps extension class path -> order value (all 0 here).
EXTENSIONS_BASE = {
    'scrapy.contrib.corestats.CoreStats': 0,
    'scrapy.management.web.WebConsole': 0,
    'scrapy.management.telnet.TelnetConsole': 0,
    'scrapy.contrib.webconsole.scheduler.SchedulerQueue': 0,
    'scrapy.contrib.webconsole.livestats.LiveStats': 0,
    'scrapy.contrib.webconsole.spiderctl.Spiderctl': 0,
    'scrapy.contrib.webconsole.enginestatus.EngineStatus': 0,
    'scrapy.contrib.webconsole.stats.StatsDump': 0,
    'scrapy.contrib.memusage.MemoryUsage': 0,
    'scrapy.contrib.memdebug.MemoryDebugger': 0,
    'scrapy.contrib.closedomain.CloseDomain': 0,
}

GROUPSETTINGS_ENABLED = False
GROUPSETTINGS_MODULE = ''

HTTPCACHE_DIR = ''
HTTPCACHE_EXPIRATION_SECS = 0
HTTPCACHE_IGNORE_MISSING = False
HTTPCACHE_SECTORIZE = True

# Item pipelines are typically set in specific commands settings
ITEM_PIPELINES = []
ITEM_PROCESSOR = 'scrapy.contrib.pipeline.ItemPipelineManager'

LOG_ENABLED = True
LOG_FILE = None
LOG_FORMATTER_CRAWLED = 'scrapy.contrib.logformatter.crawled_logline'
LOG_LEVEL = 'DEBUG'
LOG_STDOUT = False

MAIL_FROM = 'scrapy@localhost'
MAIL_HOST = 'localhost'

MEMDEBUG_ENABLED = False        # enable memory debugging
MEMDEBUG_NOTIFY = []            # send memory debugging report by mail at engine shutdown

# Was the integer 1; spelled as a boolean to match the other *_ENABLED flags
# (True == 1, so existing truthiness/int checks keep working).
MEMUSAGE_ENABLED = True
MEMUSAGE_LIMIT_MB = 0
MEMUSAGE_NOTIFY_MAIL = []
MEMUSAGE_REPORT = False
MEMUSAGE_WARNING_MB = 0

MYSQL_CONNECTION_SETTINGS = {}

NEWSPIDER_MODULE = ''

REDIRECT_MAX_METAREFRESH_DELAY = 100
REDIRECT_MAX_TIMES = 20         # uses Firefox default setting
REDIRECT_PRIORITY_ADJUST = +2

# Project-level counterpart of REQUEST_HANDLERS_BASE; empty by default.
REQUEST_HANDLERS = {}
# Maps URL scheme -> path of the download handler callable for that scheme.
REQUEST_HANDLERS_BASE = {
    'file': 'scrapy.core.downloader.handlers.file.download_file',
    'http': 'scrapy.core.downloader.handlers.http.download_http',
    'https': 'scrapy.core.downloader.handlers.http.download_http',
}

REQUESTS_PER_DOMAIN = 8         # max simultaneous requests per domain
REQUESTS_QUEUE_SIZE = 0

# scrapy.contrib.downloadermiddleware.retry.RetryMiddleware default settings
RETRY_HTTP_CODES = ['500', '503', '504', '400', '408']
RETRY_PRIORITY_ADJUST = -1
RETRY_TIMES = 2                 # initial response + 2 retries = 3 requests

ROBOTSTXT_OBEY = False

SCHEDULER = 'scrapy.core.scheduler.Scheduler'

# Project-level counterpart of SCHEDULER_MIDDLEWARES_BASE; empty by default.
SCHEDULER_MIDDLEWARES = {}

SCHEDULER_MIDDLEWARES_BASE = {
    'scrapy.contrib.schedulermiddleware.duplicatesfilter.DuplicatesFilterMiddleware': 500,
}

SCHEDULER_ORDER = 'BFO'         # available orders: BFO (default), DFO

SPIDER_MANAGER_CLASS = 'scrapy.contrib.spidermanager.TwistedPluginSpiderManager'

# Project-level counterpart of SPIDER_MIDDLEWARES_BASE; empty by default.
SPIDER_MIDDLEWARES = {}

# Maps middleware class path -> order value. Entries are listed in ascending
# order value, running from the engine side down to the spider side.
SPIDER_MIDDLEWARES_BASE = {
    # Engine side
    'scrapy.contrib.spidermiddleware.httperror.HttpErrorMiddleware': 50,
    'scrapy.contrib.itemsampler.ItemSamplerMiddleware': 100,
    'scrapy.contrib.spidermiddleware.requestlimit.RequestLimitMiddleware': 200,
    'scrapy.contrib.spidermiddleware.offsite.OffsiteMiddleware': 500,
    'scrapy.contrib.spidermiddleware.referer.RefererMiddleware': 700,
    'scrapy.contrib.spidermiddleware.urllength.UrlLengthMiddleware': 800,
    'scrapy.contrib.spidermiddleware.depth.DepthMiddleware': 900,
    # Spider side
}

SPIDER_MODULES = []

SPIDER_SCHEDULER = 'scrapy.contrib.spiderscheduler.FifoSpiderScheduler'

SPIDERPROFILER_ENABLED = False

STATS_CLASS = 'scrapy.stats.collector.MemoryStatsCollector'
STATS_DUMP = False
STATS_ENABLED = True

STATS_SDB_ASYNC = False
STATS_SDB_DOMAIN = 'scrapy_stats'

STATSMAILER_RCPTS = []

# Was the integer 1; spelled as a boolean to match the other *_ENABLED flags.
TELNETCONSOLE_ENABLED = True
TELNETCONSOLE_PORT = 6023       # if None, uses a dynamic port

# Absolute path of the "templates" directory that sits next to this module's
# parent directory (i.e. <dir of this file>/../templates, normalized).
TEMPLATES_DIR = abspath(join(dirname(__file__), '..', 'templates'))

URLLENGTH_LIMIT = 2083

# Default User-Agent string: "<BOT_NAME>/<BOT_VERSION>" as defined above.
USER_AGENT = '%s/%s' % (BOT_NAME, BOT_VERSION)

WEBCONSOLE_ENABLED = True
WEBCONSOLE_LOGFILE = None
WEBCONSOLE_PORT = 6080