Settings#
Settings can be defined via the OKAMI_SETTINGS environment variable; otherwise the default settings are loaded.
It should be set like this:
export OKAMI_SETTINGS=okami.cfg.example
or passed with the command:
OKAMI_SETTINGS=okami.cfg.example okami command
To import okami settings in your project, use:
from okami import settings
#
Below are the available okami settings, located in the okami.cfg.default module, with their default values.
VERSION#
Current okami version
DEBUG#
Enable debugging for the asyncio module. See "Debug mode" in the asyncio documentation.
default
DEBUG = False
SPIDERS#
List of python modules in your project containing spider implementations available to okami.
default
SPIDERS = []
STORAGE#
Storage class path and name
default
STORAGE = "okami.Storage"
DOWNLOADER#
Downloader class path and name
default
DOWNLOADER = "okami.Downloader"
HTTP_SERVER#
HTTP server class path and name
default
HTTP_SERVER = "okami.server.Server"
THROTTLE#
Throttle class path and name
default
THROTTLE = "okami.Throttle"
STORAGE_SETTINGS#
Arguments passed to storage module
default
STORAGE_SETTINGS = {}
THROTTLE_SETTINGS#
Arguments passed to throttle module
default
THROTTLE_SETTINGS = {}
HTTP_SERVER_ADDRESS#
HTTP server default address
default
HTTP_SERVER_ADDRESS = "0.0.0.0:5566"
USER_AGENT#
Default USER-AGENT used in requests
default
USER_AGENT = "Okami/{}".format(okami.__version__)
EVENT_LOOP_POLICY#
Set a custom event loop policy object. Check Customizing the event loop policy documentation.
default
EVENT_LOOP_POLICY = None
ASYNC_TIMEOUT#
Asyncio Future object timeout. Check concurrent.futures.wait documentation.
default
ASYNC_TIMEOUT = 10
ASYNC_SLOW_CALLBACK_DURATION#
Minimum duration, in seconds, for asyncio to treat a callback as slow. See "Debug mode" in the asyncio documentation.
default
ASYNC_SLOW_CALLBACK_DURATION = 0.1
PAUSE_TIMEOUT#
Pause timeout. okami pauses scraping in case of server connection errors etc.
default
PAUSE_TIMEOUT = 5
CONN_TIMEOUT#
Connection timeout
default
CONN_TIMEOUT = 20
CONN_VERIFY_SSL#
SSL verification for HTTP requests
default
CONN_VERIFY_SSL = False
CONN_MAX_CONCURRENT_CONNECTIONS#
Maximum number of concurrent connections to website
default
CONN_MAX_CONCURRENT_CONNECTIONS = 5
CONN_MAX_CONCURRENT_REQUESTS#
Maximum number of concurrent requests to website. Effectively an async loop size.
default
CONN_MAX_CONCURRENT_REQUESTS = 10
CONN_MAX_RETRIES#
Maximum number of connection retries in case of connection issues
default
CONN_MAX_RETRIES = 5
CONN_MAX_HTTP_REDIRECTS#
Maximum number of HTTP redirects
default
CONN_MAX_HTTP_REDIRECTS = 10
REQUEST_MAX_FAILED#
Maximum number of failed requests before okami stops
default
REQUEST_MAX_FAILED = 50
REQUEST_MAX_PENDING#
Maximum number of pending requests before logging an error
default
REQUEST_MAX_PENDING = 10
BASE_HTTP_MIDDLEWARE#
List of base http middleware. Should not change.
default
BASE_HTTP_MIDDLEWARE = ( "okami.middleware.Session", "okami.middleware.Headers", )
HTTP_MIDDLEWARE#
List of http middleware. Use to add custom handlers.
default
HTTP_MIDDLEWARE = ()
BASE_SPIDER_MIDDLEWARE#
List of base spider middleware. Should not change.
default
BASE_SPIDER_MIDDLEWARE = ()
SPIDER_MIDDLEWARE#
List of spider middleware. Use to add custom handlers.
default
SPIDER_MIDDLEWARE = ()
BASE_STARTUP_PIPELINE#
List of base startup pipelines. Should not change.
default
BASE_STARTUP_PIPELINE = ()
STARTUP_PIPELINE#
List of startup pipelines. Use to add custom handlers.
default
STARTUP_PIPELINE = ()
BASE_ITEMS_PIPELINE#
List of base items pipelines. Should not change.
default
BASE_ITEMS_PIPELINE = ()
ITEMS_PIPELINE#
List of items pipelines. Use to add custom handlers.
default
ITEMS_PIPELINE = ()
BASE_TASKS_PIPELINE#
List of base tasks pipelines. Should not change.
default
BASE_TASKS_PIPELINE = ()
TASKS_PIPELINE#
List of tasks pipelines. Use to add custom handlers.
default
TASKS_PIPELINE = ()
DELTA_ENABLED#
Enable the Delta middleware.
default
DELTA_ENABLED = False
DELTA_PATH#
Delta middleware database directory. Defaults to current directory.
default
DELTA_PATH = None