Skip to content

Settings#

Settings can be defined via the OKAMI_SETTINGS environment variable; otherwise, the default settings will be loaded.

It should be set like this:

  • export OKAMI_SETTINGS=okami.cfg.example

or passed inline with a command:

  • OKAMI_SETTINGS=okami.cfg.example okami command

To import okami settings in your project, use: from okami import settings

#

Below are the available okami settings, located in the okami.cfg.default module, with their default values.

 

VERSION#

Current okami version

 

DEBUG#

Enable debugging for the asyncio module. See the "Debug mode" section of the asyncio documentation.

default: DEBUG = False

 

SPIDERS#

List of python modules in your project containing spider implementations available to okami.

default: SPIDERS = []

 

STORAGE#

Storage class path and name

default: STORAGE = "okami.Storage"

 

DOWNLOADER#

Downloader class path and name

default: DOWNLOADER = "okami.Downloader"

 

HTTP_SERVER#

HTTP server class path and name

default: HTTP_SERVER = "okami.server.Server"

 

THROTTLE#

Throttle class path and name

default: THROTTLE = "okami.Throttle"

 

STORAGE_SETTINGS#

Arguments passed to storage module

default: STORAGE_SETTINGS = {}

 

THROTTLE_SETTINGS#

Arguments passed to throttle module

default: THROTTLE_SETTINGS = {}

 

HTTP_SERVER_ADDRESS#

HTTP server default address

default: HTTP_SERVER_ADDRESS = "0.0.0.0:5566"

 

USER_AGENT#

Default USER-AGENT used in requests

default: USER_AGENT = "Okami/{}".format(okami.__version__)

 

EVENT_LOOP_POLICY#

Set a custom event loop policy object. Check Customizing the event loop policy documentation.

default: EVENT_LOOP_POLICY = None

 

ASYNC_TIMEOUT#

Asyncio Future object timeout. Check concurrent.futures.wait documentation.

default: ASYNC_TIMEOUT = 10

 

ASYNC_SLOW_CALLBACK_DURATION#

Minimum duration, in seconds, of asyncio slow callbacks. See the "Debug mode" section of the asyncio documentation.

default: ASYNC_SLOW_CALLBACK_DURATION = 0.1

 

PAUSE_TIMEOUT#

Pause timeout. In case of server connection errors, etc., okami pauses scraping.

default: PAUSE_TIMEOUT = 5

 

CONN_TIMEOUT#

Connection timeout

default: CONN_TIMEOUT = 20

 

CONN_VERIFY_SSL#

SSL verification for HTTP requests

default: CONN_VERIFY_SSL = False

 

CONN_MAX_CONCURRENT_CONNECTIONS#

Maximum number of concurrent connections to website

default: CONN_MAX_CONCURRENT_CONNECTIONS = 5

 

CONN_MAX_CONCURRENT_REQUESTS#

Maximum number of concurrent requests to website. Effectively an async loop size.

default: CONN_MAX_CONCURRENT_REQUESTS = 10

 

CONN_MAX_RETRIES#

Maximum number of connection retries in case of connection issues

default: CONN_MAX_RETRIES = 5

 

CONN_MAX_HTTP_REDIRECTS#

Maximum number of HTTP redirects

default: CONN_MAX_HTTP_REDIRECTS = 10

 

REQUEST_MAX_FAILED#

Maximum number of failed requests before okami stops

default: REQUEST_MAX_FAILED = 50

 

REQUEST_MAX_PENDING#

Maximum number of pending requests before logging an error

default: REQUEST_MAX_PENDING = 10

 

BASE_HTTP_MIDDLEWARE#

List of base http middleware. Should not change.

default:

BASE_HTTP_MIDDLEWARE = (
    "okami.middleware.Session",
    "okami.middleware.Headers",
)

 

HTTP_MIDDLEWARE#

List of http middleware. Use to add custom handlers.

default: HTTP_MIDDLEWARE = ()

 

BASE_SPIDER_MIDDLEWARE#

List of base spider middleware. Should not change.

default: BASE_SPIDER_MIDDLEWARE = ()

 

SPIDER_MIDDLEWARE#

List of spider middleware. Use to add custom handlers.

default: SPIDER_MIDDLEWARE = ()

 

BASE_STARTUP_PIPELINE#

List of base startup pipelines. Should not change.

default: BASE_STARTUP_PIPELINE = ()

 

STARTUP_PIPELINE#

List of startup pipelines. Use to add custom handlers.

default: STARTUP_PIPELINE = ()

 

BASE_ITEMS_PIPELINE#

List of base items pipelines. Should not change.

default: BASE_ITEMS_PIPELINE = ()

 

ITEMS_PIPELINE#

List of items pipelines. Use to add custom handlers.

default: ITEMS_PIPELINE = ()

 

BASE_TASKS_PIPELINE#

List of base tasks pipelines. Should not change.

default: BASE_TASKS_PIPELINE = ()

 

TASKS_PIPELINE#

List of tasks pipelines. Use to add custom handlers.

default: TASKS_PIPELINE = ()

 

DELTA_ENABLED#

Enable the Delta middleware.

default: DELTA_ENABLED = False

 

DELTA_PATH#

Delta middleware database directory. Defaults to current directory.

default: DELTA_PATH = None