Source code for scrapy_redis.pipelines
from scrapy.utils.misc import load_object
from scrapy.utils.serialize import ScrapyJSONEncoder
from twisted.internet.threads import deferToThread

from . import connection, defaults

default_serialize = ScrapyJSONEncoder().encode

class RedisPipeline:
    """Pushes serialized items into a Redis list/queue.

    Settings
    --------
    REDIS_ITEMS_KEY : str
        Redis key under which to store items.
    REDIS_ITEMS_SERIALIZER : str
        Object path to the serializer function.

    See the configuration sketch after this listing for example settings.
    """

    def __init__(
        self, server, key=defaults.PIPELINE_KEY, serialize_func=default_serialize
    ):
        """Initialize the pipeline.

        Parameters
        ----------
        server : StrictRedis
            Redis client instance.
        key : str
            Redis key under which to store items.
        serialize_func : callable
            Item serializer function.
        """
        self.server = server
        self.key = key
        self.serialize = serialize_func

    @classmethod
    def from_settings(cls, settings):
        params = {
            "server": connection.from_settings(settings),
        }
        # Optional settings override the defaults declared in __init__.
        if settings.get("REDIS_ITEMS_KEY"):
            params["key"] = settings["REDIS_ITEMS_KEY"]
        if settings.get("REDIS_ITEMS_SERIALIZER"):
            params["serialize_func"] = load_object(settings["REDIS_ITEMS_SERIALIZER"])
        return cls(**params)

    @classmethod
    def from_crawler(cls, crawler):
        return cls.from_settings(crawler.settings)

    def process_item(self, item, spider):
        # Run the push in a thread pool so the blocking redis-py call
        # does not stall Twisted's reactor.
        return deferToThread(self._process_item, item, spider)

    def _process_item(self, item, spider):
        key = self.item_key(item, spider)
        data = self.serialize(item)
        self.server.rpush(key, data)
        return item

    def item_key(self, item, spider):
        """Returns the Redis key based on the given spider.

        Override this method to use a different key depending on the item
        and/or spider; see the sketch after this listing.
        """
        return self.key % {"spider": spider.name}
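
Example usage, as a sketch: the settings below show one way a Scrapy project might enable this pipeline. ITEM_PIPELINES and the setting names come from scrapy-redis itself; the Redis URL and the priority value 300 are illustrative.

# settings.py fragment (sketch): enable the pipeline and tune its settings.
ITEM_PIPELINES = {
    "scrapy_redis.pipelines.RedisPipeline": 300,
}

# Connection parameters read by scrapy_redis.connection.from_settings.
REDIS_URL = "redis://localhost:6379"

# "%(spider)s" is interpolated by RedisPipeline.item_key; this value matches
# the library default (defaults.PIPELINE_KEY).
REDIS_ITEMS_KEY = "%(spider)s:items"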
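
REDIS_ITEMS_SERIALIZER accepts an object path to any callable that turns an item into a string or bytes; from_settings resolves it with load_object. A hedged sketch of a custom serializer, assuming the third-party msgpack package is installed and items are dict-like:

# myproject/serializers.py (hypothetical module): a msgpack-based serializer.
import msgpack

def encode_item(item):
    # Scrapy items are dict-like, so dict(item) flattens them for packing.
    return msgpack.packb(dict(item), use_bin_type=True)

# In settings.py: REDIS_ITEMS_SERIALIZER = "myproject.serializers.encode_item"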
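
Finally, the item_key override suggested in its docstring, sketched below; the subclass name and the per-item-type key scheme are assumptions for illustration, not part of the library.

class RoutedRedisPipeline(RedisPipeline):
    """Sketch: route each item type to its own Redis list."""

    def item_key(self, item, spider):
        # e.g. "myspider:ProductItem:items"
        return "%s:%s:items" % (spider.name, type(item).__name__)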