Aggregator.App.index_query.IndexAggregator

class Aggregator.App.index_query.IndexAggregator(domains: List[str], cc_indexes_server: str = 'http://index.commoncrawl.org/collinfo.json', cc_servers: List[str] = [], since: datetime = datetime.datetime(1, 1, 1, 0, 0), to: datetime = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), limit: Optional[int] = None, max_retry: int = 5, prefetch_size: int = 3, sleep_step: int = 2)
__init__(domains: List[str], cc_indexes_server: str = 'http://index.commoncrawl.org/collinfo.json', cc_servers: List[str] = [], since: datetime = datetime.datetime(1, 1, 1, 0, 0), to: datetime = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), limit: Optional[int] = None, max_retry: int = 5, prefetch_size: int = 3, sleep_step: int = 2) -> None

Methods

__init__(domains[, cc_indexes_server, ...])

aclose(exc_type, exc_val[, exc_tb])

aopen()

get_all_CC_indexes(client, cdx_server)

get_captured_responses(client, cdx_server, ...)

get_number_of_pages(client, cdx_server, ...)