cmoncrawl.processor.pipeline.streamer.StreamerFileJSON

class cmoncrawl.processor.pipeline.streamer.StreamerFileJSON(root: Path, max_directory_size: int, max_file_size: int, pretty: bool = False)
__init__(root: Path, max_directory_size: int, max_file_size: int, pretty: bool = False)

Methods

__init__(root, max_directory_size, max_file_size[, pretty])

clean_up()

get_file_name(metadata)

metadata_to_string(extracted_data)

stream(extracted_data, metadata)