Documentation for Hasher Module

module
torrentfile.hasher

Piece and file hashers for BitTorrent metafile contents.

Classes
  • Hasher — Piece hasher for BitTorrent v1 files.
  • HasherV2 — Calculates the root hash and piece layers for file contents.
  • HasherHybrid — Calculates root and piece hashes for creating a hybrid torrent file.
Functions
  • merkle_root(blocks) — Calculates the merkle root for a sequence of SHA-256 hash digests.

torrentfile.hasher.merkle_root(blocks)

Source code in torrentfile\hasher.py
def merkle_root(blocks):
    """Reduce a sequence of sha256 digests to a single merkle root.

    Adjacent digests are concatenated pairwise and re-hashed with sha256,
    level by level, until only one digest is left.

    Parameters
    ----------
    blocks : `list`
        Digests forming the bottom layer of the tree.

    Returns
    -------
    `bytes`
        The single remaining digest. A one-element input is returned
        unchanged.

    Notes
    -----
    Pairing stops at the last complete pair, so an unpaired trailing digest
    on any level is left out of the result. An empty sequence raises
    ``IndexError``.
    """
    level = blocks
    while len(level) > 1:
        # Drawing twice from one shared iterator yields consecutive pairs.
        source = iter(level)
        level = [
            sha256(left + right).digest() for left, right in zip(source, source)
        ]
    return level[0]

torrentfile.hasher.Hasher

__init__(self, paths, piece_length) special

Source code in torrentfile\hasher.py
def __init__(self, paths, piece_length):
    """Set up piece hashing over the contents of the files in ``paths``.

    Parameters
    ----------
    paths : `list`
        File paths whose contents are hashed, in order.
    piece_length : `int`
        Number of bytes read for each piece.
    """
    self.piece_length = piece_length
    self.paths = paths
    # Combined on-disk size of every listed file, in bytes.
    self.total = sum(os.path.getsize(path) for path in paths)
    # Position within ``paths`` of the file that is currently open.
    self.index = 0
    self.current = open(paths[0], "rb")
    hashlog.debug(
        "Hashing v1 torrent file. Size: %s Piece Length: %s",
        humanize_bytes(self.total),
        humanize_bytes(self.piece_length),
    )

__iter__(self) special

Source code in torrentfile\hasher.py
def __iter__(self):
    """Return the hasher itself as the iterator.

    Returns
    -------
    self : `iterator`
        This same object, so it can be used directly in a ``for`` loop.
    """
    return self

__next__(self) special

Source code in torrentfile\hasher.py
def __next__(self):
    """Return the hash for the next piece of data from the file list.

    Returns
    -------
    The sha1 digest of a full ``piece_length`` read, or whatever
    ``_handle_partial`` returns when the read came up short.

    Raises
    ------
    StopIteration
        When the current file yields no data and ``next_file`` reports
        that there is no further file.
    """
    while True:
        buffer = bytearray(self.piece_length)
        count = self.current.readinto(buffer)
        if count != 0:
            break
        # Current file is exhausted: move on, or finish iterating.
        if not self.next_file():
            raise StopIteration
    if count < self.piece_length:
        # Short read: hand only the bytes actually read to the helper.
        return self._handle_partial(buffer[:count])
    return sha1(buffer).digest()  # nosec

next_file(self)

Source code in torrentfile\hasher.py
def next_file(self):
    """Advance to the next file in the file list.

    Returns
    -------
    `bool`
        True when another file was opened and is now ``self.current``;
        False when the list is exhausted, in which case ``self.current``
        is left untouched.
    """
    self.index += 1
    if self.index >= len(self.paths):
        return False
    # Release the finished file before switching to its successor.
    self.current.close()
    self.current = open(self.paths[self.index], "rb")
    return True

torrentfile.hasher.HasherV2

__init__(self, path, piece_length) special

Source code in torrentfile\hasher.py
def __init__(self, path, piece_length):
    """Hash the file at ``path`` and keep the results on the instance.

    The file is opened and passed to ``process_file`` before the
    constructor returns.

    Parameters
    ----------
    path : `str`
        Location of the file to hash.
    piece_length : `int`
        Piece size in bytes.
    """
    self.path = path
    self.piece_length = piece_length
    # How many BLOCK_SIZE blocks fit in one piece.
    self.num_blocks = piece_length // BLOCK_SIZE
    # Result holders; root and piece_layer start out unset.
    self.layer_hashes = []
    self.piece_layer = None
    self.root = None
    hashlog.debug(
        "Hashing partial v2 torrent file. Piece Length: %s Path: %s",
        humanize_bytes(self.piece_length),
        str(self.path),
    )

    with open(self.path, "rb") as stream:
        self.process_file(stream)

process_file(self, fd)

Source code in torrentfile\hasher.py
def process_file(self, fd):
    """Calculate per-piece merkle hashes over BLOCK_SIZE chunks of a file.

    The file is read one piece (``self.num_blocks`` blocks) at a time. Each
    block is hashed with sha256 to form a leaf, the leaves of a piece are
    reduced with ``merkle_root``, and the result is appended to
    ``self.layer_hashes``. Once the file is exhausted ``_calculate_root``
    is called.

    A piece that has fewer leaves than ``self.num_blocks`` is padded with
    all-zero hashes of ``HASH_SIZE`` bytes:

    * if it is the only piece (the file is smaller than one piece), the
      leaves are padded up to the next power of two of the leaf count;
    * otherwise it is the final piece of a larger file and is padded up to
      a full ``self.num_blocks`` leaves.

    Parameters
    ----------
    fd : file object
        File opened for reading in binary mode; must support ``readinto``.
    """
    # One reusable read buffer; every leaf is hashed right after its read.
    leaf = bytearray(BLOCK_SIZE)
    while True:
        blocks = []
        # generate leaves of merkle tree
        for _ in range(self.num_blocks):
            size = fd.readinto(leaf)
            if not size:
                break
            blocks.append(sha256(leaf[:size]).digest())

        # no leaves at all means end of file
        if not blocks:
            break

        count = len(blocks)
        if count != self.num_blocks:
            if not self.layer_hashes:
                # Whole file fits inside a single piece: the tree only needs
                # to be as wide as the next power of two of its leaf count
                # (a single leaf is already a complete tree).
                width = 1 << (count - 1).bit_length()
            else:
                # Last, short piece of a multi-piece file: fill the piece.
                width = self.num_blocks
            # Padding is based on the number of leaves, not on byte counts.
            blocks.extend(bytes(HASH_SIZE) for _ in range(width - count))

        # calculate the root hash for the merkle tree up to piece-length
        self.layer_hashes.append(merkle_root(blocks))
    self._calculate_root()

torrentfile.hasher.HasherHybrid

__init__(self, path, piece_length) special

Source code in torrentfile\hasher.py
def __init__(self, path, piece_length):
    """Hash the file at ``path`` for use in a hybrid torrent file.

    The file is opened and passed to ``_process_file`` before the
    constructor returns.

    Parameters
    ----------
    path : `str`
        Location of the file to hash.
    piece_length : `int`
        Piece size in bytes.
    """
    self.path = path
    self.piece_length = piece_length
    # How many BLOCK_SIZE blocks fit in one piece.
    self.amount = piece_length // BLOCK_SIZE
    # Hash accumulators start empty.
    self.pieces = []
    self.layer_hashes = []
    # Results and padding details start out unset.
    self.piece_layer = None
    self.root = None
    self.padding_piece = None
    self.padding_file = None
    hashlog.debug(
        "Hashing partial Hybrid torrent file. Piece Length: %s Path: %s",
        humanize_bytes(self.piece_length),
        str(self.path),
    )
    with open(path, "rb") as stream:
        self._process_file(stream)