Module bytewax.exhash
Exhash is a consistent hash that Bytewax calls internally to route data to workers.
We do not use Python's hash
because it is not consistent between
processes by default and do not want to force modifying hash behavior
in unrelated code via
PYTHONHASHSEED
.
If you need to route on a new key type, register a new version as is
done below in your own code. You must make sure that if two objects
are x == y
they also are exhash(x) == exhash(y)
.
Expand source code
"""Exhash is a consistent hash that Bytewax calls internally to
route data to workers.
We do not use Python's `hash` because it is not consistent between
processes by default and do not want to force modifying hash behavior
in unrelated code via
[`PYTHONHASHSEED`](https://docs.python.org/3/using/cmdline.html#envvar-PYTHONHASHSEED).
If you need to route on a new key type, register a new version as is
done below in your own code. You _must_ make sure that if two objects
are `x == y` they also are `exhash(x) == exhash(y)`.
"""
from functools import singledispatch
from hashlib import blake2b
@singledispatch
def exhash(key, h=None):
"""A consistent hash of a value."""
raise NotImplementedError(f"{type(key)} isn't exhash-able")
def new_hasher():
"""Build a new `hashlib` hasher object.
Override this if you want to use a different hashing method.
"""
return blake2b(digest_size=8)
@exhash.register
def _(key: list, h=None):
raise NotImplementedError("can't exhash mutable list")
@exhash.register
def _(key: set, h=None):
raise NotImplementedError("can't exhash mutable set")
@exhash.register
def _(key: dict, h=None):
raise NotImplementedError("can't exhash mutable dict")
@exhash.register
def _(key: int, h=None):
if h is None:
h = new_hasher()
h.update(key.to_bytes(key.bit_length() // 8 + 1, byteorder="little", signed=True))
return h
@exhash.register
def _(key: str, h=None):
if h is None:
h = new_hasher()
h.update(key.encode())
return h
@exhash.register
def _(key: bytes, h=None):
if h is None:
h = new_hasher()
h.update(key)
return h
@exhash.register
def _(key: tuple, h=None):
if h is None:
h = new_hasher()
for x in key:
h = exhash(x, h)
return h
@exhash.register
def _(key: frozenset, h=None):
if h is None:
h = new_hasher()
for x in sorted(key):
h = exhash(x, h)
return h
Functions
def exhash(key, h=None)
-
A consistent hash of a value.
Expand source code
@singledispatch def exhash(key, h=None): """A consistent hash of a value.""" raise NotImplementedError(f"{type(key)} isn't exhash-able")
def new_hasher()
-
Build a new
hashlib
hasher object.Override this if you want to use a different hashing method.
Expand source code
def new_hasher(): """Build a new `hashlib` hasher object. Override this if you want to use a different hashing method. """ return blake2b(digest_size=8)