Skip to content

Module arti.fingerprints

None

None

View Source
from __future__ import annotations

# Rebind this package's `__path__` to the result of `pkgutil.extend_path`, which lets other
# directories on `sys.path` that share this package name contribute submodules
# (namespace-style package).
__path__ = __import__("pkgutil").extend_path(__path__, __name__)

import operator

from collections.abc import Callable

from functools import reduce

from typing import Optional, Union

import farmhash

from arti.internal.models import Model

from arti.internal.utils import int64, uint64

def _gen_fingerprint_binop(
    op: Callable[[int, int], int]
) -> Callable[[Fingerprint, Union[int, Fingerprint]], Fingerprint]:
    """Build a binary-operator method for `Fingerprint` out of an integer operator.

    The returned function takes a `Fingerprint` as `self` and either another `Fingerprint` or a
    plain `int` as `other`:

    - an `int` operand is first converted with `Fingerprint.from_int`
    - if either side has a key of `None`, `Fingerprint.empty()` is returned
    - otherwise `op` is applied to the two keys and wrapped in a new `Fingerprint`
    - any other operand type produces `NotImplemented`
    """

    def _fingerprint_binop(self: Fingerprint, other: Union[int, Fingerprint]) -> Fingerprint:
        # Promote bare ints so both operands expose a `.key`.
        rhs = Fingerprint.from_int(other) if isinstance(other, int) else other
        if not isinstance(rhs, Fingerprint):
            # Unsupported operand: defer to Python's normal operator fallback.
            return NotImplemented
        if self.key is None or rhs.key is None:
            # A missing key on either side makes the whole result empty.
            return Fingerprint.empty()
        return Fingerprint(key=op(self.key, rhs.key))

    return _fingerprint_binop

class Fingerprint(Model):
    """A unique identity represented as a single int64 key.

    Using an int(64) as the representation is convenient because it:
    - can be combined independent of order with XOR
    - can be stored relatively cheaply
    - lets 0 values drop out when combined (5 ^ 0 = 5)
    - is relatively cross-platform (across databases, languages, etc)

    Two "special" Fingerprints have dedicated factory functions. When combined with any other
    Fingerprint:
    - `empty()` (key of `None`): the result is `empty()`
    - `identity()` (key of `0`): the result is the other Fingerprint
    """

    # The int64 identity value; `None` marks the "empty" Fingerprint.
    key: Optional[int64]

    def combine(self, *others: Fingerprint) -> Fingerprint:
        """XOR this Fingerprint with each of `others` in turn and return the result."""
        combined = self
        for other in others:
            combined = combined ^ other
        return combined

    @classmethod
    def empty(cls) -> Fingerprint:
        """Create the key-less Fingerprint; combining anything with it gives `empty()` again."""
        return cls(key=None)

    @classmethod
    def from_int(cls, x: int, /) -> Fingerprint:
        """Create a Fingerprint from a plain `int` by converting it to `int64`."""
        as_int64 = int64(x)
        return cls.from_int64(as_int64)

    @classmethod
    def from_int64(cls, x: int64, /) -> Fingerprint:
        """Create a Fingerprint whose key is the given `int64`."""
        return cls(key=x)

    @classmethod
    def from_string(cls, x: str, /) -> Fingerprint:
        """Create a Fingerprint for an arbitrary string.

        The string is hashed with Farmhash Fingerprint64 and the unsigned result is converted to
        int64 via two's complement.
        """
        unsigned = uint64(farmhash.fingerprint64(x))
        return cls.from_uint64(unsigned)

    @classmethod
    def from_uint64(cls, x: uint64, /) -> Fingerprint:
        """Create a Fingerprint from a `uint64` by converting it to `int64`."""
        signed = int64(x)
        return cls.from_int64(signed)

    @classmethod
    def identity(cls) -> Fingerprint:
        """Create the zero-keyed Fingerprint; combining it with another returns that other one."""
        zero = int64(0)
        return cls(key=zero)

    @property
    def is_empty(self) -> bool:
        """Whether this Fingerprint has no key (`key is None`)."""
        return self.key is None

    @property
    def is_identity(self) -> bool:
        """Whether this Fingerprint's key equals 0."""
        return self.key == 0

    # Binary operators act on the underlying keys; see `_gen_fingerprint_binop` for how int
    # operands and key-less Fingerprints are handled.
    __and__ = _gen_fingerprint_binop(operator.and_)
    __lshift__ = _gen_fingerprint_binop(operator.lshift)
    __or__ = _gen_fingerprint_binop(operator.or_)
    __rshift__ = _gen_fingerprint_binop(operator.rshift)
    __xor__ = _gen_fingerprint_binop(operator.xor)

    def __eq__(self, other: object) -> bool:
        """Compare by key, converting a plain `int` with `from_int` before comparing.

        Returns `NotImplemented` for any operand that is neither an `int` nor a `Fingerprint`.
        """
        candidate = Fingerprint.from_int(other) if isinstance(other, int) else other
        if not isinstance(candidate, Fingerprint):
            return NotImplemented
        return self.key == candidate.key

Classes

Fingerprint

class Fingerprint(
    __pydantic_self__,
    **data: Any
)
View Source
class Fingerprint(Model):
    """A unique identity represented as a single int64 key.

    Using an int(64) as the representation is convenient because it:
    - can be combined independent of order with XOR
    - can be stored relatively cheaply
    - lets 0 values drop out when combined (5 ^ 0 = 5)
    - is relatively cross-platform (across databases, languages, etc)

    Two "special" Fingerprints have dedicated factory functions. When combined with any other
    Fingerprint:
    - `empty()` (key of `None`): the result is `empty()`
    - `identity()` (key of `0`): the result is the other Fingerprint
    """

    # The int64 identity value; `None` marks the "empty" Fingerprint.
    key: Optional[int64]

    def combine(self, *others: Fingerprint) -> Fingerprint:
        """XOR this Fingerprint with each of `others` in turn and return the result."""
        combined = self
        for other in others:
            combined = combined ^ other
        return combined

    @classmethod
    def empty(cls) -> Fingerprint:
        """Create the key-less Fingerprint; combining anything with it gives `empty()` again."""
        return cls(key=None)

    @classmethod
    def from_int(cls, x: int, /) -> Fingerprint:
        """Create a Fingerprint from a plain `int` by converting it to `int64`."""
        as_int64 = int64(x)
        return cls.from_int64(as_int64)

    @classmethod
    def from_int64(cls, x: int64, /) -> Fingerprint:
        """Create a Fingerprint whose key is the given `int64`."""
        return cls(key=x)

    @classmethod
    def from_string(cls, x: str, /) -> Fingerprint:
        """Create a Fingerprint for an arbitrary string.

        The string is hashed with Farmhash Fingerprint64 and the unsigned result is converted to
        int64 via two's complement.
        """
        unsigned = uint64(farmhash.fingerprint64(x))
        return cls.from_uint64(unsigned)

    @classmethod
    def from_uint64(cls, x: uint64, /) -> Fingerprint:
        """Create a Fingerprint from a `uint64` by converting it to `int64`."""
        signed = int64(x)
        return cls.from_int64(signed)

    @classmethod
    def identity(cls) -> Fingerprint:
        """Create the zero-keyed Fingerprint; combining it with another returns that other one."""
        zero = int64(0)
        return cls(key=zero)

    @property
    def is_empty(self) -> bool:
        """Whether this Fingerprint has no key (`key is None`)."""
        return self.key is None

    @property
    def is_identity(self) -> bool:
        """Whether this Fingerprint's key equals 0."""
        return self.key == 0

    # Binary operators act on the underlying keys; see `_gen_fingerprint_binop` for how int
    # operands and key-less Fingerprints are handled.
    __and__ = _gen_fingerprint_binop(operator.and_)
    __lshift__ = _gen_fingerprint_binop(operator.lshift)
    __or__ = _gen_fingerprint_binop(operator.or_)
    __rshift__ = _gen_fingerprint_binop(operator.rshift)
    __xor__ = _gen_fingerprint_binop(operator.xor)

    def __eq__(self, other: object) -> bool:
        """Compare by key, converting a plain `int` with `from_int` before comparing.

        Returns `NotImplemented` for any operand that is neither an `int` nor a `Fingerprint`.
        """
        candidate = Fingerprint.from_int(other) if isinstance(other, int) else other
        if not isinstance(candidate, Fingerprint):
            return NotImplemented
        return self.key == candidate.key

Ancestors (in MRO)

  • arti.internal.models.Model
  • pydantic.main.BaseModel
  • pydantic.utils.Representation

Class variables

Config

Static methods

construct

def construct(
    _fields_set: Optional[ForwardRef('SetStr')] = None,
    **values: Any
) -> 'Model'

Creates a new model setting dict and fields_set from trusted or pre-validated data.

Default values are respected, but no other validation is performed. Behaves as if `Config.extra = 'allow'` was set, since it adds all passed values.

empty

def empty(

) -> 'Fingerprint'

Return a Fingerprint that, when combined, will return Fingerprint.empty()

View Source
    @classmethod
    def empty(cls) -> Fingerprint:
        """Create the key-less Fingerprint; combining anything with it gives `empty()` again."""
        # A key of None is what marks a Fingerprint as empty.
        return cls(key=None)

from_int

def from_int(
    x: 'int',
    /
) -> 'Fingerprint'
View Source
    @classmethod
    def from_int(cls, x: int, /) -> Fingerprint:
        """Create a Fingerprint from a plain `int` by converting it to `int64`."""
        as_int64 = int64(x)
        return cls.from_int64(as_int64)

from_int64

def from_int64(
    x: 'int64',
    /
) -> 'Fingerprint'
View Source
    @classmethod
    def from_int64(cls, x: int64, /) -> Fingerprint:
        """Create a Fingerprint whose key is the given `int64`."""
        fingerprint = cls(key=x)
        return fingerprint

from_orm

def from_orm(
    obj: Any
) -> 'Model'

from_string

def from_string(
    x: 'str',
    /
) -> 'Fingerprint'

Fingerprint an arbitrary string.

Fingerprints using Farmhash Fingerprint64, converted to int64 via two's complement.

View Source
    @classmethod
    def from_string(cls, x: str, /) -> Fingerprint:
        """Create a Fingerprint for an arbitrary string.

        The string is hashed with Farmhash Fingerprint64 and the unsigned result is converted to
        int64 via two's complement.
        """
        unsigned = uint64(farmhash.fingerprint64(x))
        return cls.from_uint64(unsigned)

from_uint64

def from_uint64(
    x: 'uint64',
    /
) -> 'Fingerprint'
View Source
    @classmethod
    def from_uint64(cls, x: uint64, /) -> Fingerprint:
        """Create a Fingerprint from a `uint64` by converting it to `int64`."""
        signed = int64(x)
        return cls.from_int64(signed)

identity

def identity(

) -> 'Fingerprint'

Return a Fingerprint that, when combined, will return the other Fingerprint.

View Source
    @classmethod
    def identity(cls) -> Fingerprint:
        """Create the zero-keyed Fingerprint; combining it with another returns that other one."""
        zero = int64(0)
        return cls(key=zero)

parse_file

def parse_file(
    path: Union[str, pathlib.Path],
    *,
    content_type: 'unicode' = None,
    encoding: 'unicode' = 'utf8',
    proto: pydantic.parse.Protocol = None,
    allow_pickle: bool = False
) -> 'Model'

parse_obj

def parse_obj(
    obj: Any
) -> 'Model'

parse_raw

def parse_raw(
    b: Union[str, bytes],
    *,
    content_type: 'unicode' = None,
    encoding: 'unicode' = 'utf8',
    proto: pydantic.parse.Protocol = None,
    allow_pickle: bool = False
) -> 'Model'

schema

def schema(
    by_alias: bool = True,
    ref_template: 'unicode' = '#/definitions/{model}'
) -> 'DictStrAny'

schema_json

def schema_json(
    *,
    by_alias: bool = True,
    ref_template: 'unicode' = '#/definitions/{model}',
    **dumps_kwargs: Any
) -> 'unicode'

update_forward_refs

def update_forward_refs(
    **localns: Any
) -> None

Try to update ForwardRefs on fields based on this Model, globalns and localns.

validate

def validate(
    value: Any
) -> 'Model'

Instance variables

fingerprint
is_empty
is_identity

Methods

combine

def combine(
    self,
    *others: 'Fingerprint'
) -> 'Fingerprint'
View Source
    def combine(self, *others: Fingerprint) -> Fingerprint:
        """XOR this Fingerprint with each of `others` in turn and return the result."""
        combined = self
        for other in others:
            combined = combined ^ other
        return combined

copy

def copy(
    self,
    *,
    deep: 'bool' = False,
    validate: 'bool' = True,
    **kwargs: 'Any'
) -> 'Self'

Duplicate a model, optionally choose which fields to include, exclude and change.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| include | None | fields to include in new model | None |
| exclude | None | fields to exclude from new model, as with values this takes precedence over include | None |
| update | None | values to change/add in the new model. Note: the data is not validated before creating the new model: you should trust this data | None |
| deep | None | set to True to make a deep copy of the model | None |

Returns:

| Type | Description |
|---|---|
| None | new model instance |
View Source
    def copy(self, *, deep: bool = False, validate: bool = True, **kwargs: Any) -> Self:
        """Duplicate the model, re-validating the duplicate unless `validate` is False.

        `deep` and any extra keyword arguments are forwarded to the parent class's `copy`. When
        `validate` is true, the copied field values are passed back through `validate`, and the
        set of explicitly-set fields and the private attributes are then restored on the result.
        """
        duplicate = super().copy(deep=deep, **kwargs)
        if not validate:
            return duplicate

        # NOTE: exclude_unset=False is used so that all existing defaulted fields are reused (as
        # is normal `.copy` behavior).
        #
        # To reduce `repr` noise, .__fields_set__ is reset afterwards to the value from the
        # pre-validation copy (which includes the fields originally set + updated).
        original_fields_set = duplicate.__fields_set__
        field_values = dict(duplicate._iter(to_dict=False, by_alias=False, exclude_unset=False))
        duplicate = duplicate.validate(field_values)
        # object.__setattr__ bypasses frozen model assignment errors.
        object.__setattr__(duplicate, "__fields_set__", set(original_fields_set))

        # Private attributes are missing after validation (only the fields were passed), so
        # carry them over from the original instance.
        for name in self.__private_attributes__:
            value = getattr(self, name, Undefined)
            if value is Undefined:
                continue
            if deep:
                value = deepcopy(value)
            object.__setattr__(duplicate, name, value)
        return duplicate

dict

def dict(
    self,
    *,
    include: Union[ForwardRef('AbstractSetIntStr'), ForwardRef('MappingIntStrAny'), NoneType] = None,
    exclude: Union[ForwardRef('AbstractSetIntStr'), ForwardRef('MappingIntStrAny'), NoneType] = None,
    by_alias: bool = False,
    skip_defaults: Optional[bool] = None,
    exclude_unset: bool = False,
    exclude_defaults: bool = False,
    exclude_none: bool = False
) -> 'DictStrAny'

Generate a dictionary representation of the model, optionally specifying which fields to include or exclude.

json

def json(
    self,
    *,
    include: Union[ForwardRef('AbstractSetIntStr'), ForwardRef('MappingIntStrAny'), NoneType] = None,
    exclude: Union[ForwardRef('AbstractSetIntStr'), ForwardRef('MappingIntStrAny'), NoneType] = None,
    by_alias: bool = False,
    skip_defaults: Optional[bool] = None,
    exclude_unset: bool = False,
    exclude_defaults: bool = False,
    exclude_none: bool = False,
    encoder: Optional[Callable[[Any], Any]] = None,
    models_as_dict: bool = True,
    **dumps_kwargs: Any
) -> 'unicode'

Generate a JSON representation of the model, include and exclude arguments as per dict().

encoder is an optional function to supply as default to json.dumps(), other arguments as per json.dumps().