
Module arti.internal.models


View Source
from __future__ import annotations

from collections.abc import Generator, Mapping, Sequence
from copy import deepcopy
from functools import cached_property, partial
from typing import (
    TYPE_CHECKING,
    Annotated,
    Any,
    ClassVar,
    Literal,
    Optional,
    TypeVar,
    Union,
    get_args,
    get_origin,
)

from box import Box
from pydantic import BaseModel, Extra, root_validator, validator
from pydantic.fields import ModelField, Undefined
from pydantic.json import pydantic_encoder as pydantic_json_encoder

from arti.internal.type_hints import Self, is_union, lenient_issubclass
from arti.internal.utils import class_name, frozendict

if TYPE_CHECKING:
    from pydantic.typing import AbstractSetIntStr, MappingIntStrAny

    from arti.fingerprints import Fingerprint
    from arti.types import Type

def _check_types(value: Any, type_: type) -> Any:
    mismatch_error = ValueError(f"expected an instance of {type_}, got: {value}")
    if type_ is Any:
        return value
    origin = get_origin(type_)
    if origin is not None:
        args = get_args(type_)
        if origin is Annotated:
            return _check_types(value, args[0])
        if origin is Literal:
            return _check_types(value, type(args[0]))
        # NOTE: Optional[t] -> Union[t, NoneType]
        if is_union(origin):
            for subtype in args:
                try:
                    return _check_types(value, subtype)
                except ValueError:
                    pass
            raise mismatch_error
        if issubclass(origin, (dict, Mapping)):
            value = _check_types(value, origin)
            for k, v in value.items():
                _check_types(k, args[0])
                _check_types(v, args[1])
            return value
        # Variadic tuples will be handled below
        if issubclass(origin, tuple) and ... not in args:
            value = _check_types(value, origin)
            if len(value) != len(args):
                raise mismatch_error
            for i, subtype in enumerate(args):
                _check_types(value[i], subtype)
            return value
        for t in (tuple, list, set, frozenset, Sequence):
            if issubclass(origin, t):
                value = _check_types(value, origin)
                for subvalue in value:
                    _check_types(subvalue, args[0])
                return value
        if issubclass(origin, type):
            if not lenient_issubclass(value, args[0]):
                raise ValueError(f"expected a subclass of {args[0]}, got: {value}")
            return value
        if set(args) == {Any}:
            return _check_types(value, origin)  # pragma: no cover
        if isinstance(value, origin):  # Other Generic args can't really be checked generally
            return value
        raise ValueError(f"expected an instance of {origin}, got: {value}")  # pragma: no cover
    if isinstance(type_, TypeVar):
        return value  # TODO: Check __bound__, __covariant__, __contravariant__
    # Models are immutable, so we convert all Mappings to frozendicts.
    if isinstance(value, Mapping) and not isinstance(value, frozendict):
        value = frozendict(value)
    if lenient_issubclass(type_, Mapping):
        type_ = frozendict
    if not lenient_issubclass(type(value), type_):
        raise mismatch_error
    return value

_Model = TypeVar("_Model", bound="Model")


class Model(BaseModel):
    # A model can be marked _abstract_ to prevent direct instantiation, such as when it is intended
    # as a base class for other models with arbitrary data. As the subclasses of an _abstract_ model
    # have unknown fields (varying per subclass), we don't have targets to mark abstract with
    # abc.ABC nor typing.Protocol. See [1] for more context.
    #
    # 1: https://github.com/artigraph/artigraph/pull/60#discussion_r669089086
    _abstract_: ClassVar[bool] = True
    _class_key_: ClassVar[str] = class_name()
    _fingerprint_excludes_: ClassVar[Optional[frozenset[str]]] = None
    _fingerprint_includes_: ClassVar[Optional[frozenset[str]]] = None

    @classmethod
    def __init_subclass__(cls, **kwargs: Any) -> None:
        super().__init_subclass__(**kwargs)
        # Default _abstract_ to False if not set explicitly on the class. __dict__ is read-only.
        cls._abstract_ = cls.__dict__.get("_abstract_", False)
        field_names = set(cls.__fields__)
        if cls._fingerprint_excludes_ and (
            unknown_excludes := cls._fingerprint_excludes_ - field_names
        ):
            raise ValueError(f"Unknown `_fingerprint_excludes_` field(s): {unknown_excludes}")
        if cls._fingerprint_includes_ and (
            unknown_includes := cls._fingerprint_includes_ - field_names
        ):
            raise ValueError(f"Unknown `_fingerprint_includes_` field(s): {unknown_includes}")

    @root_validator(pre=True)
    @classmethod
    def _block_abstract_instance(cls, values: dict[str, Any]) -> dict[str, Any]:
        if cls._abstract_:
            raise ValueError(f"{cls} cannot be instantiated directly!")
        return values

    @validator("*", pre=True)
    @classmethod
    def _strict_types(cls, value: Any, field: ModelField) -> Any:
        """Check that the value is a stricter instance of the declared type annotation.

        Pydantic will attempt to *parse* values (eg: "5" -> 5), but we'd prefer stricter values for
        clarity and to avoid silent precision loss (eg: 5.3 -> 5).
        """
        # `field.type_` points to the *inner* type (eg: `int` -> `int`; `tuple[int, ...]` -> `int`)
        # while `field.outer_type_` will (mostly) include the full spec and match the `value` we
        # received. The caveat is that `field.outer_type_` will never be wrapped in `Optional`
        # (though nested fields like `tuple[tuple[Optional[int]]]` would be). Hence, we pull the
        # `field.outer_type_`, but add back the `Optional` wrapping if necessary.
        type_ = field.outer_type_
        if field.allow_none:
            type_ = Optional[type_]
        return _check_types(value, type_)

    # By default, pydantic just compares models by their dict representation, causing models of
    # different types but same fields (eg: Int8 and Int16) to be equivalent. This can be removed if
    # [1] is merged+released.
    #
    # 1: https://github.com/samuelcolvin/pydantic/pull/3066
    def __eq__(self, other: Any) -> bool:
        return self.__class__ == other.__class__ and tuple(self._iter()) == tuple(other._iter())

    def __hash__(self) -> int:
        # Override the default __hash__ to match the fingerprint, which notably excludes
        # `cached_property`s.
        #
        # If `cached_property`s are included, the hash will be different before and after caching,
        # which wreaks havoc if a model is a key in a dict (`key in mydict` will be `False`...).
        #
        # This is only safe as the models are (mostly) frozen.
        assert (key := self.fingerprint.key) is not None
        return key

    # Omit unpassed args in repr by default
    def __repr_args__(self) -> Sequence[tuple[Optional[str], Any]]:
        return [(k, v) for k, v in super().__repr_args__() if k in self.__fields_set__]

    def __str__(self) -> str:
        return repr(self)

    class Config:
        extra = Extra.forbid
        frozen = True
        json_encoders = {frozendict: dict}
        keep_untouched = (cached_property,)
        smart_union = True
        validate_assignment = True  # Unused with frozen, unless that is overridden in a subclass.

    def copy(self, *, deep: bool = False, validate: bool = True, **kwargs: Any) -> Self:
        copy = super().copy(deep=deep, **kwargs)
        if validate:
            # NOTE: We set exclude_unset=False so that all existing defaulted fields are reused (as
            # is normal `.copy` behavior).
            #
            # To reduce `repr` noise, we'll reset .__fields_set__ to those of the pre-validation
            # copy (which includes those originally set + updated).
            fields_set = copy.__fields_set__
            copy = copy.validate(
                dict(copy._iter(to_dict=False, by_alias=False, exclude_unset=False))
            )
            # Use object.__setattr__ to bypass frozen model assignment errors
            object.__setattr__(copy, "__fields_set__", set(fields_set))
            # Copy over the private attributes, which are missing after validation (since we're
            # only passing the fields).
            for name in self.__private_attributes__:
                if (value := getattr(self, name, Undefined)) is not Undefined:
                    if deep:
                        value = deepcopy(value)
                    object.__setattr__(copy, name, value)
        return copy

    @staticmethod
    def _fingerprint_json_encoder(obj: Any, encoder: Any = pydantic_json_encoder) -> Any:
        from arti.fingerprints import Fingerprint

        if isinstance(obj, Fingerprint):
            return obj.key
        if isinstance(obj, Model):
            return obj.fingerprint
        if lenient_issubclass(obj, Model):
            return obj._class_key_  # eg: View.artifact_class
        return encoder(obj)

    @property
    def fingerprint(self) -> Fingerprint:
        from arti.fingerprints import Fingerprint

        # `.json` cannot be used, even with a custom encoder, because it calls `.dict`, which
        # converts the sub-models to dicts. Instead, we want to access `.fingerprint` (in the
        # encoder).
        data = dict(
            sorted(  # Sort to ensure stability
                self._iter(
                    exclude=self._fingerprint_excludes_,
                    include=self._fingerprint_includes_,
                ),
                key=lambda kv: kv[0],
            )
        )
        json_repr = self.__config__.json_dumps(
            data,
            default=partial(self._fingerprint_json_encoder, encoder=self.__json_encoder__),
            sort_keys=True,
        )
        return Fingerprint.from_string(f"{self._class_key_}:{json_repr}")

    @classmethod
    def _get_value(
        cls,
        v: Any,
        to_dict: bool,
        by_alias: bool,
        include: Optional[Union[AbstractSetIntStr, MappingIntStrAny]],
        exclude: Optional[Union[AbstractSetIntStr, MappingIntStrAny]],
        exclude_unset: bool,
        exclude_defaults: bool,
        exclude_none: bool,
    ) -> Any:
        new = super()._get_value(
            v,
            to_dict=to_dict,
            by_alias=by_alias,
            include=include,
            exclude=exclude,
            exclude_unset=exclude_unset,
            exclude_defaults=exclude_defaults,
            exclude_none=exclude_none,
        )
        # Copying dict subclasses doesn't preserve the subclass[1]. Further, we have extra Box
        # configuration (namely frozen_box=True) we need to preserve.
        #
        # 1: https://github.com/pydantic/pydantic/issues/5225
        if isinstance(v, Box):
            return v.__class__(new, **v._Box__box_config())
        return new

    # Filter out non-fields from ._iter (and thus .dict, .json, etc), such as `@cached_property`
    # values after access (which just get cached in .__dict__).
    def _iter(self, *args: Any, **kwargs: Any) -> Generator[tuple[str, Any], None, None]:
        for key, value in super()._iter(*args, **kwargs):
            if key in self.__fields__:
                yield key, value

    @classmethod
    def _pydantic_type_system_post_field_conversion_hook_(
        cls, type_: Type, *, name: str, required: bool
    ) -> Type:
        return type_


def get_field_default(model: type[Model], field: str) -> Optional[Any]:
    return model.__fields__[field].default
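
As a quick illustration of the strict validation and frozen/hash semantics above, consider the following sketch. It assumes arti is importable; `Metric` is a hypothetical subclass invented for this example, not part of the library.

from typing import Optional

from arti.internal.models import Model

class Metric(Model):  # concrete subclasses default to _abstract_ = False
    name: str
    limit: int
    note: Optional[str] = None

m = Metric(name="latency", limit=5)
print(m)  # Metric(name='latency', limit=5) -- unset defaults are omitted from the repr

# Pydantic would normally coerce "5" -> 5; the `_strict_types` validator rejects it instead.
try:
    Metric(name="latency", limit="5")
except ValueError as exc:  # pydantic wraps the mismatch in a ValidationError (a ValueError subclass)
    print(type(exc).__name__)  # ValidationError

# Models are frozen and hash via their fingerprint, so equal instances interchange as dict keys.
cache = {m: "ok"}
print(cache[Metric(name="latency", limit=5)])  # ok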

Variables

TYPE_CHECKING

Functions

get_field_default

def get_field_default(
    model: 'type[Model]',
    field: 'str'
) -> 'Optional[Any]'
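
For illustration, reusing the hypothetical `Metric` model from the sketch above:

from arti.internal.models import get_field_default

print(get_field_default(Metric, "note"))   # None -- the declared default
print(get_field_default(Metric, "limit"))  # None -- required fields have no default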

Classes

Model

class Model(
    __pydantic_self__,
    **data: Any
)
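
The `_abstract_` guard described in the module source above can be exercised like this; `Base` and `Concrete` are invented models used only for the sketch.

from typing import ClassVar

from arti.internal.models import Model

class Base(Model):
    _abstract_: ClassVar[bool] = True  # opt back in to abstractness for this base

class Concrete(Base):  # __init_subclass__ resets _abstract_ to False here
    x: int

Concrete(x=1)  # ok
Base()         # raises a ValidationError wrapping "... cannot be instantiated directly!"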

Ancestors (in MRO)

  • pydantic.main.BaseModel
  • pydantic.utils.Representation

Descendants

  • arti.annotations.Annotation
  • arti.types.Type
  • arti.types._ContainerMixin
  • arti.types._NamedMixin
  • arti.types._TimeMixin
  • arti.types.TypeSystem
  • arti.formats.Format
  • arti.statistics.Statistic
  • arti.fingerprints.Fingerprint
  • arti.partitions.PartitionKey
  • arti.storage.StoragePartition
  • arti.storage.Storage
  • arti.artifacts.Artifact
  • arti.backends.Backend
  • arti.views.View
  • arti.versions.Version
  • arti.producers.Producer
  • arti.producers.ProducerOutput
  • arti.graphs.Graph
  • arti.graphs.GraphSnapshot
  • arti.executors.Executor
  • arti.thresholds.Threshold
  • arti.storage.google.cloud.storage._GCSMixin

Class variables

Config

Static methods

construct

def construct(
    _fields_set: Optional[ForwardRef('SetStr')] = None,
    **values: Any
) -> 'Model'

Creates a new model, setting __dict__ and __fields_set__ from trusted or pre-validated data.

Default values are respected, but no other validation is performed; it behaves as if Config.extra = 'allow' were set, since it adds all passed values.
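
For example (standard pydantic v1 behavior, shown with the hypothetical `Metric` model from the earlier sketch):

m = Metric.construct(name="latency", limit="5")
print(m.limit)  # '5' -- construct() skipped validation, so the strict-type check never ran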

from_orm

def from_orm(
    obj: Any
) -> 'Model'

parse_file

def parse_file(
    path: Union[str, pathlib.Path],
    *,
    content_type: 'unicode' = None,
    encoding: 'unicode' = 'utf8',
    proto: pydantic.parse.Protocol = None,
    allow_pickle: bool = False
) -> 'Model'

parse_obj

def parse_obj(
    obj: Any
) -> 'Model'

parse_raw

def parse_raw(
    b: Union[str, bytes],
    *,
    content_type: 'unicode' = None,
    encoding: 'unicode' = 'utf8',
    proto: pydantic.parse.Protocol = None,
    allow_pickle: bool = False
) -> 'Model'

schema

def schema(
    by_alias: bool = True,
    ref_template: 'unicode' = '#/definitions/{model}'
) -> 'DictStrAny'

schema_json

def schema_json(
    *,
    by_alias: bool = True,
    ref_template: 'unicode' = '#/definitions/{model}',
    **dumps_kwargs: Any
) -> 'unicode'

update_forward_refs

def update_forward_refs(
    **localns: Any
) -> None

Try to update ForwardRefs on fields based on this Model, globalns and localns.

validate

def validate(
    value: Any
) -> 'Model'

Instance variables

fingerprint

A stable Fingerprint derived from the model's _class_key_ and a JSON serialization of its fields (honoring _fingerprint_includes_ / _fingerprint_excludes_).
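
A brief sketch (hypothetical `Metric` model again):

a = Metric(name="latency", limit=5)
b = Metric(name="latency", limit=5)
assert a == b and a.fingerprint == b.fingerprint and hash(a) == hash(b)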

Methods

copy

def copy(
    self,
    *,
    deep: 'bool' = False,
    validate: 'bool' = True,
    **kwargs: 'Any'
) -> 'Self'

Duplicate a model, optionally choose which fields to include, exclude and change.

Parameters:

Name Type Description Default
include None fields to include in new model None
exclude None fields to exclude from new model, as with values this takes precedence over include None
update None values to change/add in the new model. Note: the data is not validated before creating
the new model: you should trust this data None
deep None set to True to make a deep copy of the model None

Returns:

Type Description
None new model instance
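
A sketch of copy-with-revalidation, again with the hypothetical `Metric` model:

m = Metric(name="latency", limit=5)
m2 = m.copy(update={"limit": 10})   # update applied, then the copy is re-validated

try:
    m.copy(update={"limit": "10"})  # re-validation rejects the string, unlike plain pydantic .copy
except ValueError as exc:
    print(type(exc).__name__)       # ValidationError

m3 = m.copy(update={"limit": "10"}, validate=False)  # opts out; behaves like pydantic's .copy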

dict

def dict(
    self,
    *,
    include: Union[ForwardRef('AbstractSetIntStr'), ForwardRef('MappingIntStrAny'), NoneType] = None,
    exclude: Union[ForwardRef('AbstractSetIntStr'), ForwardRef('MappingIntStrAny'), NoneType] = None,
    by_alias: bool = False,
    skip_defaults: Optional[bool] = None,
    exclude_unset: bool = False,
    exclude_defaults: bool = False,
    exclude_none: bool = False
) -> 'DictStrAny'

Generate a dictionary representation of the model, optionally specifying which fields to include or exclude.

json

def json(
    self,
    *,
    include: Union[ForwardRef('AbstractSetIntStr'), ForwardRef('MappingIntStrAny'), NoneType] = None,
    exclude: Union[ForwardRef('AbstractSetIntStr'), ForwardRef('MappingIntStrAny'), NoneType] = None,
    by_alias: bool = False,
    skip_defaults: Optional[bool] = None,
    exclude_unset: bool = False,
    exclude_defaults: bool = False,
    exclude_none: bool = False,
    encoder: Optional[Callable[[Any], Any]] = None,
    models_as_dict: bool = True,
    **dumps_kwargs: Any
) -> 'unicode'

Generate a JSON representation of the model, include and exclude arguments as per dict().

encoder is an optional function to supply as default to json.dumps(), other arguments as per json.dumps().
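
For instance, with the hypothetical `Metric` model (frozendict-valued fields would serialize as plain dicts via Config.json_encoders):

m = Metric(name="latency", limit=5)
print(m.dict())  # {'name': 'latency', 'limit': 5, 'note': None}
print(m.json())  # {"name": "latency", "limit": 5, "note": null}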