Skip to content

Module arti.types.pandas

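Type adapters that convert between Artigraph `List` types and pandas `Series` / `DataFrame` objects, registered on a `pandas_type_system` that extends the numpy type system.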

View Source
from __future__ import annotations

from typing import Any, cast

import numpy as np

import pandas as pd

from arti.types import List, String, Struct, Type, TypeAdapter, TypeSystem

from arti.types.numpy import numpy_type_system

# TODO: How should (multi)indexes be handled; perhaps as a "hint"?

# Type system registered under the key "pandas"; the adapters in this module attach themselves
# to it via `register_adapter`. It extends the numpy type system.
# NOTE(review): presumably the extension lets Series dtypes resolve through the numpy
# adapters - confirm against TypeSystem's lookup rules.
pandas_type_system = TypeSystem(key="pandas", extends=(numpy_type_system,))

@pandas_type_system.register_adapter
class SeriesAdapter(TypeAdapter):
    """Convert between a List of non-Struct elements and a pd.Series.

    Lists whose element is a Struct are left to the DataFrameAdapter.
    """

    artigraph = List
    system = pd.Series

    @classmethod
    def matches_artigraph(cls, type_: Type, *, hints: dict[str, Any]) -> bool:
        """Return True when `type_` is a List whose element is not a Struct."""
        if not isinstance(type_, cls.artigraph):
            return False
        # Struct elements describe tabular data, which the DataFrameAdapter owns.
        return not isinstance(type_.element, Struct)

    @classmethod
    def to_artigraph(cls, type_: Any, *, hints: dict[str, Any], type_system: TypeSystem) -> Type:
        """Build the List type describing the Series `type_`.

        Non-object dtypes are delegated to `type_system`. Object dtypes are inspected through
        their first value and only strings are supported.

        Raises:
            NotImplementedError: if the Series has object dtype and its first value is not a str.
        """
        dtype = type_.dtype
        is_object = dtype == np.dtype("O")
        if not is_object:
            return List(element=type_system.to_artigraph(dtype, hints=hints))
        # TODO: Should we handle empty series by defaulting to "String", but issuing
        # a warning?
        example_value = type_.iloc[0]
        # TODO: Handle dicts, lists, etc.
        if not isinstance(example_value, str):
            raise NotImplementedError(
                f"Non-string {dtype} is not supported yet, got values of: {example_value}"
            )
        return List(element=String())

    @classmethod
    def matches_system(cls, type_: Any, *, hints: dict[str, Any]) -> bool:
        """Return True when `type_` is a pd.Series instance."""
        series_cls = cls.system
        return isinstance(type_, series_cls)

    @classmethod
    def to_system(cls, type_: Type, *, hints: dict[str, Any], type_system: TypeSystem) -> Any:
        """Build a one-row Series whose dtype is the system equivalent of the List's element.

        The single value is whatever the converted dtype yields when called with no arguments.
        """
        assert isinstance(type_, cls.artigraph)
        element_dtype = type_system.to_system(type_.element, hints=hints)
        placeholder = element_dtype()
        return pd.Series([placeholder], dtype=element_dtype)

@pandas_type_system.register_adapter
class DataFrameAdapter(TypeAdapter):
    """Convert between a List of Structs and a pd.DataFrame.

    Expects a List type like:

    >>> from arti.types import Float64, Int8, List, Struct
    >>> from arti.types.pandas import pandas_type_system
    >>>
    >>> arti_type = List(element=Struct(fields={"col1": Int8(), "col2": Float64()}))
    >>> pandas_type_system.to_system(arti_type, hints={})
       col1  col2
    0     0   0.0
    """

    artigraph = List
    system = pd.DataFrame

    @classmethod
    def matches_artigraph(cls, type_: Type, *, hints: dict[str, Any]) -> bool:
        """Return True when `type_` is a List whose element is a Struct."""
        # Only a List of Structs maps to a frame; a column that merely contains lists does not.
        # A `hint` may eventually be needed to tell the root dataframe apart from columns
        # holding list[dict[...]] values.
        if not isinstance(type_, cls.artigraph):
            return False
        return isinstance(type_.element, Struct)

    @classmethod
    def to_artigraph(cls, type_: Any, *, hints: dict[str, Any], type_system: TypeSystem) -> Type:
        """Build a List of Struct with one field per column of the DataFrame `type_`."""
        assert isinstance(type_, cls.system)
        column_types = {}
        for name in type_.columns:
            # Each column is converted on its own; the result is treated as a List and
            # only its element type is kept as the field type.
            column_type = cast(List, type_system.to_artigraph(type_[name], hints=hints))
            column_types[name] = column_type.element
        return List(element=Struct(fields=column_types))

    @classmethod
    def matches_system(cls, type_: Any, *, hints: dict[str, Any]) -> bool:
        """Return True when `type_` is a pd.DataFrame instance."""
        frame_cls = cls.system
        return isinstance(type_, frame_cls)

    @classmethod
    def to_system(cls, type_: Type, *, hints: dict[str, Any], type_system: TypeSystem) -> Any:
        """Build a DataFrame with one column per field of the List's Struct element."""
        assert isinstance(type_, cls.artigraph)
        struct = type_.element
        assert isinstance(struct, Struct)
        columns = {}
        for name, subtype in struct.fields.items():
            # NOTE: Each field type is wrapped as List(...) to match the SeriesAdapter.
            columns[name] = type_system.to_system(List(element=subtype), hints=hints)
        return pd.DataFrame(columns)

Variables

pandas_type_system

Classes

DataFrameAdapter

class DataFrameAdapter(
    /,
    *args,
    **kwargs
)
View Source
@pandas_type_system.register_adapter
class DataFrameAdapter(TypeAdapter):
    """Convert between a List of Structs and a pd.DataFrame.

    Expects a List type like:

    >>> from arti.types import Float64, Int8, List, Struct
    >>> from arti.types.pandas import pandas_type_system
    >>>
    >>> arti_type = List(element=Struct(fields={"col1": Int8(), "col2": Float64()}))
    >>> pandas_type_system.to_system(arti_type, hints={})
       col1  col2
    0     0   0.0
    """

    artigraph = List
    system = pd.DataFrame

    @classmethod
    def matches_artigraph(cls, type_: Type, *, hints: dict[str, Any]) -> bool:
        """Return True when `type_` is a List whose element is a Struct."""
        # Only a List of Structs maps to a frame; a column that merely contains lists does not.
        # A `hint` may eventually be needed to tell the root dataframe apart from columns
        # holding list[dict[...]] values.
        if not isinstance(type_, cls.artigraph):
            return False
        return isinstance(type_.element, Struct)

    @classmethod
    def to_artigraph(cls, type_: Any, *, hints: dict[str, Any], type_system: TypeSystem) -> Type:
        """Build a List of Struct with one field per column of the DataFrame `type_`."""
        assert isinstance(type_, cls.system)
        column_types = {}
        for name in type_.columns:
            # Each column is converted on its own; the result is treated as a List and
            # only its element type is kept as the field type.
            column_type = cast(List, type_system.to_artigraph(type_[name], hints=hints))
            column_types[name] = column_type.element
        return List(element=Struct(fields=column_types))

    @classmethod
    def matches_system(cls, type_: Any, *, hints: dict[str, Any]) -> bool:
        """Return True when `type_` is a pd.DataFrame instance."""
        frame_cls = cls.system
        return isinstance(type_, frame_cls)

    @classmethod
    def to_system(cls, type_: Type, *, hints: dict[str, Any], type_system: TypeSystem) -> Any:
        """Build a DataFrame with one column per field of the List's Struct element."""
        assert isinstance(type_, cls.artigraph)
        struct = type_.element
        assert isinstance(struct, Struct)
        columns = {}
        for name, subtype in struct.fields.items():
            # NOTE: Each field type is wrapped as List(...) to match the SeriesAdapter.
            columns[name] = type_system.to_system(List(element=subtype), hints=hints)
        return pd.DataFrame(columns)

Ancestors (in MRO)

  • arti.types.TypeAdapter

Class variables

artigraph
key
priority
system

Static methods

matches_artigraph

def matches_artigraph(
    type_: 'Type',
    *,
    hints: 'dict[str, Any]'
) -> 'bool'
View Source
    @classmethod
    def matches_artigraph(cls, type_: Type, *, hints: dict[str, Any]) -> bool:
        """Return True when `type_` is a List whose element is a Struct."""
        # Only a List of Structs maps to a frame; a column that merely contains lists does not.
        # A `hint` may eventually be needed to tell the root dataframe apart from columns
        # holding list[dict[...]] values.
        if not isinstance(type_, cls.artigraph):
            return False
        return isinstance(type_.element, Struct)

matches_system

def matches_system(
    type_: 'Any',
    *,
    hints: 'dict[str, Any]'
) -> 'bool'
View Source
    @classmethod
    def matches_system(cls, type_: Any, *, hints: dict[str, Any]) -> bool:
        """Return True when `type_` is an instance of this adapter's `system` class."""
        system_cls = cls.system
        return isinstance(type_, system_cls)

to_artigraph

def to_artigraph(
    type_: 'Any',
    *,
    hints: 'dict[str, Any]',
    type_system: 'TypeSystem'
) -> 'Type'
View Source
    @classmethod
    def to_artigraph(cls, type_: Any, *, hints: dict[str, Any], type_system: TypeSystem) -> Type:
        """Build a List of Struct with one field per column of `type_`."""
        assert isinstance(type_, cls.system)
        column_types = {}
        for name in type_.columns:
            # Each column is converted on its own; the result is treated as a List and
            # only its element type is kept as the field type.
            column_type = cast(List, type_system.to_artigraph(type_[name], hints=hints))
            column_types[name] = column_type.element
        return List(element=Struct(fields=column_types))

to_system

def to_system(
    type_: 'Type',
    *,
    hints: 'dict[str, Any]',
    type_system: 'TypeSystem'
) -> 'Any'
View Source
    @classmethod
    def to_system(cls, type_: Type, *, hints: dict[str, Any], type_system: TypeSystem) -> Any:
        """Build a DataFrame with one column per field of the List's Struct element."""
        assert isinstance(type_, cls.artigraph)
        struct = type_.element
        assert isinstance(struct, Struct)
        columns = {}
        for name, subtype in struct.fields.items():
            # NOTE: Each field type is wrapped as List(...) to match the SeriesAdapter.
            columns[name] = type_system.to_system(List(element=subtype), hints=hints)
        return pd.DataFrame(columns)

SeriesAdapter

class SeriesAdapter(
    /,
    *args,
    **kwargs
)
View Source
@pandas_type_system.register_adapter
class SeriesAdapter(TypeAdapter):
    """Convert between a List of non-Struct elements and a pd.Series.

    Lists whose element is a Struct are left to the DataFrameAdapter.
    """

    artigraph = List
    system = pd.Series

    @classmethod
    def matches_artigraph(cls, type_: Type, *, hints: dict[str, Any]) -> bool:
        """Return True when `type_` is a List whose element is not a Struct."""
        if not isinstance(type_, cls.artigraph):
            return False
        # Struct elements describe tabular data, which the DataFrameAdapter owns.
        return not isinstance(type_.element, Struct)

    @classmethod
    def to_artigraph(cls, type_: Any, *, hints: dict[str, Any], type_system: TypeSystem) -> Type:
        """Build the List type describing the Series `type_`.

        Non-object dtypes are delegated to `type_system`. Object dtypes are inspected through
        their first value and only strings are supported.

        Raises:
            NotImplementedError: if the Series has object dtype and its first value is not a str.
        """
        dtype = type_.dtype
        is_object = dtype == np.dtype("O")
        if not is_object:
            return List(element=type_system.to_artigraph(dtype, hints=hints))
        # TODO: Should we handle empty series by defaulting to "String", but issuing
        # a warning?
        example_value = type_.iloc[0]
        # TODO: Handle dicts, lists, etc.
        if not isinstance(example_value, str):
            raise NotImplementedError(
                f"Non-string {dtype} is not supported yet, got values of: {example_value}"
            )
        return List(element=String())

    @classmethod
    def matches_system(cls, type_: Any, *, hints: dict[str, Any]) -> bool:
        """Return True when `type_` is a pd.Series instance."""
        series_cls = cls.system
        return isinstance(type_, series_cls)

    @classmethod
    def to_system(cls, type_: Type, *, hints: dict[str, Any], type_system: TypeSystem) -> Any:
        """Build a one-row Series whose dtype is the system equivalent of the List's element.

        The single value is whatever the converted dtype yields when called with no arguments.
        """
        assert isinstance(type_, cls.artigraph)
        element_dtype = type_system.to_system(type_.element, hints=hints)
        placeholder = element_dtype()
        return pd.Series([placeholder], dtype=element_dtype)

Ancestors (in MRO)

  • arti.types.TypeAdapter

Class variables

artigraph
key
priority
system

Static methods

matches_artigraph

def matches_artigraph(
    type_: 'Type',
    *,
    hints: 'dict[str, Any]'
) -> 'bool'
View Source
    @classmethod
    def matches_artigraph(cls, type_: Type, *, hints: dict[str, Any]) -> bool:
        """Return True when `type_` is a List whose element is not a Struct."""
        if not isinstance(type_, cls.artigraph):
            return False
        # Struct elements describe tabular data, which the DataFrameAdapter owns.
        return not isinstance(type_.element, Struct)

matches_system

def matches_system(
    type_: 'Any',
    *,
    hints: 'dict[str, Any]'
) -> 'bool'
View Source
    @classmethod
    def matches_system(cls, type_: Any, *, hints: dict[str, Any]) -> bool:
        """Return True when `type_` is an instance of this adapter's `system` class."""
        system_cls = cls.system
        return isinstance(type_, system_cls)

to_artigraph

def to_artigraph(
    type_: 'Any',
    *,
    hints: 'dict[str, Any]',
    type_system: 'TypeSystem'
) -> 'Type'
View Source
    @classmethod
    def to_artigraph(cls, type_: Any, *, hints: dict[str, Any], type_system: TypeSystem) -> Type:
        """Build the List type describing the Series `type_`.

        Non-object dtypes are delegated to `type_system`. Object dtypes are inspected through
        their first value and only strings are supported.

        Raises:
            NotImplementedError: if the Series has object dtype and its first value is not a str.
        """
        dtype = type_.dtype
        is_object = dtype == np.dtype("O")
        if not is_object:
            return List(element=type_system.to_artigraph(dtype, hints=hints))
        # TODO: Should we handle empty series by defaulting to "String", but issuing
        # a warning?
        example_value = type_.iloc[0]
        # TODO: Handle dicts, lists, etc.
        if not isinstance(example_value, str):
            raise NotImplementedError(
                f"Non-string {dtype} is not supported yet, got values of: {example_value}"
            )
        return List(element=String())

to_system

def to_system(
    type_: 'Type',
    *,
    hints: 'dict[str, Any]',
    type_system: 'TypeSystem'
) -> 'Any'
View Source
    @classmethod
    def to_system(cls, type_: Type, *, hints: dict[str, Any], type_system: TypeSystem) -> Any:
        """Build a one-row Series whose dtype is the system equivalent of the List's element.

        The single value is whatever the converted dtype yields when called with no arguments.
        """
        assert isinstance(type_, cls.artigraph)
        element_dtype = type_system.to_system(type_.element, hints=hints)
        placeholder = element_dtype()
        return pd.Series([placeholder], dtype=element_dtype)