Source code for mlflow.entities.assessment_source

import warnings
from dataclasses import asdict, dataclass
from typing import Any, Optional

from mlflow.entities._mlflow_object import _MlflowObject
from mlflow.exceptions import MlflowException
from mlflow.protos.assessments_pb2 import AssessmentSource as ProtoAssessmentSource
from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
from mlflow.utils.annotations import experimental


@experimental(version="2.21.0")
@dataclass
class AssessmentSource(_MlflowObject):
    """
    Source of an assessment (human, LLM as a judge with GPT-4, etc).

    When recording an assessment, MLflow mandates providing a source information
    to keep track of how the assessment is conducted.

    Args:
        source_type: The type of the assessment source. Must be one of the constants
            defined on AssessmentSourceType (e.g. ``AssessmentSourceType.HUMAN``) or the
            equivalent string. String values are matched case-insensitively and stored
            in their upper-case form.
        source_id: An identifier for the source, e.g. user ID or LLM judge ID. If not
            provided, the default value "default" is used. May be ``None`` on instances
            created by :py:meth:`from_proto` from a proto with an empty ``source_id``.

    Note:
        The legacy AssessmentSourceType "AI_JUDGE" is deprecated and will be resolved as
        "LLM_JUDGE". You will receive a warning if using this deprecated value. This
        legacy term will be removed in a future version of MLflow.

    Example:

    Human annotation can be represented with a source type of "HUMAN":

    .. code-block:: python

        import mlflow
        from mlflow.entities.assessment import AssessmentSource, AssessmentSourceType

        source = AssessmentSource(
            source_type=AssessmentSourceType.HUMAN,  # or "HUMAN"
            source_id="bob@example.com",
        )

    LLM-as-a-judge can be represented with a source type of "LLM_JUDGE":

    .. code-block:: python

        import mlflow
        from mlflow.entities.assessment import AssessmentSource, AssessmentSourceType

        source = AssessmentSource(
            source_type=AssessmentSourceType.LLM_JUDGE,  # or "LLM_JUDGE"
            source_id="gpt-4o-mini",
        )

    Heuristic evaluation can be represented with a source type of "CODE":

    .. code-block:: python

        import mlflow
        from mlflow.entities.assessment import AssessmentSource, AssessmentSourceType

        source = AssessmentSource(
            source_type=AssessmentSourceType.CODE,  # or "CODE"
            source_id="repo/evaluation_script.py",
        )

    To record more context about the assessment, you can use the `metadata` field of
    the assessment logging APIs as well.
    """

    source_type: str
    # Optional because ``from_proto`` stores ``None`` when the proto carries an empty
    # ``source_id`` and ``to_proto`` skips the field when it is ``None``.
    source_id: Optional[str] = "default"

    def __post_init__(self):
        # Validate and normalize the source type right after the dataclass-generated
        # __init__ has run, so the stored value is always a canonical upper-case name.
        self.source_type = AssessmentSourceType._standardize(self.source_type)

    def to_dictionary(self) -> dict[str, Any]:
        """Return the dataclass fields of this source as a plain dictionary."""
        return asdict(self)

    @classmethod
    def from_dictionary(cls, source_dict: dict[str, Any]) -> "AssessmentSource":
        """
        Create a source from a dictionary.

        Args:
            source_dict: Mapping whose keys are the dataclass field names; it is
                expanded directly into keyword arguments of the constructor.
        """
        return cls(**source_dict)

    def to_proto(self) -> ProtoAssessmentSource:
        """
        Convert this source into its protobuf message.

        ``source_id`` is only written to the message when it is not ``None``.
        """
        source = ProtoAssessmentSource()
        source.source_type = ProtoAssessmentSource.SourceType.Value(self.source_type)
        if self.source_id is not None:
            source.source_id = self.source_id
        return source

    @classmethod
    def from_proto(cls, proto) -> "AssessmentSource":
        """
        Create a source from its protobuf message.

        An empty ``source_id`` on the message is mapped to ``None``.
        """
        # Use ``cls`` (like ``from_dictionary``) so subclasses get an instance of
        # their own type rather than the base class.
        return cls(
            source_type=AssessmentSourceType.from_proto(proto.source_type),
            source_id=proto.source_id or None,
        )


@experimental(version="2.21.0")
class AssessmentSourceType:
    """
    Enumeration and validator for assessment source types.

    This class provides constants for valid assessment source types and handles validation
    and standardization of source type values. It supports both direct constant access and
    instance creation with string validation.

    The class automatically handles:

    - Case-insensitive string inputs (converts to uppercase)
    - Deprecation warnings for legacy values (AI_JUDGE → LLM_JUDGE)
    - Validation of source type values (invalid values, including non-string inputs,
      raise ``MlflowException`` with the ``INVALID_PARAMETER_VALUE`` error code)

    Available source types:
        - HUMAN: Assessment performed by a human evaluator
        - LLM_JUDGE: Assessment performed by an LLM-as-a-judge (e.g., GPT-4)
        - CODE: Assessment performed by deterministic code/heuristics
        - SOURCE_TYPE_UNSPECIFIED: Default when source type is not specified

    Note:
        The legacy "AI_JUDGE" type is deprecated and automatically converted to "LLM_JUDGE"
        with a deprecation warning. This ensures backward compatibility while encouraging
        migration to the new terminology.

    Example:
        Using class constants directly:

        .. code-block:: python

            from mlflow.entities.assessment import AssessmentSource, AssessmentSourceType

            # Direct constant usage
            source = AssessmentSource(source_type=AssessmentSourceType.LLM_JUDGE, source_id="gpt-4")

        String validation through instance creation:

        .. code-block:: python

            # String input - case insensitive
            source = AssessmentSource(
                source_type="llm_judge",  # Will be standardized to "LLM_JUDGE"
                source_id="gpt-4",
            )

            # Deprecated value - triggers warning
            source = AssessmentSource(
                source_type="AI_JUDGE",  # Warning: converts to "LLM_JUDGE"
                source_id="gpt-4",
            )
    """

    SOURCE_TYPE_UNSPECIFIED = "SOURCE_TYPE_UNSPECIFIED"
    LLM_JUDGE = "LLM_JUDGE"
    AI_JUDGE = "AI_JUDGE"  # Deprecated, use LLM_JUDGE instead
    HUMAN = "HUMAN"
    CODE = "CODE"
    # AI_JUDGE is intentionally absent: it is rewritten to LLM_JUDGE before validation.
    _SOURCE_TYPES = [SOURCE_TYPE_UNSPECIFIED, LLM_JUDGE, HUMAN, CODE]

    def __init__(self, source_type: str):
        """
        Validate ``source_type`` and keep its canonical form.

        Args:
            source_type: A source type name (case-insensitive) or an existing
                ``AssessmentSourceType`` instance.

        Raises:
            MlflowException: If the value is not a recognized source type.
        """
        self._source_type = AssessmentSourceType._parse(source_type)

    @staticmethod
    def _parse(source_type: str) -> str:
        """
        Return the canonical upper-case name for ``source_type``.

        Args:
            source_type: A source type name (case-insensitive) or an existing
                ``AssessmentSourceType`` instance.

        Returns:
            One of the names listed in ``_SOURCE_TYPES``.

        Raises:
            MlflowException: With ``INVALID_PARAMETER_VALUE`` if the value is not a
                string/instance or does not name a valid source type.
        """
        if isinstance(source_type, AssessmentSourceType):
            # An existing instance has already been validated; unwrap it to its name.
            source_type = str(source_type)
        elif not isinstance(source_type, str):
            # Reject None and other non-strings with the same error as any other bad
            # value, instead of an AttributeError from the ``.upper()`` call below.
            raise MlflowException(
                message=(
                    f"Invalid assessment source type: {source_type!r}. "
                    f"Valid source types: {AssessmentSourceType._SOURCE_TYPES}"
                ),
                error_code=INVALID_PARAMETER_VALUE,
            )

        source_type = source_type.upper()

        # Backwards compatibility shim for mlflow.evaluations.AssessmentSourceType
        if source_type == AssessmentSourceType.AI_JUDGE:
            warnings.warn(
                "AI_JUDGE is deprecated. Use LLM_JUDGE instead.",
                DeprecationWarning,
            )
            source_type = AssessmentSourceType.LLM_JUDGE

        if source_type not in AssessmentSourceType._SOURCE_TYPES:
            raise MlflowException(
                message=(
                    f"Invalid assessment source type: {source_type}. "
                    f"Valid source types: {AssessmentSourceType._SOURCE_TYPES}"
                ),
                error_code=INVALID_PARAMETER_VALUE,
            )
        return source_type

    def __str__(self):
        """Return the canonical upper-case source type name."""
        return self._source_type

    @staticmethod
    def _standardize(source_type: str) -> str:
        """Validate ``source_type`` and return its canonical upper-case name."""
        return str(AssessmentSourceType(source_type))

    @classmethod
    def from_proto(cls, proto_source_type) -> str:
        """Return the name of the given protobuf ``SourceType`` enum value."""
        return ProtoAssessmentSource.SourceType.Name(proto_source_type)