Source code for mlflow.entities.assessment_source

import warnings
from dataclasses import asdict, dataclass
from typing import Any

from mlflow.entities._mlflow_object import _MlflowObject
from mlflow.exceptions import MlflowException
from mlflow.protos.assessments_pb2 import AssessmentSource as ProtoAssessmentSource
from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
from mlflow.utils.annotations import experimental


@experimental(version="2.21.0")
@dataclass
class AssessmentSource(_MlflowObject):
    """
    Source of an assessment (human, LLM-as-a-judge with GPT-4, etc.).

    When recording an assessment, MLflow requires source information to keep
    track of how the assessment was conducted.

    Args:
        source_type: The type of the assessment source. Must be one of the
            values in the AssessmentSourceType enum or an instance of the
            enumerator value.
        source_id: An identifier for the source, e.g., a user ID or an LLM
            judge ID. If not provided, the default value "default" is used.

    Note:
        The legacy AssessmentSourceType "AI_JUDGE" is deprecated and is
        resolved to "LLM_JUDGE". A warning is emitted when this deprecated
        value is used, and it will be removed in a future version of MLflow.

    Example:

        A human annotation can be represented with a source type of "HUMAN":

        .. code-block:: python

            import mlflow
            from mlflow.entities.assessment import AssessmentSource, AssessmentSourceType

            source = AssessmentSource(
                source_type=AssessmentSourceType.HUMAN,  # or "HUMAN"
                source_id="bob@example.com",
            )

        An LLM-as-a-judge can be represented with a source type of "LLM_JUDGE":

        .. code-block:: python

            import mlflow
            from mlflow.entities.assessment import AssessmentSource, AssessmentSourceType

            source = AssessmentSource(
                source_type=AssessmentSourceType.LLM_JUDGE,  # or "LLM_JUDGE"
                source_id="gpt-4o-mini",
            )

        A heuristic evaluation can be represented with a source type of "CODE":

        .. code-block:: python

            import mlflow
            from mlflow.entities.assessment import AssessmentSource, AssessmentSourceType

            source = AssessmentSource(
                source_type=AssessmentSourceType.CODE,  # or "CODE"
                source_id="repo/evaluation_script.py",
            )

        To record more context about the assessment, you can also use the
        ``metadata`` field of the assessment logging APIs.
    """

    source_type: str
    source_id: str = "default"

    def __post_init__(self):
        # Standardize source_type after initialization (e.g., "human" -> "HUMAN")
        self.source_type = AssessmentSourceType._standardize(self.source_type)
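    # Illustrative sketch (assumed usage, not part of the original module):
    # construction standardizes case-insensitive source types via __post_init__.
    #
    #   AssessmentSource(source_type="human").source_type  # -> "HUMAN"
    #   AssessmentSource(source_type="HUMAN").source_id    # -> "default"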
    def to_dictionary(self) -> dict[str, Any]:
        return asdict(self)
    @classmethod
    def from_dictionary(cls, source_dict: dict[str, Any]) -> "AssessmentSource":
        return cls(**source_dict)
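    # Illustrative sketch (assumed usage, not part of the original module):
    # to_dictionary/from_dictionary round-trip through a plain dict.
    #
    #   src = AssessmentSource(source_type="HUMAN", source_id="bob@example.com")
    #   src.to_dictionary()
    #   # -> {"source_type": "HUMAN", "source_id": "bob@example.com"}
    #   AssessmentSource.from_dictionary(src.to_dictionary()) == src
    #   # -> True (dataclass equality compares both fields)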
    def to_proto(self):
        source = ProtoAssessmentSource()
        source.source_type = ProtoAssessmentSource.SourceType.Value(self.source_type)
        if self.source_id is not None:
            source.source_id = self.source_id
        return source
    @classmethod
    def from_proto(cls, proto):
        return AssessmentSource(
            source_type=AssessmentSourceType.from_proto(proto.source_type),
            source_id=proto.source_id if proto.source_id else None,
        )
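    # Illustrative sketch (assumed usage): proto round-trip. SourceType.Value
    # and SourceType.Name are the standard protobuf enum helpers that map
    # between enum names ("LLM_JUDGE") and their integer values.
    #
    #   src = AssessmentSource(source_type=AssessmentSourceType.LLM_JUDGE)
    #   proto = src.to_proto()
    #   AssessmentSource.from_proto(proto) == src
    #   # -> True (source_id "default" is non-empty, so it survives the trip)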
@experimental(version="2.21.0")
class AssessmentSourceType:
    """
    Enumeration and validator for assessment source types.

    This class provides constants for the valid assessment source types and
    handles validation and standardization of source type values. It supports
    both direct constant access and instance creation with string validation.

    The class automatically handles:

    - Case-insensitive string inputs (converted to uppercase)
    - Deprecation warnings for legacy values (AI_JUDGE → LLM_JUDGE)
    - Validation of source type values

    Available source types:

    - HUMAN: Assessment performed by a human evaluator
    - LLM_JUDGE: Assessment performed by an LLM-as-a-judge (e.g., GPT-4)
    - CODE: Assessment performed by deterministic code/heuristics
    - SOURCE_TYPE_UNSPECIFIED: Default when the source type is not specified

    Note:
        The legacy "AI_JUDGE" type is deprecated and automatically converted
        to "LLM_JUDGE" with a deprecation warning. This preserves backward
        compatibility while encouraging migration to the new terminology.

    Example:

        Using class constants directly:

        .. code-block:: python

            from mlflow.entities.assessment import AssessmentSource, AssessmentSourceType

            # Direct constant usage
            source = AssessmentSource(
                source_type=AssessmentSourceType.LLM_JUDGE,
                source_id="gpt-4",
            )

        String validation through instance creation:

        .. code-block:: python

            # String input - case insensitive
            source = AssessmentSource(
                source_type="llm_judge",  # Will be standardized to "LLM_JUDGE"
                source_id="gpt-4",
            )

            # Deprecated value - triggers a warning
            source = AssessmentSource(
                source_type="AI_JUDGE",  # Warning: converted to "LLM_JUDGE"
                source_id="gpt-4",
            )
    """

    SOURCE_TYPE_UNSPECIFIED = "SOURCE_TYPE_UNSPECIFIED"
    LLM_JUDGE = "LLM_JUDGE"
    AI_JUDGE = "AI_JUDGE"  # Deprecated, use LLM_JUDGE instead
    HUMAN = "HUMAN"
    CODE = "CODE"

    # AI_JUDGE is intentionally excluded: it is rewritten to LLM_JUDGE in
    # _parse before validation.
    _SOURCE_TYPES = [SOURCE_TYPE_UNSPECIFIED, LLM_JUDGE, HUMAN, CODE]

    def __init__(self, source_type: str):
        self._source_type = AssessmentSourceType._parse(source_type)

    @staticmethod
    def _parse(source_type: str) -> str:
        source_type = source_type.upper()
        # Backwards compatibility shim for mlflow.evaluations.AssessmentSourceType
        if source_type == AssessmentSourceType.AI_JUDGE:
            warnings.warn(
                "AI_JUDGE is deprecated. Use LLM_JUDGE instead.",
                DeprecationWarning,
            )
            source_type = AssessmentSourceType.LLM_JUDGE
        if source_type not in AssessmentSourceType._SOURCE_TYPES:
            raise MlflowException(
                message=(
                    f"Invalid assessment source type: {source_type}. "
                    f"Valid source types: {AssessmentSourceType._SOURCE_TYPES}"
                ),
                error_code=INVALID_PARAMETER_VALUE,
            )
        return source_type

    def __str__(self):
        return self._source_type

    @staticmethod
    def _standardize(source_type: str) -> str:
        return str(AssessmentSourceType(source_type))
    @classmethod
    def from_proto(cls, proto_source_type) -> str:
        return ProtoAssessmentSource.SourceType.Name(proto_source_type)
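# Illustrative sketch (assumed usage, not part of the original module):
# _standardize accepts any casing and resolves the deprecated alias.
#
#   AssessmentSourceType._standardize("llm_judge")  # -> "LLM_JUDGE"
#   AssessmentSourceType._standardize("AI_JUDGE")   # -> "LLM_JUDGE" (DeprecationWarning)
#   AssessmentSourceType._standardize("INVALID")    # raises MlflowException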