from typing import Any, Optional, Union
from mlflow.entities.assessment import (
DEFAULT_FEEDBACK_NAME,
Assessment,
AssessmentError,
Expectation,
Feedback,
FeedbackValueType,
)
from mlflow.entities.assessment_source import AssessmentSource
from mlflow.tracing.client import TracingClient
from mlflow.utils.annotations import experimental
@experimental
def log_assessment(trace_id: str, assessment: Assessment) -> Assessment:
"""
.. important::
This API is currently only available for `Databricks Managed MLflow <https://www.databricks.com/product/managed-mlflow>`_.
Logs an assessment to a Trace. The assessment can be either an expectation or feedback.
- Expectation: A label that represents the expected value for a particular operation.
For example, an expected answer for a user question from a chatbot.
- Feedback: A label that represents the feedback on the quality of the operation.
Feedback can come from different sources, such as human judges, heuristic scorers,
or LLM-as-a-Judge.
The following code annotates a trace with feedback provided by LLM-as-a-Judge.
.. code-block:: python
import mlflow
from mlflow.entities import Feedback
feedback = Feedback(
name="faithfulness",
value=0.9,
rationale="The model is faithful to the input.",
metadata={"model": "gpt-4o-mini"},
)
mlflow.log_assessment(trace_id="1234", assessment=feedback)
The following code annotates a trace with a human-provided ground truth, including source information.
When the source is not provided, the default source is set to "default" with type "HUMAN".
.. code-block:: python
import mlflow
from mlflow.entities import AssessmentSource, AssessmentSourceType, Expectation
# Specify the annotator information as a source.
source = AssessmentSource(
source_type=AssessmentSourceType.HUMAN,
source_id="john@example.com",
)
expectation = Expectation(
name="expected_answer",
value=42,
source=source,
)
mlflow.log_assessment(trace_id="1234", assessment=expectation)
The expectation value can be any JSON-serializable value. For example, you may
record the full LLM message as the expectation value.
.. code-block:: python
import mlflow
from mlflow.entities.assessment import Expectation
expectation = Expectation(
name="expected_message",
# Full LLM message including expected tool calls
value={
"role": "assistant",
"content": "The answer is 42.",
"tool_calls": [
{
"id": "1234",
"type": "function",
"function": {"name": "add", "arguments": "40 + 2"},
}
],
},
)
mlflow.log_assessment(trace_id="1234", assessment=expectation)
You can also log error information encountered during the feedback generation process. To do so,
provide an instance of :py:class:`~mlflow.entities.AssessmentError` to the `error`
parameter, and leave the `value` parameter as `None`.
.. code-block:: python
import mlflow
from mlflow.entities import AssessmentError, Feedback
error = AssessmentError(
error_code="RATE_LIMIT_EXCEEDED",
error_message="Rate limit for the judge exceeded.",
)
feedback = Feedback(
trace_id="1234",
name="faithfulness",
error=error,
)
mlflow.log_assessment(trace_id="1234", assessment=feedback)
"""
return TracingClient().log_assessment(trace_id, assessment)
@experimental
def log_expectation(
*,
trace_id: str,
name: str,
value: Any,
source: Optional[AssessmentSource] = None,
metadata: Optional[dict[str, Any]] = None,
span_id: Optional[str] = None,
) -> Assessment:
"""
Logs an expectation (e.g., a ground-truth label) to a Trace. This API only accepts keyword arguments.
Args:
trace_id: The ID of the trace.
name: The name of the expectation assessment, e.g., "expected_answer".
value: The value of the expectation. It can be any JSON-serializable value.
source: The source of the expectation assessment. Must be an instance of
:py:class:`~mlflow.entities.AssessmentSource`. If not provided,
defaults to the CODE source type.
metadata: Additional metadata for the expectation.
span_id: The ID of the span associated with the expectation, if it needs to be
associated with a specific span in the trace.
Returns:
:py:class:`~mlflow.entities.Assessment`: The created expectation assessment.
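Example:
The following is a minimal sketch of logging a ground-truth label. The trace ID and
expected value are illustrative placeholders, and the function is assumed to be exposed
from the top-level mlflow namespace, as with mlflow.log_assessment above.
.. code-block:: python
import mlflow
# Record the expected answer for the operation captured in the (hypothetical) trace "1234".
mlflow.log_expectation(
trace_id="1234",
name="expected_answer",
value=42,
)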
"""
assessment = Expectation(
name=name,
source=source,
value=value,
metadata=metadata,
span_id=span_id,
)
return TracingClient().log_assessment(trace_id, assessment)
@experimental
def update_assessment(
trace_id: str,
assessment_id: str,
assessment: Assessment,
) -> Assessment:
"""
.. important::
This API is currently only available for `Databricks Managed MLflow <https://www.databricks.com/product/managed-mlflow>`_.
Updates an existing assessment (e.g., an expectation or feedback) in a Trace.
Args:
trace_id: The ID of the trace.
assessment_id: The ID of the expectation assessment to update.
assessment: The updated assessment.
Returns:
:py:class:`~mlflow.entities.Assessment`: The updated assessment.
Example:
The following code updates an existing expectation with a new value.
To update other fields, provide the corresponding parameters.
.. code-block:: python
import mlflow
from mlflow.entities import Expectation
# Create an expectation with value 42.
response = mlflow.log_assessment(
trace_id="1234",
assessment=Expectation(name="expected_answer", value=42),
)
assessment_id = response.assessment_id
# Update the expectation with a new value 43.
mlflow.update_assessment(
trace_id="1234",
assessment_id=assessment_id,
assessment=Expectation(name="expected_answer", value=43),
)
"""
return TracingClient().update_assessment(
assessment_id=assessment_id,
trace_id=trace_id,
assessment=assessment,
)
@experimental
def delete_assessment(trace_id: str, assessment_id: str):
"""
.. important::
This API is currently only available for `Databricks Managed MLflow <https://www.databricks.com/product/managed-mlflow>`_.
Deletes an assessment associated with a trace.
Args:
trace_id: The ID of the trace.
assessment_id: The ID of the assessment to delete.
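Example:
A minimal sketch; the trace and assessment IDs are illustrative placeholders, and the
function is assumed to be exposed from the top-level mlflow namespace.
.. code-block:: python
import mlflow
# Remove a previously logged assessment from the trace.
mlflow.delete_assessment(trace_id="1234", assessment_id="5678")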
"""
return TracingClient().delete_assessment(trace_id=trace_id, assessment_id=assessment_id)
@experimental
def log_feedback(
*,
trace_id: str,
name: str = DEFAULT_FEEDBACK_NAME,
value: Optional[FeedbackValueType] = None,
source: Optional[AssessmentSource] = None,
error: Optional[Union[Exception, AssessmentError]] = None,
rationale: Optional[str] = None,
metadata: Optional[dict[str, Any]] = None,
span_id: Optional[str] = None,
) -> Assessment:
"""
Logs feedback to a Trace. This API only takes keyword arguments.
Args:
trace_id: The ID of the trace.
name: The name of the feedback assessment, e.g., "faithfulness". Defaults to
"feedback" if not provided.
value: The value of the feedback. Must be one of the following types:
- float
- int
- str
- bool
- list of values of the same types as above
- dict with string keys and values of the same types as above
source: The source of the feedback assessment. Must be an instance of
:py:class:`~mlflow.entities.AssessmentSource`. If not provided, defaults to
the CODE source type.
error: An error object representing any issues encountered while computing the
feedback, e.g., a timeout error from an LLM judge. Accepts an exception
object, or an :py:class:`~mlflow.entities.AssessmentError` object. Either
this or `value` must be provided.
rationale: The rationale / justification for the feedback.
metadata: Additional metadata for the feedback.
span_id: The ID of the span associated with the feedback, if it needs to be
associated with a specific span in the trace.
Returns:
:py:class:`~mlflow.entities.Assessment`: The created feedback assessment.
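Example:
A minimal sketch of logging a score from an LLM judge. The trace ID, judge identifier,
and score are illustrative placeholders; the LLM_JUDGE source type and the top-level
mlflow.log_feedback entry point are assumed to be available, as with the other APIs above.
.. code-block:: python
import mlflow
from mlflow.entities import AssessmentSource, AssessmentSourceType
# Attribute the feedback to a (hypothetical) LLM judge.
source = AssessmentSource(
source_type=AssessmentSourceType.LLM_JUDGE,
source_id="gpt-4o-mini",
)
mlflow.log_feedback(
trace_id="1234",
name="faithfulness",
value=0.9,
rationale="The answer is grounded in the retrieved context.",
source=source,
)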
"""
assessment = Feedback(
name=name,
source=source,
value=value,
error=error,
rationale=rationale,
metadata=metadata,
span_id=span_id,
)
return TracingClient().log_assessment(trace_id, assessment)