from typing import Any, Optional, Union
from mlflow.entities.assessment import (
DEFAULT_FEEDBACK_NAME,
Assessment,
AssessmentError,
Expectation,
Feedback,
FeedbackValueType,
)
from mlflow.entities.assessment_source import AssessmentSource
from mlflow.tracing.client import TracingClient
from mlflow.utils.annotations import experimental
@experimental
def log_assessment(trace_id: str, assessment: Assessment) -> Assessment:
"""
.. important::
This API is currently only available for `Databricks Managed MLflow <https://www.databricks.com/product/managed-mlflow>`_.
Logs an assessment to a Trace. The assessment can be either an expectation or feedback.
- Expectation: A label that represents the expected value for a particular operation.
For example, an expected answer for a user question from a chatbot.
- Feedback: A label that represents the feedback on the quality of the operation.
Feedback can come from different sources, such as human judges, heuristic scorers,
or LLM-as-a-Judge.
The following code annotates a trace with feedback provided by LLM-as-a-Judge.
.. code-block:: python
import mlflow
from mlflow.entities import Feedback
feedback = Feedback(
name="faithfulness",
value=0.9,
rationale="The model is faithful to the input.",
metadata={"model": "gpt-4o-mini"},
)
mlflow.log_assessment(trace_id="1234", assessment=feedback)
The following code annotates a trace with a human-provided ground truth, including source information.
When the source is not provided, the default source is set to "default" with type "HUMAN".
.. code-block:: python
import mlflow
from mlflow.entities import AssessmentSource, AssessmentSourceType, Expectation
# Specify the annotator information as a source.
source = AssessmentSource(
source_type=AssessmentSourceType.HUMAN,
source_id="john@example.com",
)
expectation = Expectation(
name="expected_answer",
value=42,
source=source,
)
mlflow.log_assessment(trace_id="1234", assessment=expectation)
The expectation value can be any JSON-serializable value. For example, you may
record the full LLM message as the expectation value.
.. code-block:: python
import mlflow
from mlflow.entities.assessment import Expectation
expectation = Expectation(
name="expected_message",
# Full LLM message including expected tool calls
value={
"role": "assistant",
"content": "The answer is 42.",
"tool_calls": [
{
"id": "1234",
"type": "function",
"function": {"name": "add", "arguments": "40 + 2"},
}
],
},
)
mlflow.log_assessment(trace_id="1234", assessment=expectation)
You can also log error information encountered during the feedback generation process. To do so,
provide an instance of :py:class:`~mlflow.entities.AssessmentError` to the `error`
parameter, and leave the `value` parameter as `None`.
.. code-block:: python
import mlflow
from mlflow.entities import AssessmentError, Feedback
error = AssessmentError(
error_code="RATE_LIMIT_EXCEEDED",
error_message="Rate limit for the judge exceeded.",
)
feedback = Feedback(
trace_id="1234",
name="faithfulness",
error=error,
)
mlflow.log_assessment(trace_id="1234", assessment=feedback)
"""
return TracingClient().log_assessment(trace_id, assessment)
@experimental
def log_expectation(
*,
trace_id: str,
name: str,
value: Any,
source: Optional[AssessmentSource] = None,
metadata: Optional[dict[str, Any]] = None,
span_id: Optional[str] = None,
) -> Assessment:
"""
Logs an expectation (e.g., a ground-truth label) to a Trace. This API only accepts keyword arguments.
Args:
trace_id: The ID of the trace.
name: The name of the expectation assessment, e.g., "expected_answer".
value: The value of the expectation. It can be any JSON-serializable value.
source: The source of the expectation assessment. Must be an instance of
:py:class:`~mlflow.entities.AssessmentSource`. If not provided,
defaults to the CODE source type.
metadata: Additional metadata for the expectation.
span_id: The ID of the span associated with the expectation, if it needs to be
associated with a specific span in the trace.
Returns:
:py:class:`~mlflow.entities.Assessment`: The created expectation assessment.
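Example:
The following is a minimal sketch of logging a ground-truth label. The trace ID and
expected value are illustrative placeholders, and the function is assumed to be exposed
from the top-level mlflow namespace, as with mlflow.log_assessment above.
.. code-block:: python
import mlflow
# Record the expected answer for the operation captured in the (hypothetical) trace "1234".
mlflow.log_expectation(
trace_id="1234",
name="expected_answer",
value=42,
)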
"""
assessment = Expectation(
name=name,
source=source,
value=value,
metadata=metadata,
span_id=span_id,
)
return TracingClient().log_assessment(trace_id, assessment)
@experimental
def update_assessment(
trace_id: str,
assessment_id: str,
assessment: Assessment,
) -> Assessment:
"""
.. important::
This API is currently only available for `Databricks Managed MLflow <https://www.databricks.com/product/managed-mlflow>`_.
Updates an existing assessment (e.g., an expectation or feedback) in a Trace.
Args:
trace_id: The ID of the trace.
assessment_id: The ID of the expectation assessment to update.
assessment: The updated assessment.
Returns:
:py:class:`~mlflow.entities.Assessment`: The updated assessment.
Example:
The following code updates an existing expectation with a new value.
To update other fields, provide the corresponding parameters.
.. code-block:: python
import mlflow
from mlflow.entities import Expectation
# Create an expectation with value 42.
response = mlflow.log_assessment(
trace_id="1234",
assessment=Expectation(name="expected_answer", value=42),
)
assessment_id = response.assessment_id
# Update the expectation with a new value 43.
mlflow.update_assessment(
trace_id="1234",
assessment_id=assessment_id,
assessment=Expectation(name="expected_answer", value=43),
)
"""
return TracingClient().update_assessment(
assessment_id=assessment_id,
trace_id=trace_id,
assessment=assessment,
)
@experimental
def delete_assessment(trace_id: str, assessment_id: str):
"""
.. important::
This API is currently only available for `Databricks Managed MLflow <https://www.databricks.com/product/managed-mlflow>`_.
Deletes an assessment associated with a trace.
Args:
trace_id: The ID of the trace.
assessment_id: The ID of the assessment to delete.
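Example:
A minimal sketch; the trace and assessment IDs are illustrative placeholders, and the
function is assumed to be exposed from the top-level mlflow namespace.
.. code-block:: python
import mlflow
# Remove a previously logged assessment from the trace.
mlflow.delete_assessment(trace_id="1234", assessment_id="5678")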
"""
return TracingClient().delete_assessment(trace_id=trace_id, assessment_id=assessment_id)
@experimental
def log_feedback(
*,
trace_id: str,
name: str = DEFAULT_FEEDBACK_NAME,
value: Optional[FeedbackValueType] = None,
source: Optional[AssessmentSource] = None,
error: Optional[Union[Exception, AssessmentError]] = None,
rationale: Optional[str] = None,
metadata: Optional[dict[str, Any]] = None,
span_id: Optional[str] = None,
) -> Assessment:
"""
Logs feedback to a Trace. This API only takes keyword arguments.
Args:
trace_id: The ID of the trace.
name: The name of the feedback assessment, e.g., "faithfulness". Defaults to
"feedback" if not provided.
value: The value of the feedback. Must be one of the following types:
- float
- int
- str
- bool
- list of values of the same types as above
- dict with string keys and values of the same types as above
source: The source of the feedback assessment. Must be an instance of
:py:class:`~mlflow.entities.AssessmentSource`. If not provided, defaults to
the CODE source type.
error: An error object representing any issues encountered while computing the
feedback, e.g., a timeout error from an LLM judge. Accepts an exception
object, or an :py:class:`~mlflow.entities.AssessmentError` object. Either
this or `value` must be provided.
rationale: The rationale / justification for the feedback.
metadata: Additional metadata for the feedback.
span_id: The ID of the span associated with the feedback, if it needs to be
associated with a specific span in the trace.
Returns:
:py:class:`~mlflow.entities.Assessment`: The created feedback assessment.
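Example:
A minimal sketch of logging a score from an LLM judge. The trace ID, judge identifier,
and score are illustrative placeholders; the LLM_JUDGE source type and the top-level
mlflow.log_feedback entry point are assumed to be available, as with the other APIs above.
.. code-block:: python
import mlflow
from mlflow.entities import AssessmentSource, AssessmentSourceType
# Attribute the feedback to a (hypothetical) LLM judge.
source = AssessmentSource(
source_type=AssessmentSourceType.LLM_JUDGE,
source_id="gpt-4o-mini",
)
mlflow.log_feedback(
trace_id="1234",
name="faithfulness",
value=0.9,
rationale="The answer is grounded in the retrieved context.",
source=source,
)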
"""
assessment = Feedback(
name=name,
source=source,
value=value,
error=error,
rationale=rationale,
metadata=metadata,
span_id=span_id,
)
return TracingClient().log_assessment(trace_id, assessment)