Source code for mlflow.genai.scheduled_scorers

from dataclasses import dataclass
from typing import Optional

from mlflow.genai.scorers.base import Scorer
from mlflow.utils.annotations import experimental

_ERROR_MSG = (
    "The `databricks-agents` package is required to use `mlflow.genai.scheduled_scorers`. "
    "Please install it with `pip install databricks-agents`."
)


[docs]@experimental(version="3.0.0") @dataclass() class ScorerScheduleConfig: """ A scheduled scorer configuration for automated monitoring of generative AI applications. Scheduled scorers are used to automatically evaluate traces logged to MLflow experiments by production applications. They are part of `Databricks Lakehouse Monitoring for GenAI <https://docs.databricks.com/aws/en/generative-ai/agent-evaluation/monitoring>`_, which helps track quality metrics like groundedness, safety, and guideline adherence alongside operational metrics like volume, latency, and cost. When configured, scheduled scorers run automatically in the background to evaluate a sample of traces based on the specified sampling rate and filter criteria. The Assessments are displayed in the Traces tab of the MLflow experiment and can be used to identify quality issues in production. Args: scorer: The scorer function to run on sampled traces. Must be either a built-in scorer (e.g., Safety, Correctness) or a function decorated with @scorer. Subclasses of Scorer are not supported. scheduled_scorer_name: The name for this scheduled scorer configuration within the experiment. This name must be unique among all scheduled scorers in the same experiment. We recommend using the scorer's name (e.g., scorer.name) for consistency. sample_rate: The fraction of traces to evaluate, between 0.0 and 1.0. For example, 0.1 means 10% of traces will be randomly selected for evaluation. filter_string: An optional MLflow search_traces compatible filter string to apply before sampling traces. Only traces matching this filter will be considered for evaluation. Uses the same syntax as mlflow.search_traces(). Example: .. code-block:: python from mlflow.genai.scorers import Safety, scorer from mlflow.genai.scheduled_scorers import ScorerScheduleConfig # Using a built-in scorer safety_config = ScorerScheduleConfig( scorer=Safety(), scheduled_scorer_name="production_safety", sample_rate=0.2, # Evaluate 20% of traces filter_string="trace.status = 'OK'", ) # Using a custom scorer @scorer def response_length(outputs): return len(str(outputs)) > 100 length_config = ScorerScheduleConfig( scorer=response_length, scheduled_scorer_name="adequate_length", sample_rate=0.1, # Evaluate 10% of traces filter_string="trace.status = 'OK'", ) Note: Scheduled scorers are executed automatically by Databricks and do not need to be manually triggered. The Assessments appear in the Traces tab of the MLflow experiment. Only traces logged directly to the experiment are monitored; traces logged to individual runs within the experiment are not evaluated. .. warning:: This API is in Beta and may change or be removed in a future release without warning. """ scorer: Scorer scheduled_scorer_name: str sample_rate: float filter_string: Optional[str] = None