Source code for mlflow.genai.labeling.labeling

from typing import TYPE_CHECKING, Any, Iterable, Union

from mlflow.entities import Trace
from mlflow.exceptions import MlflowException
from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE

if TYPE_CHECKING:
    import pandas as pd
    from databricks.agents.review_app import (
        LabelSchema as _LabelSchema,
    )
    from databricks.agents.review_app import (
        ReviewApp as _ReviewApp,
    )
    from databricks.agents.review_app.labeling import Agent as _Agent


[docs]class Agent: """The agent configuration, used for generating responses in the review app. .. note:: This functionality is only available in Databricks. Please run `pip install mlflow[databricks]` to use it. """ def __init__(self, agent: "_Agent"): self._agent = agent @property def agent_name(self) -> str: """The name of the agent.""" return self._agent.agent_name @property def model_serving_endpoint(self) -> str: """The model serving endpoint used by the agent.""" return self._agent.model_serving_endpoint
[docs]class LabelingSession: """A session for labeling items in the review app. .. note:: This functionality is only available in Databricks. Please run `pip install mlflow[databricks]` to use it. """ def __init__( self, *, name: str, assigned_users: list[str], agent: str | None, label_schemas: list[str], labeling_session_id: str, mlflow_run_id: str, review_app_id: str, experiment_id: str, url: str, enable_multi_turn_chat: bool, custom_inputs: dict[str, Any] | None, ): self._name = name self._assigned_users = assigned_users self._agent = agent self._label_schemas = label_schemas self._labeling_session_id = labeling_session_id self._mlflow_run_id = mlflow_run_id self._review_app_id = review_app_id self._experiment_id = experiment_id self._url = url self._enable_multi_turn_chat = enable_multi_turn_chat self._custom_inputs = custom_inputs @property def name(self) -> str: """The name of the labeling session.""" return self._name @property def assigned_users(self) -> list[str]: """The users assigned to label items in the session.""" return self._assigned_users @property def agent(self) -> str | None: """The agent used to generate responses for the items in the session.""" return self._agent @property def label_schemas(self) -> list[str]: """The label schemas used in the session.""" return self._label_schemas @property def labeling_session_id(self) -> str: """The unique identifier of the labeling session.""" return self._labeling_session_id @property def mlflow_run_id(self) -> str: """The MLflow run ID associated with the session.""" return self._mlflow_run_id @property def review_app_id(self) -> str: """The review app ID associated with the session.""" return self._review_app_id @property def experiment_id(self) -> str: """The experiment ID associated with the session.""" return self._experiment_id @property def url(self) -> str: """The URL of the labeling session in the review app.""" return self._url @property def enable_multi_turn_chat(self) -> bool: """Whether multi-turn chat is enabled for the session.""" return self._enable_multi_turn_chat @property def custom_inputs(self) -> dict[str, Any] | None: """Custom inputs used in the session.""" return self._custom_inputs def _get_store(self): """ Get a labeling store instance. This method is defined in order to avoid circular imports. """ from mlflow.genai.labeling.stores import _get_labeling_store return _get_labeling_store()
[docs] def add_dataset( self, dataset_name: str, record_ids: list[str] | None = None ) -> "LabelingSession": """Add a dataset to the labeling session. .. note:: This functionality is only available in Databricks. Please run `pip install mlflow[databricks]` to use it. Args: dataset_name: The name of the dataset. record_ids: Optional. The individual record ids to be added to the session. If not provided, all records in the dataset will be added. Returns: LabelingSession: The updated labeling session. """ store = self._get_store() return store.add_dataset_to_session(self, dataset_name, record_ids)
[docs] def add_traces( self, traces: Union[Iterable[Trace], Iterable[str], "pd.DataFrame"], ) -> "LabelingSession": """Add traces to the labeling session. .. note:: This functionality is only available in Databricks. Please run `pip install mlflow[databricks]` to use it. Args: traces: Can be either: a) a pandas DataFrame with a 'trace' column. The 'trace' column should contain either `mlflow.entities.Trace` objects or their json string representations. b) an iterable of `mlflow.entities.Trace` objects. c) an iterable of json string representations of `mlflow.entities.Trace` objects. Returns: LabelingSession: The updated labeling session. """ import pandas as pd if isinstance(traces, pd.DataFrame): if "trace" not in traces.columns: raise MlflowException( "traces must have a 'trace' column like the result of mlflow.search_traces()", error_code=INVALID_PARAMETER_VALUE, ) traces = traces["trace"].to_list() trace_list: list[Trace] = [] for trace in traces: if isinstance(trace, str): trace_list.append(Trace.from_json(trace)) elif isinstance(trace, Trace): trace_list.append(trace) elif trace is None: raise MlflowException( "trace cannot be None. Must be mlflow.entities.Trace or its json string " "representation.", error_code=INVALID_PARAMETER_VALUE, ) else: raise MlflowException( f"Expected mlflow.entities.Trace or json string, got {type(trace).__name__}", error_code=INVALID_PARAMETER_VALUE, ) store = self._get_store() return store.add_traces_to_session(self, trace_list)
[docs] def sync(self, to_dataset: str) -> None: """Sync the traces and expectations from the labeling session to a dataset. .. note:: This functionality is only available in Databricks. Please run `pip install mlflow[databricks]` to use it. Args: to_dataset: The name of the dataset to sync traces and expectations to. """ store = self._get_store() return store.sync_session_expectations(self, to_dataset)
[docs] def set_assigned_users(self, assigned_users: list[str]) -> "LabelingSession": """Set the assigned users for the labeling session. .. note:: This functionality is only available in Databricks. Please run `pip install mlflow[databricks]` to use it. Args: assigned_users: The list of users to assign to the session. Returns: LabelingSession: The updated labeling session. """ store = self._get_store() return store.set_session_assigned_users(self, assigned_users)
[docs]class ReviewApp: """A review app is used to collect feedback from stakeholders for a given experiment. .. note:: This functionality is only available in Databricks. Please run `pip install mlflow[databricks]` to use it. """ def __init__(self, app: "_ReviewApp"): self._app = app @property def review_app_id(self) -> str: """The ID of the review app.""" return self._app.review_app_id @property def experiment_id(self) -> str: """The ID of the experiment.""" return self._app.experiment_id @property def url(self) -> str: """The URL of the review app for stakeholders to provide feedback.""" return self._app.url @property def agents(self) -> list[Agent]: """The agents to be used to generate responses.""" return [Agent(agent) for agent in self._app.agents] @property def label_schemas(self) -> list["_LabelSchema"]: """The label schemas to be used in the review app.""" return self._app.label_schemas
[docs] def add_agent( self, *, agent_name: str, model_serving_endpoint: str, overwrite: bool = False ) -> "ReviewApp": """Add an agent to the review app to be used to generate responses. .. note:: This functionality is only available in Databricks. Please run `pip install mlflow[databricks]` to use it. Args: agent_name: The name of the agent. model_serving_endpoint: The model serving endpoint to be used by the agent. overwrite: Whether to overwrite an existing agent with the same name. Returns: ReviewApp: The updated review app. """ return ReviewApp( self._app.add_agent( agent_name=agent_name, model_serving_endpoint=model_serving_endpoint, overwrite=overwrite, ) )
[docs] def remove_agent(self, agent_name: str) -> "ReviewApp": """Remove an agent from the review app. .. note:: This functionality is only available in Databricks. Please run `pip install mlflow[databricks]` to use it. Args: agent_name: The name of the agent to remove. Returns: ReviewApp: The updated review app. """ return ReviewApp(self._app.remove_agent(agent_name))