# Source code for mlflow.metrics.genai.base

from dataclasses import dataclass
from typing import Dict, Optional, Union

from mlflow.utils.annotations import experimental


@experimental
@dataclass
class EvaluationExample:
    """
    Stores a single sample example used for few-shot learning during LLM evaluation.

    :param input: The input provided to the model
    :param output: The output generated by the model
    :param score: The score given by the evaluator
    :param justification: The justification given by the evaluator
    :param grading_context: The grading_context provided to the evaluator for evaluation. Either
        a dictionary of grading context column names and grading context strings or a single
        grading context string.

    .. code-block:: python
        :caption: Example for creating an EvaluationExample

        from mlflow.metrics.genai.base import EvaluationExample

        example = EvaluationExample(
            input="What is MLflow?",
            output="MLflow is an open-source platform for managing machine "
            "learning workflows, including experiment tracking, model packaging, "
            "versioning, and deployment, simplifying the ML lifecycle.",
            score=4,
            justification="The definition effectively explains what MLflow is, "
            "its purpose, and its developer. It could be more concise for a 5-score.",
            grading_context={
                "ground_truth": "MLflow is an open-source platform for managing "
                "the end-to-end machine learning (ML) lifecycle. It was developed by Databricks, "
                "a company that specializes in big data and machine learning solutions. MLflow is "
                "designed to address the challenges that data scientists and machine learning "
                "engineers face when developing, training, and deploying machine learning models."
            },
        )
        print(str(example))

    The printed text uses the labels below. Surrounding whitespace is omitted and long values
    are abbreviated with ``...`` in this listing.

    .. code-block:: text
        :caption: Output

        Example Input:
        What is MLflow?
        Example Output:
        MLflow is an open-source platform for managing machine learning workflows, ...
        Additional information used by the model:
        key: ground_truth
        value:
        MLflow is an open-source platform for managing the end-to-end machine learning ...
        Example score: 4
        Example justification: The definition effectively explains what MLflow is, its ...
    """

    input: str
    output: str
    score: float
    justification: str
    grading_context: Optional[Union[Dict[str, str], str]] = None

    def _format_grading_context(self) -> Optional[str]:
        """
        Render ``grading_context`` as text.

        :return: For a dictionary, one ``key: <name>`` / ``value:`` / ``<text>`` group per
            entry, joined by newlines. Any other value (a plain string or ``None``) is
            returned unchanged.
        """
        if isinstance(self.grading_context, dict):
            return "\n".join(
                f"key: {key}\nvalue:\n{value}" for key, value in self.grading_context.items()
            )
        return self.grading_context

    def __str__(self) -> str:
        """
        Build the text representation of this example.

        :return: A block containing the example input, output, optional grading context,
            score and justification.
        """
        # The grading-context section is left empty when no context was supplied.
        grading_context = (
            ""
            if self.grading_context is None
            else "Additional information used by the model:\n"
            f"{self._format_grading_context()}"
        )

        # NOTE(review): the line layout of this template was reconstructed from a
        # whitespace-collapsed copy of the file -- confirm against the canonical source.
        return f"""
        Example Input:
        {self.input}

        Example Output:
        {self.output}

        {grading_context}

        Example score: {self.score}
        Example justification: {self.justification}
        """