import multiprocessing
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any, Callable
from mlflow.entities import Feedback, Trace
from mlflow.entities.model_registry import PromptVersion
from mlflow.utils.annotations import deprecated, experimental
if TYPE_CHECKING:
from mlflow.genai.optimize.optimizers import BasePromptOptimizer
# Signature for a custom aggregation function: takes the mapping of scorer name ->
# scorer output (a primitive score, a Feedback, or a list of Feedbacks) and reduces
# it to a single float used as the overall evaluation score.
AggregationFn = Callable[[dict[str, bool | float | str | Feedback | list[Feedback]]], float]
@deprecated(
    since="3.5.0",
)
@dataclass
class LLMParams:
    """
    Parameters for configuring a LLM Model.

    Args:
        model_name: Name of the model in the format `<provider>:/<model name>` or
            `<provider>/<model name>`. For example, "openai:/gpt-4o", "anthropic:/claude-4",
            or "openai/gpt-4o".
        base_uri: Optional base URI for the API endpoint. If not provided,
            the default endpoint for the provider will be used.
        temperature: Optional sampling temperature for the model's outputs.
            Higher values (e.g., 0.8) make the output more random,
            while lower values (e.g., 0.2) make it more deterministic.
    """

    # Required provider-qualified model identifier (see docstring for accepted formats).
    model_name: str
    # Optional API endpoint override; None means the provider's default endpoint.
    base_uri: str | None = None
    # Optional sampling temperature; None means the provider's default.
    temperature: float | None = None
@deprecated(
    since="3.5.0",
)
@dataclass
class OptimizerConfig:
    """
    Configuration for prompt optimization.

    Args:
        num_instruction_candidates: Number of candidate instructions to generate
            during each optimization iteration. Higher values may lead to better
            results but increase optimization time. Default: 6
        max_few_shot_examples: Maximum number of examples to show in few-shot
            demonstrations. Default: 6
        num_threads: Number of threads to use for parallel optimization.
            Default: (number of CPU cores * 2 + 1)
        optimizer_llm: Optional LLM parameters for the teacher model. If not provided,
            the target LLM will be used as the teacher.
        algorithm: The optimization algorithm to use. When a string is provided,
            it must be one of the supported algorithms: "DSPy/MIPROv2".
            When a BasePromptOptimizer is provided, it will be used as the optimizer.
            Default: "DSPy/MIPROv2"
        verbose: Whether to show optimizer logs during optimization. Default: False
        autolog: Whether to enable automatic logging and prompt registration.
            If set to True, a MLflow run is automatically created to store optimization
            parameters, datasets and metrics, and the optimized prompt is registered.
            If set to False, the raw optimized template is returned without registration.
            Default: True
        convert_to_single_text: Whether to convert the optimized prompt to a single prompt.
            Default: True
        extract_instructions: Whether to extract instructions from the initial prompt.
            Default: True
    """

    num_instruction_candidates: int = 6
    max_few_shot_examples: int = 6
    # default_factory so the CPU count is read at instantiation time, not import time;
    # the `or 1` guards against cpu_count() returning 0/None on exotic platforms.
    num_threads: int = field(default_factory=lambda: (multiprocessing.cpu_count() or 1) * 2 + 1)
    optimizer_llm: LLMParams | None = None
    # Forward reference (string) because BasePromptOptimizer is only imported
    # under TYPE_CHECKING to avoid a circular import.
    algorithm: str | type["BasePromptOptimizer"] = "DSPy/MIPROv2"
    verbose: bool = False
    autolog: bool = True
    convert_to_single_text: bool = True
    extract_instructions: bool = True
@experimental(version="3.5.0")
@dataclass
class EvaluationResultRecord:
    """
    The output type of `eval_fn` in the
    :py:func:`mlflow.genai.optimize.BasePromptOptimizer.optimize()` API.

    Args:
        inputs: The inputs of the evaluation.
        outputs: The outputs of the prediction function.
        expectations: The expected outputs.
        score: The aggregated score of the evaluation result. None if no scorers are provided.
        trace: The trace of the evaluation execution.
        rationales: The rationales of the evaluation result.
        individual_scores: Individual scores from each scorer (scorer_name -> score).
    """

    # Inputs fed to the prediction function during evaluation.
    inputs: dict[str, Any]
    # Raw prediction produced for those inputs.
    outputs: Any
    # Ground-truth expectations the outputs are judged against.
    expectations: Any
    # Aggregated score across scorers; None when no scorers were supplied.
    score: float | None
    # Trace captured while the evaluation ran.
    trace: Trace
    # Scorer name -> textual rationale for its judgement.
    rationales: dict[str, str]
    # Scorer name -> that scorer's individual numeric score.
    individual_scores: dict[str, float] = field(default_factory=dict)
@experimental(version="3.5.0")
@dataclass
class PromptOptimizerOutput:
    """
    An output of the :py:func:`mlflow.genai.optimize.BasePromptOptimizer.optimize()` API.

    Args:
        optimized_prompts: The optimized prompts as
            a dict (prompt template name -> prompt template).
            e.g., {"question": "What is the capital of {{country}}?"}
        initial_eval_score: The evaluation score before optimization (optional).
        final_eval_score: The evaluation score after optimization (optional).
        initial_eval_score_per_scorer: Per-scorer scores before optimization (scorer name -> score).
        final_eval_score_per_scorer: Per-scorer scores after optimization (scorer name -> score).
    """

    # Prompt template name -> optimized prompt template text.
    optimized_prompts: dict[str, str]
    initial_eval_score: float | None = None
    final_eval_score: float | None = None
    # default_factory avoids sharing one mutable dict across instances.
    initial_eval_score_per_scorer: dict[str, float] = field(default_factory=dict)
    final_eval_score_per_scorer: dict[str, float] = field(default_factory=dict)
@experimental(version="3.5.0")
@dataclass
class PromptOptimizationResult:
    """
    Result of the :py:func:`mlflow.genai.optimize_prompts()` API.

    Args:
        optimized_prompts: The optimized prompts.
        optimizer_name: The name of the optimizer.
        initial_eval_score: The evaluation score before optimization (optional).
        final_eval_score: The evaluation score after optimization (optional).
        initial_eval_score_per_scorer: Per-scorer scores before optimization (scorer name -> score).
        final_eval_score_per_scorer: Per-scorer scores after optimization (scorer name -> score).
    """

    # Registered prompt versions produced by the optimization run.
    optimized_prompts: list[PromptVersion]
    optimizer_name: str
    initial_eval_score: float | None = None
    final_eval_score: float | None = None
    # default_factory avoids sharing one mutable dict across instances.
    initial_eval_score_per_scorer: dict[str, float] = field(default_factory=dict)
    final_eval_score_per_scorer: dict[str, float] = field(default_factory=dict)