optimize¶
Optimization framework for OpenJarvis configuration tuning.
Classes¶
LLMOptimizer¶
LLMOptimizer(search_space: SearchSpace, optimizer_model: str = 'claude-sonnet-4-6', optimizer_backend: Optional[InferenceBackend] = None)
Uses a cloud LLM to propose optimal OpenJarvis configs.
Inspired by DSPy's GEPA: uses textual feedback from execution traces rather than just scalar rewards.
Source code in src/openjarvis/learning/optimize/llm_optimizer.py
Functions¶
propose_initial¶
propose_initial() -> TrialConfig
Propose a reasonable starting config from the search space.
Source code in src/openjarvis/learning/optimize/llm_optimizer.py
propose_next¶
propose_next(history: List[TrialResult], traces: Optional[List[Trace]] = None, frontier_ids: Optional[set] = None) -> TrialConfig
Ask the LLM to propose the next config to evaluate.
Source code in src/openjarvis/learning/optimize/llm_optimizer.py
analyze_trial¶
analyze_trial(trial: TrialConfig, summary: RunSummary, traces: Optional[List[Trace]] = None, sample_scores: Optional[List[SampleScore]] = None, per_benchmark: Optional[List[BenchmarkScore]] = None) -> TrialFeedback
Ask the LLM to analyze a completed trial. Returns structured feedback.
Source code in src/openjarvis/learning/optimize/llm_optimizer.py
propose_targeted¶
propose_targeted(history: List[TrialResult], base_config: TrialConfig, target_primitive: str, frontier_ids: Optional[set] = None) -> TrialConfig
Propose a config that only changes one primitive.
Source code in src/openjarvis/learning/optimize/llm_optimizer.py
propose_merge¶
propose_merge(candidates: List[TrialResult], history: List[TrialResult], frontier_ids: Optional[set] = None) -> TrialConfig
Combine best aspects of frontier members into one config.
Source code in src/openjarvis/learning/optimize/llm_optimizer.py
OptimizationEngine¶
OptimizationEngine(search_space: SearchSpace, llm_optimizer: LLMOptimizer, trial_runner: TrialRunner, store: Optional[OptimizationStore] = None, max_trials: int = 20, early_stop_patience: int = 5)
Orchestrates the optimize loop: propose -> evaluate -> analyze -> repeat.
Source code in src/openjarvis/learning/optimize/optimizer.py
Functions¶
run¶
run(progress_callback: Optional[Callable[[int, int], None]] = None) -> OptimizationRun
Execute the full optimization loop.
- Generate a run_id via uuid.
- llm_optimizer.propose_initial() -> first config.
- Loop up to max_trials:
  a. trial_runner.run_trial(config) -> TrialResult
  b. llm_optimizer.analyze_trial(config, summary, traces)
  c. Update TrialResult with analysis text
  d. Append to history
  e. If store, store.save_trial(result)
  f. Update best_trial if accuracy improved
  g. Check early stopping (no improvement for patience trials)
  h. If not stopped, llm_optimizer.propose_next(history)
- Set run status to "completed".
- If store, store.save_run(optimization_run).
- Return the :class:OptimizationRun.
Args:
progress_callback: Optional (trial_num, max_trials) -> None
called after each trial completes.
Source code in src/openjarvis/learning/optimize/optimizer.py
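The loop above can be sketched as plain Python with hypothetical stand-ins for the proposer, trial runner, and store (the real classes carry much richer TrialConfig/TrialResult types; everything here is illustrative, not the actual implementation):

```python
# Illustrative sketch of the propose -> evaluate -> analyze loop.
# The callables mirror the documented method names, but their minimal
# dict/float payloads are stand-ins for TrialConfig / TrialResult.
import uuid
from typing import Callable, List, Optional


def optimization_loop(
    propose_initial: Callable[[], dict],
    run_trial: Callable[[dict], float],        # returns accuracy for a config
    propose_next: Callable[[List[dict]], dict],
    max_trials: int = 20,
    early_stop_patience: int = 5,
    progress_callback: Optional[Callable[[int, int], None]] = None,
) -> dict:
    run_id = str(uuid.uuid4())                 # step 1: generate a run_id
    history: List[dict] = []
    best_accuracy = float("-inf")
    trials_without_improvement = 0

    config = propose_initial()                 # step 2: first config
    for trial_num in range(1, max_trials + 1):
        accuracy = run_trial(config)           # a. evaluate
        history.append({"config": config, "accuracy": accuracy})  # d. history
        if accuracy > best_accuracy:           # f. track the best trial
            best_accuracy = accuracy
            trials_without_improvement = 0
        else:
            trials_without_improvement += 1
        if progress_callback:
            progress_callback(trial_num, max_trials)
        if trials_without_improvement >= early_stop_patience:
            break                              # g. early stopping
        if trial_num < max_trials:
            config = propose_next(history)     # h. next candidate
    return {"run_id": run_id, "status": "completed",
            "best_accuracy": best_accuracy, "trials": history}
```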
export_best_recipe¶
export_best_recipe(run: OptimizationRun, path: Path) -> Path
Export the best trial's config as a TOML recipe file.
Args:
run: A completed :class:OptimizationRun.
path: Destination path for the TOML file.
Returns: The path written to.
Raises: ValueError: If there is no best trial in the run.
Source code in src/openjarvis/learning/optimize/optimizer.py
OptimizationStore¶
SQLite-backed storage for optimization runs and trials.
Source code in src/openjarvis/learning/optimize/store.py
Functions¶
save_run¶
save_run(run: OptimizationRun) -> None
Persist an optimization run (insert or update).
Source code in src/openjarvis/learning/optimize/store.py
get_run¶
get_run(run_id: str) -> Optional[OptimizationRun]
Retrieve an optimization run by id, or None.
Source code in src/openjarvis/learning/optimize/store.py
list_runs¶
Return summary dicts of recent optimization runs.
Source code in src/openjarvis/learning/optimize/store.py
save_trial¶
save_trial(run_id: str, trial: TrialResult) -> None
Persist a single trial result.
Source code in src/openjarvis/learning/optimize/store.py
get_trials¶
get_trials(run_id: str) -> List[TrialResult]
Retrieve all trial results for a given run.
Source code in src/openjarvis/learning/optimize/store.py
BenchmarkSpec dataclass¶
Specification for one benchmark in a multi-benchmark optimization.
MultiBenchTrialRunner¶
MultiBenchTrialRunner(benchmark_specs: List[BenchmarkSpec], judge_model: str = 'gpt-5-mini-2025-08-07', output_dir: str = 'results/optimize/')
Evaluates a proposed config across multiple benchmarks.
Delegates to :class:TrialRunner per benchmark, then aggregates
results into a single composite :class:TrialResult with weighted
metrics and per-benchmark breakdowns.
Source code in src/openjarvis/learning/optimize/trial_runner.py
Functions¶
run_trial¶
run_trial(trial: TrialConfig) -> TrialResult
Run trial against all benchmarks and return a composite result.
Source code in src/openjarvis/learning/optimize/trial_runner.py
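One way the "weighted metrics" aggregation might look, sketched over plain (accuracy, weight) pairs rather than the real BenchmarkScore objects. The weighted-mean formula here is an assumption for illustration, not necessarily what MultiBenchTrialRunner implements:

```python
# Hypothetical weighted-mean aggregation across per-benchmark scores.
from typing import List, Tuple


def aggregate(scores: List[Tuple[float, float]]) -> float:
    """Weighted mean accuracy over (accuracy, weight) pairs."""
    total_weight = sum(w for _, w in scores)
    if total_weight == 0:
        return 0.0  # no benchmarks configured (or all zero-weighted)
    return sum(acc * w for acc, w in scores) / total_weight
```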
TrialRunner¶
TrialRunner(benchmark: str, max_samples: int = 50, judge_model: str = 'gpt-5-mini-2025-08-07', output_dir: str = 'results/optimize/')
Evaluates a proposed config against a benchmark.
Bridges the optimization types (:class:TrialConfig) to the eval
framework (:class:EvalRunner) so the optimizer can score candidate
configurations end-to-end.
Source code in src/openjarvis/learning/optimize/trial_runner.py
Functions¶
run_trial¶
run_trial(trial: TrialConfig) -> TrialResult
Run trial against the configured benchmark and return a result.
Steps:
1. Convert trial to a :class:Recipe and extract params.
2. Build a :class:RunConfig from recipe + benchmark settings.
3. Lazily import eval-framework registries to resolve the
benchmark -> dataset + scorer, and build the backend.
4. Execute via EvalRunner.run() -> :class:RunSummary.
5. Map the summary into a :class:TrialResult.
Source code in src/openjarvis/learning/optimize/trial_runner.py
BenchmarkScore dataclass¶
BenchmarkScore(benchmark: str, accuracy: float = 0.0, mean_latency_seconds: float = 0.0, total_cost_usd: float = 0.0, total_energy_joules: float = 0.0, total_tokens: int = 0, samples_evaluated: int = 0, errors: int = 0, weight: float = 1.0, summary: Optional[Any] = None, sample_scores: List['SampleScore'] = list())
Per-benchmark metrics from a multi-benchmark evaluation trial.
ObjectiveSpec dataclass¶
A single optimization objective.
OptimizationRun dataclass¶
OptimizationRun(run_id: str, search_space: SearchSpace, trials: List[TrialResult] = list(), best_trial: Optional[TrialResult] = None, best_recipe_path: Optional[str] = None, status: str = 'running', optimizer_model: str = '', benchmark: str = '', benchmarks: List[str] = list(), pareto_frontier: List[TrialResult] = list(), objectives: List[ObjectiveSpec] = (lambda: list(DEFAULT_OBJECTIVES))())
Complete optimization session.
SampleScore dataclass¶
SampleScore(record_id: str, is_correct: Optional[bool] = None, score: Optional[float] = None, latency_seconds: float = 0.0, prompt_tokens: int = 0, completion_tokens: int = 0, cost_usd: float = 0.0, error: Optional[str] = None, ttft: float = 0.0, energy_joules: float = 0.0, power_watts: float = 0.0, gpu_utilization_pct: float = 0.0, throughput_tok_per_sec: float = 0.0, mfu_pct: float = 0.0, mbu_pct: float = 0.0, ipw: float = 0.0, ipj: float = 0.0, energy_per_output_token_joules: float = 0.0, throughput_per_watt: float = 0.0, mean_itl_ms: float = 0.0)
Per-sample metrics from an evaluation trial.
SearchDimension dataclass¶
SearchDimension(name: str, dim_type: str, values: List[Any] = list(), low: Optional[float] = None, high: Optional[float] = None, description: str = '', primitive: str = '')
One tunable dimension in the config space.
SearchSpace dataclass¶
SearchSpace(dimensions: List[SearchDimension] = list(), fixed: Dict[str, Any] = dict(), constraints: List[str] = list())
The full space of configs the optimizer can propose.
Functions¶
to_prompt_description¶
Render search space as structured text for the LLM optimizer.
Source code in src/openjarvis/learning/optimize/types.py
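A hypothetical sketch of what such a per-dimension rendering could produce; the exact wording the real to_prompt_description emits is not shown in this reference:

```python
# Illustrative rendering of one SearchDimension as a line of prompt text.
from typing import Any, List, Optional


def describe_dimension(
    name: str,
    dim_type: str,
    values: Optional[List[Any]] = None,
    low: Optional[float] = None,
    high: Optional[float] = None,
    description: str = "",
) -> str:
    """Render a single dimension as '- name: range (description)'."""
    if dim_type == "categorical":
        rng = f"one of {values}"
    else:
        rng = f"a float in [{low}, {high}]"
    return f"- {name}: {rng} ({description})"
```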
TrialConfig dataclass¶
A single candidate configuration proposed by the optimizer.
Functions¶
to_recipe¶
to_recipe() -> Recipe
Map params back to Recipe fields.
Source code in src/openjarvis/learning/optimize/types.py
TrialFeedback dataclass¶
TrialFeedback(summary_text: str = '', failure_patterns: List[str] = list(), primitive_ratings: Dict[str, str] = dict(), suggested_changes: List[str] = list(), target_primitive: str = '')
Structured feedback from trial analysis.
TrialResult dataclass¶
TrialResult(trial_id: str, config: TrialConfig, accuracy: float = 0.0, mean_latency_seconds: float = 0.0, total_cost_usd: float = 0.0, total_energy_joules: float = 0.0, total_tokens: int = 0, samples_evaluated: int = 0, analysis: str = '', failure_modes: List[str] = list(), per_sample_feedback: List[Dict[str, Any]] = list(), summary: Optional[RunSummary] = None, sample_scores: List[SampleScore] = list(), structured_feedback: Optional[TrialFeedback] = None, per_benchmark: List[BenchmarkScore] = list())
Result of evaluating a trial, with both scalar and textual feedback.
Functions¶
load_benchmark_specs¶
Extract benchmark specs from a loaded optimization config.
Supports two formats:
- Multi-benchmark: [[optimize.benchmarks]] array of tables
- Single-benchmark fallback: optimize.benchmark string
Returns a list of :class:BenchmarkSpec (from trial_runner).
Returns an empty list if no benchmarks are configured (caller
should fall back to CLI --benchmark).
Source code in src/openjarvis/learning/optimize/config.py
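For illustration, the two formats might look like this in TOML. Benchmark names and the per-benchmark keys (weight, max_samples) are assumptions for the sketch, not confirmed BenchmarkSpec fields:

```toml
# Multi-benchmark: [[optimize.benchmarks]] array of tables
[[optimize.benchmarks]]
name = "gsm8k"          # illustrative benchmark name
weight = 2.0
max_samples = 50

[[optimize.benchmarks]]
name = "humaneval"      # illustrative benchmark name
weight = 1.0

# Single-benchmark fallback: a plain string instead of the array
# [optimize]
# benchmark = "gsm8k"
```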
load_objectives¶
load_objectives(data: Dict[str, Any]) -> List[ObjectiveSpec]
Extract objectives from a loaded optimization config.
Reads optimize.objectives (a list of tables) and returns
a list of :class:ObjectiveSpec. Falls back to
:data:DEFAULT_OBJECTIVES if the key is absent.
Source code in src/openjarvis/learning/optimize/config.py
load_optimize_config¶
Load an optimization config TOML file.
Returns the raw dict with keys such as optimize.max_trials,
optimize.benchmark, optimize.search, optimize.fixed,
optimize.constraints, etc.
Raises: FileNotFoundError: If path does not exist.
Source code in src/openjarvis/learning/optimize/config.py
compute_pareto_frontier¶
compute_pareto_frontier(trials: List[TrialResult], objectives: List[ObjectiveSpec]) -> List[TrialResult]
Compute the Pareto frontier: trials not dominated by any other.
A trial A dominates trial B if A is at least as good as B on every objective and strictly better on at least one, with each objective's direction (maximize or minimize) determining which way the comparison runs.
Source code in src/openjarvis/learning/optimize/optimizer.py
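The dominance test above can be sketched self-contained, using stand-in types in place of the real TrialResult and ObjectiveSpec:

```python
# Direction-aware Pareto dominance over trials represented as metric dicts.
from dataclasses import dataclass
from typing import Dict, List


@dataclass
class Objective:           # stand-in for ObjectiveSpec
    name: str
    direction: str         # "maximize" or "minimize"


def dominates(a: Dict[str, float], b: Dict[str, float],
              objectives: List[Objective]) -> bool:
    """True if `a` is at least as good as `b` everywhere and strictly better once."""
    at_least_as_good = True
    strictly_better = False
    for obj in objectives:
        av, bv = a[obj.name], b[obj.name]
        if obj.direction == "maximize":
            av, bv = -av, -bv  # flip sign so lower is always better
        if av > bv:
            at_least_as_good = False
        elif av < bv:
            strictly_better = True
    return at_least_as_good and strictly_better


def pareto_frontier(trials: List[Dict[str, float]],
                    objectives: List[Objective]) -> List[Dict[str, float]]:
    """Keep every trial that no other trial dominates."""
    return [t for t in trials
            if not any(dominates(o, t, objectives) for o in trials if o is not t)]
```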
build_search_space¶
build_search_space(config: Dict[str, Any]) -> SearchSpace
Build a SearchSpace from a TOML-style config dict.
Expected format::
{
"optimize": {
"search": [
{
"name": "agent.type",
"type": "categorical",
"values": ["orchestrator", "native_react"],
"description": "Agent architecture",
},
{
"name": "intelligence.temperature",
"type": "continuous",
"low": 0.0,
"high": 1.0,
"description": "Generation temperature",
},
],
"fixed": {"engine": "ollama", "model": "qwen3:8b"},
"constraints": {
"rules": ["SimpleAgent should only have max_turns = 1"],
},
}
}
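A minimal sketch of how entries under optimize.search map onto SearchDimension-style records, using a stand-in dataclass rather than the real type from src/openjarvis/learning/optimize/types.py:

```python
# Parse the "search" list of the config dict above into dimension records.
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional


@dataclass
class Dimension:           # stand-in for SearchDimension
    name: str
    dim_type: str
    values: List[Any] = field(default_factory=list)
    low: Optional[float] = None
    high: Optional[float] = None
    description: str = ""


def parse_search(config: Dict[str, Any]) -> List[Dimension]:
    """Turn each optimize.search entry into a Dimension record."""
    dims = []
    for entry in config.get("optimize", {}).get("search", []):
        dims.append(Dimension(
            name=entry["name"],
            dim_type=entry["type"],          # "categorical" or "continuous"
            values=entry.get("values", []),  # only set for categorical dims
            low=entry.get("low"),            # only set for continuous dims
            high=entry.get("high"),
            description=entry.get("description", ""),
        ))
    return dims
```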