@bench.command()
@click.option("-m", "--model", "model_name", default=None, help="Model to benchmark.")
@click.option("-e", "--engine", "engine_key", default=None, help="Engine backend.")
@click.option(
    "-n", "--samples", "num_samples", default=10, type=int,
    help="Number of samples per benchmark.",
)
@click.option(
    "-b", "--benchmark", "benchmark_name", default=None,
    help="Specific benchmark to run (default: all).",
)
@click.option(
    "-o", "--output", "output_path", default=None, type=click.Path(),
    help="Write JSONL results to file.",
)
@click.option(
    "--json", "output_json", is_flag=True,
    help="Output JSON summary to stdout.",
)
@click.option(
    "-w", "--warmup", "warmup", default=0, type=int,
    help="Number of warmup iterations before measurement.",
)
@click.option(
    "--setup-energy", "setup_energy", is_flag=True,
    help="Run energy monitor setup script when missing (for energy benchmark).",
)
def run(
    model_name: str | None,
    engine_key: str | None,
    num_samples: int,
    benchmark_name: str | None,
    output_path: str | None,
    output_json: bool,
    warmup: int,
    setup_energy: bool,
) -> None:
    """Run benchmarks against an inference engine."""
    # NOTE: the docstring above doubles as the command's --help text (Click
    # reads it), so the detailed description lives in comments instead.
    #
    # Flow:
    #   1. Load config, register benchmarks, resolve the engine and model.
    #   2. Select one benchmark (-b) or every registered benchmark.
    #   3. Create an energy monitor when the config enables GPU metrics or
    #      the "energy" benchmark is selected; optionally run the setup
    #      script (--setup-energy) when the monitor is unavailable.
    #   4. Run the suite, then emit JSONL (-o), a JSON summary (--json), or
    #      Rich tables (neither flag given).
    #   5. Always close the energy monitor, even if the run fails.
    #
    # Exits with status 1 when no engine is available, the engine lists no
    # models and none was given, or the requested benchmark is unknown.

    # Human-readable output goes to stderr so stdout stays clean for --json.
    console = Console(stderr=True)
    config = load_config()

    # Import and register benchmarks
    from openjarvis.bench import ensure_registered
    from openjarvis.bench._stubs import BenchmarkSuite
    from openjarvis.core.registry import BenchmarkRegistry

    ensure_registered()

    # Get engine
    resolved = get_engine(config, engine_key)
    if resolved is None:
        console.print("[red bold]No inference engine available.[/red bold]")
        sys.exit(1)
    engine_name, engine = resolved

    # Resolve model: fall back to the first model the engine lists.
    if model_name is None:
        models = engine.list_models()
        if models:
            model_name = models[0]
        else:
            console.print("[red]No model available on engine.[/red]")
            sys.exit(1)

    # Select benchmarks
    if benchmark_name:
        if not BenchmarkRegistry.contains(benchmark_name):
            console.print(
                f"[red]Unknown benchmark: {benchmark_name}. "
                f"Available: {', '.join(BenchmarkRegistry.keys())}[/red]"
            )
            sys.exit(1)
        bench_cls = BenchmarkRegistry.get(benchmark_name)
        benchmarks = [bench_cls()]
    else:
        benchmarks = [cls() for _, cls in BenchmarkRegistry.items()]

    if not benchmarks:
        console.print("[yellow]No benchmarks registered.[/yellow]")
        return

    suite = BenchmarkSuite(benchmarks)

    # Create energy monitor when running energy benchmark or when gpu_metrics enabled
    needs_energy = any(b.name == "energy" for b in benchmarks)
    energy_monitor = None
    if config.telemetry.gpu_metrics or needs_energy:
        try:
            from openjarvis.telemetry.energy_monitor import create_energy_monitor

            energy_monitor = create_energy_monitor(
                prefer_vendor=config.telemetry.energy_vendor or None,
            )
        except Exception as exc:
            # Best effort: a missing/broken monitor must not block the run.
            logger.debug("Energy monitor init skipped: %s", exc)

    # If energy benchmark needs monitor but none available, offer setup
    if needs_energy and energy_monitor is None:
        import platform

        setup_script = (
            Path(__file__).resolve().parents[3]
            / "scripts"
            / "setup-energy-monitor.sh"
        )

        # Pick the package extra to suggest for the current platform.
        system = platform.system()
        if system == "Darwin" and platform.machine() == "arm64":
            extra_name = "energy-apple"
        elif system == "Linux":
            extra_name = "gpu-metrics"
        else:
            extra_name = "energy-all"

        msg = (
            "[yellow]Energy monitor not available"
            " — energy metrics will be zero.[/yellow]\n"
            f" Install: [bold]uv sync "
            f"--extra {extra_name}[/bold]\n"
        )

        if setup_energy and setup_script.exists():
            console.print("[cyan]Running energy monitor setup...[/cyan]")
            try:
                subprocess.run(
                    [str(setup_script)],
                    cwd=setup_script.parent.parent,
                    check=True,
                )
                from openjarvis.telemetry.energy_monitor import create_energy_monitor

                energy_monitor = create_energy_monitor(
                    prefer_vendor=config.telemetry.energy_vendor or None,
                )
            except Exception as exc:
                # Covers a non-zero script exit (CalledProcessError), a
                # non-executable script (OSError) and monitor creation errors.
                console.print(f"[red]Setup failed: {exc}[/red]")
                console.print(msg)
            else:
                if energy_monitor is not None:
                    console.print("[green]Energy monitor installed.[/green]")
                else:
                    # The script ran cleanly but the monitor is still
                    # unavailable; warn instead of silently reporting zeros.
                    console.print(msg)
        else:
            console.print(msg)

    # From here on the monitor (if any) is live; the finally block guarantees
    # it is closed even when the run or the output step raises.
    try:
        # Banner + configuration
        _print_banner(console)
        _section(console, "Configuration")
        bench_names = [b.name for b in benchmarks]
        config_panel = Panel(
            f"[cyan]Engine:[/cyan] {engine_name}\n"
            f"[cyan]Model:[/cyan] {model_name}\n"
            f"[cyan]Benchmarks:[/cyan] {', '.join(bench_names)}\n"
            f"[cyan]Samples:[/cyan] {num_samples}\n"
            f"[cyan]Warmup:[/cyan] {warmup}",
            title="[bold]Run Configuration[/bold]",
            border_style="blue",
            expand=False,
        )
        console.print(config_panel)

        # Run benchmarks
        _section(console, "Execution")
        with console.status(
            f"[bold cyan]Running {len(benchmarks)} benchmark(s)...[/bold cyan]",
        ):
            results = suite.run_all(
                engine, model_name,
                num_samples=num_samples, warmup_samples=warmup,
                energy_monitor=energy_monitor,
            )

        # Output results
        if output_path:
            jsonl = suite.to_jsonl(results)
            # Pin the encoding so the file does not depend on the locale.
            with open(output_path, "w", encoding="utf-8") as fh:
                fh.write(jsonl + "\n")
            console.print(f"[green]Results written to {output_path}[/green]")

        if output_json:
            summary = suite.summary(results)
            click.echo(json_mod.dumps(summary, indent=2))
        elif not output_path:
            # Pretty-print results as Rich tables
            _section(console, "Results")
            for r in results:
                _render_stats_table(console, r)
    finally:
        # Cleanup energy monitor
        if energy_monitor is not None:
            try:
                energy_monitor.close()
            except Exception as exc:
                logger.debug("Energy monitor cleanup failed: %s", exc)