test_config

test_config ¶

Tests for eval suite config loading and matrix expansion.

Classes¶

TestExpandSuite ¶

Functions¶

test_tools_list_not_shared ¶

test_tools_list_not_shared()

Each RunConfig should get its own tools list (no shared mutation).

Source code in src/openjarvis/evals/tests/test_config.py

def test_tools_list_not_shared(self):
    """Mutating one expanded config's tools must not leak into another.

    The suite is built from two models and one benchmark whose tools list
    is ``["calc"]``. After appending to the first expanded config's list,
    the second expanded config must still see only ``["calc"]``.
    """
    two_models = [ModelConfig(name="m1"), ModelConfig(name="m2")]
    one_benchmark = [BenchmarkConfig(name="gaia", tools=["calc"])]
    expanded = expand_suite(
        EvalSuiteConfig(models=two_models, benchmarks=one_benchmark)
    )

    # Mutate the first config's list in place, then inspect the second one.
    expanded[0].tools.append("extra")
    assert expanded[1].tools == ["calc"]

TestCLIConfig ¶

Functions¶

test_run_config_loads_and_prints_suite_info ¶

test_run_config_loads_and_prints_suite_info(tmp_path)

Verify --config loads config and prints suite header.

We don't actually run the eval (requires backends), but we verify the config is loaded and the matrix expansion starts.

Source code in src/openjarvis/evals/tests/test_config.py

def test_run_config_loads_and_prints_suite_info(self, tmp_path):
    """Check that ``run --config`` reads the suite file and prints its header.

    The eval itself is never executed (that would require backends):
    ``_run_single`` is patched to raise, so the test only inspects what the
    CLI printed before that point, namely the suite name and the
    model/benchmark counts.
    """
    from unittest.mock import patch

    from click.testing import CliRunner

    from openjarvis.evals.cli import main

    # Minimal suite definition: one model and one benchmark.
    suite_toml = """\
        [meta]
        name = "test-suite"

        [[models]]
        name = "qwen3:8b"

        [[benchmarks]]
        name = "supergpqa"
    """
    config_path = _write_toml(tmp_path, suite_toml)

    cli_runner = CliRunner()
    failing_run = patch(
        "openjarvis.evals.cli._run_single", side_effect=Exception("mock")
    )
    with failing_run:
        outcome = cli_runner.invoke(main, ["run", "--config", str(config_path)])

    # The suite header is expected in the output even though the run fails.
    printed = outcome.output
    assert "test-suite" in printed
    assert "1 model(s) x 1 benchmark(s)" in printed