Skip to content

test_cli_endpoint

test_cli_endpoint

--base-url/--api-key forwarding through the eval CLI plumbing.

Covers the fix for the eval-CLI endpoint gap: the flags used to be silently dropped for jarvis-direct/jarvis-agent and ignored by terminalbench-native (which hardcoded api_base="http://localhost:8000/v1").

Classes

TestBuildBackendForwardsEndpoint

Functions
test_suite_mode_scopes_endpoint_to_external_backends
test_suite_mode_scopes_endpoint_to_external_backends(mock_cls)

[backend.external] suite semantics stay hermes/openclaw-only: with first_party_endpoint=False, base_url and api_key must not be forwarded to first-party backends.

Source code in src/openjarvis/evals/tests/test_cli_endpoint.py
@patch("openjarvis.evals.backends.jarvis_direct.JarvisDirectBackend")
def test_suite_mode_scopes_endpoint_to_external_backends(self, mock_cls):
    """Suite mode keeps the endpoint scoped to external backends.

    [backend.external] suite semantics stay hermes/openclaw-only, so when
    ``first_party_endpoint=False`` the ``base_url``/``api_key`` handed to
    ``_build_backend`` must reach the (patched) jarvis-direct backend
    constructor as ``None``.
    """
    _build_backend(
        "jarvis-direct",
        "vllm",
        "orchestrator",
        [],
        base_url="http://node7:8123/v1",
        api_key="sk-k",
        first_party_endpoint=False,
    )

    # Inspect the keyword arguments of the most recent constructor call.
    ctor_kwargs = mock_cls.call_args.kwargs
    for endpoint_field in ("base_url", "api_key"):
        assert ctor_kwargs[endpoint_field] is None

TestTerminalBenchNativeApiBase

Functions
test_api_key_exported_as_openai_api_key_during_run
test_api_key_exported_as_openai_api_key_during_run(mock_cls, monkeypatch)

terminus-2 reads OPENAI_API_KEY via LiteLLM; the var must be set during harness.run() and restored afterwards.

Source code in src/openjarvis/evals/tests/test_cli_endpoint.py
@patch("openjarvis.evals.backends.terminalbench_native.TerminalBenchNativeBackend")
def test_api_key_exported_as_openai_api_key_during_run(self, mock_cls, monkeypatch):
    """``api_key`` is visible as OPENAI_API_KEY only while the harness runs.

    terminus-2 reads OPENAI_API_KEY via LiteLLM, so the variable must be
    set for the duration of ``run_harness()`` and removed again afterwards.
    """
    # Start from a clean environment so the "restored" check is meaningful.
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
    captured: dict = {}

    def fake_run_harness(run_id):
        # Snapshot the variable at the moment the harness would be running.
        captured["openai_api_key"] = os.getenv("OPENAI_API_KEY")
        return SimpleNamespace(trial_results=[])

    backend_double = MagicMock()
    mock_cls.return_value = backend_double
    backend_double.run_harness.side_effect = fake_run_harness

    _run_terminalbench_native(
        _tb_config(),
        _quiet_console(),
        base_url="http://node7:8123/v1",
        api_key="sk-tb",
    )

    assert captured["openai_api_key"] == "sk-tb"
    assert "OPENAI_API_KEY" not in os.environ  # restored