Cloud inference engine — OpenAI, Anthropic, Google, and MiniMax API backends.
Classes
CloudEngine
Bases: InferenceEngine
Cloud inference via OpenAI, Anthropic, Google, and MiniMax SDKs.
Source code in src/openjarvis/engine/cloud.py
def __init__(self) -> None:
    """Create the engine with every SDK client slot empty, then wire them up."""
    # Per-provider SDK handles; all start unset and _init_clients() fills in
    # whichever providers are actually configured.
    for slot in (
        "_openai_client",
        "_anthropic_client",
        "_google_client",
        "_openrouter_client",
        "_minimax_client",
        "_codex_client",
    ):
        setattr(self, slot, None)
    # Gemini thought_signatures: tool_call_id -> signature bytes
    self._thought_sigs = {}
    self._init_clients()
Functions
stream_full
async
stream_full(messages: Sequence[Message], *, model: str, temperature: float = 0.7, max_tokens: int = 1024, **kwargs: Any) -> AsyncIterator[StreamChunk]
Yield StreamChunks with content, tool_calls, and finish_reason.
Source code in src/openjarvis/engine/cloud.py
async def stream_full(
    self,
    messages: Sequence[Message],
    *,
    model: str,
    temperature: float = 0.7,
    max_tokens: int = 1024,
    **kwargs: Any,
) -> AsyncIterator[StreamChunk]:
    """Yield StreamChunks with content, tool_calls, and finish_reason.

    Dispatches on the model name: Anthropic models use the Anthropic
    helper, Google models go through the base-class implementation, and
    everything else is treated as OpenAI-compatible.
    """
    opts = {
        "model": model,
        "temperature": temperature,
        "max_tokens": max_tokens,
        **kwargs,
    }
    if _is_anthropic_model(model):
        source = self._stream_full_anthropic(messages, **opts)
    elif _is_google_model(model):
        # NOTE(review): the Google branch delegates to super().stream_full
        # instead of a provider-specific helper like the other two branches —
        # confirm this asymmetry is intentional and not a copy-paste slip.
        source = super().stream_full(messages, **opts)
    else:
        # Default path: OpenAI-style chat-completions endpoint.
        source = self._stream_full_openai(messages, **opts)
    async for chunk in source:
        yield chunk
Functions
estimate_cost
estimate_cost(model: str, prompt_tokens: int, completion_tokens: int) -> float
Estimate USD cost based on the hardcoded pricing table.
Source code in src/openjarvis/engine/cloud.py
def estimate_cost(model: str, prompt_tokens: int, completion_tokens: int) -> float:
    """Estimate USD cost based on the hardcoded pricing table.

    Args:
        model: Model identifier; matched exactly against ``PRICING`` first,
            then by prefix.
        prompt_tokens: Number of input tokens consumed.
        completion_tokens: Number of output tokens produced.

    Returns:
        Estimated cost in USD, or ``0.0`` when the model is unknown.
    """
    # Try exact match first, then prefix match.
    prices = PRICING.get(model)
    if prices is None:
        # Longest matching prefix wins: with overlapping keys such as
        # "gpt-4" and "gpt-4o", the most specific entry must be chosen
        # rather than whichever happens to come first in insertion order.
        candidates = [key for key in PRICING if model.startswith(key)]
        if candidates:
            prices = PRICING[max(candidates, key=len)]
    if prices is None:
        return 0.0
    # PRICING values are (input $/Mtok, output $/Mtok) pairs.
    input_cost = (prompt_tokens / 1_000_000) * prices[0]
    output_cost = (completion_tokens / 1_000_000) * prices[1]
    return input_cost + output_cost