@click.command()
@click.argument("query", nargs=-1, required=True)
@click.option("-m", "--model", "model_name", default=None, help="Model to use.")
@click.option("-e", "--engine", "engine_key", default=None, help="Engine backend.")
@click.option(
    "-t", "--temperature", default=None, type=float,
    help="Sampling temperature (default: from config).",
)
@click.option(
    "--max-tokens", default=None, type=int,
    help="Max tokens to generate (default: from config).",
)
@click.option("--json", "output_json", is_flag=True, help="Output raw JSON result.")
@click.option("--no-stream", is_flag=True, help="Disable streaming (sync mode).")
@click.option(
    "--no-context", is_flag=True,
    help="Disable memory context injection.",
)
@click.option(
    "-a", "--agent", "agent_name", default=None,
    help="Agent to use (simple, orchestrator).",
)
@click.option(
    "--tools", "tool_names", default=None,
    help="Comma-separated tool names to enable (e.g. calculator,think).",
)
@click.option(
    "--profile", "enable_profile", is_flag=True,
    help="Print inference telemetry profile (latency, tokens, energy, IPW).",
)
def ask(
    query: tuple[str, ...],
    model_name: str | None,
    engine_key: str | None,
    temperature: float | None,
    max_tokens: int | None,
    output_json: bool,
    no_stream: bool,
    no_context: bool,
    agent_name: str | None,
    tool_names: str | None,
    enable_profile: bool,
) -> None:
    """Ask Jarvis a question.

    Resolves an engine and model (CLI flag > config > discovery fallback),
    then either dispatches to an agent (``--agent``) or sends the query
    directly to the engine, printing the answer as plain text or JSON.
    Telemetry and energy-monitor resources are always released, including
    on the ``sys.exit(1)`` error paths and the agent-mode early return.
    """
    console = Console(stderr=True)
    query_text = " ".join(query)
    # Wall-clock anchor for --profile; only sampled when profiling is on.
    wall_start = time.monotonic() if enable_profile else None

    # Load config and fall back to config values for generation params.
    config = load_config()
    if temperature is None:
        temperature = config.intelligence.temperature
    if max_tokens is None:
        max_tokens = config.intelligence.max_tokens

    # Set up telemetry. Best-effort: a broken store must not block the query.
    bus = EventBus(record_history=True)
    telem_store: TelemetryStore | None = None
    if config.telemetry.enabled:
        try:
            telem_store = TelemetryStore(config.telemetry.db_path)
            telem_store.subscribe_to_bus(bus)
        except Exception as exc:
            logger.debug("Failed to initialize telemetry store: %s", exc)

    # Everything acquired below is released in the finally block. This also
    # covers sys.exit(1) paths (SystemExit propagates through finally) and
    # the agent-mode early return, which previously leaked the energy
    # monitor and skipped cleanup entirely on error exits.
    energy_monitor = None
    try:
        # Discover engines.
        register_builtin_models()
        effective_engine_key = (
            engine_key or config.intelligence.preferred_engine or None
        )
        resolved = get_engine(config, effective_engine_key)
        if resolved is None:
            console.print(
                "[red bold]No inference engine available.[/red bold]\n\n"
                "Make sure an engine is running:\n"
                " [cyan]ollama serve[/cyan] — start Ollama\n"
                " [cyan]vllm serve <model>[/cyan] — start vLLM\n"
                " [cyan]llama-server -m <gguf>[/cyan] — start llama.cpp\n\n"
                "Or set OPENAI_API_KEY / ANTHROPIC_API_KEY for cloud inference."
            )
            sys.exit(1)
        engine_name, engine = resolved

        # Wrap engine with InstrumentedEngine for telemetry (energy + GPU
        # metrics). The monitor is optional; failure to create it only
        # degrades telemetry, never the query itself.
        want_energy = config.telemetry.gpu_metrics or enable_profile
        if want_energy:
            try:
                from openjarvis.telemetry.energy_monitor import create_energy_monitor
                energy_monitor = create_energy_monitor(
                    prefer_vendor=config.telemetry.energy_vendor or None,
                )
            except Exception as exc:
                logger.debug("Failed to create energy monitor: %s", exc)
        engine = InstrumentedEngine(engine, bus, energy_monitor=energy_monitor)

        # Discover models and merge into registry.
        all_engines = discover_engines(config)
        all_models = discover_models(all_engines)
        for ek, model_ids in all_models.items():
            merge_discovered_models(ek, model_ids)

        # Resolve model via config fallback chain:
        # CLI flag > default_model > first model on engine > fallback_model.
        if model_name is None:
            model_name = config.intelligence.default_model
        if not model_name:
            engine_models = all_models.get(engine_name, [])
            if engine_models:
                model_name = engine_models[0]
        if not model_name:
            model_name = config.intelligence.fallback_model
        if not model_name:
            console.print("[red]No model available on engine.[/red]")
            sys.exit(1)

        # Agent mode.
        if agent_name is not None:
            # Tolerate whitespace around commas and drop empty entries,
            # so "--tools 'calculator, think'" and trailing commas work.
            parsed_tools = (
                [t.strip() for t in tool_names.split(",") if t.strip()]
                if tool_names
                else []
            )
            try:
                result = _run_agent(
                    agent_name, query_text, engine, model_name,
                    parsed_tools, config, bus, temperature, max_tokens,
                )
            except EngineConnectionError as exc:
                console.print(f"[red]Engine error:[/red] {exc}")
                console.print(hint_no_engine())
                sys.exit(1)
            if output_json:
                click.echo(json_mod.dumps({
                    "content": result.content,
                    "turns": result.turns,
                    "tool_results": [
                        {
                            "tool_name": tr.tool_name,
                            "content": tr.content,
                            "success": tr.success,
                        }
                        for tr in result.tool_results
                    ],
                }, indent=2))
            else:
                click.echo(result.content)
            if enable_profile:
                _print_profile(
                    bus, time.monotonic() - wall_start,
                    engine_name, model_name, console,
                )
            return

        # Direct-to-engine mode (no agent).
        # NOTE(review): no_stream is accepted but unused on this path — the
        # single non-streaming generate() below is the only mode implemented
        # here; confirm whether streaming was intended to be wired in.
        messages = [Message(role=Role.USER, content=query_text)]

        # Memory-augmented context injection (best-effort; failures are
        # logged at debug level and the raw query is used as-is).
        if not no_context and config.agent.context_from_memory:
            try:
                from openjarvis.tools.storage.context import (
                    ContextConfig,
                    inject_context,
                )
                backend = _get_memory_backend(config)
                if backend is not None:
                    ctx_cfg = ContextConfig(
                        top_k=config.memory.context_top_k,
                        min_score=config.memory.context_min_score,
                        max_context_tokens=config.memory.context_max_tokens,
                    )
                    messages = inject_context(
                        query_text, messages, backend,
                        config=ctx_cfg,
                    )
            except Exception as exc:
                logger.debug("Failed to inject memory context: %s", exc)

        # Generate (InstrumentedEngine handles telemetry + energy recording).
        try:
            with console.status("[bold green]Generating...[/bold green]"):
                result = engine.generate(
                    messages,
                    model=model_name,
                    temperature=temperature,
                    max_tokens=max_tokens,
                )
        except EngineConnectionError as exc:
            console.print(f"[red]Engine error:[/red] {exc}")
            console.print(hint_no_engine())
            sys.exit(1)

        # Output.
        if output_json:
            click.echo(json_mod.dumps(result, indent=2))
        else:
            click.echo(result.get("content", ""))
        if enable_profile:
            _print_profile(
                bus, time.monotonic() - wall_start,
                engine_name, model_name, console,
            )
    finally:
        # Cleanup — runs on success, agent-mode return, and sys.exit paths.
        if energy_monitor is not None:
            try:
                energy_monitor.close()
            except Exception as exc:
                logger.debug("Error closing energy monitor: %s", exc)
        if telem_store is not None:
            try:
                telem_store.close()
            except Exception as exc:
                logger.debug("Error closing telemetry store: %s", exc)