Skip to content

ask

ask

jarvis ask — send a query to the assistant.

Classes

Functions

ask

ask(query: tuple[str, ...], model_name: str | None, engine_key: str | None, temperature: float, max_tokens: int, output_json: bool, no_stream: bool, no_context: bool, agent_name: str | None, tool_names: str | None, enable_profile: bool) -> None

Ask Jarvis a question.

Source code in src/openjarvis/cli/ask.py
def _close_quietly(energy_monitor, telem_store) -> None:
    """Best-effort shutdown of telemetry resources.

    Cleanup failures are logged at debug level rather than raised so they
    never mask the command's primary output or exit status.
    """
    if energy_monitor is not None:
        try:
            energy_monitor.close()
        except Exception as exc:
            logger.debug("Error closing energy monitor: %s", exc)
    if telem_store is not None:
        try:
            telem_store.close()
        except Exception as exc:
            logger.debug("Error closing telemetry store: %s", exc)


@click.command()
@click.argument("query", nargs=-1, required=True)
@click.option("-m", "--model", "model_name", default=None, help="Model to use.")
@click.option("-e", "--engine", "engine_key", default=None, help="Engine backend.")
@click.option(
    "-t", "--temperature", default=None, type=float,
    help="Sampling temperature (default: from config).",
)
@click.option(
    "--max-tokens", default=None, type=int,
    help="Max tokens to generate (default: from config).",
)
@click.option("--json", "output_json", is_flag=True, help="Output raw JSON result.")
@click.option("--no-stream", is_flag=True, help="Disable streaming (sync mode).")
@click.option(
    "--no-context", is_flag=True,
    help="Disable memory context injection.",
)
@click.option(
    "-a", "--agent", "agent_name", default=None,
    help="Agent to use (simple, orchestrator).",
)
@click.option(
    "--tools", "tool_names", default=None,
    help="Comma-separated tool names to enable (e.g. calculator,think).",
)
@click.option(
    "--profile", "enable_profile", is_flag=True,
    help="Print inference telemetry profile (latency, tokens, energy, IPW).",
)
def ask(
    query: tuple[str, ...],
    model_name: str | None,
    engine_key: str | None,
    # These two default to None at the CLI and fall back to config values
    # below, so their annotations must admit None.
    temperature: float | None,
    max_tokens: int | None,
    output_json: bool,
    no_stream: bool,
    no_context: bool,
    agent_name: str | None,
    tool_names: str | None,
    enable_profile: bool,
) -> None:
    """Ask Jarvis a question.

    Resolves an inference engine and model from CLI flags and config,
    optionally routes the query through an agent (``--agent``) with tools
    (``--tools``), otherwise sends it directly to the engine. Output goes
    to stdout (plain text or ``--json``); status/diagnostics go to stderr.
    Exits with status 1 when no engine or model is available, or on an
    engine connection error.
    """
    # NOTE(review): ``no_stream`` is declared but not consulted anywhere in
    # this function body — confirm whether streaming is handled elsewhere.
    console = Console(stderr=True)
    query_text = " ".join(query)

    # Wall-clock start for the --profile report; only measured when needed.
    wall_start = time.monotonic() if enable_profile else None

    # Load config
    config = load_config()

    # Fall back to config values for generation params
    if temperature is None:
        temperature = config.intelligence.temperature
    if max_tokens is None:
        max_tokens = config.intelligence.max_tokens

    # Set up telemetry. Telemetry is best-effort: a failed store init is
    # logged and the command proceeds without persistence.
    bus = EventBus(record_history=True)
    telem_store: TelemetryStore | None = None
    if config.telemetry.enabled:
        try:
            telem_store = TelemetryStore(config.telemetry.db_path)
            telem_store.subscribe_to_bus(bus)
        except Exception as exc:
            logger.debug("Failed to initialize telemetry store: %s", exc)

    # Discover engines
    register_builtin_models()

    effective_engine_key = engine_key or config.intelligence.preferred_engine or None
    resolved = get_engine(config, effective_engine_key)
    if resolved is None:
        console.print(
            "[red bold]No inference engine available.[/red bold]\n\n"
            "Make sure an engine is running:\n"
            "  [cyan]ollama serve[/cyan]          — start Ollama\n"
            "  [cyan]vllm serve <model>[/cyan]    — start vLLM\n"
            "  [cyan]llama-server -m <gguf>[/cyan] — start llama.cpp\n\n"
            "Or set OPENAI_API_KEY / ANTHROPIC_API_KEY for cloud inference."
        )
        sys.exit(1)

    engine_name, engine = resolved

    # Wrap engine with InstrumentedEngine for telemetry (energy + GPU metrics)
    energy_monitor = None
    want_energy = config.telemetry.gpu_metrics or enable_profile
    if want_energy:
        try:
            from openjarvis.telemetry.energy_monitor import create_energy_monitor

            energy_monitor = create_energy_monitor(
                prefer_vendor=config.telemetry.energy_vendor or None,
            )
        except Exception as exc:
            logger.debug("Failed to create energy monitor: %s", exc)
    engine = InstrumentedEngine(engine, bus, energy_monitor=energy_monitor)

    # Discover models and merge into registry
    all_engines = discover_engines(config)
    all_models = discover_models(all_engines)
    for ek, model_ids in all_models.items():
        merge_discovered_models(ek, model_ids)

    # Resolve model via config fallback chain:
    # explicit flag -> config default -> first discovered -> config fallback.
    if model_name is None:
        model_name = config.intelligence.default_model
    if not model_name:
        # Try first available from engine
        engine_models = all_models.get(engine_name, [])
        if engine_models:
            model_name = engine_models[0]
    if not model_name:
        model_name = config.intelligence.fallback_model
    if not model_name:
        console.print("[red]No model available on engine.[/red]")
        sys.exit(1)

    # Agent mode
    if agent_name is not None:
        parsed_tools = tool_names.split(",") if tool_names else []
        try:
            result = _run_agent(
                agent_name, query_text, engine, model_name,
                parsed_tools, config, bus, temperature, max_tokens,
            )
        except EngineConnectionError as exc:
            console.print(f"[red]Engine error:[/red] {exc}")
            console.print(hint_no_engine())
            sys.exit(1)

        if output_json:
            click.echo(json_mod.dumps({
                "content": result.content,
                "turns": result.turns,
                "tool_results": [
                    {
                        "tool_name": tr.tool_name,
                        "content": tr.content,
                        "success": tr.success,
                    }
                    for tr in result.tool_results
                ],
            }, indent=2))
        else:
            click.echo(result.content)

        if enable_profile:
            _print_profile(
                bus, time.monotonic() - wall_start,
                engine_name, model_name, console,
            )

        # Fix: previously the agent path closed only the telemetry store,
        # leaking the energy monitor; release both before returning.
        _close_quietly(energy_monitor, telem_store)
        return

    # Direct-to-engine mode (no agent)
    messages = [Message(role=Role.USER, content=query_text)]

    # Memory-augmented context injection (best-effort; failures are logged
    # and the bare query is sent instead).
    if not no_context and config.agent.context_from_memory:
        try:
            from openjarvis.tools.storage.context import (
                ContextConfig,
                inject_context,
            )
            backend = _get_memory_backend(config)
            if backend is not None:
                ctx_cfg = ContextConfig(
                    top_k=config.memory.context_top_k,
                    min_score=config.memory.context_min_score,
                    max_context_tokens=(
                        config.memory.context_max_tokens
                    ),
                )
                messages = inject_context(
                    query_text, messages, backend,
                    config=ctx_cfg,
                )
        except Exception as exc:
            logger.debug("Failed to inject memory context: %s", exc)

    # Generate (InstrumentedEngine handles telemetry + energy recording)
    try:
        with console.status("[bold green]Generating...[/bold green]"):
            result = engine.generate(
                messages,
                model=model_name,
                temperature=temperature,
                max_tokens=max_tokens,
            )
    except EngineConnectionError as exc:
        console.print(f"[red]Engine error:[/red] {exc}")
        console.print(hint_no_engine())
        sys.exit(1)

    # Output — result here is dict-like (see .get below); the agent path
    # returns an object with attributes instead.
    if output_json:
        click.echo(json_mod.dumps(result, indent=2))
    else:
        click.echo(result.get("content", ""))

    if enable_profile:
        _print_profile(
            bus, time.monotonic() - wall_start,
            engine_name, model_name, console,
        )

    # Cleanup
    _close_quietly(energy_monitor, telem_store)