Skip to content

ask

ask

jarvis ask — send a query to the assistant.

Classes

Functions

ask

ask(query: tuple[str, ...], model_name: str | None, engine_key: str | None, temperature: float | None, max_tokens: int | None, output_json: bool, no_stream: bool, no_context: bool, agent_name: str | None, tool_names: str | None, enable_profile: bool) -> None

Ask Jarvis a question.

Source code in src/openjarvis/cli/ask.py
def _close_quietly(energy_monitor, telem_store: TelemetryStore | None) -> None:
    """Best-effort shutdown of telemetry resources.

    Errors are logged at debug level and never propagated, so cleanup
    can never mask the command's primary result or exit code.
    """
    if energy_monitor is not None:
        try:
            energy_monitor.close()
        except Exception as exc:
            logger.debug("Error closing energy monitor: %s", exc)
    if telem_store is not None:
        try:
            telem_store.close()
        except Exception as exc:
            logger.debug("Error closing telemetry store: %s", exc)


@click.command()
@click.argument("query", nargs=-1, required=True)
@click.option("-m", "--model", "model_name", default=None, help="Model to use.")
@click.option("-e", "--engine", "engine_key", default=None, help="Engine backend.")
@click.option(
    "-t",
    "--temperature",
    default=None,
    type=float,
    help="Sampling temperature (default: from config).",
)
@click.option(
    "--max-tokens",
    default=None,
    type=int,
    help="Max tokens to generate (default: from config).",
)
@click.option("--json", "output_json", is_flag=True, help="Output raw JSON result.")
@click.option("--no-stream", is_flag=True, help="Disable streaming (sync mode).")
@click.option(
    "--no-context",
    is_flag=True,
    help="Disable memory context injection.",
)
@click.option(
    "-a",
    "--agent",
    "agent_name",
    default=None,
    help="Agent to use (simple, orchestrator).",
)
@click.option(
    "--tools",
    "tool_names",
    default=None,
    help="Comma-separated tool names to enable (e.g. calculator,think).",
)
@click.option(
    "--profile",
    "enable_profile",
    is_flag=True,
    help="Print inference telemetry profile (latency, tokens, energy, IPW).",
)
def ask(
    query: tuple[str, ...],
    model_name: str | None,
    engine_key: str | None,
    temperature: float | None,
    max_tokens: int | None,
    output_json: bool,
    no_stream: bool,
    no_context: bool,
    agent_name: str | None,
    tool_names: str | None,
    enable_profile: bool,
) -> None:
    """Ask Jarvis a question.

    Resolves engine and model from CLI flags with config fallbacks, scores
    the query's complexity to pick a token budget, then either routes the
    query through an agent (``--agent``) or sends it directly to the engine.
    Output goes to stdout as plain text or JSON (``--json``); status and
    diagnostics go to stderr.

    Args:
        query: Words of the question; joined with spaces into one prompt.
        model_name: Model override; falls back to config default/fallback,
            then the engine's first discovered model.
        engine_key: Engine backend override; falls back to the config's
            preferred engine.
        temperature: Sampling temperature; ``None`` means use config value.
        max_tokens: Generation cap; ``None`` means use the larger of the
            config value and the complexity analyzer's suggestion.
        output_json: Emit the raw result as JSON instead of plain text.
        no_stream: Disable streaming (sync mode).
        no_context: Skip memory context injection even if config enables it.
        agent_name: If set, run through the named agent instead of
            direct-to-engine generation.
        tool_names: Comma-separated tool names to enable in agent mode.
        enable_profile: Print an inference telemetry profile after the run.

    Exits with status 1 when no engine/model is available or the engine
    connection fails.
    """
    console = Console(stderr=True)
    query_text = " ".join(query)

    wall_start = time.monotonic() if enable_profile else None

    # Load config
    config = load_config()

    # Track whether the user explicitly set --max-tokens
    user_set_max_tokens = max_tokens is not None

    # Fall back to config values for generation params
    if temperature is None:
        temperature = config.intelligence.temperature
    if max_tokens is None:
        max_tokens = config.intelligence.max_tokens

    # Run complexity analysis on the query
    from openjarvis.learning.routing.complexity import (
        ComplexityResult,
        adjust_tokens_for_model,
        score_complexity,
    )

    complexity_result: ComplexityResult = score_complexity(query_text)
    logger.debug(
        "Complexity analysis: score=%.3f tier=%s suggested_max_tokens=%d",
        complexity_result.score,
        complexity_result.tier,
        complexity_result.suggested_max_tokens,
    )

    # Set up telemetry; a failed store init degrades to no persistence.
    bus = EventBus(record_history=True)
    telem_store: TelemetryStore | None = None
    if config.telemetry.enabled:
        try:
            telem_store = TelemetryStore(config.telemetry.db_path)
            telem_store.subscribe_to_bus(bus)
        except Exception as exc:
            logger.debug("Failed to initialize telemetry store: %s", exc)

    # Discover engines
    register_builtin_models()

    effective_engine_key = engine_key or config.intelligence.preferred_engine or None
    resolved = get_engine(config, effective_engine_key)
    if resolved is None:
        console.print(
            "[red bold]No inference engine available.[/red bold]\n\n"
            "Make sure an engine is running:\n"
            "  [cyan]ollama serve[/cyan]          — start Ollama\n"
            "  [cyan]vllm serve <model>[/cyan]    — start vLLM\n"
            "  [cyan]llama-server -m <gguf>[/cyan] — start llama.cpp\n\n"
            "Or set OPENAI_API_KEY / ANTHROPIC_API_KEY for cloud inference.\n\n"
            "[dim]To use a remote engine:[/dim]\n"
            "  [cyan]jarvis config set engine.ollama.host http://<remote-ip>:11434[/cyan]\n"
            "  [dim]or[/dim] [cyan]export OLLAMA_HOST=http://<remote-ip>:11434[/cyan]"
        )
        sys.exit(1)

    engine_name, engine = resolved

    # Apply security guardrails
    from openjarvis.security import setup_security

    sec = setup_security(config, engine, bus)
    engine = sec.engine

    # Wrap engine with InstrumentedEngine for telemetry (energy + GPU metrics)
    energy_monitor = None
    want_energy = config.telemetry.gpu_metrics or enable_profile
    if want_energy:
        try:
            from openjarvis.telemetry.energy_monitor import create_energy_monitor

            energy_monitor = create_energy_monitor(
                prefer_vendor=config.telemetry.energy_vendor or None,
            )
        except Exception as exc:
            logger.debug("Failed to create energy monitor: %s", exc)
    engine = InstrumentedEngine(engine, bus, energy_monitor=energy_monitor)

    # Discover models and merge into registry
    all_engines = discover_engines(config)
    all_models = discover_models(all_engines)
    for ek, model_ids in all_models.items():
        merge_discovered_models(ek, model_ids)

    # Resolve model via config fallback chain:
    # CLI flag -> config default -> first discovered -> config fallback.
    if model_name is None:
        model_name = config.intelligence.default_model
    if not model_name:
        # Try first available from engine
        engine_models = all_models.get(engine_name, [])
        if engine_models:
            model_name = engine_models[0]
    if not model_name:
        model_name = config.intelligence.fallback_model
    if not model_name:
        console.print("[red]No model available on engine.[/red]")
        sys.exit(1)

    # Apply complexity-suggested token budget when user didn't override.
    # Use at least the config default so we never reduce tokens below what
    # the user would have gotten without the analyzer.
    if not user_set_max_tokens:
        suggested = adjust_tokens_for_model(
            complexity_result.suggested_max_tokens,
            model_name,
        )
        max_tokens = max(suggested, config.intelligence.max_tokens)
        logger.debug(
            "Using complexity-suggested max_tokens=%d (model=%s)",
            max_tokens,
            model_name,
        )

    # Agent mode
    if agent_name is not None:
        parsed_tools = resolve_tool_names(
            tool_names,
            getattr(config.tools, "enabled", None),
            getattr(config.agent, "tools", None),
        )
        try:
            result = _run_agent(
                agent_name,
                query_text,
                engine,
                model_name,
                parsed_tools,
                config,
                bus,
                temperature,
                max_tokens,
                capability_policy=sec.capability_policy,
            )
        except EngineConnectionError as exc:
            console.print(f"[red]Engine error:[/red] {exc}")
            console.print(hint_no_engine())
            sys.exit(1)

        if output_json:
            click.echo(
                json_mod.dumps(
                    {
                        "content": result.content,
                        "turns": result.turns,
                        "tool_results": [
                            {
                                "tool_name": tr.tool_name,
                                "content": tr.content,
                                "success": tr.success,
                            }
                            for tr in result.tool_results
                        ],
                    },
                    indent=2,
                )
            )
        else:
            click.echo(result.content)

        if enable_profile:
            _print_profile(
                bus,
                time.monotonic() - wall_start,
                engine_name,
                model_name,
                console,
                complexity_result=complexity_result,
            )

        # Agent mode returns early, so release telemetry resources here —
        # the cleanup at the end of this function is not reached. This also
        # closes the energy monitor created when --profile is used.
        _close_quietly(energy_monitor, telem_store)
        return

    # Direct-to-engine mode (no agent)
    messages = [Message(role=Role.USER, content=query_text)]

    # Memory-augmented context injection
    if not no_context and config.agent.context_from_memory:
        try:
            from openjarvis.tools.storage.context import (
                ContextConfig,
                inject_context,
            )

            backend = _get_memory_backend(config)
            if backend is not None:
                ctx_cfg = ContextConfig(
                    top_k=config.memory.context_top_k,
                    min_score=config.memory.context_min_score,
                    max_context_tokens=(config.memory.context_max_tokens),
                )
                messages = inject_context(
                    query_text,
                    messages,
                    backend,
                    config=ctx_cfg,
                )
        except Exception as exc:
            # Best-effort: missing/broken memory backend must not block the ask.
            logger.debug("Failed to inject memory context: %s", exc)

    # Generate (InstrumentedEngine handles telemetry + energy recording)
    try:
        with console.status("[bold green]Generating...[/bold green]"):
            result = engine.generate(
                messages,
                model=model_name,
                temperature=temperature,
                max_tokens=max_tokens,
            )
    except EngineConnectionError as exc:
        console.print(f"[red]Engine error:[/red] {exc}")
        console.print(hint_no_engine())
        sys.exit(1)

    # Output
    if output_json:
        click.echo(json_mod.dumps(result, indent=2))
    else:
        click.echo(result.get("content", ""))

    if enable_profile:
        _print_profile(
            bus,
            time.monotonic() - wall_start,
            engine_name,
            model_name,
            console,
            complexity_result=complexity_result,
        )

    # Cleanup
    _close_quietly(energy_monitor, telem_store)