ask

ask ¶

jarvis ask — send a query to the assistant.

Classes¶

Functions¶

ask ¶

ask(ctx: Context, query: tuple[str, ...], model_name: str | None, engine_key: str | None, temperature: float, max_tokens: int, output_json: bool, no_stream: bool, no_context: bool, agent_name: str | None, tool_names: str | None, enable_profile: bool, research_mode: bool, knowledge_db: str | None, persona_name: str | None, image_paths: tuple[str, ...] = (), capture_screen: bool = False) -> None

Ask Jarvis a question.

Source code in src/openjarvis/cli/ask.py

def _close_quietly(resource, label: str) -> None:
    """Close *resource* if it is not None, logging any failure at debug level.

    Cleanup here is best-effort: an error raised while closing must not mask
    the command's real outcome, so it is logged instead of propagated.
    """
    if resource is None:
        return
    try:
        resource.close()
    except Exception as exc:
        logger.debug("Error closing %s: %s", label, exc)


@click.command()
@click.argument("query", nargs=-1, required=True)
@click.option("-m", "--model", "model_name", default=None, help="Model to use.")
@click.option("-e", "--engine", "engine_key", default=None, help="Engine backend.")
@click.option(
    "-t",
    "--temperature",
    default=None,
    type=float,
    help="Sampling temperature (default: from config).",
)
@click.option(
    "--max-tokens",
    default=None,
    type=int,
    help="Max tokens to generate (default: from config).",
)
@click.option("--json", "output_json", is_flag=True, help="Output raw JSON result.")
@click.option("--no-stream", is_flag=True, help="Disable streaming (sync mode).")
@click.option(
    "--no-context",
    is_flag=True,
    help="Disable memory context injection.",
)
@click.option(
    "-a",
    "--agent",
    "agent_name",
    default=None,
    help=(
        "Agent to use (simple, orchestrator, ...). "
        "When omitted, falls back to ``agent.default_agent`` from config. "
        "Pass ``--agent ''`` to force direct-to-engine mode (no agent)."
    ),
)
@click.option(
    "--tools",
    "tool_names",
    default=None,
    help="Comma-separated tool names to enable (e.g. calculator,think).",
)
@click.option(
    "--profile",
    "enable_profile",
    is_flag=True,
    help="Print inference telemetry profile (latency, tokens, energy, IPW).",
)
@click.option(
    "--research",
    "research_mode",
    is_flag=True,
    help=(
        "Route the query through the hybrid-search research agent over the "
        "personal knowledge store (BM25 + dense embeddings, max 5 tool calls)."
    ),
)
@click.option(
    "--knowledge-db",
    "knowledge_db",
    default=None,
    help=(
        "Override the KnowledgeStore path used by --research "
        "(default: ~/.openjarvis/knowledge.db)."
    ),
)
@click.option(
    "-i",
    "--image",
    "image_paths",
    multiple=True,
    type=click.Path(exists=True, dir_okay=False),
    help="Image file for a vision model (e.g. gemma3). Repeatable.",
)
@click.option(
    "-S",
    "--screen",
    "capture_screen",
    is_flag=True,
    help="Capture the current screen and send it to the vision model.",
)
@click.option(
    "--persona",
    "persona_name",
    default=None,
    help=(
        "Named persona dir under ~/.openjarvis/personas/<name>/ "
        "(overrides config). Pass 'none' to disable all persona files."
    ),
)
@click.pass_context
def ask(
    ctx: click.Context,
    query: tuple[str, ...],
    model_name: str | None,
    engine_key: str | None,
    temperature: float | None,
    max_tokens: int | None,
    output_json: bool,
    no_stream: bool,
    no_context: bool,
    agent_name: str | None,
    tool_names: str | None,
    enable_profile: bool,
    research_mode: bool,
    knowledge_db: str | None,
    persona_name: str | None,
    image_paths: tuple[str, ...] = (),
    capture_screen: bool = False,
) -> None:
    """Ask Jarvis a question."""
    # NOTE: the docstring above is deliberately left as a single line because
    # Click renders a command's docstring as its ``--help`` text.
    #
    # Overview of the flow below:
    #   1. Join the query words; read --image files / --screen into base64.
    #   2. Load config; resolve persona override, agent name, temperature and
    #      max_tokens (CLI value first, config value otherwise).
    #   3. Score query complexity, set up the event bus and optional
    #      telemetry store, and select an engine.
    #   4. Dispatch to exactly one of: research mode, agent mode, or
    #      direct-to-engine mode.
    #
    # ``temperature`` and ``max_tokens`` arrive as None when the flag is
    # omitted (both options use ``default=None``). Any unrecoverable setup or
    # engine error is reported on stderr and ends the process via
    # ``sys.exit(1)``.
    quiet = (ctx.obj or {}).get("quiet", False) or output_json
    print_banner(quiet=quiet)
    # Diagnostics go to stderr so stdout carries only the answer / JSON.
    console = Console(stderr=True)
    query_text = " ".join(query)

    # Vision: collect base64 images from --image files and/or --screen.
    image_b64: list[str] = []
    for _img_path in image_paths:
        try:
            with open(_img_path, "rb") as _fh:
                image_b64.append(base64.b64encode(_fh.read()).decode("ascii"))
        except OSError as exc:
            console.print(f"[red]Could not read image {_img_path}: {exc}[/red]")
            sys.exit(1)
    if capture_screen:
        try:
            from openjarvis.cli._screen import capture_screen_to_temp

            _shot = capture_screen_to_temp()
            with open(_shot, "rb") as _fh:
                image_b64.append(base64.b64encode(_fh.read()).decode("ascii"))
            logger.debug("Captured screen to %s", _shot)
        except Exception as exc:  # noqa: BLE001
            console.print(f"[red]Screen capture failed:[/red] {exc}")
            sys.exit(1)

    # Only pay for the clock read when a profile will actually be printed.
    wall_start = time.monotonic() if enable_profile else None

    # Load config
    config = load_config()

    # Resolve effective MemoryFilesConfig with --persona override
    import dataclasses as _dc

    effective_mf = (
        _dc.replace(config.memory_files, persona_name=persona_name)
        if persona_name is not None
        else config.memory_files
    )

    # Honor `agent.default_agent` from config when --agent was not explicitly
    # passed. Pass `--agent ""` to opt out and use direct-to-engine mode.
    # Without this fallback, `[agent].default_system_prompt` and the
    # SOUL.md / MEMORY.md / USER.md persona system are silently bypassed for
    # the most common command (`jarvis ask "..."`).
    agent_explicitly_set = agent_name is not None
    if agent_name is None:
        configured_default = (config.agent.default_agent or "").strip()
        if configured_default:
            agent_name = configured_default

    # Vision flows only through direct-to-engine mode. If an image/screenshot
    # was supplied without an explicit --agent, route to direct mode so the
    # picture reaches the model; if a real agent was explicitly requested, say
    # plainly that the image is being skipped rather than dropping it silently.
    if image_b64:
        if not agent_explicitly_set:
            agent_name = ""
        elif agent_name:
            # Only warn for a non-empty agent name: an explicit `--agent ""`
            # already selects direct mode, where the image IS sent, so the
            # "image is ignored" note would be wrong in that case.
            console.print(
                "[yellow]Note:[/yellow] --image/--screen only works in direct "
                "mode; the image is ignored with --agent set. Re-run with "
                '`--agent ""` to use vision.'
            )

    # Track whether the user explicitly set --max-tokens
    user_set_max_tokens = max_tokens is not None

    # Fall back to config values for generation params
    if temperature is None:
        temperature = config.intelligence.temperature
    if max_tokens is None:
        max_tokens = config.intelligence.max_tokens

    # Run complexity analysis on the query
    from openjarvis.learning.routing.complexity import (
        ComplexityResult,
        adjust_tokens_for_model,
        score_complexity,
    )

    complexity_result: ComplexityResult = score_complexity(query_text)
    logger.debug(
        "Complexity analysis: score=%.3f tier=%s suggested_max_tokens=%d",
        complexity_result.score,
        complexity_result.tier,
        complexity_result.suggested_max_tokens,
    )

    # Set up telemetry
    bus = EventBus(record_history=True)
    telem_store: TelemetryStore | None = None
    if config.telemetry.enabled:
        try:
            telem_store = TelemetryStore(config.telemetry.db_path)
            telem_store.subscribe_to_bus(bus)
        except Exception as exc:
            # Telemetry is optional; carry on without it.
            logger.debug("Failed to initialize telemetry store: %s", exc)

    # Discover engines
    register_builtin_models()

    effective_engine_key = engine_key or config.intelligence.preferred_engine or None
    # Pass the model we intend to run so engine selection can skip an engine
    # that can't actually serve it (e.g. the cloud fallback when the local
    # engine is down but only a non-OpenAI key is set — see #532). This is the
    # -m flag or the configured default; when neither is set we leave it None
    # and a model is chosen per-engine below.
    selection_model = model_name or config.intelligence.default_model or None
    resolved = get_engine(config, effective_engine_key, model=selection_model)
    if resolved is None:
        console.print(
            "[red bold]No inference engine available.[/red bold]\n\n"
            "Make sure an engine is running:\n"
            "  [cyan]ollama serve[/cyan]          — start Ollama\n"
            "  [cyan]vllm serve <model>[/cyan]    — start vLLM\n"
            "  [cyan]llama-server -m <gguf>[/cyan] — start llama.cpp\n\n"
            "Or set OPENAI_API_KEY / ANTHROPIC_API_KEY for cloud inference.\n\n"
            "[dim]To use a remote engine:[/dim]\n"
            "  [cyan]jarvis config set engine.ollama.host http://<remote-ip>:11434[/cyan]\n"
            "  [dim]or[/dim] [cyan]export OLLAMA_HOST=http://<remote-ip>:11434[/cyan]"
        )
        sys.exit(1)

    engine_name, engine = resolved

    # ------------------------------------------------------------------
    # Research mode — hybrid search + agentic loop over the knowledge store
    # ------------------------------------------------------------------
    if research_mode:
        try:
            _run_research(
                query_text=query_text,
                engine=engine,
                model_name=model_name,
                knowledge_db=knowledge_db,
                output_json=output_json,
                console=console,
            )
        finally:
            # The telemetry store was opened above and is not handed to
            # _run_research, so it must be released here before returning.
            _close_quietly(telem_store, "telemetry store")
        return

    # Apply security guardrails
    from openjarvis.security import setup_security

    sec = setup_security(config, engine, bus)
    engine = sec.engine

    # Wrap engine with InstrumentedEngine for telemetry (energy + GPU metrics)
    energy_monitor = None
    want_energy = config.telemetry.gpu_metrics or enable_profile
    if want_energy:
        try:
            from openjarvis.telemetry.energy_monitor import create_energy_monitor

            energy_monitor = create_energy_monitor(
                prefer_vendor=config.telemetry.energy_vendor or None,
            )
        except Exception as exc:
            # Energy metrics are optional; run without a monitor.
            logger.debug("Failed to create energy monitor: %s", exc)
    engine = InstrumentedEngine(engine, bus, energy_monitor=energy_monitor)

    # Discover models and merge into registry
    all_engines = discover_engines(config)
    all_models = discover_models(all_engines)
    for ek, model_ids in all_models.items():
        merge_discovered_models(ek, model_ids)

    # Resolve model via config fallback chain:
    # -m flag -> config default -> first model on the engine -> config fallback
    if model_name is None:
        model_name = config.intelligence.default_model
    if not model_name:
        # Try first available from engine
        engine_models = all_models.get(engine_name, [])
        if engine_models:
            model_name = engine_models[0]
    if not model_name:
        model_name = config.intelligence.fallback_model
    if not model_name:
        console.print("[red]No model available on engine.[/red]")
        sys.exit(1)

    # Apply complexity-suggested token budget when user didn't override.
    # Use at least the config default so we never reduce tokens below what
    # the user would have gotten without the analyzer.
    if not user_set_max_tokens:
        suggested = adjust_tokens_for_model(
            complexity_result.suggested_max_tokens,
            model_name,
        )
        max_tokens = max(suggested, config.intelligence.max_tokens)
        logger.debug(
            "Using complexity-suggested max_tokens=%d (model=%s)",
            max_tokens,
            model_name,
        )

    # Agent mode (treat empty-string `--agent ""` as explicit opt-out)
    if agent_name:
        parsed_tools = resolve_tool_names(
            tool_names,
            getattr(config.tools, "enabled", None),
            getattr(config.agent, "tools", None),
        )
        try:
            result = _run_agent(
                agent_name,
                query_text,
                engine,
                model_name,
                parsed_tools,
                config,
                bus,
                temperature,
                max_tokens,
                capability_policy=sec.capability_policy,
                memory_files_config=effective_mf,
            )
        except EngineContextLengthError as exc:
            # Not a reachability problem — pointing the user at server/host
            # config (hint_no_engine) would be misleading here.
            console.print(f"[red]{exc}[/red]")
            sys.exit(1)
        except EngineConnectionError as exc:
            console.print(f"[red]Engine error:[/red] {exc}")
            console.print(hint_no_engine())
            sys.exit(1)

        if output_json:
            click.echo(
                json_mod.dumps(
                    {
                        "content": result.content,
                        "turns": result.turns,
                        "tool_results": [
                            {
                                "tool_name": tr.tool_name,
                                "content": tr.content,
                                "success": tr.success,
                            }
                            for tr in result.tool_results
                        ],
                    },
                    indent=2,
                )
            )
        else:
            click.echo(result.content)

        if enable_profile:
            _print_profile(
                bus,
                time.monotonic() - wall_start,
                engine_name,
                model_name,
                console,
                complexity_result=complexity_result,
            )

        # Cleanup — same resources, same order as the direct-to-engine path.
        _close_quietly(energy_monitor, "energy monitor")
        _close_quietly(telem_store, "telemetry store")
        return

    # Direct-to-engine mode (no agent)
    # Privacy guard: a screenshot/image is sensitive, and OpenJarvis is
    # local-first. If the active engine isn't local, warn before the image
    # leaves the machine rather than silently uploading it to a third party.
    _LOCAL_ENGINES = {
        "ollama",
        "llamacpp",
        "vllm",
        "sglang",
        "exo",
        "nexa",
        "uzu",
        "apple_fm",
        "gemma_cpp",
    }
    if image_b64 and engine_name not in _LOCAL_ENGINES:
        console.print(
            f"[yellow]Privacy warning:[/yellow] sending {len(image_b64)} "
            f"image(s) to a non-local engine ('{engine_name}'). The image will "
            "leave this machine. Use a local engine (e.g. ollama) to keep "
            "vision on-device."
        )
    messages = [Message(role=Role.USER, content=query_text)]

    # Memory-augmented context injection
    if not no_context and config.agent.context_from_memory:
        try:
            from openjarvis.tools.storage.context import (
                ContextConfig,
                inject_context,
            )

            backend = _get_memory_backend(config)
            if backend is not None:
                ctx_cfg = ContextConfig(
                    top_k=config.memory.context_top_k,
                    min_score=config.memory.context_min_score,
                    max_context_tokens=(config.memory.context_max_tokens),
                )
                messages = inject_context(
                    query_text,
                    messages,
                    backend,
                    config=ctx_cfg,
                )
        except Exception as exc:
            # Context injection is best-effort; fall back to the bare query.
            logger.debug("Failed to inject memory context: %s", exc)

    # Vision: attach images to the final user message *after* any context
    # injection (which may rebuild the list). messages_to_dicts() forwards
    # the "images" field to Ollama's /api/chat.
    if image_b64:
        for _m in reversed(messages):
            if _m.role == Role.USER:
                _m.images = image_b64
                break

    # Generate (InstrumentedEngine handles telemetry + energy recording)
    try:
        with console.status("[bold green]Generating...[/bold green]"):
            result = engine.generate(
                messages,
                model=model_name,
                temperature=temperature,
                max_tokens=max_tokens,
            )
    except EngineContextLengthError as exc:
        # Not a reachability problem — pointing the user at server/host
        # config (hint_no_engine) would be misleading here.
        console.print(f"[red]{exc}[/red]")
        sys.exit(1)
    except EngineConnectionError as exc:
        console.print(f"[red]Engine error:[/red] {exc}")
        console.print(hint_no_engine())
        sys.exit(1)

    # Output
    if output_json:
        click.echo(json_mod.dumps(result, indent=2))
    else:
        click.echo(result.get("content", ""))

    if enable_profile:
        _print_profile(
            bus,
            time.monotonic() - wall_start,
            engine_name,
            model_name,
            console,
            complexity_result=complexity_result,
        )

    # Cleanup
    _close_quietly(energy_monitor, "energy monitor")
    _close_quietly(telem_store, "telemetry store")