@click.command()
@click.argument("query", nargs=-1, required=True)
@click.option("-m", "--model", "model_name", default=None, help="Model to use.")
@click.option("-e", "--engine", "engine_key", default=None, help="Engine backend.")
@click.option(
    "-t",
    "--temperature",
    default=None,
    type=float,
    help="Sampling temperature (default: from config).",
)
@click.option(
    "--max-tokens",
    default=None,
    type=int,
    help="Max tokens to generate (default: from config).",
)
@click.option("--json", "output_json", is_flag=True, help="Output raw JSON result.")
@click.option("--no-stream", is_flag=True, help="Disable streaming (sync mode).")
@click.option(
    "--no-context",
    is_flag=True,
    help="Disable memory context injection.",
)
@click.option(
    "-a",
    "--agent",
    "agent_name",
    default=None,
    help=(
        "Agent to use (simple, orchestrator, ...). "
        "When omitted, falls back to ``agent.default_agent`` from config. "
        "Pass ``--agent ''`` to force direct-to-engine mode (no agent)."
    ),
)
@click.option(
    "--tools",
    "tool_names",
    default=None,
    help="Comma-separated tool names to enable (e.g. calculator,think).",
)
@click.option(
    "--profile",
    "enable_profile",
    is_flag=True,
    help="Print inference telemetry profile (latency, tokens, energy, IPW).",
)
@click.option(
    "--research",
    "research_mode",
    is_flag=True,
    help=(
        "Route the query through the hybrid-search research agent over the "
        "personal knowledge store (BM25 + dense embeddings, max 5 tool calls)."
    ),
)
@click.option(
    "--knowledge-db",
    "knowledge_db",
    default=None,
    help=(
        "Override the KnowledgeStore path used by --research "
        "(default: ~/.openjarvis/knowledge.db)."
    ),
)
@click.option(
    "-i",
    "--image",
    "image_paths",
    multiple=True,
    type=click.Path(exists=True, dir_okay=False),
    help="Image file for a vision model (e.g. gemma3). Repeatable.",
)
@click.option(
    "-S",
    "--screen",
    "capture_screen",
    is_flag=True,
    help="Capture the current screen and send it to the vision model.",
)
@click.option(
    "--persona",
    "persona_name",
    default=None,
    help=(
        "Named persona dir under ~/.openjarvis/personas/<name>/ "
        "(overrides config). Pass 'none' to disable all persona files."
    ),
)
@click.pass_context
def ask(
    ctx: click.Context,
    query: tuple[str, ...],
    model_name: str | None,
    engine_key: str | None,
    temperature: float | None,
    max_tokens: int | None,
    output_json: bool,
    no_stream: bool,
    no_context: bool,
    agent_name: str | None,
    tool_names: str | None,
    enable_profile: bool,
    research_mode: bool,
    knowledge_db: str | None,
    persona_name: str | None,
    image_paths: tuple[str, ...] = (),
    capture_screen: bool = False,
) -> None:
    """Ask Jarvis a question."""
    # The docstring above is what click shows as this command's --help text,
    # so it stays a one-liner; maintainer notes live in comments instead.
    #
    # Flow: collect images -> load config and resolve persona/agent -> pick
    # an engine -> (--research short-circuits here) -> wrap the engine with
    # security guardrails and instrumentation -> resolve the model and token
    # budget -> run either the agent path or the direct-to-engine path.
    #
    # Exits with status 1 (via sys.exit) when an image cannot be read, screen
    # capture fails, no engine or model is available, or the engine raises
    # EngineConnectionError.
    #
    # NOTE(review): ``no_stream`` is accepted but never read in this
    # function -- confirm whether streaming is handled elsewhere.
    quiet = (ctx.obj or {}).get("quiet", False) or output_json
    print_banner(quiet=quiet)
    # Diagnostics go to stderr so stdout carries only the answer / JSON.
    console = Console(stderr=True)
    query_text = " ".join(query)

    # Vision: collect base64 images from --image files and/or --screen.
    image_b64: list[str] = []
    for _img_path in image_paths:
        try:
            with open(_img_path, "rb") as _fh:
                image_b64.append(base64.b64encode(_fh.read()).decode("ascii"))
        except OSError as exc:
            console.print(f"[red]Could not read image {_img_path}: {exc}[/red]")
            sys.exit(1)
    if capture_screen:
        try:
            from openjarvis.cli._screen import capture_screen_to_temp

            _shot = capture_screen_to_temp()
            with open(_shot, "rb") as _fh:
                image_b64.append(base64.b64encode(_fh.read()).decode("ascii"))
            logger.debug("Captured screen to %s", _shot)
            # NOTE(review): the screenshot file is not removed here -- confirm
            # that capture_screen_to_temp (or its caller) owns the cleanup.
        except Exception as exc:  # noqa: BLE001
            console.print(f"[red]Screen capture failed:[/red] {exc}")
            sys.exit(1)

    # Only needed for --profile; every later use is guarded by the same flag.
    wall_start = time.monotonic() if enable_profile else None

    # Load config
    config = load_config()

    # Resolve effective MemoryFilesConfig with --persona override
    import dataclasses as _dc

    effective_mf = (
        _dc.replace(config.memory_files, persona_name=persona_name)
        if persona_name is not None
        else config.memory_files
    )

    # Honor `agent.default_agent` from config when --agent was not explicitly
    # passed. Pass `--agent ""` to opt out and use direct-to-engine mode.
    # Without this fallback, `[agent].default_system_prompt` and the
    # SOUL.md / MEMORY.md / USER.md persona system are silently bypassed for
    # the most common command (`jarvis ask "..."`).
    agent_explicitly_set = agent_name is not None
    if agent_name is None:
        configured_default = (config.agent.default_agent or "").strip()
        if configured_default:
            agent_name = configured_default

    # Vision flows only through direct-to-engine mode. Never drop an image
    # silently:
    #   * --research never receives the images, so say so;
    #   * no explicit --agent: route to direct mode so the picture reaches
    #     the model;
    #   * an explicit, non-empty --agent: warn that the image is skipped.
    # An explicit `--agent ""` already means direct mode, so the image is
    # sent and no warning is due.
    if image_b64:
        if research_mode:
            console.print(
                "[yellow]Note:[/yellow] --image/--screen is not used by "
                "--research; the image is ignored."
            )
        elif not agent_explicitly_set:
            agent_name = ""
        elif agent_name:
            console.print(
                "[yellow]Note:[/yellow] --image/--screen only works in direct "
                "mode; the image is ignored with --agent set. Re-run with "
                '`--agent ""` to use vision.'
            )

    # Track whether the user explicitly set --max-tokens
    user_set_max_tokens = max_tokens is not None

    # Fall back to config values for generation params
    if temperature is None:
        temperature = config.intelligence.temperature
    if max_tokens is None:
        max_tokens = config.intelligence.max_tokens

    # Run complexity analysis on the query
    from openjarvis.learning.routing.complexity import (
        ComplexityResult,
        adjust_tokens_for_model,
        score_complexity,
    )

    complexity_result: ComplexityResult = score_complexity(query_text)
    logger.debug(
        "Complexity analysis: score=%.3f tier=%s suggested_max_tokens=%d",
        complexity_result.score,
        complexity_result.tier,
        complexity_result.suggested_max_tokens,
    )

    # Set up telemetry. Telemetry is best-effort: a store that fails to open
    # is logged and the command carries on without it.
    bus = EventBus(record_history=True)
    telem_store: TelemetryStore | None = None
    # Bound before the try block so the finally clause can always see it.
    energy_monitor = None
    if config.telemetry.enabled:
        try:
            telem_store = TelemetryStore(config.telemetry.db_path)
            telem_store.subscribe_to_bus(bus)
        except Exception as exc:
            logger.debug("Failed to initialize telemetry store: %s", exc)

    # Everything below runs under one try/finally so the telemetry store and
    # the energy monitor are closed on every way out: normal return, the
    # --research early return, and sys.exit() (which raises SystemExit).
    try:
        # Discover engines
        register_builtin_models()
        effective_engine_key = (
            engine_key or config.intelligence.preferred_engine or None
        )
        # Pass the model we intend to run so engine selection can skip an
        # engine that can't actually serve it (e.g. the cloud fallback when
        # the local engine is down but only a non-OpenAI key is set -- see
        # #532). This is the -m flag or the configured default; when neither
        # is set we leave it None and a model is chosen per-engine below.
        selection_model = model_name or config.intelligence.default_model or None
        resolved = get_engine(config, effective_engine_key, model=selection_model)
        if resolved is None:
            console.print(
                "[red bold]No inference engine available.[/red bold]\n\n"
                "Make sure an engine is running:\n"
                " [cyan]ollama serve[/cyan] — start Ollama\n"
                " [cyan]vllm serve <model>[/cyan] — start vLLM\n"
                " [cyan]llama-server -m <gguf>[/cyan] — start llama.cpp\n\n"
                "Or set OPENAI_API_KEY / ANTHROPIC_API_KEY for cloud inference.\n\n"
                "[dim]To use a remote engine:[/dim]\n"
                " [cyan]jarvis config set engine.ollama.host "
                "http://<remote-ip>:11434[/cyan]\n"
                " [dim]or[/dim] [cyan]export OLLAMA_HOST="
                "http://<remote-ip>:11434[/cyan]"
            )
            sys.exit(1)
        engine_name, engine = resolved

        # --------------------------------------------------------------
        # Research mode — hybrid search + agentic loop over the knowledge
        # store. Uses the raw engine and the -m value as given (may be None).
        # --------------------------------------------------------------
        if research_mode:
            _run_research(
                query_text=query_text,
                engine=engine,
                model_name=model_name,
                knowledge_db=knowledge_db,
                output_json=output_json,
                console=console,
            )
            return

        # Apply security guardrails
        from openjarvis.security import setup_security

        sec = setup_security(config, engine, bus)
        engine = sec.engine

        # Wrap engine with InstrumentedEngine for telemetry (energy + GPU
        # metrics). A monitor that cannot be created is logged and skipped.
        want_energy = config.telemetry.gpu_metrics or enable_profile
        if want_energy:
            try:
                from openjarvis.telemetry.energy_monitor import create_energy_monitor

                energy_monitor = create_energy_monitor(
                    prefer_vendor=config.telemetry.energy_vendor or None,
                )
            except Exception as exc:
                logger.debug("Failed to create energy monitor: %s", exc)
        engine = InstrumentedEngine(engine, bus, energy_monitor=energy_monitor)

        # Discover models and merge into registry
        all_engines = discover_engines(config)
        all_models = discover_models(all_engines)
        for ek, model_ids in all_models.items():
            merge_discovered_models(ek, model_ids)

        # Resolve model via config fallback chain:
        # -m flag -> config default -> first model on the engine -> fallback.
        if model_name is None:
            model_name = config.intelligence.default_model
        if not model_name:
            # Try first available from engine
            engine_models = all_models.get(engine_name, [])
            if engine_models:
                model_name = engine_models[0]
        if not model_name:
            model_name = config.intelligence.fallback_model
        if not model_name:
            console.print("[red]No model available on engine.[/red]")
            sys.exit(1)

        # Apply complexity-suggested token budget when user didn't override.
        # Use at least the config default so we never reduce tokens below
        # what the user would have gotten without the analyzer.
        if not user_set_max_tokens:
            suggested = adjust_tokens_for_model(
                complexity_result.suggested_max_tokens,
                model_name,
            )
            max_tokens = max(suggested, config.intelligence.max_tokens)
            logger.debug(
                "Using complexity-suggested max_tokens=%d (model=%s)",
                max_tokens,
                model_name,
            )

        # Agent mode (treat empty-string `--agent ""` as explicit opt-out)
        if agent_name:
            parsed_tools = resolve_tool_names(
                tool_names,
                getattr(config.tools, "enabled", None),
                getattr(config.agent, "tools", None),
            )
            try:
                result = _run_agent(
                    agent_name,
                    query_text,
                    engine,
                    model_name,
                    parsed_tools,
                    config,
                    bus,
                    temperature,
                    max_tokens,
                    capability_policy=sec.capability_policy,
                    memory_files_config=effective_mf,
                )
            except EngineConnectionError as exc:
                console.print(f"[red]Engine error:[/red] {exc}")
                console.print(hint_no_engine())
                sys.exit(1)
            if output_json:
                click.echo(
                    json_mod.dumps(
                        {
                            "content": result.content,
                            "turns": result.turns,
                            "tool_results": [
                                {
                                    "tool_name": tr.tool_name,
                                    "content": tr.content,
                                    "success": tr.success,
                                }
                                for tr in result.tool_results
                            ],
                        },
                        indent=2,
                    )
                )
            else:
                click.echo(result.content)
            if enable_profile:
                _print_profile(
                    bus,
                    time.monotonic() - wall_start,
                    engine_name,
                    model_name,
                    console,
                    complexity_result=complexity_result,
                )
            return

        # Direct-to-engine mode (no agent)
        # Privacy guard: a screenshot/image is sensitive, and OpenJarvis is
        # local-first. If the active engine isn't local, warn before the
        # image leaves the machine rather than silently uploading it to a
        # third party.
        _LOCAL_ENGINES = {
            "ollama",
            "llamacpp",
            "vllm",
            "sglang",
            "exo",
            "nexa",
            "uzu",
            "apple_fm",
            "gemma_cpp",
        }
        if image_b64 and engine_name not in _LOCAL_ENGINES:
            console.print(
                f"[yellow]Privacy warning:[/yellow] sending {len(image_b64)} "
                f"image(s) to a non-local engine ('{engine_name}'). The image will "
                "leave this machine. Use a local engine (e.g. ollama) to keep "
                "vision on-device."
            )
        messages = [Message(role=Role.USER, content=query_text)]

        # Memory-augmented context injection (best-effort: on failure the
        # bare user message is sent unchanged).
        if not no_context and config.agent.context_from_memory:
            try:
                from openjarvis.tools.storage.context import (
                    ContextConfig,
                    inject_context,
                )

                backend = _get_memory_backend(config)
                if backend is not None:
                    ctx_cfg = ContextConfig(
                        top_k=config.memory.context_top_k,
                        min_score=config.memory.context_min_score,
                        max_context_tokens=(config.memory.context_max_tokens),
                    )
                    messages = inject_context(
                        query_text,
                        messages,
                        backend,
                        config=ctx_cfg,
                    )
            except Exception as exc:
                logger.debug("Failed to inject memory context: %s", exc)

        # Vision: attach images to the final user message *after* any context
        # injection (which may rebuild the list). messages_to_dicts() forwards
        # the "images" field to Ollama's /api/chat.
        if image_b64:
            for _m in reversed(messages):
                if _m.role == Role.USER:
                    _m.images = image_b64
                    break

        # Generate (InstrumentedEngine handles telemetry + energy recording)
        try:
            with console.status("[bold green]Generating...[/bold green]"):
                result = engine.generate(
                    messages,
                    model=model_name,
                    temperature=temperature,
                    max_tokens=max_tokens,
                )
        except EngineConnectionError as exc:
            console.print(f"[red]Engine error:[/red] {exc}")
            console.print(hint_no_engine())
            sys.exit(1)

        # Output
        if output_json:
            click.echo(json_mod.dumps(result, indent=2))
        else:
            click.echo(result.get("content", ""))
        if enable_profile:
            _print_profile(
                bus,
                time.monotonic() - wall_start,
                engine_name,
                model_name,
                console,
                complexity_result=complexity_result,
            )
    finally:
        # Cleanup is best-effort: a failing close() is logged at debug level
        # and must never mask the command's own result or exit status.
        if energy_monitor is not None:
            try:
                energy_monitor.close()
            except Exception as exc:
                logger.debug("Error closing energy monitor: %s", exc)
        if telem_store is not None:
            try:
                telem_store.close()
            except Exception as exc:
                logger.debug("Error closing telemetry store: %s", exc)