@memory.command()
@click.argument("path")
@click.option(
    "--backend", "-b", default=None,
    help="Override the default memory backend.",
)
@click.option(
    "--chunk-size", default=512, type=int,
    help="Chunk size in tokens.",
)
@click.option(
    "--chunk-overlap", default=64, type=int,
    help="Overlap between chunks in tokens.",
)
def index(
    path: str,
    backend: str | None,
    chunk_size: int,
    chunk_overlap: int,
) -> None:
    """Index documents from a file or directory."""
    # NOTE: click normally renders the docstring above as this command's
    # --help text, so it is kept to one line and details are documented here.
    #
    # Flow: validate PATH -> chunk it via ingest_path() -> store every chunk
    # in the selected backend -> print a summary.
    #
    # Parameters (all supplied by click):
    #   path          -- file or directory to index; must exist.
    #   backend       -- value forwarded to _get_backend(); None when the
    #                    option is omitted.
    #   chunk_size    -- forwarded to ChunkConfig(chunk_size=...).
    #   chunk_overlap -- forwarded to ChunkConfig(chunk_overlap=...).
    #
    # Exits with status 1 (SystemExit) when PATH does not exist. Returns
    # without touching the backend when ingestion yields no chunks.

    # Local import: `escape` is only needed here, and keeping it in the
    # function avoids depending on the module's top-level import list.
    from rich.markup import escape

    # All status output goes to stderr.
    console = Console(stderr=True)

    # PATH is user input; escape it so square brackets in a file name are
    # printed literally instead of being parsed as Rich markup tags.
    shown_path = escape(path)

    target = Path(path)
    if not target.exists():
        console.print(f"[red]Path not found:[/red] {shown_path}")
        raise SystemExit(1)

    # perf_counter() is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments the way time.time() can.
    started = time.perf_counter()

    cfg = ChunkConfig(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    console.print(f"[cyan]Indexing[/cyan] {shown_path} ...")
    chunks = ingest_path(target, config=cfg)
    if not chunks:
        console.print("[yellow]No indexable content found.[/yellow]")
        return

    # The backend is only acquired once there is something to store.
    mem = _get_backend(backend)
    try:
        for chunk in track(chunks, description="Storing chunks...", console=console):
            mem.store(
                chunk.content,
                source=chunk.source,
                metadata={
                    "offset": chunk.offset,
                    "index": chunk.index,
                },
            )
    finally:
        # Not every backend exposes close(); call it only when present, and
        # do so even if a store() call raised.
        if hasattr(mem, "close"):
            mem.close()

    elapsed = time.perf_counter() - started
    # Count distinct sources so the summary reports files, not chunks.
    sources = {c.source for c in chunks}
    console.print(
        f"[green]Indexed {len(chunks)} chunks "
        f"from {len(sources)} file(s) "
        f"in {elapsed:.1f}s.[/green]"
    )