Skip to content

bm25

bm25

BM25 memory backend — classic term-frequency retrieval.

Classes

BM25Memory

BM25Memory()

Bases: MemoryBackend

In-memory BM25 (Okapi) retrieval backend.

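Scores documents against a query using the classic BM25 probabilistic ranking function. Every operation is delegated to the Rust-backed BM25Memory implementation obtained from get_rust_module() (earlier descriptions referred to the rank_bm25 library). All data lives in memory — there is no persistence across restarts.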

Source code in src/openjarvis/tools/storage/bm25.py
def __init__(self) -> None:
    """Create the backend by instantiating the Rust module's ``BM25Memory``.

    The resulting object is kept on ``self._rust_impl``; the other methods
    of this class forward their work to it.
    """
    self._rust_impl = get_rust_module().BM25Memory()
Functions
store
store(content: str, *, source: str = '', metadata: Optional[Dict[str, Any]] = None) -> str

Persist content and return a unique document id.

Source code in src/openjarvis/tools/storage/bm25.py
def store(
    self,
    content: str,
    *,
    source: str = "",
    metadata: Optional[Dict[str, Any]] = None,
) -> str:
    """Hand *content* to the Rust backend and return the document id it yields.

    Args:
        content: Text of the document to store.
        source: Label forwarded to the backend and echoed in the event.
        metadata: Optional mapping, serialized to JSON before being passed on.

    Returns:
        The document id returned by the Rust implementation's ``store``.

    A ``MEMORY_STORE`` event carrying the backend id, the new document id
    and *source* is published on the event bus after the document is stored.
    """
    # Falsy metadata (None or an empty dict) is forwarded as None, not "{}".
    if metadata:
        serialized_meta = json.dumps(metadata)
    else:
        serialized_meta = None

    new_id = self._rust_impl.store(content, source, serialized_meta)

    event_bus = get_event_bus()
    payload = {
        "backend": self.backend_id,
        "doc_id": new_id,
        "source": source,
    }
    event_bus.publish(EventType.MEMORY_STORE, payload)
    return new_id
retrieve
retrieve(query: str, *, top_k: int = 5, **kwargs: Any) -> List[RetrievalResult]

Search for query and return the top-k results — always via Rust backend.

Source code in src/openjarvis/tools/storage/bm25.py
def retrieve(
    self,
    query: str,
    *,
    top_k: int = 5,
    **kwargs: Any,
) -> List[RetrievalResult]:
    """Run *query* against the Rust backend and return the decoded results.

    Args:
        query: Search text. An empty or whitespace-only query yields ``[]``
            without calling the backend or publishing an event.
        top_k: Result limit passed through to the backend's ``retrieve``.
        **kwargs: Accepted but not used by this method.

    Returns:
        The list produced by ``retrieval_results_from_json`` from the
        backend's output.

    A ``MEMORY_RETRIEVE`` event carrying the backend id, the query and the
    number of results is published after a successful search.
    """
    stripped = query.strip()
    if not stripped:
        return []

    # Imported at call time, as in the original implementation.
    from openjarvis._rust_bridge import retrieval_results_from_json

    raw_hits = self._rust_impl.retrieve(query, top_k)
    hits = retrieval_results_from_json(raw_hits)

    event_bus = get_event_bus()
    payload = {
        "backend": self.backend_id,
        "query": query,
        "num_results": len(hits),
    }
    event_bus.publish(EventType.MEMORY_RETRIEVE, payload)
    return hits
delete
delete(doc_id: str) -> bool

Delete a document by id — always via Rust backend.

Source code in src/openjarvis/tools/storage/bm25.py
def delete(self, doc_id: str) -> bool:
    """Ask the Rust backend to remove the document identified by *doc_id*.

    Returns:
        Whatever the Rust implementation's ``delete`` reports for this id.
    """
    backend = self._rust_impl
    removed = backend.delete(doc_id)
    return removed
clear
clear() -> None

Remove all stored documents — always via Rust backend.

Source code in src/openjarvis/tools/storage/bm25.py
def clear(self) -> None:
    """Forward a clear request to the Rust backend.

    The backend's return value, if any, is discarded.
    """
    backend = self._rust_impl
    backend.clear()
count
count() -> int

Return the number of stored documents — always via Rust backend.

Source code in src/openjarvis/tools/storage/bm25.py
def count(self) -> int:
    """Report the document count as given by the Rust backend's ``count``."""
    backend = self._rust_impl
    total = backend.count()
    return total

Functions