Skip to content

gemma_cpp

gemma_cpp

gemma.cpp inference engine backend via pygemma pybind11 bindings.

Classes

GemmaCppEngine

GemmaCppEngine(model_path: str | None = None, tokenizer_path: str | None = None, model_type: str | None = None, num_threads: int = 0)

Bases: InferenceEngine

gemma.cpp backend via pygemma pybind11 bindings (in-process, CPU).

Source code in src/openjarvis/engine/gemma_cpp.py
def __init__(
    self,
    model_path: str | None = None,
    tokenizer_path: str | None = None,
    model_type: str | None = None,
    num_threads: int = 0,
) -> None:
    self._model_path = model_path or os.environ.get("GEMMA_CPP_MODEL_PATH", "")
    self._tokenizer_path = tokenizer_path or os.environ.get(
        "GEMMA_CPP_TOKENIZER_PATH", ""
    )
    self._model_type = model_type or os.environ.get("GEMMA_CPP_MODEL_TYPE", "")
    self._num_threads = num_threads or int(
        os.environ.get("GEMMA_CPP_NUM_THREADS", "0")
    )
    self._gemma: Any = None  # lazy-loaded pygemma.Gemma instance
Functions
prepare
prepare(model: str) -> None

Load model into memory.

Source code in src/openjarvis/engine/gemma_cpp.py
def prepare(self, model: str) -> None:
    """Make sure the model is loaded before it is first used.

    The work is delegated entirely to ``self._ensure_loaded()``. The
    ``model`` argument is accepted but not consulted in this method.
    """
    # No per-model branching here: loading is handled by the helper.
    self._ensure_loaded()
close
close() -> None

Unload model and free memory.

Source code in src/openjarvis/engine/gemma_cpp.py
def close(self) -> None:
    """Release the engine's reference to the loaded model.

    Resets ``self._gemma`` to ``None``. Calling this when nothing is
    loaded simply leaves the attribute as ``None``.
    """
    # Dropping the only handle kept here lets the instance be reclaimed.
    self._gemma = None

Functions