GemmaCppEngine(model_path: str | None = None, tokenizer_path: str | None = None, model_type: str | None = None, num_threads: int = 0)
Bases: InferenceEngine
gemma.cpp backend via pygemma pybind11 bindings (in-process, CPU).
Source code in src/openjarvis/engine/gemma_cpp.py
def __init__(
    self,
    model_path: str | None = None,
    tokenizer_path: str | None = None,
    model_type: str | None = None,
    num_threads: int = 0,
) -> None:
    """Configure the engine; no model weights are loaded yet.

    Each argument falls back to an environment variable when not
    given (empty/zero counts as "not given"):

    Args:
        model_path: Weights file path, or ``GEMMA_CPP_MODEL_PATH``.
        tokenizer_path: Tokenizer file path, or
            ``GEMMA_CPP_TOKENIZER_PATH``.
        model_type: Model type string, or ``GEMMA_CPP_MODEL_TYPE``.
        num_threads: Worker thread count, or
            ``GEMMA_CPP_NUM_THREADS`` (default ``0``).
    """
    env = os.environ
    self._model_path = model_path if model_path else env.get("GEMMA_CPP_MODEL_PATH", "")
    self._tokenizer_path = (
        tokenizer_path if tokenizer_path else env.get("GEMMA_CPP_TOKENIZER_PATH", "")
    )
    self._model_type = model_type if model_type else env.get("GEMMA_CPP_MODEL_TYPE", "")
    self._num_threads = (
        num_threads if num_threads else int(env.get("GEMMA_CPP_NUM_THREADS", "0"))
    )
    # Deferred: the pygemma.Gemma instance is created lazily on first use.
    self._gemma: Any = None
Functions
prepare
prepare(model: str) -> None
Load model into memory.
Source code in src/openjarvis/engine/gemma_cpp.py
def prepare(self, model: str) -> None:
    """Load model into memory.

    Args:
        model: Model identifier required by the InferenceEngine
            interface. NOTE(review): it is not referenced in this
            body — the backend presumably uses the paths configured
            at construction time; confirm against _ensure_loaded().
    """
    self._ensure_loaded()
close
close() -> None
Unload model and free memory.
Source code in src/openjarvis/engine/gemma_cpp.py
def close(self) -> None:
    """Unload model and free memory.

    Dropping the only reference here allows the lazily created
    pygemma instance (and its weights) to be garbage-collected;
    a later prepare() call can load it again.
    """
    self._gemma = None