Skip to content

dataset

dataset

Abstract base class for dataset providers.

Classes

DatasetProvider

Bases: ABC

Base class for all evaluation dataset providers.

Functions
load abstractmethod
load(*, max_samples: Optional[int] = None, split: Optional[str] = None, seed: Optional[int] = None) -> None

Load the dataset (possibly downloading from HuggingFace).

Source code in src/openjarvis/evals/core/dataset.py
@abstractmethod
def load(
    self,
    *,
    max_samples: Optional[int] = None,
    split: Optional[str] = None,
    seed: Optional[int] = None,
) -> None:
    """Load the dataset (possibly downloading from HuggingFace).

    Must be called before iter_records()/size() return meaningful
    results.  # NOTE(review): ordering inferred from the API shape — confirm

    Args:
        max_samples: Upper bound on the number of records to load;
            None presumably loads the full split — TODO confirm.
        split: Dataset split to load (e.g. "train"/"test"); None uses
            the provider's default split.  # assumption — verify in subclasses
        seed: Seed for any sampling/shuffling done during loading;
            None means provider-defined behavior.  # assumption — verify
    """
iter_records abstractmethod
iter_records() -> Iterable[EvalRecord]

Iterate over loaded records.

Source code in src/openjarvis/evals/core/dataset.py
@abstractmethod
def iter_records(self) -> Iterable[EvalRecord]:
    """Iterate over loaded records.

    Returns:
        An iterable of EvalRecord instances.  Expected to be called
        after load().  # NOTE(review): ordering assumed — confirm
    """
size abstractmethod
size() -> int

Return the number of loaded records.

Source code in src/openjarvis/evals/core/dataset.py
@abstractmethod
def size(self) -> int:
    """Return the number of loaded records.

    Returns:
        Count of records currently loaded; presumably matches the
        length of iter_records() — TODO confirm in implementations.
    """
create_task_env
create_task_env(record: EvalRecord) -> Optional[AbstractContextManager]

Return a context manager for a per-record task environment, or None if the provider needs none.

Source code in src/openjarvis/evals/core/dataset.py
def create_task_env(
    self,
    record: EvalRecord,
) -> Optional[AbstractContextManager]:
    """Hook for providers that need a per-record task environment.

    The base implementation supplies no environment.

    Args:
        record: The record the environment would be created for.

    Returns:
        A context manager wrapping the task environment, or None when
        the provider has none (the default here).
    """
    return None
verify_requirements
verify_requirements() -> List[str]

Return the list of unsatisfied requirements (empty if all requirements are met).

Source code in src/openjarvis/evals/core/dataset.py
def verify_requirements(self) -> List[str]:
    """Check this provider's prerequisites.

    Returns:
        Names of unsatisfied requirements; the base implementation
        reports none, i.e. an empty list.
    """
    missing: List[str] = []
    return missing
iter_episodes
iter_episodes() -> Iterable[List[EvalRecord]]

Iterate over episodes (groups of sequential records).

Default: each record is its own single-record episode. Override for benchmarks requiring sequential processing with shared agent state within an episode.

Source code in src/openjarvis/evals/core/dataset.py
def iter_episodes(self) -> Iterable[List[EvalRecord]]:
    """Yield episodes — groups of records processed sequentially.

    By default every loaded record forms its own one-element episode.
    Benchmarks that must share agent state across records within an
    episode override this to yield larger groups.
    """
    yield from ([record] for record in self.iter_records())