Skip to content

dataset

dataset

Backward-compatibility shim: the implementation has moved to learning.optimize (see src/openjarvis/learning/optimize/personal/dataset.py).

Classes

PersonalBenchmarkDataset

PersonalBenchmarkDataset(benchmark: PersonalBenchmark)

Bases: DatasetProvider

Wraps a PersonalBenchmark as a DatasetProvider for EvalRunner.

Source code in src/openjarvis/learning/optimize/personal/dataset.py
def __init__(self, benchmark: PersonalBenchmark) -> None:
    """Keep a reference to *benchmark* and start with no loaded records."""
    # Stays empty until load() rebuilds it from the benchmark's samples.
    self._records: List[EvalRecord] = []
    self._benchmark = benchmark
Functions
load
load(*, max_samples: Optional[int] = None, split: Optional[str] = None, seed: Optional[int] = None) -> None

Convert :class:`PersonalBenchmarkSample` instances to :class:`EvalRecord`.

Source code in src/openjarvis/learning/optimize/personal/dataset.py
def load(
    self,
    *,
    max_samples: Optional[int] = None,
    split: Optional[str] = None,
    seed: Optional[int] = None,
) -> None:
    """Convert :class:`PersonalBenchmarkSample` instances to :class:`EvalRecord`.

    Rebuilds ``self._records`` from ``self._benchmark.samples``, replacing
    whatever an earlier call stored there.

    Args:
        max_samples: Upper bound on the number of records built, taken from
            the front of the sample sequence. ``None`` keeps every sample;
            zero or a negative value yields no records.
        split: Accepted but not read by this implementation.
        seed: Accepted but not read by this implementation; records keep
            the order of the benchmark's samples.
    """
    samples = self._benchmark.samples
    if max_samples is not None:
        # Clamp at zero: a raw negative bound would slice from the end
        # (``samples[:-1]`` keeps all but the last sample) rather than
        # capping the count.
        samples = samples[: max(max_samples, 0)]
    self._records = [
        EvalRecord(
            record_id=s.trace_id,
            problem=s.query,
            reference=s.reference_answer,
            category=s.category,
            # Samples with a falsy agent are filed under "general".
            subject=s.agent or "general",
            # Passed through as-is; no copy is made here.
            metadata=s.metadata,
        )
        for s in samples
    ]
iter_records
iter_records() -> Iterable[EvalRecord]

Iterate over loaded records.

Source code in src/openjarvis/learning/optimize/personal/dataset.py
def iter_records(self) -> Iterable[EvalRecord]:
    """Return a fresh iterator over the records currently held."""
    loaded = self._records
    return iter(loaded)
size
size() -> int

Return the number of loaded records.

Source code in src/openjarvis/learning/optimize/personal/dataset.py
def size(self) -> int:
    """Report how many records are currently loaded."""
    loaded = self._records
    return len(loaded)