Skip to content

trace

trace

Trace data model for agentic eval runs.

Classes

TurnTrace dataclass

TurnTrace(turn_index: int, input_tokens: int = 0, output_tokens: int = 0, tool_result_tokens: int = 0, tools_called: List[str] = list(), tool_latencies_s: Dict[str, float] = dict(), wall_clock_s: float = 0.0, error: Optional[str] = None, gpu_energy_joules: Optional[float] = None, cpu_energy_joules: Optional[float] = None, gpu_power_avg_watts: Optional[float] = None, cpu_power_avg_watts: Optional[float] = None, cost_usd: Optional[float] = None, action_energy_breakdown: Optional[List[Dict[str, Any]]] = None)

Per-turn telemetry data.

QueryTrace dataclass

QueryTrace(query_id: str, workload_type: str, query_text: str = '', response_text: str = '', turns: List[TurnTrace] = list(), total_wall_clock_s: float = 0.0, completed: bool = False, timed_out: bool = False, query_gpu_energy_joules: Optional[float] = None, query_cpu_energy_joules: Optional[float] = None, query_gpu_power_avg_watts: Optional[float] = None, query_cpu_power_avg_watts: Optional[float] = None, is_resolved: Optional[bool] = None, query_mbu_avg_pct: Optional[float] = None, query_mbu_max_pct: Optional[float] = None)

Per-query aggregate telemetry.

Attributes
total_tokens property
total_tokens: int

Total tokens (input + output) across all turns.

avg_gpu_power_watts property
avg_gpu_power_watts: Optional[float]

Mean GPU power across turns; falls back to query-level power.

avg_cpu_power_watts property
avg_cpu_power_watts: Optional[float]

Mean CPU power across turns; falls back to query-level power.

throughput_tokens_per_sec property
throughput_tokens_per_sec: Optional[float]

Output tokens per second; None if zero tokens or zero time.

energy_per_token_joules property
energy_per_token_joules: Optional[float]

GPU energy per output token; None if no energy data or zero tokens.

Functions
save_jsonl
save_jsonl(path: Path) -> None

Append this trace as a JSONL line.

Source code in src/openjarvis/evals/core/trace.py
def save_jsonl(self, path: Path) -> None:
    """Serialize this trace to JSON and append it to ``path`` as one line.

    Any missing parent directories of ``path`` are created first, so the
    target may live in a directory that does not exist yet. The file is
    opened in append mode; existing lines are left untouched.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    with path.open("a") as sink:
        # Serialize only after the file is open, then emit the record and
        # its newline terminator in a single write.
        record = json.dumps(self.to_dict())
        sink.write(f"{record}\n")
load_jsonl classmethod
load_jsonl(path: Path) -> List[QueryTrace]

Load traces from a JSONL file.

Source code in src/openjarvis/evals/core/trace.py
@classmethod
def load_jsonl(cls, path: Path) -> List[QueryTrace]:
    """Load traces from a JSONL file.

    Every non-blank line is parsed as JSON and handed to ``cls.from_dict``;
    blank or whitespace-only lines are skipped.

    Args:
        path: JSONL file to read.

    Returns:
        One entry per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 rather than the platform's locale encoding:
    # JSON text is UTF-8 by specification, and a locale default such as
    # cp1252 would mis-decode (or reject) non-ASCII query/response text.
    with open(path, encoding="utf-8") as f:
        stripped_lines = (raw.strip() for raw in f)
        return [
            cls.from_dict(json.loads(line))
            for line in stripped_lines
            if line
        ]
to_hf_dataset staticmethod
to_hf_dataset(traces: List[QueryTrace]) -> Any

Convert a list of QueryTrace objects to a HuggingFace Dataset.

Returns: A datasets.Dataset with one row per trace.

Source code in src/openjarvis/evals/core/trace.py
@staticmethod
def to_hf_dataset(traces: List[QueryTrace]) -> Any:
    """Build a HuggingFace ``Dataset`` from ``traces``, one row per trace.

    Each row holds the trace's identifying fields and aggregate metrics
    under column names equal to the attribute they are read from, plus a
    ``trace_json`` column containing the JSON dump of ``trace.to_dict()``.

    Returns:
        A datasets.Dataset with one row per trace.
    """
    # Imported inside the function, so ``datasets`` is only required when
    # this conversion is actually called.
    from datasets import Dataset

    # Column order matters: it fixes the key order of every row. Each name
    # is both the output column and the attribute read from the trace.
    columns = (
        "query_id",
        "workload_type",
        "query_text",
        "response_text",
        "num_turns",
        "total_input_tokens",
        "total_output_tokens",
        "total_tool_calls",
        "total_wall_clock_s",
        "total_gpu_energy_joules",
        "total_cpu_energy_joules",
        "total_tokens",
        "total_cost_usd",
        "avg_gpu_power_watts",
        "avg_cpu_power_watts",
        "throughput_tokens_per_sec",
        "energy_per_token_joules",
        "completed",
        "timed_out",
        "is_resolved",
        "query_mbu_avg_pct",
        "query_mbu_max_pct",
    )
    records = [
        {
            **{name: getattr(item, name) for name in columns},
            # Full trace kept alongside the flat columns, appended last.
            "trace_json": json.dumps(item.to_dict()),
        }
        for item in traces
    ]
    return Dataset.from_list(records)