Skip to content

skill_discovery

skill_discovery

Skill discovery -- mine recurring tool sequences from traces.

Classes

DiscoveredSkill dataclass

DiscoveredSkill(name: str, description: str, tool_sequence: List[str], frequency: int, avg_outcome: float, example_inputs: List[str] = list())

A skill discovered from trace analysis.

SkillDiscovery

SkillDiscovery(*, min_frequency: int = 3, min_sequence_length: int = 2, max_sequence_length: int = 4, min_outcome: float = 0.5)

Mine recurring tool sequences from trace data to auto-generate skills.

Analyzes TraceStore data for patterns such as "web_search -> file_write" (research-then-save) and "file_read -> calculator -> file_write" (read-compute-save).

When a sequence appears at least min_frequency times with an average outcome of at least min_outcome, it is surfaced as a DiscoveredSkill that can be registered.

Source code in src/openjarvis/learning/skill_discovery.py
def __init__(
    self,
    *,
    min_frequency: int = 3,
    min_sequence_length: int = 2,
    max_sequence_length: int = 4,
    min_outcome: float = 0.5,
) -> None:
    """Store the mining thresholds and start with no discovered skills.

    Parameters
    ----------
    min_frequency:
        Minimum number of times a tool sequence must be observed before
        ``analyze_traces`` reports it.
    min_sequence_length:
        Shortest run of consecutive tool calls considered as a candidate.
    max_sequence_length:
        Longest run of consecutive tool calls considered as a candidate.
    min_outcome:
        Minimum average outcome a sequence needs in order to be reported.
    """
    self._min_freq = min_frequency
    self._min_len = min_sequence_length
    self._max_len = max_sequence_length
    self._min_outcome = min_outcome
    # Replaced by each analyze_traces() call; empty until one has run.
    self._discovered: List[DiscoveredSkill] = []
Attributes
discovered_skills property
discovered_skills: List[DiscoveredSkill]

Return the most recently discovered skills.

Functions
analyze_traces
analyze_traces(traces: List[Any]) -> List[DiscoveredSkill]

Analyze a list of traces for recurring tool sequences.

PARAMETER DESCRIPTION
traces

List of Trace objects (or dicts with 'steps' and 'outcome' keys). Each trace should have steps with 'step_type' and 'tool_name'.

TYPE: List[Any]

RETURNS DESCRIPTION
List of DiscoveredSkill objects meeting frequency and outcome thresholds.
Source code in src/openjarvis/learning/skill_discovery.py
def analyze_traces(self, traces: List[Any]) -> List[DiscoveredSkill]:
    """Analyze a list of traces for recurring tool sequences.

    Every contiguous window of tool calls whose length lies between the
    configured minimum and maximum sequence length is counted once per
    occurrence (a window repeated inside one trace counts each time), and
    the trace's outcome is recorded against it. Windows seen at least
    ``min_frequency`` times with a mean outcome of at least ``min_outcome``
    are reported.

    Parameters
    ----------
    traces:
        List of Trace objects (or dicts with 'steps' and 'outcome' keys).
        Each trace should have steps with 'step_type' and 'tool_name'.

    Returns
    -------
    List of DiscoveredSkill objects meeting frequency and outcome thresholds,
    sorted by ``frequency * avg_outcome`` in descending order. The same list
    is kept as the most recently discovered skills.
    """
    max_examples = 3  # distinct example queries kept per sequence

    # A window needs at least one tool call; a non-positive minimum would
    # otherwise produce empty sequences and a skill with an empty name.
    min_len = max(self._min_len, 1)

    outcomes_by_seq: Dict[Tuple[str, ...], List[float]] = defaultdict(list)
    examples_by_seq: Dict[Tuple[str, ...], List[str]] = defaultdict(list)

    for trace in traces:
        tool_calls = self._extract_tool_sequence(trace)
        outcome = self._extract_outcome(trace)
        query = self._extract_query(trace)

        call_count = len(tool_calls)
        if call_count < min_len:
            continue

        # Slide a window of every permitted length across the tool calls.
        longest = min(self._max_len, call_count)
        for length in range(min_len, longest + 1):
            for start in range(call_count - length + 1):
                seq = tuple(tool_calls[start:start + length])
                outcomes_by_seq[seq].append(outcome)
                if not query:
                    continue
                examples = examples_by_seq[seq]
                # Skip queries already recorded: a window that repeats
                # within one trace would otherwise fill the examples with
                # copies of the same query.
                if len(examples) < max_examples and query not in examples:
                    examples.append(query)

    # Filter by frequency and outcome
    discovered: List[DiscoveredSkill] = []
    for seq, outcomes in outcomes_by_seq.items():
        # Entries are only created by an append, so freq is always >= 1.
        freq = len(outcomes)
        avg_outcome = sum(outcomes) / freq

        if freq >= self._min_freq and avg_outcome >= self._min_outcome:
            chain = " -> ".join(seq)
            discovered.append(DiscoveredSkill(
                name="_".join(seq),
                description=f"Auto-discovered skill: {chain} (seen {freq} times)",
                tool_sequence=list(seq),
                frequency=freq,
                avg_outcome=avg_outcome,
                # Copy so the skill does not share the internal list.
                example_inputs=list(examples_by_seq.get(seq, ())),
            ))

    # Sort by frequency * outcome (quality score)
    discovered.sort(key=lambda s: s.frequency * s.avg_outcome, reverse=True)
    self._discovered = discovered
    return discovered
to_skill_manifests
to_skill_manifests() -> List[Dict[str, Any]]

Convert discovered skills to TOML-compatible manifest dicts.

Source code in src/openjarvis/learning/skill_discovery.py
def to_skill_manifests(self) -> List[Dict[str, Any]]:
    """Render the stored skills as TOML-compatible manifest dicts.

    Returns
    -------
    One dict per skill from the most recent analysis, in the same order.
    Each dict carries the skill's name and description, a ``steps`` list
    holding one ``{"tool": ..., "params": {}}`` entry per tool in the
    sequence, and a ``metadata`` dict flagged as auto-discovered that
    records the skill's frequency and average outcome.
    """

    def _as_manifest(entry: Any) -> Dict[str, Any]:
        # Each step gets its own empty params dict so callers can fill
        # them in independently.
        step_specs = [
            {"tool": tool_name, "params": {}}
            for tool_name in entry.tool_sequence
        ]
        meta = {
            "auto_discovered": True,
            "frequency": entry.frequency,
            "avg_outcome": entry.avg_outcome,
        }
        return {
            "name": entry.name,
            "description": entry.description,
            "steps": step_specs,
            "metadata": meta,
        }

    return [_as_manifest(entry) for entry in self._discovered]