def transcribe(
    self,
    audio: bytes,
    *,
    format: str = "wav",
    language: Optional[str] = None,
) -> TranscriptionResult:
    """Transcribe audio using Deepgram's API.

    Args:
        audio: Encoded audio bytes; sent unchanged as the request buffer.
        format: Audio format name used to choose the MIME type attached to
            the payload. Matched case-insensitively, ignoring surrounding
            whitespace and a leading dot, so ``"MP3"`` and ``".mp3"`` both
            map to ``audio/mpeg``. Unrecognized values fall back to
            ``audio/wav``.
        language: Language code forwarded to Deepgram. When omitted (or
            empty), ``detect_language`` is requested instead.

    Returns:
        A ``TranscriptionResult`` built from the first alternative of the
        first channel. ``text`` is ``""`` and ``confidence`` is ``None`` when
        the response carries no alternatives. ``language`` is the channel's
        ``detected_language`` if present, otherwise the ``language`` the
        caller requested. ``duration_seconds`` defaults to ``0.0`` when the
        response has no duration. ``segments`` is always an empty list.

    Raises:
        RuntimeError: If ``self._client`` is ``None``.
    """
    if self._client is None:
        raise RuntimeError("Deepgram client not initialized (missing API key?)")

    mime_map = {
        "wav": "audio/wav",
        "mp3": "audio/mpeg",
        "ogg": "audio/ogg",
        "flac": "audio/flac",
        "webm": "audio/webm",
        "m4a": "audio/mp4",
    }
    # Normalize so "WAV", " wav " and ".wav" do not silently fall through to
    # the default and get labelled with the wrong MIME type.
    format_key = (format or "").strip().lstrip(".").lower()
    mime_type = mime_map.get(format_key, "audio/wav")

    options_kwargs: dict = {"model": "nova-2", "smart_format": True}
    if language:
        options_kwargs["language"] = language
    else:
        options_kwargs["detect_language"] = True

    payload = {"buffer": audio, "mimetype": mime_type}
    # PrerecordedOptions may be None (presumably when the SDK class could not
    # be imported -- confirm at the import site); pass the plain dict then.
    if PrerecordedOptions is not None:
        options = PrerecordedOptions(**options_kwargs)
    else:
        options = options_kwargs

    response = self._client.listen.rest.v("1").transcribe_file(
        payload, options,
    )

    # Walk the response defensively: a missing ``results``/``metadata`` or an
    # empty channel list yields an empty transcript instead of AttributeError.
    results = getattr(response, "results", None)
    channels = getattr(results, "channels", None) or []
    first_channel = channels[0] if channels else None
    alternatives = getattr(first_channel, "alternatives", None) or []

    if alternatives:
        best = alternatives[0]
        text = getattr(best, "transcript", "") or ""
        confidence = getattr(best, "confidence", None)
    else:
        text = ""
        confidence = None

    detected_lang = getattr(first_channel, "detected_language", None)

    metadata = getattr(response, "metadata", None)
    duration = getattr(metadata, "duration", None) or 0.0

    return TranscriptionResult(
        text=text,
        # Detection is only requested when no language was given, so fall
        # back to the caller's language rather than reporting None.
        language=detected_lang or language or None,
        confidence=confidence,
        duration_seconds=duration,
        segments=[],
    )