Pipeline

audawispr.pipeline.Pipeline

Pipeline(
    output,
    *,
    language="fr",
    ipa=False,
    model_size="small",
    device="auto",
    compute_type="int8",
    vad=True,
    pause_split_ms=700,
    min_duration_ms=600,
    max_duration_ms=7000,
    translation_provider="none",
    deck_name=None,
    keep_work=False,
)

Narrow public API for running the full audawispr pipeline.

Usage:

from pathlib import Path
from audawispr import Pipeline

Pipeline(
    output=Path("deck.apkg"),
    language="fr",
    ipa=True,
).run(Path("lesson.mp3"))

Parameters:

output (Path) –

Output path (.apkg for Anki package, directory for CSV).
language (str, default: 'fr' ) –

Source language code passed to faster-whisper (e.g. "fr", "en", "ja", "de").
ipa (bool, default: False ) –

Generate IPA phonetic transcription (French only).
model_size (str, default: 'small' ) –

faster-whisper model size. One of "tiny", "base", "small", "medium", "large-v3".
device (str, default: 'auto' ) –

Device for Whisper inference. "auto" selects CUDA when available, else CPU.
compute_type (str, default: 'int8' ) –

Compute type for Whisper. "int8", "float16", or "float32".
vad (bool, default: True ) –

Enable voice activity detection filtering.
pause_split_ms (int, default: 700 ) –

Pause duration (ms) triggering a segment split.
min_duration_ms (int, default: 600 ) –

Minimum segment duration (ms).
max_duration_ms (int, default: 7000 ) –

Maximum segment duration (ms).
translation_provider (str, default: 'none' ) –

Translation provider. "none" (default) skips translation.
deck_name (str | None, default: None ) –

Anki deck name. Defaults to "audawispr::{language}".
keep_work (bool, default: False ) –

Keep working directory after completion.

Source code in src/audawispr/pipeline.py

def __init__(
    self,
    output: Path,
    *,
    language: str = "fr",
    ipa: bool = False,
    model_size: str = "small",
    device: str = "auto",
    compute_type: str = "int8",
    vad: bool = True,
    pause_split_ms: int = 700,
    min_duration_ms: int = 600,
    max_duration_ms: int = 7000,
    translation_provider: str = "none",
    deck_name: str | None = None,
    keep_work: bool = False,
) -> None:
    self._output = output
    self._language = language
    self._ipa = ipa
    self._model_size = model_size
    self._device = device
    self._compute_type = compute_type
    self._vad = vad
    self._pause_split_ms = pause_split_ms
    self._min_duration_ms = min_duration_ms
    self._max_duration_ms = max_duration_ms
    self._translation_provider = translation_provider
    self._deck_name = deck_name
    self._keep_work = keep_work

run

run(audio, *, progress=None, cancel=None)

Run the pipeline for the given audio file.

Parameters:

audio (Path) –

Path to the input audio file.
progress (ProgressHook | None, default: None ) –

Optional callback receiving a ProgressEvent for each pipeline phase.
cancel (CancellationToken | None, default: None ) –

Optional CancellationToken for cooperative cancellation.

Returns:

PipelineResult –

PipelineResult with output_path and work_dir.

Source code in src/audawispr/pipeline.py

def run(
    self,
    audio: Path,
    *,
    progress: ProgressHook | None = None,
    cancel: CancellationToken | None = None,
) -> PipelineResult:
    """Process one audio file using the settings stored on this instance.

    A ``PipelineRequest`` is built from ``audio`` plus the stored settings
    and handed to ``run_pipeline``.

    :param audio: Path to the input audio file.
    :param progress: Optional callback receiving a ``ProgressEvent`` for
        each pipeline phase; forwarded as ``progress_hook``.
    :param cancel: Optional :class:`CancellationToken` for cooperative
        cancellation; forwarded as ``cancellation_token``.
    :returns: :class:`PipelineResult` with ``output_path`` and ``work_dir``.
    """
    return run_pipeline(
        PipelineRequest(
            audio=audio,
            output=self._output,
            language=self._language,
            ipa=self._ipa,
            model_size=self._model_size,
            device=self._device,
            compute_type=self._compute_type,
            vad=self._vad,
            pause_split_ms=self._pause_split_ms,
            min_duration_ms=self._min_duration_ms,
            max_duration_ms=self._max_duration_ms,
            translation_provider=self._translation_provider,
            deck_name=self._deck_name,
            keep_work=self._keep_work,
        ),
        progress_hook=progress,
        cancellation_token=cancel,
    )

PipelineResult

audawispr.pipeline.PipelineResult `dataclass`

PipelineResult(output_path, work_dir)

Result of a completed pipeline run.

CancellationToken

audawispr.pipeline.CancellationToken

CancellationToken()

Cooperative cancellation checked between phases.

Source code in src/audawispr/core/pipeline.py

def __init__(self) -> None:
    """Create the token, backed by a fresh ``threading.Event``."""
    # The event is set by request_cancel() and polled by check().
    cancel_flag = threading.Event()
    self._event = cancel_flag

request_cancel

request_cancel()

Request cancellation of the pipeline run.

Source code in src/audawispr/core/pipeline.py

def request_cancel(self) -> None:
    """Flag the pipeline run for cancellation by setting the internal event."""
    cancel_flag = self._event
    cancel_flag.set()

check

check()

Raise CancelledError if cancellation was requested.

Source code in src/audawispr/core/pipeline.py

def check(self) -> None:
    """Raise CancelledError when cancellation has been requested.

    Returns normally when the internal event is not set.
    """
    if not self._event.is_set():
        return
    raise CancelledError("pipeline run was cancelled")

ProgressHook

ProgressHook = Callable[[ProgressEvent], None]

Pipeline

audawispr.pipeline.Pipeline

run

PipelineResult

audawispr.pipeline.PipelineResult dataclass

CancellationToken

audawispr.pipeline.CancellationToken

request_cancel

check

ProgressHook

audawispr.pipeline.PipelineResult `dataclass`