basnijholt · basnijholt · Jun 18, 2026 · greptile-apps · Jun 18, 2026 · greptile-apps
diff --git a/agent_cli/agents/transcribe.py b/agent_cli/agents/transcribe.py
@@ -65,6 +65,7 @@ class TranscriptResult(TypedDict, total=False):
     transcript: str | None
     saved_recording_path: Path | None
     llm_enabled: bool
+    error: str | None
 
 
 SYSTEM_PROMPT = """
@@ -343,6 +344,14 @@ def _option_default(value: Any) -> Any:
     return getattr(value, "default", value)
 
 
+def _transcript_result_error(result: object) -> str | None:
+    """Return a structured transcript error when one is present."""
+    if not isinstance(result, dict):
+        return None
+    error = result.get("error")
+    return error if isinstance(error, str) and error else None
+
+
 async def _async_main(  # noqa: PLR0912, PLR0915, C901
     *,
     extra_instructions: str | None,
@@ -465,17 +474,27 @@ def _set_saved_recording_path(path: Path) -> None:
                     and provider_cfg.asr_provider == "wyoming"
                     else None
                 )
-                transcript = await live_transcriber(
-                    logger=LOGGER,
-                    stop_event=stop_event,
-                    quiet=general_cfg.quiet,
-                    live=live,
-                    save_recording=save_recording,
-                    extra_instructions=extra_instructions,
-                    recording_path_callback=_set_saved_recording_path,
-                    audio_level_callback=audio_level_callback,
-                    live_preview_config=live_preview_config,
-                )
+                try:
+                    transcript = await live_transcriber(
+                        logger=LOGGER,
+                        stop_event=stop_event,
+                        quiet=general_cfg.quiet,
+                        live=live,
+                        save_recording=save_recording,
+                        extra_instructions=extra_instructions,
+                        recording_path_callback=_set_saved_recording_path,
+                        audio_level_callback=audio_level_callback,
+                        live_preview_config=live_preview_config,
+                    )
+                except asr.SilentAudioCaptureError as exc:
+                    message = str(exc)
+                    LOGGER.warning(message)
+                    return TranscriptResult(
+                        raw_transcript=None,
+                        transcript=None,
+                        llm_enabled=False,
+                        error=message,
+                    )
 
         elapsed = time.monotonic() - start_time
 
@@ -898,6 +917,10 @@ def transcribe(  # noqa: PLR0912, PLR0911, PLR0915, C901
             raise typer.Exit(1) from None
         if json_output:
             print(json.dumps(result))
+        if error := _transcript_result_error(result):
+            if not json_output:
+                print(error)
+            raise typer.Exit(1)
         return
 
     # Normal recording mode
@@ -994,3 +1017,7 @@ def transcribe(  # noqa: PLR0912, PLR0911, PLR0915, C901
         raise typer.Exit(1) from None
     if json_output:
         print(json.dumps(result))
+    if error := _transcript_result_error(result):
+        if not json_output:
+            print(error)
+        raise typer.Exit(1)
diff --git a/agent_cli/services/asr.py b/agent_cli/services/asr.py
@@ -5,13 +5,14 @@
 import asyncio
 import io
 import json
+import struct
 import wave
 from contextlib import suppress
 from dataclasses import dataclass
 from datetime import UTC, datetime
 from functools import partial
 from pathlib import Path
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, NoReturn
 
 from agent_cli import constants
 from agent_cli.core.audio import (
@@ -71,6 +72,51 @@ def min_audio_bytes(self) -> int:
         )
 
 
+@dataclass
+class AudioCaptureStats:
+    """Basic signal stats for captured little-endian int16 PCM audio."""
+
+    byte_count: int = 0
+    peak_sample: int = 0
+
+    @property
+    def is_all_silence(self) -> bool:
+        """Return true when capture produced bytes but every sample was zero."""
+        return self.byte_count > 0 and self.peak_sample == 0
+
+    def observe(self, chunk: bytes) -> None:
+        """Update stats from one PCM chunk."""
+        self.byte_count += len(chunk)
+        self.peak_sample = max(self.peak_sample, _peak_int16_sample(chunk))
+
+
+class SilentAudioCaptureError(RuntimeError):
+    """Raised when microphone capture returns only digital silence."""
+
+    DEFAULT_MESSAGE = (
+        "Microphone capture returned only digital silence. "
+        "On macOS, allow Microphone permission for Agent CLI, "
+        "then restart Agent CLI."
+    )
+
+    def __init__(self, message: str | None = None) -> None:
+        """Create an error with the default silent-capture guidance."""
+        super().__init__(message or self.DEFAULT_MESSAGE)
+
+
+def _raise_silent_audio_capture_error() -> NoReturn:
+    raise SilentAudioCaptureError
-def _raise_silent_audio_capture_error() -> NoReturn:
-    raise SilentAudioCaptureError
+def _raise_silent_audio_capture_error() -> NoReturn:
+    raise SilentAudioCaptureError()
-def _raise_silent_audio_capture_error() -> NoReturn:
-    raise SilentAudioCaptureError
+def _raise_silent_audio_capture_error() -> NoReturn:
+    raise SilentAudioCaptureError()
+
+
+def _peak_int16_sample(chunk: bytes) -> int:
+    sample_count = len(chunk) // constants.AUDIO_FORMAT_WIDTH
+    if sample_count <= 0:
+        return 0
+    pcm = chunk[: sample_count * constants.AUDIO_FORMAT_WIDTH]
+    samples = struct.unpack(f"<{sample_count}h", pcm)
+    return max((abs(sample) for sample in samples), default=0)
+
+
 def _write_live_preview_event(
     log_file: Path,
     *,
@@ -421,8 +467,8 @@ async def _send_audio(
     recording_path_callback: Callable[[Path], None] | None = None,
     audio_level_callback: Callable[[bytes], None] | None = None,
     live_preview_callback: Callable[[bytes], Awaitable[None]] | None = None,
-) -> None:
-    """Read from mic and send to Wyoming server."""
+) -> AudioCaptureStats:
+    """Read from mic, send to Wyoming server, and return capture stats."""
     from wyoming.asr import Transcribe  # noqa: PLC0415
     from wyoming.audio import AudioChunk, AudioStart, AudioStop  # noqa: PLC0415
 
@@ -434,9 +480,11 @@ async def _send_audio(
     # Buffer to save audio if requested
     audio_buffer = io.BytesIO() if save_recording else None
     pending_audio_level_tasks: set[asyncio.Task[None]] = set()
+    capture_stats = AudioCaptureStats()
 
     async def send_chunk(chunk: bytes) -> None:
         """Send audio chunk to ASR server and optionally buffer it."""
+        capture_stats.observe(chunk)
         if audio_buffer is not None:
             audio_buffer.write(chunk)
         if audio_level_callback:
@@ -475,6 +523,7 @@ async def send_chunk(chunk: bytes) -> None:
                         recording_path_callback(saved_path)
         finally:
             await _finish_audio_level_callbacks(pending_audio_level_tasks)
+    return capture_stats
 
 
 async def record_audio_to_buffer(queue: asyncio.Queue, logger: logging.Logger) -> bytes:
@@ -692,7 +741,7 @@ async def _transcribe_live_audio_wyoming(
                 if live_preview is not None:
                     live_preview.reset_log()
                     live_preview_task = asyncio.create_task(live_preview.run())
-                _, recv_task = await manage_send_receive_tasks(
+                send_task, recv_task = await manage_send_receive_tasks(
                     _send_audio(
                         client,
                         stream,
@@ -715,8 +764,17 @@ async def _transcribe_live_audio_wyoming(
                     return_when=asyncio.ALL_COMPLETED,
                 )
                 result = recv_task.result()
+                capture_stats = send_task.result() if send_task is not None else None
-                capture_stats = send_task.result() if send_task is not None else None
+                capture_stats = send_task.result()
-                capture_stats = send_task.result() if send_task is not None else None
+                capture_stats = send_task.result()
+                if (
+                    capture_stats is not None
+                    and capture_stats.is_all_silence
+                    and not result.strip()
+                ):
+                    _raise_silent_audio_capture_error()
                 final_transcript = result
                 return result
+    except SilentAudioCaptureError:
+        raise
     except (ConnectionRefusedError, Exception):
         logger.warning("Failed to connect to Wyoming ASR server")
         return None

diff --git a/macos/AgentCLI/Sources/AgentCLI/AgentCommandRunner.swift b/macos/AgentCLI/Sources/AgentCLI/AgentCommandRunner.swift
@@ -31,6 +31,7 @@ final class AgentCommandRunner: ObservableObject {
     private var recordingIndicator = RecordingIndicatorController()
     private let pasteController: TranscriptPasteController
     private let bootstrap: AgentBootstrap
+    private let microphonePermissionController: any MicrophonePermissionControlling
     private var activityTracker = MenuActivityTracker()
     private var pendingStopRecordingCommands: Set<String> = []
     private var holdTranscriptionState: HoldTranscriptionState = .idle
@@ -69,11 +70,13 @@ final class AgentCommandRunner: ObservableObject {
 
     init(
         pasteController: TranscriptPasteController = TranscriptPasteController(),
+        microphonePermissionController: any MicrophonePermissionControlling = MicrophonePermissionController.shared,
         bootstrap: @escaping AgentBootstrap = { requirement, force, progress in
             AgentRuntime.shared.ensureReady(for: requirement, force: force, progress: progress)
         }
     ) {
         self.pasteController = pasteController
+        self.microphonePermissionController = microphonePermissionController
         self.bootstrap = bootstrap
         hasLastError = FileManager.default.fileExists(atPath: AgentRuntime.shared.lastErrorURL.path)
     }
@@ -148,6 +151,7 @@ final class AgentCommandRunner: ObservableObject {
             statusMessage = "Transcription is already recording"
             return false
         }
+        guard ensureMicrophonePermissionForRecording() else { return false }
 
         holdTranscriptionState = .recording
         holdToTranscribePasteTarget = FocusedTextTarget.capture()
@@ -196,6 +200,9 @@ final class AgentCommandRunner: ObservableObject {
             markStopRequested(for: command)
             beginTranscribingActivity()
         }
+        if shouldStartRecording {
+            guard ensureMicrophonePermissionForRecording() else { return }
+        }
 
         activeCommandCount += 1
         beginCommandActivity(for: command)
@@ -383,6 +390,25 @@ final class AgentCommandRunner: ObservableObject {
         activityTracker.beginTranscribing()
     }
 
+    private func ensureMicrophonePermissionForRecording() -> Bool {
+        let presentation = MicrophonePermissionPresentation(
+            status: microphonePermissionController.currentStatus()
+        )
+        guard presentation.canRecord else {
+            microphonePermissionController.requestAccessIfNeeded { [weak self] granted in
+                guard granted else { return }
+                Task { @MainActor in
+                    self?.statusMessage = "Microphone permission enabled. Try recording again."
+                }
+            }
+            let message = presentation.statusMessage ?? "Microphone permission is required for recording."
+            statusMessage = message
+            notify(title: "Microphone Permission Required", body: message)
+            return false
+        }
+        return true
+    }
+
     private func clearTranscribingActivityIfFinished() {
         if pendingStopRecordingCommands.isEmpty && !holdTranscriptionState.isFinishing {
             activityTracker.finishTranscribing()

diff --git a/macos/AgentCLI/Sources/AgentCLI/MicrophonePermission.swift b/macos/AgentCLI/Sources/AgentCLI/MicrophonePermission.swift
@@ -0,0 +1,58 @@
+import AVFoundation
+
+enum MicrophonePermissionStatus {
+    case authorized
+    case denied
+    case notDetermined
+}
+
+struct MicrophonePermissionPresentation {
+    let status: MicrophonePermissionStatus
+
+    var canRecord: Bool {
+        status == .authorized
+    }
+
+    var statusMessage: String? {
+        switch status {
+        case .authorized:
+            return nil
+        case .denied:
+            return "Allow Microphone permission for Agent CLI in System Settings."
+        case .notDetermined:
+            return "Approve Microphone permission for Agent CLI, then try recording again."
+        }
+    }
+}
+
+protocol MicrophonePermissionControlling {
+    func currentStatus() -> MicrophonePermissionStatus
+    func requestAccessIfNeeded(completion: @escaping (Bool) -> Void)
+}
+
+final class MicrophonePermissionController: MicrophonePermissionControlling {
+    static let shared = MicrophonePermissionController()
+
+    private init() {}
+
+    func currentStatus() -> MicrophonePermissionStatus {
+        switch AVCaptureDevice.authorizationStatus(for: .audio) {
+        case .authorized:
+            return .authorized
+        case .notDetermined:
+            return .notDetermined
+        case .denied, .restricted:
+            return .denied
+        @unknown default:
+            return .denied
+        }
+    }
+
+    func requestAccessIfNeeded(completion: @escaping (Bool) -> Void) {
+        guard currentStatus() == .notDetermined else {
+            completion(currentStatus() == .authorized)
+            return
+        }
+        AVCaptureDevice.requestAccess(for: .audio, completionHandler: completion)
+    }
-    func requestAccessIfNeeded(completion: @escaping (Bool) -> Void) {
-        guard currentStatus() == .notDetermined else {
-            completion(currentStatus() == .authorized)
-            return
-        }
-        AVCaptureDevice.requestAccess(for: .audio, completionHandler: completion)
-    }
+    func requestAccessIfNeeded(completion: @escaping (Bool) -> Void) {
+        let status = currentStatus()
+        guard status == .notDetermined else {
+            completion(status == .authorized)
+            return
+        }
+        AVCaptureDevice.requestAccess(for: .audio, completionHandler: completion)
+    }
-    func requestAccessIfNeeded(completion: @escaping (Bool) -> Void) {
-        guard currentStatus() == .notDetermined else {
-            completion(currentStatus() == .authorized)
-            return
-        }
-        AVCaptureDevice.requestAccess(for: .audio, completionHandler: completion)
-    }
+    func requestAccessIfNeeded(completion: @escaping (Bool) -> Void) {
+        let status = currentStatus()
+        guard status == .notDetermined else {
+            completion(status == .authorized)
+            return
+        }
+        AVCaptureDevice.requestAccess(for: .audio, completionHandler: completion)
+    }
+}
diff --git a/macos/AgentCLI/Tests/AgentCLITests/MicrophonePermissionTests.swift b/macos/AgentCLI/Tests/AgentCLITests/MicrophonePermissionTests.swift
@@ -0,0 +1,23 @@
+#if canImport(XCTest)
+import XCTest
+@testable import AgentCLI
+
+final class MicrophonePermissionTests: XCTestCase {
+    func testAuthorizedPermissionAllowsRecording() {
+        let presentation = MicrophonePermissionPresentation(status: .authorized)
+
+        XCTAssertTrue(presentation.canRecord)
+        XCTAssertEqual(presentation.statusMessage, nil)
+    }
+
+    func testDeniedPermissionExplainsSettingsFix() {
+        let presentation = MicrophonePermissionPresentation(status: .denied)
+
+        XCTAssertFalse(presentation.canRecord)
+        XCTAssertEqual(
+            presentation.statusMessage,
+            "Allow Microphone permission for Agent CLI in System Settings."
+        )
+    }
+}
+#endif