-
Notifications
You must be signed in to change notification settings - Fork 20
Fix macOS silent microphone capture handling #597
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -5,13 +5,14 @@ | |||||
| import asyncio | ||||||
| import io | ||||||
| import json | ||||||
| import struct | ||||||
| import wave | ||||||
| from contextlib import suppress | ||||||
| from dataclasses import dataclass | ||||||
| from datetime import UTC, datetime | ||||||
| from functools import partial | ||||||
| from pathlib import Path | ||||||
| from typing import TYPE_CHECKING | ||||||
| from typing import TYPE_CHECKING, NoReturn | ||||||
|
|
||||||
| from agent_cli import constants | ||||||
| from agent_cli.core.audio import ( | ||||||
|
|
@@ -71,6 +72,51 @@ def min_audio_bytes(self) -> int: | |||||
| ) | ||||||
|
|
||||||
|
|
||||||
| @dataclass | ||||||
| class AudioCaptureStats: | ||||||
| """Basic signal stats for captured little-endian int16 PCM audio.""" | ||||||
|
|
||||||
| byte_count: int = 0 | ||||||
| peak_sample: int = 0 | ||||||
|
|
||||||
| @property | ||||||
| def is_all_silence(self) -> bool: | ||||||
| """Return true when capture produced bytes but every sample was zero.""" | ||||||
| return self.byte_count > 0 and self.peak_sample == 0 | ||||||
|
|
||||||
| def observe(self, chunk: bytes) -> None: | ||||||
| """Update stats from one PCM chunk.""" | ||||||
| self.byte_count += len(chunk) | ||||||
| self.peak_sample = max(self.peak_sample, _peak_int16_sample(chunk)) | ||||||
|
|
||||||
|
|
||||||
| class SilentAudioCaptureError(RuntimeError): | ||||||
| """Raised when microphone capture returns only digital silence.""" | ||||||
|
|
||||||
| DEFAULT_MESSAGE = ( | ||||||
| "Microphone capture returned only digital silence. " | ||||||
| "On macOS, allow Microphone permission for Agent CLI, " | ||||||
| "then restart Agent CLI." | ||||||
| ) | ||||||
|
|
||||||
| def __init__(self, message: str | None = None) -> None: | ||||||
| """Create an error with the default silent-capture guidance.""" | ||||||
| super().__init__(message or self.DEFAULT_MESSAGE) | ||||||
|
|
||||||
|
|
||||||
| def _raise_silent_audio_capture_error() -> NoReturn: | ||||||
| raise SilentAudioCaptureError | ||||||
|
|
||||||
|
|
||||||
| def _peak_int16_sample(chunk: bytes) -> int: | ||||||
| sample_count = len(chunk) // constants.AUDIO_FORMAT_WIDTH | ||||||
| if sample_count <= 0: | ||||||
| return 0 | ||||||
| pcm = chunk[: sample_count * constants.AUDIO_FORMAT_WIDTH] | ||||||
| samples = struct.unpack(f"<{sample_count}h", pcm) | ||||||
| return max((abs(sample) for sample in samples), default=0) | ||||||
|
|
||||||
|
|
||||||
| def _write_live_preview_event( | ||||||
| log_file: Path, | ||||||
| *, | ||||||
|
|
@@ -421,8 +467,8 @@ async def _send_audio( | |||||
| recording_path_callback: Callable[[Path], None] | None = None, | ||||||
| audio_level_callback: Callable[[bytes], None] | None = None, | ||||||
| live_preview_callback: Callable[[bytes], Awaitable[None]] | None = None, | ||||||
| ) -> None: | ||||||
| """Read from mic and send to Wyoming server.""" | ||||||
| ) -> AudioCaptureStats: | ||||||
| """Read from mic, send to Wyoming server, and return capture stats.""" | ||||||
| from wyoming.asr import Transcribe # noqa: PLC0415 | ||||||
| from wyoming.audio import AudioChunk, AudioStart, AudioStop # noqa: PLC0415 | ||||||
|
|
||||||
|
|
@@ -434,9 +480,11 @@ async def _send_audio( | |||||
| # Buffer to save audio if requested | ||||||
| audio_buffer = io.BytesIO() if save_recording else None | ||||||
| pending_audio_level_tasks: set[asyncio.Task[None]] = set() | ||||||
| capture_stats = AudioCaptureStats() | ||||||
|
|
||||||
| async def send_chunk(chunk: bytes) -> None: | ||||||
| """Send audio chunk to ASR server and optionally buffer it.""" | ||||||
| capture_stats.observe(chunk) | ||||||
| if audio_buffer is not None: | ||||||
| audio_buffer.write(chunk) | ||||||
| if audio_level_callback: | ||||||
|
|
@@ -475,6 +523,7 @@ async def send_chunk(chunk: bytes) -> None: | |||||
| recording_path_callback(saved_path) | ||||||
| finally: | ||||||
| await _finish_audio_level_callbacks(pending_audio_level_tasks) | ||||||
| return capture_stats | ||||||
|
|
||||||
|
|
||||||
| async def record_audio_to_buffer(queue: asyncio.Queue, logger: logging.Logger) -> bytes: | ||||||
|
|
@@ -692,7 +741,7 @@ async def _transcribe_live_audio_wyoming( | |||||
| if live_preview is not None: | ||||||
| live_preview.reset_log() | ||||||
| live_preview_task = asyncio.create_task(live_preview.run()) | ||||||
| _, recv_task = await manage_send_receive_tasks( | ||||||
| send_task, recv_task = await manage_send_receive_tasks( | ||||||
| _send_audio( | ||||||
| client, | ||||||
| stream, | ||||||
|
|
@@ -715,8 +764,17 @@ async def _transcribe_live_audio_wyoming( | |||||
| return_when=asyncio.ALL_COMPLETED, | ||||||
| ) | ||||||
| result = recv_task.result() | ||||||
| capture_stats = send_task.result() if send_task is not None else None | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| if ( | ||||||
| capture_stats is not None | ||||||
| and capture_stats.is_all_silence | ||||||
| and not result.strip() | ||||||
| ): | ||||||
| _raise_silent_audio_capture_error() | ||||||
| final_transcript = result | ||||||
| return result | ||||||
| except SilentAudioCaptureError: | ||||||
| raise | ||||||
| except (ConnectionRefusedError, Exception): | ||||||
| logger.warning("Failed to connect to Wyoming ASR server") | ||||||
| return None | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,58 @@ | ||||||||||||||||||||||||||||||||
| import AVFoundation | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| enum MicrophonePermissionStatus { | ||||||||||||||||||||||||||||||||
| case authorized | ||||||||||||||||||||||||||||||||
| case denied | ||||||||||||||||||||||||||||||||
| case notDetermined | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| struct MicrophonePermissionPresentation { | ||||||||||||||||||||||||||||||||
| let status: MicrophonePermissionStatus | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| var canRecord: Bool { | ||||||||||||||||||||||||||||||||
| status == .authorized | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| var statusMessage: String? { | ||||||||||||||||||||||||||||||||
| switch status { | ||||||||||||||||||||||||||||||||
| case .authorized: | ||||||||||||||||||||||||||||||||
| return nil | ||||||||||||||||||||||||||||||||
| case .denied: | ||||||||||||||||||||||||||||||||
| return "Allow Microphone permission for Agent CLI in System Settings." | ||||||||||||||||||||||||||||||||
| case .notDetermined: | ||||||||||||||||||||||||||||||||
| return "Approve Microphone permission for Agent CLI, then try recording again." | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| protocol MicrophonePermissionControlling { | ||||||||||||||||||||||||||||||||
| func currentStatus() -> MicrophonePermissionStatus | ||||||||||||||||||||||||||||||||
| func requestAccessIfNeeded(completion: @escaping (Bool) -> Void) | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| final class MicrophonePermissionController: MicrophonePermissionControlling { | ||||||||||||||||||||||||||||||||
| static let shared = MicrophonePermissionController() | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| private init() {} | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| func currentStatus() -> MicrophonePermissionStatus { | ||||||||||||||||||||||||||||||||
| switch AVCaptureDevice.authorizationStatus(for: .audio) { | ||||||||||||||||||||||||||||||||
| case .authorized: | ||||||||||||||||||||||||||||||||
| return .authorized | ||||||||||||||||||||||||||||||||
| case .notDetermined: | ||||||||||||||||||||||||||||||||
| return .notDetermined | ||||||||||||||||||||||||||||||||
| case .denied, .restricted: | ||||||||||||||||||||||||||||||||
| return .denied | ||||||||||||||||||||||||||||||||
| @unknown default: | ||||||||||||||||||||||||||||||||
| return .denied | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| func requestAccessIfNeeded(completion: @escaping (Bool) -> Void) { | ||||||||||||||||||||||||||||||||
| guard currentStatus() == .notDetermined else { | ||||||||||||||||||||||||||||||||
| completion(currentStatus() == .authorized) | ||||||||||||||||||||||||||||||||
| return | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
| AVCaptureDevice.requestAccess(for: .audio, completionHandler: completion) | ||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
|
Comment on lines
+51
to
+57
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| #if canImport(XCTest) | ||
| import XCTest | ||
| @testable import AgentCLI | ||
|
|
||
| final class MicrophonePermissionTests: XCTestCase { | ||
| func testAuthorizedPermissionAllowsRecording() { | ||
| let presentation = MicrophonePermissionPresentation(status: .authorized) | ||
|
|
||
| XCTAssertTrue(presentation.canRecord) | ||
| XCTAssertEqual(presentation.statusMessage, nil) | ||
| } | ||
|
|
||
| func testDeniedPermissionExplainsSettingsFix() { | ||
| let presentation = MicrophonePermissionPresentation(status: .denied) | ||
|
|
||
| XCTAssertFalse(presentation.canRecord) | ||
| XCTAssertEqual( | ||
| presentation.statusMessage, | ||
| "Allow Microphone permission for Agent CLI in System Settings." | ||
| ) | ||
| } | ||
| } | ||
|
Comment on lines
+6
to
+22
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time! |
||
| #endif | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
_raise_silent_audio_capture_errorraises the class object rather than an instance (raise SilentAudioCaptureErrorvsraise SilentAudioCaptureError()). Python 3 instantiates the class with no arguments when it seesraise ExceptionClass, so theDEFAULT_MESSAGEis used correctly and there is no runtime bug. However, raising a class directly is unconventional, surprises some static analysers, and adds an unnecessary call frame to every traceback. Since the function has no other callers and provides no testability benefit, it can be collapsed to a direct raise at the call site.Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!