From 0b4b7e47cb83056f6df4c887dc5593d648c66230 Mon Sep 17 00:00:00 2001 From: "seer-by-sentry[bot]" <157164994+seer-by-sentry[bot]@users.noreply.github.com> Date: Fri, 15 May 2026 13:20:01 +0000 Subject: [PATCH 1/3] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8DEdge=20TTS?= =?UTF-8?q?=E9=9F=B3=E9=A2=91=E6=A0=BC=E5=BC=8F=E4=B8=8D=E8=AF=86=E5=88=AB?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/common/voice/voice.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/app/common/voice/voice.py b/app/common/voice/voice.py index 9e355fcd..441dbccb 100644 --- a/app/common/voice/voice.py +++ b/app/common/voice/voice.py @@ -382,7 +382,7 @@ def get_voice(self, text: str, voice: str) -> str: logger.debug(f"获取语音: text='{text}', voice='{voice}'") file_path: str = self._get_cache_file_path(text, voice) - if os.path.exists(file_path): + if os.path.exists(file_path) and self._is_valid_audio(file_path): logger.debug(f"命中磁盘缓存: {file_path}") return file_path @@ -403,7 +403,7 @@ async def _generate_voice(self, text: str, voice: str, file_path: str) -> None: while retry_count < max_retries: try: - communicate = edge_tts.Communicate(text, voice) + communicate = edge_tts.Communicate(text, voice, audio_format="riff-24khz-16bit-mono-pcm") await communicate.save(file_path) logger.debug(f"成功生成语音并保存至: {file_path}") return @@ -467,6 +467,19 @@ def _get_cache_file_path(self, text: str, voice: str) -> str: ) filename = f"{voice}_{safe_text}.wav" return os.path.join(self.audio_dir, filename) + + def _is_valid_audio(self, file_path: str) -> bool: + """检查音频文件是否为有效格式""" + try: + sf.info(file_path) + return True + except Exception: + logger.warning(f"缓存文件格式无效,将重新生成: {file_path}") + try: + os.remove(file_path) + except OSError: + pass + return False def _save_to_disk(self, file_path: str, data: np.ndarray, fs: int) -> None: """保存到磁盘""" @@ -852,4 +865,4 @@ def stop(self) -> None: try: self.voice_engine.stop() except Exception as e: - logger.warning(f"停止系统TTS引擎失败: {e}") + logger.warning(f"停止系统TTS引擎失败: {e}") From 9dd72f531e444281991bf957998d262f14c8a962 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 15 May 2026 13:33:21 +0000 Subject: [PATCH 2/3] =?UTF-8?q?fix:=20=E5=85=BC=E5=AE=B9=20edge-tts=20?= =?UTF-8?q?=E9=9F=B3=E9=A2=91=E5=8F=82=E6=95=B0=E5=B9=B6=E4=BF=AE=E6=AD=A3?= =?UTF-8?q?=E7=BC=93=E5=AD=98=E6=A0=A1=E9=AA=8C=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agent-Logs-Url: https://github.com/SECTL/SecRandom/sessions/4f3a64bb-fdf0-422c-b1ed-180dbdab86d1 Co-authored-by: WSXYT <102407247+WSXYT@users.noreply.github.com> --- app/common/voice/voice.py | 56 ++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/app/common/voice/voice.py b/app/common/voice/voice.py index 441dbccb..48c6f25e 100644 --- a/app/common/voice/voice.py +++ b/app/common/voice/voice.py @@ -7,6 +7,7 @@ # --------- 标准库 --------- import asyncio import concurrent.futures +import inspect import json import os import platform @@ -45,6 +46,13 @@ from app.tools.settings_access import readme_settings_async from app.tools.config import restore_volume +try: + _EDGE_TTS_SUPPORTS_AUDIO_FORMAT = ( + "audio_format" in inspect.signature(edge_tts.Communicate).parameters + ) +except Exception: + _EDGE_TTS_SUPPORTS_AUDIO_FORMAT = False + # 权限检查装饰器 def require_permission(permission: str): @@ -382,7 +390,7 @@ def get_voice(self, text: str, voice: str) -> str: logger.debug(f"获取语音: text='{text}', voice='{voice}'") file_path: str = self._get_cache_file_path(text, voice) - if os.path.exists(file_path) and self._is_valid_audio(file_path): + if os.path.exists(file_path) and self._is_valid_audio(file_path): logger.debug(f"命中磁盘缓存: {file_path}") return file_path @@ -403,7 +411,14 @@ async def _generate_voice(self, text: str, voice: str, file_path: str) -> None: while retry_count < max_retries: try: - communicate = edge_tts.Communicate(text, voice, audio_format="riff-24khz-16bit-mono-pcm") + if _EDGE_TTS_SUPPORTS_AUDIO_FORMAT: + communicate = edge_tts.Communicate( + text, + voice, + audio_format="riff-24khz-16bit-mono-pcm", + ) + else: + communicate = edge_tts.Communicate(text, voice) await communicate.save(file_path) logger.debug(f"成功生成语音并保存至: {file_path}") return @@ -467,19 +482,28 @@ def _get_cache_file_path(self, text: str, voice: str) -> str: ) filename = f"{voice}_{safe_text}.wav" return os.path.join(self.audio_dir, filename) - - def _is_valid_audio(self, file_path: str) -> bool: - """检查音频文件是否为有效格式""" - try: - sf.info(file_path) - return True - except Exception: - logger.warning(f"缓存文件格式无效,将重新生成: {file_path}") - try: - os.remove(file_path) - except OSError: - pass - return False + + def _is_valid_audio(self, file_path: str) -> bool: + """检查音频文件是否为有效格式""" + if sf is None: + return True + + try: + sf.info(file_path) + return True + except sf.LibsndfileError as e: + if getattr(e, "error_string", "") != "Format not recognised.": + logger.warning(f"检查缓存文件失败,保留原文件: {file_path}, 错误: {e}") + return True + + logger.warning(f"缓存文件格式无效,将重新生成: {file_path}") + try: + os.remove(file_path) + except OSError as remove_error: + logger.warning( + f"删除无效缓存文件失败: {file_path}, 错误: {remove_error}" + ) + return False def _save_to_disk(self, file_path: str, data: np.ndarray, fs: int) -> None: """保存到磁盘""" @@ -865,4 +889,4 @@ def stop(self) -> None: try: self.voice_engine.stop() except Exception as e: - logger.warning(f"停止系统TTS引擎失败: {e}") + logger.warning(f"停止系统TTS引擎失败: {e}") From e1cb6d13783da7fa6b0e4dd193ade7cdb1799d9e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 15 May 2026 13:37:37 +0000 Subject: [PATCH 3/3] =?UTF-8?q?chore:=20=E5=AE=8C=E5=96=84=E9=9F=B3?= =?UTF-8?q?=E9=A2=91=E6=A0=A1=E9=AA=8C=E5=BC=82=E5=B8=B8=E5=A4=84=E7=90=86?= =?UTF-8?q?=E4=B8=8E=E5=85=BC=E5=AE=B9=E6=97=A5=E5=BF=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agent-Logs-Url: https://github.com/SECTL/SecRandom/sessions/4f3a64bb-fdf0-422c-b1ed-180dbdab86d1 Co-authored-by: WSXYT <102407247+WSXYT@users.noreply.github.com> --- app/common/voice/voice.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/app/common/voice/voice.py b/app/common/voice/voice.py index 48c6f25e..40512986 100644 --- a/app/common/voice/voice.py +++ b/app/common/voice/voice.py @@ -50,8 +50,11 @@ _EDGE_TTS_SUPPORTS_AUDIO_FORMAT = ( "audio_format" in inspect.signature(edge_tts.Communicate).parameters ) -except Exception: +except (AttributeError, TypeError, ValueError): _EDGE_TTS_SUPPORTS_AUDIO_FORMAT = False + logger.warning("无法检测 edge-tts audio_format 参数支持情况,将使用兼容模式") + +LIBSNDFILE_FORMAT_ERROR_CODE = 1 # libsndfile 错误码 1 表示格式不识别 # 权限检查装饰器 @@ -486,13 +489,16 @@ def _get_cache_file_path(self, text: str, voice: str) -> str: def _is_valid_audio(self, file_path: str) -> bool: """检查音频文件是否为有效格式""" if sf is None: + # 未安装 soundfile 时无法做格式探测,跳过校验避免误删缓存 + logger.warning(f"soundfile 不可用,跳过音频格式校验: {file_path}") return True try: sf.info(file_path) return True except sf.LibsndfileError as e: - if getattr(e, "error_string", "") != "Format not recognised.": + # 非“格式不识别”错误通常是临时I/O问题(如锁文件),保留缓存避免误删 + if e.code != LIBSNDFILE_FORMAT_ERROR_CODE: logger.warning(f"检查缓存文件失败,保留原文件: {file_path}, 错误: {e}") return True