From 83024f3cb5ee71c2139c85a7e71735eff3669d9d Mon Sep 17 00:00:00 2001 From: Edward Ly Date: Thu, 11 Jun 2026 23:34:09 -0700 Subject: [PATCH] feat: add core:audio2text:subtitles task type, register file action Signed-off-by: Edward Ly --- lib/Capabilities.php | 18 ++++++++ lib/Controller/AssistantApiController.php | 4 ++ .../FileActionTaskSuccessfulListener.php | 12 ++++++ lib/Notification/Notifier.php | 9 ++++ lib/Service/AssistantService.php | 15 +++---- lib/Service/TaskProcessingService.php | 5 ++- src/files/fileActions.js | 42 ++++++++++++++++++- 7 files changed, 96 insertions(+), 9 deletions(-) diff --git a/lib/Capabilities.php b/lib/Capabilities.php index 40ba32f36..2dba20253 100644 --- a/lib/Capabilities.php +++ b/lib/Capabilities.php @@ -128,6 +128,24 @@ public function getCapabilities(): array { 'icon' => $this->urlGenerator->imagePath(Application::APP_ID, 'client_integration/speech_to_text.svg'), ]; $capabilities['client_integration'][Application::APP_ID]['context-menu'][] = $endpoint; + + if (class_exists('OCP\\TaskProcessing\\TaskTypes\\AudioToTextSubtitles')) { + $url = $this->urlGenerator->linkToOCSRouteAbsolute(Application::APP_ID . 
'.assistantApi.runFileAction', [ + 'apiVersion' => 'v1', + 'fileId' => '123456789', + 'taskTypeId' => \OCP\TaskProcessing\TaskTypes\AudioToTextSubtitles::ID, + ]); + $url = str_replace($this->urlGenerator->getBaseUrl(), '', $url); + $url = str_replace('123456789', '{fileId}', $url); + $endpoint = [ + 'name' => $this->l->t('Generate subtitles using AI'), + 'url' => $url, + 'method' => 'POST', + 'mimetype_filters' => 'audio/, video/', + 'icon' => $this->urlGenerator->imagePath(Application::APP_ID, 'client_integration/speech_to_text.svg'), + ]; + $capabilities['client_integration'][Application::APP_ID]['context-menu'][] = $endpoint; + } } if ($ttsAvailable) { diff --git a/lib/Controller/AssistantApiController.php b/lib/Controller/AssistantApiController.php index 6f0270721..9c1efa317 100644 --- a/lib/Controller/AssistantApiController.php +++ b/lib/Controller/AssistantApiController.php @@ -432,6 +432,9 @@ public function runFileAction(int $fileId, string $taskTypeId): DataResponse { $message = $this->l10n->t('Assistant task submitted successfully'); if ($taskTypeId === AudioToText::ID) { $message = $this->l10n->t('Transcription task submitted successfully'); + } elseif (class_exists('OCP\\TaskProcessing\\TaskTypes\\AudioToTextSubtitles') && $taskTypeId === \OCP\TaskProcessing\TaskTypes\AudioToTextSubtitles::ID) { + // The availability guard and the ID comparison share one condition so the summary and text-to-speech branches below stay reachable. + $message = $this->l10n->t('Subtitles task submitted successfully'); } elseif ($taskTypeId === TextToTextSummary::ID) { $message = $this->l10n->t('Summarization task submitted successfully'); } elseif (class_exists('OCP\\TaskProcessing\\TaskTypes\\TextToSpeech')) { diff --git a/lib/Listener/FileActionTaskSuccessfulListener.php b/lib/Listener/FileActionTaskSuccessfulListener.php index 437c66cb3..3ee5c3140 100644 --- a/lib/Listener/FileActionTaskSuccessfulListener.php +++ b/lib/Listener/FileActionTaskSuccessfulListener.php @@ -75,6 +75,18 @@ class_exists('OCP\\TaskProcessing\\TaskTypes\\TextToSpeech') } $targetFileName = $sourceFile->getName() . 
' - text to speech.' . $extension; $targetFile = $sourceFileParent->newFile($targetFileName, $speechFile->fopen('rb')); + } elseif ( + class_exists('OCP\\TaskProcessing\\TaskTypes\\AudioToTextSubtitles') + && $taskTypeId === \OCP\TaskProcessing\TaskTypes\AudioToTextSubtitles::ID + ) { + $subtitlesFileId = (int)$task->getOutput()['output']; + $subtitlesFile = $this->taskProcessingService->getOutputFile($subtitlesFileId); + $mimeType = mime_content_type($subtitlesFile->fopen('rb')); + $mimeType = $mimeType ?: 'text/plain'; + $mimes = new \Mimey\MimeTypes; + $extension = $mimes->getExtension($mimeType); + $targetFileName = $sourceFile->getName() . ' - subtitles.' . $extension; + $targetFile = $sourceFileParent->newFile($targetFileName, $subtitlesFile->fopen('rb')); } else { $textResult = $task->getOutput()['output']; $suffix = $taskTypeId === TextToTextSummary::ID ? 'summarized' : 'transcribed'; diff --git a/lib/Notification/Notifier.php b/lib/Notification/Notifier.php index e5116ffa1..35251f601 100644 --- a/lib/Notification/Notifier.php +++ b/lib/Notification/Notifier.php @@ -95,6 +95,9 @@ public function prepare(INotification $notification, string $languageCode): INot $taskTypeName = $l->t('AI image generation'); } elseif ($params['taskTypeId'] === AudioToText::ID) { $taskTypeName = $l->t('AI audio transcription'); + } elseif (class_exists('OCP\\TaskProcessing\\TaskTypes\\AudioToTextSubtitles') + && $params['taskTypeId'] === \OCP\TaskProcessing\TaskTypes\AudioToTextSubtitles::ID) { + $taskTypeName = $l->t('AI subtitles generation'); } elseif ($params['taskTypeId'] === 'copywriter') { // TODO adjust that when we have copywriter back on its feet // Catch the custom copywriter task type built on top of the FreePrompt task type. 
@@ -202,6 +205,9 @@ public function prepare(INotification $notification, string $languageCode): INot case AudioToText::ID: $message = $l->t('{sourceFile} has been transcribed in {targetFile}'); break; + case class_exists('OCP\\TaskProcessing\\TaskTypes\\AudioToTextSubtitles') ? \OCP\TaskProcessing\TaskTypes\AudioToTextSubtitles::ID : 'nope': + $message = $l->t('{sourceFile} has been subtitled in {targetFile}'); + break; case class_exists('OCP\\TaskProcessing\\TaskTypes\\TextToSpeech') ? \OCP\TaskProcessing\TaskTypes\TextToSpeech::ID : 'nope': $message = $l->t('{sourceFile} has been converted to audio in {targetFile}'); break; @@ -253,6 +259,9 @@ public function prepare(INotification $notification, string $languageCode): INot case AudioToText::ID: $message = $l->t('Transcription of {sourceFile} has failed'); break; + case class_exists('OCP\\TaskProcessing\\TaskTypes\\AudioToTextSubtitles') ? \OCP\TaskProcessing\TaskTypes\AudioToTextSubtitles::ID : 'nope': + $message = $l->t('Subtitling of {sourceFile} has failed'); + break; case class_exists('OCP\\TaskProcessing\\TaskTypes\\TextToSpeech') ? 
\OCP\TaskProcessing\TaskTypes\TextToSpeech::ID : 'nope': $message = $l->t('The text-to-speech process for {sourceFile} has failed'); break; diff --git a/lib/Service/AssistantService.php b/lib/Service/AssistantService.php index ee6e3fbdf..02086c6c0 100644 --- a/lib/Service/AssistantService.php +++ b/lib/Service/AssistantService.php @@ -69,13 +69,14 @@ class AssistantService { 'context_chat:context_chat' => 3, 'legacy:TextProcessing:OCA\ContextChat\TextProcessing\ContextChatTaskType' => 3, 'context_chat:context_chat_search' => 4, - AudioToText::ID => 5, - TextToTextTranslate::ID => 6, - ContextWrite::ID => 7, - TextToImage::ID => 8, - TextToTextSummary::ID => 9, - TextToTextHeadline::ID => 10, - TextToTextTopics::ID => 11, + AudioToText::ID => 10, + 'core:audio2text:subtitles' => 11, + TextToTextTranslate::ID => 20, + ContextWrite::ID => 21, + TextToImage::ID => 22, + TextToTextSummary::ID => 23, + TextToTextHeadline::ID => 24, + TextToTextTopics::ID => 25, ]; public array $informationSources; diff --git a/lib/Service/TaskProcessingService.php b/lib/Service/TaskProcessingService.php index d7634db1b..0db8b2866 100644 --- a/lib/Service/TaskProcessingService.php +++ b/lib/Service/TaskProcessingService.php @@ -93,6 +93,9 @@ public function isFileActionTaskTypeSupported(string $taskTypeId): bool { if (class_exists('OCP\\TaskProcessing\\TaskTypes\\TextToSpeech')) { $authorizedTaskTypes[] = \OCP\TaskProcessing\TaskTypes\TextToSpeech::ID; } + if (class_exists('OCP\\TaskProcessing\\TaskTypes\\AudioToTextSubtitles')) { + $authorizedTaskTypes[] = \OCP\TaskProcessing\TaskTypes\AudioToTextSubtitles::ID; + } return in_array($taskTypeId, $authorizedTaskTypes, true); } @@ -111,7 +114,7 @@ public function runFileAction(string $userId, int $fileId, string $taskTypeId): throw new Exception('Invalid task type for file action'); } try { - $input = $taskTypeId === AudioToText::ID + $input = ($taskTypeId === AudioToText::ID) || 
(class_exists('OCP\\TaskProcessing\\TaskTypes\\AudioToTextSubtitles') && $taskTypeId === \OCP\TaskProcessing\TaskTypes\AudioToTextSubtitles::ID) ? ['input' => $fileId] : ['input' => $this->assistantService->parseTextFromFile($userId, fileId: $fileId)]; } catch (NotPermittedException|GenericFileException|LockedException|\OCP\Files\NotFoundException|Exception $e) { diff --git a/src/files/fileActions.js b/src/files/fileActions.js index 36445d004..f58b87d13 100644 --- a/src/files/fileActions.js +++ b/src/files/fileActions.js @@ -10,7 +10,7 @@ import CreationSvgIcon from '@mdi/svg/svg/creation.svg?raw' import SummarizeSymbol from '@material-symbols/svg-700/outlined/summarize.svg?raw' import TTSSymbol from '@material-symbols/svg-700/outlined/text_to_speech.svg?raw' import STTSymbol from '@material-symbols/svg-700/outlined/speech_to_text.svg?raw' -import { VALID_AUDIO_MIME_TYPES, VALID_TEXT_MIME_TYPES } from '../constants.js' +import { VALID_AUDIO_MIME_TYPES, VALID_TEXT_MIME_TYPES, VALID_VIDEO_MIME_TYPES } from '../constants.js' const actionIgnoreLists = [ 'trashbin', @@ -156,6 +156,45 @@ function registerSttAction() { registerFileAction(sttAction) } +function registerSttSubtitlesAction() { + const sttSubtitlesAction = { + id: 'assistant-stt-subtitles', + parent: 'assistant-group', + displayName: ({ nodes }) => { + return t('assistant', 'Generate subtitles using AI') + }, + enabled({ nodes, view }) { + return !actionIgnoreLists.includes(view.id) + && nodes.length === 1 + && !nodes.some(({ permissions }) => (permissions & Permission.READ) === 0) + && nodes.every(({ type }) => type === FileType.File) + && nodes.every(({ mime }) => VALID_AUDIO_MIME_TYPES.includes(mime) || VALID_VIDEO_MIME_TYPES.includes(mime)) + }, + iconSvgInline: () => STTSymbol, + order: 0, + async exec({ nodes }) { + const node = nodes[0] + const { default: axios } = await import('@nextcloud/axios') + const { generateOcsUrl } = await import('@nextcloud/router') + const { showError, showSuccess } = await 
import('@nextcloud/dialogs') + const url = generateOcsUrl('/apps/assistant/api/v1/file-action/{fileId}/core:audio2text:subtitles', { fileId: node.fileid }) + try { + await axios.post(url) + showSuccess( + t('assistant', 'AI subtitles task submitted successfully.') + '\n' + + t('assistant', 'You will be notified when it is ready.') + '\n' + + t('assistant', 'It can also be checked in the Assistant in the "Work with audio -> Generate subtitles" menu.'), + ) + } catch (error) { + console.error(error) + showError(t('assistant', 'Failed to launch the AI file action')) + } + return null + }, + } + registerFileAction(sttSubtitlesAction) +} + const assistantEnabled = loadState('assistant', 'assistant-enabled', false) const summarizeAvailable = loadState('assistant', 'summarize-available', false) const sttAvailable = loadState('assistant', 'stt-available', false) @@ -174,6 +213,7 @@ if (assistantEnabled) { } if (sttAvailable) { registerSttAction() + registerSttSubtitlesAction() } if (summarizeAvailable) { registerSummarizeAction()