aboutsummaryrefslogtreecommitdiffstats
path: root/lib/private/SpeechToText/SpeechToTextManager.php
diff options
context:
space:
mode:
Diffstat (limited to 'lib/private/SpeechToText/SpeechToTextManager.php')
-rw-r--r--lib/private/SpeechToText/SpeechToTextManager.php177
1 files changed, 177 insertions, 0 deletions
diff --git a/lib/private/SpeechToText/SpeechToTextManager.php b/lib/private/SpeechToText/SpeechToTextManager.php
new file mode 100644
index 00000000000..d69add2d80b
--- /dev/null
+++ b/lib/private/SpeechToText/SpeechToTextManager.php
@@ -0,0 +1,177 @@
+<?php
+
+declare(strict_types=1);
+
+/**
+ * SPDX-FileCopyrightText: 2023 Nextcloud GmbH and Nextcloud contributors
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+
+namespace OC\SpeechToText;
+
+use InvalidArgumentException;
+use OC\AppFramework\Bootstrap\Coordinator;
+use OCP\BackgroundJob\IJobList;
+use OCP\Files\File;
+use OCP\Files\InvalidPathException;
+use OCP\Files\NotFoundException;
+use OCP\IConfig;
+use OCP\IServerContainer;
+use OCP\IUserSession;
+use OCP\PreConditionNotMetException;
+use OCP\SpeechToText\ISpeechToTextManager;
+use OCP\SpeechToText\ISpeechToTextProvider;
+use OCP\SpeechToText\ISpeechToTextProviderWithId;
+use OCP\SpeechToText\ISpeechToTextProviderWithUserId;
+use OCP\TaskProcessing\IManager as ITaskProcessingManager;
+use OCP\TaskProcessing\Task;
+use OCP\TaskProcessing\TaskTypes\AudioToText;
+use Psr\Container\ContainerExceptionInterface;
+use Psr\Container\NotFoundExceptionInterface;
+use Psr\Log\LoggerInterface;
+use RuntimeException;
+use Throwable;
+
+class SpeechToTextManager implements ISpeechToTextManager {
+ /** @var ?ISpeechToTextProvider[] */
+ private ?array $providers = null;
+
+ public function __construct(
+ private IServerContainer $serverContainer,
+ private Coordinator $coordinator,
+ private LoggerInterface $logger,
+ private IJobList $jobList,
+ private IConfig $config,
+ private IUserSession $userSession,
+ private ITaskProcessingManager $taskProcessingManager,
+ ) {
+ }
+
+ public function getProviders(): array {
+ $context = $this->coordinator->getRegistrationContext();
+ if ($context === null) {
+ return [];
+ }
+
+ if ($this->providers !== null) {
+ return $this->providers;
+ }
+
+ $this->providers = [];
+
+ foreach ($context->getSpeechToTextProviders() as $providerServiceRegistration) {
+ $class = $providerServiceRegistration->getService();
+ try {
+ $this->providers[$class] = $this->serverContainer->get($class);
+ } catch (NotFoundExceptionInterface|ContainerExceptionInterface|Throwable $e) {
+ $this->logger->error('Failed to load SpeechToText provider ' . $class, [
+ 'exception' => $e,
+ ]);
+ }
+ }
+
+ return $this->providers;
+ }
+
+ public function hasProviders(): bool {
+ $context = $this->coordinator->getRegistrationContext();
+ if ($context === null) {
+ return false;
+ }
+ return !empty($context->getSpeechToTextProviders());
+ }
+
+ public function scheduleFileTranscription(File $file, ?string $userId, string $appId): void {
+ if (!$this->hasProviders()) {
+ throw new PreConditionNotMetException('No SpeechToText providers have been registered');
+ }
+ try {
+ $this->jobList->add(TranscriptionJob::class, [
+ 'fileId' => $file->getId(),
+ 'owner' => $file->getOwner()->getUID(),
+ 'userId' => $userId,
+ 'appId' => $appId,
+ ]);
+ } catch (NotFoundException|InvalidPathException $e) {
+ throw new InvalidArgumentException('Invalid file provided for file transcription: ' . $e->getMessage());
+ }
+ }
+
+ public function cancelScheduledFileTranscription(File $file, ?string $userId, string $appId): void {
+ try {
+ $jobArguments = [
+ 'fileId' => $file->getId(),
+ 'owner' => $file->getOwner()->getUID(),
+ 'userId' => $userId,
+ 'appId' => $appId,
+ ];
+ if (!$this->jobList->has(TranscriptionJob::class, $jobArguments)) {
+ $this->logger->debug('Failed to cancel a Speech-to-text job for file ' . $file->getId() . '. No related job was found.');
+ return;
+ }
+ $this->jobList->remove(TranscriptionJob::class, $jobArguments);
+ } catch (NotFoundException|InvalidPathException $e) {
+ throw new InvalidArgumentException('Invalid file provided to cancel file transcription: ' . $e->getMessage());
+ }
+ }
+
+ public function transcribeFile(File $file, ?string $userId = null, string $appId = 'core'): string {
+ // try to run a TaskProcessing core:audio2text task
+ // this covers scheduling as well because OC\SpeechToText\TranscriptionJob calls this method
+ try {
+ if (isset($this->taskProcessingManager->getAvailableTaskTypes()['core:audio2text'])) {
+ $taskProcessingTask = new Task(
+ AudioToText::ID,
+ ['input' => $file->getId()],
+ $appId,
+ $userId,
+ 'from-SpeechToTextManager||' . $file->getId() . '||' . ($userId ?? '') . '||' . $appId,
+ );
+ $resultTask = $this->taskProcessingManager->runTask($taskProcessingTask);
+ if ($resultTask->getStatus() === Task::STATUS_SUCCESSFUL) {
+ $output = $resultTask->getOutput();
+ if (isset($output['output']) && is_string($output['output'])) {
+ return $output['output'];
+ }
+ }
+ }
+ } catch (Throwable $e) {
+ throw new RuntimeException('Failed to run a Speech-to-text job from STTManager with TaskProcessing for file ' . $file->getId(), 0, $e);
+ }
+
+ if (!$this->hasProviders()) {
+ throw new PreConditionNotMetException('No SpeechToText providers have been registered');
+ }
+
+ $providers = $this->getProviders();
+
+ $json = $this->config->getAppValue('core', 'ai.stt_provider', '');
+ if ($json !== '') {
+ $classNameOrId = json_decode($json, true);
+ $provider = current(array_filter($providers, function ($provider) use ($classNameOrId) {
+ if ($provider instanceof ISpeechToTextProviderWithId) {
+ return $provider->getId() === $classNameOrId;
+ }
+ return $provider::class === $classNameOrId;
+ }));
+ if ($provider !== false) {
+ $providers = [$provider];
+ }
+ }
+
+ foreach ($providers as $provider) {
+ try {
+ if ($provider instanceof ISpeechToTextProviderWithUserId) {
+ $provider->setUserId($this->userSession->getUser()?->getUID());
+ }
+ return $provider->transcribeFile($file);
+ } catch (\Throwable $e) {
+ $this->logger->info('SpeechToText transcription using provider ' . $provider->getName() . ' failed', ['exception' => $e]);
+ throw new RuntimeException('SpeechToText transcription using provider "' . $provider->getName() . '" failed: ' . $e->getMessage());
+ }
+ }
+
+ throw new RuntimeException('Could not transcribe file');
+ }
+}