Add coaching audio transcription flow

2026-06-09 14:50:17 +00:00 · 2026-06-09 14:50:17 +00:00 · ba861959bd
commit ba861959bd
parent 2a10483480
2 changed files with 229 additions and 10 deletions
--- a/backend/src/routes/coaching.js
+++ b/backend/src/routes/coaching.js
@ -1,4 +1,7 @@
 const express = require("express");
+const fs = require("fs");
+const path = require("path");
+const formidable = require("formidable");
 const db = require("../db/models");
 const wrapAsync = require("../helpers").wrapAsync;
 const { LocalAIApi } = require("../ai/LocalAIApi");
@ -26,6 +29,113 @@ function splitActionItems(value) {
    .slice(0, 8);
 }

+function getFirstUploadedFile(files, fieldName) {
+  const file = files[fieldName];
+
+  if (Array.isArray(file)) {
+    return file[0];
+  }
+
+  return file;
+}
+
+function parseAudioUpload(req) {
+  return new Promise((resolve, reject) => {
+    const form = new formidable.IncomingForm({
+      multiples: false,
+      keepExtensions: true,
+      maxFileSize: 100 * 1024 * 1024,
+    });
+
+    form.parse(req, (error, _fields, files) => {
+      if (error) {
+        reject(error);
+        return;
+      }
+
+      resolve(getFirstUploadedFile(files, "audio") || getFirstUploadedFile(files, "file"));
+    });
+  });
+}
+
+async function removeUploadedAudio(filePath) {
+  try {
+    await fs.promises.unlink(filePath);
+  } catch (error) {
+    console.warn("Failed to remove uploaded audio file", error);
+  }
+}
+
+async function transcribeAudioFile(audioFile) {
+  const filePath = audioFile.filepath || audioFile.path;
+  const fileName = audioFile.originalFilename || audioFile.name || path.basename(filePath);
+  const mimeType = audioFile.mimetype || audioFile.type || "application/octet-stream";
+  const transcriptionUrl =
+    process.env.AI_TRANSCRIPTION_URL || "https://api.openai.com/v1/audio/transcriptions";
+  const transcriptionModel = process.env.AI_TRANSCRIPTION_MODEL || "gpt-4o-mini-transcribe";
+  const transcriptionApiKey = process.env.AI_TRANSCRIPTION_API_KEY || process.env.OPENAI_API_KEY;
+
+  if (!filePath) {
+    throw new Error("Uploaded audio file does not have a readable path");
+  }
+
+  if (!transcriptionApiKey && !process.env.AI_TRANSCRIPTION_URL) {
+    return {
+      status: 501,
+      body: {
+        error: "transcription_not_configured",
+        message:
+          "Set AI_TRANSCRIPTION_URL for the AppWizzy proxy or AI_TRANSCRIPTION_API_KEY/OPENAI_API_KEY for direct transcription.",
+      },
+    };
+  }
+
+  const audioBuffer = await fs.promises.readFile(filePath);
+  const formData = new FormData();
+  formData.append("file", new Blob([audioBuffer], { type: mimeType }), fileName);
+  formData.append("model", transcriptionModel);
+  formData.append("response_format", "json");
+
+  const headers = {};
+
+  if (transcriptionApiKey) {
+    headers.Authorization = `Bearer ${transcriptionApiKey}`;
+  }
+
+  if (process.env.PROJECT_UUID) {
+    headers["project-uuid"] = process.env.PROJECT_UUID;
+  }
+
+  const response = await fetch(transcriptionUrl, {
+    method: "POST",
+    headers,
+    body: formData,
+  });
+  const responseText = await response.text();
+  let payload;
+
+  try {
+    payload = JSON.parse(responseText);
+  } catch {
+    throw new Error(`Transcription response is not JSON: ${responseText}`);
+  }
+
+  if (!response.ok) {
+    throw new Error(`Transcription request failed with ${response.status}: ${JSON.stringify(payload)}`);
+  }
+
+  const text = payload.text || payload.transcript || payload.output_text;
+
+  if (!text) {
+    throw new Error(`Transcription response does not include text: ${JSON.stringify(payload)}`);
+  }
+
+  return {
+    status: 200,
+    body: { text },
+  };
+}
+
 router.get(
  "/summary",
  wrapAsync(async (req, res) => {
@ -424,6 +534,27 @@ router.post(
  }),
 );

+router.post(
+  "/session-memory/transcribe",
+  wrapAsync(async (req, res) => {
+    const audioFile = await parseAudioUpload(req);
+
+    if (!audioFile) {
+      res.status(400).send({ error: "audio_required" });
+      return;
+    }
+
+    const filePath = audioFile.filepath || audioFile.path;
+
+    try {
+      const result = await transcribeAudioFile(audioFile);
+      res.status(result.status).send(result.body);
+    } finally {
+      await removeUploadedAudio(filePath);
+    }
+  }),
+);
+
 router.get(
  "/client-portal/me",
  wrapAsync(async (req, res) => {
--- a/frontend/src/pages/session-memory.tsx
+++ b/frontend/src/pages/session-memory.tsx
@ -1,8 +1,10 @@
 import {
  mdiCheckCircleOutline,
+  mdiCloudUploadOutline,
  mdiContentCopy,
  mdiFileDocumentEditOutline,
  mdiLightbulbOnOutline,
+  mdiMicrophoneOutline,
  mdiSendOutline,
 } from '@mdi/js';
 import axios from 'axios';
@ -122,7 +124,9 @@ const SessionMemory = () => {
  const [clientId, setClientId] = React.useState('');
  const [transcript, setTranscript] = React.useState('');
  const [draft, setDraft] = React.useState<MemoryDraft>(emptyDraft);
+  const [audioFile, setAudioFile] = React.useState<File | null>(null);
  const [isGenerating, setIsGenerating] = React.useState(false);
+  const [isTranscribing, setIsTranscribing] = React.useState(false);
  const [isSaving, setIsSaving] = React.useState(false);
  const [notice, setNotice] = React.useState('');

@ -155,16 +159,65 @@ const SessionMemory = () => {

  async function generateMemory() {
+    // Posts the selected client and transcript to the generate endpoint and
+    // stores the response, merged over emptyDraft, as the editable draft.
    setIsGenerating(true);
-    const response = await axios.post('/coaching/session-memory/generate', {
-      clientId,
-      transcript,
-    });
-    setDraft({
-      ...emptyDraft,
-      ...response.data,
-    });
-    setNotice('Draft generated. Review and edit before saving.');
-    setIsGenerating(false);
+
+    try {
+      const response = await axios.post('/coaching/session-memory/generate', {
+        clientId,
+        transcript,
+      });
+      setDraft({
+        ...emptyDraft,
+        ...response.data,
+      });
+      setNotice('Draft generated. Review and edit before saving.');
+    } catch (error) {
+      // Prefer the response's `message`, then its `error` field, then a generic notice.
+      if (axios.isAxiosError(error)) {
+        setNotice(
+          error.response?.data?.message ||
+            error.response?.data?.error ||
+            'Memory generation failed.',
+        );
+      } else {
+        setNotice('Memory generation failed.');
+      }
+    } finally {
+      // Runs on success and on failure, so the generating flag is always cleared.
+      setIsGenerating(false);
+    }
+  }
+
+  /**
+   * Uploads the selected audio file for transcription and places the returned
+   * text in the transcript field. Outcomes are reported through the notice,
+   * and the transcribing flag is cleared whether the request succeeds or fails.
+   */
+  async function transcribeAudio() {
+    if (!audioFile) {
+      setNotice('Choose an audio file first.');
+      return;
+    }
+
+    const uploadBody = new FormData();
+    uploadBody.append('audio', audioFile);
+    setIsTranscribing(true);
+
+    try {
+      const { data } = await axios.post('/coaching/session-memory/transcribe', uploadBody, {
+        headers: { 'Content-Type': 'multipart/form-data' },
+      });
+      setTranscript(data.text || '');
+      setNotice('Audio transcribed. Review the transcript before generating memory.');
+    } catch (error) {
+      // Prefer the response's `message`, then its `error` field; anything else
+      // (including non-axios failures) falls back to the generic notice.
+      const serverMessage = axios.isAxiosError(error)
+        ? error.response?.data?.message || error.response?.data?.error
+        : undefined;
+      setNotice(serverMessage || 'Audio transcription failed.');
+    } finally {
+      setIsTranscribing(false);
+    }
  }

  async function saveMemory(shareWithClient: boolean) {
@ -270,6 +323,41 @@ const SessionMemory = () => {
                ))}
              </select>

+              <div className='mt-5 rounded-lg border border-[#19192d]/10 bg-[#fffdf9] p-3'>
+                <div className='flex items-center gap-2 text-sm font-semibold text-[#19192d]'>
+                  <BaseIcon path={mdiMicrophoneOutline} size={18} />
+                  Audio transcription
+                </div>
+                <label className='mt-3 flex cursor-pointer items-center justify-between gap-3 rounded-lg border border-dashed border-[#19192d]/15 bg-white px-3 py-3 text-sm text-[#72798a]'>
+                  <span className='truncate'>
+                    {audioFile ? audioFile.name : 'Choose an audio file'}
+                  </span>
+                  <span className='inline-flex items-center gap-2 rounded-full bg-[#f3fbf8] px-3 py-1 text-xs font-semibold text-[#257f73]'>
+                    <BaseIcon path={mdiCloudUploadOutline} size={16} />
+                    Upload
+                  </span>
+                  <input
+                    type='file'
+                    accept='audio/*'
+                    className='hidden'
+                    onChange={(event) => {
+                      const file = event.target.files?.[0] || null;
+                      setAudioFile(file);
+                      setNotice('');
+                    }}
+                  />
+                </label>
+                <button
+                  type='button'
+                  className='mt-3 inline-flex items-center gap-2 rounded-full bg-[#35b7a5] px-3 py-1.5 text-sm font-semibold text-white disabled:opacity-50'
+                  disabled={isTranscribing || !audioFile}
+                  onClick={transcribeAudio}
+                >
+                  <BaseIcon path={mdiMicrophoneOutline} size={18} />
+                  {isTranscribing ? 'Transcribing...' : 'Transcribe audio'}
+                </button>
+              </div>
+
              <label className='mb-2 mt-5 block text-sm font-semibold text-[#72798a]'>
                Transcript or raw notes
              </label>