diff --git a/backend/src/routes/coaching.js b/backend/src/routes/coaching.js
index 825d69a..71a5553 100644
--- a/backend/src/routes/coaching.js
+++ b/backend/src/routes/coaching.js
@@ -1,4 +1,7 @@
 const express = require("express");
+const fs = require("fs");
+const path = require("path");
+const formidable = require("formidable");
 const db = require("../db/models");
 const wrapAsync = require("../helpers").wrapAsync;
 const { LocalAIApi } = require("../ai/LocalAIApi");
@@ -26,6 +29,179 @@ function splitActionItems(value) {
     .slice(0, 8);
 }
 
+/**
+ * Return the upload stored under `fieldName` in a parsed `files` map.
+ *
+ * A field may hold one file object or an array of them; for an array the
+ * first element is returned.
+ *
+ * @param {Object} files - Map of field name to uploaded file(s).
+ * @param {string} fieldName - Multipart field to look up.
+ * @returns {Object|undefined} The file for the field, if any.
+ */
+function getFirstUploadedFile(files, fieldName) {
+  const entry = files[fieldName];
+
+  return Array.isArray(entry) ? entry[0] : entry;
+}
+
+/**
+ * Parse a multipart request and resolve with the uploaded audio file.
+ *
+ * The file is taken from the `audio` field, falling back to `file`. Any
+ * other upload the parser reports (and every reported upload when parsing
+ * fails) is deleted here, because the route only removes the file it is
+ * handed.
+ *
+ * @param {Object} req - Incoming HTTP request with a multipart body.
+ * @returns {Promise<Object|undefined>} The selected file, or undefined when
+ *   neither field was sent. Rejects with the parser's error.
+ */
+function parseAudioUpload(req) {
+  return new Promise((resolve, reject) => {
+    const form = new formidable.IncomingForm({
+      multiples: false,
+      keepExtensions: true,
+      maxFileSize: 100 * 1024 * 1024,
+    });
+    const storedPaths = new Set();
+
+    // Record every file the parser reports, not only the one returned.
+    form.on("file", (_name, file) => {
+      const storedPath = file.filepath || file.path;
+
+      if (storedPath) {
+        storedPaths.add(storedPath);
+      }
+    });
+
+    form.parse(req, (error, _fields, files) => {
+      const audioFile = error
+        ? undefined
+        : getFirstUploadedFile(files, "audio") || getFirstUploadedFile(files, "file");
+
+      if (audioFile) {
+        storedPaths.delete(audioFile.filepath || audioFile.path);
+      }
+
+      const cleanups = [...storedPaths].map((storedPath) => removeUploadedAudio(storedPath));
+
+      // removeUploadedAudio() catches its own errors, so this always settles.
+      Promise.all(cleanups).then(() => {
+        if (error) {
+          reject(error);
+          return;
+        }
+
+        resolve(audioFile);
+      });
+    });
+  });
+}
+
+/**
+ * Delete a temporary upload, logging a warning instead of throwing.
+ *
+ * @param {string} filePath - Path of the file to delete.
+ * @returns {Promise<void>} Resolves whether or not the unlink succeeded.
+ */
+async function removeUploadedAudio(filePath) {
+  try {
+    await fs.promises.unlink(filePath);
+  } catch (error) {
+    console.warn("Failed to remove uploaded audio file", error);
+  }
+}
+
+/**
+ * Send an uploaded audio file to the configured transcription endpoint.
+ *
+ * Configuration is read from AI_TRANSCRIPTION_URL, AI_TRANSCRIPTION_MODEL,
+ * AI_TRANSCRIPTION_API_KEY (or OPENAI_API_KEY) and PROJECT_UUID.
+ *
+ * @param {Object} audioFile - Upload descriptor from the multipart parser.
+ * @returns {Promise<{status: number, body: Object}>} Status 200 with
+ *   `{ text }`, or 501 when neither a URL nor an API key is configured.
+ * @throws {Error} If the file has no path, or the endpoint returns a
+ *   non-JSON body, a non-OK status, or a payload without text.
+ */
+async function transcribeAudioFile(audioFile) {
+  const filePath = audioFile.filepath || audioFile.path;
+
+  // Check first: path.basename() below throws a TypeError on undefined.
+  if (!filePath) {
+    throw new Error("Uploaded audio file does not have a readable path");
+  }
+
+  const fileName = audioFile.originalFilename || audioFile.name || path.basename(filePath);
+  const mimeType = audioFile.mimetype || audioFile.type || "application/octet-stream";
+  const transcriptionUrl =
+    process.env.AI_TRANSCRIPTION_URL || "https://api.openai.com/v1/audio/transcriptions";
+  const transcriptionModel = process.env.AI_TRANSCRIPTION_MODEL || "gpt-4o-mini-transcribe";
+  const transcriptionApiKey = process.env.AI_TRANSCRIPTION_API_KEY || process.env.OPENAI_API_KEY;
+
+  if (!transcriptionApiKey && !process.env.AI_TRANSCRIPTION_URL) {
+    return {
+      status: 501,
+      body: {
+        error: "transcription_not_configured",
+        message:
+          "Set AI_TRANSCRIPTION_URL for the AppWizzy proxy or AI_TRANSCRIPTION_API_KEY/OPENAI_API_KEY for direct transcription.",
+      },
+    };
+  }
+
+  const audioBuffer = await fs.promises.readFile(filePath);
+  const formData = new FormData();
+  formData.append("file", new Blob([audioBuffer], { type: mimeType }), fileName);
+  formData.append("model", transcriptionModel);
+  formData.append("response_format", "json");
+
+  const headers = {};
+
+  if (transcriptionApiKey) {
+    headers.Authorization = `Bearer ${transcriptionApiKey}`;
+  }
+
+  if (process.env.PROJECT_UUID) {
+    headers["project-uuid"] = process.env.PROJECT_UUID;
+  }
+
+  const response = await fetch(transcriptionUrl, {
+    method: "POST",
+    headers,
+    body: formData,
+  });
+  const responseText = await response.text();
+  let payload;
+
+  try {
+    payload = JSON.parse(responseText);
+  } catch {
+    // Report the status and cap the echoed body so an oversized non-JSON
+    // reply cannot bloat the error message.
+    throw new Error(
+      `Transcription response is not JSON (status ${response.status}): ${responseText.slice(0, 500)}`,
+    );
+  }
+
+  if (!response.ok) {
+    throw new Error(`Transcription request failed with ${response.status}: ${JSON.stringify(payload)}`);
+  }
+
+  // Optional chaining covers a literal JSON `null` payload.
+  const text = payload?.text || payload?.transcript || payload?.output_text;
+
+  if (!text) {
+    throw new Error(`Transcription response does not include text: ${JSON.stringify(payload)}`);
+  }
+
+  return {
+    status: 200,
+    body: { text },
+  };
+}
+
 router.get(
   "/summary",
   wrapAsync(async (req, res) => {
@@ -424,6 +600,29 @@ router.post(
   }),
 );
 
+// POST /session-memory/transcribe: accept one multipart audio upload (field
+// `audio` or `file`), transcribe it, and always delete the temporary file.
+router.post(
+  "/session-memory/transcribe",
+  wrapAsync(async (req, res) => {
+    const audioFile = await parseAudioUpload(req);
+
+    if (!audioFile) {
+      res.status(400).send({ error: "audio_required" });
+      return;
+    }
+
+    const filePath = audioFile.filepath || audioFile.path;
+
+    try {
+      const result = await transcribeAudioFile(audioFile);
+      res.status(result.status).send(result.body);
+    } finally {
+      await removeUploadedAudio(filePath);
+    }
+  }),
+);
+
 router.get(
   "/client-portal/me",
   wrapAsync(async (req, res) => {
diff --git a/frontend/src/pages/session-memory.tsx b/frontend/src/pages/session-memory.tsx
index ed6f944..1b72160 100644
--- a/frontend/src/pages/session-memory.tsx
+++ b/frontend/src/pages/session-memory.tsx
@@ -1,8 +1,10 @@
 import {
   mdiCheckCircleOutline,
+  mdiCloudUploadOutline,
   mdiContentCopy,
   mdiFileDocumentEditOutline,
   mdiLightbulbOnOutline,
+  mdiMicrophoneOutline,
   mdiSendOutline,
 } from '@mdi/js';
 import axios from 'axios';
@@ -122,7 +124,9 @@ const SessionMemory = () => {
   const [clientId, setClientId] = React.useState('');
   const [transcript, setTranscript] = React.useState('');
   const [draft, setDraft] = React.useState(emptyDraft);
+  const [audioFile, setAudioFile] = React.useState<File | null>(null);
   const [isGenerating, setIsGenerating] = React.useState(false);
+  const [isTranscribing, setIsTranscribing] = React.useState(false);
   const [isSaving, setIsSaving] = React.useState(false);
   const [notice, setNotice] = React.useState('');
 
@@ -155,16 +159,66 @@ const SessionMemory = () => {
 
   async function generateMemory() {
     setIsGenerating(true);
-    const response = await axios.post('/coaching/session-memory/generate', {
-      clientId,
-      transcript,
-    });
-    setDraft({
-      ...emptyDraft,
-      ...response.data,
-    });
-    setNotice('Draft generated. Review and edit before saving.');
-    setIsGenerating(false);
+
+    try {
+      const response = await axios.post('/coaching/session-memory/generate', {
+        clientId,
+        transcript,
+      });
+      setDraft({
+        ...emptyDraft,
+        ...response.data,
+      });
+      setNotice('Draft generated. Review and edit before saving.');
+    } catch (error) {
+      if (axios.isAxiosError(error)) {
+        setNotice(
+          error.response?.data?.message ||
+            error.response?.data?.error ||
+            'Memory generation failed.',
+        );
+      } else {
+        setNotice('Memory generation failed.');
+      }
+    } finally {
+      setIsGenerating(false);
+    }
+  }
+
+  // Upload the chosen audio file and replace the transcript with the result.
+  async function transcribeAudio() {
+    if (!audioFile) {
+      setNotice('Choose an audio file first.');
+      return;
+    }
+
+    const formData = new FormData();
+    formData.append('audio', audioFile);
+    setIsTranscribing(true);
+
+    try {
+      const response = await axios.post(
+        '/coaching/session-memory/transcribe',
+        formData,
+        {
+          headers: { 'Content-Type': 'multipart/form-data' },
+        },
+      );
+      setTranscript(response.data.text || '');
+      setNotice('Audio transcribed. Review the transcript before generating memory.');
+    } catch (error) {
+      if (axios.isAxiosError(error)) {
+        setNotice(
+          error.response?.data?.message ||
+            error.response?.data?.error ||
+            'Audio transcription failed.',
+        );
+      } else {
+        setNotice('Audio transcription failed.');
+      }
+    } finally {
+      setIsTranscribing(false);
+    }
   }
 
   async function saveMemory(shareWithClient: boolean) {
@@ -270,6 +324,41 @@ const SessionMemory = () => {
             ))}
+
+
+ + Audio transcription +
+ + +
+