Add coaching audio transcription flow

This commit is contained in:
Flatlogic Bot 2026-06-09 14:50:17 +00:00
parent 2a10483480
commit ba861959bd
2 changed files with 229 additions and 10 deletions

View File

@ -1,4 +1,7 @@
const express = require("express");
const fs = require("fs");
const path = require("path");
const formidable = require("formidable");
const db = require("../db/models");
const wrapAsync = require("../helpers").wrapAsync;
const { LocalAIApi } = require("../ai/LocalAIApi");
@ -26,6 +29,113 @@ function splitActionItems(value) {
.slice(0, 8);
}
/**
 * Picks a single uploaded file out of a parsed multipart `files` map.
 *
 * The value stored under a field may be either one file object or an array
 * of them; in the array case the first element is returned.
 *
 * @param {object} files - Map of field name to file (or array of files).
 * @param {string} fieldName - Form field to look up.
 * @returns {*} The file for that field, or undefined when the field is
 *   absent or holds an empty array.
 */
function getFirstUploadedFile(files, fieldName) {
  const entry = files[fieldName];
  return Array.isArray(entry) ? entry[0] : entry;
}
/**
 * Parses a multipart request with formidable and resolves with the uploaded
 * audio file.
 *
 * The file is looked up under the "audio" field first and under "file" as a
 * fallback. The promise resolves with undefined when neither field carries a
 * file, and rejects with the error reported by formidable when parsing fails.
 *
 * @param {object} req - Incoming HTTP request carrying the multipart body.
 * @returns {Promise<*>} The first matching uploaded file, if any.
 */
function parseAudioUpload(req) {
  const uploadOptions = {
    multiples: false,
    keepExtensions: true,
    // Upper bound handed to formidable: 100 * 1024 * 1024 bytes.
    maxFileSize: 100 * 1024 * 1024,
  };

  return new Promise((resolve, reject) => {
    const incoming = new formidable.IncomingForm(uploadOptions);

    incoming.parse(req, (parseError, _fields, uploaded) => {
      if (parseError) {
        reject(parseError);
      } else {
        resolve(
          getFirstUploadedFile(uploaded, "audio") || getFirstUploadedFile(uploaded, "file"),
        );
      }
    });
  });
}
async function removeUploadedAudio(filePath) {
try {
await fs.promises.unlink(filePath);
} catch (error) {
console.warn("Failed to remove uploaded audio file", error);
}
}
async function transcribeAudioFile(audioFile) {
const filePath = audioFile.filepath || audioFile.path;
const fileName = audioFile.originalFilename || audioFile.name || path.basename(filePath);
const mimeType = audioFile.mimetype || audioFile.type || "application/octet-stream";
const transcriptionUrl =
process.env.AI_TRANSCRIPTION_URL || "https://api.openai.com/v1/audio/transcriptions";
const transcriptionModel = process.env.AI_TRANSCRIPTION_MODEL || "gpt-4o-mini-transcribe";
const transcriptionApiKey = process.env.AI_TRANSCRIPTION_API_KEY || process.env.OPENAI_API_KEY;
if (!filePath) {
throw new Error("Uploaded audio file does not have a readable path");
}
if (!transcriptionApiKey && !process.env.AI_TRANSCRIPTION_URL) {
return {
status: 501,
body: {
error: "transcription_not_configured",
message:
"Set AI_TRANSCRIPTION_URL for the AppWizzy proxy or AI_TRANSCRIPTION_API_KEY/OPENAI_API_KEY for direct transcription.",
},
};
}
const audioBuffer = await fs.promises.readFile(filePath);
const formData = new FormData();
formData.append("file", new Blob([audioBuffer], { type: mimeType }), fileName);
formData.append("model", transcriptionModel);
formData.append("response_format", "json");
const headers = {};
if (transcriptionApiKey) {
headers.Authorization = `Bearer ${transcriptionApiKey}`;
}
if (process.env.PROJECT_UUID) {
headers["project-uuid"] = process.env.PROJECT_UUID;
}
const response = await fetch(transcriptionUrl, {
method: "POST",
headers,
body: formData,
});
const responseText = await response.text();
let payload;
try {
payload = JSON.parse(responseText);
} catch {
throw new Error(`Transcription response is not JSON: ${responseText}`);
}
if (!response.ok) {
throw new Error(`Transcription request failed with ${response.status}: ${JSON.stringify(payload)}`);
}
const text = payload.text || payload.transcript || payload.output_text;
if (!text) {
throw new Error(`Transcription response does not include text: ${JSON.stringify(payload)}`);
}
return {
status: 200,
body: { text },
};
}
router.get(
"/summary",
wrapAsync(async (req, res) => {
@ -424,6 +534,27 @@ router.post(
}),
);
// POST /session-memory/transcribe
// Accepts a multipart audio upload, relays the status and body returned by
// transcribeAudioFile, and always removes the temporary upload afterwards.
// Responds 400 { error: "audio_required" } when no file was uploaded.
router.post(
  "/session-memory/transcribe",
  wrapAsync(async function transcribeSessionAudio(req, res) {
    const upload = await parseAudioUpload(req);
    if (!upload) {
      res.status(400).send({ error: "audio_required" });
      return;
    }

    // Capture the path up front so cleanup runs even if transcription throws.
    const tempPath = upload.filepath || upload.path;
    try {
      const { status, body } = await transcribeAudioFile(upload);
      res.status(status).send(body);
    } finally {
      await removeUploadedAudio(tempPath);
    }
  }),
);
router.get(
"/client-portal/me",
wrapAsync(async (req, res) => {

View File

@ -1,8 +1,10 @@
import {
mdiCheckCircleOutline,
mdiCloudUploadOutline,
mdiContentCopy,
mdiFileDocumentEditOutline,
mdiLightbulbOnOutline,
mdiMicrophoneOutline,
mdiSendOutline,
} from '@mdi/js';
import axios from 'axios';
@ -122,7 +124,9 @@ const SessionMemory = () => {
// Client id sent along with the transcript when generating a memory draft.
const [clientId, setClientId] = React.useState('');
// Transcript text; filled from the transcription response and sent to the
// generate endpoint.
const [transcript, setTranscript] = React.useState('');
// Draft under review; starts as emptyDraft and is replaced by the generate
// response merged over emptyDraft.
const [draft, setDraft] = React.useState<MemoryDraft>(emptyDraft);
// Audio file picked in the file input, or null when none is selected.
const [audioFile, setAudioFile] = React.useState<File | null>(null);
// True while the generate request is in flight.
const [isGenerating, setIsGenerating] = React.useState(false);
// True while the transcribe request is in flight; disables the transcribe button.
const [isTranscribing, setIsTranscribing] = React.useState(false);
// NOTE(review): presumably true while a save request is in flight — the code
// that sets it is outside this view; confirm.
const [isSaving, setIsSaving] = React.useState(false);
// Status or error message set by the handlers; cleared when a new audio file
// is chosen.
const [notice, setNotice] = React.useState('');
@ -155,16 +159,65 @@ const SessionMemory = () => {
/**
 * Requests a memory draft for the current client and transcript.
 *
 * On success the response is merged over `emptyDraft` and a review notice is
 * shown. On failure the server-provided `message` or `error` is shown when
 * available, otherwise a generic message. The generating flag is always
 * reset, whether the request succeeds or fails.
 */
async function generateMemory() {
  setIsGenerating(true);

  // The request is issued exactly once, inside the try block: an unguarded
  // copy ahead of it would send the POST twice and, on failure, skip the
  // `finally` reset and leave the UI stuck in the generating state.
  try {
    const response = await axios.post('/coaching/session-memory/generate', {
      clientId,
      transcript,
    });
    setDraft({
      ...emptyDraft,
      ...response.data,
    });
    setNotice('Draft generated. Review and edit before saving.');
  } catch (error) {
    if (axios.isAxiosError(error)) {
      // Prefer the human-readable message, then the error code.
      setNotice(
        error.response?.data?.message ||
          error.response?.data?.error ||
          'Memory generation failed.',
      );
    } else {
      setNotice('Memory generation failed.');
    }
  } finally {
    setIsGenerating(false);
  }
}
/**
 * Uploads the selected audio file for transcription and stores the returned
 * text as the transcript.
 *
 * Shows a prompt and does nothing else when no file is selected. On failure
 * the server-provided `message` or `error` is shown when available, otherwise
 * a generic message. The transcribing flag is reset once the request settles.
 */
async function transcribeAudio() {
  if (!audioFile) {
    setNotice('Choose an audio file first.');
    return;
  }

  // The backend looks for the upload under the "audio" field.
  const uploadBody = new FormData();
  uploadBody.append('audio', audioFile);

  setIsTranscribing(true);
  try {
    const { data } = await axios.post('/coaching/session-memory/transcribe', uploadBody, {
      headers: { 'Content-Type': 'multipart/form-data' },
    });
    setTranscript(data.text || '');
    setNotice('Audio transcribed. Review the transcript before generating memory.');
  } catch (failure) {
    // Only axios errors carry a response body worth surfacing.
    const serverDetail = axios.isAxiosError(failure)
      ? failure.response?.data?.message || failure.response?.data?.error
      : undefined;
    setNotice(serverDetail || 'Audio transcription failed.');
  } finally {
    setIsTranscribing(false);
  }
}
async function saveMemory(shareWithClient: boolean) {
@ -270,6 +323,41 @@ const SessionMemory = () => {
))}
</select>
<div className='mt-5 rounded-lg border border-[#19192d]/10 bg-[#fffdf9] p-3'>
<div className='flex items-center gap-2 text-sm font-semibold text-[#19192d]'>
<BaseIcon path={mdiMicrophoneOutline} size={18} />
Audio transcription
</div>
<label className='mt-3 flex cursor-pointer items-center justify-between gap-3 rounded-lg border border-dashed border-[#19192d]/15 bg-white px-3 py-3 text-sm text-[#72798a]'>
<span className='truncate'>
{audioFile ? audioFile.name : 'Choose an audio file'}
</span>
<span className='inline-flex items-center gap-2 rounded-full bg-[#f3fbf8] px-3 py-1 text-xs font-semibold text-[#257f73]'>
<BaseIcon path={mdiCloudUploadOutline} size={16} />
Upload
</span>
<input
type='file'
accept='audio/*'
className='hidden'
onChange={(event) => {
const file = event.target.files?.[0] || null;
setAudioFile(file);
setNotice('');
}}
/>
</label>
<button
type='button'
className='mt-3 inline-flex items-center gap-2 rounded-full bg-[#35b7a5] px-3 py-1.5 text-sm font-semibold text-white disabled:opacity-50'
disabled={isTranscribing || !audioFile}
onClick={transcribeAudio}
>
<BaseIcon path={mdiMicrophoneOutline} size={18} />
{isTranscribing ? 'Transcribing...' : 'Transcribe audio'}
</button>
</div>
<label className='mb-2 mt-5 block text-sm font-semibold text-[#72798a]'>
Transcript or raw notes
</label>