Add coaching audio transcription flow
This commit is contained in:
parent
2a10483480
commit
ba861959bd
@ -1,4 +1,7 @@
|
||||
const express = require("express");
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
const formidable = require("formidable");
|
||||
const db = require("../db/models");
|
||||
const wrapAsync = require("../helpers").wrapAsync;
|
||||
const { LocalAIApi } = require("../ai/LocalAIApi");
|
||||
@ -26,6 +29,113 @@ function splitActionItems(value) {
|
||||
.slice(0, 8);
|
||||
}
|
||||
|
||||
/**
 * Returns the single uploaded file stored under `fieldName`.
 *
 * The parsed upload map may hold either one file object or an array of file
 * objects per field; when it is an array, only the first entry is returned.
 *
 * @param {object|null|undefined} files - Map of field name to uploaded file(s).
 * @param {string} fieldName - Form field to look up.
 * @returns {*} The file object, or `undefined` when nothing was uploaded under that field.
 */
function getFirstUploadedFile(files, fieldName) {
  // A missing upload map means "no file" rather than a TypeError on property access.
  if (files == null) {
    return undefined;
  }

  const entry = files[fieldName];

  return Array.isArray(entry) ? entry[0] : entry;
}
|
||||
|
||||
/**
 * Parses a multipart request with formidable and resolves with the uploaded
 * audio file.
 *
 * The file is looked up under the "audio" field first and under "file" as a
 * fallback. Uploads are limited to 100 MiB and keep their original extension.
 *
 * @param {object} req - Incoming HTTP request carrying the multipart body.
 * @returns {Promise<*>} Resolves with the uploaded file object, or a falsy value
 *   when neither field held a file; rejects with the parse error otherwise.
 */
function parseAudioUpload(req) {
  const uploadOptions = {
    multiples: false,
    keepExtensions: true,
    maxFileSize: 100 * 1024 * 1024,
  };

  return new Promise((resolve, reject) => {
    const onParsed = (parseError, _fields, uploadedFiles) => {
      if (parseError) {
        reject(parseError);
        return;
      }

      const audioUpload = getFirstUploadedFile(uploadedFiles, "audio");

      // Fall back to the generic "file" field when "audio" is absent.
      resolve(audioUpload ? audioUpload : getFirstUploadedFile(uploadedFiles, "file"));
    };

    new formidable.IncomingForm(uploadOptions).parse(req, onParsed);
  });
}
|
||||
|
||||
async function removeUploadedAudio(filePath) {
|
||||
try {
|
||||
await fs.promises.unlink(filePath);
|
||||
} catch (error) {
|
||||
console.warn("Failed to remove uploaded audio file", error);
|
||||
}
|
||||
}
|
||||
|
||||
async function transcribeAudioFile(audioFile) {
|
||||
const filePath = audioFile.filepath || audioFile.path;
|
||||
const fileName = audioFile.originalFilename || audioFile.name || path.basename(filePath);
|
||||
const mimeType = audioFile.mimetype || audioFile.type || "application/octet-stream";
|
||||
const transcriptionUrl =
|
||||
process.env.AI_TRANSCRIPTION_URL || "https://api.openai.com/v1/audio/transcriptions";
|
||||
const transcriptionModel = process.env.AI_TRANSCRIPTION_MODEL || "gpt-4o-mini-transcribe";
|
||||
const transcriptionApiKey = process.env.AI_TRANSCRIPTION_API_KEY || process.env.OPENAI_API_KEY;
|
||||
|
||||
if (!filePath) {
|
||||
throw new Error("Uploaded audio file does not have a readable path");
|
||||
}
|
||||
|
||||
if (!transcriptionApiKey && !process.env.AI_TRANSCRIPTION_URL) {
|
||||
return {
|
||||
status: 501,
|
||||
body: {
|
||||
error: "transcription_not_configured",
|
||||
message:
|
||||
"Set AI_TRANSCRIPTION_URL for the AppWizzy proxy or AI_TRANSCRIPTION_API_KEY/OPENAI_API_KEY for direct transcription.",
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
const audioBuffer = await fs.promises.readFile(filePath);
|
||||
const formData = new FormData();
|
||||
formData.append("file", new Blob([audioBuffer], { type: mimeType }), fileName);
|
||||
formData.append("model", transcriptionModel);
|
||||
formData.append("response_format", "json");
|
||||
|
||||
const headers = {};
|
||||
|
||||
if (transcriptionApiKey) {
|
||||
headers.Authorization = `Bearer ${transcriptionApiKey}`;
|
||||
}
|
||||
|
||||
if (process.env.PROJECT_UUID) {
|
||||
headers["project-uuid"] = process.env.PROJECT_UUID;
|
||||
}
|
||||
|
||||
const response = await fetch(transcriptionUrl, {
|
||||
method: "POST",
|
||||
headers,
|
||||
body: formData,
|
||||
});
|
||||
const responseText = await response.text();
|
||||
let payload;
|
||||
|
||||
try {
|
||||
payload = JSON.parse(responseText);
|
||||
} catch {
|
||||
throw new Error(`Transcription response is not JSON: ${responseText}`);
|
||||
}
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`Transcription request failed with ${response.status}: ${JSON.stringify(payload)}`);
|
||||
}
|
||||
|
||||
const text = payload.text || payload.transcript || payload.output_text;
|
||||
|
||||
if (!text) {
|
||||
throw new Error(`Transcription response does not include text: ${JSON.stringify(payload)}`);
|
||||
}
|
||||
|
||||
return {
|
||||
status: 200,
|
||||
body: { text },
|
||||
};
|
||||
}
|
||||
|
||||
router.get(
|
||||
"/summary",
|
||||
wrapAsync(async (req, res) => {
|
||||
@ -424,6 +534,27 @@ router.post(
|
||||
}),
|
||||
);
|
||||
|
||||
// POST /session-memory/transcribe
// Accepts a multipart audio upload, forwards it for transcription and replies
// with whatever status/body the transcription helper produced. Responds 400
// with { error: "audio_required" } when no file was uploaded. The uploaded
// file is removed afterwards whether or not transcription succeeded.
router.post(
  "/session-memory/transcribe",
  wrapAsync(async (req, res) => {
    const upload = await parseAudioUpload(req);

    if (!upload) {
      res.status(400).send({ error: "audio_required" });
      return;
    }

    // Captured up front so cleanup in `finally` does not depend on the transcription outcome.
    const uploadedPath = upload.filepath || upload.path;

    try {
      const { status, body } = await transcribeAudioFile(upload);
      res.status(status).send(body);
    } finally {
      await removeUploadedAudio(uploadedPath);
    }
  }),
);
|
||||
|
||||
router.get(
|
||||
"/client-portal/me",
|
||||
wrapAsync(async (req, res) => {
|
||||
|
||||
@ -1,8 +1,10 @@
|
||||
import {
|
||||
mdiCheckCircleOutline,
|
||||
mdiCloudUploadOutline,
|
||||
mdiContentCopy,
|
||||
mdiFileDocumentEditOutline,
|
||||
mdiLightbulbOnOutline,
|
||||
mdiMicrophoneOutline,
|
||||
mdiSendOutline,
|
||||
} from '@mdi/js';
|
||||
import axios from 'axios';
|
||||
@ -122,7 +124,9 @@ const SessionMemory = () => {
|
||||
const [clientId, setClientId] = React.useState('');
|
||||
const [transcript, setTranscript] = React.useState('');
|
||||
const [draft, setDraft] = React.useState<MemoryDraft>(emptyDraft);
|
||||
const [audioFile, setAudioFile] = React.useState<File | null>(null);
|
||||
const [isGenerating, setIsGenerating] = React.useState(false);
|
||||
const [isTranscribing, setIsTranscribing] = React.useState(false);
|
||||
const [isSaving, setIsSaving] = React.useState(false);
|
||||
const [notice, setNotice] = React.useState('');
|
||||
|
||||
@ -155,16 +159,65 @@ const SessionMemory = () => {
|
||||
|
||||
/**
 * Requests an AI-generated memory draft for the selected client and transcript.
 *
 * On success the response is merged over `emptyDraft` and stored as the current
 * draft. On failure the server-provided message (or error code) is shown as the
 * notice, falling back to a generic message.
 */
async function generateMemory() {
  setIsGenerating(true);

  // The request is issued exactly once, inside try/finally, so a failed call
  // surfaces a notice and can never leave the "generating" flag stuck on.
  try {
    const response = await axios.post('/coaching/session-memory/generate', {
      clientId,
      transcript,
    });

    setDraft({
      ...emptyDraft,
      ...response.data,
    });
    setNotice('Draft generated. Review and edit before saving.');
  } catch (error) {
    if (axios.isAxiosError(error)) {
      setNotice(
        error.response?.data?.message ||
          error.response?.data?.error ||
          'Memory generation failed.',
      );
    } else {
      setNotice('Memory generation failed.');
    }
  } finally {
    setIsGenerating(false);
  }
}
|
||||
|
||||
/**
 * Uploads the chosen audio file for transcription and places the returned text
 * into the transcript field.
 *
 * Shows a notice when no file is selected, when transcription succeeds, and
 * when it fails (preferring the server's message or error code).
 */
async function transcribeAudio() {
  if (!audioFile) {
    setNotice('Choose an audio file first.');
    return;
  }

  const uploadBody = new FormData();
  uploadBody.append('audio', audioFile);
  setIsTranscribing(true);

  try {
    const { data } = await axios.post('/coaching/session-memory/transcribe', uploadBody, {
      headers: { 'Content-Type': 'multipart/form-data' },
    });

    setTranscript(data.text || '');
    setNotice('Audio transcribed. Review the transcript before generating memory.');
  } catch (error) {
    // Only axios errors carry a server response worth surfacing.
    const serverDetail = axios.isAxiosError(error)
      ? error.response?.data?.message || error.response?.data?.error
      : undefined;

    setNotice(serverDetail || 'Audio transcription failed.');
  } finally {
    setIsTranscribing(false);
  }
}
|
||||
|
||||
async function saveMemory(shareWithClient: boolean) {
|
||||
@ -270,6 +323,41 @@ const SessionMemory = () => {
|
||||
))}
|
||||
</select>
|
||||
|
||||
<div className='mt-5 rounded-lg border border-[#19192d]/10 bg-[#fffdf9] p-3'>
|
||||
<div className='flex items-center gap-2 text-sm font-semibold text-[#19192d]'>
|
||||
<BaseIcon path={mdiMicrophoneOutline} size={18} />
|
||||
Audio transcription
|
||||
</div>
|
||||
<label className='mt-3 flex cursor-pointer items-center justify-between gap-3 rounded-lg border border-dashed border-[#19192d]/15 bg-white px-3 py-3 text-sm text-[#72798a]'>
|
||||
<span className='truncate'>
|
||||
{audioFile ? audioFile.name : 'Choose an audio file'}
|
||||
</span>
|
||||
<span className='inline-flex items-center gap-2 rounded-full bg-[#f3fbf8] px-3 py-1 text-xs font-semibold text-[#257f73]'>
|
||||
<BaseIcon path={mdiCloudUploadOutline} size={16} />
|
||||
Upload
|
||||
</span>
|
||||
<input
|
||||
type='file'
|
||||
accept='audio/*'
|
||||
className='hidden'
|
||||
onChange={(event) => {
|
||||
const file = event.target.files?.[0] || null;
|
||||
setAudioFile(file);
|
||||
setNotice('');
|
||||
}}
|
||||
/>
|
||||
</label>
|
||||
<button
|
||||
type='button'
|
||||
className='mt-3 inline-flex items-center gap-2 rounded-full bg-[#35b7a5] px-3 py-1.5 text-sm font-semibold text-white disabled:opacity-50'
|
||||
disabled={isTranscribing || !audioFile}
|
||||
onClick={transcribeAudio}
|
||||
>
|
||||
<BaseIcon path={mdiMicrophoneOutline} size={18} />
|
||||
{isTranscribing ? 'Transcribing...' : 'Transcribe audio'}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<label className='mb-2 mt-5 block text-sm font-semibold text-[#72798a]'>
|
||||
Transcript or raw notes
|
||||
</label>
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user