diff --git a/backend/src/services/studio.js b/backend/src/services/studio.js
index fbc197c..c15e356 100644
--- a/backend/src/services/studio.js
+++ b/backend/src/services/studio.js
@@ -10,6 +10,7 @@ const MasteringSessionsDBApi = require('../db/api/mastering_sessions');
 const ExportsDBApi = require('../db/api/exports');
 const SongMetadataDBApi = require('../db/api/song_metadata');
 const CoverArtworksDBApi = require('../db/api/cover_artworks');
+const AssetsDBApi = require('../db/api/assets');
 
 const { Op } = db.Sequelize;
 
@@ -98,13 +99,42 @@ function buildDescription({ genreName, languageName, promptText, vocalMode, targ
     .join('. ');
 }
 
+// Normalizes a client-supplied upload descriptor. Returns null when it is missing or incomplete,
+// throws the badRequest error when neither name nor path looks like MP3/WAV, else a clean copy.
+function sanitizeUploadedAudioFile(rawFile) {
+  const isObject = Boolean(rawFile) && typeof rawFile === 'object';
+  if (!isObject) return null;
+
+  const [name, privateUrl, publicUrl] = [rawFile.name, rawFile.privateUrl, rawFile.publicUrl].map((value) =>
+    `${value || ''}`.trim(),
+  );
+  const isComplete = Boolean(name && privateUrl && publicUrl && rawFile.new);
+  if (!isComplete) return null;
+
+  // Either the display name or the stored path must carry an .mp3/.wav extension.
+  const audioExtension = /\.(mp3|wav)$/i;
+  if (!(audioExtension.test(name) || audioExtension.test(privateUrl))) {
+    throw badRequest('Only MP3 and WAV vocal uploads are supported right now.');
+  }
+  const parsedSize = Number(rawFile.sizeInBytes);
+  const hasValidSize = Number.isFinite(parsedSize) && parsedSize > 0;
+  return {
+    id: rawFile.id || undefined,
+    name,
+    sizeInBytes: hasValidSize ? parsedSize : null,
+    privateUrl,
+    publicUrl,
+    new: true,
+  };
+}
+
 function generateIsrc() {
   const year = new Date().getFullYear().toString().slice(-2);
   const random = Math.random().toString().slice(2, 7);
   return `ZA-AIM-${year}-${random}`;
 }
 
-function mapSessionSummary(project, song, generationRequest, mixSession, masteringSession, exportJob, recordingSession, songMetadata, coverArtwork) {
+function mapSessionSummary(project, song, generationRequest, mixSession, masteringSession, exportJob, recordingSession, songMetadata, coverArtwork, vocalAsset, vocalUpload) {
   return {
     project: {
       id: project.id,
@@ -133,6 +163,15 @@ function mapSessionSummary(project, song, generationRequest, mixSession, masteri
           href: `/recording_sessions/${recordingSession.id}`,
         }
       : null,
+    vocalAsset: vocalAsset
+      ? {
+          id: vocalAsset.id,
+          name: vocalAsset.name,
+          fileName: vocalUpload?.name || vocalAsset.name,
+          publicUrl: vocalUpload?.publicUrl || null,
+          href: `/assets/${vocalAsset.id}`,
+        }
+      : null,
     mixSession: {
       id: mixSession.id,
       status: mixSession.status,
@@ -369,6 +408,12 @@ module.exports = class StudioService {
       throw badRequest('Target BPM must be between 60 and 180.');
     }
 
+    const uploadedVocal = sanitizeUploadedAudioFile(data.vocalUpload);
+
+    if (data.vocalMode === 'upload' && !uploadedVocal) {
+      throw badRequest('Upload an MP3 or WAV vocal take before launching the session.');
+    }
+
     const scopedWhere = organizationId ? { organizationsId: organizationId } : {};
     const [genre, language, selectedPreset, musicModel] = await Promise.all([
       db.genres.findOne({ where: { id: data.genreId, ...scopedWhere } }),
@@ -444,12 +489,30 @@ module.exports = class StudioService {
         { currentUser, transaction },
       );
 
+      const vocalAsset = uploadedVocal
+        ? await AssetsDBApi.create(
+            {
+              asset_type: 'audio',
+              audio_role: 'vocal_raw',
+              name: uploadedVocal.name,
+              uploaded_user: currentUser.id,
+              project: project.id,
+              song: song.id,
+              is_stereo: false,
+              organizations: organizationId,
+              file_blobs: [uploadedVocal],
+            },
+            { currentUser, transaction },
+          )
+        : null;
+
       const generationRequest = await GenerationRequestsDBApi.create(
         {
           project: project.id,
           song: song.id,
           requested_user: currentUser.id,
           model: musicModel?.id || null,
+          input_asset: vocalAsset?.id || null,
           request_type: data.vocalMode === 'upload' ? 'generate_beat_from_vocals' : 'generate_beat_from_text',
           prompt_text: promptText,
           target_genre: genre.id,
@@ -490,6 +553,7 @@ module.exports = class StudioService {
               instrument: track.instrument,
               volume_db: track.volume_db,
               pan: index % 2 === 0 ? -5 : 5,
+              source_asset: track.track_type === 'vocal' && vocalAsset ? vocalAsset.id : null,
               organizations: organizationId,
             },
             { currentUser, transaction },
@@ -597,6 +661,8 @@ module.exports = class StudioService {
             recordingSession,
             songMetadata,
             coverArtwork,
+            vocalAsset,
+            uploadedVocal,
           ),
           arrangementSections: arrangementSections.map((section) => ({
             id: section.id,
diff --git a/frontend/src/components/Studio/AudioWaveformPreview.tsx b/frontend/src/components/Studio/AudioWaveformPreview.tsx
new file mode 100644
index 0000000..ece1053
--- /dev/null
+++ b/frontend/src/components/Studio/AudioWaveformPreview.tsx
@@ -0,0 +1,139 @@
+import React, { useEffect, useMemo, useState } from 'react';
+
+type Props = { // Inputs accepted by AudioWaveformPreview.
+  file?: File | null; // Local file; takes precedence over audioUrl for both playback and decoding.
+  audioUrl?: string; // Remote audio that is fetched and decoded when no file is given.
+  title?: string; // Small uppercase heading; defaults to 'Waveform preview'.
+  subtitle?: string; // Main label; falls back to the file name, then 'Awaiting audio upload'.
+  emptyMessage?: string; // Placeholder shown while there are no bars to draw.
+  isLoading?: boolean; // When true the status pill reads 'Rendering…'.
+};
+
+const BAR_COUNT = 56; // Number of bars createWaveformBars emits for every waveform.
+
+/**
+ * Collapses one channel of samples into BAR_COUNT bar heights (percentages clamped to 8–100).
+ * Each bar is the mean absolute amplitude of its slice, scaled by 280 and rounded.
+ */
+function createWaveformBars(channelData: Float32Array) {
+  const totalSamples = channelData.length;
+  const samplesPerBar = Math.max(1, Math.floor(totalSamples / BAR_COUNT));
+
+  return Array.from({ length: BAR_COUNT }, (_, barIndex) => {
+    const from = barIndex * samplesPerBar;
+    const to = Math.min(totalSamples, from + samplesPerBar);
+    if (to <= from) {
+      return 8; // No samples left for this bar: a zero average clamps to the 8% floor.
+    }
+
+    const magnitude = channelData.subarray(from, to).reduce((total, sample) => total + Math.abs(sample), 0);
+    return Math.min(100, Math.max(8, Math.round((magnitude / (to - from)) * 280)));
+  });
+}
+
+// Bar-waveform preview plus a native audio player; a local file takes precedence over audioUrl.
+const AudioWaveformPreview = ({
+  file,
+  audioUrl,
+  title = 'Waveform preview',
+  subtitle,
+  emptyMessage = 'Add an audio file to preview its waveform.',
+  isLoading = false,
+}: Props) => {
+  const [bars, setBars] = useState<number[]>([]);
+  const [errorMessage, setErrorMessage] = useState('');
+  const [objectUrl, setObjectUrl] = useState('');
+  const previewUrl = file ? objectUrl : audioUrl || '';
+
+  // createObjectURL is a side effect, so it lives in an effect (not useMemo): each URL is revoked
+  // by the cleanup of the same effect run that created it, and a re-run always mints a fresh one.
+  useEffect(() => {
+    const createdUrl = file ? URL.createObjectURL(file) : '';
+    setObjectUrl(createdUrl);
+    return () => {
+      if (createdUrl) URL.revokeObjectURL(createdUrl);
+    };
+  }, [file]);
+
+  useEffect(() => {
+    let isActive = true;
+    let audioContext: AudioContext | null = null;
+    const closeContext = () => {
+      if (audioContext && audioContext.state !== 'closed') {
+        audioContext.close().catch(() => null);
+      }
+    };
+    const buildWaveform = async () => {
+      if (!file && !audioUrl) {
+        setBars([]);
+        setErrorMessage('');
+        return;
+      }
+      try {
+        setErrorMessage('');
+        const source = file || (await fetch(audioUrl as string));
+        if (source instanceof Response && !source.ok) {
+          // Otherwise an HTTP error body would be handed to the decoder as if it were audio.
+          throw new Error(`Audio request failed with status ${source.status}.`);
+        }
+        const audioBuffer = await source.arrayBuffer();
+        if (typeof window === 'undefined' || !window.AudioContext) {
+          throw new Error('AudioContext is unavailable in this browser.');
+        }
+        if (!isActive) return; // Source changed while loading: do not spin up a context for a stale run.
+        audioContext = new window.AudioContext();
+        const decoded = await audioContext.decodeAudioData(audioBuffer.slice(0));
+        if (isActive) {
+          setBars(createWaveformBars(decoded.getChannelData(0)));
+        }
+      } catch (error) {
+        console.error('Failed to render waveform preview:', error);
+        if (isActive) {
+          setBars([]);
+          setErrorMessage('Waveform preview is unavailable for this file, but the audio upload is still attached.');
+        }
+      } finally {
+        closeContext();
+      }
+    };
+    buildWaveform();
+
+    return () => {
+      isActive = false;
+      closeContext();
+    };
+  }, [audioUrl, file]);
+
+  return (
+    <div className="rounded-2xl border border-white/10 bg-slate-950/70 p-4">
+      <div className="flex flex-wrap items-start justify-between gap-3">
+        <div>
+          <div className="text-xs uppercase tracking-[0.18em] text-slate-500">{title}</div>
+          <div className="mt-2 text-sm font-medium text-white">{subtitle || file?.name || 'Awaiting audio upload'}</div>
+        </div>
+        <div className="rounded-full border border-white/10 px-3 py-1 text-[11px] uppercase tracking-[0.16em] text-slate-400">
+          {isLoading ? 'Rendering…' : previewUrl ? 'Ready to review' : 'No audio'}
+        </div>
+      </div>
+
+      <div className="mt-4 flex h-24 items-end gap-1 overflow-hidden rounded-2xl border border-white/10 bg-slate-900/70 px-3 py-2">
+        {bars.length ? (
+          bars.map((barHeight, index) => (
+            <div
+              key={`${index}-${barHeight}`}
+              className="min-w-[4px] flex-1 rounded-full bg-gradient-to-t from-violet-500 via-violet-300 to-emerald-300 opacity-90"
+              style={{ height: `${barHeight}%` }}
+            />
+          ))
+        ) : (
+          <div className="flex h-full w-full items-center justify-center text-center text-sm text-slate-500">{emptyMessage}</div>
+        )}
+      </div>
+
+      {previewUrl ? <audio controls className="mt-4 w-full" src={previewUrl} preload="metadata" /> : null}
+      {errorMessage ? <p className="mt-3 text-xs leading-5 text-amber-200">{errorMessage}</p> : null}
+    </div>
+  );
+};
+
+export default AudioWaveformPreview;
diff --git a/frontend/src/pages/studio.tsx b/frontend/src/pages/studio.tsx
index ac7b8b1..f20818c 100644
--- a/frontend/src/pages/studio.tsx
+++ b/frontend/src/pages/studio.tsx
@@ -1,9 +1,11 @@
 import {
   mdiAlbum,
+  mdiAlertCircleOutline,
   mdiChartTimelineVariant,
   mdiCheckCircleOutline,
   mdiChevronRight,
   mdiClockOutline,
+  mdiInformationOutline,
   mdiExportVariant,
   mdiMicrophone,
   mdiMusic,
@@ -15,7 +17,8 @@ import {
 import axios from 'axios';
 import Head from 'next/head';
 import Link from 'next/link';
-import React, { ReactElement, useEffect, useMemo, useState } from 'react';
+import React, { DragEvent, ReactElement, useEffect, useMemo, useRef, useState } from 'react';
+import AudioWaveformPreview from '../components/Studio/AudioWaveformPreview';
 import BaseButton from '../components/BaseButton';
 import BaseIcon from '../components/BaseIcon';
 import CardBox from '../components/CardBox';
@@ -26,6 +29,7 @@ import SectionTitleLineWithButton from '../components/SectionTitleLineWithButton
 import { getPageTitle } from '../config';
 import { hasPermission } from '../helpers/userPermissions';
 import LayoutAuthenticated from '../layouts/Authenticated';
+import FileUploader from '../components/Uploaders/UploadService';
 import { useAppSelector } from '../stores/hooks';
 
 type GenreOption = {
@@ -100,12 +104,24 @@ type RecentSession = {
   } | null;
 };
 
+type UploadedFile = { // Shape the FileUploader.upload result is cast to in handleSelectedVocalFile.
+  id?: string;
+  name: string;
+  sizeInBytes?: number;
+  privateUrl: string;
+  publicUrl: string;
+  new: boolean; // NOTE(review): backend sanitizeUploadedAudioFile rejects a falsy flag — confirm this object is what gets posted as vocalUpload.
+};
+
 type CreatedLink = {
   id: string;
   href: string;
   status?: string;
   format?: string;
   title?: string;
+  name?: string;
+  fileName?: string;
+  publicUrl?: string;
   bpm?: number;
   key_signature?: string;
   mood?: string;
@@ -119,6 +135,7 @@ type CreatedSession = {
   song: CreatedLink;
   generationRequest: CreatedLink;
   recordingSession?: CreatedLink | null;
+  vocalAsset?: CreatedLink | null;
   mixSession: CreatedLink;
   masteringSession: CreatedLink;
   exportJob: CreatedLink;
@@ -147,6 +164,22 @@ type LaunchpadResponse = {
   recentSessions: RecentSession[];
 };
 
+type DetectedVocalAnalysis = { // Result shape of detectVocalAnalysis; every field is optional.
+  bpm?: number; // Tempo from estimateTempo, already folded by normalizeTempo and rounded.
+  bpmConfidence?: number; // 0–1 score clamped by normalizeConfidence.
+  key?: string; // Label built from a NOTE_NAMES entry plus ' major' or ' minor'.
+  keyConfidence?: number; // 0–1 score clamped by normalizeConfidence.
+  overallConfidence?: number; // Mean of whichever of the two confidences were produced.
+};
+
+type SubmittedVocalPreview = { // Type of the submittedVocalPreview state; NOTE(review): its producer is outside this hunk.
+  file?: File | null;
+  audioUrl?: string;
+  name: string;
+  sizeInBytes?: number;
+  analysis?: DetectedVocalAnalysis | null;
+};
+
 type FormState = {
   title: string;
   genreId: string;
@@ -196,6 +229,11 @@ const vocalModes: Array<{ value: FormState['vocalMode']; title: string; descript
   },
 ];
 
+const vocalUploadSchema = { // Passed to FileUploader.validate and FileUploader.upload for vocal takes.
+  size: 20 * 1024 * 1024, // 20 MiB; presumably the maximum upload size — confirm against FileUploader.
+  formats: ['mp3', 'wav'],
+};
+
 const stageCards = [
   {
     label: 'Beat generation',
@@ -214,6 +252,456 @@ const stageCards = [
   },
 ];
 
+const NOTE_NAMES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']; // Pitch-class names, index 0 = C.
+const MAJOR_PROFILE = [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88]; // Major-key weights, index 0 = tonic; looks like the Krumhansl–Kessler profile — TODO confirm source.
+const MINOR_PROFILE = [6.33, 2.68, 3.52, 5.38, 2.6, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17]; // Minor-key weights, index 0 = tonic (see scorePitchProfiles).
+
+/**
+ * Formats a byte count as whole kilobytes (minimum "1 KB") or megabytes with one decimal.
+ * Returns an empty string when the size is missing, zero or otherwise falsy.
+ */
+function formatFileSize(sizeInBytes?: number) {
+  if (!sizeInBytes) return '';
+
+  const kilobytes = sizeInBytes / 1024;
+  const isMegabyteRange = kilobytes >= 1024;
+
+  // Below 1 MB round to whole KB, never showing less than 1.
+  return isMegabyteRange ? `${(kilobytes / 1024).toFixed(1)} MB` : `${Math.max(1, Math.round(kilobytes))} KB`;
+}
+
+/**
+ * Folds a tempo into the 70–180 BPM range by repeated doubling or halving, then rounds it.
+ * Non-finite or non-positive input returns 0; doubling/halving such a value would never terminate.
+ */
+function normalizeTempo(value: number) {
+  if (!Number.isFinite(value) || value <= 0) {
+    return 0;
+  }
+  let bpm = value;
+  while (bpm < 70) bpm *= 2;
+  while (bpm > 180) bpm /= 2;
+  return Math.round(bpm);
+}
+
+// Clamps a confidence score into [0, 1]; non-numbers and NaN count as 0.
+function normalizeConfidence(value?: number) {
+  const isUsable = typeof value === 'number' && !Number.isNaN(value);
+  if (!isUsable) return 0;
+
+  return Math.max(0, Math.min(1, value as number));
+}
+
+/**
+ * Buckets a confidence score into a display label:
+ * 'High' from 0.78 upward, 'Medium' from 0.5 upward, otherwise 'Low'.
+ */
+function getConfidenceLabel(value?: number) {
+  const score = normalizeConfidence(value);
+  const isHigh = score >= 0.78;
+  const isMedium = score >= 0.5;
+
+  // Checked highest-first, so a high score never falls through to 'Medium'.
+  const mediumOrLow = isMedium ? 'Medium' : 'Low';
+  return isHigh ? 'High' : mediumOrLow;
+}
+
+/**
+ * Picks the badge classes for a confidence score:
+ * emerald from 0.78 upward, amber from 0.5 upward, rose below that.
+ */
+function getConfidenceClasses(value?: number) {
+  const score = normalizeConfidence(value);
+  const tiers: Array<[number, string]> = [
+    [0.78, 'border-emerald-300/30 bg-emerald-500/10 text-emerald-100'],
+    [0.5, 'border-amber-300/30 bg-amber-500/10 text-amber-100'],
+  ];
+  const matched = tiers.find(([minimum]) => score >= minimum);
+  return matched ? matched[1] : 'border-rose-300/30 bg-rose-500/10 text-rose-100';
+}
+
+// Renders a confidence score as a whole-number percentage such as "64%".
+// A score that clamps to 0 (including missing or invalid input) yields ''.
+function formatConfidenceValue(value?: number) {
+  const score = normalizeConfidence(value);
+  const percent = Math.round(score * 100);
+
+  // Exactly 0 is treated as "no confidence available" rather than shown as "0%".
+  return score === 0 ? '' : `${percent}%`;
+}
+
+/**
+ * Builds a "<Label> confidence · <NN>%" summary, or '' when no percentage can be shown.
+ */
+function formatConfidenceSummary(value?: number) {
+  const percentText = formatConfidenceValue(value);
+  const hasPercent = percentText !== '';
+
+  return hasPercent ? `${getConfidenceLabel(value)} confidence · ${percentText}` : '';
+}
+
+// Explains a metric's confidence in one sentence, followed by what that confidence tier means.
+function getConfidenceTooltip(metricLabel: string, value?: number) {
+  const label = getConfidenceLabel(value);
+  const percentage = formatConfidenceValue(value);
+
+  let summary = `${metricLabel} confidence is currently unavailable.`;
+  if (percentage) {
+    summary = `${metricLabel} is currently ${label.toLowerCase()} confidence (${percentage}).`;
+  }
+  let meaning = 'Low means the take looks noisy, sparse, or ambiguous, so you should verify it manually.';
+  if (label === 'High') {
+    meaning = 'High means the vocal signal looked stable and the detector found a strong match.';
+  } else if (label === 'Medium') {
+    meaning = 'Medium means the suggestion is usable, but you should still confirm it by ear.';
+  }
+  return `${summary} ${meaning}`;
+}
+
+// True only for a known-but-weak score: strictly between 0 and 0.5 after clamping.
+function hasLowConfidence(value?: number) {
+  const score = normalizeConfidence(value);
+  return !(score <= 0 || score >= 0.5);
+}
+
+/**
+ * Chooses the warning shown for an uploaded take. A failed analysis wins, then the most
+ * specific low-confidence case (BPM and key, BPM, key, overall); '' means nothing to warn about.
+ */
+function getLowConfidenceWarning(analysis?: DetectedVocalAnalysis | null, analysisError?: string) {
+  if (analysisError) {
+    return 'This vocal take uploaded successfully, but the signal looks noisy or ambiguous enough that automatic BPM/key detection could not lock in cleanly.';
+  }
+  if (!analysis) {
+    return '';
+  }
+  const bpmIsLow = hasLowConfidence(analysis.bpmConfidence);
+  const keyIsLow = hasLowConfidence(analysis.keyConfidence);
+
+  if (bpmIsLow) {
+    return keyIsLow
+      ? 'Low-confidence BPM and key results usually mean the vocal is noisy, sparse, or too free-form to match reliably. Double-check both by ear before launching.'
+      : 'The BPM estimate is low confidence, which usually means the vocal rhythm is noisy or loosely phrased. Use tap tempo or set BPM manually before launching.';
+  }
+  if (keyIsLow) {
+    return 'The key estimate is low confidence, which usually means the pitch center is unclear or the take is noisy. Treat the suggested key as a starting point and verify by ear.';
+  }
+
+  return hasLowConfidence(analysis.overallConfidence)
+    ? 'Overall analysis confidence is low, so the uploaded take may be too noisy or ambiguous for strong automatic matching. Review the suggestions carefully.'
+    : '';
+}
+
+// Averages every channel of an AudioBuffer into a single Float32Array of the same length.
+function mixToMono(audioBuffer: AudioBuffer) {
+  const { length: frameCount, numberOfChannels: channelCount } = audioBuffer;
+  const channels = Array.from({ length: channelCount }, (_, channel) => audioBuffer.getChannelData(channel));
+  const mono = new Float32Array(frameCount);
+  // Channels contribute their 1/channelCount share in channel order, one frame at a time.
+  for (let frame = 0; frame < frameCount; frame += 1) {
+    for (const channel of channels) {
+      mono[frame] += channel[frame] / channelCount;
+    }
+  }
+  return mono;
+}
+
+/**
+ * Reduces the sample rate to targetRate by averaging each run of input samples that maps onto
+ * one output sample. Input at or below targetRate is returned untouched.
+ */
+function downSampleBuffer(samples: Float32Array, sampleRate: number, targetRate = 22050) {
+  if (sampleRate <= targetRate) {
+    return { sampleRate, samples };
+  }
+
+  const step = sampleRate / targetRate;
+  const outputLength = Math.max(1, Math.round(samples.length / step));
+
+  const averaged = Float32Array.from({ length: outputLength }, (_, outputIndex) => {
+    const from = Math.floor(outputIndex * step);
+    const to = Math.min(samples.length, Math.floor((outputIndex + 1) * step));
+    if (!(to > from)) {
+      return samples[from] || 0; // Empty window (past the end of the input): reuse one sample or 0.
+    }
+    return samples.subarray(from, to).reduce((total, sample) => total + sample, 0) / (to - from);
+  });
+
+  return { sampleRate: targetRate, samples: averaged };
+}
+
+function estimateTempo(samples: Float32Array, sampleRate: number) {
+  const windowSizes = [1024, 2048, 4096];
+  const aggregatedScores = new Map<number, number>();
+  let totalPeaks = 0;
+  // Estimates tempo from peaks in windowed signal energy at three window sizes; returns { bpm, confidence } or null.
+  windowSizes.forEach((windowSize, windowIndex) => {
+    const energies: number[] = [];
+    // Mean absolute amplitude of each non-overlapping window.
+    for (let start = 0; start + windowSize < samples.length; start += windowSize) {
+      let sum = 0;
+
+      for (let sampleIndex = start; sampleIndex < start + windowSize; sampleIndex += 1) {
+        sum += Math.abs(samples[sampleIndex]);
+      }
+
+      energies.push(sum / windowSize);
+    }
+
+    if (energies.length < 8) {
+      return;
+    }
+    // A peak is a local maximum above mean + k * stddev; k and the minimum peak spacing grow with the window index.
+    const meanEnergy = energies.reduce((total, value) => total + value, 0) / energies.length;
+    const variance = energies.reduce((total, value) => total + (value - meanEnergy) ** 2, 0) / energies.length;
+    const threshold = meanEnergy + Math.sqrt(variance) * (0.22 + windowIndex * 0.08);
+    const peaks: number[] = [];
+
+    for (let index = 1; index < energies.length - 1; index += 1) {
+      if (energies[index] > threshold && energies[index] >= energies[index - 1] && energies[index] >= energies[index + 1]) {
+        if (!peaks.length || index - peaks[peaks.length - 1] > 1 + windowIndex) {
+          peaks.push(index);
+        }
+      }
+    }
+
+    if (peaks.length < 2) {
+      return;
+    }
+
+    totalPeaks += peaks.length;
+    const windowWeight = 1.2 - windowIndex * 0.2;
+    // The gap from each peak to each of the next (up to 8) peaks votes for a BPM; nearer pairs and smaller windows weigh more.
+    peaks.forEach((peak, peakIndex) => {
+      for (let offset = 1; offset <= 8 && peakIndex + offset < peaks.length; offset += 1) {
+        const interval = peaks[peakIndex + offset] - peak;
+
+        if (!interval) {
+          continue;
+        }
+
+        const bpm = normalizeTempo((60 * sampleRate) / (interval * windowSize));
+        const intervalWeight = windowWeight / offset;
+        aggregatedScores.set(bpm, (aggregatedScores.get(bpm) || 0) + intervalWeight);
+      }
+    });
+  });
+
+  if (!aggregatedScores.size) {
+    return null;
+  }
+  // Smooth the vote histogram: add 55% of the ±1 BPM neighbours and 25% of the ±2 BPM neighbours.
+  const smoothedScores = new Map<number, number>();
+  Array.from(aggregatedScores.entries()).forEach(([bpm, score]) => {
+    let smoothedScore = score;
+
+    for (let neighbor = bpm - 2; neighbor <= bpm + 2; neighbor += 1) {
+      if (neighbor === bpm) {
+        continue;
+      }
+
+      const neighborScore = aggregatedScores.get(neighbor);
+
+      if (neighborScore) {
+        smoothedScore += neighborScore * (neighbor === bpm - 1 || neighbor === bpm + 1 ? 0.55 : 0.25);
+      }
+    }
+
+    smoothedScores.set(bpm, smoothedScore);
+  });
+
+  const rankedTempos = Array.from(smoothedScores.entries()).sort((left, right) => right[1] - left[1]);
+  const [bestEntry, secondEntry] = rankedTempos;
+
+  if (!bestEntry) {
+    return null;
+  }
+  // Confidence mixes the winner's share of all votes, its lead over the runner-up, and the peak count (saturating at 28).
+  const [bpm, bestScore] = bestEntry;
+  const secondScore = secondEntry?.[1] || 0;
+  const totalScore = rankedTempos.reduce((total, [, score]) => total + score, 0);
+  const dominance = totalScore ? bestScore / totalScore : 0;
+  const separation = bestScore ? (bestScore - secondScore) / bestScore : 0;
+  const peakCoverage = Math.min(1, totalPeaks / 28);
+  const confidence = normalizeConfidence(dominance * 0.45 + separation * 0.4 + peakCoverage * 0.15);
+
+  return { bpm, confidence };
+}
+
+function estimateDominantPitch(segment: Float32Array, sampleRate: number) {
+  let rms = 0;
+  // Autocorrelation pitch estimate for one segment: returns { frequency, rms, clarity }, or null when too quiet or no positive peak.
+  for (let index = 0; index < segment.length; index += 1) {
+    rms += segment[index] * segment[index];
+  }
+
+  rms = Math.sqrt(rms / segment.length);
+  // Segments with RMS below 0.012 are rejected outright.
+  if (rms < 0.012) {
+    return null;
+  }
+  // Search lags corresponding to 1000 Hz down to 80 Hz, capped by the segment length.
+  const minLag = Math.floor(sampleRate / 1000);
+  const maxLag = Math.min(Math.floor(sampleRate / 80), segment.length - 1);
+  let bestLag = -1;
+  let bestCorrelation = 0;
+  let secondCorrelation = 0;
+
+  for (let lag = minLag; lag <= maxLag; lag += 1) {
+    let correlation = 0;
+
+    for (let index = 0; index + lag < segment.length; index += 1) {
+      correlation += segment[index] * segment[index + lag];
+    }
+
+    const normalizedCorrelation = correlation / Math.max(1, segment.length - lag);
+
+    if (normalizedCorrelation > bestCorrelation) {
+      secondCorrelation = bestCorrelation;
+      bestCorrelation = normalizedCorrelation;
+      bestLag = lag;
+    } else if (normalizedCorrelation > secondCorrelation) {
+      secondCorrelation = normalizedCorrelation;
+    }
+  }
+
+  if (bestLag <= 0 || bestCorrelation <= 0) {
+    return null;
+  }
+  // clarity is the winner's relative lead over the second-highest correlation seen during the scan.
+  return {
+    frequency: sampleRate / bestLag,
+    rms,
+    clarity: normalizeConfidence((bestCorrelation - secondCorrelation) / bestCorrelation),
+  };
+}
+
+// Scores each of the 12 candidate tonics: the histogram dotted with the profile rotated onto that tonic.
+function scorePitchProfiles(histogram: number[], profile: number[]) {
+  const scoreForTonic = (tonic: number) => histogram.reduce((sum, weight, pitch) => sum + weight * profile[(pitch + 12 - tonic) % 12], 0);
+  return NOTE_NAMES.map((_, tonic) => scoreForTonic(tonic));
+}
+
+function estimateMusicalKey(samples: Float32Array, sampleRate: number) {
+  const segmentSizes = [4096, 8192];
+  const totalSegments = 24;
+  const histogram = new Array(12).fill(0) as number[];
+  let acceptedSegments = 0;
+  let clarityTotal = 0;
+  // Builds a weighted pitch-class histogram from per-segment pitch estimates; returns { key, confidence } or null.
+  segmentSizes.forEach((segmentSize, segmentIndex) => {
+    if (samples.length < segmentSize) {
+      return;
+    }
+    // Hop size is chosen from the clip length and totalSegments, with a minimum of one sample.
+    const step = Math.max(1, Math.floor((samples.length - segmentSize) / totalSegments));
+
+    for (let offset = 0; offset + segmentSize <= samples.length; offset += step) {
+      const segment = samples.slice(offset, offset + segmentSize);
+      const detectedPitch = estimateDominantPitch(segment, sampleRate);
+
+      if (!detectedPitch || detectedPitch.frequency < 80 || detectedPitch.frequency > 1000) {
+        continue;
+      }
+      // Frequency -> nearest MIDI note (440 Hz = 69) -> pitch class 0–11, where 0 lines up with NOTE_NAMES[0].
+      const midi = Math.round(69 + 12 * Math.log2(detectedPitch.frequency / 440));
+      const pitchClass = ((midi % 12) + 12) % 12;
+      const segmentWeight = detectedPitch.rms * (0.75 + detectedPitch.clarity * 0.75) * (segmentIndex === 0 ? 1 : 0.85);
+
+      histogram[pitchClass] += segmentWeight;
+      acceptedSegments += 1;
+      clarityTotal += detectedPitch.clarity;
+    }
+  });
+
+  const histogramTotal = histogram.reduce((total, value) => total + value, 0);
+
+  if (!histogramTotal || !acceptedSegments) {
+    return null;
+  }
+  // Score all 24 major/minor keys against the normalised histogram and rank them best-first.
+  const normalizedHistogram = histogram.map((value) => value / histogramTotal);
+  const majorScores = scorePitchProfiles(normalizedHistogram, MAJOR_PROFILE);
+  const minorScores = scorePitchProfiles(normalizedHistogram, MINOR_PROFILE);
+  const rankedScores = [
+    ...majorScores.map((score, index) => ({ label: `${NOTE_NAMES[index]} major`, score })),
+    ...minorScores.map((score, index) => ({ label: `${NOTE_NAMES[index]} minor`, score })),
+  ].sort((left, right) => right.score - left.score);
+
+  const bestMatch = rankedScores[0];
+  const runnerUp = rankedScores[1];
+
+  if (!bestMatch) {
+    return null;
+  }
+  // Confidence mixes the lead over the runner-up, mean pitch clarity, and the accepted-segment count (saturating at 16).
+  const margin = bestMatch.score ? (bestMatch.score - (runnerUp?.score || 0)) / bestMatch.score : 0;
+  const segmentCoverage = Math.min(1, acceptedSegments / 16);
+  const averageClarity = clarityTotal / acceptedSegments;
+  const confidence = normalizeConfidence(margin * 0.55 + averageClarity * 0.3 + segmentCoverage * 0.15);
+
+  return {
+    key: bestMatch.label,
+    confidence,
+  };
+}
+
+/**
+ * Decodes an audio file in the browser and estimates its tempo and key; resolves to null when neither is found.
+ * Throws when AudioContext is unavailable; once created, the context is closed on every path.
+ */
+async function detectVocalAnalysis(file: File): Promise<DetectedVocalAnalysis | null> {
+  const isSupported = typeof window !== 'undefined' && Boolean(window.AudioContext);
+  if (!isSupported) {
+    throw new Error('Audio analysis is unavailable in this browser.');
+  }
+  const encodedAudio = await file.arrayBuffer();
+  const audioContext = new window.AudioContext();
+  try {
+    const decoded = await audioContext.decodeAudioData(encodedAudio.slice(0));
+    const prepared = downSampleBuffer(mixToMono(decoded), decoded.sampleRate);
+    const tempoMatch = estimateTempo(prepared.samples, prepared.sampleRate);
+    const keyMatch = estimateMusicalKey(prepared.samples, prepared.sampleRate);
+    if (!tempoMatch?.bpm && !keyMatch?.key) {
+      return null;
+    }
+    // Average only the confidences that were actually produced.
+    const scores = [tempoMatch?.confidence, keyMatch?.confidence].filter((score): score is number => typeof score === 'number');
+    const overallConfidence = normalizeConfidence(scores.reduce((sum, score) => sum + score, 0) / Math.max(1, scores.length));
+    return {
+      bpm: tempoMatch?.bpm,
+      bpmConfidence: tempoMatch?.confidence,
+      key: keyMatch?.key,
+      keyConfidence: keyMatch?.confidence,
+      overallConfidence,
+    };
+  } finally {
+    if (audioContext.state !== 'closed') {
+      await audioContext.close();
+    }
+  }
+}
+
+// Joins the detected BPM and key with ' · ', each followed by its confidence percentage
+// when one is available. Returns '' when there is no analysis or nothing was detected.
+function formatDetectedAnalysis(analysis?: DetectedVocalAnalysis | null) {
+  if (!analysis) {
+    return '';
+  }
+
+  // Appends " (NN%)" only when a percentage is available.
+  const withConfidence = (text: string, confidence?: number) => {
+    const percentage = formatConfidenceValue(confidence);
+    return percentage ? `${text} (${percentage})` : text;
+  };
+
+  const { bpm, bpmConfidence, key, keyConfidence } = analysis;
+  const bpmPart = bpm ? withConfidence(`${bpm} BPM`, bpmConfidence) : '';
+  const keyPart = key ? withConfidence(key, keyConfidence) : '';
+
+  return [bpmPart, keyPart].filter(Boolean).join(' · ');
+}
+
 function getStatusClasses(status?: string) {
   switch (status) {
     case 'completed':
@@ -266,6 +754,22 @@ const StudioPage = () => {
   const [aiModels, setAiModels] = useState<AiModelOption[]>([]);
   const [recentSessions, setRecentSessions] = useState<RecentSession[]>([]);
   const [createdSession, setCreatedSession] = useState<CreatedSession | null>(null);
+  const [uploadedVocal, setUploadedVocal] = useState<UploadedFile | null>(null);
+  const [uploadedVocalFile, setUploadedVocalFile] = useState<File | null>(null);
+  const [submittedVocalPreview, setSubmittedVocalPreview] = useState<SubmittedVocalPreview | null>(null);
+  const [vocalAnalysis, setVocalAnalysis] = useState<DetectedVocalAnalysis | null>(null);
+  const [isUploadingVocal, setIsUploadingVocal] = useState(false);
+  const [isAnalyzingVocal, setIsAnalyzingVocal] = useState(false);
+  const [isVocalDragActive, setIsVocalDragActive] = useState(false);
+  const [vocalUploadError, setVocalUploadError] = useState('');
+  const [vocalAnalysisError, setVocalAnalysisError] = useState('');
+  const [tapTempoMarks, setTapTempoMarks] = useState<number[]>([]);
+  const [tapTempoBpm, setTapTempoBpm] = useState<number | null>(null);
+  const [tapTempoStatus, setTapTempoStatus] = useState('Tap 4 to 8 times in rhythm to estimate BPM manually.');
+
+  const vocalFileInputRef = useRef<HTMLInputElement | null>(null);
+  const vocalDragDepthRef = useRef(0);
+  const lastAutoDetectedValuesRef = useRef({ targetBpm: '', targetKey: '' });
 
   const selectedGenre = useMemo(
     () => genres.find((genre) => genre.id === form.genreId) || null,
@@ -277,6 +781,73 @@ const StudioPage = () => {
     [form.masteringPresetId, masteringPresets],
   );
 
+  const detectedAnalysisLabel = formatDetectedAnalysis(vocalAnalysis);
+  const detectedOverallConfidence = formatConfidenceSummary(vocalAnalysis?.overallConfidence);
+  const hasDetectedSuggestions = Boolean(vocalAnalysis?.bpm || vocalAnalysis?.key);
+  const canApplyDetectedSuggestions = Boolean(
+    (vocalAnalysis?.bpm && form.targetBpm !== String(vocalAnalysis.bpm)) || (vocalAnalysis?.key && form.targetKey !== vocalAnalysis.key),
+  );
+  const lowConfidenceWarning = getLowConfidenceWarning(vocalAnalysis, vocalAnalysisError);
+  const showLowConfidenceWarning = Boolean(form.vocalMode === 'upload' && uploadedVocal && lowConfidenceWarning);
+  const shouldShowTapTempoAssist = Boolean(
+    form.vocalMode === 'upload' && uploadedVocal && (vocalAnalysisError || !vocalAnalysis?.bpm || hasLowConfidence(vocalAnalysis?.bpmConfidence)),
+  );
+  const tapTempoCount = tapTempoMarks.length;
+
+  /**
+   * Copies detected BPM/key into the form. A field is overwritten only when forced, when it is
+   * empty, or when it still holds the previously auto-detected value (i.e. the user has not edited it).
+   */
+  const syncDetectedValuesToForm = (analysis: DetectedVocalAnalysis | null, force = false) => {
+    if (!analysis) {
+      return;
+    }
+
+    const detectedValues = {
+      targetBpm: analysis.bpm ? String(analysis.bpm) : '',
+      targetKey: analysis.key || '',
+    };
+
+    // Snapshot the previous auto-filled values now: the ref is updated below, and React may run
+    // the updater after that, which would compare the form against the new values instead.
+    const previousAuto = { ...lastAutoDetectedValuesRef.current };
+
+    setForm((current) => {
+      const next = { ...current };
+      const canReplace = (field: 'targetBpm' | 'targetKey') =>
+        Boolean(detectedValues[field]) && (force || !current[field] || current[field] === previousAuto[field]);
+
+      if (canReplace('targetBpm')) {
+        next.targetBpm = detectedValues.targetBpm;
+      }
+
+      if (canReplace('targetKey')) {
+        next.targetKey = detectedValues.targetKey;
+      }
+      return next;
+    });
+
+    lastAutoDetectedValuesRef.current = {
+      targetBpm: detectedValues.targetBpm || previousAuto.targetBpm,
+      targetKey: detectedValues.targetKey || previousAuto.targetKey,
+    };
+  };
+
+  // Empties the file input's value, zeroes the drag-depth counter and clears the drag-active flag.
+  const resetVocalPicker = () => {
+    const input = vocalFileInputRef.current;
+    if (input) input.value = '';
+
+    vocalDragDepthRef.current = 0;
+    setIsVocalDragActive(false);
+  };
+
+  const resetTapTempo = (message = 'Tap 4 to 8 times in rhythm to estimate BPM manually.') => { // Resets tap-tempo state.
+    setTapTempoMarks([]); // Forget every recorded tap.
+    setTapTempoBpm(null); // Drop the tapped BPM.
+    setTapTempoStatus(message); // Show the supplied status text, or the default prompt.
+  };
+
   const loadLaunchpad = async () => {
     const { data } = await axios.get<LaunchpadResponse>('/studio/launchpad');
 
@@ -342,15 +913,209 @@ const StudioPage = () => {
       } as FormState));
     };
 
+  /**
+   * Returns the vocal upload area to its empty state: drops the attached file and its
+   * analysis, blanks both error messages, stops the analyzing indicator, and resets
+   * the tap-tempo helper and the file picker.
+   */
+  const clearUploadedVocal = () => {
+    // Attached take (remote record + local File).
+    setUploadedVocalFile(null);
+    setUploadedVocal(null);
+
+    // Analysis result and progress flag.
+    setIsAnalyzingVocal(false);
+    setVocalAnalysis(null);
+
+    // User-facing error messages.
+    setVocalAnalysisError('');
+    setVocalUploadError('');
+
+    // Auxiliary UI: file input / drag state and tap tempo.
+    resetVocalPicker();
+    resetTapTempo();
+  };
+
+  /**
+   * Validates and uploads a picked or dropped vocal file, then tries to detect its
+   * BPM/key and pre-fill the form with the result.
+   *
+   * The upload and the analysis fail independently: an analysis failure keeps the
+   * uploaded take and only sets `vocalAnalysisError`, while a validation/upload
+   * failure clears the whole upload state and sets `vocalUploadError`.
+   *
+   * @param file The local file chosen by the user.
+   */
+  const handleSelectedVocalFile = async (file: File) => {
+    try {
+      // Start from a clean slate: busy flag on, previous messages and taps cleared.
+      setIsUploadingVocal(true);
+      setIsAnalyzingVocal(false);
+      setVocalUploadError('');
+      setVocalAnalysisError('');
+      setErrorMessage('');
+      resetTapTempo();
+
+      // NOTE(review): presumably `validate` throws when the file violates the schema,
+      // which would route to the outer catch below — confirm against FileUploader.
+      FileUploader.validate(file, vocalUploadSchema);
+      const remoteFile = (await FileUploader.upload('assets/file_blobs', file, vocalUploadSchema)) as UploadedFile;
+
+      // Keep both the remote record and the local File; drop any earlier analysis.
+      setUploadedVocal(remoteFile);
+      setUploadedVocalFile(file);
+      setVocalAnalysis(null);
+
+      setIsAnalyzingVocal(true);
+
+      // Analysis is best-effort: failures here must not discard the finished upload.
+      try {
+        const analysis = await detectVocalAnalysis(file);
+        setVocalAnalysis(analysis);
+        syncDetectedValuesToForm(analysis);
+      } catch (error) {
+        console.error('Failed to analyze uploaded vocal:', error);
+        setVocalAnalysis(null);
+        setVocalAnalysisError('The vocal uploaded successfully, but automatic BPM/key detection is unavailable for this take.');
+      } finally {
+        setIsAnalyzingVocal(false);
+      }
+    } catch (error) {
+      console.error('Failed to upload vocal file:', error);
+      // clearUploadedVocal() also blanks vocalUploadError, so the message has to be
+      // set after it. It clears uploadedVocal too, so a take attached before this
+      // attempt is removed as well.
+      clearUploadedVocal();
+      setVocalUploadError(error instanceof Error ? error.message : 'We could not upload the vocal file. Please try again.');
+    } finally {
+      // Runs only after the analysis step has settled, so the uploading flag stays
+      // true for the whole upload + analysis sequence.
+      setIsUploadingVocal(false);
+      resetVocalPicker();
+    }
+  };
+
+  /**
+   * Change handler for the vocal file input: forwards the first selected file to
+   * `handleSelectedVocalFile`, and does nothing when the selection is empty.
+   */
+  const handleVocalUpload = async (event: React.ChangeEvent<HTMLInputElement>) => {
+    const { files } = event.target;
+    const pickedFile = files?.[0];
+
+    if (pickedFile) {
+      await handleSelectedVocalFile(pickedFile);
+    }
+  };
+
+  /**
+   * Drag-enter handler for the drop zone. While drops are allowed it increments the
+   * drag depth counter and turns the drag-active highlight on; the event is always
+   * cancelled and stopped from propagating.
+   */
+  const handleVocalDragEnter = (event: DragEvent<HTMLDivElement>) => {
+    event.stopPropagation();
+    event.preventDefault();
+
+    const acceptsDrops = canCreateProjects && !isSubmitting && !isUploadingVocal;
+
+    if (acceptsDrops) {
+      vocalDragDepthRef.current += 1;
+      setIsVocalDragActive(true);
+    }
+  };
+
+  /**
+   * Drag-over handler for the drop zone. While drops are allowed it sets the drop
+   * effect to `copy` and keeps the drag-active highlight on; the event is always
+   * cancelled and stopped from propagating.
+   */
+  const handleVocalDragOver = (event: DragEvent<HTMLDivElement>) => {
+    event.stopPropagation();
+    event.preventDefault();
+
+    const acceptsDrops = canCreateProjects && !isSubmitting && !isUploadingVocal;
+
+    if (acceptsDrops) {
+      event.dataTransfer.dropEffect = 'copy';
+      setIsVocalDragActive(true);
+    }
+  };
+
+  /**
+   * Drag-leave handler for the drop zone. While drops are allowed it decrements the
+   * drag depth counter (never below zero) and turns the highlight off once the
+   * counter reaches zero; the event is always cancelled and stopped from propagating.
+   */
+  const handleVocalDragLeave = (event: DragEvent<HTMLDivElement>) => {
+    event.stopPropagation();
+    event.preventDefault();
+
+    const acceptsDrops = canCreateProjects && !isSubmitting && !isUploadingVocal;
+
+    if (!acceptsDrops) {
+      return;
+    }
+
+    const remainingDepth = Math.max(0, vocalDragDepthRef.current - 1);
+    vocalDragDepthRef.current = remainingDepth;
+
+    if (!remainingDepth) {
+      setIsVocalDragActive(false);
+    }
+  };
+
+  /**
+   * Drop handler for the drop zone. Always clears the drag-tracking state; when drops
+   * are allowed and at least one file was dropped, the first file is handed to
+   * `handleSelectedVocalFile`.
+   */
+  const handleVocalDrop = async (event: DragEvent<HTMLDivElement>) => {
+    event.stopPropagation();
+    event.preventDefault();
+
+    // The drag gesture is over regardless of whether the drop is accepted.
+    setIsVocalDragActive(false);
+    vocalDragDepthRef.current = 0;
+
+    const acceptsDrops = canCreateProjects && !isSubmitting && !isUploadingVocal;
+    const droppedFile = acceptsDrops ? event.dataTransfer.files?.[0] : undefined;
+
+    if (droppedFile) {
+      await handleSelectedVocalFile(droppedFile);
+    }
+  };
+
+  /**
+   * Records one tap and, once enough steady taps exist, estimates the BPM and writes
+   * it into the form's `targetBpm`.
+   *
+   * Rules applied to the tap history:
+   * - taps older than 8 s are dropped;
+   * - a pause of more than 2.5 s since the last tap restarts the sequence;
+   * - at most the 8 most recent taps are kept, and at least 4 are needed for an estimate;
+   * - only gaps strictly between 0 and 2000 ms count towards the average interval.
+   *
+   * The next history is computed here in the handler rather than inside a state
+   * updater, so the remaining state changes are not side effects of an updater
+   * function (React expects updaters to be pure and may invoke them twice).
+   */
+  const handleTapTempo = () => {
+    const now = Date.now();
+    const recentMarks = tapTempoMarks.filter((timestamp) => now - timestamp <= 8000);
+    const timedOut = Boolean(recentMarks.length && now - recentMarks[recentMarks.length - 1] > 2500);
+    const nextMarks = [...(timedOut ? [] : recentMarks), now].slice(-8);
+
+    setTapTempoMarks(nextMarks);
+
+    if (timedOut) {
+      // Return here so this message is what the user sees; falling through would
+      // replace it immediately with the generic "Captured 1 tap" status.
+      setTapTempoBpm(null);
+      setTapTempoStatus('Tap timing reset after a pause. Start tapping again on the beat.');
+      return;
+    }
+
+    if (nextMarks.length < 4) {
+      const tapsRemaining = 4 - nextMarks.length;
+      setTapTempoBpm(null);
+      setTapTempoStatus(
+        `Captured ${nextMarks.length} tap${nextMarks.length === 1 ? '' : 's'}. Tap ${tapsRemaining} more time${tapsRemaining === 1 ? '' : 's'} to estimate BPM.`,
+      );
+      return;
+    }
+
+    // Gap between each tap and the one before it.
+    const intervals = nextMarks
+      .slice(1)
+      .map((timestamp, index) => timestamp - nextMarks[index])
+      .filter((interval) => interval > 0 && interval < 2000);
+
+    if (!intervals.length) {
+      setTapTempoBpm(null);
+      setTapTempoStatus('Those taps were too uneven to estimate BPM. Try tapping a steady pulse.');
+      return;
+    }
+
+    const averageInterval = intervals.reduce((total, interval) => total + interval, 0) / intervals.length;
+    const bpm = normalizeTempo(60000 / averageInterval);
+    const bpmValue = String(bpm);
+
+    setTapTempoBpm(bpm);
+    setTapTempoStatus(`Tap tempo estimated ${bpm} BPM from ${nextMarks.length} taps. Keep tapping to refine it, or keep this value.`);
+    setForm((currentForm) => ({
+      ...currentForm,
+      targetBpm: bpmValue,
+    }));
+    // Mark the tapped value as auto-filled so a later detection may still replace it.
+    lastAutoDetectedValuesRef.current.targetBpm = bpmValue;
+  };
+
+  // Click handler for the reset button: calls resetTapTempo with no arguments, which
+  // restores its default status message.
+  const handleResetTapTempo = () => resetTapTempo();
+
   const handleSubmit = async (event: React.FormEvent<HTMLFormElement>) => {
     event.preventDefault();
 
+    if (form.vocalMode === 'upload' && !uploadedVocal) {
+      setErrorMessage('Upload an MP3 or WAV vocal take before launching the session.');
+      return;
+    }
+
     try {
       setIsSubmitting(true);
       setErrorMessage('');
       setSuccessMessage('');
 
-      const { data } = await axios.post<{ message: string; session: CreatedSession }>('/studio/launchpad', form);
+      const { data } = await axios.post<{ message: string; session: CreatedSession }>('/studio/launchpad', {
+        ...form,
+        vocalUpload: uploadedVocal,
+      });
+
+      if (form.vocalMode === 'upload' && uploadedVocal) {
+        setSubmittedVocalPreview({
+          file: uploadedVocalFile,
+          audioUrl: uploadedVocal.publicUrl,
+          name: uploadedVocal.name,
+          sizeInBytes: uploadedVocal.sizeInBytes,
+          analysis: vocalAnalysis,
+        });
+      } else {
+        setSubmittedVocalPreview(null);
+      }
+
       setCreatedSession(data.session);
       setSuccessMessage(data.message);
       setForm((current) => ({
@@ -359,7 +1124,9 @@ const StudioPage = () => {
         languageId: current.languageId,
         masteringPresetId: current.masteringPresetId,
         targetBpm: current.targetBpm,
+        targetKey: current.targetKey,
       }));
+      clearUploadedVocal();
       await loadLaunchpad();
     } catch (error) {
       console.error('Failed to create studio session:', error);
@@ -508,6 +1275,114 @@ const StudioPage = () => {
             </div>
 
             <form onSubmit={handleSubmit}>
+
+              {form.vocalMode === 'upload' ? (
+                <FormField label="Vocal upload" help="Attach the raw MP3 or WAV take so beat matching and vocal asset linking can happen immediately.">
+                  <div className="rounded-3xl border border-white/10 bg-white/5 p-5">
+                    <input
+                      ref={vocalFileInputRef}
+                      type="file"
+                      className="hidden"
+                      accept=".mp3,.wav,audio/mpeg,audio/wav,audio/x-wav"
+                      onChange={handleVocalUpload}
+                      disabled={!canCreateProjects || isSubmitting || isUploadingVocal}
+                    />
+
+                    <div
+                      role="button"
+                      tabIndex={canCreateProjects ? 0 : -1}
+                      onClick={() => {
+                        if (!canCreateProjects || isSubmitting || isUploadingVocal) {
+                          return;
+                        }
+
+                        vocalFileInputRef.current?.click();
+                      }}
+                      onKeyDown={(event) => {
+                        if (event.key === 'Enter' || event.key === ' ') {
+                          event.preventDefault();
+
+                          if (!canCreateProjects || isSubmitting || isUploadingVocal) {
+                            return;
+                          }
+
+                          vocalFileInputRef.current?.click();
+                        }
+                      }}
+                      onDragEnter={handleVocalDragEnter}
+                      onDragOver={handleVocalDragOver}
+                      onDragLeave={handleVocalDragLeave}
+                      onDrop={handleVocalDrop}
+                      className={`rounded-3xl border border-dashed p-5 transition ${
+                        !canCreateProjects || isSubmitting
+                          ? 'cursor-not-allowed border-white/10 bg-slate-950/40 text-slate-500'
+                          : isVocalDragActive
+                            ? 'cursor-pointer border-emerald-300/60 bg-emerald-500/10 shadow-lg shadow-emerald-900/20'
+                            : 'cursor-pointer border-violet-300/20 bg-slate-950/50 hover:border-violet-300/40 hover:bg-slate-900/80'
+                      }`}
+                    >
+                      <div className="flex flex-wrap items-center justify-between gap-3">
+                        <div>
+                          <div className="text-xs uppercase tracking-[0.18em] text-slate-400">Drag & drop vocal intake</div>
+                          <div className="mt-2 text-base font-medium text-white">
+                            {isUploadingVocal ? 'Uploading vocal…' : uploadedVocal ? 'Replace the current vocal take' : 'Drop an MP3/WAV here or click to browse'}
+                          </div>
+                          <p className="mt-2 max-w-2xl text-sm leading-6 text-slate-300">
+                            Upload starts immediately, then the browser suggests BPM and key from the take so you can match the beat faster.
+                          </p>
+                        </div>
+
+                        <div className="rounded-full border border-white/10 px-3 py-1 text-xs uppercase tracking-[0.16em] text-slate-300">
+                          {isVocalDragActive ? 'Release to upload' : uploadedVocal ? 'Take attached' : 'Awaiting file'}
+                        </div>
+                      </div>
+
+                      <div className="mt-4 flex flex-wrap items-center gap-3">
+                        <span
+                          className={`inline-flex items-center rounded-full border px-4 py-2 text-sm font-medium transition ${
+                            !canCreateProjects || isSubmitting || isUploadingVocal
+                              ? 'border-white/10 bg-slate-900/40 text-slate-500'
+                              : 'border-violet-400/30 bg-violet-500/10 text-violet-100 hover:bg-violet-500/20'
+                          }`}
+                        >
+                          {isUploadingVocal ? 'Uploading vocal…' : uploadedVocal ? 'Choose another file' : 'Browse files'}
+                        </span>
+
+                        {uploadedVocal ? (
+                          <button
+                            type="button"
+                            onClick={(event) => {
+                              event.stopPropagation();
+                              clearUploadedVocal();
+                            }}
+                            className="inline-flex items-center rounded-full border border-white/10 px-4 py-2 text-sm font-medium text-slate-300 transition hover:border-rose-300/30 hover:text-white"
+                          >
+                            Remove upload
+                          </button>
+                        ) : null}
+
+                        <div className="text-xs uppercase tracking-[0.18em] text-slate-400">
+                          {uploadedVocal ? 'Waveform ready · asset will attach on launch' : 'No vocal take attached yet'}
+                        </div>
+                      </div>
+                    </div>
+
+                    {vocalUploadError ? <p className="mt-4 text-sm text-rose-300">{vocalUploadError}</p> : null}
+                    {vocalAnalysisError ? <p className="mt-4 text-sm text-amber-200">{vocalAnalysisError}</p> : null}
+
+                    <div className="mt-4">
+                      <AudioWaveformPreview
+                        file={uploadedVocalFile}
+                        audioUrl={uploadedVocal?.publicUrl}
+                        isLoading={isUploadingVocal || isAnalyzingVocal}
+                        title="Uploaded vocal take"
+                        subtitle={uploadedVocal ? `${uploadedVocal.name}${formatFileSize(uploadedVocal.sizeInBytes) ? ` · ${formatFileSize(uploadedVocal.sizeInBytes)}` : ''}` : undefined}
+                        emptyMessage="Upload a raw vocal take to preview the waveform and confirm the file that will be linked into the generated session."
+                      />
+                    </div>
+                  </div>
+                </FormField>
+              ) : null}
               <div className="grid gap-6 md:grid-cols-2">
                 <FormField label="Project title" help="Use a release-ready working title for the session.">
                   <input
@@ -611,6 +1486,124 @@ const StudioPage = () => {
                     </FormField>
                   </div>
 
+                  {shouldShowTapTempoAssist ? (
+                    <div className="mb-5 rounded-2xl border border-amber-300/25 bg-amber-500/10 p-4">
+                      <div className="flex flex-wrap items-start justify-between gap-3">
+                        <div>
+                          <div className="text-xs uppercase tracking-[0.18em] text-amber-100">Manual BPM assist</div>
+                          <p className="mt-2 text-sm leading-6 text-amber-50">
+                            Automatic BPM confidence is low for this take, so you can tap the pulse manually to lock in a steadier tempo.
+                          </p>
+                        </div>
+
+                        <div className="flex flex-wrap gap-2">
+                          <button
+                            type="button"
+                            onClick={handleTapTempo}
+                            disabled={!canCreateProjects || isSubmitting || isUploadingVocal}
+                            className="inline-flex items-center rounded-full border border-amber-200/40 bg-white/10 px-4 py-2 text-sm font-medium text-amber-50 transition hover:bg-white/15 disabled:cursor-not-allowed disabled:opacity-60"
+                          >
+                            Tap tempo
+                          </button>
+                          <button
+                            type="button"
+                            onClick={handleResetTapTempo}
+                            disabled={!tapTempoCount}
+                            className="inline-flex items-center rounded-full border border-white/10 px-4 py-2 text-sm font-medium text-slate-200 transition hover:border-white/20 hover:text-white disabled:cursor-not-allowed disabled:opacity-50"
+                          >
+                            Reset taps
+                          </button>
+                        </div>
+                      </div>
+
+                      <div className="mt-3 flex flex-wrap gap-2 text-xs uppercase tracking-[0.16em] text-amber-50">
+                        <span className="rounded-full border border-amber-200/30 bg-white/10 px-3 py-1">{tapTempoCount || 0} taps captured</span>
+                        {tapTempoBpm ? (
+                          <span className="rounded-full border border-emerald-300/30 bg-emerald-500/10 px-3 py-1 text-emerald-100">Tap BPM {tapTempoBpm}</span>
+                        ) : null}
+                      </div>
+
+                      <p className="mt-3 text-sm leading-6 text-amber-50/90">{tapTempoStatus}</p>
+                    </div>
+                  ) : null}
+
+                  {form.vocalMode === 'upload' ? (
+                    <div className="mb-5 rounded-2xl border border-white/10 bg-slate-950/60 p-4">
+                      <div className="flex flex-wrap items-start justify-between gap-3">
+                        <div>
+                          <div className="flex flex-wrap items-center gap-2 text-xs uppercase tracking-[0.18em] text-slate-400">
+                            <span>Auto-detected from vocal</span>
+                            <span
+                              className="inline-flex items-center rounded-full border border-white/10 bg-white/5 px-2 py-1 text-[10px] tracking-[0.16em] text-slate-300"
+                              title="High = stable signal and strong detector match. Medium = usable estimate, but verify by ear. Low = noisy or ambiguous take, so check manually."
+                            >
+                              <BaseIcon path={mdiInformationOutline} size={12} className="mr-1 text-sky-200" />
+                              Confidence guide
+                            </span>
+                          </div>
+                          <p className="mt-2 text-sm leading-6 text-slate-300">
+                            {isAnalyzingVocal
+                              ? 'Analyzing the uploaded take for BPM and key suggestions…'
+                              : detectedAnalysisLabel
+                                ? `Suggested values: ${detectedAnalysisLabel}${detectedOverallConfidence ? ` · ${detectedOverallConfidence}` : ''}`
+                                : 'Upload a vocal take to auto-fill BPM and key suggestions here.'}
+                          </p>
+                        </div>
+
+                        {hasDetectedSuggestions && canApplyDetectedSuggestions ? (
+                          <button
+                            type="button"
+                            onClick={() => syncDetectedValuesToForm(vocalAnalysis, true)}
+                            className="inline-flex items-center rounded-full border border-emerald-300/30 bg-emerald-500/10 px-4 py-2 text-sm font-medium text-emerald-100 transition hover:bg-emerald-500/20"
+                          >
+                            Apply suggestions
+                          </button>
+                        ) : null}
+                      </div>
+
+                      {hasDetectedSuggestions ? (
+                        <div className="mt-3 flex flex-wrap gap-2 text-xs uppercase tracking-[0.16em] text-slate-200">
+                          {vocalAnalysis?.bpm ? (
+                            <span
+                              className={`rounded-full border px-3 py-1 ${getConfidenceClasses(vocalAnalysis.bpmConfidence)}`}
+                              title={getConfidenceTooltip('BPM detection', vocalAnalysis.bpmConfidence)}
+                            >
+                              BPM {vocalAnalysis.bpm} · {formatConfidenceSummary(vocalAnalysis.bpmConfidence) || getConfidenceLabel(vocalAnalysis.bpmConfidence)}
+                            </span>
+                          ) : null}
+                          {vocalAnalysis?.key ? (
+                            <span
+                              className={`rounded-full border px-3 py-1 ${getConfidenceClasses(vocalAnalysis.keyConfidence)}`}
+                              title={getConfidenceTooltip('Key detection', vocalAnalysis.keyConfidence)}
+                            >
+                              Key {vocalAnalysis.key} · {formatConfidenceSummary(vocalAnalysis.keyConfidence) || getConfidenceLabel(vocalAnalysis.keyConfidence)}
+                            </span>
+                          ) : null}
+                          {detectedOverallConfidence ? (
+                            <span
+                              className={`rounded-full border px-3 py-1 ${getConfidenceClasses(vocalAnalysis?.overallConfidence)}`}
+                              title={getConfidenceTooltip('Overall vocal analysis', vocalAnalysis?.overallConfidence)}
+                            >
+                              Overall {detectedOverallConfidence}
+                            </span>
+                          ) : null}
+                        </div>
+                      ) : null}
+
+                      {showLowConfidenceWarning ? (
+                        <div className="mt-4 rounded-2xl border border-rose-300/25 bg-rose-500/10 p-4 text-sm leading-6 text-rose-50">
+                          <div className="flex items-start gap-3">
+                            <BaseIcon path={mdiAlertCircleOutline} size={18} className="mt-1 text-rose-200" />
+                            <div>
+                              <div className="text-xs uppercase tracking-[0.16em] text-rose-100">Low-confidence vocal warning</div>
+                              <p className="mt-2">{lowConfidenceWarning}</p>
+                            </div>
+                          </div>
+                        </div>
+                      ) : null}
+                    </div>
+                  ) : null}
+
                   <div className="grid gap-5 md:grid-cols-2">
                     <FormField label="Mastering preset" help="This stages the master destination and loudness profile.">
                       <select
@@ -710,6 +1703,39 @@ const StudioPage = () => {
                   <p className="mt-2 leading-6 text-slate-300">
                     {vocalModes.find((mode) => mode.value === form.vocalMode)?.description}
                   </p>
+
+                  {form.vocalMode === 'upload' ? (
+                    <div className="mt-4 rounded-2xl border border-white/10 bg-slate-950/60 p-4">
+                      <div className="text-xs uppercase tracking-[0.16em] text-slate-500">Uploaded take</div>
+                      {uploadedVocal ? (
+                        <>
+                          <div className="mt-2 text-sm font-medium text-white">
+                            {uploadedVocal.name}
+                            {formatFileSize(uploadedVocal.sizeInBytes) ? ` · ${formatFileSize(uploadedVocal.sizeInBytes)}` : ''}
+                          </div>
+                          <div className="mt-1 text-xs uppercase tracking-[0.16em] text-emerald-300">Ready for beat matching + asset attachment</div>
+                          {isAnalyzingVocal ? (
+                            <div className="mt-2 text-xs uppercase tracking-[0.16em] text-sky-200">Analyzing BPM/key from the vocal…</div>
+                          ) : detectedAnalysisLabel ? (
+                            <div className="mt-2 text-xs uppercase tracking-[0.16em] text-violet-200">
+                              Suggested {detectedAnalysisLabel}
+                              {detectedOverallConfidence ? ` · ${detectedOverallConfidence}` : ''}
+                            </div>
+                          ) : null}
+
+                          {showLowConfidenceWarning ? (
+                            <div className="mt-3 rounded-2xl border border-rose-300/20 bg-rose-500/10 px-3 py-2 text-xs leading-5 text-rose-100">
+                              {lowConfidenceWarning}
+                            </div>
+                          ) : null}
+                        </>
+                      ) : (
+                        <p className="mt-2 text-sm leading-6 text-slate-400">
+                          Add an MP3 or WAV vocal take to make the “upload vocal and match beat” workflow fully functional.
+                        </p>
+                      )}
+                    </div>
+                  ) : null}
                 </div>
               </div>
             </CardBox>
@@ -766,6 +1792,7 @@ const StudioPage = () => {
                 { label: 'Song', item: createdSession.song, icon: mdiAlbum },
                 { label: 'Generation', item: createdSession.generationRequest, icon: mdiRobot },
                 { label: 'Recording', item: createdSession.recordingSession, icon: mdiMicrophone },
+                { label: 'Vocal asset', item: createdSession.vocalAsset, icon: mdiMicrophone },
                 { label: 'Mix', item: createdSession.mixSession, icon: mdiWaveform },
                 { label: 'Master', item: createdSession.masteringSession, icon: mdiTuneVariant },
                 { label: 'Export', item: createdSession.exportJob, icon: mdiExportVariant },
@@ -797,7 +1824,85 @@ const StudioPage = () => {
               )}
             </div>
 
-            <div className="mt-6 grid gap-6 lg:grid-cols-2">
+            <div className={`mt-6 grid gap-6 ${submittedVocalPreview ? 'lg:grid-cols-3' : 'lg:grid-cols-2'}`}>
+              {submittedVocalPreview ? (
+                <div className="rounded-2xl border border-white/10 bg-white/5 p-5">
+                  <div className="flex flex-wrap items-start justify-between gap-3">
+                    <div>
+                      <div className="text-xs uppercase tracking-[0.2em] text-slate-400">Submitted vocal preview</div>
+                      <div className="mt-2 text-base font-medium text-white">
+                        {submittedVocalPreview.name}
+                        {formatFileSize(submittedVocalPreview.sizeInBytes) ? ` · ${formatFileSize(submittedVocalPreview.sizeInBytes)}` : ''}
+                      </div>
+                      <p className="mt-2 text-sm leading-6 text-slate-300">This is the exact vocal take attached to the session you just launched.</p>
+                    </div>
+
+                    {createdSession.vocalAsset ? (
+                      <Link
+                        href={createdSession.vocalAsset.href}
+                        className="inline-flex items-center rounded-full border border-white/10 px-4 py-2 text-sm font-medium text-emerald-100 transition hover:border-emerald-300/40 hover:text-white"
+                      >
+                        Open asset
+                        <BaseIcon path={mdiChevronRight} size={16} className="ml-1" />
+                      </Link>
+                    ) : null}
+                  </div>
+
+                  <div className="mt-4">
+                    <AudioWaveformPreview
+                      file={submittedVocalPreview.file}
+                      audioUrl={submittedVocalPreview.audioUrl}
+                      title="Launched vocal waveform"
+                      subtitle={submittedVocalPreview.name}
+                      emptyMessage="The uploaded take used for this session will appear here when available."
+                    />
+                  </div>
+
+                  {submittedVocalPreview.analysis ? (
+                    <div className="mt-4 flex flex-wrap gap-2 text-xs uppercase tracking-[0.16em] text-slate-200">
+                      {submittedVocalPreview.analysis.bpm ? (
+                        <span
+                          className={`rounded-full border px-3 py-1 ${getConfidenceClasses(submittedVocalPreview.analysis.bpmConfidence)}`}
+                          title={getConfidenceTooltip('Submitted BPM detection', submittedVocalPreview.analysis.bpmConfidence)}
+                        >
+                          BPM {submittedVocalPreview.analysis.bpm} ·{' '}
+                          {formatConfidenceSummary(submittedVocalPreview.analysis.bpmConfidence) || getConfidenceLabel(submittedVocalPreview.analysis.bpmConfidence)}
+                        </span>
+                      ) : null}
+                      {submittedVocalPreview.analysis.key ? (
+                        <span
+                          className={`rounded-full border px-3 py-1 ${getConfidenceClasses(submittedVocalPreview.analysis.keyConfidence)}`}
+                          title={getConfidenceTooltip('Submitted key detection', submittedVocalPreview.analysis.keyConfidence)}
+                        >
+                          Key {submittedVocalPreview.analysis.key} ·{' '}
+                          {formatConfidenceSummary(submittedVocalPreview.analysis.keyConfidence) || getConfidenceLabel(submittedVocalPreview.analysis.keyConfidence)}
+                        </span>
+                      ) : null}
+                      {formatConfidenceSummary(submittedVocalPreview.analysis.overallConfidence) ? (
+                        <span
+                          className={`rounded-full border px-3 py-1 ${getConfidenceClasses(submittedVocalPreview.analysis.overallConfidence)}`}
+                          title={getConfidenceTooltip('Submitted overall vocal analysis', submittedVocalPreview.analysis.overallConfidence)}
+                        >
+                          Overall {formatConfidenceSummary(submittedVocalPreview.analysis.overallConfidence)}
+                        </span>
+                      ) : null}
+                    </div>
+                  ) : null}
+
+                  {getLowConfidenceWarning(submittedVocalPreview.analysis) ? (
+                    <div className="mt-4 rounded-2xl border border-rose-300/20 bg-rose-500/10 px-4 py-3 text-sm leading-6 text-rose-50">
+                      <div className="flex items-start gap-3">
+                        <BaseIcon path={mdiAlertCircleOutline} size={18} className="mt-1 text-rose-200" />
+                        <div>
+                          <div className="text-xs uppercase tracking-[0.16em] text-rose-100">Review this take before publishing</div>
+                          <p className="mt-2">{getLowConfidenceWarning(submittedVocalPreview.analysis)}</p>
+                        </div>
+                      </div>
+                    </div>
+                  ) : null}
+                </div>
+              ) : null}
+
               <div className="rounded-2xl border border-white/10 bg-white/5 p-5">
                 <div className="text-xs uppercase tracking-[0.2em] text-slate-400">Arrangement builder</div>
                 <div className="mt-4 space-y-3">