這一章要來介紹文字生成語音的模型 - Eleven Labs
Eleven Labs 是目前市面上最強大的 AI 語音模型工具
第二可能是 Azure TTS, 便宜又好用, 但我們今天不討論這個
Eleven Labs 免費版允許你保存 3 個不同的語音, 也可以錄製自己的聲音
也有生成音效, 生成背景音樂的選項
那我們今天會專注在 文字轉語音 的 API 功能
我們現在先在 Unity 中擺放 UI
會需要用到的元件有: 輸入框, Slider, 一顆生成按鈕, 一顆播放/停止按鈕, 以及一個 AudioSource (狀態文字與時間文字為可選)
Eleven Labs Core:
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;
using Core;
using Newtonsoft.Json;
using UnityEngine;
using UnityEngine.Networking;
using UnityEngine.UI;
namespace PolarAI.Scripts.AICore.ElevenLabs
{
/// <summary>
/// Minimal client for the ElevenLabs text-to-speech HTTP endpoint.
/// Sends the text as JSON, writes the returned audio bytes to a temporary file,
/// loads that file as an <see cref="AudioClip"/> and reports the outcome through a callback.
/// Coroutines are run on <c>CoroutineManager.Instance</c> because this class is not a MonoBehaviour.
/// </summary>
public class ElevenLabsCore
{
    /// <summary>API key sent in the <c>xi-api-key</c> request header.</summary>
    public string ApiKey = "ELEVENLABS_API_KEY";

    /// <summary>Voice used when a call does not pass its own voice id.</summary>
    public string VoiceId = "YOUR_VOICE_ID";

    /// <summary>Value sent as <c>model_id</c> in the request body.</summary>
    public string ModelId = "eleven_multilingual_v2";

    // Sent as the output_format query parameter. Anything that does not start with "mp3"
    // is loaded as WAV below.
    // NOTE(review): ElevenLabs pcm_* formats may be raw PCM rather than WAV containers;
    // confirm before changing this value.
    private string OutputFormat = "mp3_44100_128";

    /// <summary>Stores the API key and the default voice id used by later requests.</summary>
    /// <param name="apiKey">ElevenLabs API key.</param>
    /// <param name="voiceId">Default voice id.</param>
    public void Initialize(string apiKey, string voiceId)
    {
        ApiKey = apiKey;
        VoiceId = voiceId;
    }

    /// <summary>Replaces the default voice id.</summary>
    /// <param name="voiceId">New default voice id.</param>
    public void SetDefaultVoiceId(string voiceId)
    {
        VoiceId = voiceId;
    }

    /// <summary>
    /// Starts a text-to-speech request. <paramref name="onComplete"/> is always invoked exactly once:
    /// with the clip and <c>true</c> on success, or with <c>null</c> and <c>false</c> on any failure
    /// (including empty text), so callers can reliably reset their "busy" state.
    /// </summary>
    /// <param name="text">Text to synthesize.</param>
    /// <param name="onComplete">Receives the resulting clip and a success flag.</param>
    /// <param name="voiceId">Optional voice id; falls back to <see cref="VoiceId"/> when null.</param>
    public void TextToSound(string text, Action<AudioClip, bool> onComplete, string voiceId=null)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            Debug.Log("TTS Text is empty.");
            // Report the failure instead of returning silently, otherwise the caller waits forever.
            onComplete?.Invoke(null, false);
            return;
        }

        CoroutineManager.Instance.StartCoroutine(DoTTS(text, onComplete, voiceId));
    }

    /// <summary>
    /// Coroutine that performs the HTTP request, stores the audio in a per-request temporary file
    /// and loads it as an <see cref="AudioClip"/>.
    /// </summary>
    private IEnumerator DoTTS(string text,Action<AudioClip, bool> onComplete, string voiceId =null)
    {
        Debug.Log("Requesting ElevenLabs TTS...");
        voiceId ??= VoiceId;
        if (string.IsNullOrWhiteSpace(voiceId))
        {
            Debug.LogError("ElevenLabs voice id is empty.");
            onComplete?.Invoke(null, false);
            yield break;
        }

        // Escape both values so an unexpected character cannot change the shape of the URL.
        var url = "https://api.elevenlabs.io/v1/text-to-speech/" + Uri.EscapeDataString(voiceId)
                  + "?output_format=" + Uri.EscapeDataString(OutputFormat);
        var payload = new ElevenTTS { text = text, model_id = ModelId };
        var json = JsonConvert.SerializeObject(payload);

        using var req = new UnityWebRequest(url, "POST");
        byte[] body = Encoding.UTF8.GetBytes(json);
        req.uploadHandler = new UploadHandlerRaw(body);
        req.downloadHandler = new DownloadHandlerBuffer();
        req.SetRequestHeader("Content-Type", "application/json");
        req.SetRequestHeader("xi-api-key", ApiKey);
        yield return req.SendWebRequest();

        if (req.result != UnityWebRequest.Result.Success)
        {
            string err = req.error + " | " + req.downloadHandler.text;
            onComplete?.Invoke(null, false);
            Debug.LogError(err);
            yield break;
        }

        byte[] audioData = req.downloadHandler.data;
        if (audioData == null || audioData.Length == 0)
        {
            Debug.LogError("ElevenLabs TTS returned an empty audio payload.");
            onComplete?.Invoke(null, false);
            yield break;
        }

        bool isMp3 = OutputFormat.StartsWith("mp3", StringComparison.Ordinal);
        string ext = isMp3 ? "mp3" : "wav";

        // A unique file per request keeps overlapping requests from overwriting each other's audio.
        string tempPath = Path.Combine(
            Application.persistentDataPath,
            "tts_" + Guid.NewGuid().ToString("N") + "." + ext);

        // The write is kept outside any yield so it can be wrapped in try/catch inside an iterator.
        string writeError = null;
        try
        {
            File.WriteAllBytes(tempPath, audioData);
        }
        catch (Exception e) when (e is IOException || e is UnauthorizedAccessException)
        {
            writeError = e.Message;
        }

        if (writeError != null)
        {
            Debug.LogError("Failed to write TTS audio file: " + writeError);
            onComplete?.Invoke(null, false);
            yield break;
        }

        yield return CoroutineManager.Instance.StartCoroutine(
            LoadCLip(tempPath, isMp3 ? AudioType.MPEG : AudioType.WAV, onComplete));

        // The clip has been created (or loading failed) by now, so the file is no longer needed.
        // NOTE(review): assumes the non-streaming clip does not read from the file after loading.
        TryDeleteFile(tempPath);
    }

    /// <summary>
    /// Loads an audio file from disk as an <see cref="AudioClip"/>.
    /// <paramref name="onLoad"/> is invoked with the clip and <c>true</c> on success,
    /// or with <c>null</c> and <c>false</c> when the file cannot be loaded.
    /// </summary>
    /// <param name="path">File system path of the audio file.</param>
    /// <param name="type">Audio format of the file.</param>
    /// <param name="onLoad">Receives the clip and a success flag.</param>
    public IEnumerator LoadCLip(string path, AudioType type, Action<AudioClip,bool> onLoad)
    {
        using var www = UnityWebRequestMultimedia.GetAudioClip("file://" + path, type);
        yield return www.SendWebRequest();

        if (www.result != UnityWebRequest.Result.Success)
        {
            Debug.LogError("Audio load failed: " + www.error);
            // Without this the caller is never told that the request ended.
            onLoad?.Invoke(null, false);
            yield break;
        }

        AudioClip clip = DownloadHandlerAudioClip.GetContent(www);
        onLoad?.Invoke(clip, clip != null);
    }

    // Best-effort cleanup of a temporary audio file; a failure is only logged.
    private static void TryDeleteFile(string path)
    {
        try
        {
            if (File.Exists(path))
            {
                File.Delete(path);
            }
        }
        catch (Exception e) when (e is IOException || e is UnauthorizedAccessException)
        {
            Debug.LogWarning("Could not delete temporary TTS file: " + e.Message);
        }
    }

    /// <summary>JSON request body: the text to speak and the model id.</summary>
    [Serializable]
    private class ElevenTTS
    {
        public string text;
        public string model_id;
    }
}
}
UI 使用方法:
using TMPro;
using UnityEngine;
using UnityEngine.UI;
namespace PolarAI.Scripts.AICore.ElevenLabs
{
/// <summary>
/// Example UI controller for <see cref="ElevenLabsCore"/>: reads text from an input field,
/// requests speech, plays the resulting clip through an <see cref="AudioSource"/> and
/// mirrors playback progress on a slider that can also be dragged to seek.
/// </summary>
public class ElevenLabsExample : MonoBehaviour
{
    // Margin (seconds) kept between a seek target and the end of the clip.
    private const float SeekEndMargin = 0.01f;

    [Header("ElevenLabs API 设置")] public string ElevenLabsApiKey = "";
    public string VoiceId = "";
    [Header("UI 组件")] public TMP_InputField TextInput;
    public Button GenBtn;
    public Button PlayStopBtn;
    public Slider VoiceSlider;
    public AudioSource AudioSource;
    [Header("可选:状态文本")] public TMP_Text StatusText;
    public TMP_Text TimeText; // Shows "current time / total time".
    private ElevenLabsCore ElevenLabsCore = new ElevenLabsCore();
    private AudioClip _currentClip;
    private SliderDragHandler _sliderDragHandler;
    private bool _isGenerating = false;
    private bool _isPlaying = false;
    private bool _isDraggingSlider = false;

    private void Start()
    {
        // Hand the credentials to the core client.
        ElevenLabsCore.Initialize(ElevenLabsApiKey, VoiceId);
        // Hook up the UI callbacks.
        SetupUIEvents();
        // Put the UI into its initial state.
        UpdateButtonStates();
        UpdateStatusText("就绪");
        // The slider works on a normalized 0..1 range and stays disabled until a clip exists.
        if (VoiceSlider != null)
        {
            VoiceSlider.minValue = 0f;
            VoiceSlider.maxValue = 1f;
            VoiceSlider.value = 0f;
            VoiceSlider.interactable = false;
        }
    }

    /// <summary>Subscribes to the button and slider events used by this example.</summary>
    private void SetupUIEvents()
    {
        // Generate button.
        if (GenBtn != null)
        {
            GenBtn.onClick.AddListener(OnGenerateButtonClicked);
        }

        // Play/stop button.
        if (PlayStopBtn != null)
        {
            PlayStopBtn.onClick.AddListener(OnPlayStopButtonClicked);
        }

        // Progress slider: dragging it changes the playback position.
        if (VoiceSlider != null)
        {
            VoiceSlider.onValueChanged.AddListener(OnProgressChanged);
            // Track drag start/end; the handler is kept so OnDestroy can remove it again.
            _sliderDragHandler = VoiceSlider.gameObject.AddComponent<SliderDragHandler>();
            _sliderDragHandler.onBeginDrag = () => { _isDraggingSlider = true; };
            _sliderDragHandler.onEndDrag = () =>
            {
                _isDraggingSlider = false;
                SeekToSliderPosition();
            };
        }
    }

    /// <summary>Moves the playback position to the point selected on the slider.</summary>
    private void SeekToSliderPosition()
    {
        if (AudioSource == null || _currentClip == null || VoiceSlider == null)
        {
            return;
        }

        float length = _currentClip.length;
        if (length <= 0f)
        {
            return;
        }

        // A slider value of 1 maps to exactly clip.length, which is past the last valid
        // position; stay slightly before the end instead.
        float maxSeek = Mathf.Max(0f, length - SeekEndMargin);
        AudioSource.time = Mathf.Clamp(VoiceSlider.value * length, 0f, maxSeek);
    }

    private void OnGenerateButtonClicked()
    {
        if (_isGenerating)
        {
            UpdateStatusText("正在生成中,请稍候...");
            return;
        }

        if (TextInput == null || string.IsNullOrWhiteSpace(TextInput.text))
        {
            UpdateStatusText("请输入要转换的文本");
            return;
        }

        if (string.IsNullOrWhiteSpace(ElevenLabsApiKey))
        {
            UpdateStatusText("错误:请设置 ElevenLabs API Key");
            Debug.LogError("ElevenLabs API Key 未设置!");
            return;
        }

        if (string.IsNullOrWhiteSpace(VoiceId))
        {
            UpdateStatusText("错误:请设置 Voice ID");
            Debug.LogError("Voice ID 未设置!");
            return;
        }

        GenerateSpeech(TextInput.text);
    }

    /// <summary>Resets playback/UI state and asks the core client to synthesize the text.</summary>
    private void GenerateSpeech(string text)
    {
        _isGenerating = true;
        UpdateButtonStates();
        UpdateStatusText("正在生成语音...");

        // Stop whatever is currently playing.
        if (_isPlaying && AudioSource != null)
        {
            AudioSource.Stop();
            _isPlaying = false;
        }

        // Reset the progress slider.
        if (VoiceSlider != null)
        {
            VoiceSlider.value = 0f;
            VoiceSlider.interactable = false;
        }

        UpdateTimeText(0f, 0f);
        // Ask ElevenLabsCore to generate the speech.
        ElevenLabsCore.TextToSound(text, OnSpeechGenerated);
    }

    /// <summary>Callback from the core client; stores the clip and starts playback on success.</summary>
    private void OnSpeechGenerated(AudioClip clip, bool success)
    {
        // The request is not run by this component, so it can finish after this object
        // has been destroyed; in that case there is nothing left to update.
        if (this == null)
        {
            return;
        }

        _isGenerating = false;
        if (success && clip != null)
        {
            _currentClip = clip;
            UpdateStatusText("语音生成成功!");
            Debug.Log("语音生成成功,时长: " + clip.length + " 秒");
            // Enable the progress slider.
            if (VoiceSlider != null)
            {
                VoiceSlider.interactable = true;
            }

            // Play automatically.
            PlayAudio();
        }
        else
        {
            UpdateStatusText("语音生成失败");
            Debug.LogError("语音生成失败");
        }

        UpdateButtonStates();
    }

    private void OnPlayStopButtonClicked()
    {
        if (_currentClip == null)
        {
            UpdateStatusText("请先生成语音");
            return;
        }

        if (_isPlaying)
        {
            StopAudio();
        }
        else
        {
            PlayAudio();
        }
    }

    private void PlayAudio()
    {
        if (AudioSource == null)
        {
            Debug.LogError("AudioSource 未设置!");
            return;
        }

        if (_currentClip == null)
        {
            UpdateStatusText("没有可播放的音频");
            return;
        }

        AudioSource.clip = _currentClip;
        AudioSource.Play();
        _isPlaying = true;
        UpdateStatusText("正在播放...");
        UpdateButtonStates();
    }

    private void StopAudio()
    {
        if (AudioSource != null)
        {
            AudioSource.Stop();
        }

        _isPlaying = false;
        UpdateStatusText("已停止播放");
        UpdateButtonStates();
    }

    private void OnProgressChanged(float value)
    {
        // While the user drags the slider, preview the target time in the time label.
        if (_isDraggingSlider && _currentClip != null)
        {
            float currentTime = value * _currentClip.length;
            UpdateTimeText(currentTime, _currentClip.length);
        }
    }

    private void Update()
    {
        // Mirror playback progress in the UI.
        if (_isPlaying && AudioSource != null && AudioSource.isPlaying && _currentClip != null)
        {
            float length = _currentClip.length;
            // Do not fight the user while the slider is being dragged.
            if (!_isDraggingSlider && VoiceSlider != null)
            {
                // Guard the division so a zero-length clip cannot push NaN/Infinity into the slider.
                VoiceSlider.value = length > 0f ? AudioSource.time / length : 0f;
            }

            // Refresh the time label.
            UpdateTimeText(AudioSource.time, length);
        }

        // Detect the end of playback.
        if (_isPlaying && AudioSource != null && !AudioSource.isPlaying)
        {
            _isPlaying = false;
            UpdateStatusText("播放完成");
            UpdateButtonStates();
            // Move the slider back to the start.
            if (VoiceSlider != null)
            {
                VoiceSlider.value = 0f;
            }

            if (_currentClip != null)
            {
                UpdateTimeText(0f, _currentClip.length);
            }
        }
    }

    /// <summary>Updates interactable state and captions of both buttons from the current flags.</summary>
    private void UpdateButtonStates()
    {
        // Generate button.
        if (GenBtn != null)
        {
            GenBtn.interactable = !_isGenerating;
            var btnText = GenBtn.GetComponentInChildren<TMP_Text>();
            if (btnText != null)
            {
                btnText.text = _isGenerating ? "生成中..." : "生成语音";
            }
        }

        // Play/stop button.
        if (PlayStopBtn != null)
        {
            PlayStopBtn.interactable = _currentClip != null && !_isGenerating;
            var btnText = PlayStopBtn.GetComponentInChildren<TMP_Text>();
            if (btnText != null)
            {
                btnText.text = _isPlaying ? "停止" : "播放";
            }
        }
    }

    /// <summary>Shows the message in the optional status label and writes it to the log.</summary>
    private void UpdateStatusText(string message)
    {
        if (StatusText != null)
        {
            StatusText.text = message;
        }

        Debug.Log($"[ElevenLabs] {message}");
    }

    /// <summary>Shows "current / total" in the optional time label.</summary>
    private void UpdateTimeText(float currentTime, float totalTime)
    {
        if (TimeText != null)
        {
            TimeText.text = $"{FormatTime(currentTime)} / {FormatTime(totalTime)}";
        }
    }

    /// <summary>Formats a number of seconds as mm:ss.</summary>
    private string FormatTime(float seconds)
    {
        int minutes = Mathf.FloorToInt(seconds / 60f);
        int secs = Mathf.FloorToInt(seconds % 60f);
        return $"{minutes:00}:{secs:00}";
    }

    private void OnDestroy()
    {
        // Unsubscribe from UI events.
        if (GenBtn != null)
        {
            GenBtn.onClick.RemoveListener(OnGenerateButtonClicked);
        }

        if (PlayStopBtn != null)
        {
            PlayStopBtn.onClick.RemoveListener(OnPlayStopButtonClicked);
        }

        if (VoiceSlider != null)
        {
            VoiceSlider.onValueChanged.RemoveListener(OnProgressChanged);
        }

        // Remove the drag helper added in SetupUIEvents so its callbacks no longer
        // reference this destroyed component.
        if (_sliderDragHandler != null)
        {
            _sliderDragHandler.onBeginDrag = null;
            _sliderDragHandler.onEndDrag = null;
            Destroy(_sliderDragHandler);
            _sliderDragHandler = null;
        }

        // Stop playback.
        if (AudioSource != null && AudioSource.isPlaying)
        {
            AudioSource.Stop();
        }
    }

    /// <summary>
    /// Public entry point for generating speech from code. Empty text is ignored, and a
    /// request made while another generation is still running is rejected, matching the
    /// behavior of the generate button.
    /// </summary>
    /// <param name="text">Text to synthesize.</param>
    public void GenerateSpeechFromText(string text)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return;
        }

        if (_isGenerating)
        {
            UpdateStatusText("正在生成中,请稍候...");
            return;
        }

        GenerateSpeech(text);
    }

    /// <summary>Sets the API key and re-initializes the core client.</summary>
    /// <param name="apiKey">ElevenLabs API key.</param>
    public void SetApiKey(string apiKey)
    {
        ElevenLabsApiKey = apiKey;
        ElevenLabsCore.Initialize(ElevenLabsApiKey, VoiceId);
    }

    /// <summary>Sets the voice id and re-initializes the core client.</summary>
    /// <param name="voiceId">ElevenLabs voice id.</param>
    public void SetVoiceId(string voiceId)
    {
        VoiceId = voiceId;
        ElevenLabsCore.Initialize(ElevenLabsApiKey, VoiceId);
    }
}
/// <summary>
/// Helper component that forwards begin-drag and end-drag events to plain callbacks.
/// </summary>
public class SliderDragHandler : MonoBehaviour,
    UnityEngine.EventSystems.IBeginDragHandler,
    UnityEngine.EventSystems.IEndDragHandler
{
    /// <summary>Called from <see cref="OnBeginDrag"/>; may be left null.</summary>
    public System.Action onBeginDrag;

    /// <summary>Called from <see cref="OnEndDrag"/>; may be left null.</summary>
    public System.Action onEndDrag;

    /// <summary>Runs the begin-drag callback when one is assigned.</summary>
    public void OnBeginDrag(UnityEngine.EventSystems.PointerEventData eventData)
    {
        System.Action callback = onBeginDrag;
        if (callback != null)
        {
            callback();
        }
    }

    /// <summary>Runs the end-drag callback when one is assigned.</summary>
    public void OnEndDrag(UnityEngine.EventSystems.PointerEventData eventData)
    {
        System.Action callback = onEndDrag;
        if (callback != null)
        {
            callback();
        }
    }
}
}
把對應的 API Key、Voice ID 和 UI 欄位填上去即可
Demo 效果: