2025 iThome鐵人賽
「 Flutter :30天打造念佛App,跨平台從Mobile到VR,讓極樂世界在眼前實現 ! 」
Day 17
「 Flutter 語音辨識 深入應用篇 — 生活在地球的勇者啊,阿彌陀佛怎麼念呀?(2)」
《 楞嚴經 — 大勢至菩薩念佛圓通章 》:
「 我本因地,以念佛心,入無生忍。今於此界,攝念佛人,歸於淨土。佛問圓通,我無選擇。都攝六根,淨念相繼,得三摩地,斯為第一。 」
白話:
大勢至菩薩在菩薩道修行念佛法門,以清淨心念佛證得無生法忍。今在娑婆世界,接引念佛人能歸入阿彌陀佛極樂世界。如果佛陀問我最好的圓通法門是什麼?
我沒有選擇其他法門,我收起眼、耳、鼻、舌、身、意的全部感受,沒有其他想法只一心不斷憶念阿彌陀佛,以此證得三摩地,才是最好的圓通法門。
昨天我們已經了解語音轉文字的基礎原理,
今天我們要進一步深入探究「關鍵字偵測KWS」!
Day17 文章目錄:
一、KWS
二、實作核心
三、細節設置
關鍵字偵測/關鍵字識別 (KWS),是在連續音訊中偵測是否出現特定關鍵詞(喚醒詞)。
與ASR語音轉文字的差異在於,KWS只回答有沒有這個關鍵字,不輸出完整逐字稿。
如果有添加@關鍵字標籤,關鍵字命中時就會
將@關鍵字標籤回傳,讓UI顯示設定的文字
優點:模型小、延遲低、耗電少,
適合:喚醒詞或需要關鍵詞觸發的場景。
行動端的KWS專用模型只有十多MB
1. 下載模型
/// Holds the currently selected speech model (ASR or KWS) together with the
/// download and unzip progress, notifying listeners whenever any of them
/// changes.
class DownloadModel with ChangeNotifier {
  /// Model name used when [useAsr] is called without an argument.
  static const _defaultAsrModel =
      'sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20';

  /// Model name used when [useKws] is called without an argument.
  static const _defaultKwsModel =
      'sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile';

  ModelKind _kind = ModelKind.asr;
  String _modelName = _defaultAsrModel;
  double _progress = 0;
  double _unzipProgress = 0;

  /// The kind of model currently selected.
  ModelKind get kind => _kind;

  /// The name of the currently selected model.
  String get modelName => _modelName;

  /// `'kws-models'` for a KWS model, `'asr-models'` otherwise.
  String get channel {
    if (_kind == ModelKind.kws) return 'kws-models';
    return 'asr-models';
  }

  /// Download progress as last set through [setProgress].
  double get progress => _progress;

  /// Unzip progress as last set through [setUnzipProgress].
  double get unzipProgress => _unzipProgress;

  /// Selects speech-to-text (ASR) with [name], or the default ASR model.
  void useAsr([String? name]) {
    _kind = ModelKind.asr;
    _modelName = name ?? _defaultAsrModel;
    notifyListeners();
  }

  /// Selects keyword spotting (KWS) with [name], or the default KWS model.
  void useKws([String? name]) {
    _kind = ModelKind.kws;
    _modelName = name ?? _defaultKwsModel;
    notifyListeners();
  }

  /// Stores the download progress, capping values of 1.0 or more at 1.
  void setProgress(double value) {
    _progress = _capAtOne(value);
    notifyListeners();
  }

  /// Stores the unzip progress, capping values of 1.0 or more at 1.
  void setUnzipProgress(double value) {
    _unzipProgress = _capAtOne(value);
    notifyListeners();
  }

  /// Returns 1 for any [value] at or above 1.0 and [value] itself otherwise.
  static double _capAtOne(double value) => value >= 1.0 ? 1.0 : value;
}
2. 設定關鍵字
import 'dart:convert';
import 'dart:io';
import 'package:path/path.dart' as path;
// Keywords to detect. Each entry is one keyword written as space-separated
// model tokens, followed by an @label.
const customKeywords = <String>[
'ā m í t uó f ó @阿彌陀佛', // "mí" in the second tone
'ā m ī t uó f ó @阿彌陀佛', // "mī" in the first (level) tone
'n ā m ó ā m í t uó f ó @南無阿彌陀佛', // "nā" in the first (level) tone
'n á m ó ā m í t uó f ó @南無阿彌陀佛', // "ná" in the second tone
];
/// Writes [customKeywords] to `keywords_custom.txt` inside [modelRoot], one
/// entry per line with a trailing newline, and returns the file's path.
Future<String> writeCustomKeywords(String modelRoot) async {
  final target = File(path.join(modelRoot, 'keywords_custom.txt'));
  final contents = '${customKeywords.join('\n')}\n';
  await target.writeAsString(contents, encoding: utf8);
  return target.path;
}
3. KeywordSpotter
/// Builds the [sherpa_onnx.KeywordSpotterConfig] for the model stored in the
/// folder named [modelName] under the application documents directory.
///
/// [keywordsFilePath] is passed on as the spotter's keywords file, and
/// [keywordsScore] / [keywordsThreshold] are forwarded unchanged.
Future<sherpa_onnx.KeywordSpotterConfig> getKwsConfigByModelName({
  required String modelName,
  required String keywordsFilePath,
  double keywordsScore = 1.0,
  double keywordsThreshold = 0.25,
}) async {
  final docsDir = await getApplicationDocumentsDirectory();
  final modelDir = join(docsDir.path, modelName);

  // These names have to match the files inside the model folder.
  const encoderFile = 'encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx';
  const decoderFile = 'decoder-epoch-12-avg-2-chunk-16-left-64.onnx';
  const joinerFile = 'joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx';

  return sherpa_onnx.KeywordSpotterConfig(
    model: sherpa_onnx.OnlineModelConfig(
      transducer: sherpa_onnx.OnlineTransducerModelConfig(
        encoder: join(modelDir, encoderFile),
        decoder: join(modelDir, decoderFile),
        joiner: join(modelDir, joinerFile),
      ),
      tokens: join(modelDir, 'tokens.txt'),
      // Author's note: 'zipformer2' is used here; 'zipformer' alone crashed.
      modelType: 'zipformer2',
    ),
    keywordsFile: keywordsFilePath,
    keywordsScore: keywordsScore,
    keywordsThreshold: keywordsThreshold,
    maxActivePaths: 4,
    numTrailingBlanks: 1,
  );
}
4. 串流與關鍵字偵測
/// State for [StreamingKwsScreen]: owns the recorder and the keyword spotter.
class _StreamingKwsScreenState extends State<StreamingKwsScreen> {
// Log text; each keyword hit is prepended to it in _start.
final _log = ValueNotifier<String>('');
// Audio recorder, assigned in initState.
late final AudioRecorder _rec;
// Keyword spotter and its stream, created on first use in _start.
sherpa_onnx.KeywordSpotter? _kws;
sherpa_onnx.OnlineStream? _stream;
// Subscription to the recorder's state changes.
StreamSubscription<RecordState>? _sub;
// Most recent recorder state received through _sub.
RecordState _state = RecordState.stop;
/// Creates the recorder, starts tracking its state, and selects the KWS
/// model once the first frame has been rendered.
@override
void initState() {
  super.initState();

  _rec = AudioRecorder();
  // Mirror every recorder state change into [_state] and rebuild.
  _sub = _rec.onStateChanged().listen((recordState) {
    setState(() {
      _state = recordState;
    });
  });

  // Deferred to a post-frame callback; presumably so that the resulting
  // notifyListeners() does not fire during the first build — TODO confirm.
  WidgetsBinding.instance.addPostFrameCallback((_) {
    const kwsModel =
        'sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile';
    context.read<DownloadModel>().useKws(kwsModel);
  });
}
/// Starts microphone streaming and keyword detection.
///
/// If the model still has to be downloaded or unzipped, that step is run and
/// the method returns without starting detection. Otherwise it writes the
/// custom keywords file, lazily creates the spotter and its stream, checks
/// the microphone permission, and feeds 16 kHz mono PCM16 audio to the
/// spotter. Each hit is prepended to [_log].
///
/// Any failure is logged and, if the widget is still mounted, shown in a
/// dialog.
Future<void> _start() async {
  try {
    final dm = context.read<DownloadModel>();
    final modelName = dm.modelName;
    if (await needsDownload(modelName)) {
      // The widget may have been removed while awaiting; `context` must not
      // be used in that case.
      if (!mounted) return;
      await downloadModelAndUnZip(context, modelName);
      return;
    }
    if (await needsUnZip(modelName)) {
      if (!mounted) return;
      await unzipModelFile(context, modelName);
      return;
    }
    final appDoc = await path_provider.getApplicationDocumentsDirectory();
    final root = path.join(appDoc.path, modelName);
    final customKeywordsPath = await writeCustomKeywords(root);
    sherpa_onnx.initBindings();
    // Keyword spotter parameters.
    final cfg = await getKwsConfigByModelName(
      modelName: modelName,
      keywordsFilePath: customKeywordsPath, // path of the custom keywords file
      keywordsScore: 1.4, // keyword boost
      keywordsThreshold: 0.20, // keyword trigger threshold
    );
    // Do not allocate native objects for a widget that is already disposed;
    // nothing would free them.
    if (!mounted) return;
    final spotter = _kws ??= sherpa_onnx.KeywordSpotter(cfg);
    _stream ??= spotter.createStream();
    if (!await _rec.hasPermission()) {
      if (!mounted) return;
      await showDialog(
        context: context,
        builder: (_) =>
            const AlertDialog(content: Text('Microphone permission denied.')),
      );
      return;
    }
    const rc = RecordConfig(
      encoder: AudioEncoder.pcm16bits,
      sampleRate: 16000,
      numChannels: 1,
    );
    final audioStream = await _rec.startStream(rc);
    audioStream.listen((bytes) {
      // A chunk can still arrive after dispose() has freed the native
      // objects; ignore it instead of touching them.
      if (!mounted) return;
      // Read the fields on every chunk so a stream replaced by _stop() is
      // picked up.
      final kws = _kws;
      final stream = _stream;
      if (kws == null || stream == null) return;
      try {
        // Convert PCM16 bytes to Float32 samples.
        final f32 = convertBytesToFloat32(Uint8List.fromList(bytes));
        // Feed the audio to the model.
        stream.acceptWaveform(samples: f32, sampleRate: 16000);
        // Decode while the spotter reports the stream as ready.
        while (kws.isReady(stream)) {
          kws.decode(stream);
        }
        // A non-empty keyword means a hit.
        final r = kws.getResult(stream);
        if (r.keyword.isNotEmpty) {
          _log.value = '[HIT] ${r.keyword}\n${_log.value}';
          kws.reset(stream); // reset the stream after a hit
        }
      } catch (e, st) {
        debugPrint('KWS stream error: $e\n$st');
      }
    });
  } catch (e, st) {
    debugPrint('KWS start failed: $e\n$st');
    if (!mounted) return;
    await showDialog(
      context: context,
      builder: (_) => AlertDialog(
        title: const Text('KWS 啟動失敗'),
        content: Text(e.toString()),
      ),
    );
  }
}
/// Stops recording, frees the current spotter stream, and replaces it with a
/// new one from the spotter (or null when no spotter exists).
Future<void> _stop() async {
  await _rec.stop();
  final staleStream = _stream;
  staleStream?.free();
  _stream = _kws?.createStream();
}
/// Releases everything this state owns: the recorder-state subscription, the
/// recorder, the native spotter stream and spotter, and the log notifier.
@override
void dispose() {
  _sub?.cancel();
  _rec.dispose();
  _stream?.free();
  _kws?.free();
  // Drop the freed native handles so a late audio callback cannot reuse them.
  _stream = null;
  _kws = null;
  // The notifier is created by this state, so it is disposed here as well.
  _log.dispose();
  super.dispose();
}
1. 自訂關鍵字
- 一行只設定一個關鍵字
- 要參照模型tokens詞彙表
- 每一個token之間要空格
- 添加@標籤文字,命中關鍵字時會回傳標籤文字
- 可以添加權重(浮點數,放冒號後,不用空格)、觸發門檻(#浮點數)
// Example entries: the second one adds a per-keyword boost and threshold.
const customKeywords = <String>[
'ā m í t uó f ó @阿彌陀佛',
'ā m ī t uó f ó :2.0 #0.2 @阿彌陀佛_boost', // boost 2.0, trigger threshold 0.2
];
有加權、降低觸發門檻,關鍵字更容易命中
設置關鍵字要對照模型tokens詞彙表進行轉換
2. 關鍵字加權與觸發門檻
- keywordsScore:
關鍵字加權分數,常見 1.0(不加分)~ 2.0
- keywordsThreshold:
關鍵字觸發門檻,中文KWS 常見設置 0.2 ~ 0.35,0.5 以上偏嚴格,可能漏檢。
// If an entry in the keywords file sets no boost or threshold of its own,
// the global values given here are used.
final cfg = await getKwsConfigByModelName(
modelName: modelName,
keywordsFilePath: customKeywordsPath,// path of the custom keywords file
keywordsScore: 1.4, // keyword boost
keywordsThreshold: 0.20, // keyword trigger threshold
);
_kws ??= sherpa_onnx.KeywordSpotter(cfg);
重點 | 內容 |
---|---|
KWS | 關鍵字偵測 |
實作核心 | 設置KeywordSpotter |
細節設置 | 關鍵字權重與觸發門檻 |