分享科技與遊戲 by HKGoldenMr.A: 語音合成 HTML5 Web Speech Synthesis API

2019-01-07

語音合成 HTML5 Web Speech Synthesis API

HTML5 Web Speech API 除了 Speech Recognition 功能還有 Speech Synthesis 功能
Speech Recognition 是將聲音轉成文字， Speech Synthesis 是將文字轉成聲音
HTML5 Web Speech Synthesis API 同樣是網頁技術標準之一，可以由使用者自行製作需要的功能

demo

SpeechSynthesis 還在草擬階段，但與 SpeechRecognition 不同，主流的瀏覽器都已經支援

// Keep a reference to the browser's speech synthesizer; later snippets call
// getVoices() and speak() on it.
var speechSynthesis = window.speechSynthesis;

SpeechSynthesis 是語音合成，提供多種語音合成聲音，需要載入語音合成聲音

// Ask the synthesizer for the voices it offers.
// NOTE: as the article explains right after this snippet, some browsers only
// provide the voices after the page has loaded (and possibly after a short delay).
var voices = window.speechSynthesis.getVoices();

但一些瀏覽器需要在頁面載入完成後才能載入語音合成聲音，甚至需要稍作延遲才能載入聲音種類，因此修改成

/**
 * Fill the shared `voices` array with the synthesizer's voices, sorted by name.
 *
 * Nothing happens when `voices` already contains entries. Otherwise the lookup
 * is deferred by 100 ms, because (as the article notes) some browsers need a
 * short delay before their voices can be retrieved.
 *
 * @param {SpeechSynthesis} [speechSynthesis] Synthesizer to query. Falls back
 *     to `window.speechSynthesis` when omitted.
 */
function loadVoices(speechSynthesis){
    if (voices.length === 0){
        window.setTimeout(function() {
            // Query the synthesizer that was handed in instead of always
            // reaching for the global; keep the global as a fallback so that
            // calls without an argument behave as before.
            var synth = speechSynthesis || window.speechSynthesis;
            voices = synth.getVoices();
            // Alphabetical order by voice name.
            voices.sort(function(voice1, voice2){
                return voice1.name.localeCompare(voice2.name);
            });
        }, 100);
    }
}

// Shared list of voices; stays empty until loadVoices() has filled it.
var voices = [];

// After the page has loaded, fetch the voices once and fetch them again
// whenever the synthesizer reports that its voice list changed.
window.addEventListener("load", function onPageLoad(){
    var reloadVoices = function(){
        loadVoices(speechSynthesis);
    };
    reloadVoices();
    speechSynthesis.addEventListener("voiceschanged", reloadVoices);
});

通過 SpeechSynthesis 朗讀出 SpeechSynthesisUtterance
SpeechSynthesisUtterance 有一些屬性影響朗讀效果

SpeechSynthesisUtterance.text
朗讀的內容
SpeechSynthesisUtterance.voice
朗讀時所使用的言語，需要傳入 SpeechSynthesisVoice 即是 speechSynthesis.getVoices() 陣列中的元素
但不同瀏覽器提供的言語會有不同
SpeechSynthesisUtterance.volume
朗讀音量，數值為 0 至 1 ， 1 是預設音量，在下認為 0.2 音量已經好足夠
SpeechSynthesisUtterance.rate
朗讀速度，數值為 0.1 至 10 ， 1 是預設速度
SpeechSynthesisUtterance.pitch
朗讀頻率，數值為 0 至 2 ， 1 是預設頻率

// Build the utterance (the thing to be read aloud) and configure it.
var utterance = new window.SpeechSynthesisUtterance();
// Text to read aloud.
utterance.text = "hello, world.";
// Voice to read with: an element of the speechSynthesis.getVoices() array.
utterance.voice = voices[0];
// Volume, 0 to 1 (1 is the default); the author finds 0.2 loud enough.
utterance.volume = 0.2;
// Speaking rate, 0.1 to 10 (1 is the default).
utterance.rate = 1;
// Pitch, 0 to 2 (1 is the default).
utterance.pitch = 1;

建立 SpeechSynthesisUtterance 後，便讓 SpeechSynthesis 朗讀出來

// Hand the configured utterance to the synthesizer so it is read aloud.
speechSynthesis.speak(utterance);

在下的測試
亦歡迎使用這個測試

Javascript

/**
 * Load the synthesizer's voices into the shared `voices` array (sorted by
 * name) and rebuild the options of the given <select> to match.
 *
 * Nothing happens when `voices` already contains entries. Otherwise the work
 * is deferred by 100 ms, because some browsers need a short delay before
 * their voices can be retrieved.
 *
 * @param {SpeechSynthesis} speechSynthesis Synthesizer to query for voices.
 * @param {HTMLSelectElement} select Drop-down that lists the voices; its
 *     option order matches the order of `voices`, so `selectedIndex` can be
 *     used as an index into `voices`.
 */
function loadVoices(speechSynthesis, select){
    if (voices.length === 0){
        window.setTimeout(function() {
            voices = speechSynthesis.getVoices();
            voices.sort(function(voice1, voice2){
                return voice1.name.localeCompare(voice2.name);
            });
            // Drop every existing option (including the empty placeholder).
            while (select.firstChild){
                select.removeChild(select.firstChild);
            }
            // Index loop instead of for...in: for...in would also visit
            // enumerable properties inherited from Array.prototype and
            // create bogus options for them.
            for (var i = 0; i < voices.length; i++){
                var voice = voices[i];
                var option = document.createElement("option");
                option.value = voice.lang;
                option.textContent = voice.name + " (" + voice.lang + ")";
                select.appendChild(option);
            }
        }, 100);
    }
}
 
/**
 * Read the text area's content aloud using the voice, volume, rate and pitch
 * currently chosen in the form. Any speech already in progress is stopped
 * first, so pressing the button restarts playback.
 */
function start(){
    stop();
    var utterance = new window.SpeechSynthesisUtterance();
    utterance.text = document.getElementById("text").value;
    // The options were built in the same order as `voices`, so the selected
    // index maps directly onto the array. Before the voices have loaded there
    // is no match; null lets the browser pick its default voice.
    utterance.voice = voices[document.getElementById("voice").selectedIndex] || null;
    // Slider values are strings; convert them explicitly to numbers.
    utterance.volume = Number(document.getElementById("volume").value);
    utterance.rate = Number(document.getElementById("rate").value);
    // The pitch slider was previously ignored; apply it like the others.
    utterance.pitch = Number(document.getElementById("pitch").value);
    speechSynthesis.speak(utterance);
}
 
/**
 * Stop any speech that is currently playing.
 *
 * SpeechSynthesis has no stop() method; cancel() is the call that halts the
 * current utterance and removes pending ones.
 */
function stop(){
    speechSynthesis.cancel();
}

/**
 * Mirror the current value of each slider (volume, rate, pitch) into the
 * read-only text box displayed next to it.
 */
function change(){
    // [id of the display box, id of the slider it mirrors]
    var pairs = [
        ["showVolume", "volume"],
        ["showRate", "rate"],
        ["showPitch", "pitch"]
    ];
    for (var k = 0; k < pairs.length; k++){
        var display = document.getElementById(pairs[k][0]);
        var slider = document.getElementById(pairs[k][1]);
        display.value = slider.value;
    }
}
 
// Shared state used by loadVoices(), start() and stop().
var voices = [];
var speechSynthesis = window.speechSynthesis;

// After the page has loaded, fill the voice drop-down once and fill it again
// whenever the synthesizer reports that its voice list changed.
window.addEventListener("load", function onPageLoad(){
    var voiceSelect = document.getElementById("voice");
    var refreshVoices = function(){
        loadVoices(speechSynthesis, voiceSelect);
    };
    refreshVoices();
    speechSynthesis.addEventListener("voiceschanged", refreshVoices);
});

HTML

<!--
  Demo form for the speech synthesis test.
  The script looks elements up by id: "voice", "text", "volume", "rate" and
  "pitch" are the inputs; "showVolume", "showRate" and "showPitch" are the
  read-only boxes that change() fills with the current slider values.
-->
<table border="1" width="95%">
    <colgroup>
        <col width="40"/>
    </colgroup>
    <tbody>
        <!-- Voice drop-down; the empty option is a placeholder that loadVoices() replaces. -->
        <tr valign="top"><td>言語</td><td><select id="voice">
            <option></option>
        </select></td></tr>
        <!-- Text to be read aloud. -->
        <tr valign="top"><td>文字</td><td><textarea id="text" rows="4" cols="40">hello, world. 你好，世界。</textarea></td></tr>
        <!-- Volume slider (0 to 1) with its read-only display box. -->
        <tr valign="top"><td>音量</td><td><input id="volume" type="range" min="0" max="1" step="0.01" value="0.2" style="width: 400px;" oninput="change();"/><input id="showVolume" type="text" value="0.2" style="width: 30px;" readonly="true"/></td></tr>
        <!-- Rate slider (0.1 to 10) with its read-only display box. -->
        <tr valign="top"><td>速度</td><td><input id="rate" type="range" min="0.1" max="10" step="0.1" value="1" style="width: 400px;" oninput="change();"/><input id="showRate" type="text" value="1" style="width: 30px;" readonly="true"/></td></tr>
        <!-- Pitch slider (0 to 2) with its read-only display box. -->
        <tr valign="top"><td>頻率</td><td><input id="pitch" type="range" min="0" max="2" step="0.1" value="1" style="width: 400px;" oninput="change();"/><input id="showPitch" type="text" value="1" style="width: 30px;" readonly="true"/></td></tr>
        <!-- Buttons: the first calls start() (replay), the second calls stop(). -->
        <tr valign="top"><td colspan="2"><input type="button" value="重新播放" onclick="start();"/><input type="button" value="停止播放" onclick="stop();"/></td></tr>
    </tbody>
</table>