AI 1—Intelligent Speech Technology
ICT | STEAM | |
Week 1 | In PyCharm, a Python script demonstrates how to run simple online speech recognition and generation using recognize_google and edge_tts. Syntax: import, print, function, parameters. | Introduce the Unihiker K10, demonstrate offline speech recognition and synthesis (Mind+ programming), and control the lights. P.S.: Remind students to register a Xiaozhi account.
|
Week 2 | Offline speech recognition and generation (FFmpeg and model pre-installed), using whisper and pyttsx3. Syntax: data type, input, comment | Laser-cutting basics instruction, shape design (related to community elderly care scenarios), and drawing (preparing basic shapes). |
Week 3 | Voice chat, different text outputs correspond to different voice messages. Syntax: if, elif, else | Installation and testing: install the Xiaozhi firmware (guidebook, students can flash it themselves), and change the smart agent settings in the Xiaozhi backend. |
Week 4 | Presentation PPT, Poster | Presentation, Peer assessment |
ICT代码
# -----------------------------------
# Speech synthesis (online)
# pip install edge_tts
import os

import edge_tts

text = "不用第三方库也能播放语音"  # sentence to synthesize
voice = "zh-CN-XiaoxiaoNeural"  # edge_tts voice name
file = "edge_tts.mp3"  # output path, relative to the working directory

# Build the request and write the audio to disk in one chained call,
# then hand the file to the default player (os.startfile is Windows-only).
edge_tts.Communicate(text, voice).save_sync(file)
os.startfile(file)
# -----------------------------------
# 语音合成(离线)
# pip install pyttsx3 pywin32
# import pyttsx3
# e = pyttsx3.init()
# txt = "离线语音合成测试"
# e.say(txt)
# e.save_to_file(txt,"pyttsx3.wav")
# e.runAndWait()
# --------------------------------------
# 语音识别(在线)
# pip install SpeechRecognition pyaudio
# import speech_recognition as sr
# r = sr.Recognizer()
# mic = sr.Microphone()
# print("开始录音,停止1秒后识别结果")
# mic.__enter__()
# audio = r.listen(mic)
# text = r.recognize_google(audio)
# print("Result ", text)
# --------------------------------------
# 语音识别(离线)
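# pip install "SpeechRecognition[whisper-local]"
# (recognize_whisper below needs the whisper-local extra; the "SpeechRecognition[faster-whisper]" extra is for recognize_faster_whisper instead)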
# 下载ffmpeg, https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-full-shared.7z
# 解压后把 bin/ffmpeg.exe 放到 C:\Windows\System32 里就行
# import speech_recognition as sr
# r = sr.Recognizer()
# mic = sr.Microphone()
# print("开始录音,停止1秒后识别结果")
# mic.__enter__()
# audio = r.listen(mic)
# text = r.recognize_whisper(audio)
# print("Result ", text)
STEAM代码
import asr
import time
# Recognition modes exposed by the asr module:
#   asr.ASR_MODE_SINGLE      single-shot
#   asr.ASR_MODE_CONTINUOUS  continuous
# Optional form that configures the detection time and the recognition mode:
#   asr.init_asr(6000,asr.ASR_MODE_SINGLE)
# Initialize the recognizer with its default settings.
asr.init_asr()
def cb(data):
    """Handle a recognition result by speaking a confirmation and logging it.

    Args:
        data: Value handed to the callback by the asr module. A value of 1
            queues the "light on" announcement; every other value queues
            the "light off" announcement.
    """
    if data == 1:
        asr.add_tts_data("灯光已打开")
    else:
        asr.add_tts_data("灯光已关闭")
    # Always log the received value so unexpected IDs are visible on the console.
    print("ID:", data)
# Register the callback that handles recognition results
asr.set_asr_callback(cb)
# Start speech recognition
asr.start_asr()
# Start speech synthesis
asr.start_tts()
# Register the command phrases as (numeric ID, phrase) pairs.
# NOTE(review): presumably the ID is the value later passed to cb — confirm against the asr module docs.
asr.add_asr_command(1,"kai deng")
asr.add_asr_command(2,"guan deng")
# Tell the user which wake phrase to say
print("请说‘你好,小鑫’唤醒")
try:
    # Idle in 1-second steps so the script stays alive until Ctrl+C is pressed.
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    print("\n捕获到 Ctrl+C 中断!")
#释放语音识别资源
asr.free_asr()
本地服务器
K10
# MindPlus
# DFRobot, 行空板 K10
from k10_base import WiFi
from unihiker_k10 import speaker,mic
import urequests # 必须加这个
# Main program starts here
# NOTE(review): the SSID and password are hard-coded; replace them with your own network's values.
wifi = WiFi()
wifi.connect(ssid="Oldmoon Pura 80 Pro",psd="98765431",timeout=50000)
# Busy-wait until wifi.status() reports a truthy value.
while not wifi.status():
    pass
# NOTE(review): eval() parses the string returned by wifi.info(); acceptable only
# because the text comes from the board's own firmware — never eval external input.
print(eval(wifi.info())[0])
#print(dir(speaker))
# Speech synthesis: fetch the audio from the local server's /tts endpoint
res = urequests.get("http://192.168.43.8:8000/tts")
# Write inside a `with` block so the file is flushed and closed before playback;
# an unclosed handle can leave the file truncated when the speaker reads it.
with open("/mindplus/tts_out.wav","wb") as f:
    f.write(res.content)
speaker.play_sys_music("tts_out.wav")
# Speech recognition: record from the microphone into sound.wav
# NOTE(review): time=5 is presumably a duration in seconds — confirm in the unihiker_k10 docs.
mic.recode_sys(name="sound.wav",time=5)
# Read the recording through a `with` block so the file handle is always released.
with open("/mindplus/sound.wav", "rb") as f:
    data = f.read()
res = urequests.post(
    "http://192.168.43.8:8000/asr",
    data=data,  # send the raw bytes as the request body
)
print("识别结果:", res.text)
Fastapi
import edge_tts,io
import pyttsx3
import speech_recognition as sr
# The K10 can only record and play WAV, so edge_tts (MP3 by default) cannot be used here.
@app.get("/tts")
async def tts(txt: str = "不用第三方库也能播放语音"):
    """Synthesize *txt* with pyttsx3 and return the result as a WAV file.

    Args:
        txt: Text to speak; taken from the ``txt`` query parameter, with a
            built-in demo sentence as the default.

    Returns:
        A FileResponse serving ``pyttsx3.wav`` with media type ``audio/wav``.
    """
    engine = pyttsx3.init()
    engine.save_to_file(txt, "pyttsx3.wav")
    # runAndWait() blocks until the queued save_to_file job has finished.
    engine.runAndWait()
    return FileResponse("pyttsx3.wav", media_type="audio/wav")
    # Kept for reference: online edge_tts variant that returns MP3 from memory.
    # buf = io.BytesIO()
    # async for d in edge_tts.Communicate(txt,"zh-HK-HiuGaaiNeural").stream():
    #     if d["type"]=="audio":buf.write(d["data"])
    # buf.seek(0)
    # return StreamingResponse(buf,media_type="audio/mpeg")
# The K10 can only upload raw binary, not a conventional multipart file upload.
@app.post('/asr')
async def asr(request: Request):
    """Recognize Mandarin speech from audio bytes sent as the raw request body.

    Args:
        request: Incoming request whose body is the audio file content.

    Returns:
        The recognized text, or a string starting with "识别失败" that carries
        the exception type and message when any step fails.
    """
    r = sr.Recognizer()
    try:
        # Read the audio bytes sent by the K10 and wrap them in a file-like object.
        audio_data = await request.body()
        audio_file = io.BytesIO(audio_data)
        with sr.AudioFile(audio_file) as source:
            audio = r.record(source)
        # Chinese recognition requires an explicit language argument.
        text = r.recognize_google(audio, language="zh-CN")
        return text
    except Exception as e:
        # Deliberate catch-all: the board only prints the response text,
        # so failures are reported in the body instead of raised.
        return f"识别失败:{type(e).__name__} {str(e)}"