这段代码是有效的:
JavaScript 源代码:
https://gist.github.com/korakot/c21c3476c024ad6d56d5f48b0bca92be#file-record-py
from IPython.display import Javascript
from google.colab import output
from base64 import b64decode
from io import BytesIO
!pip -q install pydub
from pydub import AudioSegment
!pip install -q ffmpeg-python
import ffmpeg
# JavaScript source that is injected into the notebook output and later
# invoked through ``recordUntilSilence()``.
#
# What the script does:
#   * ``b2text`` reads a Blob with ``FileReader.readAsDataURL`` and resolves
#     with the resulting data URL string.
#   * ``recordUntilSilence`` opens the microphone, records it with a
#     ``MediaRecorder`` and, on every animation frame, averages the
#     analyser's byte frequency data.  When that average stays below
#     ``threshold`` (50) for more than ``silenceDelay`` (2000 ms) the
#     recorder is stopped, the audio context is closed and the promise
#     resolves with the recording as a data URL.
#   * When the recorder stops, every track of the microphone stream is
#     stopped as well, so the capture device is released once recording ends.
#
# The ``time`` argument of ``recordUntilSilence`` and the ``sleep`` helper are
# not used by the script itself; they are kept so existing callers still work.
RECORD = """
const sleep = time => new Promise(resolve => setTimeout(resolve, time))
const b2text = blob => new Promise((resolve, reject) => {
const reader = new FileReader()
reader.onloadend = e => resolve(e.target.result)
reader.onerror = e => reject(new Error("Failed to read blob"))
reader.readAsDataURL(blob)
})
var recordUntilSilence = time => new Promise(async (resolve, reject) => {
let stream, recorder, chunks, blob, text, audioContext, analyser, dataArr, silenceStart, threshold = 50, silenceDelay = 2000
try {
stream = await navigator.mediaDevices.getUserMedia({ audio: true })
} catch (err) {
return reject(new Error("Failed to get media stream"))
}
audioContext = new AudioContext()
const source = audioContext.createMediaStreamSource(stream)
analyser = audioContext.createAnalyser()
analyser.fftSize = 512
dataArr = new Uint8Array(analyser.frequencyBinCount)
source.connect(analyser)
recorder = new MediaRecorder(stream)
chunks = []
recorder.ondataavailable = e => chunks.push(e.data)
recorder.onstop = async () => {
stream.getTracks().forEach(track => track.stop())
blob = new Blob(chunks)
try {
text = await b2text(blob)
resolve(text)
} catch (err) {
reject(new Error("Failed to convert blob to text"))
}
}
recorder.onerror = e => reject(new Error("Recorder error"))
recorder.start()
const checkSilence = () => {
analyser.getByteFrequencyData(dataArr)
const avg = dataArr.reduce((p, c) => p + c, 0) / dataArr.length
if (avg < threshold) {
if (silenceStart === null) silenceStart = new Date().getTime()
else if (new Date().getTime() - silenceStart > silenceDelay) {
recorder.stop()
audioContext.close()
return
}
} else {
silenceStart = null
}
requestAnimationFrame(checkSilence)
}
silenceStart = null
checkSilence()
})
console.log("JavaScript code executed successfully.")
"""
def fix_riff_header(binary):
    """Convert audio bytes to WAV with ffmpeg and patch the RIFF size field.

    The input is piped through ffmpeg (stdin to stdout, ``format='wav'``).
    Bytes 4-7 of the result are then overwritten with ``len(output) - 8``
    encoded as a 4-byte little-endian integer.

    Args:
        binary: Raw bytes of the recording, written to ffmpeg's stdin.

    Returns:
        The WAV bytes produced by ffmpeg with the RIFF chunk size replaced.

    Raises:
        RuntimeError: If ffmpeg returned fewer than 8 bytes, i.e. there is
            no RIFF header to patch. The message carries the tail of
            ffmpeg's stderr.
    """
    process = (
        ffmpeg
        .input('pipe:0')
        .output('pipe:1', format='wav')
        .run_async(
            pipe_stdin=True,
            pipe_stdout=True,
            pipe_stderr=True,
            quiet=True,
            overwrite_output=True,
        )
    )
    output, err = process.communicate(input=binary)

    # Without at least "RIFF" + size there is nothing to patch; surface
    # ffmpeg's own diagnostics instead of fabricating a header.
    if len(output) < 8:
        detail = err.decode(errors="replace").strip() if err else ""
        raise RuntimeError(
            f"ffmpeg did not produce a WAV header: {detail[-500:] or 'no output'}"
        )

    # NOTE(review): presumably needed because ffmpeg cannot seek back on a
    # pipe to fill in the final size - confirm against the ffmpeg docs.
    riff_chunk_size = len(output) - 8
    # Masking keeps the value inside the 32-bit field (same wrap-around as
    # taking the four low-order bytes one at a time).
    size_field = (riff_chunk_size & 0xFFFFFFFF).to_bytes(4, "little")
    return output[:4] + size_field + output[8:]
# Running counter used to number the saved recordings
# (User_Response_<n>.wav); record_until_silence() increments it after
# choosing each filename, so the first file is User_Response_1.wav.
Global_user_response = 1
def record_until_silence():
    """Record from the browser microphone until silence, then transcribe.

    Steps:
      1. Inject the ``RECORD`` JavaScript into the notebook output.
      2. Wait for ``recordUntilSilence()`` to resolve with a data URL.
      3. Base64-decode the payload and pass it through ``fix_riff_header``.
      4. Save the result as ``User_Response_<n>.wav`` and bump the
         module-level counter ``Global_user_response``.
      5. Read the file back and send it to ``recognize_google``
         (``language='en-US'``).

    Returns:
        The recognised text, or ``None`` when reading or recognising the
        saved file raised an exception (the error is printed).
    """
    # Imported locally: the visible code calls time.sleep() but never
    # imports ``time`` at module level.
    import time

    global Global_user_response

    display(Javascript(RECORD))
    print("Listening...")
    # Short pause before invoking the injected function.
    time.sleep(1)
    data_url = output.eval_js('recordUntilSilence()')
    print("Done Listening !")
    # Report sizes only; dumping the whole payload floods the cell output.
    print(f"Received {len(data_url)} characters of base64 audio")

    # The data URL has the form "<prefix>,<base64 payload>".
    wav_bytes = fix_riff_header(b64decode(data_url.split(',')[1]))
    print(f"Converted to {len(wav_bytes)} bytes of WAV")

    filename = f"User_Response_{Global_user_response}.wav"
    Global_user_response += 1
    with open(filename, 'wb') as f:
        f.write(wav_bytes)

    # NOTE(review): ``sr`` is not imported in the visible code; presumably
    # ``import speech_recognition as sr`` runs in an earlier cell - confirm.
    r = sr.Recognizer()
    try:
        with sr.AudioFile(filename) as source:
            audio = r.record(source)
        return r.recognize_google(audio, language='en-US')
    except Exception as e:
        # Best-effort boundary: report the failure and signal it with None.
        print(f"An error occurred: {e}")
        return None
# Run one capture right away; the returned transcript is not stored in a
# variable here.
record_until_silence()
这段代码使用 JavaScript 启用麦克风并持续监听,直到检测到一段静音后停止录音;随后将录音存储为临时 wav 文件,再使用 Google 语音识别(`recognize_google`)将其转换为文本。修复 RIFF 头部是为了解决 ffmpeg 输出的 wav 文件头部中大小字段的问题。截至 10/23,这段代码对我来说是有效的。