在 Google Colab 中使用麦克风时出现 OSError:找不到默认输入设备(No Default Input Device Available)

4

无法在 Google Colab 中使用 SpeechRecognition 库。

代码:

import speech_recognition as sr

r = sr.Recognizer()

# sr.Microphone() opens the machine's default audio input device through
# PyAudio; when no such device exists it raises
# "OSError: No Default Input Device Available" before any audio is captured.
with sr.Microphone() as source:
    print("Speak Anything: ")
    audio = r.listen(source)

    try:
        text = r.recognize_google(audio)
        print("You said: {}".format(text))
    # Catch only the recognizer's own failures (unintelligible speech or a
    # failed API request) so Ctrl-C and programming errors are not swallowed.
    except (sr.UnknownValueError, sr.RequestError):
        print('Wrong')

我使用 apt 和 pip 安装了以下内容:

!apt install libasound2-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg
!pip install pyaudio
!pip install ConfigParser
!apt-get install python-gnuradio-audio-portaudio
!python -m pip install pyaudio
!pip install SpeechRecognition

错误信息:

---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
<ipython-input-34-65e65421018d> in <module>()
      4 r = sr.Recognizer()
      5 
----> 6 with sr.Microphone() as source:
      7     print("Speak Anything: ")
      8     audio = r.listen(source)

1 frames
/usr/local/lib/python3.6/dist-packages/pyaudio.py in get_default_input_device_info(self)
    947         """
    948 
--> 949         device_index = pa.get_default_input_device()
    950         return self.get_device_info_by_index(device_index)
    951 

OSError: No Default Input Device Available
-------------------------------------------------------------------------

补充说明:我使用的是 Google Colab。提前感谢!


这个回答解决了你的问题吗?“OSError: No Default Input Device Available” on Google Colab
1个回答

0
这段代码是有效的:
Javascript源代码:https://gist.github.com/korakot/c21c3476c024ad6d56d5f48b0bca92be#file-record-py
# all imports
import time
from base64 import b64decode
from io import BytesIO

from IPython.display import Javascript
from google.colab import output
!pip -q install pydub
from pydub import AudioSegment
!pip install -q ffmpeg-python
import ffmpeg
import speech_recognition as sr

# JavaScript source that is injected into the notebook output. It defines a
# global `recordUntilSilence()` returning a Promise: the browser microphone is
# recorded until the average analyser level stays below `threshold` for more
# than `silenceDelay` milliseconds, and the Promise resolves with the
# recording encoded as a base64 data URL.
RECORD = """
const sleep = time => new Promise(resolve => setTimeout(resolve, time))
const b2text = blob => new Promise((resolve, reject) => {
  const reader = new FileReader()
  reader.onloadend = e => resolve(e.target.result)
  reader.onerror = e => reject(new Error("Failed to read blob"))
  reader.readAsDataURL(blob)
})
var recordUntilSilence = time => new Promise(async (resolve, reject) => {
  let stream, recorder, chunks, blob, text, audioContext, analyser, dataArr, silenceStart, threshold = 50, silenceDelay = 2000
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true })
  } catch (err) {
    return reject(new Error("Failed to get media stream"))
  }
  // Stop every track of the captured stream so the microphone is released
  // once recording has finished or failed.
  const releaseStream = () => stream.getTracks().forEach(track => track.stop())
  audioContext = new AudioContext()
  const source = audioContext.createMediaStreamSource(stream)
  analyser = audioContext.createAnalyser()
  analyser.fftSize = 512
  dataArr = new Uint8Array(analyser.frequencyBinCount)
  source.connect(analyser)
  recorder = new MediaRecorder(stream)
  chunks = []
  recorder.ondataavailable = e => chunks.push(e.data)
  recorder.onstop = async () => {
    releaseStream()
    blob = new Blob(chunks)
    try {
      text = await b2text(blob)
      resolve(text)
    } catch (err) {
      reject(new Error("Failed to convert blob to text"))
    }
  }
  recorder.onerror = e => {
    releaseStream()
    reject(new Error("Recorder error"))
  }
  recorder.start()
  const checkSilence = () => {
    analyser.getByteFrequencyData(dataArr)
    const avg = dataArr.reduce((p, c) => p + c, 0) / dataArr.length

    if (avg < threshold) {
      // Start timing the quiet period, or stop once it has lasted long enough.
      if (silenceStart === null) silenceStart = new Date().getTime()
      else if (new Date().getTime() - silenceStart > silenceDelay) {
        recorder.stop()
        audioContext.close()
        return
      }
    } else {
      silenceStart = null
    }
    requestAnimationFrame(checkSilence)
  }
  silenceStart = null
  checkSilence()
})
console.log("JavaScript code executed successfully.")
"""

def fix_riff_header(binary: bytes) -> bytes:
    """Convert audio bytes to WAV with ffmpeg and patch the RIFF size field.

    The input is piped through ffmpeg (stdin -> stdout, output format
    ``wav``). Bytes 4-8 of the result are then overwritten with the real
    chunk size, ``len(output) - 8``, encoded as a 4-byte little-endian
    integer.
    NOTE(review): the size ffmpeg writes is presumably a placeholder because
    it cannot seek back on a pipe -- confirm.

    Args:
        binary: Encoded audio data in any container ffmpeg can read.

    Returns:
        The WAV bytes with a corrected RIFF chunk size.

    Raises:
        RuntimeError: If ffmpeg produced fewer than 8 bytes, i.e. there is no
            header to patch. The message carries ffmpeg's stderr output.
        OverflowError: If the output is too large for a 32-bit size field.
    """
    process = (
        ffmpeg
        .input('pipe:0')
        .output('pipe:1', format='wav')
        .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True,
                   quiet=True, overwrite_output=True)
    )
    output, err = process.communicate(input=binary)

    # Without at least the 8 header bytes there is nothing to patch; fail
    # loudly instead of returning a corrupt header.
    if len(output) < 8:
        detail = err.decode(errors='replace').strip() if err else 'no output'
        raise RuntimeError(f"ffmpeg did not produce a WAV header: {detail}")

    riff_chunk_size = len(output) - 8
    # Replace bytes 4:8 of ffmpeg's output with the actual RIFF chunk size.
    return output[:4] + riff_chunk_size.to_bytes(4, 'little') + output[8:]

# Sequence number of the next recording: record_until_silence() puts it in the
# output filename ("User_Response_<n>.wav") and then increments it.
Global_user_response = 1

def record_until_silence():
    """Record from the browser microphone until silence, then transcribe it.

    Injects the ``RECORD`` JavaScript into the notebook output, calls its
    ``recordUntilSilence()`` function, converts the returned base64 data URL
    to WAV via ``fix_riff_header``, writes it to
    ``User_Response_<n>.wav`` (``n`` comes from the module-level
    ``Global_user_response`` counter, which is incremented) and sends the
    file to ``recognize_google`` with language ``en-US``.

    Returns:
        The recognized text, or ``None`` if decoding, conversion, saving or
        recognition failed (the error is printed).
    """
    global Global_user_response

    display(Javascript(RECORD))
    print("Listening...")
    # Short pause before calling into the injected script -- presumably to
    # give the browser time to evaluate it.
    time.sleep(1)
    data_url = output.eval_js('recordUntilSilence()')
    print("Done Listening !")

    try:
        # The data URL has the form "<prefix>,<base64 payload>".
        wav_bytes = fix_riff_header(b64decode(data_url.split(',')[1]))

        filename = "User_Response_" + str(Global_user_response) + ".wav"
        Global_user_response += 1
        with open(filename, 'wb') as f:
            f.write(wav_bytes)

        recognizer = sr.Recognizer()
        with sr.AudioFile(filename) as source:
            audio = recognizer.record(source)
        return recognizer.recognize_google(audio, language='en-US')
    except Exception as e:
        # Best-effort boundary: report the failure and let the caller go on.
        print(f"An error occurred: {e}")
        return None
    
# Record one utterance and transcribe it.
record_until_silence()

这段代码使用 JavaScript 启用浏览器麦克风,持续录音直到检测到静音,将录音保存为 wav 文件,然后使用 Google 语音识别(SpeechRecognition 库的 recognize_google)将其转换为文本。修复 RIFF 头部是为了把 wav 文件头部中的块大小字段改写为实际大小,从而解决文件头部问题。截至10/23,这段代码对我来说是有效的。

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接