110 lines
3.7 KiB
Python
110 lines
3.7 KiB
Python
|
import asyncio
|
||
|
import io
|
||
|
|
||
|
import aiortc
|
||
|
import av
|
||
|
import numpy as np
|
||
|
import pyaudio
|
||
|
|
||
|
|
||
|
class AudioInputStreamTrack(aiortc.mediastreams.AudioStreamTrack):
|
||
|
PYAUDIO_TO_AV_FORMAT_MAP = {
|
||
|
pyaudio.paUInt8: 'u8',
|
||
|
pyaudio.paInt16: 's16',
|
||
|
pyaudio.paInt24: 's24',
|
||
|
pyaudio.paInt32: 's32',
|
||
|
pyaudio.paFloat32: 'flt',
|
||
|
}
|
||
|
|
||
|
def __init__(self, audio_format: int = pyaudio.paInt16, rate: int = 16000, channels: int = 1, packet_time: float = 0.020, device_index: int = None):
|
||
|
super().__init__()
|
||
|
|
||
|
self.p = pyaudio.PyAudio()
|
||
|
chunk_size = int(packet_time * rate)
|
||
|
self.stream = self.p.open(format=audio_format,
|
||
|
channels=channels,
|
||
|
rate=rate,
|
||
|
frames_per_buffer=chunk_size,
|
||
|
input=True,
|
||
|
input_device_index=device_index)
|
||
|
self.format = audio_format
|
||
|
self.rate = rate
|
||
|
self.channels = channels
|
||
|
self.packet_time = packet_time
|
||
|
self.chunk_size = chunk_size
|
||
|
self.pts = 0
|
||
|
|
||
|
async def recv(self):
|
||
|
mic_data = self.stream.read(self.chunk_size)
|
||
|
mic_array = np.frombuffer(mic_data, dtype=np.int16)
|
||
|
mic_array = np.expand_dims(mic_array, axis=0)
|
||
|
layout = 'stereo' if self.channels > 1 else 'mono'
|
||
|
frame = av.AudioFrame.from_ndarray(mic_array, format=self.PYAUDIO_TO_AV_FORMAT_MAP[self.format], layout=layout)
|
||
|
frame.rate = self.rate
|
||
|
frame.pts = self.pts
|
||
|
self.pts += frame.samples
|
||
|
|
||
|
return frame
|
||
|
|
||
|
|
||
|
class AudioOutputSpeaker:
|
||
|
def __init__(self, audio_format: int = pyaudio.paInt16, rate: int = 48000, channels: int = 2, packet_time: float = 0.2, device_index: int = None):
|
||
|
|
||
|
chunk_size = int(packet_time * rate)
|
||
|
self.p = pyaudio.PyAudio()
|
||
|
self.buffer = io.BytesIO()
|
||
|
self.channels = channels
|
||
|
self.stream = self.p.open(format=audio_format,
|
||
|
channels=channels,
|
||
|
rate=rate,
|
||
|
frames_per_buffer=chunk_size,
|
||
|
output=True,
|
||
|
output_device_index=device_index,
|
||
|
stream_callback=self.__pyaudio_callback)
|
||
|
self.tracks_and_tasks: list[tuple[aiortc.MediaStreamTrack, asyncio.Task | None]] = []
|
||
|
|
||
|
def __pyaudio_callback(self, in_data, frame_count, time_info, status):
|
||
|
if self.buffer.getbuffer().nbytes < frame_count * self.channels * 2:
|
||
|
buff = b'\x00\x00' * frame_count * self.channels
|
||
|
elif self.buffer.getbuffer().nbytes > 115200: # 3x the usual read size
|
||
|
self.buffer.seek(0)
|
||
|
buff = self.buffer.read(frame_count * self.channels * 4)
|
||
|
buff = buff[:frame_count * self.channels * 2]
|
||
|
self.buffer.seek(2)
|
||
|
else:
|
||
|
self.buffer.seek(0)
|
||
|
buff = self.buffer.read(frame_count * self.channels * 2)
|
||
|
self.buffer.seek(2)
|
||
|
return (buff, pyaudio.paContinue)
|
||
|
|
||
|
async def __consume(self, track):
|
||
|
while True:
|
||
|
try:
|
||
|
frame = await track.recv()
|
||
|
except aiortc.MediaStreamError:
|
||
|
return
|
||
|
|
||
|
self.buffer.write(bytes(frame.planes[0]))
|
||
|
|
||
|
def hasTrack(self, track: aiortc.MediaStreamTrack) -> bool:
|
||
|
return any(t == track for t, _ in self.tracks_and_tasks)
|
||
|
|
||
|
def addTrack(self, track: aiortc.MediaStreamTrack):
|
||
|
if not self.hasTrack(track):
|
||
|
self.tracks_and_tasks.append((track, None))
|
||
|
|
||
|
def start(self):
|
||
|
for index, (track, task) in enumerate(self.tracks_and_tasks):
|
||
|
if task is None:
|
||
|
self.tracks_and_tasks[index] = (track, asyncio.create_task(self.__consume(track)))
|
||
|
|
||
|
def stop(self):
|
||
|
for _, task in self.tracks_and_tasks:
|
||
|
if task is not None:
|
||
|
task.cancel()
|
||
|
|
||
|
self.tracks_and_tasks = []
|
||
|
self.stream.stop_stream()
|
||
|
self.stream.close()
|
||
|
self.p.terminate()
|