33import threading
44from enum import Enum
55
6+ import av
67from libcamera import controls
78
89import picamera2 .formats as formats
@@ -25,7 +26,25 @@ class Quality(Enum):
2526
2627
2728class Encoder :
28- """Base class for encoders"""
29+ """
30+ Base class for encoders.
31+
32+ Mostly this defines the API for derived encoder classes, but it also handles optional audio encoding.
33+ For audio, a separate thread is started, which encodes audio packets and forwards them to the
34+ encoder's output object(s). This only works when the output object understands the audio stream,
35+ meaning that (at the time of writing) this must be a PyavOutput (though you could send output there
36+ via a CircularOutput2).
37+
38+ Additional audio parameters:
39+ audio - set to True to enable audio encoding and output.
40+ audio_input - list of parameters that is passed to PyAv.open to create the audio input.
41+ audio_output - list of parameters passed to PyAv add_stream to define the audio codec and output stream.
42+ audio_sync - value (in us) by which to advance the audio stream to better sync with the video.
43+
44+ Reasonable defaults are supplied so that applications can often just set the audio property to True.
45+ The audio_input and audio_output parameters are passed directly to PyAV, so will accept whatever PyAV
46+ understands.
47+ """
2948
3049 def __init__ (self ):
3150 """Initialises encoder"""
@@ -40,6 +59,15 @@ def __init__(self):
4059 self .firsttimestamp = None
4160 self .frame_skip_count = 1
4261 self ._skip_count = 0
62+ self ._output_lock = threading .Lock ()
63+ # Set to True to enable audio.
64+ self .audio = False
65+ # These parameters are passed to Pyav to open the input audio container.
66+ self .audio_input = {'file' : 'default' , 'format' : 'pulse' }
67+ # These parameters are passed to Pyav for creating the encoded audio output stream.
68+ self .audio_output = {'codec_name' : 'aac' }
69+ self .audio_sync = - 100000 # in us, so by default, delay audio by 100ms
70+ self ._audio_start = threading .Event ()
4371
4472 @property
4573 def running (self ):
@@ -208,6 +236,8 @@ def encode(self, stream, request):
208236 :param request: Request
209237 :type request: request
210238 """
239+ if self .audio :
240+ self ._audio_start .set () # Signal the audio encode thread to start.
211241 if self ._skip_count == 0 :
212242 with self ._lock :
213243 self ._encode (stream , request )
@@ -226,10 +256,24 @@ def start(self, quality=None):
226256 raise RuntimeError ("Encoder already running" )
227257 self ._setup (quality )
228258 self ._running = True
259+ self .firsttimestamp = None
229260 for out in self ._output :
230261 out .start ()
231262 self ._start ()
232263
264+ # Start the audio, if that's been requested.
265+ if self .audio :
266+ self ._audio_input_container = av .open (** self .audio_input )
267+ self ._audio_input_stream = self ._audio_input_container .streams .get (audio = 0 )[0 ]
268+ self ._audio_output_container = av .open ("/dev/null" , 'w' , format = "null" )
269+ self ._audio_output_stream = self ._audio_output_container .add_stream (** self .audio_output )
270+ # Outputs that can handle audio need to be told about its existence.
271+ for out in self ._output :
272+ out ._add_stream (self ._audio_output_stream , ** self .audio_output )
273+ self ._audio_thread = threading .Thread (target = self ._audio_thread_func , daemon = True )
274+ self ._audio_start .clear ()
275+ self ._audio_thread .start () # audio thread will wait for the _audio_start event.
276+
233277 def _start (self ):
234278 pass
235279
@@ -239,26 +283,28 @@ def stop(self):
239283 raise RuntimeError ("Encoder already stopped" )
240284 self ._running = False
241285 self ._stop ()
286+ if self .audio :
287+ self ._audio_start .set () # just in case it wasn't!
288+ self ._audio_thread .join ()
289+ self ._audio_input_container .close ()
290+ self ._audio_output_container .close ()
242291 for out in self ._output :
243292 out .stop ()
244293
245294 def _stop (self ):
246295 pass
247296
248- def outputframe (self , frame , keyframe = True , timestamp = None , packet = None ):
297+ def outputframe (self , frame , keyframe = True , timestamp = None , packet = None , audio = False ):
249298 """Writes a frame
250299
251300 :param frame: Frame
252301 :type frame: bytes
253302 :param keyframe: Whether frame is a keyframe or not, defaults to True
254303 :type keyframe: bool, optional
255304 """
256- if packet :
257- for out in self ._output :
258- out .outputframe (frame , keyframe , timestamp , packet )
259- else :
305+ with self ._output_lock :
260306 for out in self ._output :
261- out .outputframe (frame , keyframe , timestamp )
307+ out .outputframe (frame , keyframe , timestamp , packet , audio )
262308
263309 def _setup (self , quality ):
264310 pass
@@ -272,3 +318,33 @@ def _timestamp(self, request):
272318 else :
273319 timestamp_us = ts - self .firsttimestamp
274320 return timestamp_us
321+
322+ def _handle_audio_packet (self , audio_packet ):
323+ # Write out audio an packet, dealing with timestamp adjustments.
324+ time_scale_factor = 1000000 * self ._audio_output_stream .codec_context .time_base
325+ delta = int (self .audio_sync / time_scale_factor ) # convert to audio time base
326+ audio_packet .pts -= delta
327+ audio_packet .dts -= delta
328+ timestamp = int (audio_packet .pts * time_scale_factor ) # want this in us
329+ if audio_packet .pts >= 0 :
330+ self .outputframe (None , True , timestamp , audio_packet , True )
331+
332+ def _audio_thread_func (self ):
333+ # Audio thread that fetches audio packets, encodes them and forwards them to the output.
334+ # The output has to be able to understand audio, which means using a PyavOutput.
335+ # _audio_start gets signalled when the first video frame is submitted for encode, which will hopefully
336+ # keep the audio_sync adjustment more similar across different devices. Until that happens, though,
337+ # we must keep consuming and discarding the audio.
338+ for _ in self ._audio_input_container .decode (self ._audio_input_stream ):
339+ if self ._audio_start .isSet ():
340+ break
341+
342+ for audio_frame in self ._audio_input_container .decode (self ._audio_input_stream ):
343+ if not self ._running :
344+ break
345+ for audio_packet in self ._audio_output_stream .encode (audio_frame ):
346+ self ._handle_audio_packet (audio_packet )
347+
348+ # Flush out any remaining audio packets.
349+ for audio_packet in self ._audio_output_stream .encode (None ):
350+ self ._handle_audio_packet (audio_packet )
0 commit comments