Skip to content

Commit 1786285

Browse files
committed
Audio/video sync improvements for PyavOutput and CircularOutput2
Plus a few other miscellaneous fixes, and also a new streaming example. Signed-off-by: David Plowman <david.plowman@raspberrypi.com>
1 parent 3d9cd3c commit 1786285

File tree

10 files changed

+180
-33
lines changed

10 files changed

+180
-33
lines changed

examples/pyav_stream2.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#!/usr/bin/python3
2+
3+
import socket
4+
from threading import Event
5+
6+
from picamera2 import Picamera2
7+
from picamera2.encoders import H264Encoder
8+
from picamera2.outputs import PyavOutput
9+
10+
picam2 = Picamera2()
11+
video_config = picam2.create_video_configuration({"size": (1280, 720), 'format': 'YUV420'})
12+
picam2.configure(video_config)
13+
14+
encoder = H264Encoder(bitrate=10000000)
15+
encoder.audio = True
16+
17+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
18+
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
19+
sock.bind(("0.0.0.0", 8888))
20+
21+
while True:
22+
print("Waiting")
23+
sock.listen()
24+
25+
conn, addr = sock.accept()
26+
print("Connected")
27+
28+
output = PyavOutput(f"pipe:{conn.fileno()}", format="mpegts")
29+
event = Event()
30+
output.error_callback = lambda e: event.set() # noqa
31+
32+
picam2.start_recording(encoder, output)
33+
34+
event.wait()
35+
print("Disconnected")
36+
37+
picam2.stop_recording()

picamera2/encoders/encoder.py

Lines changed: 83 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import threading
44
from enum import Enum
55

6+
import av
67
from libcamera import controls
78

89
import picamera2.formats as formats
@@ -25,7 +26,25 @@ class Quality(Enum):
2526

2627

2728
class Encoder:
28-
"""Base class for encoders"""
29+
"""
30+
Base class for encoders.
31+
32+
Mostly this defines the API for derived encoder classes, but it also handles optional audio encoding.
33+
For audio, a separate thread is started, which encodes audio packets and forwards them to the
34+
encoder's output object(s). This only work when the output object understands the audio stream,
35+
meaning that (at the time of writing) this must be a PyavOutput (though you could send output there
36+
via a CircularOutput2).
37+
38+
Additional audio parameters:
39+
audio - set to True to enable audio encoding and output.
40+
audio_input - list of parameters that is passed to PyAv.open to create the audio input.
41+
audio_output - list of parameters passed to PyAv add_stream to define the audio codec and output stream.
42+
audio_sync - value (in us) by which to advance the audio stream to better sync with the video.
43+
44+
Reasonable defaults are supplied so that applications can often just set the audio property to True.
45+
The audio_input and audio_output parameters are passed directly to PyAV, so will accept whatever PyAV
46+
understands.
47+
"""
2948

3049
def __init__(self):
3150
"""Initialises encoder"""
@@ -40,6 +59,15 @@ def __init__(self):
4059
self.firsttimestamp = None
4160
self.frame_skip_count = 1
4261
self._skip_count = 0
62+
self._output_lock = threading.Lock()
63+
# Set to True to enable audio.
64+
self.audio = False
65+
# These parameters are passed to Pyav to open the input audio container.
66+
self.audio_input = {'file': 'default', 'format': 'pulse'}
67+
# THese parameters are passed to Pyav for creating the encoded audio output stream.
68+
self.audio_output = {'codec_name': 'aac'}
69+
self.audio_sync = -100000 # in us, so by default, delay audio by 100ms
70+
self._audio_start = threading.Event()
4371

4472
@property
4573
def running(self):
@@ -208,6 +236,8 @@ def encode(self, stream, request):
208236
:param request: Request
209237
:type request: request
210238
"""
239+
if self.audio:
240+
self._audio_start.set() # Signal the audio encode thread to start.
211241
if self._skip_count == 0:
212242
with self._lock:
213243
self._encode(stream, request)
@@ -226,10 +256,24 @@ def start(self, quality=None):
226256
raise RuntimeError("Encoder already running")
227257
self._setup(quality)
228258
self._running = True
259+
self.firsttimestamp = None
229260
for out in self._output:
230261
out.start()
231262
self._start()
232263

264+
# Start the audio, if that's been requested.
265+
if self.audio:
266+
self._audio_input_container = av.open(**self.audio_input)
267+
self._audio_input_stream = self._audio_input_container.streams.get(audio=0)[0]
268+
self._audio_output_container = av.open("/dev/null", 'w', format="null")
269+
self._audio_output_stream = self._audio_output_container.add_stream(**self.audio_output)
270+
# Outputs that can handle audio need to be told about its existence.
271+
for out in self._output:
272+
out._add_stream(self._audio_output_stream, **self.audio_output)
273+
self._audio_thread = threading.Thread(target=self._audio_thread_func, daemon=True)
274+
self._audio_start.clear()
275+
self._audio_thread.start() # audio thread will wait for the _audio_start event.
276+
233277
def _start(self):
234278
pass
235279

@@ -239,26 +283,28 @@ def stop(self):
239283
raise RuntimeError("Encoder already stopped")
240284
self._running = False
241285
self._stop()
286+
if self.audio:
287+
self._audio_start.set() # just in case it wasn't!
288+
self._audio_thread.join()
289+
self._audio_input_container.close()
290+
self._audio_output_container.close()
242291
for out in self._output:
243292
out.stop()
244293

245294
def _stop(self):
246295
pass
247296

248-
def outputframe(self, frame, keyframe=True, timestamp=None, packet=None):
297+
def outputframe(self, frame, keyframe=True, timestamp=None, packet=None, audio=False):
249298
"""Writes a frame
250299
251300
:param frame: Frame
252301
:type frame: bytes
253302
:param keyframe: Whether frame is a keyframe or not, defaults to True
254303
:type keyframe: bool, optional
255304
"""
256-
if packet:
257-
for out in self._output:
258-
out.outputframe(frame, keyframe, timestamp, packet)
259-
else:
305+
with self._output_lock:
260306
for out in self._output:
261-
out.outputframe(frame, keyframe, timestamp)
307+
out.outputframe(frame, keyframe, timestamp, packet, audio)
262308

263309
def _setup(self, quality):
264310
pass
@@ -272,3 +318,33 @@ def _timestamp(self, request):
272318
else:
273319
timestamp_us = ts - self.firsttimestamp
274320
return timestamp_us
321+
322+
def _handle_audio_packet(self, audio_packet):
323+
# Write out audio an packet, dealing with timestamp adjustments.
324+
time_scale_factor = 1000000 * self._audio_output_stream.codec_context.time_base
325+
delta = int(self.audio_sync / time_scale_factor) # convert to audio time base
326+
audio_packet.pts -= delta
327+
audio_packet.dts -= delta
328+
timestamp = int(audio_packet.pts * time_scale_factor) # want this in us
329+
if audio_packet.pts >= 0:
330+
self.outputframe(None, True, timestamp, audio_packet, True)
331+
332+
def _audio_thread_func(self):
333+
# Audio thread that fetches audio packets, encodes them and forwards them to the output.
334+
# The output has to be able to understand audio, which means using a PyavOutput.
335+
# _audio_start gets signalled when the first video frame is submitted for encode, which will hopefully
336+
# keep the audio_sync adjustment more similar across different devices. Until that happens, though,
337+
# we must keep consuming and discarding the audio.
338+
for _ in self._audio_input_container.decode(self._audio_input_stream):
339+
if self._audio_start.isSet():
340+
break
341+
342+
for audio_frame in self._audio_input_container.decode(self._audio_input_stream):
343+
if not self._running:
344+
break
345+
for audio_packet in self._audio_output_stream.encode(audio_frame):
346+
self._handle_audio_packet(audio_packet)
347+
348+
# Flush out any remaining audio packets.
349+
for audio_packet in self._audio_output_stream.encode(None):
350+
self._handle_audio_packet(audio_packet)

picamera2/outputs/circularoutput.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def buffersize(self, value):
4040
self._buffersize = value
4141
self._circular = collections.deque(maxlen=value)
4242

43-
def outputframe(self, frame, keyframe=True, timestamp=None, packet=None):
43+
def outputframe(self, frame, keyframe=True, timestamp=None, packet=None, audio=False):
4444
"""Write frame to circular buffer
4545
4646
:param frame: Frame
@@ -50,6 +50,8 @@ def outputframe(self, frame, keyframe=True, timestamp=None, packet=None):
5050
:param timestamp: Timestamp of frame
5151
:type timestamp: int
5252
"""
53+
if audio:
54+
raise RuntimeError("CircularOutput does not support audio")
5355
with self._lock:
5456
if self._buffersize == 0:
5557
return

picamera2/outputs/circularoutput2.py

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,19 @@
77

88

99
class CircularOutput2(Output):
10-
"""Circular buffer implementation for general outputs
10+
"""
11+
Circular buffer implementation, much like CircularOutput, but for general outputs.
12+
13+
This means it can be used in conjunction with, for example, a PyavOutput to create time-shifted
14+
recordings of both video and audio straight to an mp4 file.
1115
12-
Very like the original CircularOutput, but this version can also be used with a
13-
PyavOutput underneath, so as directly to create mp4 files.
16+
Once the CircularOutput2 has been started, use the open_output method to start start recording
17+
a new output, and use close_output when finished. If the output has not been closed when the
18+
circular buffer is stopped, then the remainder of the buffer will be flush into the output.
1419
"""
1520

16-
def __init__(self, pts=None, buffer_duration_ms=5000, always_output=True):
17-
"""Creates circular buffer for 5s worth of 30fps frames"""
21+
def __init__(self, pts=None, buffer_duration_ms=5000):
22+
"""Create a CircularOutput2."""
1823
super().__init__(pts=pts)
1924
# A note on locking. The lock is principally to protect outputframe, which is called by
2025
# the background encoder thread. Applications are going to call things like open_output,
@@ -27,7 +32,6 @@ def __init__(self, pts=None, buffer_duration_ms=5000, always_output=True):
2732
raise RuntimeError("buffer_duration_ms may not be negative")
2833
self._buffer_duration_ms = buffer_duration_ms
2934
self._circular = collections.deque()
30-
self.always_output = always_output
3135
self._output = None
3236
self._output_available = False
3337
self._streams = []
@@ -44,7 +48,7 @@ def buffer_duration_ms(self, value):
4448
self._buffer_duration_ms = value
4549

4650
def open_output(self, output):
47-
"""Set a new output object"""
51+
"""Open a new output object and start writing to it."""
4852
if self._output:
4953
raise RuntimeError("Underlying output must be closed first")
5054

@@ -72,29 +76,32 @@ def close_output(self):
7276
self._output = None
7377

7478
def _get_frame(self):
79+
# Fetch the next frame to be saved to the underlying output.
7580
if not self._circular:
7681
return
7782
if not self._first_frame:
7883
return self._circular.popleft()
7984
# Must skip ahead to the first I frame if we haven't seen one yet.
8085
while self._circular:
8186
entry = self._circular.popleft()
82-
_, key_frame, _, _ = entry
83-
if key_frame:
87+
_, key_frame, _, _, audio = entry
88+
# If there is audio, all audio frames are likely to be keyframes, so we must ignore them when
89+
# deciding when the streams can resume - only the video counts.
90+
if key_frame and not audio:
8491
self._first_frame = False
8592
return entry
8693

87-
def outputframe(self, frame, keyframe=True, timestamp=None, packet=None):
94+
def outputframe(self, frame, keyframe=True, timestamp=None, packet=None, audio=False):
8895
"""Write frame to circular buffer"""
8996
with self._lock:
9097
if self._buffer_duration_ms == 0 or not self.recording:
9198
return
92-
self._circular.append((frame, keyframe, timestamp, packet))
99+
self._circular.append((frame, keyframe, timestamp, packet, audio))
93100
# Discard any expired buffer entries.
94101
while timestamp - self._circular[0][2] > self._buffer_duration_ms * 1000:
95102
self._circular.popleft()
96103

97-
if self._output_available and self.always_output:
104+
if self._output_available:
98105
# Actually write this to the underlying output.
99106
entry = self._get_frame()
100107
if entry:
@@ -108,7 +115,7 @@ def start(self):
108115
self.recording = True
109116

110117
def stop(self):
111-
"""Close file handle and prevent recording"""
118+
"""Close file handle and stop recording"""
112119
with self._lock:
113120
if not self.recording:
114121
raise RuntimeError("Circular output was not started")
@@ -123,5 +130,7 @@ def stop(self):
123130
self._output.stop()
124131
self._output = None
125132

126-
def _add_stream(self, encoder_stream, codec, **kwargs):
127-
self._streams.append((encoder_stream, codec, kwargs))
133+
def _add_stream(self, encoder_stream, codec_name, **kwargs):
134+
# Notice the PyavOutput of a stream that will be sending it packets to write out. It will need
135+
# to forward these whenever a new underlying output is opened.
136+
self._streams.append((encoder_stream, codec_name, kwargs))

picamera2/outputs/ffmpegoutput.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,9 @@ def stop(self):
9797
# This seems to be necessary to get the subprocess to clean up fully.
9898
gc.collect()
9999

100-
def outputframe(self, frame, keyframe=True, timestamp=None, packet=None):
100+
def outputframe(self, frame, keyframe=True, timestamp=None, packet=None, audio=False):
101+
if audio:
102+
raise RuntimeError("FfmpegOutput does not support audio packets from Picamera2")
101103
if self.recording and self.ffmpeg:
102104
# Handle the case where the FFmpeg prcoess has gone away for reasons of its own.
103105
try:

picamera2/outputs/fileoutput.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ def connectiondead(self, _callback):
7272
else:
7373
raise RuntimeError("Must pass callback function or None")
7474

75-
def outputframe(self, frame, keyframe=True, timestamp=None, packet=None):
75+
def outputframe(self, frame, keyframe=True, timestamp=None, packet=None, audio=False):
7676
"""Outputs frame from encoder
7777
7878
:param frame: Frame
@@ -82,6 +82,8 @@ def outputframe(self, frame, keyframe=True, timestamp=None, packet=None):
8282
:param timestamp: Timestamp of frame
8383
:type timestamp: int
8484
"""
85+
if audio:
86+
raise RuntimeError("Fileoutput does not support audio")
8587
if self._fileoutput is not None and self.recording:
8688
if self._firstframe:
8789
if not keyframe:

picamera2/outputs/output.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def stop(self):
2222
"""Stop recording"""
2323
self.recording = False
2424

25-
def outputframe(self, frame, keyframe=True, timestamp=None, packet=None):
25+
def outputframe(self, frame, keyframe=True, timestamp=None, packet=None, audio=False):
2626
"""Outputs frame from encoder
2727
2828
:param frame: Frame

0 commit comments

Comments
 (0)