@@ -77,7 +77,7 @@ impl State {
7777 ///
7878 /// ### Deviation
7979 ///
80- /// If a long running process returns the 'abort' status after receiving the data, it will be removed similarly to how `git` does it.
80+ /// If a long- running process returns the 'abort' status after receiving the data, it will be removed similarly to how `git` does it.
8181 /// However, if it returns an unsuccessful error status later, it will not be removed, but reports the error only.
8282 /// If any other non-'error' status is received, the process will be stopped. But that doesn't happen if such a status is received
8383 /// after reading the filtered result.
@@ -97,9 +97,9 @@ impl State {
9797 }
9898 }
9999
100- /// Like [`apply()]`[ Self::apply()] , but use `delay` to determine if the filter result may be delayed or not.
100+ /// Like [`apply()`]( Self::apply()) , but use `delay` to determine if the filter result may be delayed or not.
101101 ///
102- /// Poll [`list_delayed_paths()`][ Self::list_delayed_paths()] until it is empty and query the available paths again.
102+ /// Poll [`list_delayed_paths()`]( Self::list_delayed_paths()) until it is empty and query the available paths again.
103103 /// Note that even though it's possible, the API assumes that commands aren't mixed when delays are allowed.
104104 pub fn apply_delayed < ' a > (
105105 & ' a mut self ,
@@ -111,10 +111,24 @@ impl State {
111111 ) -> Result < Option < MaybeDelayed < ' a > > , Error > {
112112 match self . maybe_launch_process ( driver, operation, ctx. rela_path ) ? {
113113 Some ( Process :: SingleFile { mut child, command } ) => {
114- std:: io:: copy ( src, & mut child. stdin . take ( ) . expect ( "configured" ) ) ?;
114+ // To avoid deadlock when the filter immediately echoes input to output (like `cat`),
115+ // we need to write to stdin and read from stdout concurrently. If we write all data
116+ // to stdin before reading from stdout, and the pipe buffer fills up, both processes
117+ // will block: the filter blocks writing to stdout (buffer full), and we block writing
118+ // to stdin (waiting for the filter to consume data).
119+ //
120+ // Solution: Read all data into a buffer, then spawn a thread to write it to stdin
121+ // while we can immediately read from stdout.
122+ let mut input_data = Vec :: new ( ) ;
123+ std:: io:: copy ( src, & mut input_data) ?;
124+
125+ let stdin = child. stdin . take ( ) . expect ( "configured" ) ;
126+ let write_thread = WriterThread :: write_all_in_background ( input_data, stdin) ?;
127+
115128 Ok ( Some ( MaybeDelayed :: Immediate ( Box :: new ( ReadFilterOutput {
116129 inner : child. stdout . take ( ) ,
117130 child : driver. required . then_some ( ( child, command) ) ,
131+ write_thread : Some ( write_thread) ,
118132 } ) ) ) )
119133 }
120134 Some ( Process :: MultiFile { client, key } ) => {
@@ -202,11 +216,62 @@ pub enum MaybeDelayed<'a> {
202216 Immediate ( Box < dyn std:: io:: Read + ' a > ) ,
203217}
204218
219+ /// A helper to manage writing to stdin on a separate thread to avoid deadlock.
220+ struct WriterThread {
221+ handle : Option < std:: thread:: JoinHandle < std:: io:: Result < ( ) > > > ,
222+ }
223+
224+ impl WriterThread {
225+ /// Spawn a thread that will write all data from `data` to `stdin`.
226+ fn write_all_in_background ( data : Vec < u8 > , mut stdin : std:: process:: ChildStdin ) -> std:: io:: Result < Self > {
227+ let handle = std:: thread:: Builder :: new ( )
228+ . name ( "gix-filter-stdin-writer" . into ( ) )
229+ . stack_size ( 128 * 1024 )
230+ . spawn ( move || {
231+ use std:: io:: Write ;
232+ stdin. write_all ( & data) ?;
233+ // Explicitly drop stdin to close the pipe and signal EOF to the child
234+ drop ( stdin) ;
235+ Ok ( ( ) )
236+ } ) ?;
237+
238+ Ok ( Self { handle : Some ( handle) } )
239+ }
240+
241+ /// Wait for the writer thread to complete and return any error that occurred.
242+ fn join ( & mut self ) -> std:: io:: Result < ( ) > {
243+ let Some ( handle) = self . handle . take ( ) else {
244+ return Ok ( ( ) ) ;
245+ } ;
246+ handle. join ( ) . map_err ( |panic_info| {
247+ let msg = if let Some ( s) = panic_info. downcast_ref :: < String > ( ) {
248+ format ! ( "Writer thread panicked: {s}" )
249+ } else if let Some ( s) = panic_info. downcast_ref :: < & str > ( ) {
250+ format ! ( "Writer thread panicked: {s}" )
251+ } else {
252+ "Writer thread panicked while writing to filter stdin" . to_string ( )
253+ } ;
254+ std:: io:: Error :: other ( msg)
255+ } ) ?
256+ }
257+ }
258+
259+ impl Drop for WriterThread {
260+ fn drop ( & mut self ) {
261+ // Best effort join on drop.
262+ if let Err ( _err) = self . join ( ) {
263+ gix_trace:: debug!( err = %_err, "Failed to join writer thread during drop" ) ;
264+ }
265+ }
266+ }
267+
205268/// A utility type to facilitate streaming the output of a filter process.
206269struct ReadFilterOutput {
207270 inner : Option < std:: process:: ChildStdout > ,
208271 /// The child is present if we need its exit code to be positive.
209272 child : Option < ( std:: process:: Child , std:: process:: Command ) > ,
273+ /// The thread writing to stdin, if any. Must be joined when reading is done.
274+ write_thread : Option < WriterThread > ,
210275}
211276
212277pub ( crate ) fn handle_io_err ( err : & std:: io:: Error , running : & mut HashMap < BString , process:: Client > , process : & BStr ) {
@@ -222,9 +287,31 @@ impl std::io::Read for ReadFilterOutput {
222287 fn read ( & mut self , buf : & mut [ u8 ] ) -> std:: io:: Result < usize > {
223288 match self . inner . as_mut ( ) {
224289 Some ( inner) => {
225- let num_read = inner. read ( buf) ?;
290+ let num_read = match inner. read ( buf) {
291+ Ok ( n) => n,
292+ Err ( e) => {
293+ // On read error, ensure we join the writer thread before propagating the error.
294+ // This is expected to finish with failure as well as it should fail to write
295+ // to the process which now fails to produce output (that we try to read).
296+ if let Some ( mut write_thread) = self . write_thread . take ( ) {
297+ // Try to join but prioritize the original read error
298+ if let Err ( _thread_err) = write_thread. join ( ) {
299+ gix_trace:: debug!( thread_err = %_thread_err, read_err = %e, "write to stdin error during failed read" ) ;
300+ }
301+ }
302+ return Err ( e) ;
303+ }
304+ } ;
305+
226306 if num_read == 0 {
227307 self . inner . take ( ) ;
308+
309+ // Join the writer thread first to ensure all data has been written
310+ // and that resources are freed now.
311+ if let Some ( mut write_thread) = self . write_thread . take ( ) {
312+ write_thread. join ( ) ?;
313+ }
314+
228315 if let Some ( ( mut child, cmd) ) = self . child . take ( ) {
229316 let status = child. wait ( ) ?;
230317 if !status. success ( ) {
0 commit comments