Skip to content

Connections not closed when circuit breaker trips #3963

@goughy000

Description

@goughy000

Describe the bug

When the circuit breaker activates, connections to upstream hosts are not closed. This seems to be because, in the happy path, the upstream connection is closed when the response body from upstream is copied to the downstream response and the InputStream is closed, which in turn releases the connection back to any pool in use (in our case we are using ApacheClient).

When the circuit breaker trips, it instead returns a different GatewayServerResponse with the circuit breaker fallback response in it. There is now a disconnect and nothing ever closes the upstream response when it eventually comes back.

We see this most often when using the timeLimiter configuration in the circuit breaker and the upstream response takes longer than the circuit breaker time limit.

When debugging, it looks like 2 things happen:

  • This line can throw IllegalStateException: The request object has been recycled and is no longer associated with this facade, which results in the clientResponse and body never being closed, thus leaking a connection. I think that if this method throws, it should itself close() whatever resources it is unable to return

OR

  • This line eventually succeeds, but the fallback has already executed and returned so this is leaked

Sample

I've put together a minimal sample app that reproduces the issue. Run this Spring app, then run the curl command below repeatedly and observe the console output from the sample app.

# Repeatedly run this:
curl http://localhost:8080/test

Output:

Image

You'll see that the circuit breaker correctly triggers a few times and allows the responses through when they are fast. However, after enough requests, spring-cloud-gateway just returns a 500 because the connection pool is full, even if the upstream endpoint would have returned quickly.

LeakApplication.java

package com.example.leak;

import io.github.resilience4j.circuitbreaker.CircuitBreakerConfig;
import io.github.resilience4j.timelimiter.TimeLimiterConfig;
import org.apache.hc.client5.http.config.RequestConfig;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManager;
import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManagerBuilder;
import org.apache.hc.core5.util.Timeout;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.cloud.circuitbreaker.resilience4j.Resilience4JCircuitBreakerFactory;
import org.springframework.cloud.circuitbreaker.resilience4j.Resilience4JConfigBuilder;
import org.springframework.cloud.client.circuitbreaker.Customizer;
import org.springframework.context.annotation.Bean;
import org.springframework.http.client.ClientHttpRequestFactory;
import org.springframework.http.client.HttpComponentsClientHttpRequestFactory;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.servlet.config.annotation.EnableWebMvc;
import org.springframework.web.servlet.function.RouterFunction;
import org.springframework.web.servlet.function.ServerResponse;

import java.time.Duration;
import java.time.LocalDateTime;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import static org.springframework.cloud.gateway.server.mvc.filter.BeforeFilterFunctions.rewritePath;
import static org.springframework.cloud.gateway.server.mvc.filter.BeforeFilterFunctions.uri;
import static org.springframework.cloud.gateway.server.mvc.filter.CircuitBreakerFilterFunctions.circuitBreaker;
import static org.springframework.cloud.gateway.server.mvc.handler.GatewayRouterFunctions.route;
import static org.springframework.cloud.gateway.server.mvc.handler.HandlerFunctions.http;

/**
 * Minimal reproducer for a connection leak when a gateway route is wrapped in a
 * circuit breaker with a time limiter.
 *
 * <p>The application proxies {@code /test} to its own {@code /upstream} endpoint through
 * a five-connection Apache HttpClient pool and prints the pool statistics once per second.
 */
@SpringBootApplication
@EnableScheduling
@EnableWebMvc
public class LeakApplication {

    /**
     * Boots the Spring application.
     *
     * @param args command-line arguments forwarded to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(LeakApplication.class, args);
    }

    /**
     * Declares the single gateway route: {@code GET /test} is rewritten to
     * {@code /upstream} and forwarded to this same application on localhost.
     *
     * @param port the port this server listens on (defaults to 8080)
     * @return the router function describing the route
     */
    @Bean
    public RouterFunction<ServerResponse> routes(
            @Value("${server.port:8080}") int port
    ) {
        // The upstream is this very application, reached over loopback.
        final String selfBaseUri = "http://localhost:" + port;

        return route("test")
                .GET("/test", http())
                // The leak disappears when this circuit breaker filter is removed.
                .filter(circuitBreaker("cb-0"))
                .before(uri(selfBaseUri))
                .before(rewritePath("/.*", "/upstream"))
                .build();
    }

    /**
     * Applies the default circuit breaker configuration: default circuit breaker
     * settings combined with a five-second time limiter.
     *
     * @return a customizer for the Resilience4J circuit breaker factory
     */
    @Bean
    public Customizer<Resilience4JCircuitBreakerFactory> defaultCustomizer() {
        return factory -> factory.configureDefault(id -> {
            TimeLimiterConfig fiveSecondLimit = TimeLimiterConfig.custom()
                    .timeoutDuration(Duration.ofSeconds(5))
                    .build();
            return new Resilience4JConfigBuilder(id)
                    .timeLimiterConfig(fiveSecondLimit)
                    .circuitBreakerConfig(CircuitBreakerConfig.ofDefaults())
                    .build();
        });
    }

    /**
     * Creates the Apache connection pool, capped at five connections both in total
     * and per route.
     *
     * @return the pooling connection manager
     */
    @Bean
    public PoolingHttpClientConnectionManager poolingHttpClientConnectionManager() {
        final int maxConnections = 5;
        return PoolingHttpClientConnectionManagerBuilder.create()
                .setMaxConnTotal(maxConnections)
                .setMaxConnPerRoute(maxConnections)
                .build();
    }

    /**
     * Builds the request factory backed by an Apache client that:
     * <ul>
     *   <li>waits up to three seconds for a connection from the pool, and</li>
     *   <li>performs no automatic retries, to keep the scenario straightforward.</li>
     * </ul>
     *
     * @param poolingHttpClientConnectionManager the shared connection pool
     * @return the request factory wrapping the configured Apache client
     */
    @Bean
    public ClientHttpRequestFactory gatewayHttpClientFactory(final PoolingHttpClientConnectionManager poolingHttpClientConnectionManager) {
        RequestConfig requestConfig = RequestConfig.custom()
                .setConnectionRequestTimeout(Timeout.of(3L, TimeUnit.SECONDS))
                .build();

        CloseableHttpClient pooledClient = HttpClients.custom()
                .disableAutomaticRetries()
                .setConnectionManager(poolingHttpClientConnectionManager)
                .setDefaultRequestConfig(requestConfig)
                .build();

        return new HttpComponentsClientHttpRequestFactory(pooledClient);
    }

    /**
     * Registers a task, scheduled at a fixed rate of 1000 ms, that prints the pool
     * statistics to the console for monitoring.
     *
     * @param poolingHttpClientConnectionManager the pool whose statistics are printed
     * @return the scheduled task
     */
    @Bean
    public Runnable printStatsTask(PoolingHttpClientConnectionManager poolingHttpClientConnectionManager) {
        return new Runnable() {
            @Override
            @Scheduled(fixedRate = 1000)
            public void run() {
                var poolStats = poolingHttpClientConnectionManager.getTotalStats();

                // Write to stderr once every connection in the pool is leased.
                var target = System.out;
                if (poolStats.getLeased() == poolStats.getMax()) {
                    target = System.err;
                }
                target.printf("[%s] %s%n", LocalDateTime.now(), poolStats);
            }
        };
    }

    /**
     * Simulated upstream service: responses alternate between slow (slower than the
     * circuit breaker time limit) and fast.
     */
    @Controller
    public static class SlowController {
        private static final AtomicInteger COUNTER = new AtomicInteger();

        /**
         * Handles {@code GET /upstream}. Calls whose counter value is even sleep for
         * seven seconds before answering; all other calls answer immediately.
         *
         * @return {@code "slow"} after the delay, or {@code "fast"} otherwise
         * @throws InterruptedException if the thread is interrupted while sleeping
         */
        @GetMapping("/upstream")
        public @ResponseBody String slow() throws InterruptedException {
            int callIndex = COUNTER.getAndIncrement();
            if (callIndex % 2 != 0) {
                return "fast";
            }
            Thread.sleep(7_000);
            return "slow";
        }
    }
}

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the "leak" sample application. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <!-- Inherits from the Spring Boot starter parent, version 3.5.7. -->
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>3.5.7</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.example</groupId>
    <artifactId>leak</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>leak</name>
    <description>Demo project for Spring Boot</description>
    <!-- The url, licenses, developers and scm elements below are intentionally empty placeholders. -->
    <url/>
    <licenses>
        <license/>
    </licenses>
    <developers>
        <developer/>
    </developers>
    <scm>
        <connection/>
        <developerConnection/>
        <tag/>
        <url/>
    </scm>
    <properties>
        <java.version>17</java.version>
        <!-- Referenced by the spring-cloud-dependencies import in dependencyManagement. -->
        <spring-cloud.version>2025.0.0</spring-cloud.version>
    </properties>
    <!-- No versions are declared on the dependencies below; presumably they are resolved
         through the parent and the imported spring-cloud-dependencies BOM. -->
    <dependencies>
        <dependency>
            <groupId>org.springframework.cloud</groupId>
            <artifactId>spring-cloud-starter-gateway-server-webmvc</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.cloud</groupId>
            <artifactId>spring-cloud-circuitbreaker-resilience4j</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.httpcomponents.client5</groupId>
            <artifactId>httpclient5</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
    </dependencies>
    <dependencyManagement>
        <dependencies>
            <!-- Imports the Spring Cloud BOM at the version set in the spring-cloud.version property. -->
            <dependency>
                <groupId>org.springframework.cloud</groupId>
                <artifactId>spring-cloud-dependencies</artifactId>
                <version>${spring-cloud.version}</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
        </dependencies>
    </dependencyManagement>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>

</project>

If possible, please provide a test case or sample application that reproduces
the problem. This makes it much easier for us to diagnose the problem and to verify that
we have fixed it.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions