-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Description
Describe the bug
When the circuit breaker activates, connections to upstream hosts are not closed. This seems to be because, in the happy path, the upstream connection is closed when the response body from upstream is copied to the downstream response and the InputStream is closed, which in turn releases the connection back to whatever pool is in use (in our case, the Apache HttpClient pool).
When the circuit breaker trips, it instead returns a different GatewayServerResponse with the circuit breaker fallback response in it. There is now a disconnect and nothing ever closes the upstream response when it eventually comes back.
We see this most often when using the timeLimiter configuration in the circuit breaker and the upstream response takes longer than the circuit breaker time limit.
When debugging, it looks like 2 things happen:
- This line can throw an exception:
IllegalStateException: The request object has been recycled and is no longer associated with this facade
which then results in the clientResponse and body never getting closed, thus leaking a connection. I think that if this method throws, it should itself close() whatever it is not able to return
OR
- This line eventually succeeds, but the fallback has already executed and returned so this is leaked
Sample
I've put together a minimal sample app that reproduces the issue. If you run this spring app and then run the curl command repeatedly and observe the console output from the sample app
# Repeatedly run this:
curl http://localhost:8080/test
Output:
You'll see that the circuit breaker correctly triggers a few times and lets the responses through when they are fast. However, after enough requests, spring-cloud-gateway just returns a 500 because the connection pool is full, even if the upstream endpoint would have returned quickly.
LeakApplication.java
package com.example.leak;
import io.github.resilience4j.circuitbreaker.CircuitBreakerConfig;
import io.github.resilience4j.timelimiter.TimeLimiterConfig;
import org.apache.hc.client5.http.config.RequestConfig;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManager;
import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManagerBuilder;
import org.apache.hc.core5.util.Timeout;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.cloud.circuitbreaker.resilience4j.Resilience4JCircuitBreakerFactory;
import org.springframework.cloud.circuitbreaker.resilience4j.Resilience4JConfigBuilder;
import org.springframework.cloud.client.circuitbreaker.Customizer;
import org.springframework.context.annotation.Bean;
import org.springframework.http.client.ClientHttpRequestFactory;
import org.springframework.http.client.HttpComponentsClientHttpRequestFactory;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.servlet.config.annotation.EnableWebMvc;
import org.springframework.web.servlet.function.RouterFunction;
import org.springframework.web.servlet.function.ServerResponse;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import static org.springframework.cloud.gateway.server.mvc.filter.BeforeFilterFunctions.rewritePath;
import static org.springframework.cloud.gateway.server.mvc.filter.BeforeFilterFunctions.uri;
import static org.springframework.cloud.gateway.server.mvc.filter.CircuitBreakerFilterFunctions.circuitBreaker;
import static org.springframework.cloud.gateway.server.mvc.handler.GatewayRouterFunctions.route;
import static org.springframework.cloud.gateway.server.mvc.handler.HandlerFunctions.http;
@SpringBootApplication
@EnableScheduling
@EnableWebMvc
public class LeakApplication {
/**
 * Entry point: boots the Spring application using {@code LeakApplication} as the primary source.
 *
 * @param args command-line arguments, passed through unchanged to {@code SpringApplication.run}
 */
public static void main(String[] args) {
    final Class<?> primarySource = LeakApplication.class;
    SpringApplication.run(primarySource, args);
}
/**
 * Declares the single gateway route of this sample: {@code GET /test}, handled by the
 * {@code http()} proxy handler, wrapped in the {@code "cb-0"} circuit breaker filter.
 * Before the request is forwarded, its target URI is set to {@code http://localhost:<port>}
 * and its path is rewritten to {@code /upstream}.
 *
 * @param port value of {@code server.port}, falling back to 8080 when the property is unset
 * @return the router function built for the {@code "test"} route
 */
@Bean
public RouterFunction<ServerResponse> routes(
        @Value("${server.port:8080}") int port
) {
    // The "upstream" is this same application, reached over its own HTTP port.
    final String upstreamBase = "http://localhost:" + port;

    return route("test")
            .GET("/test", http())
            // commenting out the next line prevents the leak from happening
            .filter(circuitBreaker("cb-0"))
            .before(uri(upstreamBase))
            .before(rewritePath("/.*", "/upstream"))
            .build();
}
/**
 * Registers the default Resilience4J configuration applied to every circuit breaker id:
 * a time limiter with a 5-second timeout combined with the library's default
 * circuit breaker settings.
 *
 * @return a customizer that installs this default configuration on the factory
 */
@Bean
public Customizer<Resilience4JCircuitBreakerFactory> defaultCustomizer() {
    return factory -> factory.configureDefault(id -> {
        // Built per id, exactly as the configuration function is invoked.
        final TimeLimiterConfig timeLimiter = TimeLimiterConfig.custom()
                .timeoutDuration(Duration.ofSeconds(5))
                .build();
        final CircuitBreakerConfig breaker = CircuitBreakerConfig.ofDefaults();

        return new Resilience4JConfigBuilder(id)
                .timeLimiterConfig(timeLimiter)
                .circuitBreakerConfig(breaker)
                .build();
    });
}
/**
 * Creates the Apache connection pool used by the gateway's HTTP client.
 * The pool is capped at 5 connections in total and 5 per route, which keeps it small
 * enough for leaked connections to exhaust it after a handful of requests.
 *
 * @return the configured pooling connection manager
 */
@Bean
public PoolingHttpClientConnectionManager poolingHttpClientConnectionManager() {
    final PoolingHttpClientConnectionManagerBuilder poolBuilder =
            PoolingHttpClientConnectionManagerBuilder.create();
    poolBuilder.setMaxConnTotal(5);
    poolBuilder.setMaxConnPerRoute(5);
    return poolBuilder.build();
}
/**
 * Builds the request factory backing the gateway's outbound calls, on top of an Apache
 * HTTP client that:
 * <ul>
 *   <li>waits up to 3 seconds for a connection to be leased from the pool;</li>
 *   <li>has automatic retries disabled, to keep the reproduction straightforward.</li>
 * </ul>
 *
 * @param poolingHttpClientConnectionManager the shared connection pool the client leases from
 * @return a request factory wrapping the configured Apache client
 */
@Bean
public ClientHttpRequestFactory gatewayHttpClientFactory(final PoolingHttpClientConnectionManager poolingHttpClientConnectionManager) {
    // Maximum time to wait for a pooled connection before giving up.
    final Timeout leaseTimeout = Timeout.of(3L, TimeUnit.SECONDS);
    final RequestConfig requestDefaults = RequestConfig.custom()
            .setConnectionRequestTimeout(leaseTimeout)
            .build();

    final CloseableHttpClient apacheClient = HttpClients
            .custom()
            .disableAutomaticRetries()
            .setConnectionManager(poolingHttpClientConnectionManager)
            .setDefaultRequestConfig(requestDefaults)
            .build();

    return new HttpComponentsClientHttpRequestFactory(apacheClient);
}
/**
 * Registers a monitoring task that prints the connection pool's total stats to the console.
 * The task's {@code run()} method is annotated {@code @Scheduled(fixedRate = 1000)}.
 * A line goes to {@code System.err} when the number of leased connections equals the pool
 * maximum, and to {@code System.out} otherwise.
 *
 * @param poolingHttpClientConnectionManager the pool whose stats are reported
 * @return the scheduled stats-printing task
 */
@Bean
public Runnable printStatsTask(PoolingHttpClientConnectionManager poolingHttpClientConnectionManager) {
    // Kept as an anonymous class (not a lambda) because run() carries the @Scheduled annotation.
    return new Runnable() {
        @Override
        @Scheduled(fixedRate = 1000)
        public void run() {
            var poolStats = poolingHttpClientConnectionManager.getTotalStats();
            boolean poolExhausted = poolStats.getLeased() == poolStats.getMax();
            if (poolExhausted) {
                System.err.printf("[%s] %s%n", LocalDateTime.now(), poolStats);
            } else {
                System.out.printf("[%s] %s%n", LocalDateTime.now(), poolStats);
            }
        }
    };
}
/**
 * Simulated upstream service. Calls alternate between a slow response, which sleeps for
 * 7 seconds (longer than the 5-second circuit breaker time limit configured in this sample),
 * and an immediate fast response.
 */
@Controller
public static class SlowController {

    /** Counts calls to {@link #slow()}; its parity selects the slow or the fast path. */
    private static final AtomicInteger COUNTER = new AtomicInteger();

    /**
     * Handles {@code GET /upstream}.
     *
     * @return {@code "slow"} after a 7-second sleep when the pre-increment counter value is
     *         even, otherwise {@code "fast"} immediately
     * @throws InterruptedException if the thread is interrupted while sleeping
     */
    @GetMapping("/upstream")
    public @ResponseBody String slow() throws InterruptedException {
        final int callIndex = COUNTER.getAndIncrement();
        if (callIndex % 2 != 0) {
            return "fast";
        }
        Thread.sleep(7_000);
        return "slow";
    }
}
}
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>3.5.7</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<groupId>com.example</groupId>
<artifactId>leak</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>leak</name>
<description>Demo project for Spring Boot</description>
<url/>
<licenses>
<license/>
</licenses>
<developers>
<developer/>
</developers>
<scm>
<connection/>
<developerConnection/>
<tag/>
<url/>
</scm>
<properties>
<java.version>17</java.version>
<spring-cloud.version>2025.0.0</spring-cloud.version>
</properties>
<!-- Runtime and test dependencies of the reproduction sample. None declares an explicit
     version here; versions are presumably supplied by the Spring Boot parent and the
     imported Spring Cloud BOM (confirm against the effective POM). -->
<dependencies>
<!-- Spring Cloud Gateway Server, WebMVC variant: the component under test. -->
<dependency>
<groupId>org.springframework.cloud</groupId>
<artifactId>spring-cloud-starter-gateway-server-webmvc</artifactId>
</dependency>
<!-- Resilience4j-backed Spring Cloud CircuitBreaker implementation, used by the circuitBreaker filter. -->
<dependency>
<groupId>org.springframework.cloud</groupId>
<artifactId>spring-cloud-circuitbreaker-resilience4j</artifactId>
</dependency>
<!-- Apache HttpClient 5, providing the pooled upstream connections whose leak is being demonstrated. -->
<dependency>
<groupId>org.apache.httpcomponents.client5</groupId>
<artifactId>httpclient5</artifactId>
</dependency>
<!-- Spring Boot test starter, test scope only. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<!-- Imports the spring-cloud-dependencies BOM (type pom, scope import) at the version given
     by the spring-cloud.version property, so Spring Cloud artifacts can be declared
     without explicit versions. -->
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.springframework.cloud</groupId>
<artifactId>spring-cloud-dependencies</artifactId>
<version>${spring-cloud.version}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>
If possible, please provide a test case or sample application that reproduces
the problem. This makes it much easier for us to diagnose the problem and to verify that
we have fixed it.