Skip to content

Commit 92ca770

Browse files
authored
util/linuxfw: fix MSS clamping in nftables mode (tailscale#11588)
MSS clamping for nftables was mostly not run due to an earlier rule in the FORWARD chain issuing an accept verdict. This commit places the clamping rule into a chain of its own to ensure that it gets run. Updates tailscale#11002 Signed-off-by: Irbe Krumina <irbe@tailscale.com>
1 parent 27038ee commit 92ca770

File tree

1 file changed

+28
-4
lines changed

1 file changed

+28
-4
lines changed

util/linuxfw/nftables_runner.go

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ type nftable struct {
6262
// - The table and chain conventions followed here are those used by
6363
// `iptables-nft` and `ufw`, so that those tools co-exist and do not
6464
// negatively affect Tailscale function.
65+
// - Be mindful that 1) all chains attached to a given hook (e.g. the forward hook)
66+
// will be processed in priority order till either a rule in one of the chains issues a drop verdict
67+
// or there are no more chains for that hook
68+
// 2) processing of individual rules within a chain will stop once one of them issues a final verdict (accept, drop).
69+
// https://wiki.nftables.org/wiki-nftables/index.php/Configuring_chains
6570
type nftablesRunner struct {
6671
conn *nftables.Conn
6772
nft4 *nftable
@@ -238,6 +243,25 @@ func (n *nftablesRunner) AddSNATRuleForDst(src, dst netip.Addr) error {
238243
return n.conn.Flush()
239244
}
240245

246+
// ClampMSSToPMTU ensures that all packets with TCP flags (SYN, ACK, RST) set
247+
// being forwarded via the given interface (tun) have MSS set to <MTU of the
248+
// interface> - 40 (IP and TCP headers). This can be useful if this tailscale
249+
// instance is expected to run as a forwarding proxy, forwarding packets from an
250+
// endpoint with higher MTU in an environment where path MTU discovery is
251+
// expected to not work (such as the proxies created by the Tailscale Kubernetes
252+
// operator). ClampMSSToPMTU creates a new base-chain ts-clamp in the filter
253+
// table with accept policy and priority -150. In practice, this means that for
254+
// SYN packets the clamp rule in this chain will likely run first and accept the
255+
// packet. This is fine because 1) nftables run ALL chains with the same hook
256+
// type unless a rule in one of them drops the packet and 2) this chain does not
257+
// have functionality to drop the packet - so in practice a matching clamp rule
258+
// will always be followed by the custom tailscale filtering rules in the other
259+
// chains attached to the filter hook (FORWARD, ts-forward).
260+
// We do not want to place the clamping rule into FORWARD/ts-forward chains
261+
// because wgengine populates those chains with rules that contain accept
262+
// verdicts that would cause no further processing within that chain. This
263+
// functionality is currently invoked from outside wgengine (containerboot), so
264+
// we don't want to race with wgengine for rule ordering within chains.
241265
func (n *nftablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error {
242266
polAccept := nftables.ChainPolicyAccept
243267
table := n.getNFTByAddr(addr)
@@ -246,13 +270,13 @@ func (n *nftablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error {
246270
return fmt.Errorf("error ensuring filter table: %w", err)
247271
}
248272

249-
// ensure forwarding chain exists
273+
// ensure ts-clamp chain exists
250274
fwChain, err := getOrCreateChain(n.conn, chainInfo{
251275
table: filterTable,
252-
name: "FORWARD",
276+
name: "ts-clamp",
253277
chainType: nftables.ChainTypeFilter,
254278
chainHook: nftables.ChainHookForward,
255-
chainPriority: nftables.ChainPriorityFilter,
279+
chainPriority: nftables.ChainPriorityMangle,
256280
chainPolicy: &polAccept,
257281
})
258282
if err != nil {
@@ -289,7 +313,7 @@ func (n *nftablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error {
289313
Xor: []byte{0x00},
290314
},
291315
&expr.Cmp{
292-
Op: expr.CmpOpNeq,
316+
Op: expr.CmpOpNeq, // match any packet with a TCP flag set (SYN, ACK, RST)
293317
Register: 1,
294318
Data: []byte{0x00},
295319
},

0 commit comments

Comments
 (0)