@@ -62,6 +62,11 @@ type nftable struct {
6262// - The table and chain conventions followed here are those used by
6363// `iptables-nft` and `ufw`, so that those tools co-exist and do not
6464// negatively affect Tailscale function.
65+ // - Be mindful that 1) all chains attached to a given hook (e.g. the forward hook)
66+ // will be processed in priority order till either a rule in one of the chains issues a drop verdict
67+ // or there are no more chains for that hook
68+ // 2) processing of individual rules within a chain will stop once one of them issues a final verdict (accept, drop).
69+ // https://wiki.nftables.org/wiki-nftables/index.php/Configuring_chains
6570type nftablesRunner struct {
6671 conn * nftables.Conn
6772 nft4 * nftable
@@ -238,6 +243,25 @@ func (n *nftablesRunner) AddSNATRuleForDst(src, dst netip.Addr) error {
238243 return n .conn .Flush ()
239244}
240245
246+ // ClampMSSToPMTU ensures that all packets with TCP flags (SYN, ACK, RST) set
247+ // being forwarded via the given interface (tun) have MSS set to <MTU of the
248+ // interface> - 40 (IP and TCP headers). This can be useful if this tailscale
249+ // instance is expected to run as a forwarding proxy, forwarding packets from an
250+ // endpoint with higher MTU in an environment where path MTU discovery is
251+ // expected to not work (such as the proxies created by the Tailscale Kubernetes
252+ // operator). ClampMSSToPMTU creates a new base-chain ts-clamp in the filter
253+ // table with accept policy and priority -150. In practice, this means that for
254+ // SYN packets the clamp rule in this chain will likely run first and accept the
255+ // packet. This is fine because 1) nftables runs ALL chains with the same hook
256+ // type unless a rule in one of them drops the packet and 2) this chain does not
257+ // have functionality to drop the packet, so in practice a matching clamp rule
258+ // will always be followed by the custom tailscale filtering rules in the other
259+ // chains attached to the forward hook (FORWARD, ts-forward).
260+ // We do not want to place the clamping rule into FORWARD/ts-forward chains
261+ // because wgengine populates those chains with rules that contain accept
262+ // verdicts that would cause no further processing within that chain. This
263+ // functionality is currently invoked from outside wgengine (containerboot), so
264+ // we don't want to race with wgengine for rule ordering within chains.
241265func (n * nftablesRunner ) ClampMSSToPMTU (tun string , addr netip.Addr ) error {
242266 polAccept := nftables .ChainPolicyAccept
243267 table := n .getNFTByAddr (addr )
@@ -246,13 +270,13 @@ func (n *nftablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error {
246270 return fmt .Errorf ("error ensuring filter table: %w" , err )
247271 }
248272
249- // ensure forwarding chain exists
273+ // ensure ts-clamp chain exists
250274 fwChain , err := getOrCreateChain (n .conn , chainInfo {
251275 table : filterTable ,
252- name : "FORWARD " ,
276+ name : "ts-clamp " ,
253277 chainType : nftables .ChainTypeFilter ,
254278 chainHook : nftables .ChainHookForward ,
255- chainPriority : nftables .ChainPriorityFilter ,
279+ chainPriority : nftables .ChainPriorityMangle ,
256280 chainPolicy : & polAccept ,
257281 })
258282 if err != nil {
@@ -289,7 +313,7 @@ func (n *nftablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error {
289313 Xor : []byte {0x00 },
290314 },
291315 & expr.Cmp {
292- Op : expr .CmpOpNeq ,
316+ Op : expr .CmpOpNeq , // match any packet with a TCP flag set (SYN, ACK, RST)
293317 Register : 1 ,
294318 Data : []byte {0x00 },
295319 },
0 commit comments