Skip to content

Commit f701d39

Browse files
authored
net/udprelay: change Server.AllocateEndpoint existing alloc strategy (tailscale#15792)
The previous strategy assumed clients maintained adequate state to understand the relationship between endpoint allocation and the server it was allocated on. magicsock will not have awareness of the server's disco key pre-allocation, it only understands peerAPI address at this point. The second client to allocate on the same server could trigger re-allocation, breaking a functional relay server endpoint. If magicsock needs to force reallocation we can add opt-in behaviors for this later. Updates tailscale/corp#27502 Signed-off-by: Jordan Whited <jordan@tailscale.com>
1 parent dae2319 commit f701d39

File tree

2 files changed

+27
-35
lines changed

2 files changed

+27
-35
lines changed

net/udprelay/server.go

Lines changed: 17 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -454,9 +454,10 @@ func (s *Server) packetReadLoop() {
454454

455455
var ErrServerClosed = errors.New("server closed")
456456

457-
// AllocateEndpoint allocates a ServerEndpoint for the provided pair of
458-
// key.DiscoPublic's. It returns an error (ErrServerClosed) if the server has
459-
// been closed.
457+
// AllocateEndpoint allocates a [ServerEndpoint] for the provided pair of
458+
// [key.DiscoPublic]'s. If an allocation already exists for discoA and discoB it
459+
// is returned without modification/reallocation. AllocateEndpoint returns
460+
// [ErrServerClosed] if the server has been closed.
460461
func (s *Server) AllocateEndpoint(discoA, discoB key.DiscoPublic) (ServerEndpoint, error) {
461462
s.mu.Lock()
462463
defer s.mu.Unlock()
@@ -471,36 +472,19 @@ func (s *Server) AllocateEndpoint(discoA, discoB key.DiscoPublic) (ServerEndpoin
471472
pair := newPairOfDiscoPubKeys(discoA, discoB)
472473
e, ok := s.byDisco[pair]
473474
if ok {
474-
if !e.isBound() {
475-
// If the endpoint is not yet bound this is likely an allocation
476-
// race between two clients on the same Server. Instead of
477-
// re-allocating we return the existing allocation. We do not reset
478-
// e.allocatedAt in case a client is "stuck" in an allocation
479-
// loop and will not be able to complete a handshake, for whatever
480-
// reason. Once the endpoint expires a new endpoint will be
481-
// allocated. Clients can resolve duplicate ServerEndpoint details
482-
// via ServerEndpoint.LamportID.
483-
//
484-
// TODO: consider ServerEndpoint.BindLifetime -= time.Now()-e.allocatedAt
485-
// to give the client a more accurate picture of the bind window.
486-
// Or, some threshold to trigger re-allocation if too much time has
487-
// already passed since it was originally allocated.
488-
return ServerEndpoint{
489-
ServerDisco: s.discoPublic,
490-
AddrPorts: s.addrPorts,
491-
VNI: e.vni,
492-
LamportID: e.lamportID,
493-
BindLifetime: tstime.GoDuration{Duration: s.bindLifetime},
494-
SteadyStateLifetime: tstime.GoDuration{Duration: s.steadyStateLifetime},
495-
}, nil
496-
}
497-
// If an endpoint exists for the pair of key.DiscoPublic's, and is
498-
// already bound, delete it. We will re-allocate a new endpoint. Chances
499-
// are clients cannot make use of the existing, bound allocation if
500-
// they are requesting a new one.
501-
delete(s.byDisco, pair)
502-
delete(s.byVNI, e.vni)
503-
s.vniPool = append(s.vniPool, e.vni)
475+
// Return the existing allocation. Clients can resolve duplicate
476+
// [ServerEndpoint]'s via [ServerEndpoint.LamportID].
477+
//
478+
// TODO: consider ServerEndpoint.BindLifetime -= time.Now()-e.allocatedAt
479+
// to give the client a more accurate picture of the bind window.
480+
return ServerEndpoint{
481+
ServerDisco: s.discoPublic,
482+
AddrPorts: s.addrPorts,
483+
VNI: e.vni,
484+
LamportID: e.lamportID,
485+
BindLifetime: tstime.GoDuration{Duration: s.bindLifetime},
486+
SteadyStateLifetime: tstime.GoDuration{Duration: s.steadyStateLifetime},
487+
}, nil
504488
}
505489

506490
if len(s.vniPool) == 0 {

net/udprelay/server_test.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,8 +174,7 @@ func TestServer(t *testing.T) {
174174
t.Fatal(err)
175175
}
176176

177-
// We expect the same endpoint details as the 3-way bind handshake has not
178-
// yet been completed for both relay client parties.
177+
// We expect the same endpoint details pre-handshake.
179178
if diff := cmp.Diff(dupEndpoint, endpoint, cmpopts.EquateComparable(netip.AddrPort{}, key.DiscoPublic{})); diff != "" {
180179
t.Fatalf("wrong dupEndpoint (-got +want)\n%s", diff)
181180
}
@@ -191,6 +190,15 @@ func TestServer(t *testing.T) {
191190
tcA.handshake(t)
192191
tcB.handshake(t)
193192

193+
dupEndpoint, err = server.AllocateEndpoint(discoA.Public(), discoB.Public())
194+
if err != nil {
195+
t.Fatal(err)
196+
}
197+
// We expect the same endpoint details post-handshake.
198+
if diff := cmp.Diff(dupEndpoint, endpoint, cmpopts.EquateComparable(netip.AddrPort{}, key.DiscoPublic{})); diff != "" {
199+
t.Fatalf("wrong dupEndpoint (-got +want)\n%s", diff)
200+
}
201+
194202
txToB := []byte{1, 2, 3}
195203
tcA.writeDataPkt(t, txToB)
196204
rxFromA := tcB.readDataPkt(t)

0 commit comments

Comments
 (0)