Skip to content

Commit 6feb6f3

Browse files
authored
wgengine/magicsock: add relayManager event logs (tailscale#17091)
These are gated behind magicsock component debug logging. Updates tailscale/corp#30818 Signed-off-by: Jordan Whited <jordan@tailscale.com>
1 parent 1ec3d20 commit 6feb6f3

File tree

2 files changed

+69
-11
lines changed

2 files changed

+69
-11
lines changed

wgengine/magicsock/magicsock.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2411,7 +2411,7 @@ func (c *Conn) handleDiscoMessage(msg []byte, src epAddr, shouldBeRelayHandshake
24112411
msgType, sender.ShortString(), derpNodeSrc.ShortString())
24122412
return
24132413
} else {
2414-
c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got %s for %v<->%v",
2414+
c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got %s disco[0]=%v disco[1]=%v",
24152415
c.discoShort, epDisco.short,
24162416
ep.publicKey.ShortString(), derpStr(src.String()),
24172417
msgType,

wgengine/magicsock/relaymanager.go

Lines changed: 68 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package magicsock
66
import (
77
"context"
88
"errors"
9+
"fmt"
910
"net/netip"
1011
"sync"
1112
"time"
@@ -76,8 +77,11 @@ type serverDiscoVNI struct {
7677
// relayHandshakeWork serves to track in-progress relay handshake work for a
7778
// [udprelay.ServerEndpoint]. This structure is immutable once initialized.
7879
type relayHandshakeWork struct {
79-
wlb endpointWithLastBest
80-
se udprelay.ServerEndpoint
80+
wlb endpointWithLastBest
81+
se udprelay.ServerEndpoint
82+
server candidatePeerRelay
83+
84+
handshakeGen uint32
8185

8286
// handshakeServerEndpoint() always writes to doneCh (len 1) when it
8387
// returns. It may end up writing the same event afterward to
@@ -91,6 +95,26 @@ type relayHandshakeWork struct {
9195
cancel context.CancelFunc
9296
}
9397

98+
func (r *relayHandshakeWork) dlogf(format string, args ...any) {
99+
if !r.wlb.ep.c.debugLogging.Load() {
100+
return
101+
}
102+
var relay string
103+
if r.server.nodeKey.IsZero() {
104+
relay = "from-call-me-maybe-via"
105+
} else {
106+
relay = r.server.nodeKey.ShortString()
107+
}
108+
r.wlb.ep.c.logf("%s node=%v relay=%v handshakeGen=%d disco[0]=%v disco[1]=%v",
109+
fmt.Sprintf(format, args...),
110+
r.wlb.ep.publicKey.ShortString(),
111+
relay,
112+
r.handshakeGen,
113+
r.se.ClientDisco[0].ShortString(),
114+
r.se.ClientDisco[1].ShortString(),
115+
)
116+
}
117+
94118
// newRelayServerEndpointEvent indicates a new [udprelay.ServerEndpoint] has
95119
// become known either via allocation with a relay server, or via
96120
// [disco.CallMeMaybeVia] reception. This structure is immutable once
@@ -257,7 +281,9 @@ type relayDiscoMsgEvent struct {
257281
type relayEndpointAllocWork struct {
258282
wlb endpointWithLastBest
259283
discoKeys key.SortedPairOfDiscoPublic
260-
candidatePeerRelay candidatePeerRelay
284+
candidatePeerRelay candidatePeerRelay // zero value if learned via [disco.CallMeMaybeVia]
285+
286+
allocGen uint32
261287

262288
// allocateServerEndpoint() always writes to doneCh (len 1) when it
263289
// returns. It may end up writing the same event afterward to
@@ -271,6 +297,20 @@ type relayEndpointAllocWork struct {
271297
cancel context.CancelFunc
272298
}
273299

300+
func (r *relayEndpointAllocWork) dlogf(format string, args ...any) {
301+
if !r.wlb.ep.c.debugLogging.Load() {
302+
return
303+
}
304+
r.wlb.ep.c.logf("%s node=%v relay=%v allocGen=%d disco[0]=%v disco[1]=%v",
305+
fmt.Sprintf(format, args...),
306+
r.wlb.ep.publicKey.ShortString(),
307+
r.candidatePeerRelay.nodeKey.ShortString(),
308+
r.allocGen,
309+
r.discoKeys.Get()[0].ShortString(),
310+
r.discoKeys.Get()[1].ShortString(),
311+
)
312+
}
313+
274314
// init initializes [relayManager] if it is not already initialized.
275315
func (r *relayManager) init() {
276316
r.initOnce.Do(func() {
@@ -712,6 +752,7 @@ func (r *relayManager) handleNewServerEndpointRunLoop(newServerEndpoint newRelay
712752
work := &relayHandshakeWork{
713753
wlb: newServerEndpoint.wlb,
714754
se: newServerEndpoint.se,
755+
server: newServerEndpoint.server,
715756
rxDiscoMsgCh: make(chan relayDiscoMsgEvent),
716757
doneCh: make(chan relayEndpointHandshakeWorkDoneEvent, 1),
717758
ctx: ctx,
@@ -728,8 +769,9 @@ func (r *relayManager) handleNewServerEndpointRunLoop(newServerEndpoint newRelay
728769
if r.handshakeGeneration == 0 { // generation must be nonzero
729770
r.handshakeGeneration++
730771
}
772+
work.handshakeGen = r.handshakeGeneration
731773

732-
go r.handshakeServerEndpoint(work, r.handshakeGeneration)
774+
go r.handshakeServerEndpoint(work)
733775
}
734776

735777
// sendCallMeMaybeVia sends a [disco.CallMeMaybeVia] to ep over DERP. It must be
@@ -758,7 +800,7 @@ func (r *relayManager) sendCallMeMaybeVia(ep *endpoint, se udprelay.ServerEndpoi
758800
ep.c.sendDiscoMessage(epAddr{ap: derpAddr}, ep.publicKey, epDisco.key, callMeMaybeVia, discoVerboseLog)
759801
}
760802

761-
func (r *relayManager) handshakeServerEndpoint(work *relayHandshakeWork, generation uint32) {
803+
func (r *relayManager) handshakeServerEndpoint(work *relayHandshakeWork) {
762804
done := relayEndpointHandshakeWorkDoneEvent{work: work}
763805
r.ensureDiscoInfoFor(work)
764806

@@ -777,10 +819,13 @@ func (r *relayManager) handshakeServerEndpoint(work *relayHandshakeWork, generat
777819

778820
common := disco.BindUDPRelayEndpointCommon{
779821
VNI: work.se.VNI,
780-
Generation: generation,
822+
Generation: work.handshakeGen,
781823
RemoteKey: epDisco.key,
782824
}
783825

826+
work.dlogf("[v1] magicsock: relayManager: starting handshake addrPorts=%v",
827+
work.se.AddrPorts,
828+
)
784829
sentBindAny := false
785830
bind := &disco.BindUDPRelayEndpoint{
786831
BindUDPRelayEndpointCommon: common,
@@ -848,6 +893,7 @@ func (r *relayManager) handshakeServerEndpoint(work *relayHandshakeWork, generat
848893
for {
849894
select {
850895
case <-work.ctx.Done():
896+
work.dlogf("[v1] magicsock: relayManager: handshake canceled")
851897
return
852898
case msgEvent := <-work.rxDiscoMsgCh:
853899
switch msg := msgEvent.msg.(type) {
@@ -859,12 +905,14 @@ func (r *relayManager) handshakeServerEndpoint(work *relayHandshakeWork, generat
859905
if handshakeState >= disco.BindUDPRelayHandshakeStateAnswerSent {
860906
continue
861907
}
908+
work.dlogf("[v1] magicsock: relayManager: got handshake challenge from %v", msgEvent.from)
862909
txPing(msgEvent.from, &msg.Challenge)
863910
handshakeState = disco.BindUDPRelayHandshakeStateAnswerSent
864911
case *disco.Ping:
865912
if handshakeState < disco.BindUDPRelayHandshakeStateAnswerSent {
866913
continue
867914
}
915+
work.dlogf("[v1] magicsock: relayManager: got relayed ping from %v", msgEvent.from)
868916
// An inbound ping from the remote peer indicates we completed a
869917
// handshake with the relay server (our answer msg was
870918
// received). Chances are our ping was dropped before the remote
@@ -885,21 +933,26 @@ func (r *relayManager) handshakeServerEndpoint(work *relayHandshakeWork, generat
885933
// round-trip latency and return.
886934
done.pongReceivedFrom = msgEvent.from
887935
done.latency = time.Since(at)
936+
work.dlogf("[v1] magicsock: relayManager: got relayed pong from %v latency=%v",
937+
msgEvent.from,
938+
done.latency.Round(time.Millisecond),
939+
)
888940
return
889941
default:
890942
// unexpected message type, silently discard
891943
continue
892944
}
893945
case <-timer.C:
894946
// The handshake timed out.
947+
work.dlogf("[v1] magicsock: relayManager: handshake timed out")
895948
return
896949
}
897950
}
898951
}
899952

900953
const allocateUDPRelayEndpointRequestTimeout = time.Second * 10
901954

902-
func (r *relayManager) allocateServerEndpoint(work *relayEndpointAllocWork, generation uint32) {
955+
func (r *relayManager) allocateServerEndpoint(work *relayEndpointAllocWork) {
903956
done := relayEndpointAllocWorkDoneEvent{work: work}
904957

905958
defer func() {
@@ -910,7 +963,7 @@ func (r *relayManager) allocateServerEndpoint(work *relayEndpointAllocWork, gene
910963

911964
dm := &disco.AllocateUDPRelayEndpointRequest{
912965
ClientDisco: work.discoKeys.Get(),
913-
Generation: generation,
966+
Generation: work.allocGen,
914967
}
915968

916969
sendAllocReq := func() {
@@ -923,6 +976,7 @@ func (r *relayManager) allocateServerEndpoint(work *relayEndpointAllocWork, gene
923976
dm,
924977
discoVerboseLog,
925978
)
979+
work.dlogf("[v1] magicsock: relayManager: sent alloc request")
926980
}
927981
go sendAllocReq()
928982

@@ -938,16 +992,19 @@ func (r *relayManager) allocateServerEndpoint(work *relayEndpointAllocWork, gene
938992
for {
939993
select {
940994
case <-work.ctx.Done():
995+
work.dlogf("[v1] magicsock: relayManager: alloc request canceled")
941996
return
942997
case <-returnAfterTimer.C:
998+
work.dlogf("[v1] magicsock: relayManager: alloc request timed out")
943999
return
9441000
case <-retryAfterTimer.C:
9451001
go sendAllocReq()
9461002
case resp := <-work.rxDiscoMsgCh:
947-
if resp.Generation != generation ||
1003+
if resp.Generation != work.allocGen ||
9481004
!work.discoKeys.Equal(key.NewSortedPairOfDiscoPublic(resp.ClientDisco[0], resp.ClientDisco[1])) {
9491005
continue
9501006
}
1007+
work.dlogf("[v1] magicsock: relayManager: got alloc response")
9511008
done.allocated = udprelay.ServerEndpoint{
9521009
ServerDisco: resp.ServerDisco,
9531010
ClientDisco: resp.ClientDisco,
@@ -1004,6 +1061,7 @@ func (r *relayManager) allocateAllServersRunLoop(wlb endpointWithLastBest) {
10041061
}
10051062
byCandidatePeerRelay[v] = started
10061063
r.allocGeneration++
1007-
go r.allocateServerEndpoint(started, r.allocGeneration)
1064+
started.allocGen = r.allocGeneration
1065+
go r.allocateServerEndpoint(started)
10081066
}
10091067
}

0 commit comments

Comments
 (0)