@@ -26,14 +26,12 @@ import (
2626 "errors"
2727 "fmt"
2828 "io"
29- "math"
3029 "net"
3130 "net/http"
3231 "net/http/httptrace"
3332 "net/netip"
3433 "net/url"
3534 "runtime"
36- "sort"
3735 "sync/atomic"
3836 "time"
3937
@@ -51,7 +49,6 @@ import (
5149 "tailscale.com/syncs"
5250 "tailscale.com/tailcfg"
5351 "tailscale.com/tstime"
54- "tailscale.com/util/multierr"
5552)
5653
5754var stdDialer net.Dialer
@@ -108,158 +105,63 @@ func (a *Dialer) dial(ctx context.Context) (*ClientConn, error) {
108105 }
109106 candidates := a .DialPlan .Candidates
110107
111- // Otherwise, we try dialing per the plan. Store the highest priority
112- // in the list, so that if we get a connection to one of those
113- // candidates we can return quickly.
114- var highestPriority int = math .MinInt
115- for _ , c := range candidates {
116- if c .Priority > highestPriority {
117- highestPriority = c .Priority
118- }
119- }
120-
121- // This context allows us to cancel in-flight connections if we get a
122- // highest-priority connection before we're all done.
108+ // Create a context to be canceled as we return, so once we get a good connection,
109+ // we can drop all the other ones.
123110 ctx , cancel := context .WithCancel (ctx )
124111 defer cancel ()
125112
126113 // Now, for each candidate, kick off a dial in parallel.
127114 type dialResult struct {
128- conn * ClientConn
129- err error
130- addr netip.Addr
131- priority int
132- }
133- resultsCh := make (chan dialResult , len (candidates ))
134-
135- var pending atomic.Int32
136- pending .Store (int32 (len (candidates )))
137- for _ , c := range candidates {
138- go func (ctx context.Context , c tailcfg.ControlIPCandidate ) {
139- var (
140- conn * ClientConn
141- err error
142- )
143-
144- // Always send results back to our channel.
145- defer func () {
146- resultsCh <- dialResult {conn , err , c .IP , c .Priority }
147- if pending .Add (- 1 ) == 0 {
148- close (resultsCh )
149- }
150- }()
151-
152- // If non-zero, wait the configured start timeout
153- // before we do anything.
154- if c .DialStartDelaySec > 0 {
155- a .logf ("[v2] controlhttp: waiting %.2f seconds before dialing %q @ %v" , c .DialStartDelaySec , a .Hostname , c .IP )
156- tmr , tmrChannel := a .clock ().NewTimer (time .Duration (c .DialStartDelaySec * float64 (time .Second )))
157- defer tmr .Stop ()
158- select {
159- case <- ctx .Done ():
160- err = ctx .Err ()
161- return
162- case <- tmrChannel :
163- }
164- }
115+ conn * ClientConn
116+ err error
117+ }
118+ resultsCh := make (chan dialResult ) // unbuffered, never closed
165119
166- // Now, create a sub-context with the given timeout and
167- // try dialing the provided host.
168- ctx , cancel := context .WithTimeout (ctx , time .Duration (c .DialTimeoutSec * float64 (time .Second )))
169- defer cancel ()
120+ dialCand := func (cand tailcfg.ControlIPCandidate ) (* ClientConn , error ) {
121+ a .logf ("[v2] controlhttp: waited %.2f seconds, dialing %q @ %s" , cand .DialStartDelaySec , a .Hostname , cand .IP .String ())
170122
171- // This will dial, and the defer above sends it back to our parent.
172- a .logf ("[v2] controlhttp: trying to dial %q @ %v" , a .Hostname , c .IP )
173- conn , err = a .dialHost (ctx , c .IP )
174- }(ctx , c )
123+ ctx , cancel := context .WithTimeout (ctx , time .Duration (cand .DialTimeoutSec * float64 (time .Second )))
124+ defer cancel ()
125+ return a .dialHost (ctx , cand .IP )
175126 }
176127
177- var results []dialResult
178- for res := range resultsCh {
179- // If we get a response that has the highest priority, we don't
180- // need to wait for any of the other connections to finish; we
181- // can just return this connection.
182- //
183- // TODO(andrew): we could make this better by keeping track of
184- // the highest remaining priority dynamically, instead of just
185- // checking for the highest total
186- if res .priority == highestPriority && res .conn != nil {
187- a .logf ("[v1] controlhttp: high-priority success dialing %q @ %v from dial plan" , a .Hostname , res .addr )
188-
189- // Drain the channel and any existing connections in
190- // the background.
128+ for _ , cand := range candidates {
129+ timer := time .AfterFunc (time .Duration (cand .DialStartDelaySec * float64 (time .Second )), func () {
191130 go func () {
192- for _ , res := range results {
193- if res .conn != nil {
194- res .conn .Close ()
131+ conn , err := dialCand (cand )
132+ select {
133+ case resultsCh <- dialResult {conn , err }:
134+ if err == nil {
135+ a .logf ("[v1] controlhttp: succeeded dialing %q @ %v from dial plan" , a .Hostname , cand .IP .String ())
195136 }
196- }
197- for res := range resultsCh {
198- if res .conn != nil {
199- res .conn .Close ()
137+ case <- ctx .Done ():
138+ if conn != nil {
139+ conn .Close ()
200140 }
201141 }
202- if a .drainFinished != nil {
203- close (a .drainFinished )
204- }
205142 }()
206- return res .conn , nil
207- }
208-
209- // This isn't a highest-priority result, so just store it until
210- // we're done.
211- results = append (results , res )
143+ })
144+ defer timer .Stop ()
212145 }
213146
214- // After we finish this function, close any remaining open connections.
215- defer func () {
216- for _ , result := range results {
217- // Note: below, we nil out the returned connection (if
218- // any) in the slice so we don't close it.
219- if result .conn != nil {
220- result .conn .Close ()
147+ var errs []error
148+ for {
149+ select {
150+ case res := <- resultsCh :
151+ if res .err == nil {
152+ return res .conn , nil
221153 }
154+ errs = append (errs , res .err )
155+ if len (errs ) == len (candidates ) {
156+ // If we get here, then we didn't get anywhere with our dial plan; fall back to just using DNS.
157+ a .logf ("controlhttp: failed dialing using DialPlan, falling back to DNS; errs=%s" , errors .Join (errs ... ))
158+ return a .dialHost (ctx , netip.Addr {})
159+ }
160+ case <- ctx .Done ():
161+ a .logf ("controlhttp: context aborted dialing" )
162+ return nil , ctx .Err ()
222163 }
223-
224- // We don't drain asynchronously after this point, so notify our
225- // channel when we return.
226- if a .drainFinished != nil {
227- close (a .drainFinished )
228- }
229- }()
230-
231- // Sort by priority, then take the first non-error response.
232- sort .Slice (results , func (i , j int ) bool {
233- // NOTE: intentionally inverted so that the highest priority
234- // item comes first
235- return results [i ].priority > results [j ].priority
236- })
237-
238- var (
239- conn * ClientConn
240- errs []error
241- )
242- for i , result := range results {
243- if result .err != nil {
244- errs = append (errs , result .err )
245- continue
246- }
247-
248- a .logf ("[v1] controlhttp: succeeded dialing %q @ %v from dial plan" , a .Hostname , result .addr )
249- conn = result .conn
250- results [i ].conn = nil // so we don't close it in the defer
251- return conn , nil
252164 }
253- if ctx .Err () != nil {
254- a .logf ("controlhttp: context aborted dialing" )
255- return nil , ctx .Err ()
256- }
257-
258- merr := multierr .New (errs ... )
259-
260- // If we get here, then we didn't get anywhere with our dial plan; fall back to just using DNS.
261- a .logf ("controlhttp: failed dialing using DialPlan, falling back to DNS; errs=%s" , merr .Error ())
262- return a .dialHost (ctx , netip.Addr {})
263165}
264166
265167// The TS_FORCE_NOISE_443 envknob forces the controlclient noise dialer to
@@ -388,6 +290,9 @@ func (a *Dialer) dialHost(ctx context.Context, optAddr netip.Addr) (*ClientConn,
388290 }
389291
390292 var err80 , err443 error
293+ if forceTLS {
294+ err80 = errors .New ("TLS forced: no port 80 dialed" )
295+ }
391296 for {
392297 select {
393298 case <- ctx .Done ():
0 commit comments