improve testing of route failover logic
Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
This commit is contained in:
parent
bf4fd078fc
commit
1704977e76
11 changed files with 518 additions and 143 deletions
|
@ -3,7 +3,6 @@ package hscontrol
|
|||
import (
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
|
||||
|
@ -12,7 +11,6 @@ import (
|
|||
"github.com/rs/zerolog/log"
|
||||
"golang.org/x/net/http2"
|
||||
"golang.org/x/net/http2/h2c"
|
||||
"gorm.io/gorm"
|
||||
"tailscale.com/control/controlbase"
|
||||
"tailscale.com/control/controlhttp"
|
||||
"tailscale.com/tailcfg"
|
||||
|
@ -103,12 +101,12 @@ func (h *Headscale) NoiseUpgradeHandler(
|
|||
router.HandleFunc("/machine/map", noiseServer.NoisePollNetMapHandler)
|
||||
|
||||
server := http.Server{
|
||||
ReadTimeout: types.HTTPReadTimeout,
|
||||
ReadTimeout: types.HTTPTimeout,
|
||||
}
|
||||
|
||||
noiseServer.httpBaseConfig = &http.Server{
|
||||
Handler: router,
|
||||
ReadHeaderTimeout: types.HTTPReadTimeout,
|
||||
ReadHeaderTimeout: types.HTTPTimeout,
|
||||
}
|
||||
noiseServer.http2Server = &http2.Server{}
|
||||
|
||||
|
@ -225,15 +223,6 @@ func (ns *noiseServer) NoisePollNetMapHandler(
|
|||
key.NodePublic{},
|
||||
)
|
||||
if err != nil {
|
||||
if errors.Is(err, gorm.ErrRecordNotFound) {
|
||||
log.Warn().
|
||||
Str("handler", "NoisePollNetMap").
|
||||
Uint64("node.id", node.ID.Uint64()).
|
||||
Msgf("Ignoring request, cannot find node with key %s", mapRequest.NodeKey.String())
|
||||
http.Error(writer, "Internal error", http.StatusNotFound)
|
||||
|
||||
return
|
||||
}
|
||||
log.Error().
|
||||
Str("handler", "NoisePollNetMap").
|
||||
Uint64("node.id", node.ID.Uint64()).
|
||||
|
@ -242,58 +231,59 @@ func (ns *noiseServer) NoisePollNetMapHandler(
|
|||
|
||||
return
|
||||
}
|
||||
log.Debug().
|
||||
Str("handler", "NoisePollNetMap").
|
||||
Str("node", node.Hostname).
|
||||
Int("cap_ver", int(mapRequest.Version)).
|
||||
Uint64("node.id", node.ID.Uint64()).
|
||||
Msg("A node sending a MapRequest with Noise protocol")
|
||||
sess := ns.headscale.newMapSession(req.Context(), mapRequest, writer, node)
|
||||
|
||||
session := ns.headscale.newMapSession(req.Context(), mapRequest, writer, node)
|
||||
sess.tracef("a node sending a MapRequest with Noise protocol")
|
||||
|
||||
// If a streaming mapSession exists for this node, close it
|
||||
// and start a new one.
|
||||
if session.isStreaming() {
|
||||
log.Debug().
|
||||
Caller().
|
||||
Uint64("node.id", node.ID.Uint64()).
|
||||
Int("cap_ver", int(mapRequest.Version)).
|
||||
Msg("Aquiring lock to check stream")
|
||||
if sess.isStreaming() {
|
||||
sess.tracef("aquiring lock to check stream")
|
||||
|
||||
ns.headscale.mapSessionMu.Lock()
|
||||
if oldSession, ok := ns.headscale.mapSessions[node.ID]; ok {
|
||||
log.Info().
|
||||
Caller().
|
||||
Uint64("node.id", node.ID.Uint64()).
|
||||
Msg("Node has an open streaming session, replacing")
|
||||
oldSession.close()
|
||||
if _, ok := ns.headscale.mapSessions[node.ID]; ok {
|
||||
// NOTE/TODO(kradalby): From how I understand the protocol, when
|
||||
// a client connects with stream=true, and already has a streaming
|
||||
// connection open, the correct way is to close the current channel
|
||||
// and replace it. However, I cannot manage to get that working with
|
||||
// some sort of lock/block happening on the cancelCh in the streaming
|
||||
// session.
|
||||
// Not closing the channel and replacing it puts us in a weird state
|
||||
// which keeps a ghost stream open, receiving keep alives, but no updates.
|
||||
//
|
||||
// Typically a new connection is opened when one exists as a client which
|
||||
// is already authenticated reconnects (e.g. down, then up). The client will
|
||||
// start auth and streaming at the same time, and then cancel the streaming
|
||||
// when the auth has finished successfully, opening a new connection.
|
||||
//
|
||||
// As a work-around to not replacing, abusing the clients "resilience"
|
||||
// by reject the new connection which will cause the client to immediately
|
||||
// reconnect and "fix" the issue, as the other connection typically has been
|
||||
// closed, meaning there is nothing to replace.
|
||||
//
|
||||
// sess.infof("node has an open stream(%p), replacing with %p", oldSession, sess)
|
||||
// oldSession.close()
|
||||
|
||||
defer ns.headscale.mapSessionMu.Unlock()
|
||||
|
||||
sess.infof("node has an open stream(%p), rejecting new stream", sess)
|
||||
return
|
||||
}
|
||||
|
||||
ns.headscale.mapSessions[node.ID] = session
|
||||
ns.headscale.mapSessions[node.ID] = sess
|
||||
ns.headscale.mapSessionMu.Unlock()
|
||||
log.Debug().
|
||||
Caller().
|
||||
Uint64("node.id", node.ID.Uint64()).
|
||||
Int("cap_ver", int(mapRequest.Version)).
|
||||
Msg("Releasing lock to check stream")
|
||||
sess.tracef("releasing lock to check stream")
|
||||
}
|
||||
|
||||
session.serve()
|
||||
sess.serve()
|
||||
|
||||
if session.isStreaming() {
|
||||
log.Debug().
|
||||
Caller().
|
||||
Uint64("node.id", node.ID.Uint64()).
|
||||
Int("cap_ver", int(mapRequest.Version)).
|
||||
Msg("Aquiring lock to remove stream")
|
||||
if sess.isStreaming() {
|
||||
sess.tracef("aquiring lock to remove stream")
|
||||
ns.headscale.mapSessionMu.Lock()
|
||||
defer ns.headscale.mapSessionMu.Unlock()
|
||||
|
||||
delete(ns.headscale.mapSessions, node.ID)
|
||||
|
||||
ns.headscale.mapSessionMu.Unlock()
|
||||
log.Debug().
|
||||
Caller().
|
||||
Uint64("node.id", node.ID.Uint64()).
|
||||
Int("cap_ver", int(mapRequest.Version)).
|
||||
Msg("Releasing lock to remove stream")
|
||||
sess.tracef("releasing lock to remove stream")
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue