Rework map session

This commit restructures the map session in to a struct
holding the state of what is needed during its lifetime.

For streaming sessions, the event loop is structured a
bit differently not hammering the clients with updates
but rather batching them over a short, configurable time
which should significantly improve cpu usage, and potentially
flakyness.

The use of Patch updates has been dialed back a little as
it does not look like its a 100% ready for prime time. Nodes
are now updated with full changes, except for a few things
like online status.

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
This commit is contained in:
Kristoffer Dalby 2024-02-23 10:59:24 +01:00 committed by Juan Font
parent dd693c444c
commit 58c94d2bd3
35 changed files with 1803 additions and 1716 deletions

View file

@ -3,6 +3,7 @@ package hscontrol
import (
"encoding/binary"
"encoding/json"
"errors"
"io"
"net/http"
@ -11,6 +12,7 @@ import (
"github.com/rs/zerolog/log"
"golang.org/x/net/http2"
"golang.org/x/net/http2/h2c"
"gorm.io/gorm"
"tailscale.com/control/controlbase"
"tailscale.com/control/controlhttp"
"tailscale.com/tailcfg"
@ -163,3 +165,135 @@ func (ns *noiseServer) earlyNoise(protocolVersion int, writer io.Writer) error {
return nil
}
const (
MinimumCapVersion tailcfg.CapabilityVersion = 58
)
// NoisePollNetMapHandler takes care of /machine/:id/map using the Noise protocol
//
// This is the busiest endpoint, as it keeps the HTTP long poll that updates
// the clients when something in the network changes.
//
// The clients POST stuff like HostInfo and their Endpoints here, but
// only after their first request (marked with the ReadOnly field).
//
// At this moment the updates are sent in a quite horrendous way, but they kinda work.
func (ns *noiseServer) NoisePollNetMapHandler(
writer http.ResponseWriter,
req *http.Request,
) {
log.Trace().
Str("handler", "NoisePollNetMap").
Msg("PollNetMapHandler called")
log.Trace().
Any("headers", req.Header).
Caller().
Msg("Headers")
body, _ := io.ReadAll(req.Body)
mapRequest := tailcfg.MapRequest{}
if err := json.Unmarshal(body, &mapRequest); err != nil {
log.Error().
Caller().
Err(err).
Msg("Cannot parse MapRequest")
http.Error(writer, "Internal error", http.StatusInternalServerError)
return
}
// Reject unsupported versions
if mapRequest.Version < MinimumCapVersion {
log.Info().
Caller().
Int("min_version", int(MinimumCapVersion)).
Int("client_version", int(mapRequest.Version)).
Msg("unsupported client connected")
http.Error(writer, "Internal error", http.StatusBadRequest)
return
}
ns.nodeKey = mapRequest.NodeKey
node, err := ns.headscale.db.GetNodeByAnyKey(
ns.conn.Peer(),
mapRequest.NodeKey,
key.NodePublic{},
)
if err != nil {
if errors.Is(err, gorm.ErrRecordNotFound) {
log.Warn().
Str("handler", "NoisePollNetMap").
Uint64("node.id", node.ID.Uint64()).
Msgf("Ignoring request, cannot find node with key %s", mapRequest.NodeKey.String())
http.Error(writer, "Internal error", http.StatusNotFound)
return
}
log.Error().
Str("handler", "NoisePollNetMap").
Uint64("node.id", node.ID.Uint64()).
Msgf("Failed to fetch node from the database with node key: %s", mapRequest.NodeKey.String())
http.Error(writer, "Internal error", http.StatusInternalServerError)
return
}
log.Debug().
Str("handler", "NoisePollNetMap").
Str("node", node.Hostname).
Int("cap_ver", int(mapRequest.Version)).
Uint64("node.id", node.ID.Uint64()).
Msg("A node sending a MapRequest with Noise protocol")
session := ns.headscale.newMapSession(req.Context(), mapRequest, writer, node)
// If a streaming mapSession exists for this node, close it
// and start a new one.
if session.isStreaming() {
log.Debug().
Caller().
Uint64("node.id", node.ID.Uint64()).
Int("cap_ver", int(mapRequest.Version)).
Msg("Aquiring lock to check stream")
ns.headscale.mapSessionMu.Lock()
if oldSession, ok := ns.headscale.mapSessions[node.ID]; ok {
log.Info().
Caller().
Uint64("node.id", node.ID.Uint64()).
Msg("Node has an open streaming session, replacing")
oldSession.close()
}
ns.headscale.mapSessions[node.ID] = session
ns.headscale.mapSessionMu.Unlock()
log.Debug().
Caller().
Uint64("node.id", node.ID.Uint64()).
Int("cap_ver", int(mapRequest.Version)).
Msg("Releasing lock to check stream")
}
session.serve()
if session.isStreaming() {
log.Debug().
Caller().
Uint64("node.id", node.ID.Uint64()).
Int("cap_ver", int(mapRequest.Version)).
Msg("Aquiring lock to remove stream")
ns.headscale.mapSessionMu.Lock()
delete(ns.headscale.mapSessions, node.ID)
ns.headscale.mapSessionMu.Unlock()
log.Debug().
Caller().
Uint64("node.id", node.ID.Uint64()).
Int("cap_ver", int(mapRequest.Version)).
Msg("Releasing lock to remove stream")
}
}