ensure online status and route changes are propagated (#1564)
This commit is contained in:
parent
0153e26392
commit
f65f4eca35
40 changed files with 3170 additions and 857 deletions
|
@ -8,6 +8,7 @@ import (
|
|||
"net/url"
|
||||
"os"
|
||||
"path"
|
||||
"slices"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
|
@ -21,6 +22,7 @@ import (
|
|||
"github.com/klauspost/compress/zstd"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/samber/lo"
|
||||
"golang.org/x/exp/maps"
|
||||
"tailscale.com/envknob"
|
||||
"tailscale.com/smallzstd"
|
||||
"tailscale.com/tailcfg"
|
||||
|
@ -45,6 +47,7 @@ var debugDumpMapResponsePath = envknob.String("HEADSCALE_DEBUG_DUMP_MAPRESPONSE_
|
|||
// - Keep information about the previous mapresponse so we can send a diff
|
||||
// - Store hashes
|
||||
// - Create a "minifier" that removes info not needed for the node
|
||||
// - some sort of batching, wait for 5 or 60 seconds before sending
|
||||
|
||||
type Mapper struct {
|
||||
// Configuration
|
||||
|
@ -61,8 +64,14 @@ type Mapper struct {
|
|||
|
||||
// Map isnt concurrency safe, so we need to ensure
|
||||
// only one func is accessing it over time.
|
||||
mu sync.Mutex
|
||||
peers map[uint64]*types.Node
|
||||
mu sync.Mutex
|
||||
peers map[uint64]*types.Node
|
||||
patches map[uint64][]patch
|
||||
}
|
||||
|
||||
type patch struct {
|
||||
timestamp time.Time
|
||||
change *tailcfg.PeerChange
|
||||
}
|
||||
|
||||
func NewMapper(
|
||||
|
@ -93,7 +102,8 @@ func NewMapper(
|
|||
seq: 0,
|
||||
|
||||
// TODO: populate
|
||||
peers: peers.IDMap(),
|
||||
peers: peers.IDMap(),
|
||||
patches: make(map[uint64][]patch),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -235,6 +245,19 @@ func (m *Mapper) FullMapResponse(
|
|||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
peers := maps.Keys(m.peers)
|
||||
peersWithPatches := maps.Keys(m.patches)
|
||||
slices.Sort(peers)
|
||||
slices.Sort(peersWithPatches)
|
||||
|
||||
if len(peersWithPatches) > 0 {
|
||||
log.Debug().
|
||||
Str("node", node.Hostname).
|
||||
Uints64("peers", peers).
|
||||
Uints64("pending_patches", peersWithPatches).
|
||||
Msgf("node requested full map response, but has pending patches")
|
||||
}
|
||||
|
||||
resp, err := m.fullMapResponse(node, pol, mapRequest.Version)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -272,10 +295,12 @@ func (m *Mapper) KeepAliveResponse(
|
|||
func (m *Mapper) DERPMapResponse(
|
||||
mapRequest tailcfg.MapRequest,
|
||||
node *types.Node,
|
||||
derpMap tailcfg.DERPMap,
|
||||
derpMap *tailcfg.DERPMap,
|
||||
) ([]byte, error) {
|
||||
m.derpMap = derpMap
|
||||
|
||||
resp := m.baseMapResponse()
|
||||
resp.DERPMap = &derpMap
|
||||
resp.DERPMap = derpMap
|
||||
|
||||
return m.marshalMapResponse(mapRequest, &resp, node, mapRequest.Compress)
|
||||
}
|
||||
|
@ -285,18 +310,29 @@ func (m *Mapper) PeerChangedResponse(
|
|||
node *types.Node,
|
||||
changed types.Nodes,
|
||||
pol *policy.ACLPolicy,
|
||||
messages ...string,
|
||||
) ([]byte, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
lastSeen := make(map[tailcfg.NodeID]bool)
|
||||
|
||||
// Update our internal map.
|
||||
for _, node := range changed {
|
||||
m.peers[node.ID] = node
|
||||
if patches, ok := m.patches[node.ID]; ok {
|
||||
// preserve online status in case the patch has an outdated one
|
||||
online := node.IsOnline
|
||||
|
||||
// We have just seen the node, let the peers update their list.
|
||||
lastSeen[tailcfg.NodeID(node.ID)] = true
|
||||
for _, p := range patches {
|
||||
// TODO(kradalby): Figure if this needs to be sorted by timestamp
|
||||
node.ApplyPeerChange(p.change)
|
||||
}
|
||||
|
||||
// Ensure the patches are not applied again later
|
||||
delete(m.patches, node.ID)
|
||||
|
||||
node.IsOnline = online
|
||||
}
|
||||
|
||||
m.peers[node.ID] = node
|
||||
}
|
||||
|
||||
resp := m.baseMapResponse()
|
||||
|
@ -316,11 +352,55 @@ func (m *Mapper) PeerChangedResponse(
|
|||
return nil, err
|
||||
}
|
||||
|
||||
// resp.PeerSeenChange = lastSeen
|
||||
return m.marshalMapResponse(mapRequest, &resp, node, mapRequest.Compress, messages...)
|
||||
}
|
||||
|
||||
// PeerChangedPatchResponse creates a patch MapResponse with
|
||||
// incoming update from a state change.
|
||||
func (m *Mapper) PeerChangedPatchResponse(
|
||||
mapRequest tailcfg.MapRequest,
|
||||
node *types.Node,
|
||||
changed []*tailcfg.PeerChange,
|
||||
pol *policy.ACLPolicy,
|
||||
) ([]byte, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
sendUpdate := false
|
||||
// patch the internal map
|
||||
for _, change := range changed {
|
||||
if peer, ok := m.peers[uint64(change.NodeID)]; ok {
|
||||
peer.ApplyPeerChange(change)
|
||||
sendUpdate = true
|
||||
} else {
|
||||
log.Trace().Str("node", node.Hostname).Msgf("Node with ID %s is missing from mapper for Node %s, saving patch for when node is available", change.NodeID, node.Hostname)
|
||||
|
||||
p := patch{
|
||||
timestamp: time.Now(),
|
||||
change: change,
|
||||
}
|
||||
|
||||
if patches, ok := m.patches[uint64(change.NodeID)]; ok {
|
||||
patches := append(patches, p)
|
||||
|
||||
m.patches[uint64(change.NodeID)] = patches
|
||||
} else {
|
||||
m.patches[uint64(change.NodeID)] = []patch{p}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !sendUpdate {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
resp := m.baseMapResponse()
|
||||
resp.PeersChangedPatch = changed
|
||||
|
||||
return m.marshalMapResponse(mapRequest, &resp, node, mapRequest.Compress)
|
||||
}
|
||||
|
||||
// TODO(kradalby): We need some integration tests for this.
|
||||
func (m *Mapper) PeerRemovedResponse(
|
||||
mapRequest tailcfg.MapRequest,
|
||||
node *types.Node,
|
||||
|
@ -329,13 +409,23 @@ func (m *Mapper) PeerRemovedResponse(
|
|||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
// Some nodes might have been removed already
|
||||
// so we dont want to ask downstream to remove
|
||||
// twice, than can cause a panic in tailscaled.
|
||||
notYetRemoved := []tailcfg.NodeID{}
|
||||
|
||||
// remove from our internal map
|
||||
for _, id := range removed {
|
||||
if _, ok := m.peers[uint64(id)]; ok {
|
||||
notYetRemoved = append(notYetRemoved, id)
|
||||
}
|
||||
|
||||
delete(m.peers, uint64(id))
|
||||
delete(m.patches, uint64(id))
|
||||
}
|
||||
|
||||
resp := m.baseMapResponse()
|
||||
resp.PeersRemoved = removed
|
||||
resp.PeersRemoved = notYetRemoved
|
||||
|
||||
return m.marshalMapResponse(mapRequest, &resp, node, mapRequest.Compress)
|
||||
}
|
||||
|
@ -345,6 +435,7 @@ func (m *Mapper) marshalMapResponse(
|
|||
resp *tailcfg.MapResponse,
|
||||
node *types.Node,
|
||||
compression string,
|
||||
messages ...string,
|
||||
) ([]byte, error) {
|
||||
atomic.AddUint64(&m.seq, 1)
|
||||
|
||||
|
@ -358,11 +449,25 @@ func (m *Mapper) marshalMapResponse(
|
|||
|
||||
if debugDumpMapResponsePath != "" {
|
||||
data := map[string]interface{}{
|
||||
"Messages": messages,
|
||||
"MapRequest": mapRequest,
|
||||
"MapResponse": resp,
|
||||
}
|
||||
|
||||
body, err := json.Marshal(data)
|
||||
responseType := "keepalive"
|
||||
|
||||
switch {
|
||||
case resp.Peers != nil && len(resp.Peers) > 0:
|
||||
responseType = "full"
|
||||
case resp.PeersChanged != nil && len(resp.PeersChanged) > 0:
|
||||
responseType = "changed"
|
||||
case resp.PeersChangedPatch != nil && len(resp.PeersChangedPatch) > 0:
|
||||
responseType = "patch"
|
||||
case resp.PeersRemoved != nil && len(resp.PeersRemoved) > 0:
|
||||
responseType = "removed"
|
||||
}
|
||||
|
||||
body, err := json.MarshalIndent(data, "", " ")
|
||||
if err != nil {
|
||||
log.Error().
|
||||
Caller().
|
||||
|
@ -381,7 +486,7 @@ func (m *Mapper) marshalMapResponse(
|
|||
|
||||
mapResponsePath := path.Join(
|
||||
mPath,
|
||||
fmt.Sprintf("%d-%s-%d.json", now, m.uid, atomic.LoadUint64(&m.seq)),
|
||||
fmt.Sprintf("%d-%s-%d-%s.json", now, m.uid, atomic.LoadUint64(&m.seq), responseType),
|
||||
)
|
||||
|
||||
log.Trace().Msgf("Writing MapResponse to %s", mapResponsePath)
|
||||
|
@ -438,6 +543,7 @@ func (m *Mapper) baseMapResponse() tailcfg.MapResponse {
|
|||
resp := tailcfg.MapResponse{
|
||||
KeepAlive: false,
|
||||
ControlTime: &now,
|
||||
// TODO(kradalby): Implement PingRequest?
|
||||
}
|
||||
|
||||
return resp
|
||||
|
@ -559,8 +665,5 @@ func appendPeerChanges(
|
|||
resp.UserProfiles = profiles
|
||||
resp.SSHPolicy = sshPolicy
|
||||
|
||||
// TODO(kradalby): This currently does not take last seen in keepalives into account
|
||||
resp.OnlineChange = peers.OnlineNodeMap()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -237,7 +237,6 @@ func Test_fullMapResponse(t *testing.T) {
|
|||
Tags: []string{},
|
||||
PrimaryRoutes: []netip.Prefix{netip.MustParsePrefix("192.168.0.0/24")},
|
||||
LastSeen: &lastSeen,
|
||||
Online: new(bool),
|
||||
MachineAuthorized: true,
|
||||
Capabilities: []tailcfg.NodeCapability{
|
||||
tailcfg.CapabilityFileSharing,
|
||||
|
@ -293,7 +292,6 @@ func Test_fullMapResponse(t *testing.T) {
|
|||
Tags: []string{},
|
||||
PrimaryRoutes: []netip.Prefix{},
|
||||
LastSeen: &lastSeen,
|
||||
Online: new(bool),
|
||||
MachineAuthorized: true,
|
||||
Capabilities: []tailcfg.NodeCapability{
|
||||
tailcfg.CapabilityFileSharing,
|
||||
|
@ -400,7 +398,6 @@ func Test_fullMapResponse(t *testing.T) {
|
|||
DNSConfig: &tailcfg.DNSConfig{},
|
||||
Domain: "",
|
||||
CollectServices: "false",
|
||||
OnlineChange: map[tailcfg.NodeID]bool{tailPeer1.ID: false},
|
||||
PacketFilter: []tailcfg.FilterRule{},
|
||||
UserProfiles: []tailcfg.UserProfile{{LoginName: "mini", DisplayName: "mini"}},
|
||||
SSHPolicy: &tailcfg.SSHPolicy{Rules: []*tailcfg.SSHRule{}},
|
||||
|
@ -442,10 +439,6 @@ func Test_fullMapResponse(t *testing.T) {
|
|||
DNSConfig: &tailcfg.DNSConfig{},
|
||||
Domain: "",
|
||||
CollectServices: "false",
|
||||
OnlineChange: map[tailcfg.NodeID]bool{
|
||||
tailPeer1.ID: false,
|
||||
tailcfg.NodeID(peer2.ID): false,
|
||||
},
|
||||
PacketFilter: []tailcfg.FilterRule{
|
||||
{
|
||||
SrcIPs: []string{"100.64.0.2/32"},
|
||||
|
|
|
@ -87,11 +87,9 @@ func tailNode(
|
|||
|
||||
hostname, err := node.GetFQDN(dnsConfig, baseDomain)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("tailNode, failed to create FQDN: %s", err)
|
||||
}
|
||||
|
||||
online := node.IsOnline()
|
||||
|
||||
tags, _ := pol.TagsOfNode(node)
|
||||
tags = lo.Uniq(append(tags, node.ForcedTags...))
|
||||
|
||||
|
@ -101,6 +99,7 @@ func tailNode(
|
|||
strconv.FormatUint(node.ID, util.Base10),
|
||||
), // in headscale, unlike tailcontrol server, IDs are permanent
|
||||
Name: hostname,
|
||||
Cap: capVer,
|
||||
|
||||
User: tailcfg.UserID(node.UserID),
|
||||
|
||||
|
@ -116,13 +115,14 @@ func tailNode(
|
|||
Hostinfo: node.Hostinfo.View(),
|
||||
Created: node.CreatedAt,
|
||||
|
||||
Online: node.IsOnline,
|
||||
|
||||
Tags: tags,
|
||||
|
||||
PrimaryRoutes: primaryPrefixes,
|
||||
|
||||
LastSeen: node.LastSeen,
|
||||
Online: &online,
|
||||
MachineAuthorized: !node.IsExpired(),
|
||||
Expired: node.IsExpired(),
|
||||
}
|
||||
|
||||
// - 74: 2023-09-18: Client understands NodeCapMap
|
||||
|
@ -153,5 +153,11 @@ func tailNode(
|
|||
tNode.Capabilities = append(tNode.Capabilities, tailcfg.NodeAttrDisableUPnP)
|
||||
}
|
||||
|
||||
if node.IsOnline == nil || !*node.IsOnline {
|
||||
// LastSeen is only set when node is
|
||||
// not connected to the control server.
|
||||
tNode.LastSeen = node.LastSeen
|
||||
}
|
||||
|
||||
return &tNode, nil
|
||||
}
|
||||
|
|
|
@ -68,7 +68,6 @@ func TestTailNode(t *testing.T) {
|
|||
Hostinfo: hiview(tailcfg.Hostinfo{}),
|
||||
Tags: []string{},
|
||||
PrimaryRoutes: []netip.Prefix{},
|
||||
Online: new(bool),
|
||||
MachineAuthorized: true,
|
||||
Capabilities: []tailcfg.NodeCapability{
|
||||
"https://tailscale.com/cap/file-sharing", "https://tailscale.com/cap/is-admin",
|
||||
|
@ -165,7 +164,6 @@ func TestTailNode(t *testing.T) {
|
|||
},
|
||||
|
||||
LastSeen: &lastSeen,
|
||||
Online: new(bool),
|
||||
MachineAuthorized: true,
|
||||
|
||||
Capabilities: []tailcfg.NodeCapability{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue