ensure online status and route changes are propagated (#1564)

Kristoffer Dalby 2023-12-09 18:09:24 +01:00 committed by GitHub
parent 0153e26392
commit f65f4eca35
GPG key ID: 4AEE18F83AFDEB23
40 changed files with 3170 additions and 857 deletions


@@ -8,6 +8,7 @@ import (
"net/url"
"os"
"path"
"slices"
"sort"
"strings"
"sync"
@@ -21,6 +22,7 @@ import (
"github.com/klauspost/compress/zstd"
"github.com/rs/zerolog/log"
"github.com/samber/lo"
"golang.org/x/exp/maps"
"tailscale.com/envknob"
"tailscale.com/smallzstd"
"tailscale.com/tailcfg"
@@ -45,6 +47,7 @@ var debugDumpMapResponsePath = envknob.String("HEADSCALE_DEBUG_DUMP_MAPRESPONSE_
// - Keep information about the previous mapresponse so we can send a diff
// - Store hashes
// - Create a "minifier" that removes info not needed for the node
// - some sort of batching, wait for 5 or 60 seconds before sending
type Mapper struct {
// Configuration
@@ -61,8 +64,14 @@ type Mapper struct {
// Map isn't concurrency safe, so we need to ensure
// only one func is accessing it over time.
mu sync.Mutex
peers map[uint64]*types.Node
mu sync.Mutex
peers map[uint64]*types.Node
patches map[uint64][]patch
}
type patch struct {
timestamp time.Time
change *tailcfg.PeerChange
}
func NewMapper(
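
The patch type above gives the mapper a place to park tailcfg.PeerChange deltas for peers it does not know yet. A minimal sketch of the two operations the rest of the diff performs on m.patches, written as helper methods for readability; queuePatch and drainPatches are hypothetical names (the commit inlines both in PeerChangedPatchResponse and PeerChangedResponse), and the sketch assumes the mapper package's existing imports:

// queuePatch buffers a change for a node the mapper does not know yet.
// Appending to a missing key works because a nil slice appends like an empty one.
func (m *Mapper) queuePatch(id uint64, change *tailcfg.PeerChange) {
	m.patches[id] = append(m.patches[id], patch{
		timestamp: time.Now(),
		change:    change,
	})
}

// drainPatches replays any buffered changes onto the now-known node
// and clears the queue so they are never applied twice.
func (m *Mapper) drainPatches(id uint64, node *types.Node) {
	for _, p := range m.patches[id] {
		node.ApplyPeerChange(p.change)
	}
	delete(m.patches, id)
}
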
@@ -93,7 +102,8 @@ func NewMapper(
seq: 0,
// TODO: populate
peers: peers.IDMap(),
peers: peers.IDMap(),
patches: make(map[uint64][]patch),
}
}
@@ -235,6 +245,19 @@ func (m *Mapper) FullMapResponse(
m.mu.Lock()
defer m.mu.Unlock()
peers := maps.Keys(m.peers)
peersWithPatches := maps.Keys(m.patches)
slices.Sort(peers)
slices.Sort(peersWithPatches)
if len(peersWithPatches) > 0 {
log.Debug().
Str("node", node.Hostname).
Uints64("peers", peers).
Uints64("pending_patches", peersWithPatches).
Msgf("node requested full map response, but has pending patches")
}
resp, err := m.fullMapResponse(node, pol, mapRequest.Version)
if err != nil {
return nil, err
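
The pending-patch logging above leans on two of the newly added imports: golang.org/x/exp/maps to collect the keys of m.peers and m.patches, and the slices package (standard library since Go 1.21) to sort them so the debug output is deterministic. A tiny standalone illustration of the idiom, with made-up data:

package main

import (
	"fmt"
	"slices"

	"golang.org/x/exp/maps"
)

func main() {
	pending := map[uint64][]string{3: nil, 1: nil, 2: nil}
	ids := maps.Keys(pending) // map iteration order is randomised in Go
	slices.Sort(ids)          // sort so repeated log lines are comparable
	fmt.Println(ids)          // [1 2 3]
}
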
@@ -272,10 +295,12 @@ func (m *Mapper) KeepAliveResponse(
func (m *Mapper) DERPMapResponse(
mapRequest tailcfg.MapRequest,
node *types.Node,
derpMap tailcfg.DERPMap,
derpMap *tailcfg.DERPMap,
) ([]byte, error) {
m.derpMap = derpMap
resp := m.baseMapResponse()
resp.DERPMap = &derpMap
resp.DERPMap = derpMap
return m.marshalMapResponse(mapRequest, &resp, node, mapRequest.Compress)
}
@@ -285,18 +310,29 @@ func (m *Mapper) PeerChangedResponse(
node *types.Node,
changed types.Nodes,
pol *policy.ACLPolicy,
messages ...string,
) ([]byte, error) {
m.mu.Lock()
defer m.mu.Unlock()
lastSeen := make(map[tailcfg.NodeID]bool)
// Update our internal map.
for _, node := range changed {
m.peers[node.ID] = node
if patches, ok := m.patches[node.ID]; ok {
// preserve online status in case the patch has an outdated one
online := node.IsOnline
// We have just seen the node, let the peers update their list.
lastSeen[tailcfg.NodeID(node.ID)] = true
for _, p := range patches {
// TODO(kradalby): Figure out if this needs to be sorted by timestamp
node.ApplyPeerChange(p.change)
}
// Ensure the patches are not applied again later
delete(m.patches, node.ID)
node.IsOnline = online
}
m.peers[node.ID] = node
}
resp := m.baseMapResponse()
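
Condensed, the patch replay that PeerChangedResponse now performs for each changed node is the ordering below; saving and restoring IsOnline is what keeps a stale buffered patch from marking a node offline that was literally just seen. This is a restatement of the hunk above for readability, not additional behaviour:

online := node.IsOnline                // authoritative: the node was just seen
for _, p := range m.patches[node.ID] { // replay anything queued while it was unknown
	node.ApplyPeerChange(p.change)     // may carry an outdated Online/LastSeen
}
delete(m.patches, node.ID)             // never replay the same patches twice
node.IsOnline = online                 // restore the fresh online state
m.peers[node.ID] = node                // and store the up-to-date peer
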
@@ -316,11 +352,55 @@ func (m *Mapper) PeerChangedResponse(
return nil, err
}
// resp.PeerSeenChange = lastSeen
return m.marshalMapResponse(mapRequest, &resp, node, mapRequest.Compress, messages...)
}
// PeerChangedPatchResponse creates a patch MapResponse with
// incoming update from a state change.
func (m *Mapper) PeerChangedPatchResponse(
mapRequest tailcfg.MapRequest,
node *types.Node,
changed []*tailcfg.PeerChange,
pol *policy.ACLPolicy,
) ([]byte, error) {
m.mu.Lock()
defer m.mu.Unlock()
sendUpdate := false
// patch the internal map
for _, change := range changed {
if peer, ok := m.peers[uint64(change.NodeID)]; ok {
peer.ApplyPeerChange(change)
sendUpdate = true
} else {
log.Trace().Str("node", node.Hostname).Msgf("Node with ID %d is missing from mapper for Node %s, saving patch for when node is available", change.NodeID, node.Hostname)
p := patch{
timestamp: time.Now(),
change: change,
}
if patches, ok := m.patches[uint64(change.NodeID)]; ok {
patches := append(patches, p)
m.patches[uint64(change.NodeID)] = patches
} else {
m.patches[uint64(change.NodeID)] = []patch{p}
}
}
}
if !sendUpdate {
return nil, nil
}
resp := m.baseMapResponse()
resp.PeersChangedPatch = changed
return m.marshalMapResponse(mapRequest, &resp, node, mapRequest.Compress)
}
// TODO(kradalby): We need some integration tests for this.
func (m *Mapper) PeerRemovedResponse(
mapRequest tailcfg.MapRequest,
node *types.Node,
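
For context, a tailcfg.PeerChange is a sparse delta: only the fields that are set get applied to the stored peer by ApplyPeerChange. The changes this commit routes through PeerChangedPatchResponse look roughly like the sketch below; the field names come from tailcfg, while the peer variable and the offline scenario are made up for illustration:

now := time.Now()
online := false
change := &tailcfg.PeerChange{
	NodeID:   tailcfg.NodeID(peer.ID),
	Online:   &online, // the peer disconnected
	LastSeen: &now,    // only meaningful while the peer stays offline
}
peer.ApplyPeerChange(change) // known peer: update the cached copy in place
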
@@ -329,13 +409,23 @@ func (m *Mapper) PeerRemovedResponse(
m.mu.Lock()
defer m.mu.Unlock()
// Some nodes might have been removed already,
// so we don't want to ask downstream to remove
// them twice; that can cause a panic in tailscaled.
notYetRemoved := []tailcfg.NodeID{}
// remove from our internal map
for _, id := range removed {
if _, ok := m.peers[uint64(id)]; ok {
notYetRemoved = append(notYetRemoved, id)
}
delete(m.peers, uint64(id))
delete(m.patches, uint64(id))
}
resp := m.baseMapResponse()
resp.PeersRemoved = removed
resp.PeersRemoved = notYetRemoved
return m.marshalMapResponse(mapRequest, &resp, node, mapRequest.Compress)
}
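
The removal path boils down to the filter below: only IDs that are still present in the mapper's view are echoed back as PeersRemoved, since asking tailscaled to remove a peer it has already dropped can cause a panic, while the local bookkeeping (peers and any queued patches) is cleared either way. A condensed restatement of the hunk above:

notYetRemoved := make([]tailcfg.NodeID, 0, len(removed))
for _, id := range removed {
	if _, ok := m.peers[uint64(id)]; ok {
		notYetRemoved = append(notYetRemoved, id) // only announce each removal once
	}
	delete(m.peers, uint64(id))   // forget the peer locally either way
	delete(m.patches, uint64(id)) // and drop any patches queued for it
}
resp.PeersRemoved = notYetRemoved
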
@@ -345,6 +435,7 @@ func (m *Mapper) marshalMapResponse(
resp *tailcfg.MapResponse,
node *types.Node,
compression string,
messages ...string,
) ([]byte, error) {
atomic.AddUint64(&m.seq, 1)
@@ -358,11 +449,25 @@ func (m *Mapper) marshalMapResponse(
if debugDumpMapResponsePath != "" {
data := map[string]interface{}{
"Messages": messages,
"MapRequest": mapRequest,
"MapResponse": resp,
}
body, err := json.Marshal(data)
responseType := "keepalive"
switch {
case resp.Peers != nil && len(resp.Peers) > 0:
responseType = "full"
case resp.PeersChanged != nil && len(resp.PeersChanged) > 0:
responseType = "changed"
case resp.PeersChangedPatch != nil && len(resp.PeersChangedPatch) > 0:
responseType = "patch"
case resp.PeersRemoved != nil && len(resp.PeersRemoved) > 0:
responseType = "removed"
}
body, err := json.MarshalIndent(data, "", " ")
if err != nil {
log.Error().
Caller().
@@ -381,7 +486,7 @@ func (m *Mapper) marshalMapResponse(
mapResponsePath := path.Join(
mPath,
fmt.Sprintf("%d-%s-%d.json", now, m.uid, atomic.LoadUint64(&m.seq)),
fmt.Sprintf("%d-%s-%d-%s.json", now, m.uid, atomic.LoadUint64(&m.seq), responseType),
)
log.Trace().Msgf("Writing MapResponse to %s", mapResponsePath)
@@ -438,6 +543,7 @@ func (m *Mapper) baseMapResponse() tailcfg.MapResponse {
resp := tailcfg.MapResponse{
KeepAlive: false,
ControlTime: &now,
// TODO(kradalby): Implement PingRequest?
}
return resp
@@ -559,8 +665,5 @@ func appendPeerChanges(
resp.UserProfiles = profiles
resp.SSHPolicy = sshPolicy
// TODO(kradalby): This currently does not take last seen in keepalives into account
resp.OnlineChange = peers.OnlineNodeMap()
return nil
}


@@ -237,7 +237,6 @@ func Test_fullMapResponse(t *testing.T) {
Tags: []string{},
PrimaryRoutes: []netip.Prefix{netip.MustParsePrefix("192.168.0.0/24")},
LastSeen: &lastSeen,
Online: new(bool),
MachineAuthorized: true,
Capabilities: []tailcfg.NodeCapability{
tailcfg.CapabilityFileSharing,
@@ -293,7 +292,6 @@ func Test_fullMapResponse(t *testing.T) {
Tags: []string{},
PrimaryRoutes: []netip.Prefix{},
LastSeen: &lastSeen,
Online: new(bool),
MachineAuthorized: true,
Capabilities: []tailcfg.NodeCapability{
tailcfg.CapabilityFileSharing,
@@ -400,7 +398,6 @@ func Test_fullMapResponse(t *testing.T) {
DNSConfig: &tailcfg.DNSConfig{},
Domain: "",
CollectServices: "false",
OnlineChange: map[tailcfg.NodeID]bool{tailPeer1.ID: false},
PacketFilter: []tailcfg.FilterRule{},
UserProfiles: []tailcfg.UserProfile{{LoginName: "mini", DisplayName: "mini"}},
SSHPolicy: &tailcfg.SSHPolicy{Rules: []*tailcfg.SSHRule{}},
@@ -442,10 +439,6 @@ func Test_fullMapResponse(t *testing.T) {
DNSConfig: &tailcfg.DNSConfig{},
Domain: "",
CollectServices: "false",
OnlineChange: map[tailcfg.NodeID]bool{
tailPeer1.ID: false,
tailcfg.NodeID(peer2.ID): false,
},
PacketFilter: []tailcfg.FilterRule{
{
SrcIPs: []string{"100.64.0.2/32"},


@@ -87,11 +87,9 @@ func tailNode(
hostname, err := node.GetFQDN(dnsConfig, baseDomain)
if err != nil {
return nil, err
return nil, fmt.Errorf("tailNode, failed to create FQDN: %s", err)
}
online := node.IsOnline()
tags, _ := pol.TagsOfNode(node)
tags = lo.Uniq(append(tags, node.ForcedTags...))
@@ -101,6 +99,7 @@ func tailNode(
strconv.FormatUint(node.ID, util.Base10),
), // in headscale, unlike tailcontrol server, IDs are permanent
Name: hostname,
Cap: capVer,
User: tailcfg.UserID(node.UserID),
@@ -116,13 +115,14 @@ func tailNode(
Hostinfo: node.Hostinfo.View(),
Created: node.CreatedAt,
Online: node.IsOnline,
Tags: tags,
PrimaryRoutes: primaryPrefixes,
LastSeen: node.LastSeen,
Online: &online,
MachineAuthorized: !node.IsExpired(),
Expired: node.IsExpired(),
}
// - 74: 2023-09-18: Client understands NodeCapMap
@@ -153,5 +153,11 @@ func tailNode(
tNode.Capabilities = append(tNode.Capabilities, tailcfg.NodeAttrDisableUPnP)
}
if node.IsOnline == nil || !*node.IsOnline {
// LastSeen is only set when the node is
// not connected to the control server.
tNode.LastSeen = node.LastSeen
}
return &tNode, nil
}
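
Taken together, the tail.go hunks change how presence reaches clients: Online on the generated tailcfg.Node is now taken directly from node.IsOnline (a pointer headscale keeps up to date) rather than the removed IsOnline() call, and LastSeen is only included while the node is offline, presumably so connected peers do not show a stale last-seen time. Condensed from the hunks above:

tNode.Online = node.IsOnline // straight from headscale's bookkeeping

if node.IsOnline == nil || !*node.IsOnline {
	// LastSeen is only sent for nodes that are not
	// connected to the control server.
	tNode.LastSeen = node.LastSeen
}
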


@@ -68,7 +68,6 @@ func TestTailNode(t *testing.T) {
Hostinfo: hiview(tailcfg.Hostinfo{}),
Tags: []string{},
PrimaryRoutes: []netip.Prefix{},
Online: new(bool),
MachineAuthorized: true,
Capabilities: []tailcfg.NodeCapability{
"https://tailscale.com/cap/file-sharing", "https://tailscale.com/cap/is-admin",
@@ -165,7 +164,6 @@ func TestTailNode(t *testing.T) {
},
LastSeen: &lastSeen,
Online: new(bool),
MachineAuthorized: true,
Capabilities: []tailcfg.NodeCapability{