Rework map session

This commit restructures the map session into a struct
holding the state that is needed during its lifetime.

For streaming sessions, the event loop is structured a
bit differently: instead of hammering the clients with updates,
it batches them over a short, configurable time window,
which should significantly improve CPU usage, and potentially
reduce flakiness.

The use of Patch updates has been dialed back a little, as
it does not look like it is 100% ready for prime time. Nodes
are now updated with full changes, except for a few things
like online status.

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
This commit is contained in:
Kristoffer Dalby 2024-02-23 10:59:24 +01:00 committed by Juan Font
parent dd693c444c
commit 58c94d2bd3
35 changed files with 1803 additions and 1716 deletions

View file

@ -34,27 +34,22 @@ var (
)
)
func (hsdb *HSDatabase) ListPeers(node *types.Node) (types.Nodes, error) {
func (hsdb *HSDatabase) ListPeers(nodeID types.NodeID) (types.Nodes, error) {
return Read(hsdb.DB, func(rx *gorm.DB) (types.Nodes, error) {
return ListPeers(rx, node)
return ListPeers(rx, nodeID)
})
}
// ListPeers returns all peers of node, regardless of any Policy or if the node is expired.
func ListPeers(tx *gorm.DB, node *types.Node) (types.Nodes, error) {
log.Trace().
Caller().
Str("node", node.Hostname).
Msg("Finding direct peers")
func ListPeers(tx *gorm.DB, nodeID types.NodeID) (types.Nodes, error) {
nodes := types.Nodes{}
if err := tx.
Preload("AuthKey").
Preload("AuthKey.User").
Preload("User").
Preload("Routes").
Where("node_key <> ?",
node.NodeKey.String()).Find(&nodes).Error; err != nil {
Where("id <> ?",
nodeID).Find(&nodes).Error; err != nil {
return types.Nodes{}, err
}
@ -119,14 +114,14 @@ func getNode(tx *gorm.DB, user string, name string) (*types.Node, error) {
return nil, ErrNodeNotFound
}
func (hsdb *HSDatabase) GetNodeByID(id uint64) (*types.Node, error) {
func (hsdb *HSDatabase) GetNodeByID(id types.NodeID) (*types.Node, error) {
return Read(hsdb.DB, func(rx *gorm.DB) (*types.Node, error) {
return GetNodeByID(rx, id)
})
}
// GetNodeByID finds a Node by ID and returns the Node struct.
func GetNodeByID(tx *gorm.DB, id uint64) (*types.Node, error) {
func GetNodeByID(tx *gorm.DB, id types.NodeID) (*types.Node, error) {
mach := types.Node{}
if result := tx.
Preload("AuthKey").
@ -197,7 +192,7 @@ func GetNodeByAnyKey(
}
func (hsdb *HSDatabase) SetTags(
nodeID uint64,
nodeID types.NodeID,
tags []string,
) error {
return hsdb.Write(func(tx *gorm.DB) error {
@ -208,7 +203,7 @@ func (hsdb *HSDatabase) SetTags(
// SetTags takes a node ID and updates the forced tags.
func SetTags(
tx *gorm.DB,
nodeID uint64,
nodeID types.NodeID,
tags []string,
) error {
if len(tags) == 0 {
@ -256,7 +251,7 @@ func RenameNode(tx *gorm.DB,
return nil
}
func (hsdb *HSDatabase) NodeSetExpiry(nodeID uint64, expiry time.Time) error {
func (hsdb *HSDatabase) NodeSetExpiry(nodeID types.NodeID, expiry time.Time) error {
return hsdb.Write(func(tx *gorm.DB) error {
return NodeSetExpiry(tx, nodeID, expiry)
})
@ -264,13 +259,13 @@ func (hsdb *HSDatabase) NodeSetExpiry(nodeID uint64, expiry time.Time) error {
// NodeSetExpiry takes a node ID and a new expiry time.
func NodeSetExpiry(tx *gorm.DB,
nodeID uint64, expiry time.Time,
nodeID types.NodeID, expiry time.Time,
) error {
return tx.Model(&types.Node{}).Where("id = ?", nodeID).Update("expiry", expiry).Error
}
func (hsdb *HSDatabase) DeleteNode(node *types.Node, isConnected map[key.MachinePublic]bool) error {
return hsdb.Write(func(tx *gorm.DB) error {
func (hsdb *HSDatabase) DeleteNode(node *types.Node, isConnected types.NodeConnectedMap) ([]types.NodeID, error) {
return Write(hsdb.DB, func(tx *gorm.DB) ([]types.NodeID, error) {
return DeleteNode(tx, node, isConnected)
})
}
@ -279,24 +274,24 @@ func (hsdb *HSDatabase) DeleteNode(node *types.Node, isConnected map[key.Machine
// Caller is responsible for notifying all of change.
func DeleteNode(tx *gorm.DB,
node *types.Node,
isConnected map[key.MachinePublic]bool,
) error {
err := deleteNodeRoutes(tx, node, map[key.MachinePublic]bool{})
isConnected types.NodeConnectedMap,
) ([]types.NodeID, error) {
changed, err := deleteNodeRoutes(tx, node, isConnected)
if err != nil {
return err
return changed, err
}
// Unscoped causes the node to be fully removed from the database.
if err := tx.Unscoped().Delete(&node).Error; err != nil {
return err
return changed, err
}
return nil
return changed, nil
}
// UpdateLastSeen sets a node's last seen field indicating that we
// SetLastSeen sets a node's last seen field indicating that we
// have recently communicated with this node.
func UpdateLastSeen(tx *gorm.DB, nodeID uint64, lastSeen time.Time) error {
func SetLastSeen(tx *gorm.DB, nodeID types.NodeID, lastSeen time.Time) error {
return tx.Model(&types.Node{}).Where("id = ?", nodeID).Update("last_seen", lastSeen).Error
}
@ -606,7 +601,7 @@ func enableRoutes(tx *gorm.DB,
return &types.StateUpdate{
Type: types.StatePeerChanged,
ChangeNodes: types.Nodes{node},
ChangeNodes: []types.NodeID{node.ID},
Message: "created in db.enableRoutes",
}, nil
}
@ -681,17 +676,18 @@ func GenerateGivenName(
return givenName, nil
}
func ExpireEphemeralNodes(tx *gorm.DB,
func DeleteExpiredEphemeralNodes(tx *gorm.DB,
inactivityThreshhold time.Duration,
) (types.StateUpdate, bool) {
) ([]types.NodeID, []types.NodeID) {
users, err := ListUsers(tx)
if err != nil {
log.Error().Err(err).Msg("Error listing users")
return types.StateUpdate{}, false
return nil, nil
}
expired := make([]tailcfg.NodeID, 0)
var expired []types.NodeID
var changedNodes []types.NodeID
for _, user := range users {
nodes, err := ListNodesByUser(tx, user.Name)
if err != nil {
@ -700,40 +696,36 @@ func ExpireEphemeralNodes(tx *gorm.DB,
Str("user", user.Name).
Msg("Error listing nodes in user")
return types.StateUpdate{}, false
return nil, nil
}
for idx, node := range nodes {
if node.IsEphemeral() && node.LastSeen != nil &&
time.Now().
After(node.LastSeen.Add(inactivityThreshhold)) {
expired = append(expired, tailcfg.NodeID(node.ID))
expired = append(expired, node.ID)
log.Info().
Str("node", node.Hostname).
Msg("Ephemeral client removed from database")
// empty isConnected map as ephemeral nodes are not routes
err = DeleteNode(tx, nodes[idx], map[key.MachinePublic]bool{})
changed, err := DeleteNode(tx, nodes[idx], nil)
if err != nil {
log.Error().
Err(err).
Str("node", node.Hostname).
Msg("🤮 Cannot delete ephemeral node from the database")
}
changedNodes = append(changedNodes, changed...)
}
}
// TODO(kradalby): needs to be moved out of transaction
}
if len(expired) > 0 {
return types.StateUpdate{
Type: types.StatePeerRemoved,
Removed: expired,
}, true
}
return types.StateUpdate{}, false
return expired, changedNodes
}
func ExpireExpiredNodes(tx *gorm.DB,
@ -754,35 +746,12 @@ func ExpireExpiredNodes(tx *gorm.DB,
return time.Unix(0, 0), types.StateUpdate{}, false
}
for index, node := range nodes {
if node.IsExpired() &&
// TODO(kradalby): Replace this, it is very spammy
// It will notify about all nodes that has been expired.
// It should only notify about expired nodes since _last check_.
node.Expiry.After(lastCheck) {
for _, node := range nodes {
if node.IsExpired() && node.Expiry.After(lastCheck) {
expired = append(expired, &tailcfg.PeerChange{
NodeID: tailcfg.NodeID(node.ID),
KeyExpiry: node.Expiry,
})
now := time.Now()
// Do not use setNodeExpiry as that has a notifier hook, which
// can cause a deadlock, we are updating all changed nodes later
// and there is no point in notifiying twice.
if err := tx.Model(&nodes[index]).Updates(types.Node{
Expiry: &now,
}).Error; err != nil {
log.Error().
Err(err).
Str("node", node.Hostname).
Str("name", node.GivenName).
Msg("🤮 Cannot expire node")
} else {
log.Info().
Str("node", node.Hostname).
Str("name", node.GivenName).
Msg("Node successfully expired")
}
}
}

View file

@ -120,7 +120,7 @@ func (s *Suite) TestHardDeleteNode(c *check.C) {
}
db.DB.Save(&node)
err = db.DeleteNode(&node, map[key.MachinePublic]bool{})
_, err = db.DeleteNode(&node, types.NodeConnectedMap{})
c.Assert(err, check.IsNil)
_, err = db.getNode(user.Name, "testnode3")
@ -142,7 +142,7 @@ func (s *Suite) TestListPeers(c *check.C) {
machineKey := key.NewMachine()
node := types.Node{
ID: uint64(index),
ID: types.NodeID(index),
MachineKey: machineKey.Public(),
NodeKey: nodeKey.Public(),
Hostname: "testnode" + strconv.Itoa(index),
@ -156,7 +156,7 @@ func (s *Suite) TestListPeers(c *check.C) {
node0ByID, err := db.GetNodeByID(0)
c.Assert(err, check.IsNil)
peersOfNode0, err := db.ListPeers(node0ByID)
peersOfNode0, err := db.ListPeers(node0ByID.ID)
c.Assert(err, check.IsNil)
c.Assert(len(peersOfNode0), check.Equals, 9)
@ -189,7 +189,7 @@ func (s *Suite) TestGetACLFilteredPeers(c *check.C) {
machineKey := key.NewMachine()
node := types.Node{
ID: uint64(index),
ID: types.NodeID(index),
MachineKey: machineKey.Public(),
NodeKey: nodeKey.Public(),
IPAddresses: types.NodeAddresses{
@ -232,16 +232,16 @@ func (s *Suite) TestGetACLFilteredPeers(c *check.C) {
c.Logf("Node(%v), user: %v", testNode.Hostname, testNode.User)
c.Assert(err, check.IsNil)
adminPeers, err := db.ListPeers(adminNode)
adminPeers, err := db.ListPeers(adminNode.ID)
c.Assert(err, check.IsNil)
testPeers, err := db.ListPeers(testNode)
testPeers, err := db.ListPeers(testNode.ID)
c.Assert(err, check.IsNil)
adminRules, _, err := policy.GenerateFilterAndSSHRules(aclPolicy, adminNode, adminPeers)
adminRules, _, err := policy.GenerateFilterAndSSHRulesForTests(aclPolicy, adminNode, adminPeers)
c.Assert(err, check.IsNil)
testRules, _, err := policy.GenerateFilterAndSSHRules(aclPolicy, testNode, testPeers)
testRules, _, err := policy.GenerateFilterAndSSHRulesForTests(aclPolicy, testNode, testPeers)
c.Assert(err, check.IsNil)
peersOfAdminNode := policy.FilterNodesByACL(adminNode, adminPeers, adminRules)
@ -586,7 +586,7 @@ func (s *Suite) TestAutoApproveRoutes(c *check.C) {
c.Assert(err, check.IsNil)
// TODO(kradalby): Check state update
_, err = db.EnableAutoApprovedRoutes(pol, node0ByID)
err = db.EnableAutoApprovedRoutes(pol, node0ByID)
c.Assert(err, check.IsNil)
enabledRoutes, err := db.GetEnabledRoutes(node0ByID)

View file

@ -92,10 +92,6 @@ func CreatePreAuthKey(
}
}
if err != nil {
return nil, err
}
return &key, nil
}

View file

@ -148,7 +148,7 @@ func (*Suite) TestEphemeralKeyReusable(c *check.C) {
c.Assert(err, check.IsNil)
db.DB.Transaction(func(tx *gorm.DB) error {
ExpireEphemeralNodes(tx, time.Second*20)
DeleteExpiredEphemeralNodes(tx, time.Second*20)
return nil
})
@ -182,7 +182,7 @@ func (*Suite) TestEphemeralKeyNotReusable(c *check.C) {
c.Assert(err, check.IsNil)
db.DB.Transaction(func(tx *gorm.DB) error {
ExpireEphemeralNodes(tx, time.Second*20)
DeleteExpiredEphemeralNodes(tx, time.Second*20)
return nil
})

View file

@ -8,7 +8,6 @@ import (
"github.com/juanfont/headscale/hscontrol/types"
"github.com/rs/zerolog/log"
"gorm.io/gorm"
"tailscale.com/types/key"
)
var ErrRouteIsNotAvailable = errors.New("route is not available")
@ -124,8 +123,8 @@ func EnableRoute(tx *gorm.DB, id uint64) (*types.StateUpdate, error) {
func DisableRoute(tx *gorm.DB,
id uint64,
isConnected map[key.MachinePublic]bool,
) (*types.StateUpdate, error) {
isConnected types.NodeConnectedMap,
) ([]types.NodeID, error) {
route, err := GetRoute(tx, id)
if err != nil {
return nil, err
@ -137,16 +136,15 @@ func DisableRoute(tx *gorm.DB,
// Tailscale requires both IPv4 and IPv6 exit routes to
// be enabled at the same time, as per
// https://github.com/juanfont/headscale/issues/804#issuecomment-1399314002
var update *types.StateUpdate
var update []types.NodeID
if !route.IsExitRoute() {
update, err = failoverRouteReturnUpdate(tx, isConnected, route)
route.Enabled = false
err = tx.Save(route).Error
if err != nil {
return nil, err
}
route.Enabled = false
route.IsPrimary = false
err = tx.Save(route).Error
update, err = failoverRouteTx(tx, isConnected, route)
if err != nil {
return nil, err
}
@ -160,6 +158,7 @@ func DisableRoute(tx *gorm.DB,
if routes[i].IsExitRoute() {
routes[i].Enabled = false
routes[i].IsPrimary = false
err = tx.Save(&routes[i]).Error
if err != nil {
return nil, err
@ -168,26 +167,11 @@ func DisableRoute(tx *gorm.DB,
}
}
if routes == nil {
routes, err = GetNodeRoutes(tx, &node)
if err != nil {
return nil, err
}
}
node.Routes = routes
// If update is empty, it means that one was not created
// by failover (as a failover was not necessary), create
// one and return to the caller.
if update == nil {
update = &types.StateUpdate{
Type: types.StatePeerChanged,
ChangeNodes: types.Nodes{
&node,
},
Message: "called from db.DisableRoute",
}
update = []types.NodeID{node.ID}
}
return update, nil
@ -195,9 +179,9 @@ func DisableRoute(tx *gorm.DB,
func (hsdb *HSDatabase) DeleteRoute(
id uint64,
isConnected map[key.MachinePublic]bool,
) (*types.StateUpdate, error) {
return Write(hsdb.DB, func(tx *gorm.DB) (*types.StateUpdate, error) {
isConnected types.NodeConnectedMap,
) ([]types.NodeID, error) {
return Write(hsdb.DB, func(tx *gorm.DB) ([]types.NodeID, error) {
return DeleteRoute(tx, id, isConnected)
})
}
@ -205,8 +189,8 @@ func (hsdb *HSDatabase) DeleteRoute(
func DeleteRoute(
tx *gorm.DB,
id uint64,
isConnected map[key.MachinePublic]bool,
) (*types.StateUpdate, error) {
isConnected types.NodeConnectedMap,
) ([]types.NodeID, error) {
route, err := GetRoute(tx, id)
if err != nil {
return nil, err
@ -218,9 +202,9 @@ func DeleteRoute(
// Tailscale requires both IPv4 and IPv6 exit routes to
// be enabled at the same time, as per
// https://github.com/juanfont/headscale/issues/804#issuecomment-1399314002
var update *types.StateUpdate
var update []types.NodeID
if !route.IsExitRoute() {
update, err = failoverRouteReturnUpdate(tx, isConnected, route)
update, err = failoverRouteTx(tx, isConnected, route)
if err != nil {
return nil, nil
}
@ -229,7 +213,7 @@ func DeleteRoute(
return nil, err
}
} else {
routes, err := GetNodeRoutes(tx, &node)
routes, err = GetNodeRoutes(tx, &node)
if err != nil {
return nil, err
}
@ -259,35 +243,37 @@ func DeleteRoute(
node.Routes = routes
if update == nil {
update = &types.StateUpdate{
Type: types.StatePeerChanged,
ChangeNodes: types.Nodes{
&node,
},
Message: "called from db.DeleteRoute",
}
update = []types.NodeID{node.ID}
}
return update, nil
}
func deleteNodeRoutes(tx *gorm.DB, node *types.Node, isConnected map[key.MachinePublic]bool) error {
func deleteNodeRoutes(tx *gorm.DB, node *types.Node, isConnected types.NodeConnectedMap) ([]types.NodeID, error) {
routes, err := GetNodeRoutes(tx, node)
if err != nil {
return err
return nil, err
}
var changed []types.NodeID
for i := range routes {
if err := tx.Unscoped().Delete(&routes[i]).Error; err != nil {
return err
return nil, err
}
// TODO(kradalby): This is a bit too aggressive, we could probably
// figure out which routes needs to be failed over rather than all.
failoverRouteReturnUpdate(tx, isConnected, &routes[i])
chn, err := failoverRouteTx(tx, isConnected, &routes[i])
if err != nil {
return changed, err
}
if chn != nil {
changed = append(changed, chn...)
}
}
return nil
return changed, nil
}
// isUniquePrefix returns if there is another node providing the same route already.
@ -400,7 +386,7 @@ func SaveNodeRoutes(tx *gorm.DB, node *types.Node) (bool, error) {
for prefix, exists := range advertisedRoutes {
if !exists {
route := types.Route{
NodeID: node.ID,
NodeID: node.ID.Uint64(),
Prefix: types.IPPrefix(prefix),
Advertised: true,
Enabled: false,
@ -415,19 +401,23 @@ func SaveNodeRoutes(tx *gorm.DB, node *types.Node) (bool, error) {
return sendUpdate, nil
}
// EnsureFailoverRouteIsAvailable takes a node and checks if the node's route
// FailoverRouteIfAvailable takes a node and checks if the node's routes
// currently have a functioning host that exposes the network.
func EnsureFailoverRouteIsAvailable(
// If it does not, it is failed over to another suitable route if there
// is one.
func FailoverRouteIfAvailable(
tx *gorm.DB,
isConnected map[key.MachinePublic]bool,
isConnected types.NodeConnectedMap,
node *types.Node,
) (*types.StateUpdate, error) {
log.Debug().Caller().Uint64("node.id", node.ID.Uint64()).Msgf("ROUTE DEBUG ENTERED FAILOVER")
nodeRoutes, err := GetNodeRoutes(tx, node)
if err != nil {
log.Debug().Caller().Uint64("node.id", node.ID.Uint64()).Interface("nodeRoutes", nodeRoutes).Msgf("ROUTE DEBUG NO ROUTES")
return nil, nil
}
var changedNodes types.Nodes
var changedNodes []types.NodeID
for _, nodeRoute := range nodeRoutes {
routes, err := getRoutesByPrefix(tx, netip.Prefix(nodeRoute.Prefix))
if err != nil {
@ -438,71 +428,39 @@ func EnsureFailoverRouteIsAvailable(
if route.IsPrimary {
// if we have a primary route, and the node is connected
// nothing needs to be done.
if isConnected[route.Node.MachineKey] {
continue
log.Debug().Caller().Uint64("node.id", node.ID.Uint64()).Uint64("route.node.id", route.Node.ID.Uint64()).Msgf("ROUTE DEBUG CHECKING IF ONLINE")
if isConnected[route.Node.ID] {
log.Debug().Caller().Uint64("node.id", node.ID.Uint64()).Uint64("route.node.id", route.Node.ID.Uint64()).Msgf("ROUTE DEBUG IS ONLINE")
return nil, nil
}
log.Debug().Caller().Uint64("node.id", node.ID.Uint64()).Uint64("route.node.id", route.Node.ID.Uint64()).Msgf("ROUTE DEBUG NOT ONLINE, FAILING OVER")
// if not, we need to failover the route
update, err := failoverRouteReturnUpdate(tx, isConnected, &route)
changedIDs, err := failoverRouteTx(tx, isConnected, &route)
if err != nil {
return nil, err
}
if update != nil {
changedNodes = append(changedNodes, update.ChangeNodes...)
if changedIDs != nil {
changedNodes = append(changedNodes, changedIDs...)
}
}
}
}
log.Debug().Caller().Uint64("node.id", node.ID.Uint64()).Interface("changedNodes", changedNodes).Msgf("ROUTE DEBUG")
if len(changedNodes) != 0 {
return &types.StateUpdate{
Type: types.StatePeerChanged,
ChangeNodes: changedNodes,
Message: "called from db.EnsureFailoverRouteIsAvailable",
Message: "called from db.FailoverRouteIfAvailable",
}, nil
}
return nil, nil
}
func failoverRouteReturnUpdate(
tx *gorm.DB,
isConnected map[key.MachinePublic]bool,
r *types.Route,
) (*types.StateUpdate, error) {
changedKeys, err := failoverRoute(tx, isConnected, r)
if err != nil {
return nil, err
}
log.Trace().
Interface("isConnected", isConnected).
Interface("changedKeys", changedKeys).
Msg("building route failover")
if len(changedKeys) == 0 {
return nil, nil
}
var nodes types.Nodes
for _, key := range changedKeys {
node, err := GetNodeByMachineKey(tx, key)
if err != nil {
return nil, err
}
nodes = append(nodes, node)
}
return &types.StateUpdate{
Type: types.StatePeerChanged,
ChangeNodes: nodes,
Message: "called from db.failoverRouteReturnUpdate",
}, nil
}
// failoverRoute takes a route that is no longer available,
// failoverRouteTx takes a route that is no longer available,
// this can be either from:
// - being disabled
// - being deleted
@ -510,11 +468,11 @@ func failoverRouteReturnUpdate(
//
// and tries to find a new route to take over its place.
// If the given route was not primary, it returns early.
func failoverRoute(
func failoverRouteTx(
tx *gorm.DB,
isConnected map[key.MachinePublic]bool,
isConnected types.NodeConnectedMap,
r *types.Route,
) ([]key.MachinePublic, error) {
) ([]types.NodeID, error) {
if r == nil {
return nil, nil
}
@ -535,11 +493,64 @@ func failoverRoute(
return nil, err
}
fo := failoverRoute(isConnected, r, routes)
if fo == nil {
return nil, nil
}
err = tx.Save(fo.old).Error
if err != nil {
log.Error().Err(err).Msg("disabling old primary route")
return nil, err
}
err = tx.Save(fo.new).Error
if err != nil {
log.Error().Err(err).Msg("saving new primary route")
return nil, err
}
log.Trace().
Str("hostname", fo.new.Node.Hostname).
Msgf("set primary to new route, was: id(%d), host(%s), now: id(%d), host(%s)", fo.old.ID, fo.old.Node.Hostname, fo.new.ID, fo.new.Node.Hostname)
// Return a list of the node IDs of the changed nodes.
return []types.NodeID{fo.old.Node.ID, fo.new.Node.ID}, nil
}
// failover describes the outcome of a primary-route failover: the route
// that gives up its primary status and the route that takes it over.
// Both routes are returned already mutated so the caller can persist them.
type failover struct {
// old is the route being replaced; its IsPrimary flag has been cleared.
old *types.Route
// new is the route chosen as replacement; its IsPrimary flag has been set.
new *types.Route
}
func failoverRoute(
isConnected types.NodeConnectedMap,
routeToReplace *types.Route,
altRoutes types.Routes,
) *failover {
if routeToReplace == nil {
return nil
}
// This route is not a primary route, and it is not
// being served to nodes.
if !routeToReplace.IsPrimary {
return nil
}
// We do not have to failover exit nodes
if routeToReplace.IsExitRoute() {
return nil
}
var newPrimary *types.Route
// Find a new suitable route
for idx, route := range routes {
if r.ID == route.ID {
for idx, route := range altRoutes {
if routeToReplace.ID == route.ID {
continue
}
@ -547,8 +558,8 @@ func failoverRoute(
continue
}
if isConnected[route.Node.MachineKey] {
newPrimary = &routes[idx]
if isConnected != nil && isConnected[route.Node.ID] {
newPrimary = &altRoutes[idx]
break
}
}
@ -559,48 +570,23 @@ func failoverRoute(
// the one currently marked as primary is the
// best we got.
if newPrimary == nil {
return nil, nil
return nil
}
log.Trace().
Str("hostname", newPrimary.Node.Hostname).
Msg("found new primary, updating db")
// Remove primary from the old route
r.IsPrimary = false
err = tx.Save(&r).Error
if err != nil {
log.Error().Err(err).Msg("error disabling new primary route")
return nil, err
}
log.Trace().
Str("hostname", newPrimary.Node.Hostname).
Msg("removed primary from old route")
// Set primary for the new primary
routeToReplace.IsPrimary = false
newPrimary.IsPrimary = true
err = tx.Save(&newPrimary).Error
if err != nil {
log.Error().Err(err).Msg("error enabling new primary route")
return nil, err
return &failover{
old: routeToReplace,
new: newPrimary,
}
log.Trace().
Str("hostname", newPrimary.Node.Hostname).
Msg("set primary to new route")
// Return a list of the machinekeys of the changed nodes.
return []key.MachinePublic{r.Node.MachineKey, newPrimary.Node.MachineKey}, nil
}
func (hsdb *HSDatabase) EnableAutoApprovedRoutes(
aclPolicy *policy.ACLPolicy,
node *types.Node,
) (*types.StateUpdate, error) {
return Write(hsdb.DB, func(tx *gorm.DB) (*types.StateUpdate, error) {
) error {
return hsdb.Write(func(tx *gorm.DB) error {
return EnableAutoApprovedRoutes(tx, aclPolicy, node)
})
}
@ -610,9 +596,9 @@ func EnableAutoApprovedRoutes(
tx *gorm.DB,
aclPolicy *policy.ACLPolicy,
node *types.Node,
) (*types.StateUpdate, error) {
) error {
if len(node.IPAddresses) == 0 {
return nil, nil // This node has no IPAddresses, so can't possibly match any autoApprovers ACLs
return nil // This node has no IPAddresses, so can't possibly match any autoApprovers ACLs
}
routes, err := GetNodeAdvertisedRoutes(tx, node)
@ -623,7 +609,7 @@ func EnableAutoApprovedRoutes(
Str("node", node.Hostname).
Msg("Could not get advertised routes for node")
return nil, err
return err
}
log.Trace().Interface("routes", routes).Msg("routes for autoapproving")
@ -641,10 +627,10 @@ func EnableAutoApprovedRoutes(
if err != nil {
log.Err(err).
Str("advertisedRoute", advertisedRoute.String()).
Uint64("nodeId", node.ID).
Uint64("nodeId", node.ID.Uint64()).
Msg("Failed to resolve autoApprovers for advertised route")
return nil, err
return err
}
log.Trace().
@ -665,7 +651,7 @@ func EnableAutoApprovedRoutes(
Str("alias", approvedAlias).
Msg("Failed to expand alias when processing autoApprovers policy")
return nil, err
return err
}
// approvedIPs should contain all of node's IPs if it matches the rule, so check for first
@ -676,25 +662,17 @@ func EnableAutoApprovedRoutes(
}
}
update := &types.StateUpdate{
Type: types.StatePeerChanged,
ChangeNodes: types.Nodes{},
Message: "created in db.EnableAutoApprovedRoutes",
}
for _, approvedRoute := range approvedRoutes {
perHostUpdate, err := EnableRoute(tx, uint64(approvedRoute.ID))
_, err := EnableRoute(tx, uint64(approvedRoute.ID))
if err != nil {
log.Err(err).
Str("approvedRoute", approvedRoute.String()).
Uint64("nodeId", node.ID).
Uint64("nodeId", node.ID.Uint64()).
Msg("Failed to enable approved route")
return nil, err
return err
}
update.ChangeNodes = append(update.ChangeNodes, perHostUpdate.ChangeNodes...)
}
return update, nil
return nil
}

View file

@ -13,7 +13,6 @@ import (
"gopkg.in/check.v1"
"gorm.io/gorm"
"tailscale.com/tailcfg"
"tailscale.com/types/key"
)
func (s *Suite) TestGetRoutes(c *check.C) {
@ -262,7 +261,7 @@ func (s *Suite) TestDeleteRoutes(c *check.C) {
c.Assert(err, check.IsNil)
// TODO(kradalby): check stateupdate
_, err = db.DeleteRoute(uint64(routes[0].ID), map[key.MachinePublic]bool{})
_, err = db.DeleteRoute(uint64(routes[0].ID), nil)
c.Assert(err, check.IsNil)
enabledRoutes1, err := db.GetEnabledRoutes(&node1)
@ -272,20 +271,13 @@ func (s *Suite) TestDeleteRoutes(c *check.C) {
var ipp = func(s string) types.IPPrefix { return types.IPPrefix(netip.MustParsePrefix(s)) }
func TestFailoverRoute(t *testing.T) {
machineKeys := []key.MachinePublic{
key.NewMachine().Public(),
key.NewMachine().Public(),
key.NewMachine().Public(),
key.NewMachine().Public(),
}
func TestFailoverRouteTx(t *testing.T) {
tests := []struct {
name string
failingRoute types.Route
routes types.Routes
isConnected map[key.MachinePublic]bool
want []key.MachinePublic
isConnected types.NodeConnectedMap
want []types.NodeID
wantErr bool
}{
{
@ -301,10 +293,8 @@ func TestFailoverRoute(t *testing.T) {
Model: gorm.Model{
ID: 1,
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{},
IsPrimary: false,
},
routes: types.Routes{},
@ -317,10 +307,8 @@ func TestFailoverRoute(t *testing.T) {
Model: gorm.Model{
ID: 1,
},
Prefix: ipp("0.0.0.0/0"),
Node: types.Node{
MachineKey: machineKeys[0],
},
Prefix: ipp("0.0.0.0/0"),
Node: types.Node{},
IsPrimary: true,
},
routes: types.Routes{},
@ -335,7 +323,7 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
ID: 1,
},
IsPrimary: true,
},
@ -346,7 +334,7 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
ID: 1,
},
IsPrimary: true,
},
@ -362,7 +350,7 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
ID: 1,
},
IsPrimary: true,
Enabled: true,
@ -374,7 +362,7 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
ID: 1,
},
IsPrimary: true,
Enabled: true,
@ -385,19 +373,19 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[1],
ID: 2,
},
IsPrimary: false,
Enabled: true,
},
},
isConnected: map[key.MachinePublic]bool{
machineKeys[0]: false,
machineKeys[1]: true,
isConnected: types.NodeConnectedMap{
1: false,
2: true,
},
want: []key.MachinePublic{
machineKeys[0],
machineKeys[1],
want: []types.NodeID{
1,
2,
},
wantErr: false,
},
@ -409,7 +397,7 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
ID: 1,
},
IsPrimary: false,
Enabled: true,
@ -421,7 +409,7 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
ID: 1,
},
IsPrimary: true,
Enabled: true,
@ -432,7 +420,7 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[1],
ID: 2,
},
IsPrimary: false,
Enabled: true,
@ -449,7 +437,7 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[1],
ID: 2,
},
IsPrimary: true,
Enabled: true,
@ -461,7 +449,7 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
ID: 1,
},
IsPrimary: false,
Enabled: true,
@ -472,7 +460,7 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[1],
ID: 2,
},
IsPrimary: true,
Enabled: true,
@ -483,20 +471,19 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[2],
ID: 3,
},
IsPrimary: false,
Enabled: true,
},
},
isConnected: map[key.MachinePublic]bool{
machineKeys[0]: true,
machineKeys[1]: true,
machineKeys[2]: true,
isConnected: types.NodeConnectedMap{
1: true,
2: true,
3: true,
},
want: []key.MachinePublic{
machineKeys[1],
machineKeys[0],
want: []types.NodeID{
2, 1,
},
wantErr: false,
},
@ -508,7 +495,7 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
ID: 1,
},
IsPrimary: true,
Enabled: true,
@ -520,7 +507,7 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
ID: 1,
},
IsPrimary: true,
Enabled: true,
@ -532,15 +519,15 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[3],
ID: 4,
},
IsPrimary: false,
Enabled: true,
},
},
isConnected: map[key.MachinePublic]bool{
machineKeys[0]: true,
machineKeys[3]: false,
isConnected: types.NodeConnectedMap{
1: true,
4: false,
},
want: nil,
wantErr: false,
@ -553,7 +540,7 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
ID: 1,
},
IsPrimary: true,
Enabled: true,
@ -565,7 +552,7 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
ID: 1,
},
IsPrimary: true,
Enabled: true,
@ -577,7 +564,7 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[3],
ID: 4,
},
IsPrimary: false,
Enabled: true,
@ -588,20 +575,20 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[1],
ID: 2,
},
IsPrimary: true,
Enabled: true,
},
},
isConnected: map[key.MachinePublic]bool{
machineKeys[0]: false,
machineKeys[1]: true,
machineKeys[3]: false,
isConnected: types.NodeConnectedMap{
1: false,
2: true,
4: false,
},
want: []key.MachinePublic{
machineKeys[0],
machineKeys[1],
want: []types.NodeID{
1,
2,
},
wantErr: false,
},
@ -613,7 +600,7 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
ID: 1,
},
IsPrimary: true,
Enabled: true,
@ -625,7 +612,7 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
ID: 1,
},
IsPrimary: true,
Enabled: true,
@ -637,7 +624,7 @@ func TestFailoverRoute(t *testing.T) {
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[1],
ID: 2,
},
IsPrimary: false,
Enabled: false,
@ -670,8 +657,8 @@ func TestFailoverRoute(t *testing.T) {
}
}
got, err := Write(db.DB, func(tx *gorm.DB) ([]key.MachinePublic, error) {
return failoverRoute(tx, tt.isConnected, &tt.failingRoute)
got, err := Write(db.DB, func(tx *gorm.DB) ([]types.NodeID, error) {
return failoverRouteTx(tx, tt.isConnected, &tt.failingRoute)
})
if (err != nil) != tt.wantErr {
@ -687,230 +674,177 @@ func TestFailoverRoute(t *testing.T) {
}
}
// TestFailoverRoute is a table-driven test of failoverRoute. Each case passes
// a failing route, the candidate routes and a node-connectivity map directly
// to failoverRoute (no database is involved) and checks the returned failover:
// either nil, or the expected "old" and "new" routes.
func TestFailoverRoute(t *testing.T) {
	// r builds a route value with the given route ID, owning node ID, prefix
	// and enabled/primary flags.
	r := func(id uint, nid types.NodeID, prefix types.IPPrefix, enabled, primary bool) types.Route {
		return types.Route{
			Model: gorm.Model{
				ID: id,
			},
			Node: types.Node{
				ID: nid,
			},
			Prefix:    prefix,
			Enabled:   enabled,
			IsPrimary: primary,
		}
	}

	// rp is the pointer-returning variant of r, used for the expected results.
	rp := func(id uint, nid types.NodeID, prefix types.IPPrefix, enabled, primary bool) *types.Route {
		ro := r(id, nid, prefix, enabled, primary)

		return &ro
	}

	tests := []struct {
		name         string
		failingRoute types.Route
		routes       types.Routes
		isConnected  types.NodeConnectedMap
		want         *failover
	}{
		{
			name:         "no-route",
			failingRoute: types.Route{},
			routes:       types.Routes{},
			want:         nil,
		},
		{
			name:         "no-prime",
			failingRoute: r(1, 1, ipp("10.0.0.0/24"), false, false),
			routes:       types.Routes{},
			want:         nil,
		},
		{
			name:         "exit-node",
			failingRoute: r(1, 1, ipp("0.0.0.0/0"), false, true),
			routes:       types.Routes{},
			want:         nil,
		},
		{
			name:         "no-failover-single-route",
			failingRoute: r(1, 1, ipp("10.0.0.0/24"), false, true),
			routes: types.Routes{
				r(1, 1, ipp("10.0.0.0/24"), false, true),
			},
			want: nil,
		},
		{
			name:         "failover-primary",
			failingRoute: r(1, 1, ipp("10.0.0.0/24"), true, true),
			routes: types.Routes{
				r(1, 1, ipp("10.0.0.0/24"), true, true),
				r(2, 2, ipp("10.0.0.0/24"), true, false),
			},
			isConnected: types.NodeConnectedMap{
				1: false,
				2: true,
			},
			want: &failover{
				old: rp(1, 1, ipp("10.0.0.0/24"), true, false),
				new: rp(2, 2, ipp("10.0.0.0/24"), true, true),
			},
		},
		{
			name:         "failover-none-primary",
			failingRoute: r(1, 1, ipp("10.0.0.0/24"), true, false),
			routes: types.Routes{
				r(1, 1, ipp("10.0.0.0/24"), true, true),
				r(2, 2, ipp("10.0.0.0/24"), true, false),
			},
			want: nil,
		},
		{
			name:         "failover-primary-multi-route",
			failingRoute: r(2, 2, ipp("10.0.0.0/24"), true, true),
			routes: types.Routes{
				r(1, 1, ipp("10.0.0.0/24"), true, false),
				r(2, 2, ipp("10.0.0.0/24"), true, true),
				r(3, 3, ipp("10.0.0.0/24"), true, false),
			},
			isConnected: types.NodeConnectedMap{
				1: true,
				2: true,
				3: true,
			},
			want: &failover{
				old: rp(2, 2, ipp("10.0.0.0/24"), true, false),
				new: rp(1, 1, ipp("10.0.0.0/24"), true, true),
			},
		},
		{
			name:         "failover-primary-no-online",
			failingRoute: r(1, 1, ipp("10.0.0.0/24"), true, true),
			routes: types.Routes{
				r(1, 1, ipp("10.0.0.0/24"), true, true),
				r(2, 4, ipp("10.0.0.0/24"), true, false),
			},
			isConnected: types.NodeConnectedMap{
				1: true,
				4: false,
			},
			want: nil,
		},
		{
			name:         "failover-primary-one-not-online",
			failingRoute: r(1, 1, ipp("10.0.0.0/24"), true, true),
			routes: types.Routes{
				r(1, 1, ipp("10.0.0.0/24"), true, true),
				r(2, 4, ipp("10.0.0.0/24"), true, false),
				r(3, 2, ipp("10.0.0.0/24"), true, false),
			},
			isConnected: types.NodeConnectedMap{
				1: false,
				2: true,
				4: false,
			},
			want: &failover{
				old: rp(1, 1, ipp("10.0.0.0/24"), true, false),
				new: rp(3, 2, ipp("10.0.0.0/24"), true, true),
			},
		},
		{
			name:         "failover-primary-none-enabled",
			failingRoute: r(1, 1, ipp("10.0.0.0/24"), true, true),
			routes: types.Routes{
				r(1, 1, ipp("10.0.0.0/24"), true, false),
				r(2, 2, ipp("10.0.0.0/24"), false, true),
			},
			want: nil,
		},
	}

	// Clip the capacity of the shared util.Comparers slice before appending so
	// that append always allocates a fresh backing array and can never write
	// into the package-level slice's spare capacity.
	cmps := append(
		util.Comparers[:len(util.Comparers):len(util.Comparers)],
		cmp.Comparer(func(x, y types.IPPrefix) bool {
			return netip.Prefix(x) == netip.Prefix(y)
		}),
	)

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			gotf := failoverRoute(tt.isConnected, &tt.failingRoute, tt.routes)

			switch {
			case tt.want == nil && gotf == nil:
				// No failover expected and none produced.
				return
			case tt.want == nil:
				t.Fatalf("expected nil, got %+v", gotf)
			case gotf == nil:
				t.Fatalf("expected %+v, got nil", tt.want)
			}

			// The old/new routes are copied into maps and the maps are diffed
			// rather than the failover values themselves; presumably this is
			// because failover's fields are unexported, which cmp.Diff does
			// not inspect without extra options.
			want := map[string]*types.Route{
				"new": tt.want.new,
				"old": tt.want.old,
			}

			got := map[string]*types.Route{
				"new": gotf.new,
				"old": gotf.old,
			}

			if diff := cmp.Diff(want, got, cmps...); diff != "" {
				t.Fatalf("failoverRoute unexpected result (-want +got):\n%s", diff)
			}
		})
	}
}