ensure online status and route changes are propagated (#1564)

This commit is contained in:
Kristoffer Dalby 2023-12-09 18:09:24 +01:00 committed by GitHub
parent 0153e26392
commit f65f4eca35
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
40 changed files with 3170 additions and 857 deletions

View file

@ -171,11 +171,13 @@ func NewHeadscaleDatabase(
dKey = "discokey:" + node.DiscoKey
}
err := db.db.Exec("UPDATE nodes SET machine_key = @mKey, node_key = @nKey, disco_key = @dKey WHERE ID = @id",
err := db.db.Exec(
"UPDATE nodes SET machine_key = @mKey, node_key = @nKey, disco_key = @dKey WHERE ID = @id",
sql.Named("mKey", mKey),
sql.Named("nKey", nKey),
sql.Named("dKey", dKey),
sql.Named("id", node.ID)).Error
sql.Named("id", node.ID),
).Error
if err != nil {
return nil, err
}

View file

@ -61,11 +61,6 @@ func (hsdb *HSDatabase) listPeers(node *types.Node) (types.Nodes, error) {
sort.Slice(nodes, func(i, j int) bool { return nodes[i].ID < nodes[j].ID })
log.Trace().
Caller().
Str("node", node.Hostname).
Msgf("Found peers: %s", nodes.String())
return nodes, nil
}
@ -176,6 +171,12 @@ func (hsdb *HSDatabase) GetNodeByMachineKey(
hsdb.mu.RLock()
defer hsdb.mu.RUnlock()
return hsdb.getNodeByMachineKey(machineKey)
}
func (hsdb *HSDatabase) getNodeByMachineKey(
machineKey key.MachinePublic,
) (*types.Node, error) {
mach := types.Node{}
if result := hsdb.db.
Preload("AuthKey").
@ -252,6 +253,10 @@ func (hsdb *HSDatabase) SetTags(
hsdb.mu.Lock()
defer hsdb.mu.Unlock()
if len(tags) == 0 {
return nil
}
newTags := []string{}
for _, tag := range tags {
if !util.StringOrPrefixListContains(newTags, tag) {
@ -265,10 +270,14 @@ func (hsdb *HSDatabase) SetTags(
return fmt.Errorf("failed to update tags for node in the database: %w", err)
}
hsdb.notifier.NotifyWithIgnore(types.StateUpdate{
Type: types.StatePeerChanged,
Changed: types.Nodes{node},
}, node.MachineKey.String())
stateUpdate := types.StateUpdate{
Type: types.StatePeerChanged,
ChangeNodes: types.Nodes{node},
Message: "called from db.SetTags",
}
if stateUpdate.Valid() {
hsdb.notifier.NotifyWithIgnore(stateUpdate, node.MachineKey.String())
}
return nil
}
@ -301,10 +310,14 @@ func (hsdb *HSDatabase) RenameNode(node *types.Node, newName string) error {
return fmt.Errorf("failed to rename node in the database: %w", err)
}
hsdb.notifier.NotifyWithIgnore(types.StateUpdate{
Type: types.StatePeerChanged,
Changed: types.Nodes{node},
}, node.MachineKey.String())
stateUpdate := types.StateUpdate{
Type: types.StatePeerChanged,
ChangeNodes: types.Nodes{node},
Message: "called from db.RenameNode",
}
if stateUpdate.Valid() {
hsdb.notifier.NotifyWithIgnore(stateUpdate, node.MachineKey.String())
}
return nil
}
@ -327,10 +340,18 @@ func (hsdb *HSDatabase) nodeSetExpiry(node *types.Node, expiry time.Time) error
)
}
hsdb.notifier.NotifyWithIgnore(types.StateUpdate{
Type: types.StatePeerChanged,
Changed: types.Nodes{node},
}, node.MachineKey.String())
stateUpdate := types.StateUpdate{
Type: types.StatePeerChangedPatch,
ChangePatches: []*tailcfg.PeerChange{
{
NodeID: tailcfg.NodeID(node.ID),
KeyExpiry: &expiry,
},
},
}
if stateUpdate.Valid() {
hsdb.notifier.NotifyAll(stateUpdate)
}
return nil
}
@ -354,10 +375,13 @@ func (hsdb *HSDatabase) deleteNode(node *types.Node) error {
return err
}
hsdb.notifier.NotifyAll(types.StateUpdate{
stateUpdate := types.StateUpdate{
Type: types.StatePeerRemoved,
Removed: []tailcfg.NodeID{tailcfg.NodeID(node.ID)},
})
}
if stateUpdate.Valid() {
hsdb.notifier.NotifyAll(stateUpdate)
}
return nil
}
@ -629,20 +653,6 @@ func (hsdb *HSDatabase) IsRoutesEnabled(node *types.Node, routeStr string) bool
return false
}
func (hsdb *HSDatabase) ListOnlineNodes(
node *types.Node,
) (map[tailcfg.NodeID]bool, error) {
hsdb.mu.RLock()
defer hsdb.mu.RUnlock()
peers, err := hsdb.listPeers(node)
if err != nil {
return nil, err
}
return peers.OnlineNodeMap(), nil
}
// enableRoutes enables new routes based on a list of new routes.
func (hsdb *HSDatabase) enableRoutes(node *types.Node, routeStrs ...string) error {
newRoutes := make([]netip.Prefix, len(routeStrs))
@ -694,10 +704,30 @@ func (hsdb *HSDatabase) enableRoutes(node *types.Node, routeStrs ...string) erro
}
}
hsdb.notifier.NotifyWithIgnore(types.StateUpdate{
Type: types.StatePeerChanged,
Changed: types.Nodes{node},
}, node.MachineKey.String())
// Ensure the node has the latest routes when notifying the other
// nodes
nRoutes, err := hsdb.getNodeRoutes(node)
if err != nil {
return fmt.Errorf("failed to read back routes: %w", err)
}
node.Routes = nRoutes
log.Trace().
Caller().
Str("node", node.Hostname).
Strs("routes", routeStrs).
Msg("enabling routes")
stateUpdate := types.StateUpdate{
Type: types.StatePeerChanged,
ChangeNodes: types.Nodes{node},
Message: "called from db.enableRoutes",
}
if stateUpdate.Valid() {
hsdb.notifier.NotifyWithIgnore(
stateUpdate, node.MachineKey.String())
}
return nil
}
@ -728,7 +758,10 @@ func generateGivenName(suppliedName string, randomSuffix bool) (string, error) {
return normalizedHostname, nil
}
func (hsdb *HSDatabase) GenerateGivenName(mkey key.MachinePublic, suppliedName string) (string, error) {
func (hsdb *HSDatabase) GenerateGivenName(
mkey key.MachinePublic,
suppliedName string,
) (string, error) {
hsdb.mu.RLock()
defer hsdb.mu.RUnlock()
@ -823,53 +856,54 @@ func (hsdb *HSDatabase) ExpireExpiredNodes(lastCheck time.Time) time.Time {
// checked everything.
started := time.Now()
users, err := hsdb.listUsers()
expired := make([]*tailcfg.PeerChange, 0)
nodes, err := hsdb.listNodes()
if err != nil {
log.Error().Err(err).Msg("Error listing users")
log.Error().
Err(err).
Msg("Error listing nodes to find expired nodes")
return time.Unix(0, 0)
}
for index, node := range nodes {
if node.IsExpired() &&
// TODO(kradalby): Replace this, it is very spammy
// It will notify about all nodes that has been expired.
// It should only notify about expired nodes since _last check_.
node.Expiry.After(lastCheck) {
expired = append(expired, &tailcfg.PeerChange{
NodeID: tailcfg.NodeID(node.ID),
KeyExpiry: node.Expiry,
})
for _, user := range users {
nodes, err := hsdb.listNodesByUser(user.Name)
if err != nil {
log.Error().
Err(err).
Str("user", user.Name).
Msg("Error listing nodes in user")
return time.Unix(0, 0)
}
expired := make([]tailcfg.NodeID, 0)
for index, node := range nodes {
if node.IsExpired() &&
node.Expiry.After(lastCheck) {
expired = append(expired, tailcfg.NodeID(node.ID))
now := time.Now()
err := hsdb.nodeSetExpiry(nodes[index], now)
if err != nil {
log.Error().
Err(err).
Str("node", node.Hostname).
Str("name", node.GivenName).
Msg("🤮 Cannot expire node")
} else {
log.Info().
Str("node", node.Hostname).
Str("name", node.GivenName).
Msg("Node successfully expired")
}
now := time.Now()
// Do not use setNodeExpiry as that has a notifier hook, which
// can cause a deadlock, we are updating all changed nodes later
// and there is no point in notifiying twice.
if err := hsdb.db.Model(nodes[index]).Updates(types.Node{
Expiry: &now,
}).Error; err != nil {
log.Error().
Err(err).
Str("node", node.Hostname).
Str("name", node.GivenName).
Msg("🤮 Cannot expire node")
} else {
log.Info().
Str("node", node.Hostname).
Str("name", node.GivenName).
Msg("Node successfully expired")
}
}
}
if len(expired) > 0 {
hsdb.notifier.NotifyAll(types.StateUpdate{
Type: types.StatePeerRemoved,
Removed: expired,
})
}
stateUpdate := types.StateUpdate{
Type: types.StatePeerChangedPatch,
ChangePatches: expired,
}
if stateUpdate.Valid() {
hsdb.notifier.NotifyAll(stateUpdate)
}
return started

View file

@ -603,8 +603,9 @@ func (s *Suite) TestAutoApproveRoutes(c *check.C) {
db.db.Save(&node)
err = db.SaveNodeRoutes(&node)
sendUpdate, err := db.SaveNodeRoutes(&node)
c.Assert(err, check.IsNil)
c.Assert(sendUpdate, check.Equals, false)
node0ByID, err := db.GetNodeByID(0)
c.Assert(err, check.IsNil)

View file

@ -7,7 +7,9 @@ import (
"github.com/juanfont/headscale/hscontrol/policy"
"github.com/juanfont/headscale/hscontrol/types"
"github.com/rs/zerolog/log"
"github.com/samber/lo"
"gorm.io/gorm"
"tailscale.com/types/key"
)
var ErrRouteIsNotAvailable = errors.New("route is not available")
@ -21,7 +23,38 @@ func (hsdb *HSDatabase) GetRoutes() (types.Routes, error) {
func (hsdb *HSDatabase) getRoutes() (types.Routes, error) {
var routes types.Routes
err := hsdb.db.Preload("Node").Find(&routes).Error
err := hsdb.db.
Preload("Node").
Preload("Node.User").
Find(&routes).Error
if err != nil {
return nil, err
}
return routes, nil
}
func (hsdb *HSDatabase) getAdvertisedAndEnabledRoutes() (types.Routes, error) {
var routes types.Routes
err := hsdb.db.
Preload("Node").
Preload("Node.User").
Where("advertised = ? AND enabled = ?", true, true).
Find(&routes).Error
if err != nil {
return nil, err
}
return routes, nil
}
func (hsdb *HSDatabase) getRoutesByPrefix(pref netip.Prefix) (types.Routes, error) {
var routes types.Routes
err := hsdb.db.
Preload("Node").
Preload("Node.User").
Where("prefix = ?", types.IPPrefix(pref)).
Find(&routes).Error
if err != nil {
return nil, err
}
@ -40,6 +73,7 @@ func (hsdb *HSDatabase) getNodeAdvertisedRoutes(node *types.Node) (types.Routes,
var routes types.Routes
err := hsdb.db.
Preload("Node").
Preload("Node.User").
Where("node_id = ? AND advertised = true", node.ID).
Find(&routes).Error
if err != nil {
@ -60,6 +94,7 @@ func (hsdb *HSDatabase) getNodeRoutes(node *types.Node) (types.Routes, error) {
var routes types.Routes
err := hsdb.db.
Preload("Node").
Preload("Node.User").
Where("node_id = ?", node.ID).
Find(&routes).Error
if err != nil && !errors.Is(err, gorm.ErrRecordNotFound) {
@ -78,7 +113,10 @@ func (hsdb *HSDatabase) GetRoute(id uint64) (*types.Route, error) {
func (hsdb *HSDatabase) getRoute(id uint64) (*types.Route, error) {
var route types.Route
err := hsdb.db.Preload("Node").First(&route, id).Error
err := hsdb.db.
Preload("Node").
Preload("Node.User").
First(&route, id).Error
if err != nil {
return nil, err
}
@ -122,37 +160,61 @@ func (hsdb *HSDatabase) DisableRoute(id uint64) error {
return err
}
var routes types.Routes
node := route.Node
// Tailscale requires both IPv4 and IPv6 exit routes to
// be enabled at the same time, as per
// https://github.com/juanfont/headscale/issues/804#issuecomment-1399314002
if !route.IsExitRoute() {
err = hsdb.failoverRouteWithNotify(route)
if err != nil {
return err
}
route.Enabled = false
route.IsPrimary = false
err = hsdb.db.Save(route).Error
if err != nil {
return err
}
} else {
routes, err = hsdb.getNodeRoutes(&node)
if err != nil {
return err
}
return hsdb.handlePrimarySubnetFailover()
}
routes, err := hsdb.getNodeRoutes(&route.Node)
if err != nil {
return err
}
for i := range routes {
if routes[i].IsExitRoute() {
routes[i].Enabled = false
routes[i].IsPrimary = false
err = hsdb.db.Save(&routes[i]).Error
if err != nil {
return err
for i := range routes {
if routes[i].IsExitRoute() {
routes[i].Enabled = false
routes[i].IsPrimary = false
err = hsdb.db.Save(&routes[i]).Error
if err != nil {
return err
}
}
}
}
return hsdb.handlePrimarySubnetFailover()
if routes == nil {
routes, err = hsdb.getNodeRoutes(&node)
if err != nil {
return err
}
}
node.Routes = routes
stateUpdate := types.StateUpdate{
Type: types.StatePeerChanged,
ChangeNodes: types.Nodes{&node},
Message: "called from db.DisableRoute",
}
if stateUpdate.Valid() {
hsdb.notifier.NotifyAll(stateUpdate)
}
return nil
}
func (hsdb *HSDatabase) DeleteRoute(id uint64) error {
@ -164,34 +226,58 @@ func (hsdb *HSDatabase) DeleteRoute(id uint64) error {
return err
}
var routes types.Routes
node := route.Node
// Tailscale requires both IPv4 and IPv6 exit routes to
// be enabled at the same time, as per
// https://github.com/juanfont/headscale/issues/804#issuecomment-1399314002
if !route.IsExitRoute() {
err := hsdb.failoverRouteWithNotify(route)
if err != nil {
return nil
}
if err := hsdb.db.Unscoped().Delete(&route).Error; err != nil {
return err
}
} else {
routes, err := hsdb.getNodeRoutes(&node)
if err != nil {
return err
}
return hsdb.handlePrimarySubnetFailover()
}
routesToDelete := types.Routes{}
for _, r := range routes {
if r.IsExitRoute() {
routesToDelete = append(routesToDelete, r)
}
}
routes, err := hsdb.getNodeRoutes(&route.Node)
if err != nil {
return err
}
routesToDelete := types.Routes{}
for _, r := range routes {
if r.IsExitRoute() {
routesToDelete = append(routesToDelete, r)
if err := hsdb.db.Unscoped().Delete(&routesToDelete).Error; err != nil {
return err
}
}
if err := hsdb.db.Unscoped().Delete(&routesToDelete).Error; err != nil {
return err
if routes == nil {
routes, err = hsdb.getNodeRoutes(&node)
if err != nil {
return err
}
}
return hsdb.handlePrimarySubnetFailover()
node.Routes = routes
stateUpdate := types.StateUpdate{
Type: types.StatePeerChanged,
ChangeNodes: types.Nodes{&node},
Message: "called from db.DeleteRoute",
}
if stateUpdate.Valid() {
hsdb.notifier.NotifyAll(stateUpdate)
}
return nil
}
func (hsdb *HSDatabase) deleteNodeRoutes(node *types.Node) error {
@ -204,9 +290,13 @@ func (hsdb *HSDatabase) deleteNodeRoutes(node *types.Node) error {
if err := hsdb.db.Unscoped().Delete(&routes[i]).Error; err != nil {
return err
}
// TODO(kradalby): This is a bit too aggressive, we could probably
// figure out which routes needs to be failed over rather than all.
hsdb.failoverRouteWithNotify(&routes[i])
}
return hsdb.handlePrimarySubnetFailover()
return nil
}
// isUniquePrefix returns if there is another node providing the same route already.
@ -259,18 +349,22 @@ func (hsdb *HSDatabase) GetNodePrimaryRoutes(node *types.Node) (types.Routes, er
// SaveNodeRoutes takes a node and updates the database with
// the new routes.
func (hsdb *HSDatabase) SaveNodeRoutes(node *types.Node) error {
// It returns a bool wheter an update should be sent as the
// saved route impacts nodes.
func (hsdb *HSDatabase) SaveNodeRoutes(node *types.Node) (bool, error) {
hsdb.mu.Lock()
defer hsdb.mu.Unlock()
return hsdb.saveNodeRoutes(node)
}
func (hsdb *HSDatabase) saveNodeRoutes(node *types.Node) error {
func (hsdb *HSDatabase) saveNodeRoutes(node *types.Node) (bool, error) {
sendUpdate := false
currentRoutes := types.Routes{}
err := hsdb.db.Where("node_id = ?", node.ID).Find(&currentRoutes).Error
if err != nil {
return err
return sendUpdate, err
}
advertisedRoutes := map[netip.Prefix]bool{}
@ -290,7 +384,14 @@ func (hsdb *HSDatabase) saveNodeRoutes(node *types.Node) error {
currentRoutes[pos].Advertised = true
err := hsdb.db.Save(&currentRoutes[pos]).Error
if err != nil {
return err
return sendUpdate, err
}
// If a route that is newly "saved" is already
// enabled, set sendUpdate to true as it is now
// available.
if route.Enabled {
sendUpdate = true
}
}
advertisedRoutes[netip.Prefix(route.Prefix)] = true
@ -299,7 +400,7 @@ func (hsdb *HSDatabase) saveNodeRoutes(node *types.Node) error {
currentRoutes[pos].Enabled = false
err := hsdb.db.Save(&currentRoutes[pos]).Error
if err != nil {
return err
return sendUpdate, err
}
}
}
@ -314,7 +415,41 @@ func (hsdb *HSDatabase) saveNodeRoutes(node *types.Node) error {
}
err := hsdb.db.Create(&route).Error
if err != nil {
return err
return sendUpdate, err
}
}
}
return sendUpdate, nil
}
// EnsureFailoverRouteIsAvailable takes a node and checks if the node's route
// currently have a functioning host that exposes the network.
func (hsdb *HSDatabase) EnsureFailoverRouteIsAvailable(node *types.Node) error {
nodeRoutes, err := hsdb.getNodeRoutes(node)
if err != nil {
return nil
}
for _, nodeRoute := range nodeRoutes {
routes, err := hsdb.getRoutesByPrefix(netip.Prefix(nodeRoute.Prefix))
if err != nil {
return err
}
for _, route := range routes {
if route.IsPrimary {
// if we have a primary route, and the node is connected
// nothing needs to be done.
if hsdb.notifier.IsConnected(route.Node.MachineKey) {
continue
}
// if not, we need to failover the route
err := hsdb.failoverRouteWithNotify(&route)
if err != nil {
return err
}
}
}
}
@ -322,133 +457,181 @@ func (hsdb *HSDatabase) saveNodeRoutes(node *types.Node) error {
return nil
}
func (hsdb *HSDatabase) HandlePrimarySubnetFailover() error {
hsdb.mu.Lock()
defer hsdb.mu.Unlock()
return hsdb.handlePrimarySubnetFailover()
}
func (hsdb *HSDatabase) handlePrimarySubnetFailover() error {
// first, get all the enabled routes
var routes types.Routes
err := hsdb.db.
Preload("Node").
Where("advertised = ? AND enabled = ?", true, true).
Find(&routes).Error
if err != nil && !errors.Is(err, gorm.ErrRecordNotFound) {
log.Error().Err(err).Msg("error getting routes")
func (hsdb *HSDatabase) FailoverNodeRoutesWithNotify(node *types.Node) error {
routes, err := hsdb.getNodeRoutes(node)
if err != nil {
return nil
}
changedNodes := make(types.Nodes, 0)
for pos, route := range routes {
if route.IsExitRoute() {
var changedKeys []key.MachinePublic
for _, route := range routes {
changed, err := hsdb.failoverRoute(&route)
if err != nil {
return err
}
changedKeys = append(changedKeys, changed...)
}
changedKeys = lo.Uniq(changedKeys)
var nodes types.Nodes
for _, key := range changedKeys {
node, err := hsdb.GetNodeByMachineKey(key)
if err != nil {
return err
}
nodes = append(nodes, node)
}
if nodes != nil {
stateUpdate := types.StateUpdate{
Type: types.StatePeerChanged,
ChangeNodes: nodes,
Message: "called from db.FailoverNodeRoutesWithNotify",
}
if stateUpdate.Valid() {
hsdb.notifier.NotifyAll(stateUpdate)
}
}
return nil
}
func (hsdb *HSDatabase) failoverRouteWithNotify(r *types.Route) error {
changedKeys, err := hsdb.failoverRoute(r)
if err != nil {
return err
}
if len(changedKeys) == 0 {
return nil
}
var nodes types.Nodes
log.Trace().
Str("hostname", r.Node.Hostname).
Msg("loading machines with new primary routes from db")
for _, key := range changedKeys {
node, err := hsdb.getNodeByMachineKey(key)
if err != nil {
return err
}
nodes = append(nodes, node)
}
log.Trace().
Str("hostname", r.Node.Hostname).
Msg("notifying peers about primary route change")
if nodes != nil {
stateUpdate := types.StateUpdate{
Type: types.StatePeerChanged,
ChangeNodes: nodes,
Message: "called from db.failoverRouteWithNotify",
}
if stateUpdate.Valid() {
hsdb.notifier.NotifyAll(stateUpdate)
}
}
log.Trace().
Str("hostname", r.Node.Hostname).
Msg("notified peers about primary route change")
return nil
}
// failoverRoute takes a route that is no longer available,
// this can be either from:
// - being disabled
// - being deleted
// - host going offline
//
// and tries to find a new route to take over its place.
// If the given route was not primary, it returns early.
func (hsdb *HSDatabase) failoverRoute(r *types.Route) ([]key.MachinePublic, error) {
if r == nil {
return nil, nil
}
// This route is not a primary route, and it isnt
// being served to nodes.
if !r.IsPrimary {
return nil, nil
}
// We do not have to failover exit nodes
if r.IsExitRoute() {
return nil, nil
}
routes, err := hsdb.getRoutesByPrefix(netip.Prefix(r.Prefix))
if err != nil {
return nil, err
}
var newPrimary *types.Route
// Find a new suitable route
for idx, route := range routes {
if r.ID == route.ID {
continue
}
node := &route.Node
if !route.IsPrimary {
_, err := hsdb.getPrimaryRoute(netip.Prefix(route.Prefix))
if hsdb.isUniquePrefix(route) || errors.Is(err, gorm.ErrRecordNotFound) {
log.Info().
Str("prefix", netip.Prefix(route.Prefix).String()).
Str("node", route.Node.GivenName).
Msg("Setting primary route")
routes[pos].IsPrimary = true
err := hsdb.db.Save(&routes[pos]).Error
if err != nil {
log.Error().Err(err).Msg("error marking route as primary")
return err
}
changedNodes = append(changedNodes, node)
continue
}
}
if route.IsPrimary {
if route.Node.IsOnline() {
continue
}
// node offline, find a new primary
log.Info().
Str("node", route.Node.Hostname).
Str("prefix", netip.Prefix(route.Prefix).String()).
Msgf("node offline, finding a new primary subnet")
// find a new primary route
var newPrimaryRoutes types.Routes
err := hsdb.db.
Preload("Node").
Where("prefix = ? AND node_id != ? AND advertised = ? AND enabled = ?",
route.Prefix,
route.NodeID,
true, true).
Find(&newPrimaryRoutes).Error
if err != nil && !errors.Is(err, gorm.ErrRecordNotFound) {
log.Error().Err(err).Msg("error finding new primary route")
return err
}
var newPrimaryRoute *types.Route
for pos, r := range newPrimaryRoutes {
if r.Node.IsOnline() {
newPrimaryRoute = &newPrimaryRoutes[pos]
break
}
}
if newPrimaryRoute == nil {
log.Warn().
Str("node", route.Node.Hostname).
Str("prefix", netip.Prefix(route.Prefix).String()).
Msgf("no alternative primary route found")
continue
}
log.Info().
Str("old_node", route.Node.Hostname).
Str("prefix", netip.Prefix(route.Prefix).String()).
Str("new_node", newPrimaryRoute.Node.Hostname).
Msgf("found new primary route")
// disable the old primary route
routes[pos].IsPrimary = false
err = hsdb.db.Save(&routes[pos]).Error
if err != nil {
log.Error().Err(err).Msg("error disabling old primary route")
return err
}
// enable the new primary route
newPrimaryRoute.IsPrimary = true
err = hsdb.db.Save(&newPrimaryRoute).Error
if err != nil {
log.Error().Err(err).Msg("error enabling new primary route")
return err
}
changedNodes = append(changedNodes, node)
if hsdb.notifier.IsConnected(route.Node.MachineKey) {
newPrimary = &routes[idx]
break
}
}
if len(changedNodes) > 0 {
hsdb.notifier.NotifyAll(types.StateUpdate{
Type: types.StatePeerChanged,
Changed: changedNodes,
})
// If a new route was not found/available,
// return with an error.
// We do not want to update the database as
// the one currently marked as primary is the
// best we got.
if newPrimary == nil {
return nil, nil
}
return nil
log.Trace().
Str("hostname", newPrimary.Node.Hostname).
Msg("found new primary, updating db")
// Remove primary from the old route
r.IsPrimary = false
err = hsdb.db.Save(&r).Error
if err != nil {
log.Error().Err(err).Msg("error disabling new primary route")
return nil, err
}
log.Trace().
Str("hostname", newPrimary.Node.Hostname).
Msg("removed primary from old route")
// Set primary for the new primary
newPrimary.IsPrimary = true
err = hsdb.db.Save(&newPrimary).Error
if err != nil {
log.Error().Err(err).Msg("error enabling new primary route")
return nil, err
}
log.Trace().
Str("hostname", newPrimary.Node.Hostname).
Msg("set primary to new route")
// Return a list of the machinekeys of the changed nodes.
return []key.MachinePublic{r.Node.MachineKey, newPrimary.Node.MachineKey}, nil
}
// EnableAutoApprovedRoutes enables any routes advertised by a node that match the ACL autoApprovers policy.

View file

@ -2,12 +2,19 @@ package db
import (
"net/netip"
"os"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"github.com/juanfont/headscale/hscontrol/notifier"
"github.com/juanfont/headscale/hscontrol/types"
"github.com/juanfont/headscale/hscontrol/util"
"github.com/stretchr/testify/assert"
"gopkg.in/check.v1"
"gorm.io/gorm"
"tailscale.com/tailcfg"
"tailscale.com/types/key"
)
func (s *Suite) TestGetRoutes(c *check.C) {
@ -37,8 +44,9 @@ func (s *Suite) TestGetRoutes(c *check.C) {
}
db.db.Save(&node)
err = db.SaveNodeRoutes(&node)
su, err := db.SaveNodeRoutes(&node)
c.Assert(err, check.IsNil)
c.Assert(su, check.Equals, false)
advertisedRoutes, err := db.GetAdvertisedRoutes(&node)
c.Assert(err, check.IsNil)
@ -85,8 +93,9 @@ func (s *Suite) TestGetEnableRoutes(c *check.C) {
}
db.db.Save(&node)
err = db.SaveNodeRoutes(&node)
sendUpdate, err := db.SaveNodeRoutes(&node)
c.Assert(err, check.IsNil)
c.Assert(sendUpdate, check.Equals, false)
availableRoutes, err := db.GetAdvertisedRoutes(&node)
c.Assert(err, check.IsNil)
@ -156,8 +165,9 @@ func (s *Suite) TestIsUniquePrefix(c *check.C) {
}
db.db.Save(&node1)
err = db.SaveNodeRoutes(&node1)
sendUpdate, err := db.SaveNodeRoutes(&node1)
c.Assert(err, check.IsNil)
c.Assert(sendUpdate, check.Equals, false)
err = db.enableRoutes(&node1, route.String())
c.Assert(err, check.IsNil)
@ -178,8 +188,9 @@ func (s *Suite) TestIsUniquePrefix(c *check.C) {
}
db.db.Save(&node2)
err = db.SaveNodeRoutes(&node2)
sendUpdate, err = db.SaveNodeRoutes(&node2)
c.Assert(err, check.IsNil)
c.Assert(sendUpdate, check.Equals, false)
err = db.enableRoutes(&node2, route2.String())
c.Assert(err, check.IsNil)
@ -201,142 +212,6 @@ func (s *Suite) TestIsUniquePrefix(c *check.C) {
c.Assert(len(routes), check.Equals, 0)
}
func (s *Suite) TestSubnetFailover(c *check.C) {
user, err := db.CreateUser("test")
c.Assert(err, check.IsNil)
pak, err := db.CreatePreAuthKey(user.Name, false, false, nil, nil)
c.Assert(err, check.IsNil)
_, err = db.GetNode("test", "test_enable_route_node")
c.Assert(err, check.NotNil)
prefix, err := netip.ParsePrefix(
"10.0.0.0/24",
)
c.Assert(err, check.IsNil)
prefix2, err := netip.ParsePrefix(
"150.0.10.0/25",
)
c.Assert(err, check.IsNil)
hostInfo1 := tailcfg.Hostinfo{
RoutableIPs: []netip.Prefix{prefix, prefix2},
}
now := time.Now()
node1 := types.Node{
ID: 1,
Hostname: "test_enable_route_node",
UserID: user.ID,
RegisterMethod: util.RegisterMethodAuthKey,
AuthKeyID: uint(pak.ID),
Hostinfo: &hostInfo1,
LastSeen: &now,
}
db.db.Save(&node1)
err = db.SaveNodeRoutes(&node1)
c.Assert(err, check.IsNil)
err = db.enableRoutes(&node1, prefix.String())
c.Assert(err, check.IsNil)
err = db.enableRoutes(&node1, prefix2.String())
c.Assert(err, check.IsNil)
err = db.HandlePrimarySubnetFailover()
c.Assert(err, check.IsNil)
enabledRoutes1, err := db.GetEnabledRoutes(&node1)
c.Assert(err, check.IsNil)
c.Assert(len(enabledRoutes1), check.Equals, 2)
route, err := db.getPrimaryRoute(prefix)
c.Assert(err, check.IsNil)
c.Assert(route.NodeID, check.Equals, node1.ID)
hostInfo2 := tailcfg.Hostinfo{
RoutableIPs: []netip.Prefix{prefix2},
}
node2 := types.Node{
ID: 2,
Hostname: "test_enable_route_node",
UserID: user.ID,
RegisterMethod: util.RegisterMethodAuthKey,
AuthKeyID: uint(pak.ID),
Hostinfo: &hostInfo2,
LastSeen: &now,
}
db.db.Save(&node2)
err = db.saveNodeRoutes(&node2)
c.Assert(err, check.IsNil)
err = db.enableRoutes(&node2, prefix2.String())
c.Assert(err, check.IsNil)
err = db.HandlePrimarySubnetFailover()
c.Assert(err, check.IsNil)
enabledRoutes1, err = db.GetEnabledRoutes(&node1)
c.Assert(err, check.IsNil)
c.Assert(len(enabledRoutes1), check.Equals, 2)
enabledRoutes2, err := db.GetEnabledRoutes(&node2)
c.Assert(err, check.IsNil)
c.Assert(len(enabledRoutes2), check.Equals, 1)
routes, err := db.GetNodePrimaryRoutes(&node1)
c.Assert(err, check.IsNil)
c.Assert(len(routes), check.Equals, 2)
routes, err = db.GetNodePrimaryRoutes(&node2)
c.Assert(err, check.IsNil)
c.Assert(len(routes), check.Equals, 0)
// lets make node1 lastseen 10 mins ago
before := now.Add(-10 * time.Minute)
node1.LastSeen = &before
err = db.db.Save(&node1).Error
c.Assert(err, check.IsNil)
err = db.HandlePrimarySubnetFailover()
c.Assert(err, check.IsNil)
routes, err = db.GetNodePrimaryRoutes(&node1)
c.Assert(err, check.IsNil)
c.Assert(len(routes), check.Equals, 1)
routes, err = db.GetNodePrimaryRoutes(&node2)
c.Assert(err, check.IsNil)
c.Assert(len(routes), check.Equals, 1)
node2.Hostinfo = &tailcfg.Hostinfo{
RoutableIPs: []netip.Prefix{prefix, prefix2},
}
err = db.db.Save(&node2).Error
c.Assert(err, check.IsNil)
err = db.SaveNodeRoutes(&node2)
c.Assert(err, check.IsNil)
err = db.enableRoutes(&node2, prefix.String())
c.Assert(err, check.IsNil)
err = db.HandlePrimarySubnetFailover()
c.Assert(err, check.IsNil)
routes, err = db.GetNodePrimaryRoutes(&node1)
c.Assert(err, check.IsNil)
c.Assert(len(routes), check.Equals, 0)
routes, err = db.GetNodePrimaryRoutes(&node2)
c.Assert(err, check.IsNil)
c.Assert(len(routes), check.Equals, 2)
}
func (s *Suite) TestDeleteRoutes(c *check.C) {
user, err := db.CreateUser("test")
c.Assert(err, check.IsNil)
@ -373,8 +248,9 @@ func (s *Suite) TestDeleteRoutes(c *check.C) {
}
db.db.Save(&node1)
err = db.SaveNodeRoutes(&node1)
sendUpdate, err := db.SaveNodeRoutes(&node1)
c.Assert(err, check.IsNil)
c.Assert(sendUpdate, check.Equals, false)
err = db.enableRoutes(&node1, prefix.String())
c.Assert(err, check.IsNil)
@ -392,3 +268,362 @@ func (s *Suite) TestDeleteRoutes(c *check.C) {
c.Assert(err, check.IsNil)
c.Assert(len(enabledRoutes1), check.Equals, 1)
}
func TestFailoverRoute(t *testing.T) {
ipp := func(s string) types.IPPrefix { return types.IPPrefix(netip.MustParsePrefix(s)) }
// TODO(kradalby): Count/verify updates
var sink chan types.StateUpdate
go func() {
for range sink {
}
}()
machineKeys := []key.MachinePublic{
key.NewMachine().Public(),
key.NewMachine().Public(),
key.NewMachine().Public(),
key.NewMachine().Public(),
}
tests := []struct {
name string
failingRoute types.Route
routes types.Routes
want []key.MachinePublic
wantErr bool
}{
{
name: "no-route",
failingRoute: types.Route{},
routes: types.Routes{},
want: nil,
wantErr: false,
},
{
name: "no-prime",
failingRoute: types.Route{
Model: gorm.Model{
ID: 1,
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
},
IsPrimary: false,
},
routes: types.Routes{},
want: nil,
wantErr: false,
},
{
name: "exit-node",
failingRoute: types.Route{
Model: gorm.Model{
ID: 1,
},
Prefix: ipp("0.0.0.0/0"),
Node: types.Node{
MachineKey: machineKeys[0],
},
IsPrimary: true,
},
routes: types.Routes{},
want: nil,
wantErr: false,
},
{
name: "no-failover-single-route",
failingRoute: types.Route{
Model: gorm.Model{
ID: 1,
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
},
IsPrimary: true,
},
routes: types.Routes{
types.Route{
Model: gorm.Model{
ID: 1,
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
},
IsPrimary: true,
},
},
want: nil,
wantErr: false,
},
{
name: "failover-primary",
failingRoute: types.Route{
Model: gorm.Model{
ID: 1,
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
},
IsPrimary: true,
},
routes: types.Routes{
types.Route{
Model: gorm.Model{
ID: 1,
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
},
IsPrimary: true,
},
types.Route{
Model: gorm.Model{
ID: 2,
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[1],
},
IsPrimary: false,
},
},
want: []key.MachinePublic{
machineKeys[0],
machineKeys[1],
},
wantErr: false,
},
{
name: "failover-none-primary",
failingRoute: types.Route{
Model: gorm.Model{
ID: 1,
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
},
IsPrimary: false,
},
routes: types.Routes{
types.Route{
Model: gorm.Model{
ID: 1,
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
},
IsPrimary: true,
},
types.Route{
Model: gorm.Model{
ID: 2,
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[1],
},
IsPrimary: false,
},
},
want: nil,
wantErr: false,
},
{
name: "failover-primary-multi-route",
failingRoute: types.Route{
Model: gorm.Model{
ID: 2,
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[1],
},
IsPrimary: true,
},
routes: types.Routes{
types.Route{
Model: gorm.Model{
ID: 1,
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
},
IsPrimary: false,
},
types.Route{
Model: gorm.Model{
ID: 2,
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[1],
},
IsPrimary: true,
},
types.Route{
Model: gorm.Model{
ID: 3,
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[2],
},
IsPrimary: false,
},
},
want: []key.MachinePublic{
machineKeys[1],
machineKeys[0],
},
wantErr: false,
},
{
name: "failover-primary-no-online",
failingRoute: types.Route{
Model: gorm.Model{
ID: 1,
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
},
IsPrimary: true,
},
routes: types.Routes{
types.Route{
Model: gorm.Model{
ID: 1,
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
},
IsPrimary: true,
},
// Offline
types.Route{
Model: gorm.Model{
ID: 2,
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[3],
},
IsPrimary: false,
},
},
want: nil,
wantErr: false,
},
{
name: "failover-primary-one-not-online",
failingRoute: types.Route{
Model: gorm.Model{
ID: 1,
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
},
IsPrimary: true,
},
routes: types.Routes{
types.Route{
Model: gorm.Model{
ID: 1,
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[0],
},
IsPrimary: true,
},
// Offline
types.Route{
Model: gorm.Model{
ID: 2,
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[3],
},
IsPrimary: false,
},
types.Route{
Model: gorm.Model{
ID: 3,
},
Prefix: ipp("10.0.0.0/24"),
Node: types.Node{
MachineKey: machineKeys[1],
},
IsPrimary: true,
},
},
want: []key.MachinePublic{
machineKeys[0],
machineKeys[1],
},
wantErr: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tmpDir, err := os.MkdirTemp("", "failover-db-test")
assert.NoError(t, err)
notif := notifier.NewNotifier()
db, err = NewHeadscaleDatabase(
"sqlite3",
tmpDir+"/headscale_test.db",
false,
notif,
[]netip.Prefix{
netip.MustParsePrefix("10.27.0.0/23"),
},
"",
)
assert.NoError(t, err)
// Pretend that all the nodes are connected to control
for idx, key := range machineKeys {
// Pretend one node is offline
if idx == 3 {
continue
}
notif.AddNode(key, sink)
}
for _, route := range tt.routes {
if err := db.db.Save(&route).Error; err != nil {
t.Fatalf("failed to create route: %s", err)
}
}
got, err := db.failoverRoute(&tt.failingRoute)
if (err != nil) != tt.wantErr {
t.Errorf("failoverRoute() error = %v, wantErr %v", err, tt.wantErr)
return
}
if diff := cmp.Diff(tt.want, got, util.Comparers...); diff != "" {
t.Errorf("failoverRoute() unexpected result (-want +got):\n%s", diff)
}
})
}
}