Redo route code (#2422)

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
This commit is contained in:
Kristoffer Dalby 2025-02-26 07:22:55 -08:00 committed by GitHub
parent 16868190c8
commit 7891378f57
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
53 changed files with 2977 additions and 6251 deletions

186
hscontrol/routes/primary.go Normal file
View file

@ -0,0 +1,186 @@
package routes
import (
"fmt"
"log"
"net/netip"
"slices"
"sort"
"strings"
"sync"
"github.com/juanfont/headscale/hscontrol/types"
"github.com/juanfont/headscale/hscontrol/util"
xmaps "golang.org/x/exp/maps"
"tailscale.com/util/set"
)
// PrimaryRoutes keeps track of the prefixes registered per node and of the
// node currently selected as primary for each prefix.
// The methods in this file take mu before touching any of the maps.
type PrimaryRoutes struct {
// mu guards routes, primaries and isPrimary.
mu sync.Mutex
// routes is a map of prefixes that are advertised and approved and available
// in the global headscale state, keyed by the node that serves them.
routes map[types.NodeID]set.Set[netip.Prefix]
// primaries is a map of prefixes to the node that is the primary for that prefix.
primaries map[netip.Prefix]types.NodeID
// isPrimary is a quick lookup index of the nodes that are primary for at
// least one prefix; it is rebuilt from primaries by updatePrimaryLocked.
isPrimary map[types.NodeID]bool
}
// New returns an empty PrimaryRoutes with all of its internal maps
// allocated, so it can be used immediately.
func New() *PrimaryRoutes {
	pr := new(PrimaryRoutes)
	pr.routes = make(map[types.NodeID]set.Set[netip.Prefix])
	pr.primaries = make(map[netip.Prefix]types.NodeID)
	pr.isPrimary = make(map[types.NodeID]bool)

	return pr
}
// updatePrimaryLocked recomputes which node is primary for every prefix and
// reports whether any primary assignment was added, replaced or removed.
// The caller must hold pr.mu.
//
// The algorithm is as follows:
//  1. Collect, for every prefix, the nodes serving it in ascending node ID order.
//  2. A prefix only gets a primary when at least two nodes serve it.
//  3. An existing primary is kept as long as it still serves the prefix;
//     otherwise the lowest node ID serving the prefix takes over.
//  4. Primaries for prefixes nobody serves any more are dropped.
//  5. The isPrimary index is rebuilt from the resulting primaries.
func (pr *PrimaryRoutes) updatePrimaryLocked() bool {
	// Walk the nodes in sorted order so the candidate list of every prefix is
	// deterministic and the same node is picked on repeated runs.
	nodeIDs := types.NodeIDs(xmaps.Keys(pr.routes))
	sort.Sort(nodeIDs)

	// candidates maps each prefix to the nodes that serve it.
	candidates := make(map[netip.Prefix][]types.NodeID)
	for _, nodeID := range nodeIDs {
		for prefix := range pr.routes[nodeID] {
			candidates[prefix] = append(candidates[prefix], nodeID)
		}
	}

	changed := false

	for prefix, serving := range candidates {
		current, hasPrimary := pr.primaries[prefix]
		enoughNodes := len(serving) >= 2

		switch {
		case hasPrimary && !enoughNodes:
			// Too few nodes left to warrant a primary.
			delete(pr.primaries, prefix)
			changed = true
		case hasPrimary && slices.Contains(serving, current):
			// The current primary is still available, keep it.
		case enoughNodes:
			// No primary yet, or the old one is gone: select a new one.
			pr.primaries[prefix] = serving[0]
			changed = true
		}
	}

	// Drop primaries whose prefix is no longer served by any node.
	for prefix := range pr.primaries {
		if _, served := candidates[prefix]; served {
			continue
		}
		delete(pr.primaries, prefix)
		changed = true
	}

	// Rebuild the quick lookup index of nodes that are primary for something.
	pr.isPrimary = make(map[types.NodeID]bool)
	for _, primary := range pr.primaries {
		pr.isPrimary[primary] = true
	}

	return changed
}
// SetRoutes replaces the prefixes registered for node with exactly the given
// prefixes and recalculates the primary routes. Calling it without any prefix
// removes the node from the route table.
//
// It returns true if the recalculation changed any primary route.
func (pr *PrimaryRoutes) SetRoutes(node types.NodeID, prefix ...netip.Prefix) bool {
	pr.mu.Lock()
	defer pr.mu.Unlock()

	// If no routes are being set, remove the node from the routes map.
	if len(prefix) == 0 {
		log.Printf("Removing node %d from routes", node)

		if _, ok := pr.routes[node]; !ok {
			// Nothing was registered for the node, so nothing can have changed.
			return false
		}

		delete(pr.routes, node)

		return pr.updatePrimaryLocked()
	}

	// Build a fresh set instead of adding to the existing one: prefixes that
	// were registered earlier but are missing from this call must be dropped,
	// otherwise a node could never withdraw a single route.
	replacement := make(set.Set[netip.Prefix], len(prefix))
	for _, p := range prefix {
		replacement.Add(p)
	}
	pr.routes[node] = replacement

	return pr.updatePrimaryLocked()
}
// PrimaryRoutes returns the prefixes for which the node with the given id is
// currently the primary, sorted by address and then by prefix length so that
// repeated calls return the same order. It returns nil when called on a nil
// receiver or when the node is not primary for any prefix.
func (pr *PrimaryRoutes) PrimaryRoutes(id types.NodeID) []netip.Prefix {
	if pr == nil {
		return nil
	}

	pr.mu.Lock()
	defer pr.mu.Unlock()

	// Short circuit if the node is not a primary for any route.
	if !pr.isPrimary[id] {
		return nil
	}

	var routes []netip.Prefix
	for prefix, node := range pr.primaries {
		if node == id {
			routes = append(routes, prefix)
		}
	}

	// Map iteration order is random; sort so callers get a stable result.
	slices.SortFunc(routes, func(a, b netip.Prefix) int {
		if c := a.Addr().Compare(b.Addr()); c != 0 {
			return c
		}

		return a.Bits() - b.Bits()
	})

	return routes
}
// String returns a human-readable dump of the registered routes and the
// current primaries. It takes the lock for the duration of the rendering.
func (pr *PrimaryRoutes) String() string {
	pr.mu.Lock()
	defer pr.mu.Unlock()

	rendered := pr.stringLocked()

	return rendered
}
// stringLocked renders the registered routes per node (in ascending node ID
// order) followed by the current primary of every prefix.
// The caller must hold pr.mu.
func (pr *PrimaryRoutes) stringLocked() string {
	var out strings.Builder

	fmt.Fprintln(&out, "Available routes:")

	nodeIDs := types.NodeIDs(xmaps.Keys(pr.routes))
	sort.Sort(nodeIDs)

	for _, nodeID := range nodeIDs {
		advertised := util.PrefixesToString(pr.routes[nodeID].Slice())
		fmt.Fprintf(&out, "\nNode %d: %s", nodeID, strings.Join(advertised, ", "))
	}

	fmt.Fprintln(&out, "\n\nCurrent primary routes:")

	for prefix, primary := range pr.primaries {
		fmt.Fprintf(&out, "\nRoute %s: %d", prefix, primary)
	}

	return out.String()
}

View file

@ -0,0 +1,316 @@
package routes
import (
"net/netip"
"sync"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/juanfont/headscale/hscontrol/types"
"github.com/juanfont/headscale/hscontrol/util"
)
// mp parses prefix into a netip.Prefix and panics if it is not valid.
// It keeps the test tables below short.
func mp(prefix string) netip.Prefix {
	parsed, err := netip.ParsePrefix(prefix)
	if err != nil {
		panic(err)
	}

	return parsed
}
// TestPrimaryRoutes runs a table of SetRoutes call sequences against a fresh
// PrimaryRoutes and checks two things per case: the boolean returned by the
// last operation, and the primary routes reported for a single node.
// The trailing comments on the SetRoutes calls note the expected return value
// and, where relevant, which node is primary afterwards.
func TestPrimaryRoutes(t *testing.T) {
tests := []struct {
name string
// operations applies the calls under test and returns the change flag
// that is compared against expectedChange.
operations func(pr *PrimaryRoutes) bool
// nodeID is the node whose primary routes are inspected afterwards.
nodeID types.NodeID
expectedRoutes []netip.Prefix
expectedChange bool
}{
{
name: "single-node-registers-single-route",
operations: func(pr *PrimaryRoutes) bool {
return pr.SetRoutes(1, mp("192.168.1.0/24"))
},
nodeID: 1,
expectedRoutes: nil,
expectedChange: false,
},
{
name: "multiple-nodes-register-different-routes",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24"))
return pr.SetRoutes(2, mp("192.168.2.0/24"))
},
nodeID: 1,
expectedRoutes: nil,
expectedChange: false,
},
{
name: "multiple-nodes-register-overlapping-routes",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24")) // false
return pr.SetRoutes(2, mp("192.168.1.0/24")) // true
},
nodeID: 1,
expectedRoutes: []netip.Prefix{mp("192.168.1.0/24")},
expectedChange: true,
},
{
name: "node-deregisters-a-route",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24"))
return pr.SetRoutes(1) // Deregister by setting no routes
},
nodeID: 1,
expectedRoutes: nil,
expectedChange: false,
},
{
name: "node-deregisters-one-of-multiple-routes",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24"), mp("192.168.2.0/24"))
return pr.SetRoutes(1, mp("192.168.2.0/24")) // Deregister one route by setting the remaining route
},
nodeID: 1,
expectedRoutes: nil,
expectedChange: false,
},
{
name: "node-registers-and-deregisters-routes-in-sequence",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24"))
pr.SetRoutes(2, mp("192.168.2.0/24"))
pr.SetRoutes(1) // Deregister by setting no routes
return pr.SetRoutes(1, mp("192.168.3.0/24"))
},
nodeID: 1,
expectedRoutes: nil,
expectedChange: false,
},
{
name: "no-change-in-primary-routes",
operations: func(pr *PrimaryRoutes) bool {
return pr.SetRoutes(1, mp("192.168.1.0/24"))
},
nodeID: 1,
expectedRoutes: nil,
expectedChange: false,
},
{
name: "multiple-nodes-register-same-route",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24")) // false
pr.SetRoutes(2, mp("192.168.1.0/24")) // true
return pr.SetRoutes(3, mp("192.168.1.0/24")) // false
},
nodeID: 1,
expectedRoutes: []netip.Prefix{mp("192.168.1.0/24")},
expectedChange: false,
},
{
name: "register-multiple-routes-shift-primary-check-old-primary",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24")) // false
pr.SetRoutes(2, mp("192.168.1.0/24")) // true, 1 primary
pr.SetRoutes(3, mp("192.168.1.0/24")) // false, 1 primary
return pr.SetRoutes(1) // true, 2 primary
},
nodeID: 1,
expectedRoutes: nil,
expectedChange: true,
},
{
name: "register-multiple-routes-shift-primary-check-primary",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24")) // false
pr.SetRoutes(2, mp("192.168.1.0/24")) // true, 1 primary
pr.SetRoutes(3, mp("192.168.1.0/24")) // false, 1 primary
return pr.SetRoutes(1) // true, 2 primary
},
nodeID: 2,
expectedRoutes: []netip.Prefix{mp("192.168.1.0/24")},
expectedChange: true,
},
{
name: "register-multiple-routes-shift-primary-check-non-primary",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24")) // false
pr.SetRoutes(2, mp("192.168.1.0/24")) // true, 1 primary
pr.SetRoutes(3, mp("192.168.1.0/24")) // false, 1 primary
return pr.SetRoutes(1) // true, 2 primary
},
nodeID: 3,
expectedRoutes: nil,
expectedChange: true,
},
{
name: "primary-route-map-is-cleared-up-no-primary",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24")) // false
pr.SetRoutes(2, mp("192.168.1.0/24")) // true, 1 primary
pr.SetRoutes(3, mp("192.168.1.0/24")) // false, 1 primary
pr.SetRoutes(1) // true, 2 primary
return pr.SetRoutes(2) // true, no primary
},
nodeID: 2,
expectedRoutes: nil,
expectedChange: true,
},
{
name: "primary-route-map-is-cleared-up-all-no-primary",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24")) // false
pr.SetRoutes(2, mp("192.168.1.0/24")) // true, 1 primary
pr.SetRoutes(3, mp("192.168.1.0/24")) // false, 1 primary
pr.SetRoutes(1) // true, 2 primary
pr.SetRoutes(2) // true, no primary
return pr.SetRoutes(3) // false, no primary
},
nodeID: 2,
expectedRoutes: nil,
expectedChange: false,
},
{
name: "primary-route-map-is-cleared-up",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24")) // false
pr.SetRoutes(2, mp("192.168.1.0/24")) // true, 1 primary
pr.SetRoutes(3, mp("192.168.1.0/24")) // false, 1 primary
pr.SetRoutes(1) // true, 2 primary
return pr.SetRoutes(2) // true, no primary
},
nodeID: 2,
expectedRoutes: nil,
expectedChange: true,
},
{
name: "primary-route-no-flake",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24")) // false
pr.SetRoutes(2, mp("192.168.1.0/24")) // true, 1 primary
pr.SetRoutes(3, mp("192.168.1.0/24")) // false, 1 primary
pr.SetRoutes(1) // true, 2 primary
return pr.SetRoutes(1, mp("192.168.1.0/24")) // false, 2 primary
},
nodeID: 2,
expectedRoutes: []netip.Prefix{mp("192.168.1.0/24")},
expectedChange: false,
},
{
name: "primary-route-no-flake-check-old-primary",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24")) // false
pr.SetRoutes(2, mp("192.168.1.0/24")) // true, 1 primary
pr.SetRoutes(3, mp("192.168.1.0/24")) // false, 1 primary
pr.SetRoutes(1) // true, 2 primary
return pr.SetRoutes(1, mp("192.168.1.0/24")) // false, 2 primary
},
nodeID: 1,
expectedRoutes: nil,
expectedChange: false,
},
{
name: "primary-route-no-flake-full-integration",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24")) // false
pr.SetRoutes(2, mp("192.168.1.0/24")) // true, 1 primary
pr.SetRoutes(3, mp("192.168.1.0/24")) // false, 1 primary
pr.SetRoutes(1) // true, 2 primary
pr.SetRoutes(2) // true, no primary
pr.SetRoutes(1, mp("192.168.1.0/24")) // true, 1 primary
pr.SetRoutes(2, mp("192.168.1.0/24")) // false, 1 primary (node 1 is still available and stays primary)
pr.SetRoutes(1) // true, 2 primary
return pr.SetRoutes(1, mp("192.168.1.0/24")) // false, 2 primary
},
nodeID: 2,
expectedRoutes: []netip.Prefix{mp("192.168.1.0/24")},
expectedChange: false,
},
{
name: "multiple-nodes-register-same-route-and-exit",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("0.0.0.0/0"), mp("192.168.1.0/24"))
return pr.SetRoutes(2, mp("192.168.1.0/24"))
},
nodeID: 1,
expectedRoutes: []netip.Prefix{mp("192.168.1.0/24")},
expectedChange: true,
},
{
name: "deregister-non-existent-route",
operations: func(pr *PrimaryRoutes) bool {
return pr.SetRoutes(1) // Deregister by setting no routes
},
nodeID: 1,
expectedRoutes: nil,
expectedChange: false,
},
{
name: "register-empty-prefix-list",
operations: func(pr *PrimaryRoutes) bool {
return pr.SetRoutes(1)
},
nodeID: 1,
expectedRoutes: nil,
expectedChange: false,
},
{
name: "deregister-empty-prefix-list",
operations: func(pr *PrimaryRoutes) bool {
return pr.SetRoutes(1)
},
nodeID: 1,
expectedRoutes: nil,
expectedChange: false,
},
{
// Two goroutines register different prefixes at the same time; neither
// prefix is shared, so no primary is expected whichever call runs first.
name: "concurrent-access",
operations: func(pr *PrimaryRoutes) bool {
var wg sync.WaitGroup
wg.Add(2)
var change1, change2 bool
go func() {
defer wg.Done()
change1 = pr.SetRoutes(1, mp("192.168.1.0/24"))
}()
go func() {
defer wg.Done()
change2 = pr.SetRoutes(2, mp("192.168.2.0/24"))
}()
wg.Wait()
return change1 || change2
},
nodeID: 1,
expectedRoutes: nil,
expectedChange: false,
},
{
name: "no-routes-registered",
operations: func(pr *PrimaryRoutes) bool {
// No operations
return false
},
nodeID: 1,
expectedRoutes: nil,
expectedChange: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Every case starts from an empty PrimaryRoutes so cases do not
// influence each other.
pr := New()
change := tt.operations(pr)
if change != tt.expectedChange {
t.Errorf("change = %v, want %v", change, tt.expectedChange)
}
routes := pr.PrimaryRoutes(tt.nodeID)
if diff := cmp.Diff(tt.expectedRoutes, routes, util.Comparers...); diff != "" {
t.Errorf("PrimaryRoutes() mismatch (-want +got):\n%s", diff)
}
})
}
}