metrics, tuning in tests, db cleanups, fix concurrency issue (#1895)

This commit is contained in:
Kristoffer Dalby 2024-04-21 18:28:17 +02:00 committed by GitHub
parent 7d8178406d
commit ba614a5e6c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
28 changed files with 328 additions and 201 deletions

View file

@ -10,6 +10,7 @@ import (
"github.com/juanfont/headscale/hscontrol/types"
"github.com/juanfont/headscale/hscontrol/util"
"github.com/patrickmn/go-cache"
"github.com/puzpuzpuz/xsync/v3"
"github.com/rs/zerolog/log"
"gorm.io/gorm"
"tailscale.com/tailcfg"
@ -260,9 +261,9 @@ func NodeSetExpiry(tx *gorm.DB,
return tx.Model(&types.Node{}).Where("id = ?", nodeID).Update("expiry", expiry).Error
}
func (hsdb *HSDatabase) DeleteNode(node *types.Node, isConnected types.NodeConnectedMap) ([]types.NodeID, error) {
func (hsdb *HSDatabase) DeleteNode(node *types.Node, isLikelyConnected *xsync.MapOf[types.NodeID, bool]) ([]types.NodeID, error) {
return Write(hsdb.DB, func(tx *gorm.DB) ([]types.NodeID, error) {
return DeleteNode(tx, node, isConnected)
return DeleteNode(tx, node, isLikelyConnected)
})
}
@ -270,9 +271,9 @@ func (hsdb *HSDatabase) DeleteNode(node *types.Node, isConnected types.NodeConne
// Caller is responsible for notifying all of change.
func DeleteNode(tx *gorm.DB,
node *types.Node,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
) ([]types.NodeID, error) {
changed, err := deleteNodeRoutes(tx, node, isConnected)
changed, err := deleteNodeRoutes(tx, node, isLikelyConnected)
if err != nil {
return changed, err
}

View file

@ -11,6 +11,7 @@ import (
"github.com/juanfont/headscale/hscontrol/policy"
"github.com/juanfont/headscale/hscontrol/types"
"github.com/juanfont/headscale/hscontrol/util"
"github.com/puzpuzpuz/xsync/v3"
"gopkg.in/check.v1"
"tailscale.com/tailcfg"
"tailscale.com/types/key"
@ -120,7 +121,7 @@ func (s *Suite) TestHardDeleteNode(c *check.C) {
}
db.DB.Save(&node)
_, err = db.DeleteNode(&node, types.NodeConnectedMap{})
_, err = db.DeleteNode(&node, xsync.NewMapOf[types.NodeID, bool]())
c.Assert(err, check.IsNil)
_, err = db.getNode(user.Name, "testnode3")

View file

@ -147,7 +147,7 @@ func (*Suite) TestEphemeralKeyReusable(c *check.C) {
_, err = db.getNode("test7", "testest")
c.Assert(err, check.IsNil)
db.DB.Transaction(func(tx *gorm.DB) error {
db.Write(func(tx *gorm.DB) error {
DeleteExpiredEphemeralNodes(tx, time.Second*20)
return nil
})
@ -181,7 +181,7 @@ func (*Suite) TestEphemeralKeyNotReusable(c *check.C) {
_, err = db.getNode("test7", "testest")
c.Assert(err, check.IsNil)
db.DB.Transaction(func(tx *gorm.DB) error {
db.Write(func(tx *gorm.DB) error {
DeleteExpiredEphemeralNodes(tx, time.Second*20)
return nil
})

View file

@ -8,6 +8,7 @@ import (
"github.com/juanfont/headscale/hscontrol/policy"
"github.com/juanfont/headscale/hscontrol/types"
"github.com/puzpuzpuz/xsync/v3"
"github.com/rs/zerolog/log"
"gorm.io/gorm"
"tailscale.com/util/set"
@ -126,7 +127,7 @@ func EnableRoute(tx *gorm.DB, id uint64) (*types.StateUpdate, error) {
func DisableRoute(tx *gorm.DB,
id uint64,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
) ([]types.NodeID, error) {
route, err := GetRoute(tx, id)
if err != nil {
@ -147,7 +148,7 @@ func DisableRoute(tx *gorm.DB,
return nil, err
}
update, err = failoverRouteTx(tx, isConnected, route)
update, err = failoverRouteTx(tx, isLikelyConnected, route)
if err != nil {
return nil, err
}
@ -182,17 +183,17 @@ func DisableRoute(tx *gorm.DB,
func (hsdb *HSDatabase) DeleteRoute(
id uint64,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
) ([]types.NodeID, error) {
return Write(hsdb.DB, func(tx *gorm.DB) ([]types.NodeID, error) {
return DeleteRoute(tx, id, isConnected)
return DeleteRoute(tx, id, isLikelyConnected)
})
}
func DeleteRoute(
tx *gorm.DB,
id uint64,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
) ([]types.NodeID, error) {
route, err := GetRoute(tx, id)
if err != nil {
@ -207,7 +208,7 @@ func DeleteRoute(
// https://github.com/juanfont/headscale/issues/804#issuecomment-1399314002
var update []types.NodeID
if !route.IsExitRoute() {
update, err = failoverRouteTx(tx, isConnected, route)
update, err = failoverRouteTx(tx, isLikelyConnected, route)
if err != nil {
return nil, nil
}
@ -252,7 +253,7 @@ func DeleteRoute(
return update, nil
}
func deleteNodeRoutes(tx *gorm.DB, node *types.Node, isConnected types.NodeConnectedMap) ([]types.NodeID, error) {
func deleteNodeRoutes(tx *gorm.DB, node *types.Node, isLikelyConnected *xsync.MapOf[types.NodeID, bool]) ([]types.NodeID, error) {
routes, err := GetNodeRoutes(tx, node)
if err != nil {
return nil, fmt.Errorf("getting node routes: %w", err)
@ -266,7 +267,7 @@ func deleteNodeRoutes(tx *gorm.DB, node *types.Node, isConnected types.NodeConne
// TODO(kradalby): This is a bit too aggressive, we could probably
// figure out which routes needs to be failed over rather than all.
chn, err := failoverRouteTx(tx, isConnected, &routes[i])
chn, err := failoverRouteTx(tx, isLikelyConnected, &routes[i])
if err != nil {
return changed, fmt.Errorf("failing over route after delete: %w", err)
}
@ -409,7 +410,7 @@ func SaveNodeRoutes(tx *gorm.DB, node *types.Node) (bool, error) {
// If needed, the failover will be attempted.
func FailoverNodeRoutesIfNeccessary(
tx *gorm.DB,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
node *types.Node,
) (*types.StateUpdate, error) {
nodeRoutes, err := GetNodeRoutes(tx, node)
@ -430,12 +431,12 @@ nodeRouteLoop:
if route.IsPrimary {
// if we have a primary route, and the node is connected
// nothing needs to be done.
if conn, ok := isConnected[route.Node.ID]; conn && ok {
if val, ok := isLikelyConnected.Load(route.Node.ID); ok && val {
continue nodeRouteLoop
}
// if not, we need to failover the route
failover := failoverRoute(isConnected, &route, routes)
failover := failoverRoute(isLikelyConnected, &route, routes)
if failover != nil {
err := failover.save(tx)
if err != nil {
@ -477,7 +478,7 @@ nodeRouteLoop:
// If the given route was not primary, it returns early.
func failoverRouteTx(
tx *gorm.DB,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
r *types.Route,
) ([]types.NodeID, error) {
if r == nil {
@ -500,7 +501,7 @@ func failoverRouteTx(
return nil, fmt.Errorf("getting routes by prefix: %w", err)
}
fo := failoverRoute(isConnected, r, routes)
fo := failoverRoute(isLikelyConnected, r, routes)
if fo == nil {
return nil, nil
}
@ -538,7 +539,7 @@ func (f *failover) save(tx *gorm.DB) error {
}
func failoverRoute(
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
routeToReplace *types.Route,
altRoutes types.Routes,
@ -570,9 +571,11 @@ func failoverRoute(
continue
}
if isConnected != nil && isConnected[route.Node.ID] {
newPrimary = &altRoutes[idx]
break
if isLikelyConnected != nil {
if val, ok := isLikelyConnected.Load(route.Node.ID); ok && val {
newPrimary = &altRoutes[idx]
break
}
}
}

View file

@ -10,11 +10,22 @@ import (
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/juanfont/headscale/hscontrol/types"
"github.com/juanfont/headscale/hscontrol/util"
"github.com/puzpuzpuz/xsync/v3"
"gopkg.in/check.v1"
"gorm.io/gorm"
"tailscale.com/tailcfg"
)
var smap = func(m map[types.NodeID]bool) *xsync.MapOf[types.NodeID, bool] {
s := xsync.NewMapOf[types.NodeID, bool]()
for k, v := range m {
s.Store(k, v)
}
return s
}
func (s *Suite) TestGetRoutes(c *check.C) {
user, err := db.CreateUser("test")
c.Assert(err, check.IsNil)
@ -331,7 +342,7 @@ func TestFailoverNodeRoutesIfNeccessary(t *testing.T) {
name string
nodes types.Nodes
routes types.Routes
isConnected []types.NodeConnectedMap
isConnected []map[types.NodeID]bool
want []*types.StateUpdate
wantErr bool
}{
@ -346,7 +357,7 @@ func TestFailoverNodeRoutesIfNeccessary(t *testing.T) {
r(1, 1, ipp("10.0.0.0/24"), true, true),
r(2, 2, ipp("10.0.0.0/24"), true, false),
},
isConnected: []types.NodeConnectedMap{
isConnected: []map[types.NodeID]bool{
// n1 goes down
{
1: false,
@ -384,7 +395,7 @@ func TestFailoverNodeRoutesIfNeccessary(t *testing.T) {
r(1, 1, ipp("10.0.0.0/24"), true, true),
r(2, 2, ipp("10.0.0.0/24"), true, false),
},
isConnected: []types.NodeConnectedMap{
isConnected: []map[types.NodeID]bool{
// n1 up recon = noop
{
1: true,
@ -428,7 +439,7 @@ func TestFailoverNodeRoutesIfNeccessary(t *testing.T) {
r(2, 2, ipp("10.0.0.0/24"), true, false),
r(3, 3, ipp("10.0.0.0/24"), true, false),
},
isConnected: []types.NodeConnectedMap{
isConnected: []map[types.NodeID]bool{
// n1 goes down
{
1: false,
@ -486,7 +497,7 @@ func TestFailoverNodeRoutesIfNeccessary(t *testing.T) {
r(2, 2, ipp("10.0.0.0/24"), false, false),
r(3, 3, ipp("10.0.0.0/24"), true, false),
},
isConnected: []types.NodeConnectedMap{
isConnected: []map[types.NodeID]bool{
// n1 goes down
{
1: false,
@ -516,7 +527,7 @@ func TestFailoverNodeRoutesIfNeccessary(t *testing.T) {
r(2, 2, ipp("10.0.0.0/24"), true, false),
r(3, 3, ipp("10.1.0.0/24"), true, false),
},
isConnected: []types.NodeConnectedMap{
isConnected: []map[types.NodeID]bool{
// n1 goes down
{
1: false,
@ -539,7 +550,7 @@ func TestFailoverNodeRoutesIfNeccessary(t *testing.T) {
r(2, 2, ipp("10.0.0.0/24"), true, false),
r(3, 3, ipp("10.1.0.0/24"), false, false),
},
isConnected: []types.NodeConnectedMap{
isConnected: []map[types.NodeID]bool{
// n1 goes down
{
1: false,
@ -562,7 +573,7 @@ func TestFailoverNodeRoutesIfNeccessary(t *testing.T) {
r(2, 2, ipp("10.0.0.0/24"), true, false),
r(3, 3, ipp("10.1.0.0/24"), true, false),
},
isConnected: []types.NodeConnectedMap{
isConnected: []map[types.NodeID]bool{
// n1 goes down
{
1: false,
@ -585,7 +596,7 @@ func TestFailoverNodeRoutesIfNeccessary(t *testing.T) {
r(2, 2, ipp("10.0.0.0/24"), true, true),
r(3, 3, ipp("10.1.0.0/24"), true, false),
},
isConnected: []types.NodeConnectedMap{
isConnected: []map[types.NodeID]bool{
// n1 goes down
{
1: true,
@ -618,7 +629,7 @@ func TestFailoverNodeRoutesIfNeccessary(t *testing.T) {
want := tt.want[step]
got, err := Write(db.DB, func(tx *gorm.DB) (*types.StateUpdate, error) {
return FailoverNodeRoutesIfNeccessary(tx, isConnected, node)
return FailoverNodeRoutesIfNeccessary(tx, smap(isConnected), node)
})
if (err != nil) != tt.wantErr {
@ -640,7 +651,7 @@ func TestFailoverRouteTx(t *testing.T) {
name string
failingRoute types.Route
routes types.Routes
isConnected types.NodeConnectedMap
isConnected map[types.NodeID]bool
want []types.NodeID
wantErr bool
}{
@ -743,7 +754,7 @@ func TestFailoverRouteTx(t *testing.T) {
Enabled: true,
},
},
isConnected: types.NodeConnectedMap{
isConnected: map[types.NodeID]bool{
1: false,
2: true,
},
@ -841,7 +852,7 @@ func TestFailoverRouteTx(t *testing.T) {
Enabled: true,
},
},
isConnected: types.NodeConnectedMap{
isConnected: map[types.NodeID]bool{
1: true,
2: true,
3: true,
@ -889,7 +900,7 @@ func TestFailoverRouteTx(t *testing.T) {
Enabled: true,
},
},
isConnected: types.NodeConnectedMap{
isConnected: map[types.NodeID]bool{
1: true,
4: false,
},
@ -945,7 +956,7 @@ func TestFailoverRouteTx(t *testing.T) {
Enabled: true,
},
},
isConnected: types.NodeConnectedMap{
isConnected: map[types.NodeID]bool{
1: false,
2: true,
4: false,
@ -1010,7 +1021,7 @@ func TestFailoverRouteTx(t *testing.T) {
}
got, err := Write(db.DB, func(tx *gorm.DB) ([]types.NodeID, error) {
return failoverRouteTx(tx, tt.isConnected, &tt.failingRoute)
return failoverRouteTx(tx, smap(tt.isConnected), &tt.failingRoute)
})
if (err != nil) != tt.wantErr {
@ -1048,7 +1059,7 @@ func TestFailoverRoute(t *testing.T) {
name string
failingRoute types.Route
routes types.Routes
isConnected types.NodeConnectedMap
isConnected map[types.NodeID]bool
want *failover
}{
{
@ -1085,7 +1096,7 @@ func TestFailoverRoute(t *testing.T) {
r(1, 1, ipp("10.0.0.0/24"), true, true),
r(2, 2, ipp("10.0.0.0/24"), true, false),
},
isConnected: types.NodeConnectedMap{
isConnected: map[types.NodeID]bool{
1: false,
2: true,
},
@ -1111,7 +1122,7 @@ func TestFailoverRoute(t *testing.T) {
r(2, 2, ipp("10.0.0.0/24"), true, true),
r(3, 3, ipp("10.0.0.0/24"), true, false),
},
isConnected: types.NodeConnectedMap{
isConnected: map[types.NodeID]bool{
1: true,
2: true,
3: true,
@ -1128,7 +1139,7 @@ func TestFailoverRoute(t *testing.T) {
r(1, 1, ipp("10.0.0.0/24"), true, true),
r(2, 4, ipp("10.0.0.0/24"), true, false),
},
isConnected: types.NodeConnectedMap{
isConnected: map[types.NodeID]bool{
1: true,
4: false,
},
@ -1142,7 +1153,7 @@ func TestFailoverRoute(t *testing.T) {
r(2, 4, ipp("10.0.0.0/24"), true, false),
r(3, 2, ipp("10.0.0.0/24"), true, false),
},
isConnected: types.NodeConnectedMap{
isConnected: map[types.NodeID]bool{
1: false,
2: true,
4: false,
@ -1172,7 +1183,7 @@ func TestFailoverRoute(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
gotf := failoverRoute(tt.isConnected, &tt.failingRoute, tt.routes)
gotf := failoverRoute(smap(tt.isConnected), &tt.failingRoute, tt.routes)
if tt.want == nil && gotf != nil {
t.Fatalf("expected nil, got %+v", gotf)