replace ephemeral deletion logic (#2008)
* replace ephemeral deletion logic This commit replaces the way we remove ephemeral nodes. Currently they are deleted in a loop that looks at the last seen time. That time is now only set when a node disconnects, and there was a bug (#2006) where nodes that had never disconnected were deleted since they did not have a last seen time. The new logic starts an expiry timer when the node disconnects and deletes the node from the database when the timer is up. If the node reconnects within the expiry, the timer is cancelled. Fixes #2006 Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com> * use uint64 as authkeyid and ptr helper in tests Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com> * add test db helper Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com> * add list ephemeral node func Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com> * schedule ephemeral nodes for removal on startup Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com> * fix gorm query for postgres Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com> * add godoc Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com> --------- Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
This commit is contained in:
parent
58bd38a609
commit
7e62031444
13 changed files with 417 additions and 206 deletions
|
@ -12,6 +12,7 @@ import (
|
|||
"github.com/patrickmn/go-cache"
|
||||
"github.com/puzpuzpuz/xsync/v3"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/sasha-s/go-deadlock"
|
||||
"gorm.io/gorm"
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/types/key"
|
||||
|
@ -78,6 +79,17 @@ func ListNodes(tx *gorm.DB) (types.Nodes, error) {
|
|||
return nodes, nil
|
||||
}
|
||||
|
||||
func (hsdb *HSDatabase) ListEphemeralNodes() (types.Nodes, error) {
|
||||
return Read(hsdb.DB, func(rx *gorm.DB) (types.Nodes, error) {
|
||||
nodes := types.Nodes{}
|
||||
if err := rx.Joins("AuthKey").Where(`"AuthKey"."ephemeral" = true`).Find(&nodes).Error; err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return nodes, nil
|
||||
})
|
||||
}
|
||||
|
||||
func listNodesByGivenName(tx *gorm.DB, givenName string) (types.Nodes, error) {
|
||||
nodes := types.Nodes{}
|
||||
if err := tx.
|
||||
|
@ -286,6 +298,20 @@ func DeleteNode(tx *gorm.DB,
|
|||
return changed, nil
|
||||
}
|
||||
|
||||
// DeleteEphemeralNode deletes a Node from the database, note that this method
|
||||
// will remove it straight, and not notify any changes or consider any routes.
|
||||
// It is intended for Ephemeral nodes.
|
||||
func (hsdb *HSDatabase) DeleteEphemeralNode(
|
||||
nodeID types.NodeID,
|
||||
) error {
|
||||
return hsdb.Write(func(tx *gorm.DB) error {
|
||||
if err := tx.Unscoped().Delete(&types.Node{}, nodeID).Error; err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
// SetLastSeen sets a node's last seen field indicating that we
|
||||
// have recently communicating with this node.
|
||||
func SetLastSeen(tx *gorm.DB, nodeID types.NodeID, lastSeen time.Time) error {
|
||||
|
@ -660,51 +686,6 @@ func GenerateGivenName(
|
|||
return givenName, nil
|
||||
}
|
||||
|
||||
func DeleteExpiredEphemeralNodes(tx *gorm.DB,
|
||||
inactivityThreshold time.Duration,
|
||||
) ([]types.NodeID, []types.NodeID) {
|
||||
users, err := ListUsers(tx)
|
||||
if err != nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var expired []types.NodeID
|
||||
var changedNodes []types.NodeID
|
||||
for _, user := range users {
|
||||
nodes, err := ListNodesByUser(tx, user.Name)
|
||||
if err != nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
for idx, node := range nodes {
|
||||
if node.IsEphemeral() && node.LastSeen != nil &&
|
||||
time.Now().
|
||||
After(node.LastSeen.Add(inactivityThreshold)) {
|
||||
expired = append(expired, node.ID)
|
||||
|
||||
log.Info().
|
||||
Str("node", node.Hostname).
|
||||
Msg("Ephemeral client removed from database")
|
||||
|
||||
// empty isConnected map as ephemeral nodes are not routes
|
||||
changed, err := DeleteNode(tx, nodes[idx], nil)
|
||||
if err != nil {
|
||||
log.Error().
|
||||
Err(err).
|
||||
Str("node", node.Hostname).
|
||||
Msg("🤮 Cannot delete ephemeral node from the database")
|
||||
}
|
||||
|
||||
changedNodes = append(changedNodes, changed...)
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(kradalby): needs to be moved out of transaction
|
||||
}
|
||||
|
||||
return expired, changedNodes
|
||||
}
|
||||
|
||||
func ExpireExpiredNodes(tx *gorm.DB,
|
||||
lastCheck time.Time,
|
||||
) (time.Time, types.StateUpdate, bool) {
|
||||
|
@ -737,3 +718,78 @@ func ExpireExpiredNodes(tx *gorm.DB,
|
|||
|
||||
return started, types.StateUpdate{}, false
|
||||
}
|
||||
|
||||
// EphemeralGarbageCollector is a garbage collector that will delete nodes after
|
||||
// a certain amount of time.
|
||||
// It is used to delete ephemeral nodes that have disconnected and should be
|
||||
// cleaned up.
|
||||
type EphemeralGarbageCollector struct {
|
||||
mu deadlock.Mutex
|
||||
|
||||
deleteFunc func(types.NodeID)
|
||||
toBeDeleted map[types.NodeID]*time.Timer
|
||||
|
||||
deleteCh chan types.NodeID
|
||||
cancelCh chan struct{}
|
||||
}
|
||||
|
||||
// NewEphemeralGarbageCollector creates a new EphemeralGarbageCollector, it takes
|
||||
// a deleteFunc that will be called when a node is scheduled for deletion.
|
||||
func NewEphemeralGarbageCollector(deleteFunc func(types.NodeID)) *EphemeralGarbageCollector {
|
||||
return &EphemeralGarbageCollector{
|
||||
toBeDeleted: make(map[types.NodeID]*time.Timer),
|
||||
deleteCh: make(chan types.NodeID, 10),
|
||||
cancelCh: make(chan struct{}),
|
||||
deleteFunc: deleteFunc,
|
||||
}
|
||||
}
|
||||
|
||||
// Close stops the garbage collector.
|
||||
func (e *EphemeralGarbageCollector) Close() {
|
||||
e.cancelCh <- struct{}{}
|
||||
}
|
||||
|
||||
// Schedule schedules a node for deletion after the expiry duration.
|
||||
func (e *EphemeralGarbageCollector) Schedule(nodeID types.NodeID, expiry time.Duration) {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
|
||||
timer := time.NewTimer(expiry)
|
||||
e.toBeDeleted[nodeID] = timer
|
||||
|
||||
go func() {
|
||||
select {
|
||||
case _, ok := <-timer.C:
|
||||
if ok {
|
||||
e.deleteCh <- nodeID
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// Cancel cancels the deletion of a node.
|
||||
func (e *EphemeralGarbageCollector) Cancel(nodeID types.NodeID) {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
|
||||
if timer, ok := e.toBeDeleted[nodeID]; ok {
|
||||
timer.Stop()
|
||||
delete(e.toBeDeleted, nodeID)
|
||||
}
|
||||
}
|
||||
|
||||
// Start starts the garbage collector.
|
||||
func (e *EphemeralGarbageCollector) Start() {
|
||||
for {
|
||||
select {
|
||||
case <-e.cancelCh:
|
||||
return
|
||||
case nodeID := <-e.deleteCh:
|
||||
e.mu.Lock()
|
||||
delete(e.toBeDeleted, nodeID)
|
||||
e.mu.Unlock()
|
||||
|
||||
go e.deleteFunc(nodeID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue