replace ephemeral deletion logic (#2008)
* replace ephemeral deletion logic this commit replaces the way we remove ephemeral nodes, currently they are deleted in a loop and we look at last seen time. This time is now only set when a node disconnects and there was a bug (#2006) where nodes that had never disconnected was deleted since they did not have a last seen. The new logic will start an expiry timer when the node disconnects and delete the node from the database when the timer is up. If the node reconnects within the expiry, the timer is cancelled. Fixes #2006 Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com> * use uint64 as authekyid and ptr helper in tests Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com> * add test db helper Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com> * add list ephemeral node func Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com> * schedule ephemeral nodes for removal on startup Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com> * fix gorm query for postgres Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com> * add godoc Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com> --------- Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
This commit is contained in:
parent
58bd38a609
commit
7e62031444
13 changed files with 417 additions and 206 deletions
|
@ -91,6 +91,7 @@ type Headscale struct {
|
|||
db *db.HSDatabase
|
||||
ipAlloc *db.IPAllocator
|
||||
noisePrivateKey *key.MachinePrivate
|
||||
ephemeralGC *db.EphemeralGarbageCollector
|
||||
|
||||
DERPMap *tailcfg.DERPMap
|
||||
DERPServer *derpServer.DERPServer
|
||||
|
@ -153,6 +154,12 @@ func NewHeadscale(cfg *types.Config) (*Headscale, error) {
|
|||
return nil, err
|
||||
}
|
||||
|
||||
app.ephemeralGC = db.NewEphemeralGarbageCollector(func(ni types.NodeID) {
|
||||
if err := app.db.DeleteEphemeralNode(ni); err != nil {
|
||||
log.Err(err).Uint64("node.id", ni.Uint64()).Msgf("failed to delete ephemeral node")
|
||||
}
|
||||
})
|
||||
|
||||
if cfg.OIDC.Issuer != "" {
|
||||
err = app.initOIDC()
|
||||
if err != nil {
|
||||
|
@ -217,47 +224,6 @@ func (h *Headscale) redirect(w http.ResponseWriter, req *http.Request) {
|
|||
http.Redirect(w, req, target, http.StatusFound)
|
||||
}
|
||||
|
||||
// deleteExpireEphemeralNodes deletes ephemeral node records that have not been
|
||||
// seen for longer than h.cfg.EphemeralNodeInactivityTimeout.
|
||||
func (h *Headscale) deleteExpireEphemeralNodes(ctx context.Context, every time.Duration) {
|
||||
ticker := time.NewTicker(every)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
ticker.Stop()
|
||||
return
|
||||
case <-ticker.C:
|
||||
var removed []types.NodeID
|
||||
var changed []types.NodeID
|
||||
if err := h.db.Write(func(tx *gorm.DB) error {
|
||||
removed, changed = db.DeleteExpiredEphemeralNodes(tx, h.cfg.EphemeralNodeInactivityTimeout)
|
||||
|
||||
return nil
|
||||
}); err != nil {
|
||||
log.Error().Err(err).Msg("database error while expiring ephemeral nodes")
|
||||
continue
|
||||
}
|
||||
|
||||
if removed != nil {
|
||||
ctx := types.NotifyCtx(context.Background(), "expire-ephemeral", "na")
|
||||
h.nodeNotifier.NotifyAll(ctx, types.StateUpdate{
|
||||
Type: types.StatePeerRemoved,
|
||||
Removed: removed,
|
||||
})
|
||||
}
|
||||
|
||||
if changed != nil {
|
||||
ctx := types.NotifyCtx(context.Background(), "expire-ephemeral", "na")
|
||||
h.nodeNotifier.NotifyAll(ctx, types.StateUpdate{
|
||||
Type: types.StatePeerChanged,
|
||||
ChangeNodes: changed,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// expireExpiredNodes expires nodes that have an explicit expiry set
|
||||
// after that expiry time has passed.
|
||||
func (h *Headscale) expireExpiredNodes(ctx context.Context, every time.Duration) {
|
||||
|
@ -557,9 +523,18 @@ func (h *Headscale) Serve() error {
|
|||
return errEmptyInitialDERPMap
|
||||
}
|
||||
|
||||
expireEphemeralCtx, expireEphemeralCancel := context.WithCancel(context.Background())
|
||||
defer expireEphemeralCancel()
|
||||
go h.deleteExpireEphemeralNodes(expireEphemeralCtx, updateInterval)
|
||||
// Start ephemeral node garbage collector and schedule all nodes
|
||||
// that are already in the database and ephemeral. If they are still
|
||||
// around between restarts, they will reconnect and the GC will
|
||||
// be cancelled.
|
||||
go h.ephemeralGC.Start()
|
||||
ephmNodes, err := h.db.ListEphemeralNodes()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to list ephemeral nodes: %w", err)
|
||||
}
|
||||
for _, node := range ephmNodes {
|
||||
h.ephemeralGC.Schedule(node.ID, h.cfg.EphemeralNodeInactivityTimeout)
|
||||
}
|
||||
|
||||
expireNodeCtx, expireNodeCancel := context.WithCancel(context.Background())
|
||||
defer expireNodeCancel()
|
||||
|
@ -809,7 +784,7 @@ func (h *Headscale) Serve() error {
|
|||
Msg("Received signal to stop, shutting down gracefully")
|
||||
|
||||
expireNodeCancel()
|
||||
expireEphemeralCancel()
|
||||
h.ephemeralGC.Close()
|
||||
|
||||
trace("waiting for netmap stream to close")
|
||||
h.pollNetMapStreamWG.Wait()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue