Fix goroutine leak in EphemeralGC on node cancel (#2538)

* Fix goroutine leak in EphemeralGC on node cancel

* Deal with timer firing whilst the GC is shutting down. Fix typos.
This commit is contained in:
Relihan Myburgh 2025-04-23 21:44:24 +12:00 committed by GitHub
parent f3a1e693f2
commit 92e587a82c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 432 additions and 6 deletions

View file

@ -630,22 +630,59 @@ func NewEphemeralGarbageCollector(deleteFunc func(types.NodeID)) *EphemeralGarba
// Close stops the garbage collector.
func (e *EphemeralGarbageCollector) Close() {
e.cancelCh <- struct{}{}
e.mu.Lock()
defer e.mu.Unlock()
// Stop all timers
for _, timer := range e.toBeDeleted {
timer.Stop()
}
// Close the cancel channel to signal all goroutines to exit
close(e.cancelCh)
}
// Schedule schedules a node for deletion after the expiry duration.
// If the garbage collector is already closed, this is a no-op.
func (e *EphemeralGarbageCollector) Schedule(nodeID types.NodeID, expiry time.Duration) {
e.mu.Lock()
defer e.mu.Unlock()
// Don't schedule new timers if the garbage collector is already closed
select {
case <-e.cancelCh:
// The cancel channel is closed, meaning the GC is shutting down
// or already shut down, so we shouldn't schedule anything new
return
default:
// Continue with scheduling
}
// If a timer already exists for this node, stop it first
if oldTimer, exists := e.toBeDeleted[nodeID]; exists {
oldTimer.Stop()
}
timer := time.NewTimer(expiry)
e.toBeDeleted[nodeID] = timer
e.mu.Unlock()
// Start a goroutine to handle the timer completion
go func() {
select {
case _, ok := <-timer.C:
if ok {
e.deleteCh <- nodeID
case <-timer.C:
// This is to handle the situation where the GC is shutting down and
// we are trying to schedule a new node for deletion at the same time
// i.e. We don't want to send to deleteCh if the GC is shutting down
// So, we try to send to deleteCh, but also watch for cancelCh
select {
case e.deleteCh <- nodeID:
// Successfully sent to deleteCh
case <-e.cancelCh:
// GC is shutting down, don't send to deleteCh
return
}
case <-e.cancelCh:
// If the GC is closed, exit the goroutine
return
}
}()
}