Fix goroutine leak in EphemeralGC on node cancel (#2538)
* Fix goroutine leak in EphemeralGC on node cancel * Deal with timer firing whilst the GC is shutting down. Fix typos.
This commit is contained in:
parent
f3a1e693f2
commit
92e587a82c
2 changed files with 432 additions and 6 deletions
|
@ -630,22 +630,59 @@ func NewEphemeralGarbageCollector(deleteFunc func(types.NodeID)) *EphemeralGarba
|
|||
|
||||
// Close stops the garbage collector.
|
||||
func (e *EphemeralGarbageCollector) Close() {
|
||||
e.cancelCh <- struct{}{}
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
|
||||
// Stop all timers
|
||||
for _, timer := range e.toBeDeleted {
|
||||
timer.Stop()
|
||||
}
|
||||
|
||||
// Close the cancel channel to signal all goroutines to exit
|
||||
close(e.cancelCh)
|
||||
}
|
||||
|
||||
// Schedule schedules a node for deletion after the expiry duration.
|
||||
// If the garbage collector is already closed, this is a no-op.
|
||||
func (e *EphemeralGarbageCollector) Schedule(nodeID types.NodeID, expiry time.Duration) {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
|
||||
// Don't schedule new timers if the garbage collector is already closed
|
||||
select {
|
||||
case <-e.cancelCh:
|
||||
// The cancel channel is closed, meaning the GC is shutting down
|
||||
// or already shut down, so we shouldn't schedule anything new
|
||||
return
|
||||
default:
|
||||
// Continue with scheduling
|
||||
}
|
||||
|
||||
// If a timer already exists for this node, stop it first
|
||||
if oldTimer, exists := e.toBeDeleted[nodeID]; exists {
|
||||
oldTimer.Stop()
|
||||
}
|
||||
|
||||
timer := time.NewTimer(expiry)
|
||||
e.toBeDeleted[nodeID] = timer
|
||||
e.mu.Unlock()
|
||||
|
||||
// Start a goroutine to handle the timer completion
|
||||
go func() {
|
||||
select {
|
||||
case _, ok := <-timer.C:
|
||||
if ok {
|
||||
e.deleteCh <- nodeID
|
||||
case <-timer.C:
|
||||
// This is to handle the situation where the GC is shutting down and
|
||||
// we are trying to schedule a new node for deletion at the same time
|
||||
// i.e. We don't want to send to deleteCh if the GC is shutting down
|
||||
// So, we try to send to deleteCh, but also watch for cancelCh
|
||||
select {
|
||||
case e.deleteCh <- nodeID:
|
||||
// Successfully sent to deleteCh
|
||||
case <-e.cancelCh:
|
||||
// GC is shutting down, don't send to deleteCh
|
||||
return
|
||||
}
|
||||
case <-e.cancelCh:
|
||||
// If the GC is closed, exit the goroutine
|
||||
return
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue