metrics, tuning in tests, db cleanups, fix concurrency issue (#1895)

This commit is contained in:
Kristoffer Dalby 2024-04-21 18:28:17 +02:00 committed by GitHub
parent 7d8178406d
commit ba614a5e6c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
28 changed files with 328 additions and 201 deletions

View file

@ -6,21 +6,23 @@ import (
"slices"
"strings"
"sync"
"time"
"github.com/juanfont/headscale/hscontrol/types"
"github.com/puzpuzpuz/xsync/v3"
"github.com/rs/zerolog/log"
)
type Notifier struct {
l sync.RWMutex
nodes map[types.NodeID]chan<- types.StateUpdate
connected types.NodeConnectedMap
connected *xsync.MapOf[types.NodeID, bool]
}
func NewNotifier() *Notifier {
return &Notifier{
nodes: make(map[types.NodeID]chan<- types.StateUpdate),
connected: make(types.NodeConnectedMap),
connected: xsync.NewMapOf[types.NodeID, bool](),
}
}
@ -31,16 +33,19 @@ func (n *Notifier) AddNode(nodeID types.NodeID, c chan<- types.StateUpdate) {
Uint64("node.id", nodeID.Uint64()).
Msg("releasing lock to add node")
start := time.Now()
n.l.Lock()
defer n.l.Unlock()
notifierWaitForLock.WithLabelValues("add").Observe(time.Since(start).Seconds())
n.nodes[nodeID] = c
n.connected[nodeID] = true
n.connected.Store(nodeID, true)
log.Trace().
Uint64("node.id", nodeID.Uint64()).
Int("open_chans", len(n.nodes)).
Msg("Added new channel")
notifierNodeUpdateChans.Inc()
}
func (n *Notifier) RemoveNode(nodeID types.NodeID) {
@ -50,20 +55,23 @@ func (n *Notifier) RemoveNode(nodeID types.NodeID) {
Uint64("node.id", nodeID.Uint64()).
Msg("releasing lock to remove node")
start := time.Now()
n.l.Lock()
defer n.l.Unlock()
notifierWaitForLock.WithLabelValues("remove").Observe(time.Since(start).Seconds())
if len(n.nodes) == 0 {
return
}
delete(n.nodes, nodeID)
n.connected[nodeID] = false
n.connected.Store(nodeID, false)
log.Trace().
Uint64("node.id", nodeID.Uint64()).
Int("open_chans", len(n.nodes)).
Msg("Removed channel")
notifierNodeUpdateChans.Dec()
}
// IsConnected reports if a node is connected to headscale and has a
@ -72,17 +80,22 @@ func (n *Notifier) IsConnected(nodeID types.NodeID) bool {
n.l.RLock()
defer n.l.RUnlock()
return n.connected[nodeID]
if val, ok := n.connected.Load(nodeID); ok {
return val
}
return false
}
// IsLikelyConnected reports if a node is connected to headscale and has a
// poll session open, but doesnt lock, so might be wrong.
func (n *Notifier) IsLikelyConnected(nodeID types.NodeID) bool {
return n.connected[nodeID]
if val, ok := n.connected.Load(nodeID); ok {
return val
}
return false
}
// TODO(kradalby): This returns a pointer and can be dangerous.
func (n *Notifier) ConnectedMap() types.NodeConnectedMap {
func (n *Notifier) LikelyConnectedMap() *xsync.MapOf[types.NodeID, bool] {
return n.connected
}
@ -95,45 +108,16 @@ func (n *Notifier) NotifyWithIgnore(
update types.StateUpdate,
ignoreNodeIDs ...types.NodeID,
) {
log.Trace().Caller().Str("type", update.Type.String()).Msg("acquiring lock to notify")
defer log.Trace().
Caller().
Str("type", update.Type.String()).
Msg("releasing lock, finished notifying")
n.l.RLock()
defer n.l.RUnlock()
if update.Type == types.StatePeerChangedPatch {
log.Trace().Interface("update", update).Interface("online", n.connected).Msg("PATCH UPDATE SENT")
}
for nodeID, c := range n.nodes {
for nodeID := range n.nodes {
if slices.Contains(ignoreNodeIDs, nodeID) {
continue
}
select {
case <-ctx.Done():
log.Error().
Err(ctx.Err()).
Uint64("node.id", nodeID.Uint64()).
Any("origin", ctx.Value("origin")).
Any("origin-hostname", ctx.Value("hostname")).
Msgf("update not sent, context cancelled")
return
case c <- update:
log.Trace().
Uint64("node.id", nodeID.Uint64()).
Any("origin", ctx.Value("origin")).
Any("origin-hostname", ctx.Value("hostname")).
Msgf("update successfully sent on chan")
}
n.NotifyByNodeID(ctx, update, nodeID)
}
}
func (n *Notifier) NotifyByMachineKey(
func (n *Notifier) NotifyByNodeID(
ctx context.Context,
update types.StateUpdate,
nodeID types.NodeID,
@ -144,8 +128,10 @@ func (n *Notifier) NotifyByMachineKey(
Str("type", update.Type.String()).
Msg("releasing lock, finished notifying")
start := time.Now()
n.l.RLock()
defer n.l.RUnlock()
notifierWaitForLock.WithLabelValues("notify").Observe(time.Since(start).Seconds())
if c, ok := n.nodes[nodeID]; ok {
select {
@ -156,6 +142,7 @@ func (n *Notifier) NotifyByMachineKey(
Any("origin", ctx.Value("origin")).
Any("origin-hostname", ctx.Value("hostname")).
Msgf("update not sent, context cancelled")
notifierUpdateSent.WithLabelValues("cancelled", update.Type.String()).Inc()
return
case c <- update:
@ -164,6 +151,7 @@ func (n *Notifier) NotifyByMachineKey(
Any("origin", ctx.Value("origin")).
Any("origin-hostname", ctx.Value("hostname")).
Msgf("update successfully sent on chan")
notifierUpdateSent.WithLabelValues("ok", update.Type.String()).Inc()
}
}
}
@ -182,9 +170,10 @@ func (n *Notifier) String() string {
b.WriteString("\n")
b.WriteString("connected:\n")
for k, v := range n.connected {
n.connected.Range(func(k types.NodeID, v bool) bool {
fmt.Fprintf(&b, "\t%d: %t\n", k, v)
}
return true
})
return b.String()
}