Rework map session

This commit restructures the map session in to a struct
holding the state of what is needed during its lifetime.

For streaming sessions, the event loop is structured a
bit differently not hammering the clients with updates
but rather batching them over a short, configurable time
which should significantly improve cpu usage, and potentially
flakyness.

The use of Patch updates has been dialed back a little as
it does not look like its a 100% ready for prime time. Nodes
are now updated with full changes, except for a few things
like online status.

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
This commit is contained in:
Kristoffer Dalby 2024-02-23 10:59:24 +01:00 committed by Juan Font
parent dd693c444c
commit 58c94d2bd3
35 changed files with 1803 additions and 1716 deletions

View file

@ -65,7 +65,7 @@ func TestPingAllByIP(t *testing.T) {
err = scenario.WaitForTailscaleSync()
assertNoErrSync(t, err)
assertClientsState(t, allClients)
// assertClientsState(t, allClients)
allAddrs := lo.Map(allIps, func(x netip.Addr, index int) string {
return x.String()
@ -103,7 +103,7 @@ func TestPingAllByIPPublicDERP(t *testing.T) {
err = scenario.WaitForTailscaleSync()
assertNoErrSync(t, err)
assertClientsState(t, allClients)
// assertClientsState(t, allClients)
allAddrs := lo.Map(allIps, func(x netip.Addr, index int) string {
return x.String()
@ -135,7 +135,7 @@ func TestAuthKeyLogoutAndRelogin(t *testing.T) {
err = scenario.WaitForTailscaleSync()
assertNoErrSync(t, err)
assertClientsState(t, allClients)
// assertClientsState(t, allClients)
clientIPs := make(map[TailscaleClient][]netip.Addr)
for _, client := range allClients {
@ -176,7 +176,7 @@ func TestAuthKeyLogoutAndRelogin(t *testing.T) {
err = scenario.WaitForTailscaleSync()
assertNoErrSync(t, err)
assertClientsState(t, allClients)
// assertClientsState(t, allClients)
allClients, err = scenario.ListTailscaleClients()
assertNoErrListClients(t, err)
@ -329,7 +329,7 @@ func TestPingAllByHostname(t *testing.T) {
err = scenario.WaitForTailscaleSync()
assertNoErrSync(t, err)
assertClientsState(t, allClients)
// assertClientsState(t, allClients)
allHostnames, err := scenario.ListTailscaleClientsFQDNs()
assertNoErrListFQDN(t, err)
@ -539,7 +539,7 @@ func TestResolveMagicDNS(t *testing.T) {
err = scenario.WaitForTailscaleSync()
assertNoErrSync(t, err)
assertClientsState(t, allClients)
// assertClientsState(t, allClients)
// Poor mans cache
_, err = scenario.ListTailscaleClientsFQDNs()
@ -609,7 +609,7 @@ func TestExpireNode(t *testing.T) {
err = scenario.WaitForTailscaleSync()
assertNoErrSync(t, err)
assertClientsState(t, allClients)
// assertClientsState(t, allClients)
allAddrs := lo.Map(allIps, func(x netip.Addr, index int) string {
return x.String()
@ -711,7 +711,7 @@ func TestExpireNode(t *testing.T) {
}
}
func TestNodeOnlineLastSeenStatus(t *testing.T) {
func TestNodeOnlineStatus(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
@ -723,7 +723,7 @@ func TestNodeOnlineLastSeenStatus(t *testing.T) {
"user1": len(MustTestVersions),
}
err = scenario.CreateHeadscaleEnv(spec, []tsic.Option{}, hsic.WithTestName("onlinelastseen"))
err = scenario.CreateHeadscaleEnv(spec, []tsic.Option{}, hsic.WithTestName("online"))
assertNoErrHeadscaleEnv(t, err)
allClients, err := scenario.ListTailscaleClients()
@ -735,7 +735,7 @@ func TestNodeOnlineLastSeenStatus(t *testing.T) {
err = scenario.WaitForTailscaleSync()
assertNoErrSync(t, err)
assertClientsState(t, allClients)
// assertClientsState(t, allClients)
allAddrs := lo.Map(allIps, func(x netip.Addr, index int) string {
return x.String()
@ -755,8 +755,6 @@ func TestNodeOnlineLastSeenStatus(t *testing.T) {
headscale, err := scenario.Headscale()
assertNoErr(t, err)
keepAliveInterval := 60 * time.Second
// Duration is chosen arbitrarily, 10m is reported in #1561
testDuration := 12 * time.Minute
start := time.Now()
@ -780,11 +778,6 @@ func TestNodeOnlineLastSeenStatus(t *testing.T) {
err = json.Unmarshal([]byte(result), &nodes)
assertNoErr(t, err)
now := time.Now()
// Threshold with some leeway
lastSeenThreshold := now.Add(-keepAliveInterval - (10 * time.Second))
// Verify that headscale reports the nodes as online
for _, node := range nodes {
// All nodes should be online
@ -795,18 +788,6 @@ func TestNodeOnlineLastSeenStatus(t *testing.T) {
node.GetName(),
time.Since(start),
)
lastSeen := node.GetLastSeen().AsTime()
// All nodes should have been last seen between now and the keepAliveInterval
assert.Truef(
t,
lastSeen.After(lastSeenThreshold),
"node (%s) lastSeen (%v) was not %s after the threshold (%v)",
node.GetName(),
lastSeen,
keepAliveInterval,
lastSeenThreshold,
)
}
// Verify that all nodes report all nodes to be online
@ -834,15 +815,6 @@ func TestNodeOnlineLastSeenStatus(t *testing.T) {
client.Hostname(),
time.Since(start),
)
// from docs: last seen to tailcontrol; only present if offline
// assert.Nilf(
// t,
// peerStatus.LastSeen,
// "expected node %s to not have LastSeen set, got %s",
// peerStatus.HostName,
// peerStatus.LastSeen,
// )
}
}
@ -850,3 +822,87 @@ func TestNodeOnlineLastSeenStatus(t *testing.T) {
time.Sleep(time.Second)
}
}
// TestPingAllByIPManyUpDown is a variant of the PingAll
// test which will take the tailscale node up and down
// five times ensuring they are able to restablish connectivity.
func TestPingAllByIPManyUpDown(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
scenario, err := NewScenario()
assertNoErr(t, err)
defer scenario.Shutdown()
// TODO(kradalby): it does not look like the user thing works, only second
// get created? maybe only when many?
spec := map[string]int{
"user1": len(MustTestVersions),
"user2": len(MustTestVersions),
}
headscaleConfig := map[string]string{
"HEADSCALE_DERP_URLS": "",
"HEADSCALE_DERP_SERVER_ENABLED": "true",
"HEADSCALE_DERP_SERVER_REGION_ID": "999",
"HEADSCALE_DERP_SERVER_REGION_CODE": "headscale",
"HEADSCALE_DERP_SERVER_REGION_NAME": "Headscale Embedded DERP",
"HEADSCALE_DERP_SERVER_STUN_LISTEN_ADDR": "0.0.0.0:3478",
"HEADSCALE_DERP_SERVER_PRIVATE_KEY_PATH": "/tmp/derp.key",
// Envknob for enabling DERP debug logs
"DERP_DEBUG_LOGS": "true",
"DERP_PROBER_DEBUG_LOGS": "true",
}
err = scenario.CreateHeadscaleEnv(spec,
[]tsic.Option{},
hsic.WithTestName("pingallbyip"),
hsic.WithConfigEnv(headscaleConfig),
hsic.WithTLS(),
hsic.WithHostnameAsServerURL(),
)
assertNoErrHeadscaleEnv(t, err)
allClients, err := scenario.ListTailscaleClients()
assertNoErrListClients(t, err)
allIps, err := scenario.ListTailscaleClientsIPs()
assertNoErrListClientIPs(t, err)
err = scenario.WaitForTailscaleSync()
assertNoErrSync(t, err)
// assertClientsState(t, allClients)
allAddrs := lo.Map(allIps, func(x netip.Addr, index int) string {
return x.String()
})
success := pingAllHelper(t, allClients, allAddrs)
t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
for run := range 3 {
t.Logf("Starting DownUpPing run %d", run+1)
for _, client := range allClients {
t.Logf("taking down %q", client.Hostname())
client.Down()
}
time.Sleep(5 * time.Second)
for _, client := range allClients {
t.Logf("bringing up %q", client.Hostname())
client.Up()
}
time.Sleep(5 * time.Second)
err = scenario.WaitForTailscaleSync()
assertNoErrSync(t, err)
success := pingAllHelper(t, allClients, allAddrs)
t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
}
}