From a0bcd0f53f218636983cef2b31c747dafffc61ec Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 21 Jun 2021 17:24:44 -0400 Subject: [PATCH 1/4] More connection/logging tweaks await SendTracer(log); is still hanging sometimes, in the already-connected case and I'm not sure why /cc @TimLovellSmith --- src/StackExchange.Redis/ResultProcessor.cs | 2 +- src/StackExchange.Redis/ServerEndPoint.cs | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/StackExchange.Redis/ResultProcessor.cs b/src/StackExchange.Redis/ResultProcessor.cs index 463c1d073..9f8dd5c51 100644 --- a/src/StackExchange.Redis/ResultProcessor.cs +++ b/src/StackExchange.Redis/ResultProcessor.cs @@ -183,7 +183,7 @@ public virtual bool SetResult(PhysicalConnection connection, Message message, in { try { - logging.Log?.WriteLine($"Response from {bridge} / {message.CommandAndKey}: {result}"); + logging.Log?.WriteLine($"Response from {bridge?.Name} / {message.CommandAndKey}: {result}"); } catch { } } diff --git a/src/StackExchange.Redis/ServerEndPoint.cs b/src/StackExchange.Redis/ServerEndPoint.cs index 3cc020a46..58bcb5c3e 100755 --- a/src/StackExchange.Redis/ServerEndPoint.cs +++ b/src/StackExchange.Redis/ServerEndPoint.cs @@ -85,25 +85,24 @@ public Task OnConnectedAsync(LogProxy log = null, bool sendTracerIfConne { async Task IfConnectedAsync(LogProxy log, bool sendTracerIfConnected, bool autoConfigureIfConnected) { - log?.WriteLine($"{Format.ToString(this)}: OnConnectedAsync completed (already connected start)"); + log?.WriteLine($"{Format.ToString(this)}: OnConnectedAsync already connected start"); if (autoConfigureIfConnected) { - await AutoConfigureAsync(null, log); + await AutoConfigureAsync(null, log).ForAwait(); } if (sendTracerIfConnected) { - await SendTracer(log); + await SendTracer(log).ForAwait(); } - log?.WriteLine($"{Format.ToString(this)}: OnConnectedAsync completed (already connected end)"); + log?.WriteLine($"{Format.ToString(this)}: OnConnectedAsync already connected end"); return "Already connected"; } - log?.WriteLine($"{Format.ToString(this)}: OnConnectedAsync init (State={interactive?.ConnectionState})"); - if (!IsConnected) { + log?.WriteLine($"{Format.ToString(this)}: OnConnectedAsync init (State={interactive?.ConnectionState})"); var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); - tcs.Task.ContinueWith(t => log?.WriteLine($"{Format.ToString(this)}: OnConnectedAsync completed ({t.Result})")); + _ = tcs.Task.ContinueWith(t => log?.WriteLine($"{Format.ToString(this)}: OnConnectedAsync completed ({t.Result})")); lock (_pendingConnectionMonitors) { _pendingConnectionMonitors.Add(tcs); From 4f64ff1cc621ee200effdac5a81b80b2fb08e593 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 21 Jun 2021 19:40:43 -0400 Subject: [PATCH 2/4] Tests: improve logging --- tests/StackExchange.Redis.Tests/Migrate.cs | 5 +++-- tests/StackExchange.Redis.Tests/Sentinel.cs | 12 ++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/Migrate.cs b/tests/StackExchange.Redis.Tests/Migrate.cs index 8ddf7e837..0f91dc774 100644 --- a/tests/StackExchange.Redis.Tests/Migrate.cs +++ b/tests/StackExchange.Redis.Tests/Migrate.cs @@ -2,6 +2,7 @@ using System; using System.Linq; +using System.Text; using System.Threading.Tasks; using Xunit; using Xunit.Abstractions; @@ -17,8 +18,8 @@ public async Task Basic() { var fromConfig = new ConfigurationOptions { EndPoints = { { TestConfig.Current.SecureServer, TestConfig.Current.SecurePort } }, Password = TestConfig.Current.SecurePassword, AllowAdmin = true }; var toConfig = new ConfigurationOptions { EndPoints = { { TestConfig.Current.MasterServer, TestConfig.Current.MasterPort } }, AllowAdmin = true }; - using (var from = ConnectionMultiplexer.Connect(fromConfig)) - using (var to = ConnectionMultiplexer.Connect(toConfig)) + using (var from = ConnectionMultiplexer.Connect(fromConfig, Writer)) + using (var to = ConnectionMultiplexer.Connect(toConfig, Writer)) { if (await IsWindows(from) || await IsWindows(to)) Skip.Inconclusive("'migrate' is unreliable on redis-64"); diff --git a/tests/StackExchange.Redis.Tests/Sentinel.cs b/tests/StackExchange.Redis.Tests/Sentinel.cs index 520b2aeb0..332c37877 100644 --- a/tests/StackExchange.Redis.Tests/Sentinel.cs +++ b/tests/StackExchange.Redis.Tests/Sentinel.cs @@ -329,7 +329,7 @@ public void SentinelMastersTest() { var masterConfigs = SentinelServerA.SentinelMasters(); Assert.Single(masterConfigs); - Assert.True(masterConfigs[0].ToDictionary().ContainsKey("name")); + Assert.True(masterConfigs[0].ToDictionary().ContainsKey("name"), "replicaConfigs contains 'name'"); Assert.Equal(ServiceName, masterConfigs[0].ToDictionary()["name"]); Assert.StartsWith("master", masterConfigs[0].ToDictionary()["flags"]); foreach (var config in masterConfigs) @@ -346,7 +346,7 @@ public async Task SentinelMastersAsyncTest() { var masterConfigs = await SentinelServerA.SentinelMastersAsync().ForAwait(); Assert.Single(masterConfigs); - Assert.True(masterConfigs[0].ToDictionary().ContainsKey("name")); + Assert.True(masterConfigs[0].ToDictionary().ContainsKey("name"), "replicaConfigs contains 'name'"); Assert.Equal(ServiceName, masterConfigs[0].ToDictionary()["name"]); Assert.StartsWith("master", masterConfigs[0].ToDictionary()["flags"]); foreach (var config in masterConfigs) @@ -362,8 +362,8 @@ public async Task SentinelMastersAsyncTest() public void SentinelReplicasTest() { var replicaConfigs = SentinelServerA.SentinelReplicas(ServiceName); - Assert.True(replicaConfigs.Length > 0); - Assert.True(replicaConfigs[0].ToDictionary().ContainsKey("name")); + Assert.True(replicaConfigs.Length > 0, "Has replicaConfigs"); + Assert.True(replicaConfigs[0].ToDictionary().ContainsKey("name"), "replicaConfigs contains 'name'"); Assert.StartsWith("slave", replicaConfigs[0].ToDictionary()["flags"]); foreach (var config in replicaConfigs) @@ -379,8 +379,8 @@ public void SentinelReplicasTest() public async Task SentinelReplicasAsyncTest() { var replicaConfigs = await SentinelServerA.SentinelReplicasAsync(ServiceName).ForAwait(); - Assert.True(replicaConfigs.Length > 0); - Assert.True(replicaConfigs[0].ToDictionary().ContainsKey("name")); + Assert.True(replicaConfigs.Length > 0, "Has replicaConfigs"); + Assert.True(replicaConfigs[0].ToDictionary().ContainsKey("name"), "replicaConfigs contains 'name'"); Assert.StartsWith("slave", replicaConfigs[0].ToDictionary()["flags"]); foreach (var config in replicaConfigs) { From 756f96b1ffc49b948ef2ff246337470532cc26c4 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Mon, 21 Jun 2021 19:49:54 -0400 Subject: [PATCH 3/4] Migrate: test tweaks Add logging and increase allowance on delay - looks like we're pegging workers in CI but can't confirm. --- tests/StackExchange.Redis.Tests/Migrate.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/StackExchange.Redis.Tests/Migrate.cs b/tests/StackExchange.Redis.Tests/Migrate.cs index 0f91dc774..3aa797fc8 100644 --- a/tests/StackExchange.Redis.Tests/Migrate.cs +++ b/tests/StackExchange.Redis.Tests/Migrate.cs @@ -2,7 +2,6 @@ using System; using System.Linq; -using System.Text; using System.Threading.Tasks; using Xunit; using Xunit.Abstractions; @@ -31,13 +30,15 @@ public async Task Basic() toDb.KeyDelete(key, CommandFlags.FireAndForget); fromDb.StringSet(key, "foo", flags: CommandFlags.FireAndForget); var dest = to.GetEndPoints(true).Single(); + Log("Migrating key..."); fromDb.KeyMigrate(key, dest, migrateOptions: MigrateOptions.Replace); + Log("Migration command complete"); // this is *meant* to be synchronous at the redis level, but // we keep seeing it fail on the CI server where the key has *left* the origin, but // has *not* yet arrived at the destination; adding a pause while we investigate with // the redis folks - await UntilCondition(TimeSpan.FromSeconds(5), () => !fromDb.KeyExists(key) && toDb.KeyExists(key)); + await UntilCondition(TimeSpan.FromSeconds(15), () => !fromDb.KeyExists(key) && toDb.KeyExists(key)); Assert.False(fromDb.KeyExists(key), "Exists at source"); Assert.True(toDb.KeyExists(key), "Exists at destination"); From f7283da6f9ca0ea99070d9efe8dc40f42cedfc95 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Tue, 22 Jun 2021 09:23:07 -0400 Subject: [PATCH 4/4] Up tiebreaker time allowed and more logging This will log the physical connection state when we bomb, to help figure out where the stall is. --- .../ConnectionMultiplexer.cs | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/src/StackExchange.Redis/ConnectionMultiplexer.cs b/src/StackExchange.Redis/ConnectionMultiplexer.cs index 824ef5695..2ddb20e41 100755 --- a/src/StackExchange.Redis/ConnectionMultiplexer.cs +++ b/src/StackExchange.Redis/ConnectionMultiplexer.cs @@ -739,14 +739,6 @@ private async Task WaitAllIgnoreErrorsAsync(string name, Task[] tasks, int var allTasks = Task.WhenAll(tasks).ObserveErrors(); bool all = await allTasks.TimeoutAfter(timeoutMs: remaining).ObserveErrors().ForAwait(); LogWithThreadPoolStats(log, all ? $"All {tasks.Length} {name} tasks completed cleanly" : $"Not all {name} tasks completed cleanly (from {caller}#{callerLineNumber}, timeout {timeoutMilliseconds}ms)", out _); - // If we failed, log the details... - if (!all) - { - for (var i = 0; i < tasks.Length; i++) - { - log?.WriteLine($" Task[{i}] Status: {tasks[i].Status}"); - } - } return all; } catch @@ -1744,7 +1736,19 @@ internal async Task ReconfigureAsync(bool first, bool reconfigureAll, LogP var remaining = RawConfig.ConnectTimeout - checked((int)watch.ElapsedMilliseconds); log?.WriteLine($"Allowing {available.Length} endpoint(s) {TimeSpan.FromMilliseconds(remaining)} to respond..."); Trace("Allowing endpoints " + TimeSpan.FromMilliseconds(remaining) + " to respond..."); - await WaitAllIgnoreErrorsAsync("available", available, remaining, log).ForAwait(); + var allConnected = await WaitAllIgnoreErrorsAsync("available", available, remaining, log).ForAwait(); + + if (!allConnected) + { + // If we failed, log the details so we can debug why per connection + for (var i = 0; i < servers.Length; i++) + { + var server = servers[i]; + var task = available[i]; + server.GetOutstandingCount(RedisCommand.PING, out int inst, out int qs, out long @in, out int qu, out bool aw, out long toRead, out long toWrite, out var bs, out var rs, out var ws); + log?.WriteLine($" Server[{i}] ({Format.ToString(server)}) Status: {task.Status} (inst: {inst}, qs: {qs}, in: {@in}, qu: {qu}, aw: {aw}, in-pipe: {toRead}, out-pipe: {toWrite}, bw: {bs}, rs: {rs}. ws: {ws})"); + } + } // Log current state after await foreach (var server in servers) @@ -1755,6 +1759,7 @@ internal async Task ReconfigureAsync(bool first, bool reconfigureAll, LogP // After we've successfully connected (and authenticated), kickoff tie breakers if needed if (useTieBreakers) { + log?.WriteLine($"Election: Gathering tie-breakers..."); for (int i = 0; i < available.Length; i++) { var server = servers[i]; @@ -1880,7 +1885,7 @@ internal async Task ReconfigureAsync(bool first, bool reconfigureAll, LogP ServerSelectionStrategy.ServerType = ServerType.Standalone; } - var preferred = await NominatePreferredMaster(log, servers, useTieBreakers, tieBreakers, masters).ObserveErrors().ForAwait(); + var preferred = await NominatePreferredMaster(log, servers, useTieBreakers, tieBreakers, masters, timeoutMs: RawConfig.ConnectTimeout - checked((int)watch.ElapsedMilliseconds)).ObserveErrors().ForAwait(); foreach (var master in masters) { if (master == preferred || master.IsReplica) @@ -2010,14 +2015,14 @@ private void ResetAllNonConnected() partial void OnTraceLog(LogProxy log, [CallerMemberName] string caller = null); #pragma warning restore IDE0060 - private async Task NominatePreferredMaster(LogProxy log, ServerEndPoint[] servers, bool useTieBreakers, Task[] tieBreakers, List masters) + private async Task NominatePreferredMaster(LogProxy log, ServerEndPoint[] servers, bool useTieBreakers, Task[] tieBreakers, List masters, int timeoutMs) { Dictionary uniques = null; if (useTieBreakers) { // count the votes uniques = new Dictionary(StringComparer.OrdinalIgnoreCase); log?.WriteLine("Waiting for tiebreakers..."); - await WaitAllIgnoreErrorsAsync("tiebreaker", tieBreakers, 50, log).ForAwait(); + await WaitAllIgnoreErrorsAsync("tiebreaker", tieBreakers, Math.Max(timeoutMs, 200), log).ForAwait(); for (int i = 0; i < tieBreakers.Length; i++) { var ep = servers[i].EndPoint;