From 47938c373301a3a506d73f793ea3bc8256beb428 Mon Sep 17 00:00:00 2001 From: danielqsj Date: Wed, 26 Dec 2018 17:51:16 +0800 Subject: [PATCH 1/3] Change storage metrics to conform guideline --- .../pkg/storage/etcd/metrics/metrics.go | 89 ++++++++++++++++--- .../apiserver/pkg/storage/value/metrics.go | 42 +++++++-- 2 files changed, 112 insertions(+), 19 deletions(-) diff --git a/staging/src/k8s.io/apiserver/pkg/storage/etcd/metrics/metrics.go b/staging/src/k8s.io/apiserver/pkg/storage/etcd/metrics/metrics.go index 96385f6e601a..e1c37fb509e9 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/etcd/metrics/metrics.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/etcd/metrics/metrics.go @@ -25,37 +25,37 @@ import ( var ( cacheHitCounterOpts = prometheus.CounterOpts{ - Name: "etcd_helper_cache_hit_count", + Name: "etcd_helper_cache_hit_total", Help: "Counter of etcd helper cache hits.", } cacheHitCounter = prometheus.NewCounter(cacheHitCounterOpts) cacheMissCounterOpts = prometheus.CounterOpts{ - Name: "etcd_helper_cache_miss_count", + Name: "etcd_helper_cache_miss_total", Help: "Counter of etcd helper cache miss.", } cacheMissCounter = prometheus.NewCounter(cacheMissCounterOpts) cacheEntryCounterOpts = prometheus.CounterOpts{ - Name: "etcd_helper_cache_entry_count", + Name: "etcd_helper_cache_entry_total", Help: "Counter of etcd helper cache entries. This can be different from etcd_helper_cache_miss_count " + "because two concurrent threads can miss the cache and generate the same entry twice.", } cacheEntryCounter = prometheus.NewCounter(cacheEntryCounterOpts) cacheGetLatency = prometheus.NewSummary( prometheus.SummaryOpts{ - Name: "etcd_request_cache_get_latencies_summary", - Help: "Latency in microseconds of getting an object from etcd cache", + Name: "etcd_request_cache_get_latency_seconds", + Help: "Latency in seconds of getting an object from etcd cache", }, ) cacheAddLatency = prometheus.NewSummary( prometheus.SummaryOpts{ - Name: "etcd_request_cache_add_latencies_summary", - Help: "Latency in microseconds of adding an object to etcd cache", + Name: "etcd_request_cache_add_latency_seconds", + Help: "Latency in seconds of adding an object to etcd cache", }, ) etcdRequestLatenciesSummary = prometheus.NewSummaryVec( prometheus.SummaryOpts{ - Name: "etcd_request_latencies_summary", - Help: "Etcd request latency summary in microseconds for each operation and object type.", + Name: "etcd_request_latency_seconds", + Help: "Etcd request latency summary in seconds for each operation and object type.", }, []string{"operation", "type"}, ) @@ -66,6 +66,42 @@ var ( }, []string{"resource"}, ) + + deprecatedCacheHitCounterOpts = prometheus.CounterOpts{ + Name: "etcd_helper_cache_hit_count", + Help: "Counter of etcd helper cache hits.", + } + deprecatedCacheHitCounter = prometheus.NewCounter(deprecatedCacheHitCounterOpts) + deprecatedCacheMissCounterOpts = prometheus.CounterOpts{ + Name: "etcd_helper_cache_miss_count", + Help: "Counter of etcd helper cache miss.", + } + deprecatedCacheMissCounter = prometheus.NewCounter(deprecatedCacheMissCounterOpts) + deprecatedCacheEntryCounterOpts = prometheus.CounterOpts{ + Name: "etcd_helper_cache_entry_count", + Help: "Counter of etcd helper cache entries. This can be different from etcd_helper_cache_miss_count " + + "because two concurrent threads can miss the cache and generate the same entry twice.", + } + deprecatedCacheEntryCounter = prometheus.NewCounter(deprecatedCacheEntryCounterOpts) + deprecatedCacheGetLatency = prometheus.NewSummary( + prometheus.SummaryOpts{ + Name: "etcd_request_cache_get_latencies_summary", + Help: "Latency in microseconds of getting an object from etcd cache", + }, + ) + deprecatedCacheAddLatency = prometheus.NewSummary( + prometheus.SummaryOpts{ + Name: "etcd_request_cache_add_latencies_summary", + Help: "Latency in microseconds of adding an object to etcd cache", + }, + ) + deprecatedEtcdRequestLatenciesSummary = prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Name: "etcd_request_latencies_summary", + Help: "Etcd request latency summary in microseconds for each operation and object type.", + }, + []string{"operation", "type"}, + ) ) var registerMetrics sync.Once @@ -81,6 +117,14 @@ func Register() { prometheus.MustRegister(cacheGetLatency) prometheus.MustRegister(etcdRequestLatenciesSummary) prometheus.MustRegister(objectCounts) + + // TODO(danielqsj): Remove the following metrics, they are deprecated + prometheus.MustRegister(deprecatedCacheHitCounter) + prometheus.MustRegister(deprecatedCacheMissCounter) + prometheus.MustRegister(deprecatedCacheEntryCounter) + prometheus.MustRegister(deprecatedCacheAddLatency) + prometheus.MustRegister(deprecatedCacheGetLatency) + prometheus.MustRegister(deprecatedEtcdRequestLatenciesSummary) }) } @@ -89,27 +133,33 @@ func UpdateObjectCount(resourcePrefix string, count int64) { } func RecordEtcdRequestLatency(verb, resource string, startTime time.Time) { - etcdRequestLatenciesSummary.WithLabelValues(verb, resource).Observe(float64(time.Since(startTime) / time.Microsecond)) + etcdRequestLatenciesSummary.WithLabelValues(verb, resource).Observe(sinceInSeconds(startTime)) + deprecatedEtcdRequestLatenciesSummary.WithLabelValues(verb, resource).Observe(sinceInMicroseconds(startTime)) } func ObserveGetCache(startTime time.Time) { - cacheGetLatency.Observe(float64(time.Since(startTime) / time.Microsecond)) + cacheGetLatency.Observe(sinceInSeconds(startTime)) + deprecatedCacheGetLatency.Observe(sinceInMicroseconds(startTime)) } func ObserveAddCache(startTime time.Time) { - cacheAddLatency.Observe(float64(time.Since(startTime) / time.Microsecond)) + cacheAddLatency.Observe(sinceInSeconds(startTime)) + deprecatedCacheAddLatency.Observe(sinceInMicroseconds(startTime)) } func ObserveCacheHit() { cacheHitCounter.Inc() + deprecatedCacheHitCounter.Inc() } func ObserveCacheMiss() { cacheMissCounter.Inc() + deprecatedCacheMissCounter.Inc() } func ObserveNewEntry() { cacheEntryCounter.Inc() + deprecatedCacheEntryCounter.Inc() } func Reset() { @@ -119,4 +169,19 @@ func Reset() { // TODO: Reset cacheAddLatency. // TODO: Reset cacheGetLatency. etcdRequestLatenciesSummary.Reset() + + deprecatedCacheHitCounter = prometheus.NewCounter(deprecatedCacheHitCounterOpts) + deprecatedCacheMissCounter = prometheus.NewCounter(deprecatedCacheMissCounterOpts) + deprecatedCacheEntryCounter = prometheus.NewCounter(deprecatedCacheEntryCounterOpts) + deprecatedEtcdRequestLatenciesSummary.Reset() +} + +// sinceInMicroseconds gets the time since the specified start in microseconds. +func sinceInMicroseconds(start time.Time) float64 { + return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds()) +} + +// sinceInSeconds gets the time since the specified start in seconds. +func sinceInSeconds(start time.Time) float64 { + return time.Since(start).Seconds() } diff --git a/staging/src/k8s.io/apiserver/pkg/storage/value/metrics.go b/staging/src/k8s.io/apiserver/pkg/storage/value/metrics.go index 1fe316782146..f16cc7344d0f 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/value/metrics.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/value/metrics.go @@ -30,6 +30,18 @@ const ( var ( transformerLatencies = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "transformation_latencies_seconds", + Help: "Latencies in seconds of value transformation operations.", + // In-process transformations (ex. AES CBC) complete on the order of 20 microseconds. However, when + // external KMS is involved latencies may climb into milliseconds. + Buckets: prometheus.ExponentialBuckets(5e-6, 2, 14), + }, + []string{"transformation_type"}, + ) + deprecatedTransformerLatencies = prometheus.NewHistogramVec( prometheus.HistogramOpts{ Namespace: namespace, Subsystem: subsystem, @@ -61,6 +73,15 @@ var ( ) dataKeyGenerationLatencies = prometheus.NewHistogram( + prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "data_key_generation_latencies_seconds", + Help: "Latencies in seconds of data encryption key(DEK) generation operations.", + Buckets: prometheus.ExponentialBuckets(5e-6, 2, 14), + }, + ) + deprecatedDataKeyGenerationLatencies = prometheus.NewHistogram( prometheus.HistogramOpts{ Namespace: namespace, Subsystem: subsystem, @@ -84,9 +105,11 @@ var registerMetrics sync.Once func RegisterMetrics() { registerMetrics.Do(func() { prometheus.MustRegister(transformerLatencies) + prometheus.MustRegister(deprecatedTransformerLatencies) prometheus.MustRegister(transformerFailuresTotal) prometheus.MustRegister(envelopeTransformationCacheMissTotal) prometheus.MustRegister(dataKeyGenerationLatencies) + prometheus.MustRegister(deprecatedDataKeyGenerationLatencies) prometheus.MustRegister(dataKeyGenerationFailuresTotal) }) } @@ -98,8 +121,8 @@ func RecordTransformation(transformationType string, start time.Time, err error) return } - since := sinceInMicroseconds(start) - transformerLatencies.WithLabelValues(transformationType).Observe(float64(since)) + transformerLatencies.WithLabelValues(transformationType).Observe(sinceInSeconds(start)) + deprecatedTransformerLatencies.WithLabelValues(transformationType).Observe(sinceInMicroseconds(start)) } // RecordCacheMiss records a miss on Key Encryption Key(KEK) - call to KMS was required to decrypt KEK. @@ -114,11 +137,16 @@ func RecordDataKeyGeneration(start time.Time, err error) { return } - since := sinceInMicroseconds(start) - dataKeyGenerationLatencies.Observe(float64(since)) + dataKeyGenerationLatencies.Observe(sinceInSeconds(start)) + deprecatedDataKeyGenerationLatencies.Observe(sinceInMicroseconds(start)) +} + +// sinceInMicroseconds gets the time since the specified start in microseconds. +func sinceInMicroseconds(start time.Time) float64 { + return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds()) } -func sinceInMicroseconds(start time.Time) int64 { - elapsedNanoseconds := time.Since(start).Nanoseconds() - return elapsedNanoseconds / int64(time.Microsecond) +// sinceInSeconds gets the time since the specified start in seconds. +func sinceInSeconds(start time.Time) float64 { + return time.Since(start).Seconds() } From 88c4b644005c5f674e9d03abae7e1cccda4fd23e Mon Sep 17 00:00:00 2001 From: danielqsj Date: Wed, 26 Dec 2018 22:29:13 +0800 Subject: [PATCH 2/3] Change endpoints metrics to conform guideline --- .../pkg/endpoints/metrics/metrics.go | 47 +++++++++++++++++-- .../pkg/server/filters/maxinflight.go | 2 + 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go b/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go index 27f416e6b5c7..5b35a432a329 100644 --- a/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go +++ b/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go @@ -48,6 +48,13 @@ var ( // TODO(a-robinson): Add unit tests for the handling of these metrics once // the upstream library supports it. requestCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "apiserver_request_total", + Help: "Counter of apiserver requests broken out for each verb, group, version, resource, scope, component, client, and HTTP response contentType and code.", + }, + []string{"verb", "group", "version", "resource", "subresource", "scope", "component", "client", "contentType", "code"}, + ) + deprecatedRequestCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "apiserver_request_count", Help: "Counter of apiserver requests broken out for each verb, group, version, resource, scope, component, client, and HTTP response contentType and code.", @@ -62,6 +69,25 @@ var ( []string{"verb", "group", "version", "resource", "subresource", "scope", "component"}, ) requestLatencies = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "apiserver_request_latency_seconds", + Help: "Response latency distribution in seconds for each verb, group, version, resource, subresource, scope and component.", + // Use buckets ranging from 125 ms to 8 seconds. + Buckets: prometheus.ExponentialBuckets(0.125, 2.0, 7), + }, + []string{"verb", "group", "version", "resource", "subresource", "scope", "component"}, + ) + requestLatenciesSummary = prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Name: "apiserver_request_latency_seconds_summary", + Help: "Response latency summary in seconds for each verb, group, version, resource, subresource, scope and component.", + // Make the sliding window of 5h. + // TODO: The value for this should be based on our SLI definition (medium term). + MaxAge: 5 * time.Hour, + }, + []string{"verb", "group", "version", "resource", "subresource", "scope", "component"}, + ) + deprecatedRequestLatencies = prometheus.NewHistogramVec( prometheus.HistogramOpts{ Name: "apiserver_request_latencies", Help: "Response latency distribution in microseconds for each verb, group, version, resource, subresource, scope and component.", @@ -70,7 +96,7 @@ var ( }, []string{"verb", "group", "version", "resource", "subresource", "scope", "component"}, ) - requestLatenciesSummary = prometheus.NewSummaryVec( + deprecatedRequestLatenciesSummary = prometheus.NewSummaryVec( prometheus.SummaryOpts{ Name: "apiserver_request_latencies_summary", Help: "Response latency summary in microseconds for each verb, group, version, resource, subresource, scope and component.", @@ -91,6 +117,13 @@ var ( ) // DroppedRequests is a number of requests dropped with 'Try again later' response" DroppedRequests = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "apiserver_dropped_requests_total", + Help: "Number of requests dropped with 'Try again later' response", + }, + []string{"requestKind"}, + ) + DeprecatedDroppedRequests = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "apiserver_dropped_requests", Help: "Number of requests dropped with 'Try again later' response", @@ -118,11 +151,15 @@ var ( metrics = []resettableCollector{ requestCounter, + deprecatedRequestCounter, longRunningRequestGauge, requestLatencies, requestLatenciesSummary, + deprecatedRequestLatencies, + deprecatedRequestLatenciesSummary, responseSizes, DroppedRequests, + DeprecatedDroppedRequests, RegisteredWatchers, currentInflightRequests, } @@ -198,9 +235,13 @@ func MonitorRequest(req *http.Request, verb, group, version, resource, subresour reportedVerb := cleanVerb(verb, req) client := cleanUserAgent(utilnet.GetHTTPClient(req)) elapsedMicroseconds := float64(elapsed / time.Microsecond) + elapsedSeconds := elapsed.Seconds() requestCounter.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component, client, contentType, codeToString(httpCode)).Inc() - requestLatencies.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds) - requestLatenciesSummary.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds) + deprecatedRequestCounter.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component, client, contentType, codeToString(httpCode)).Inc() + requestLatencies.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedSeconds) + deprecatedRequestLatencies.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds) + requestLatenciesSummary.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedSeconds) + deprecatedRequestLatenciesSummary.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds) // We are only interested in response sizes of read requests. if verb == "GET" || verb == "LIST" { responseSizes.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(float64(respSize)) diff --git a/staging/src/k8s.io/apiserver/pkg/server/filters/maxinflight.go b/staging/src/k8s.io/apiserver/pkg/server/filters/maxinflight.go index cc10d0abd8fc..f7bc691c762b 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/filters/maxinflight.go +++ b/staging/src/k8s.io/apiserver/pkg/server/filters/maxinflight.go @@ -163,8 +163,10 @@ func WithMaxInFlightLimit( // We need to split this data between buckets used for throttling. if isMutatingRequest { metrics.DroppedRequests.WithLabelValues(metrics.MutatingKind).Inc() + metrics.DeprecatedDroppedRequests.WithLabelValues(metrics.MutatingKind).Inc() } else { metrics.DroppedRequests.WithLabelValues(metrics.ReadOnlyKind).Inc() + metrics.DeprecatedDroppedRequests.WithLabelValues(metrics.ReadOnlyKind).Inc() } // at this point we're about to return a 429, BUT not all actors should be rate limited. A system:master is so powerful // that they should always get an answer. It's a super-admin or a loopback connection. From 8b418631c08cf66ee57c9ec31fe95372a0a3e075 Mon Sep 17 00:00:00 2001 From: danielqsj Date: Wed, 2 Jan 2019 13:50:02 +0800 Subject: [PATCH 3/3] Move etcd latency metrics to histogram and update test case --- .../pkg/endpoints/metrics/metrics.go | 20 +++--------- .../pkg/storage/etcd/metrics/metrics.go | 32 +++++++++---------- .../apiserver/pkg/storage/value/metrics.go | 4 +-- test/e2e/framework/metrics_util.go | 30 ++++++++--------- test/integration/metrics/metrics_test.go | 4 +-- 5 files changed, 39 insertions(+), 51 deletions(-) diff --git a/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go b/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go index 5b35a432a329..f32fbd83ec32 100644 --- a/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go +++ b/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go @@ -57,7 +57,7 @@ var ( deprecatedRequestCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "apiserver_request_count", - Help: "Counter of apiserver requests broken out for each verb, group, version, resource, scope, component, client, and HTTP response contentType and code.", + Help: "(Deprecated) Counter of apiserver requests broken out for each verb, group, version, resource, scope, component, client, and HTTP response contentType and code.", }, []string{"verb", "group", "version", "resource", "subresource", "scope", "component", "client", "contentType", "code"}, ) @@ -77,20 +77,10 @@ var ( }, []string{"verb", "group", "version", "resource", "subresource", "scope", "component"}, ) - requestLatenciesSummary = prometheus.NewSummaryVec( - prometheus.SummaryOpts{ - Name: "apiserver_request_latency_seconds_summary", - Help: "Response latency summary in seconds for each verb, group, version, resource, subresource, scope and component.", - // Make the sliding window of 5h. - // TODO: The value for this should be based on our SLI definition (medium term). - MaxAge: 5 * time.Hour, - }, - []string{"verb", "group", "version", "resource", "subresource", "scope", "component"}, - ) deprecatedRequestLatencies = prometheus.NewHistogramVec( prometheus.HistogramOpts{ Name: "apiserver_request_latencies", - Help: "Response latency distribution in microseconds for each verb, group, version, resource, subresource, scope and component.", + Help: "(Deprecated) Response latency distribution in microseconds for each verb, group, version, resource, subresource, scope and component.", // Use buckets ranging from 125 ms to 8 seconds. Buckets: prometheus.ExponentialBuckets(125000, 2.0, 7), }, @@ -99,7 +89,7 @@ var ( deprecatedRequestLatenciesSummary = prometheus.NewSummaryVec( prometheus.SummaryOpts{ Name: "apiserver_request_latencies_summary", - Help: "Response latency summary in microseconds for each verb, group, version, resource, subresource, scope and component.", + Help: "(Deprecated) Response latency summary in microseconds for each verb, group, version, resource, subresource, scope and component.", // Make the sliding window of 5h. // TODO: The value for this should be based on our SLI definition (medium term). MaxAge: 5 * time.Hour, @@ -126,7 +116,7 @@ var ( DeprecatedDroppedRequests = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "apiserver_dropped_requests", - Help: "Number of requests dropped with 'Try again later' response", + Help: "(Deprecated) Number of requests dropped with 'Try again later' response", }, []string{"requestKind"}, ) @@ -154,7 +144,6 @@ var ( deprecatedRequestCounter, longRunningRequestGauge, requestLatencies, - requestLatenciesSummary, deprecatedRequestLatencies, deprecatedRequestLatenciesSummary, responseSizes, @@ -240,7 +229,6 @@ func MonitorRequest(req *http.Request, verb, group, version, resource, subresour deprecatedRequestCounter.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component, client, contentType, codeToString(httpCode)).Inc() requestLatencies.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedSeconds) deprecatedRequestLatencies.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds) - requestLatenciesSummary.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedSeconds) deprecatedRequestLatenciesSummary.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds) // We are only interested in response sizes of read requests. if verb == "GET" || verb == "LIST" { diff --git a/staging/src/k8s.io/apiserver/pkg/storage/etcd/metrics/metrics.go b/staging/src/k8s.io/apiserver/pkg/storage/etcd/metrics/metrics.go index e1c37fb509e9..ad9def44563a 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/etcd/metrics/metrics.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/etcd/metrics/metrics.go @@ -40,22 +40,22 @@ var ( "because two concurrent threads can miss the cache and generate the same entry twice.", } cacheEntryCounter = prometheus.NewCounter(cacheEntryCounterOpts) - cacheGetLatency = prometheus.NewSummary( - prometheus.SummaryOpts{ + cacheGetLatency = prometheus.NewHistogram( + prometheus.HistogramOpts{ Name: "etcd_request_cache_get_latency_seconds", Help: "Latency in seconds of getting an object from etcd cache", }, ) - cacheAddLatency = prometheus.NewSummary( - prometheus.SummaryOpts{ + cacheAddLatency = prometheus.NewHistogram( + prometheus.HistogramOpts{ Name: "etcd_request_cache_add_latency_seconds", Help: "Latency in seconds of adding an object to etcd cache", }, ) - etcdRequestLatenciesSummary = prometheus.NewSummaryVec( - prometheus.SummaryOpts{ + etcdRequestLatency = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ Name: "etcd_request_latency_seconds", - Help: "Etcd request latency summary in seconds for each operation and object type.", + Help: "Etcd request latency in seconds for each operation and object type.", }, []string{"operation", "type"}, ) @@ -69,36 +69,36 @@ var ( deprecatedCacheHitCounterOpts = prometheus.CounterOpts{ Name: "etcd_helper_cache_hit_count", - Help: "Counter of etcd helper cache hits.", + Help: "(Deprecated) Counter of etcd helper cache hits.", } deprecatedCacheHitCounter = prometheus.NewCounter(deprecatedCacheHitCounterOpts) deprecatedCacheMissCounterOpts = prometheus.CounterOpts{ Name: "etcd_helper_cache_miss_count", - Help: "Counter of etcd helper cache miss.", + Help: "(Deprecated) Counter of etcd helper cache miss.", } deprecatedCacheMissCounter = prometheus.NewCounter(deprecatedCacheMissCounterOpts) deprecatedCacheEntryCounterOpts = prometheus.CounterOpts{ Name: "etcd_helper_cache_entry_count", - Help: "Counter of etcd helper cache entries. This can be different from etcd_helper_cache_miss_count " + + Help: "(Deprecated) Counter of etcd helper cache entries. This can be different from etcd_helper_cache_miss_count " + "because two concurrent threads can miss the cache and generate the same entry twice.", } deprecatedCacheEntryCounter = prometheus.NewCounter(deprecatedCacheEntryCounterOpts) deprecatedCacheGetLatency = prometheus.NewSummary( prometheus.SummaryOpts{ Name: "etcd_request_cache_get_latencies_summary", - Help: "Latency in microseconds of getting an object from etcd cache", + Help: "(Deprecated) Latency in microseconds of getting an object from etcd cache", }, ) deprecatedCacheAddLatency = prometheus.NewSummary( prometheus.SummaryOpts{ Name: "etcd_request_cache_add_latencies_summary", - Help: "Latency in microseconds of adding an object to etcd cache", + Help: "(Deprecated) Latency in microseconds of adding an object to etcd cache", }, ) deprecatedEtcdRequestLatenciesSummary = prometheus.NewSummaryVec( prometheus.SummaryOpts{ Name: "etcd_request_latencies_summary", - Help: "Etcd request latency summary in microseconds for each operation and object type.", + Help: "(Deprecated) Etcd request latency summary in microseconds for each operation and object type.", }, []string{"operation", "type"}, ) @@ -115,7 +115,7 @@ func Register() { prometheus.MustRegister(cacheEntryCounter) prometheus.MustRegister(cacheAddLatency) prometheus.MustRegister(cacheGetLatency) - prometheus.MustRegister(etcdRequestLatenciesSummary) + prometheus.MustRegister(etcdRequestLatency) prometheus.MustRegister(objectCounts) // TODO(danielqsj): Remove the following metrics, they are deprecated @@ -133,7 +133,7 @@ func UpdateObjectCount(resourcePrefix string, count int64) { } func RecordEtcdRequestLatency(verb, resource string, startTime time.Time) { - etcdRequestLatenciesSummary.WithLabelValues(verb, resource).Observe(sinceInSeconds(startTime)) + etcdRequestLatency.WithLabelValues(verb, resource).Observe(sinceInSeconds(startTime)) deprecatedEtcdRequestLatenciesSummary.WithLabelValues(verb, resource).Observe(sinceInMicroseconds(startTime)) } @@ -168,7 +168,7 @@ func Reset() { cacheEntryCounter = prometheus.NewCounter(cacheEntryCounterOpts) // TODO: Reset cacheAddLatency. // TODO: Reset cacheGetLatency. - etcdRequestLatenciesSummary.Reset() + etcdRequestLatency.Reset() deprecatedCacheHitCounter = prometheus.NewCounter(deprecatedCacheHitCounterOpts) deprecatedCacheMissCounter = prometheus.NewCounter(deprecatedCacheMissCounterOpts) diff --git a/staging/src/k8s.io/apiserver/pkg/storage/value/metrics.go b/staging/src/k8s.io/apiserver/pkg/storage/value/metrics.go index f16cc7344d0f..17961ad07597 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/value/metrics.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/value/metrics.go @@ -46,7 +46,7 @@ var ( Namespace: namespace, Subsystem: subsystem, Name: "transformation_latencies_microseconds", - Help: "Latencies in microseconds of value transformation operations.", + Help: "(Deprecated) Latencies in microseconds of value transformation operations.", // In-process transformations (ex. AES CBC) complete on the order of 20 microseconds. However, when // external KMS is involved latencies may climb into milliseconds. Buckets: prometheus.ExponentialBuckets(5, 2, 14), @@ -86,7 +86,7 @@ var ( Namespace: namespace, Subsystem: subsystem, Name: "data_key_generation_latencies_microseconds", - Help: "Latencies in microseconds of data encryption key(DEK) generation operations.", + Help: "(Deprecated) Latencies in microseconds of data encryption key(DEK) generation operations.", Buckets: prometheus.ExponentialBuckets(5, 2, 14), }, ) diff --git a/test/e2e/framework/metrics_util.go b/test/e2e/framework/metrics_util.go index 9f651df0d37f..cad85c0cff0e 100644 --- a/test/e2e/framework/metrics_util.go +++ b/test/e2e/framework/metrics_util.go @@ -136,14 +136,14 @@ func (m *MetricsForE2E) SummaryKind() string { var SchedulingLatencyMetricName = model.LabelValue(schedulermetric.SchedulerSubsystem + "_" + schedulermetric.SchedulingLatencyName) var InterestingApiServerMetrics = []string{ - "apiserver_request_count", - "apiserver_request_latencies_summary", - "etcd_helper_cache_entry_count", - "etcd_helper_cache_hit_count", - "etcd_helper_cache_miss_count", - "etcd_request_cache_add_latencies_summary", - "etcd_request_cache_get_latencies_summary", - "etcd_request_latencies_summary", + "apiserver_request_total", + "apiserver_request_latency_seconds_summary", + "etcd_helper_cache_entry_total", + "etcd_helper_cache_hit_total", + "etcd_helper_cache_miss_total", + "etcd_request_cache_add_latency_seconds", + "etcd_request_cache_get_latency_seconds", + "etcd_request_latency_seconds", } var InterestingControllerManagerMetrics = []string{ @@ -475,10 +475,10 @@ func readLatencyMetrics(c clientset.Interface) (*APIResponsiveness, error) { for _, sample := range samples { // Example line: - // apiserver_request_latencies_summary{resource="namespaces",verb="LIST",quantile="0.99"} 908 - // apiserver_request_count{resource="pods",verb="LIST",client="kubectl",code="200",contentType="json"} 233 - if sample.Metric[model.MetricNameLabel] != "apiserver_request_latencies_summary" && - sample.Metric[model.MetricNameLabel] != "apiserver_request_count" { + // apiserver_request_latency_seconds_summary{resource="namespaces",verb="LIST",quantile="0.99"} 0.000908 + // apiserver_request_total{resource="pods",verb="LIST",client="kubectl",code="200",contentType="json"} 233 + if sample.Metric[model.MetricNameLabel] != "apiserver_request_latency_seconds_summary" && + sample.Metric[model.MetricNameLabel] != "apiserver_request_total" { continue } @@ -491,14 +491,14 @@ func readLatencyMetrics(c clientset.Interface) (*APIResponsiveness, error) { } switch sample.Metric[model.MetricNameLabel] { - case "apiserver_request_latencies_summary": + case "apiserver_request_latency_seconds_summary": latency := sample.Value quantile, err := strconv.ParseFloat(string(sample.Metric[model.QuantileLabel]), 64) if err != nil { return nil, err } - a.addMetricRequestLatency(resource, subresource, verb, scope, quantile, time.Duration(int64(latency))*time.Microsecond) - case "apiserver_request_count": + a.addMetricRequestLatency(resource, subresource, verb, scope, quantile, time.Duration(int64(latency))*time.Second) + case "apiserver_request_total": count := sample.Value a.addMetricRequestCount(resource, subresource, verb, scope, int(count)) diff --git a/test/integration/metrics/metrics_test.go b/test/integration/metrics/metrics_test.go index 6eff0c1908e2..969dbe608848 100644 --- a/test/integration/metrics/metrics_test.go +++ b/test/integration/metrics/metrics_test.go @@ -120,7 +120,7 @@ func TestApiserverMetrics(t *testing.T) { t.Fatal(err) } checkForExpectedMetrics(t, metrics, []string{ - "apiserver_request_count", - "apiserver_request_latencies", + "apiserver_request_total", + "apiserver_request_latency_seconds", }) }